initial

pageserver: reduce import memory utilization (#12086)
## Problem

Imports can end up allocating too much memory.

## Summary of Changes

Reduce the default import job concurrency, soft size limit and checkpoint threshold, and add some logs.
2026-05-23 16:10:37 +00:00 · 2025-06-02 12:36:31 +01:00 · 2025-06-02 10:29:15 +00:00 · 2025-06-02 08:59:21 +00:00 · 2025-06-02 08:38:35 +00:00 · 2025-06-01 18:41:45 +00:00
126 changed files with 6081 additions and 2518 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -29,6 +29,41 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"

+[[package]]
+name = "aead"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0"
+dependencies = [
+ "crypto-common",
+ "generic-array",
+]
+
+[[package]]
+name = "aes"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
+dependencies = [
+ "cfg-if",
+ "cipher",
+ "cpufeatures",
+]
+
+[[package]]
+name = "aes-gcm"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1"
+dependencies = [
+ "aead",
+ "aes",
+ "cipher",
+ "ctr",
+ "ghash",
+ "subtle",
+]
+
 [[package]]
 name = "ahash"
 version = "0.8.11"
@@ -753,6 +788,7 @@ dependencies = [
 "axum",
 "axum-core",
 "bytes",
+ "cookie",
 "futures-util",
 "headers",
 "http 1.1.0",
@@ -1173,6 +1209,16 @@ dependencies = [
 "half",
 ]

+[[package]]
+name = "cipher"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
+dependencies = [
+ "crypto-common",
+ "inout",
+]
+
 [[package]]
 name = "clang-sys"
 version = "1.6.1"
@@ -1464,6 +1510,21 @@ dependencies = [
 "workspace_hack",
 ]

+[[package]]
+name = "cookie"
+version = "0.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747"
+dependencies = [
+ "aes-gcm",
+ "base64 0.22.1",
+ "percent-encoding",
+ "rand 0.8.5",
+ "subtle",
+ "time",
+ "version_check",
+]
+
 [[package]]
 name = "core-foundation"
 version = "0.9.3"
@@ -1657,9 +1718,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
 dependencies = [
 "generic-array",
+ "rand_core 0.6.4",
 "typenum",
 ]

+[[package]]
+name = "ctr"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835"
+dependencies = [
+ "cipher",
+]
+
 [[package]]
 name = "curve25519-dalek"
 version = "4.1.3"
@@ -2510,6 +2581,16 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "ghash"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1"
+dependencies = [
+ "opaque-debug",
+ "polyval",
+]
+
 [[package]]
 name = "gimli"
 version = "0.31.1"
@@ -3281,6 +3362,15 @@ dependencies = [
 "libc",
 ]

+[[package]]
+name = "inout"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01"
+dependencies = [
+ "generic-array",
+]
+
 [[package]]
 name = "instant"
 version = "0.1.12"
@@ -3794,6 +3884,15 @@ version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"

+[[package]]
+name = "nanoid"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ffa00dec017b5b1a8b7cf5e2c008bfda1aa7e0697ac1508b491fdf2622fb4d8"
+dependencies = [
+ "rand 0.8.5",
+]
+
 [[package]]
 name = "neon-shmem"
 version = "0.1.0"
@@ -4066,6 +4165,12 @@ version = "11.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"

+[[package]]
+name = "opaque-debug"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381"
+
 [[package]]
 name = "openssl-probe"
 version = "0.1.5"
@@ -4330,6 +4435,7 @@ dependencies = [
 "postgres_connection",
 "postgres_ffi",
 "postgres_initdb",
+ "posthog_client_lite",
 "pprof",
 "pq_proto",
 "procfs",
@@ -4458,9 +4564,15 @@ dependencies = [
 name = "pageserver_page_api"
 version = "0.1.0"
 dependencies = [
+ "bytes",
+ "pageserver_api",
+ "postgres_ffi",
 "prost 0.13.5",
+ "smallvec",
+ "thiserror 1.0.69",
 "tonic 0.13.1",
 "tonic-build",
+ "utils",
 "workspace_hack",
 ]

@@ -4578,6 +4690,31 @@ version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"

+[[package]]
+name = "paster"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "axum",
+ "axum-extra",
+ "base64 0.13.1",
+ "chrono",
+ "nanoid",
+ "rand 0.8.5",
+ "reqwest",
+ "rustls 0.23.27",
+ "rustls-native-certs 0.8.0",
+ "serde",
+ "serde_json",
+ "time",
+ "tokio",
+ "tokio-postgres",
+ "tokio-postgres-rustls",
+ "tracing",
+ "tracing-subscriber",
+ "workspace_hack",
+]
+
 [[package]]
 name = "pbkdf2"
 version = "0.12.2"
@@ -4750,6 +4887,18 @@ dependencies = [
 "never-say-never",
 ]

+[[package]]
+name = "polyval"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "opaque-debug",
+ "universal-hash",
+]
+
 [[package]]
 name = "portable-atomic"
 version = "1.10.0"
@@ -4901,11 +5050,16 @@ name = "posthog_client_lite"
 version = "0.1.0"
 dependencies = [
 "anyhow",
+ "arc-swap",
 "reqwest",
 "serde",
 "serde_json",
 "sha2",
 "thiserror 1.0.69",
+ "tokio",
+ "tokio-util",
+ "tracing",
+ "tracing-utils",
 "workspace_hack",
 ]

@@ -6547,6 +6701,32 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"

+[[package]]
+name = "shortener"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "axum",
+ "axum-extra",
+ "base64 0.13.1",
+ "chrono",
+ "cookie",
+ "nanoid",
+ "rand 0.8.5",
+ "reqwest",
+ "rustls 0.23.27",
+ "rustls-native-certs 0.8.0",
+ "serde",
+ "serde_json",
+ "time",
+ "tokio",
+ "tokio-postgres",
+ "tokio-postgres-rustls",
+ "tracing",
+ "tracing-subscriber",
+ "workspace_hack",
+]
+
 [[package]]
 name = "signal-hook"
 version = "0.3.15"
@@ -7915,6 +8095,16 @@ version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"

+[[package]]
+name = "universal-hash"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea"
+dependencies = [
+ "crypto-common",
+ "subtle",
+]
+
 [[package]]
 name = "untrusted"
 version = "0.9.0"
@@ -8547,6 +8737,7 @@ dependencies = [
 "anyhow",
 "axum",
 "axum-core",
+ "axum-extra",
 "base64 0.13.1",
 "base64 0.21.7",
 "base64ct",
@@ -8558,6 +8749,7 @@ dependencies = [
 "clap_builder",
 "const-oid",
 "crypto-bigint 0.5.5",
+ "crypto-common",
 "der 0.7.8",
 "deranged",
 "digest",
@@ -8569,10 +8761,8 @@ dependencies = [
 "fail",
 "form_urlencoded",
 "futures-channel",
- "futures-core",
 "futures-executor",
 "futures-io",
- "futures-task",
 "futures-util",
 "generic-array",
 "getrandom 0.2.11",
@@ -8602,7 +8792,6 @@ dependencies = [
 "once_cell",
 "p256 0.13.2",
 "parquet",
- "percent-encoding",
 "prettyplease",
 "proc-macro2",
 "prost 0.13.5",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,6 +13,8 @@ members = [
    "proxy",
    "safekeeper",
    "safekeeper/client",
+    "shortener",
+    "paster",
    "storage_broker",
    "storage_controller",
    "storage_controller/client",
@@ -247,6 +249,7 @@ azure_storage_blobs = { git = "https://github.com/neondatabase/azure-sdk-for-rus
 ## Local libraries
 compute_api = { version = "0.1", path = "./libs/compute_api/" }
 consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
+desim = { version = "0.1", path = "./libs/desim" }
 endpoint_storage = { version = "0.0.1", path = "./endpoint_storage/" }
 http-utils = { version = "0.1", path = "./libs/http-utils/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
@@ -259,19 +262,19 @@ postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
 postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
 postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
 postgres_initdb = { path = "./libs/postgres_initdb" }
+posthog_client_lite = { version = "0.1", path = "./libs/posthog_client_lite" }
 pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
 remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
 safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
 safekeeper_client = { path = "./safekeeper/client" }
-desim = { version = "0.1", path = "./libs/desim" }
 storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
 storage_controller_client = { path = "./storage_controller/client" }
 tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
 tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
 utils = { version = "0.1", path = "./libs/utils/" }
 vm_monitor = { version = "0.1", path = "./libs/vm_monitor/" }
-walproposer = { version = "0.1", path = "./libs/walproposer/" }
 wal_decoder = { version = "0.1", path = "./libs/wal_decoder" }
+walproposer = { version = "0.1", path = "./libs/walproposer/" }

 ## Common library dependency
 workspace_hack = { version = "0.1", path = "./workspace_hack/" }
--- a/build-tools.Dockerfile
+++ b/build-tools.Dockerfile
@@ -310,13 +310,13 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
    . "$HOME/.cargo/env" && \
    cargo --version && rustup --version && \
    rustup component add llvm-tools rustfmt clippy && \
-    cargo install rustfilt            --version ${RUSTFILT_VERSION} && \
-    cargo install cargo-hakari        --version ${CARGO_HAKARI_VERSION} && \
-    cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
-    cargo install cargo-hack          --version ${CARGO_HACK_VERSION} && \
-    cargo install cargo-nextest       --version ${CARGO_NEXTEST_VERSION} && \
-    cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
-    cargo install diesel_cli          --version ${CARGO_DIESEL_CLI_VERSION} \
+    cargo install rustfilt            --version ${RUSTFILT_VERSION} --locked && \
+    cargo install cargo-hakari        --version ${CARGO_HAKARI_VERSION} --locked && \
+    cargo install cargo-deny          --version ${CARGO_DENY_VERSION} --locked && \
+    cargo install cargo-hack          --version ${CARGO_HACK_VERSION} --locked && \
+    cargo install cargo-nextest       --version ${CARGO_NEXTEST_VERSION} --locked && \
+    cargo install cargo-chef          --version ${CARGO_CHEF_VERSION} --locked && \
+    cargo install diesel_cli          --version ${CARGO_DIESEL_CLI_VERSION} --locked \
                                      --features postgres-bundled --no-default-features && \
    rm -rf /home/nonroot/.cargo/registry && \
    rm -rf /home/nonroot/.cargo/git
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1847,7 +1847,7 @@ COPY docker-compose/ext-src/ /ext-src/
 COPY --from=pg-build /postgres /postgres
 #COPY --from=postgis-src /ext-src/ /ext-src/
 COPY --from=plv8-src /ext-src/ /ext-src/
-#COPY --from=h3-pg-src /ext-src/ /ext-src/
+COPY --from=h3-pg-src /ext-src/h3-pg-src /ext-src/h3-pg-src
 COPY --from=postgresql-unit-src /ext-src/ /ext-src/
 COPY --from=pgvector-src /ext-src/ /ext-src/
 COPY --from=pgjwt-src /ext-src/ /ext-src/
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -57,21 +57,6 @@ use tracing::{error, info};
 use url::Url;
 use utils::failpoint_support;

-// Compatibility hack: if the control plane specified any remote-ext-config
-// use the default value for extension storage proxy gateway.
-// Remove this once the control plane is updated to pass the gateway URL
-fn parse_remote_ext_base_url(arg: &str) -> Result<String> {
-    const FALLBACK_PG_EXT_GATEWAY_BASE_URL: &str =
-        "http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local";
-
-    Ok(if arg.starts_with("http") {
-        arg
-    } else {
-        FALLBACK_PG_EXT_GATEWAY_BASE_URL
-    }
-    .to_owned())
-}
-
 #[derive(Parser)]
 #[command(rename_all = "kebab-case")]
 struct Cli {
@@ -79,9 +64,8 @@ struct Cli {
    pub pgbin: String,

    /// The base URL for the remote extension storage proxy gateway.
-    /// Should be in the form of `http(s)://<gateway-hostname>[:<port>]`.
-    #[arg(short = 'r', long, value_parser = parse_remote_ext_base_url, alias = "remote-ext-config")]
-    pub remote_ext_base_url: Option<String>,
+    #[arg(short = 'r', long)]
+    pub remote_ext_base_url: Option<Url>,

    /// The port to bind the external listening HTTP server to. Clients running
    /// outside the compute will talk to the compute through this port. Keep
@@ -136,6 +120,10 @@ struct Cli {
        requires = "compute-id"
    )]
    pub control_plane_uri: Option<String>,
+
+    /// Interval in seconds for collecting installed extensions statistics
+    #[arg(long, default_value = "3600")]
+    pub installed_extensions_collection_interval: u64,
 }

 fn main() -> Result<()> {
@@ -179,6 +167,7 @@ fn main() -> Result<()> {
            cgroup: cli.cgroup,
            #[cfg(target_os = "linux")]
            vm_monitor_addr: cli.vm_monitor_addr,
+            installed_extensions_collection_interval: cli.installed_extensions_collection_interval,
        },
        config,
    )?;
@@ -271,18 +260,4 @@ mod test {
    fn verify_cli() {
        Cli::command().debug_assert()
    }
-
-    #[test]
-    fn parse_pg_ext_gateway_base_url() {
-        let arg = "http://pg-ext-s3-gateway2";
-        let result = super::parse_remote_ext_base_url(arg).unwrap();
-        assert_eq!(result, arg);
-
-        let arg = "pg-ext-s3-gateway";
-        let result = super::parse_remote_ext_base_url(arg).unwrap();
-        assert_eq!(
-            result,
-            "http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local"
-        );
-    }
 }
--- a/compute_tools/src/bin/fast_import.rs
+++ b/compute_tools/src/bin/fast_import.rs
@@ -339,6 +339,8 @@ async fn run_dump_restore(
    destination_connstring: String,
 ) -> Result<(), anyhow::Error> {
    let dumpdir = workdir.join("dumpdir");
+    let num_jobs = num_cpus::get().to_string();
+    info!("using {num_jobs} jobs for dump/restore");

    let common_args = [
        // schema mapping (prob suffices to specify them on one side)
@@ -354,7 +356,7 @@ async fn run_dump_restore(
        "directory".to_string(),
        // concurrency
        "--jobs".to_string(),
-        num_cpus::get().to_string(),
+        num_jobs,
        // progress updates
        "--verbose".to_string(),
    ];
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -31,6 +31,7 @@ use std::time::{Duration, Instant};
 use std::{env, fs};
 use tokio::spawn;
 use tracing::{Instrument, debug, error, info, instrument, warn};
+use url::Url;
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;
 use utils::measured_stream::MeasuredReader;
@@ -96,7 +97,10 @@ pub struct ComputeNodeParams {
    pub internal_http_port: u16,

    /// the address of extension storage proxy gateway
-    pub remote_ext_base_url: Option<String>,
+    pub remote_ext_base_url: Option<Url>,
+
+    /// Interval, in seconds, between installed extensions collections
+    pub installed_extensions_collection_interval: u64,
 }

 /// Compute node info shared across several `compute_ctl` threads.
@@ -695,25 +699,18 @@ impl ComputeNode {
                let log_directory_path = Path::new(&self.params.pgdata).join("log");
                let log_directory_path = log_directory_path.to_string_lossy().to_string();

-                // Add project_id,endpoint_id tag to identify the logs.
+                // Add project_id,endpoint_id to identify the logs.
                //
                // These ids are passed from cplane,
-                // for backwards compatibility (old computes that don't have them),
-                // we set them to None.
-                // TODO: Clean up this code when all computes have them.
-                let tag: Option<String> = match (
-                    pspec.spec.project_id.as_deref(),
-                    pspec.spec.endpoint_id.as_deref(),
-                ) {
-                    (Some(project_id), Some(endpoint_id)) => {
-                        Some(format!("{project_id}/{endpoint_id}"))
-                    }
-                    (Some(project_id), None) => Some(format!("{project_id}/None")),
-                    (None, Some(endpoint_id)) => Some(format!("None,{endpoint_id}")),
-                    (None, None) => None,
-                };
+                let endpoint_id = pspec.spec.endpoint_id.as_deref().unwrap_or("");
+                let project_id = pspec.spec.project_id.as_deref().unwrap_or("");

-                configure_audit_rsyslog(log_directory_path.clone(), tag, &remote_endpoint)?;
+                configure_audit_rsyslog(
+                    log_directory_path.clone(),
+                    endpoint_id,
+                    project_id,
+                    &remote_endpoint,
+                )?;

                // Launch a background task to clean up the audit logs
                launch_pgaudit_gc(log_directory_path);
@@ -749,17 +746,7 @@ impl ComputeNode {

            let conf = self.get_tokio_conn_conf(None);
            tokio::task::spawn(async {
-                let res = get_installed_extensions(conf).await;
-                match res {
-                    Ok(extensions) => {
-                        info!(
-                            "[NEON_EXT_STAT] {}",
-                            serde_json::to_string(&extensions)
-                                .expect("failed to serialize extensions list")
-                        );
-                    }
-                    Err(err) => error!("could not get installed extensions: {err:?}"),
-                }
+                let _ = installed_extensions(conf).await;
            });
        }

@@ -789,6 +776,9 @@ impl ComputeNode {
        // Log metrics so that we can search for slow operations in logs
        info!(?metrics, postmaster_pid = %postmaster_pid, "compute start finished");

+        // Spawn the extension stats background task
+        self.spawn_extension_stats_task();
+
        if pspec.spec.prewarm_lfc_on_startup {
            self.prewarm_lfc();
        }
@@ -2199,6 +2189,41 @@ LIMIT 100",
            info!("Pageserver config changed");
        }
    }
+
+    /// Spawns a detached task that logs installed-extension statistics every
+    /// `installed_extensions_collection_interval` seconds; an interval of 0 disables it.
+    pub fn spawn_extension_stats_task(&self) {
+        let secs = self.params.installed_extensions_collection_interval;
+        if secs == 0 {
+            // `tokio::time::interval` panics on a zero period.
+            return;
+        }
+        let conf = self.tokio_conn_conf.clone();
+        let period = tokio::time::Duration::from_secs(secs);
+        tokio::spawn(async move {
+            // Startup already collected once, so wait a full period before the first tick.
+            tokio::time::sleep(period).await;
+            let mut interval = tokio::time::interval(period);
+            loop {
+                interval.tick().await;
+                let _ = installed_extensions(conf.clone()).await;
+            }
+        });
+    }
+}
+
+/// Logs the installed extensions as a `[NEON_EXT_STAT]` JSON line; fetch errors are only logged.
+pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
+    match get_installed_extensions(conf).await {
+        Err(err) => error!("could not get installed extensions: {err:?}"),
+        Ok(extensions) => {
+            info!(
+                "[NEON_EXT_STAT] {}",
+                serde_json::to_string(&extensions).expect("failed to serialize extensions list")
+            );
+        }
+    }
+    Ok(())
 }

 pub fn forward_termination_signal() {
--- a/compute_tools/src/config_template/compute_audit_rsyslog_template.conf
+++ b/compute_tools/src/config_template/compute_audit_rsyslog_template.conf
@@ -2,10 +2,24 @@
 module(load="imfile")

 # Input configuration for log files in the specified directory
-# Replace {log_directory} with the directory containing the log files
-input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Facility="local0")
+# The messages can be multiline. The start of the message is a timestamp
+# in "%Y-%m-%d %H:%M:%S.%3N GMT" (so timezone hardcoded).
+# Replace log_directory with the directory containing the log files
+input(type="imfile" File="{log_directory}/*.log"
+  Tag="pgaudit_log" Severity="info" Facility="local5"
+  startmsg.regex="^[[:digit:]]{{4}}-[[:digit:]]{{2}}-[[:digit:]]{{2}} [[:digit:]]{{2}}:[[:digit:]]{{2}}:[[:digit:]]{{2}}.[[:digit:]]{{3}} GMT,")
+
 # the directory to store rsyslog state files
 global(workDirectory="/var/log/rsyslog")

-# Forward logs to remote syslog server
-*.* @@{remote_endpoint}
+# Construct the JSON payload, with endpoint_id and project_id as additional metadata
+set $.json_log!endpoint_id = "{endpoint_id}";
+set $.json_log!project_id = "{project_id}";
+set $.json_log!msg = $msg;
+
+# Template suitable for rfc5424 syslog format
+template(name="PgAuditLog" type="string"
+    string="<%PRI%>1 %TIMESTAMP:::date-rfc3339% %HOSTNAME% - - - - %$.json_log%")
+
+# Forward to remote syslog receiver (@@<hostname>:<port>;format)
+local5.info @@{remote_endpoint};PgAuditLog
--- a/compute_tools/src/extension_server.rs
+++ b/compute_tools/src/extension_server.rs
@@ -83,6 +83,7 @@ use reqwest::StatusCode;
 use tar::Archive;
 use tracing::info;
 use tracing::log::warn;
+use url::Url;
 use zstd::stream::read::Decoder;

 use crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
@@ -158,14 +159,14 @@ fn parse_pg_version(human_version: &str) -> PostgresMajorVersion {
 pub async fn download_extension(
    ext_name: &str,
    ext_path: &RemotePath,
-    remote_ext_base_url: &str,
+    remote_ext_base_url: &Url,
    pgbin: &str,
 ) -> Result<u64> {
    info!("Download extension {:?} from {:?}", ext_name, ext_path);

    // TODO add retry logic
    let download_buffer =
-        match download_extension_tar(remote_ext_base_url, &ext_path.to_string()).await {
+        match download_extension_tar(remote_ext_base_url.as_str(), &ext_path.to_string()).await {
            Ok(buffer) => buffer,
            Err(error_message) => {
                return Err(anyhow::anyhow!(
--- a/compute_tools/src/rsyslog.rs
+++ b/compute_tools/src/rsyslog.rs
@@ -84,13 +84,15 @@ fn restart_rsyslog() -> Result<()> {

 pub fn configure_audit_rsyslog(
    log_directory: String,
-    tag: Option<String>,
+    endpoint_id: &str,
+    project_id: &str,
    remote_endpoint: &str,
 ) -> Result<()> {
    let config_content: String = format!(
        include_str!("config_template/compute_audit_rsyslog_template.conf"),
        log_directory = log_directory,
-        tag = tag.unwrap_or("".to_string()),
+        endpoint_id = endpoint_id,
+        project_id = project_id,
        remote_endpoint = remote_endpoint
    );

--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -1279,6 +1279,7 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
                mode: pageserver_api::models::TimelineCreateRequestMode::Branch {
                    ancestor_timeline_id,
                    ancestor_start_lsn: start_lsn,
+                    read_only: false,
                    pg_version: None,
                },
            };
--- a/docker-compose/compute_wrapper/shell/compute.sh
+++ b/docker-compose/compute_wrapper/shell/compute.sh
@@ -20,7 +20,7 @@ first_path="$(ldconfig --verbose 2>/dev/null \
    | grep --invert-match ^$'\t' \
    | cut --delimiter=: --fields=1 \
    | head --lines=1)"
-test "$first_path" == '/usr/local/lib' || true # Remove the || true in a follow-up PR. Needed for backwards compat.
+test "$first_path" == '/usr/local/lib'

 echo "Waiting pageserver become ready."
 while ! nc -z pageserver 6400; do
--- a/docker-compose/ext-src/h3-pg-src/neon-test.sh
+++ b/docker-compose/ext-src/h3-pg-src/neon-test.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+set -ex  # trace commands and stop at the first failure
+cd "$(dirname "${0}")"  # work from the script's own directory
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress  # pg_regress path derived from the PGXS location
+dropdb --if-exists contrib_regression  # start the h3_postgis run from a fresh database
+createdb contrib_regression
+cd h3_postgis/test
+psql -d contrib_regression -c "CREATE EXTENSION postgis" -c "CREATE EXTENSION postgis_raster" -c "CREATE EXTENSION h3" -c "CREATE EXTENSION h3_postgis"
+TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')  # test names: sql/*.sql with the directory and suffix stripped
+${PG_REGRESS} --use-existing --dbname contrib_regression ${TESTS}  # unquoted TESTS word-splits into one argument per test
+cd ../../h3/test  # second pass: the h3 test directory
+TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
+dropdb --if-exists contrib_regression  # recreate the database; only h3 is installed for this pass
+createdb contrib_regression
+psql -d contrib_regression -c "CREATE EXTENSION h3"
+${PG_REGRESS} --use-existing --dbname contrib_regression ${TESTS}
--- a/docker-compose/ext-src/h3-pg-src/test-upgrade.sh
+++ b/docker-compose/ext-src/h3-pg-src/test-upgrade.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -ex  # trace commands and stop at the first failure
+cd "$(dirname "${0}")"  # work from the script's own directory, even if its path contains whitespace
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress  # pg_regress path derived from the PGXS location
+cd h3/test
+TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')  # test names: sql/*.sql with the directory and suffix stripped
+${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'  --dbname=contrib_regression  ${TESTS}
--- a/docker-compose/ext-src/online_advisor-src/neon-test.sh
+++ b/docker-compose/ext-src/online_advisor-src/neon-test.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -ex  # trace commands and stop at the first failure
+cd "$(dirname "${0}")"  # work from the script's own directory
+if [ -f Makefile ]; then  # without a Makefile nothing is run and the script succeeds
+  make installcheck
+fi
--- a/docker-compose/ext-src/online_advisor-src/regular-test.sh
+++ b/docker-compose/ext-src/online_advisor-src/regular-test.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+set -ex  # trace commands and stop at the first failure
+cd "$(dirname "${0}")"  # work from the script's own directory, even if its path contains whitespace
+[ -f Makefile ] || exit 0  # nothing to test when there is no Makefile
+dropdb --if-exists contrib_regression  # start from a fresh database
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress  # pg_regress path derived from the PGXS location
+TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')  # test names: sql/*.sql with the directory and suffix stripped
+${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ${TESTS}
--- a/docker-compose/test_extensions_upgrade.sh
+++ b/docker-compose/test_extensions_upgrade.sh
@@ -82,7 +82,8 @@ EXTENSIONS='[
 {"extname": "pg_ivm", "extdir": "pg_ivm-src"},
 {"extname": "pgjwt", "extdir": "pgjwt-src"},
 {"extname": "pgtap", "extdir": "pgtap-src"},
-{"extname": "pg_repack", "extdir": "pg_repack-src"}
+{"extname": "pg_repack", "extdir": "pg_repack-src"},
+{"extname": "h3", "extdir": "h3-pg-src"}
 ]'
 EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
 COMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d
--- a/libs/metrics/src/hll.rs
+++ b/libs/metrics/src/hll.rs
@@ -107,7 +107,7 @@ impl<const N: usize> MetricType for HyperLogLogState<N> {
 }

 impl<const N: usize> HyperLogLogState<N> {
-    pub fn measure(&self, item: &impl Hash) {
+    pub fn measure(&self, item: &(impl Hash + ?Sized)) {
        // changing the hasher will break compatibility with previous measurements.
        self.record(BuildHasherDefault::<xxh3::Hash64>::default().hash_one(item));
    }
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -27,6 +27,7 @@ pub use prometheus::{

 pub mod launch_timestamp;
 mod wrappers;
+pub use prometheus;
 pub use wrappers::{CountedReader, CountedWriter};
 mod hll;
 pub use hll::{HyperLogLog, HyperLogLogState, HyperLogLogVec};
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -45,6 +45,21 @@ pub struct NodeMetadata {
    pub other: HashMap<String, serde_json::Value>,
 }

+/// PostHog integration config.
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub struct PostHogConfig {
+    /// PostHog project ID
+    pub project_id: String,
+    /// Server-side (private) API key
+    pub server_api_key: String,
+    /// Client-side (public) API key
+    pub client_api_key: String,
+    /// Private API URL
+    pub private_api_url: String,
+    /// Public API URL
+    pub public_api_url: String,
+}
+
 /// `pageserver.toml`
 ///
 /// We use serde derive with `#[serde(default)]` to generate a deserializer
@@ -186,6 +201,8 @@ pub struct ConfigToml {
    pub tracing: Option<Tracing>,
    pub enable_tls_page_service_api: bool,
    pub dev_mode: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub posthog_config: Option<PostHogConfig>,
    pub timeline_import_config: TimelineImportConfig,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub basebackup_cache_config: Option<BasebackupCacheConfig>,
@@ -696,11 +713,12 @@ impl Default for ConfigToml {
            enable_tls_page_service_api: false,
            dev_mode: false,
            timeline_import_config: TimelineImportConfig {
-                import_job_concurrency: NonZeroUsize::new(128).unwrap(),
-                import_job_soft_size_limit: NonZeroUsize::new(1024 * 1024 * 1024).unwrap(),
-                import_job_checkpoint_threshold: NonZeroUsize::new(128).unwrap(),
+                import_job_concurrency: NonZeroUsize::new(32).unwrap(),
+                import_job_soft_size_limit: NonZeroUsize::new(256 * 1024 * 1024).unwrap(),
+                import_job_checkpoint_threshold: NonZeroUsize::new(32).unwrap(),
            },
            basebackup_cache_config: None,
+            posthog_config: None,
        }
    }
 }
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -354,6 +354,9 @@ pub struct ShardImportProgressV1 {
    pub completed: usize,
    /// Hash of the plan
    pub import_plan_hash: u64,
+    /// Soft limit for the job size
+    /// This needs to remain constant throughout the import
+    pub job_soft_size_limit: usize,
 }

 impl ShardImportStatus {
@@ -402,6 +405,8 @@ pub enum TimelineCreateRequestMode {
        // using a flattened enum, so, it was an accepted field, and
        // we continue to accept it by having it here.
        pg_version: Option<u32>,
+        #[serde(default, skip_serializing_if = "std::ops::Not::not")]
+        read_only: bool,
    },
    ImportPgdata {
        import_pgdata: TimelineCreateRequestModeImportPgdata,
--- a/libs/posthog_client_lite/Cargo.toml
+++ b/libs/posthog_client_lite/Cargo.toml
@@ -6,9 +6,14 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
+arc-swap.workspace = true
 reqwest.workspace = true
-serde.workspace = true
 serde_json.workspace = true
+serde.workspace = true
 sha2.workspace = true
-workspace_hack.workspace = true
 thiserror.workspace = true
+tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
+tokio-util.workspace = true
+tracing-utils.workspace = true
+tracing.workspace = true
+workspace_hack.workspace = true
--- a/libs/posthog_client_lite/src/background_loop.rs
+++ b/libs/posthog_client_lite/src/background_loop.rs
@@ -0,0 +1,64 @@
+//! A background loop that fetches feature flags from PostHog and updates the feature store.
+
+use std::{sync::Arc, time::Duration};
+
+use arc_swap::ArcSwap;
+use tokio_util::sync::CancellationToken;
+use tracing::{Instrument, info_span};
+
+use crate::{FeatureStore, PostHogClient, PostHogClientConfig};
+
+/// A background loop that fetches feature flags from PostHog and updates the feature store.
+pub struct FeatureResolverBackgroundLoop {
+    posthog_client: PostHogClient,
+    feature_store: ArcSwap<FeatureStore>,
+    cancel: CancellationToken,
+}
+
+impl FeatureResolverBackgroundLoop {
+    pub fn new(config: PostHogClientConfig, shutdown_pageserver: CancellationToken) -> Self {
+        Self {
+            posthog_client: PostHogClient::new(config),
+            feature_store: ArcSwap::new(Arc::new(FeatureStore::new())),
+            cancel: shutdown_pageserver,
+        }
+    }
+
+    pub fn spawn(self: Arc<Self>, handle: &tokio::runtime::Handle, refresh_period: Duration) {
+        let this = self.clone();
+        let cancel = self.cancel.clone();
+        handle.spawn(
+            async move {
+                tracing::info!("Starting PostHog feature resolver");
+                let mut ticker = tokio::time::interval(refresh_period);
+                ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
+                loop {
+                    tokio::select! {
+                        _ = ticker.tick() => {}
+                        _ = cancel.cancelled() => break
+                    }
+                    let resp = match this
+                        .posthog_client
+                        .get_feature_flags_local_evaluation()
+                        .await
+                    {
+                        Ok(resp) => resp,
+                        Err(e) => {
+                            tracing::warn!("Cannot get feature flags: {}", e);
+                            continue;
+                        }
+                    };
+                    let feature_store = FeatureStore::new_with_flags(resp.flags);
+                    this.feature_store.store(Arc::new(feature_store));
+                    tracing::info!("Feature flag updated");
+                }
+                tracing::info!("PostHog feature resolver stopped");
+            }
+            .instrument(info_span!("posthog_feature_resolver")),
+        );
+    }
+
+    pub fn feature_store(&self) -> Arc<FeatureStore> {
+        self.feature_store.load_full()
+    }
+}
--- a/libs/posthog_client_lite/src/lib.rs
+++ b/libs/posthog_client_lite/src/lib.rs
@@ -1,5 +1,9 @@
 //! A lite version of the PostHog client that only supports local evaluation of feature flags.

+mod background_loop;
+
+pub use background_loop::FeatureResolverBackgroundLoop;
+
 use std::collections::HashMap;

 use serde::{Deserialize, Serialize};
@@ -20,8 +24,7 @@ pub enum PostHogEvaluationError {

 #[derive(Deserialize)]
 pub struct LocalEvaluationResponse {
-    #[allow(dead_code)]
-    flags: Vec<LocalEvaluationFlag>,
+    pub flags: Vec<LocalEvaluationFlag>,
 }

 #[derive(Deserialize)]
@@ -34,7 +37,7 @@ pub struct LocalEvaluationFlag {
 #[derive(Deserialize)]
 pub struct LocalEvaluationFlagFilters {
    groups: Vec<LocalEvaluationFlagFilterGroup>,
-    multivariate: LocalEvaluationFlagMultivariate,
+    multivariate: Option<LocalEvaluationFlagMultivariate>,
 }

 #[derive(Deserialize)]
@@ -94,6 +97,12 @@ impl FeatureStore {
        }
    }

+    pub fn new_with_flags(flags: Vec<LocalEvaluationFlag>) -> Self {
+        let mut store = Self::new();
+        store.set_flags(flags);
+        store
+    }
+
    pub fn set_flags(&mut self, flags: Vec<LocalEvaluationFlag>) {
        self.flags.clear();
        for flag in flags {
@@ -245,7 +254,7 @@ impl FeatureStore {
        }
    }

-    /// Evaluate a multivariate feature flag. Returns `None` if the flag is not available or if there are errors
+    /// Evaluate a multivariate feature flag. Returns an error if the flag is not available or if there are errors
    /// during the evaluation.
    ///
    /// The parsing logic is as follows:
@@ -263,10 +272,15 @@ impl FeatureStore {
    /// Example: we have a multivariate flag with 3 groups of the configured global rollout percentage: A (10%), B (20%), C (70%).
    /// There is a single group with a condition that has a rollout percentage of 10% and it does not have a variant override.
    /// Then, we will have 1% of the users evaluated to A, 2% to B, and 7% to C.
+    ///
+    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag
+    /// cannot be evaluated (e.g., default to false if it cannot be resolved). The error should *not* be
+    /// propagated beyond where the feature flag gets resolved.
    pub fn evaluate_multivariate(
        &self,
        flag_key: &str,
        user_id: &str,
+        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
    ) -> Result<String, PostHogEvaluationError> {
        let hash_on_global_rollout_percentage =
            Self::consistent_hash(user_id, flag_key, "multivariate");
@@ -276,10 +290,39 @@ impl FeatureStore {
            flag_key,
            hash_on_global_rollout_percentage,
            hash_on_group_rollout_percentage,
-            &HashMap::new(),
+            properties,
        )
    }

+    /// Evaluate a boolean feature flag. Returns an error if the flag is not available or if there are errors
+    /// during the evaluation.
+    ///
+    /// The parsing logic is as follows:
+    ///
+    /// * Generate a consistent hash for the tenant-feature.
+    /// * Match each filter group.
+    ///   - If a group is matched, it will first determine whether the user is in the range of the rollout
+    ///     percentage.
+    ///   - If the hash falls within the group's rollout percentage, return true.
+    /// * Otherwise, continue with the next group until all groups are evaluated and no group is within the
+    ///   rollout percentage.
+    /// * If there are no matching groups, return an error.
+    ///
+    /// Returns `Ok(())` if the feature flag evaluates to true. In the future, it will return a payload.
+    ///
+    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag
+    /// cannot be evaluated (e.g., default to false if it cannot be resolved). The error should *not* be
+    /// propagated beyond where the feature flag gets resolved.
+    pub fn evaluate_boolean(
+        &self,
+        flag_key: &str,
+        user_id: &str,
+        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
+    ) -> Result<(), PostHogEvaluationError> {
+        let hash_on_global_rollout_percentage = Self::consistent_hash(user_id, flag_key, "boolean");
+        self.evaluate_boolean_inner(flag_key, hash_on_global_rollout_percentage, properties)
+    }
+
    /// Evaluate a multivariate feature flag. Note that we directly take the mapped user ID
    /// (a consistent hash ranging from 0 to 1) so that it is easier to use it in the tests
    /// and avoid duplicate computations.
@@ -306,6 +349,11 @@ impl FeatureStore {
                    flag_key
                )));
            }
+            let Some(ref multivariate) = flag_config.filters.multivariate else {
+                return Err(PostHogEvaluationError::Internal(format!(
+                    "No multivariate available, should use evaluate_boolean?: {flag_key}"
+                )));
+            };
            // TODO: sort the groups so that variant overrides always get evaluated first and it follows the PostHog
            // Python SDK behavior; for now we do not configure conditions without variant overrides in Neon so it
            // does not matter.
@@ -314,7 +362,7 @@ impl FeatureStore {
                    GroupEvaluationResult::MatchedAndOverride(variant) => return Ok(variant),
                    GroupEvaluationResult::MatchedAndEvaluate => {
                        let mut percentage = 0;
-                        for variant in &flag_config.filters.multivariate.variants {
+                        for variant in &multivariate.variants {
                            percentage += variant.rollout_percentage;
                            if self
                                .evaluate_percentage(hash_on_global_rollout_percentage, percentage)
@@ -342,6 +390,89 @@ impl FeatureStore {
            )))
        }
    }
+
+    /// Evaluate a boolean feature flag. Note that we directly take the mapped user ID
+    /// (a consistent hash ranging from 0 to 1) so that it is easier to use it in the tests
+    /// and avoid duplicate computations.
+    ///
+    /// Unlike the multivariate evaluation, there is only a single hash: despite its name,
+    /// `hash_on_global_rollout_percentage` is what gets passed to `evaluate_group` for every
+    /// condition group. The first group that matches yields `Ok(())`; a flag that carries a
+    /// multivariate config, or a group that resolves to a variant override, is an internal error.
+    ///
+    /// Note that the hash to determine group rollout percentage is shared across all groups. So if we have two
+    /// exactly-the-same conditions with 10% and 20% rollout percentage respectively, a total of 20% of the users
+    /// will be evaluated (versus 30% if group evaluation is done independently).
+    pub(crate) fn evaluate_boolean_inner(
+        &self,
+        flag_key: &str,
+        hash_on_global_rollout_percentage: f64,
+        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
+    ) -> Result<(), PostHogEvaluationError> {
+        if let Some(flag_config) = self.flags.get(flag_key) {
+            if !flag_config.active {
+                return Err(PostHogEvaluationError::NotAvailable(format!(
+                    "The feature flag is not active: {}",
+                    flag_key
+                )));
+            }
+            if flag_config.filters.multivariate.is_some() {
+                return Err(PostHogEvaluationError::Internal(format!(
+                    "This looks like a multivariate flag, should use evaluate_multivariate?: {flag_key}"
+                )));
+            };
+            // Groups are evaluated in the order they appear in the spec and the first match wins. Unlike the
+            // multivariate path there is no variant to pick: a matched group simply turns the flag on, and a
+            // group that resolves to a variant override is reported as an internal error below.
+            for group in &flag_config.filters.groups {
+                match self.evaluate_group(group, hash_on_global_rollout_percentage, properties)? {
+                    GroupEvaluationResult::MatchedAndOverride(_) => {
+                        return Err(PostHogEvaluationError::Internal(format!(
+                            "Boolean flag cannot have overrides: {}",
+                            flag_key
+                        )));
+                    }
+                    GroupEvaluationResult::MatchedAndEvaluate => {
+                        return Ok(());
+                    }
+                    GroupEvaluationResult::Unmatched => continue,
+                }
+            }
+            // If no group is matched, the feature is not available, and up to the caller to decide what to do.
+            Err(PostHogEvaluationError::NoConditionGroupMatched)
+        } else {
+            // The feature flag is not available yet
+            Err(PostHogEvaluationError::NotAvailable(format!(
+                "Not found in the local evaluation spec: {}",
+                flag_key
+            )))
+        }
+    }
+
+    /// Infer whether a feature flag is a boolean flag by checking if it has a multivariate filter.
+    pub fn is_feature_flag_boolean(&self, flag_key: &str) -> Result<bool, PostHogEvaluationError> {
+        if let Some(flag_config) = self.flags.get(flag_key) {
+            Ok(flag_config.filters.multivariate.is_none())
+        } else {
+            Err(PostHogEvaluationError::NotAvailable(format!(
+                "Not found in the local evaluation spec: {}",
+                flag_key
+            )))
+        }
+    }
+}
+
+pub struct PostHogClientConfig {
+    /// The server API key.
+    pub server_api_key: String,
+    /// The client API key.
+    pub client_api_key: String,
+    /// The project ID.
+    pub project_id: String,
+    /// The private API URL.
+    pub private_api_url: String,
+    /// The public API URL.
+    pub public_api_url: String,
 }

 /// A lite PostHog client.
@@ -360,37 +491,16 @@ impl FeatureStore {
 /// want to report the feature flag usage back to PostHog. The current plan is to use PostHog only as an UI to
 /// configure feature flags so it is very likely that the client API will not be used.
 pub struct PostHogClient {
-    /// The server API key.
-    server_api_key: String,
-    /// The client API key.
-    client_api_key: String,
-    /// The project ID.
-    project_id: String,
-    /// The private API URL.
-    private_api_url: String,
-    /// The public API URL.
-    public_api_url: String,
+    /// The config.
+    config: PostHogClientConfig,
    /// The HTTP client.
    client: reqwest::Client,
 }

 impl PostHogClient {
-    pub fn new(
-        server_api_key: String,
-        client_api_key: String,
-        project_id: String,
-        private_api_url: String,
-        public_api_url: String,
-    ) -> Self {
+    pub fn new(config: PostHogClientConfig) -> Self {
        let client = reqwest::Client::new();
-        Self {
-            server_api_key,
-            client_api_key,
-            project_id,
-            private_api_url,
-            public_api_url,
-            client,
-        }
+        Self { config, client }
    }

    pub fn new_with_us_region(
@@ -398,13 +508,13 @@ impl PostHogClient {
        client_api_key: String,
        project_id: String,
    ) -> Self {
-        Self::new(
+        Self::new(PostHogClientConfig {
            server_api_key,
            client_api_key,
            project_id,
-            "https://us.posthog.com".to_string(),
-            "https://us.i.posthog.com".to_string(),
-        )
+            private_api_url: "https://us.posthog.com".to_string(),
+            public_api_url: "https://us.i.posthog.com".to_string(),
+        })
    }

    /// Fetch the feature flag specs from the server.
@@ -422,15 +532,23 @@ impl PostHogClient {
        // with bearer token of self.server_api_key
        let url = format!(
            "{}/api/projects/{}/feature_flags/local_evaluation",
-            self.private_api_url, self.project_id
+            self.config.private_api_url, self.config.project_id
        );
        let response = self
            .client
            .get(url)
-            .bearer_auth(&self.server_api_key)
+            .bearer_auth(&self.config.server_api_key)
            .send()
            .await?;
+        let status = response.status();
        let body = response.text().await?;
+        if !status.is_success() {
+            return Err(anyhow::anyhow!(
+                "Failed to get feature flags: {}, {}",
+                status,
+                body
+            ));
+        }
        Ok(serde_json::from_str(&body)?)
    }

@@ -446,11 +564,11 @@ impl PostHogClient {
    ) -> anyhow::Result<()> {
        // PUBLIC_URL/capture/
        // with bearer token of self.client_api_key
-        let url = format!("{}/capture/", self.public_api_url);
+        let url = format!("{}/capture/", self.config.public_api_url);
        self.client
            .post(url)
            .body(serde_json::to_string(&json!({
-                "api_key": self.client_api_key,
+                "api_key": self.config.client_api_key,
                "distinct_id": distinct_id,
                "event": event,
                "properties": properties,
@@ -467,95 +585,162 @@ mod tests {

    fn data() -> &'static str {
        r#"{
-            "flags": [
-                {
-                    "id": 132794,
-                    "team_id": 152860,
-                    "name": "",
-                    "key": "gc-compaction",
-                    "filters": {
-                        "groups": [
-                            {
-                                "variant": "enabled-stage-2",
-                                "properties": [
-                                    {
-                                        "key": "plan_type",
-                                        "type": "person",
-                                        "value": [
-                                            "free"
-                                        ],
-                                        "operator": "exact"
-                                    },
-                                    {
-                                        "key": "pageserver_remote_size",
-                                        "type": "person",
-                                        "value": "10000000",
-                                        "operator": "lt"
-                                    }
-                                ],
-                                "rollout_percentage": 50
-                            },
-                            {
-                                "properties": [
-                                    {
-                                        "key": "plan_type",
-                                        "type": "person",
-                                        "value": [
-                                            "free"
-                                        ],
-                                        "operator": "exact"
-                                    },
-                                    {
-                                        "key": "pageserver_remote_size",
-                                        "type": "person",
-                                        "value": "10000000",
-                                        "operator": "lt"
-                                    }
-                                ],
-                                "rollout_percentage": 80
-                            }
-                        ],
-                        "payloads": {},
-                        "multivariate": {
-                            "variants": [
-                                {
-                                    "key": "disabled",
-                                    "name": "",
-                                    "rollout_percentage": 90
-                                },
-                                {
-                                    "key": "enabled-stage-1",
-                                    "name": "",
-                                    "rollout_percentage": 10
-                                },
-                                {
-                                    "key": "enabled-stage-2",
-                                    "name": "",
-                                    "rollout_percentage": 0
-                                },
-                                {
-                                    "key": "enabled-stage-3",
-                                    "name": "",
-                                    "rollout_percentage": 0
-                                },
-                                {
-                                    "key": "enabled",
-                                    "name": "",
-                                    "rollout_percentage": 0
-                                }
-                            ]
-                        }
-                    },
-                    "deleted": false,
-                    "active": true,
-                    "ensure_experience_continuity": false,
-                    "has_encrypted_payloads": false,
-                    "version": 6
-                }
+  "flags": [
+    {
+      "id": 141807,
+      "team_id": 152860,
+      "name": "",
+      "key": "image-compaction-boundary",
+      "filters": {
+        "groups": [
+          {
+            "variant": null,
+            "properties": [
+              {
+                "key": "plan_type",
+                "type": "person",
+                "value": [
+                  "free"
+                ],
+                "operator": "exact"
+              }
            ],
-            "group_type_mapping": {},
-            "cohorts": {}
-        }"#
+            "rollout_percentage": 40
+          },
+          {
+            "variant": null,
+            "properties": [],
+            "rollout_percentage": 10
+          }
+        ],
+        "payloads": {},
+        "multivariate": null
+      },
+      "deleted": false,
+      "active": true,
+      "ensure_experience_continuity": false,
+      "has_encrypted_payloads": false,
+      "version": 1
+    },
+    {
+      "id": 135586,
+      "team_id": 152860,
+      "name": "",
+      "key": "boolean-flag",
+      "filters": {
+        "groups": [
+          {
+            "variant": null,
+            "properties": [
+              {
+                "key": "plan_type",
+                "type": "person",
+                "value": [
+                  "free"
+                ],
+                "operator": "exact"
+              }
+            ],
+            "rollout_percentage": 47
+          }
+        ],
+        "payloads": {},
+        "multivariate": null
+      },
+      "deleted": false,
+      "active": true,
+      "ensure_experience_continuity": false,
+      "has_encrypted_payloads": false,
+      "version": 1
+    },
+    {
+      "id": 132794,
+      "team_id": 152860,
+      "name": "",
+      "key": "gc-compaction",
+      "filters": {
+        "groups": [
+          {
+            "variant": "enabled-stage-2",
+            "properties": [
+              {
+                "key": "plan_type",
+                "type": "person",
+                "value": [
+                  "free"
+                ],
+                "operator": "exact"
+              },
+              {
+                "key": "pageserver_remote_size",
+                "type": "person",
+                "value": "10000000",
+                "operator": "lt"
+              }
+            ],
+             "rollout_percentage": 50
+          },
+          {
+            "properties": [
+              {
+                "key": "plan_type",
+                "type": "person",
+                "value": [
+                  "free"
+                ],
+                "operator": "exact"
+              },
+              {
+                "key": "pageserver_remote_size",
+                "type": "person",
+                "value": "10000000",
+                "operator": "lt"
+              }
+            ],
+            "rollout_percentage": 80
+          }
+        ],
+        "payloads": {},
+        "multivariate": {
+          "variants": [
+            {
+              "key": "disabled",
+              "name": "",
+              "rollout_percentage": 90
+            },
+            {
+              "key": "enabled-stage-1",
+              "name": "",
+              "rollout_percentage": 10
+            },
+            {
+              "key": "enabled-stage-2",
+              "name": "",
+              "rollout_percentage": 0
+            },
+            {
+              "key": "enabled-stage-3",
+              "name": "",
+              "rollout_percentage": 0
+            },
+            {
+              "key": "enabled",
+              "name": "",
+              "rollout_percentage": 0
+            }
+          ]
+        }
+      },
+      "deleted": false,
+      "active": true,
+      "ensure_experience_continuity": false,
+      "has_encrypted_payloads": false,
+      "version": 7
+    }
+  ],
+  "group_type_mapping": {},
+  "cohorts": {}
+}"#
    }

    #[test]
@@ -631,4 +816,125 @@ mod tests {
            Err(PostHogEvaluationError::NoConditionGroupMatched)
        ),);
    }
+
+    #[test]
+    fn evaluate_boolean_1() {
+        // The `boolean-flag` feature flag only has one group that matches on the free user.
+
+        let mut store = FeatureStore::new();
+        let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();
+        store.set_flags(response.flags);
+
+        // This lacks the required properties and cannot be evaluated.
+        let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &HashMap::new());
+        assert!(matches!(
+            variant,
+            Err(PostHogEvaluationError::NotAvailable(_))
+        ),);
+
+        let properties_unmatched = HashMap::from([
+            (
+                "plan_type".to_string(),
+                PostHogFlagFilterPropertyValue::String("paid".to_string()),
+            ),
+            (
+                "pageserver_remote_size".to_string(),
+                PostHogFlagFilterPropertyValue::Number(1000.0),
+            ),
+        ]);
+
+        // This does not match any group so there will be an error.
+        let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &properties_unmatched);
+        assert!(matches!(
+            variant,
+            Err(PostHogEvaluationError::NoConditionGroupMatched)
+        ),);
+
+        let properties = HashMap::from([
+            (
+                "plan_type".to_string(),
+                PostHogFlagFilterPropertyValue::String("free".to_string()),
+            ),
+            (
+                "pageserver_remote_size".to_string(),
+                PostHogFlagFilterPropertyValue::Number(1000.0),
+            ),
+        ]);
+
+        // It matches the only group as 0.10 <= 0.47 and the properties are matched, so the flag evaluates to true.
+        let variant = store.evaluate_boolean_inner("boolean-flag", 0.10, &properties);
+        assert!(variant.is_ok());
+
+        // It matches the group conditions but not the group rollout percentage.
+        let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &properties);
+        assert!(matches!(
+            variant,
+            Err(PostHogEvaluationError::NoConditionGroupMatched)
+        ),);
+    }
+
+    #[test]
+    fn evaluate_boolean_2() {
+        // The `image-compaction-boundary` feature flag has one group that matches on the free user and a group that matches on all users.
+
+        let mut store = FeatureStore::new();
+        let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();
+        store.set_flags(response.flags);
+
+        // This lacks the required properties and cannot be evaluated.
+        let variant =
+            store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &HashMap::new());
+        assert!(matches!(
+            variant,
+            Err(PostHogEvaluationError::NotAvailable(_))
+        ),);
+
+        let properties_unmatched = HashMap::from([
+            (
+                "plan_type".to_string(),
+                PostHogFlagFilterPropertyValue::String("paid".to_string()),
+            ),
+            (
+                "pageserver_remote_size".to_string(),
+                PostHogFlagFilterPropertyValue::Number(1000.0),
+            ),
+        ]);
+
+        // This does not match the filtered group; the all-user group only matches when the hash is within its 10% rollout.
+        let variant =
+            store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &properties_unmatched);
+        assert!(matches!(
+            variant,
+            Err(PostHogEvaluationError::NoConditionGroupMatched)
+        ),);
+        let variant =
+            store.evaluate_boolean_inner("image-compaction-boundary", 0.05, &properties_unmatched);
+        assert!(variant.is_ok());
+
+        let properties = HashMap::from([
+            (
+                "plan_type".to_string(),
+                PostHogFlagFilterPropertyValue::String("free".to_string()),
+            ),
+            (
+                "pageserver_remote_size".to_string(),
+                PostHogFlagFilterPropertyValue::Number(1000.0),
+            ),
+        ]);
+
+        // It matches the first group as 0.30 <= 0.40 and the properties are matched, so the flag evaluates to true.
+        let variant = store.evaluate_boolean_inner("image-compaction-boundary", 0.30, &properties);
+        assert!(variant.is_ok());
+
+        // It matches the group conditions but not the group rollout percentage.
+        let variant = store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &properties);
+        assert!(matches!(
+            variant,
+            Err(PostHogEvaluationError::NoConditionGroupMatched)
+        ),);
+
+        // 0.09 is within both groups' rollout (40% and 10%); with free-plan properties the first group already matches.
+        let variant = store.evaluate_boolean_inner("image-compaction-boundary", 0.09, &properties);
+        assert!(variant.is_ok());
+    }
 }
--- a/libs/utils/src/leaky_bucket.rs
+++ b/libs/utils/src/leaky_bucket.rs
@@ -28,6 +28,7 @@ use std::time::Duration;
 use tokio::sync::Notify;
 use tokio::time::Instant;

+#[derive(Clone, Copy)]
 pub struct LeakyBucketConfig {
    /// This is the "time cost" of a single request unit.
    /// Should loosely represent how long it takes to handle a request unit in active resource time.
--- a/libs/walproposer/src/walproposer.rs
+++ b/libs/walproposer/src/walproposer.rs
@@ -1,6 +1,7 @@
 #![allow(clippy::todo)]

 use std::ffi::CString;
+use std::str::FromStr;

 use postgres_ffi::WAL_SEGMENT_SIZE;
 use utils::id::TenantTimelineId;
@@ -173,6 +174,8 @@ pub struct Config {
    pub ttid: TenantTimelineId,
    /// List of safekeepers in format `host:port`
    pub safekeepers_list: Vec<String>,
+    /// libpq connection info options
+    pub safekeeper_conninfo_options: String,
    /// Safekeeper reconnect timeout in milliseconds
    pub safekeeper_reconnect_timeout: i32,
    /// Safekeeper connection timeout in milliseconds
@@ -202,6 +205,9 @@ impl Wrapper {
            .into_bytes_with_nul();
        assert!(safekeepers_list_vec.len() == safekeepers_list_vec.capacity());
        let safekeepers_list = safekeepers_list_vec.as_mut_ptr() as *mut std::ffi::c_char;
+        let safekeeper_conninfo_options = CString::from_str(&config.safekeeper_conninfo_options)
+            .unwrap()
+            .into_raw();

        let callback_data = Box::into_raw(Box::new(api)) as *mut ::std::os::raw::c_void;

@@ -209,6 +215,7 @@ impl Wrapper {
            neon_tenant,
            neon_timeline,
            safekeepers_list,
+            safekeeper_conninfo_options,
            safekeeper_reconnect_timeout: config.safekeeper_reconnect_timeout,
            safekeeper_connection_timeout: config.safekeeper_connection_timeout,
            wal_segment_size: WAL_SEGMENT_SIZE as i32, // default 16MB
@@ -576,6 +583,7 @@ mod tests {
        let config = crate::walproposer::Config {
            ttid,
            safekeepers_list: vec!["localhost:5000".to_string()],
+            safekeeper_conninfo_options: String::new(),
            safekeeper_reconnect_timeout: 1000,
            safekeeper_connection_timeout: 10000,
            sync_safekeepers: true,
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -17,51 +17,69 @@ anyhow.workspace = true
 arc-swap.workspace = true
 async-compression.workspace = true
 async-stream.workspace = true
-bit_field.workspace = true
 bincode.workspace = true
+bit_field.workspace = true
 byteorder.workspace = true
 bytes.workspace = true
-camino.workspace = true
 camino-tempfile.workspace = true
+camino.workspace = true
 chrono = { workspace = true, features = ["serde"] }
 clap = { workspace = true, features = ["string"] }
 consumption_metrics.workspace = true
 crc32c.workspace = true
 either.workspace = true
+enum-map.workspace = true
+enumset = { workspace = true, features = ["serde"]}
 fail.workspace = true
 futures.workspace = true
 hashlink.workspace = true
 hex.workspace = true
-humantime.workspace = true
+http-utils.workspace = true
 humantime-serde.workspace = true
+humantime.workspace = true
 hyper0.workspace = true
 itertools.workspace = true
 jsonwebtoken.workspace = true
 md5.workspace = true
+metrics.workspace = true
 nix.workspace = true
-# hack to get the number of worker threads tokio uses
-num_cpus.workspace = true
+num_cpus.workspace = true # hack to get the number of worker threads tokio uses
 num-traits.workspace = true
 once_cell.workspace = true
+pageserver_api.workspace = true
+pageserver_client.workspace = true # for ResponseErrorMessageExt TODO refactor that
+pageserver_compaction.workspace = true
 pageserver_page_api.workspace = true
+pem.workspace = true
 pin-project-lite.workspace = true
 postgres_backend.workspace = true
+postgres_connection.workspace = true
+postgres_ffi.workspace = true
+postgres_initdb.workspace = true
 postgres-protocol.workspace = true
 postgres-types.workspace = true
-postgres_initdb.workspace = true
+posthog_client_lite.workspace = true
 pprof.workspace = true
+pq_proto.workspace = true
 rand.workspace = true
 range-set-blaze = { version = "0.1.16", features = ["alloc"] }
 regex.workspace = true
+remote_storage.workspace = true
+reqwest.workspace = true
+rpds.workspace = true
 rustls.workspace = true
 scopeguard.workspace = true
 send-future.workspace = true
-serde.workspace = true
 serde_json = { workspace = true, features = ["raw_value"] }
 serde_path_to_error.workspace = true
 serde_with.workspace = true
+serde.workspace = true
+smallvec.workspace = true
+storage_broker.workspace = true
+strum_macros.workspace = true
+strum.workspace = true
 sysinfo.workspace = true
-tokio-tar.workspace = true
+tenant_size_model.workspace = true
 thiserror.workspace = true
 tikv-jemallocator.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
@@ -70,6 +88,7 @@ tokio-io-timeout.workspace = true
 tokio-postgres.workspace = true
 tokio-rustls.workspace = true
 tokio-stream.workspace = true
+tokio-tar.workspace = true
 tokio-util.workspace = true
 toml_edit = { workspace = true, features = [ "serde" ] }
 tonic.workspace = true
@@ -77,29 +96,10 @@ tonic-reflection.workspace = true
 tracing.workspace = true
 tracing-utils.workspace = true
 url.workspace = true
-walkdir.workspace = true
-metrics.workspace = true
-pageserver_api.workspace = true
-pageserver_client.workspace = true # for ResponseErrorMessageExt TOOD refactor that
-pageserver_compaction.workspace = true
-pem.workspace = true
-postgres_connection.workspace = true
-postgres_ffi.workspace = true
-pq_proto.workspace = true
-remote_storage.workspace = true
-storage_broker.workspace = true
-tenant_size_model.workspace = true
-http-utils.workspace = true
 utils.workspace = true
-workspace_hack.workspace = true
-reqwest.workspace = true
-rpds.workspace = true
-enum-map.workspace = true
-enumset = { workspace = true, features = ["serde"]}
-strum.workspace = true
-strum_macros.workspace = true
 wal_decoder.workspace = true
-smallvec.workspace = true
+walkdir.workspace = true
+workspace_hack.workspace = true
 twox-hash.workspace = true

 [target.'cfg(target_os = "linux")'.dependencies]
--- a/pageserver/benches/bench_metrics.rs
+++ b/pageserver/benches/bench_metrics.rs
@@ -264,10 +264,56 @@ mod propagation_of_cached_label_value {
    }
 }

+criterion_group!(histograms, histograms::bench_bucket_scalability);
+mod histograms {
+    use std::time::Instant;
+
+    use criterion::{BenchmarkId, Criterion};
+    use metrics::core::Collector;
+
+    pub fn bench_bucket_scalability(c: &mut Criterion) {
+        let mut g = c.benchmark_group("bucket_scalability");
+
+        for n in [1, 4, 8, 16, 32, 64, 128, 256] {
+            g.bench_with_input(BenchmarkId::new("nbuckets", n), &n, |b, n| {
+                b.iter_custom(|iters| {
+                    let buckets: Vec<f64> = (0..*n).map(|i| i as f64 * 100.0).collect();
+                    let histo = metrics::Histogram::with_opts(
+                        metrics::prometheus::HistogramOpts::new("name", "help")
+                            .buckets(buckets.clone()),
+                    )
+                    .unwrap();
+                    let start = Instant::now();
+                    for i in 0..usize::try_from(iters).unwrap() {
+                        histo.observe(buckets[i % buckets.len()]);
+                    }
+                    let elapsed = start.elapsed();
+                    // self-test
+                    let mfs = histo.collect();
+                    assert_eq!(mfs.len(), 1);
+                    let metrics = mfs[0].get_metric();
+                    assert_eq!(metrics.len(), 1);
+                    let histo = metrics[0].get_histogram();
+                    let buckets = histo.get_bucket();
+                    assert!(
+                        buckets
+                            .iter()
+                            .enumerate()
+                            .all(|(i, b)| b.get_cumulative_count()
+                                >= i as u64 * (iters / buckets.len() as u64))
+                    );
+                    elapsed
+                })
+            });
+        }
+    }
+}
+
 criterion_main!(
    label_values,
    single_metric_multicore_scalability,
-    propagation_of_cached_label_value
+    propagation_of_cached_label_value,
+    histograms,
 );

 /*
@@ -290,6 +336,14 @@ propagation_of_cached_label_value__naive/nthreads/8 time:   [211.50 ns 214.44 ns
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time:   [14.135 ns 14.147 ns 14.160 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time:   [14.243 ns 14.255 ns 14.268 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time:   [14.470 ns 14.682 ns 14.895 ns]
+bucket_scalability/nbuckets/1     time:   [30.352 ns 30.353 ns 30.354 ns]
+bucket_scalability/nbuckets/4     time:   [30.464 ns 30.465 ns 30.467 ns]
+bucket_scalability/nbuckets/8     time:   [30.569 ns 30.575 ns 30.584 ns]
+bucket_scalability/nbuckets/16      time:   [30.961 ns 30.965 ns 30.969 ns]
+bucket_scalability/nbuckets/32      time:   [35.691 ns 35.707 ns 35.722 ns]
+bucket_scalability/nbuckets/64      time:   [47.829 ns 47.898 ns 47.974 ns]
+bucket_scalability/nbuckets/128     time:   [73.479 ns 73.512 ns 73.545 ns]
+bucket_scalability/nbuckets/256     time:   [127.92 ns 127.94 ns 127.96 ns]

 Results on an i3en.3xlarge instance

@@ -344,6 +398,14 @@ propagation_of_cached_label_value__naive/nthreads/8     time:   [434.87 ns 456.4
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1     time:   [3.3767 ns 3.3974 ns 3.4220 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4     time:   [3.6105 ns 4.2355 ns 5.1463 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8     time:   [4.0889 ns 4.9714 ns 6.0779 ns]
+bucket_scalability/nbuckets/1     time:   [4.8455 ns 4.8542 ns 4.8646 ns]
+bucket_scalability/nbuckets/4     time:   [4.5663 ns 4.5722 ns 4.5787 ns]
+bucket_scalability/nbuckets/8     time:   [4.5531 ns 4.5670 ns 4.5842 ns]
+bucket_scalability/nbuckets/16      time:   [4.6392 ns 4.6524 ns 4.6685 ns]
+bucket_scalability/nbuckets/32      time:   [6.0302 ns 6.0439 ns 6.0589 ns]
+bucket_scalability/nbuckets/64      time:   [10.608 ns 10.644 ns 10.691 ns]
+bucket_scalability/nbuckets/128     time:   [22.178 ns 22.316 ns 22.483 ns]
+bucket_scalability/nbuckets/256     time:   [42.190 ns 42.328 ns 42.492 ns]

 Results on a Hetzner AX102 AMD Ryzen 9 7950X3D 16-Core Processor

@@ -362,5 +424,13 @@ propagation_of_cached_label_value__naive/nthreads/8     time:   [164.24 ns 170.1
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1     time:   [2.2915 ns 2.2960 ns 2.3012 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4     time:   [2.5726 ns 2.6158 ns 2.6624 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8     time:   [2.7068 ns 2.8243 ns 2.9824 ns]
+bucket_scalability/nbuckets/1     time:   [6.3998 ns 6.4288 ns 6.4684 ns]
+bucket_scalability/nbuckets/4     time:   [6.3603 ns 6.3620 ns 6.3637 ns]
+bucket_scalability/nbuckets/8     time:   [6.1646 ns 6.1654 ns 6.1667 ns]
+bucket_scalability/nbuckets/16      time:   [6.1341 ns 6.1391 ns 6.1454 ns]
+bucket_scalability/nbuckets/32      time:   [8.2206 ns 8.2254 ns 8.2301 ns]
+bucket_scalability/nbuckets/64      time:   [13.988 ns 13.994 ns 14.000 ns]
+bucket_scalability/nbuckets/128     time:   [28.180 ns 28.216 ns 28.251 ns]
+bucket_scalability/nbuckets/256     time:   [54.914 ns 54.931 ns 54.951 ns]

 */
--- a/pageserver/page_api/Cargo.toml
+++ b/pageserver/page_api/Cargo.toml
@@ -5,8 +5,14 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
+bytes.workspace = true
+pageserver_api.workspace = true
+postgres_ffi.workspace = true
 prost.workspace = true
+smallvec.workspace = true
+thiserror.workspace = true
 tonic.workspace = true
+utils.workspace = true
 workspace_hack.workspace = true

 [build-dependencies]
--- a/pageserver/page_api/proto/page_service.proto
+++ b/pageserver/page_api/proto/page_service.proto
@@ -54,9 +54,9 @@ service PageService {
  // RPCs use regular unary requests, since they are not as frequent and
  // performance-critical, and this simplifies implementation.
  //
-  // NB: a status response (e.g. errors) will terminate the stream. The stream
-  // may be shared by e.g. multiple Postgres backends, so we should avoid this.
-  // Most errors are therefore sent as GetPageResponse.status instead.
+  // NB: a gRPC status response (e.g. errors) will terminate the stream. The
+  // stream may be shared by multiple Postgres backends, so we avoid this by
+  // sending them as GetPageResponse.status_code instead.
  rpc GetPages (stream GetPageRequest) returns (stream GetPageResponse);

  // Returns the size of a relation, as # of blocks.
@@ -159,8 +159,8 @@ message GetPageRequest {
 // A GetPageRequest class. Primarily intended for observability, but may also be
 // used for prioritization in the future.
 enum GetPageClass {
-  // Unknown class. For forwards compatibility: used when the client sends a
-  // class that the server doesn't know about.
+  // Unknown class. For backwards compatibility: used when an older client version sends a class
+  // that a newer server version has removed.
  GET_PAGE_CLASS_UNKNOWN = 0;
  // A normal request. This is the default.
  GET_PAGE_CLASS_NORMAL = 1;
@@ -180,31 +180,37 @@ message GetPageResponse {
  // The original request's ID.
  uint64 request_id = 1;
  // The response status code.
-  GetPageStatus status = 2;
+  GetPageStatusCode status_code = 2;
  // A string describing the status, if any.
  string reason = 3;
-  // The 8KB page images, in the same order as the request. Empty if status != OK.
+  // The 8KB page images, in the same order as the request. Empty if status_code != OK.
  repeated bytes page_image = 4;
 }

-// A GetPageResponse status code. Since we use a bidirectional stream, we don't
-// want to send errors as gRPC statuses, since this would terminate the stream.
-enum GetPageStatus {
-  // Unknown status. For forwards compatibility: used when the server sends a
-  // status code that the client doesn't know about.
-  GET_PAGE_STATUS_UNKNOWN = 0;
+// A GetPageResponse status code.
+//
+// These are effectively equivalent to gRPC statuses. However, we use a bidirectional stream
+// (potentially shared by many backends), and a gRPC status response would terminate the stream so
+// we send GetPageResponse messages with these codes instead.
+enum GetPageStatusCode {
+  // Unknown status. For forwards compatibility: used when an older client version receives a new
+  // status code from a newer server version.
+  GET_PAGE_STATUS_CODE_UNKNOWN = 0;
  // The request was successful.
-  GET_PAGE_STATUS_OK = 1;
+  GET_PAGE_STATUS_CODE_OK = 1;
  // The page did not exist. The tenant/timeline/shard has already been
  // validated during stream setup.
-  GET_PAGE_STATUS_NOT_FOUND = 2;
+  GET_PAGE_STATUS_CODE_NOT_FOUND = 2;
  // The request was invalid.
-  GET_PAGE_STATUS_INVALID = 3;
+  GET_PAGE_STATUS_CODE_INVALID_REQUEST = 3;
+  // The request failed due to an internal server error.
+  GET_PAGE_STATUS_CODE_INTERNAL_ERROR = 4;
  // The tenant is rate limited. Slow down and retry later.
-  GET_PAGE_STATUS_SLOW_DOWN = 4;
-  // TODO: consider adding a GET_PAGE_STATUS_LAYER_DOWNLOAD in the case of a
-  // layer download. This could free up the server task to process other
-  // requests while the layer download is in progress.
+  GET_PAGE_STATUS_CODE_SLOW_DOWN = 5;
+  // NB: shutdown errors are emitted as a gRPC Unavailable status.
+  //
+  // TODO: consider adding a GET_PAGE_STATUS_CODE_LAYER_DOWNLOAD in the case of a layer download.
+  // This could free up the server task to process other requests while the download is in progress.
 }

 // Fetches the size of a relation at a given LSN, as # of blocks. Only valid on
--- a/pageserver/page_api/src/lib.rs
+++ b/pageserver/page_api/src/lib.rs
@@ -17,3 +17,7 @@ pub mod proto {
    pub use page_service_client::PageServiceClient;
    pub use page_service_server::{PageService, PageServiceServer};
 }
+
+mod model;
+
+pub use model::*;
--- a/pageserver/page_api/src/model.rs
+++ b/pageserver/page_api/src/model.rs
@@ -0,0 +1,595 @@
+//! Structs representing the canonical page service API.
+//!
+//! These mirror the autogenerated Protobuf types. The differences are:
+//!
+//! - Types that are in fact required by the API are not Options. The protobuf "required"
+//!   attribute is deprecated and 'prost' marks a lot of members as optional because of that.
+//!   (See <https://github.com/tokio-rs/prost/issues/800> for a gripe on this)
+//!
+//! - Use more precise datatypes, e.g. Lsn and uints shorter than 32 bits.
+//!
+//! - Validate protocol invariants, via try_from() and try_into().
+
+use bytes::Bytes;
+use postgres_ffi::Oid;
+use smallvec::SmallVec;
+// TODO: split out Lsn, RelTag, SlruKind, Oid and other basic types to a separate crate, to avoid
+// pulling in all of their other crate dependencies when building the client.
+use utils::lsn::Lsn;
+
+use crate::proto;
+
+/// A protocol error. Typically returned via try_from() or try_into().
+#[derive(thiserror::Error, Debug)]
+pub enum ProtocolError {
+    #[error("field '{0}' has invalid value '{1}'")]
+    Invalid(&'static str, String),
+    #[error("required field '{0}' is missing")]
+    Missing(&'static str),
+}
+
+impl ProtocolError {
+    /// Helper to generate a new ProtocolError::Invalid for the given field and value.
+    pub fn invalid(field: &'static str, value: impl std::fmt::Debug) -> Self {
+        Self::Invalid(field, format!("{value:?}"))
+    }
+}
+
+impl From<ProtocolError> for tonic::Status {
+    fn from(err: ProtocolError) -> Self {
+        tonic::Status::invalid_argument(format!("{err}"))
+    }
+}
+
+/// The LSN a request should read at.
+#[derive(Clone, Copy, Debug)]
+pub struct ReadLsn {
+    /// The request's read LSN.
+    pub request_lsn: Lsn,
+    /// If given, the caller guarantees that the page has not been modified since this LSN. Must be
+    /// smaller than or equal to request_lsn. This allows the Pageserver to serve an old page
+    /// without waiting for the request LSN to arrive. Valid for all request types.
+    ///
+    /// It is undefined behaviour to make a request such that the page was, in fact, modified
+    /// between request_lsn and not_modified_since_lsn. The Pageserver might detect it and return an
+    /// error, or it might return the old page version or the new page version. Setting
+    /// not_modified_since_lsn equal to request_lsn is always safe, but can lead to unnecessary
+    /// waiting.
+    pub not_modified_since_lsn: Option<Lsn>,
+}
+
+impl ReadLsn {
+    /// Validates the ReadLsn.
+    pub fn validate(&self) -> Result<(), ProtocolError> {
+        if self.request_lsn == Lsn::INVALID {
+            return Err(ProtocolError::invalid("request_lsn", self.request_lsn));
+        }
+        if self.not_modified_since_lsn > Some(self.request_lsn) {
+            return Err(ProtocolError::invalid(
+                "not_modified_since_lsn",
+                self.not_modified_since_lsn,
+            ));
+        }
+        Ok(())
+    }
+}
+
+impl TryFrom<proto::ReadLsn> for ReadLsn {
+    type Error = ProtocolError;
+
+    fn try_from(pb: proto::ReadLsn) -> Result<Self, Self::Error> {
+        let read_lsn = Self {
+            request_lsn: Lsn(pb.request_lsn),
+            not_modified_since_lsn: match pb.not_modified_since_lsn {
+                0 => None,
+                lsn => Some(Lsn(lsn)),
+            },
+        };
+        read_lsn.validate()?;
+        Ok(read_lsn)
+    }
+}
+
+impl TryFrom<ReadLsn> for proto::ReadLsn {
+    type Error = ProtocolError;
+
+    fn try_from(read_lsn: ReadLsn) -> Result<Self, Self::Error> {
+        read_lsn.validate()?;
+        Ok(Self {
+            request_lsn: read_lsn.request_lsn.0,
+            not_modified_since_lsn: read_lsn.not_modified_since_lsn.unwrap_or_default().0,
+        })
+    }
+}
+
+// RelTag is defined in pageserver_api::reltag.
+pub type RelTag = pageserver_api::reltag::RelTag;
+
+impl TryFrom<proto::RelTag> for RelTag {
+    type Error = ProtocolError;
+
+    fn try_from(pb: proto::RelTag) -> Result<Self, Self::Error> {
+        Ok(Self {
+            spcnode: pb.spc_oid,
+            dbnode: pb.db_oid,
+            relnode: pb.rel_number,
+            forknum: pb
+                .fork_number
+                .try_into()
+                .map_err(|_| ProtocolError::invalid("fork_number", pb.fork_number))?,
+        })
+    }
+}
+
+impl From<RelTag> for proto::RelTag {
+    fn from(rel_tag: RelTag) -> Self {
+        Self {
+            spc_oid: rel_tag.spcnode,
+            db_oid: rel_tag.dbnode,
+            rel_number: rel_tag.relnode,
+            fork_number: rel_tag.forknum as u32,
+        }
+    }
+}
+
+/// Checks whether a relation exists, at the given LSN. Only valid on shard 0, other shards error.
+#[derive(Clone, Copy, Debug)]
+pub struct CheckRelExistsRequest {
+    pub read_lsn: ReadLsn,
+    pub rel: RelTag,
+}
+
+impl TryFrom<proto::CheckRelExistsRequest> for CheckRelExistsRequest {
+    type Error = ProtocolError;
+
+    fn try_from(pb: proto::CheckRelExistsRequest) -> Result<Self, Self::Error> {
+        Ok(Self {
+            read_lsn: pb
+                .read_lsn
+                .ok_or(ProtocolError::Missing("read_lsn"))?
+                .try_into()?,
+            rel: pb.rel.ok_or(ProtocolError::Missing("rel"))?.try_into()?,
+        })
+    }
+}
+
+pub type CheckRelExistsResponse = bool;
+
+impl From<proto::CheckRelExistsResponse> for CheckRelExistsResponse {
+    fn from(pb: proto::CheckRelExistsResponse) -> Self {
+        pb.exists
+    }
+}
+
+impl From<CheckRelExistsResponse> for proto::CheckRelExistsResponse {
+    fn from(exists: CheckRelExistsResponse) -> Self {
+        Self { exists }
+    }
+}
+
+/// Requests a base backup at a given LSN.
+#[derive(Clone, Copy, Debug)]
+pub struct GetBaseBackupRequest {
+    /// The LSN to fetch a base backup at.
+    pub read_lsn: ReadLsn,
+    /// If true, logical replication slots will not be created.
+    pub replica: bool,
+}
+
+impl TryFrom<proto::GetBaseBackupRequest> for GetBaseBackupRequest {
+    type Error = ProtocolError;
+
+    fn try_from(pb: proto::GetBaseBackupRequest) -> Result<Self, Self::Error> {
+        Ok(Self {
+            read_lsn: pb
+                .read_lsn
+                .ok_or(ProtocolError::Missing("read_lsn"))?
+                .try_into()?,
+            replica: pb.replica,
+        })
+    }
+}
+
+impl TryFrom<GetBaseBackupRequest> for proto::GetBaseBackupRequest {
+    type Error = ProtocolError;
+
+    fn try_from(request: GetBaseBackupRequest) -> Result<Self, Self::Error> {
+        Ok(Self {
+            read_lsn: Some(request.read_lsn.try_into()?),
+            replica: request.replica,
+        })
+    }
+}
+
+pub type GetBaseBackupResponseChunk = Bytes;
+
+impl TryFrom<proto::GetBaseBackupResponseChunk> for GetBaseBackupResponseChunk {
+    type Error = ProtocolError;
+
+    fn try_from(pb: proto::GetBaseBackupResponseChunk) -> Result<Self, Self::Error> {
+        if pb.chunk.is_empty() {
+            return Err(ProtocolError::Missing("chunk"));
+        }
+        Ok(pb.chunk)
+    }
+}
+
+impl TryFrom<GetBaseBackupResponseChunk> for proto::GetBaseBackupResponseChunk {
+    type Error = ProtocolError;
+
+    fn try_from(chunk: GetBaseBackupResponseChunk) -> Result<Self, Self::Error> {
+        if chunk.is_empty() {
+            return Err(ProtocolError::Missing("chunk"));
+        }
+        Ok(Self { chunk })
+    }
+}
+
+/// Requests the size of a database, as # of bytes. Only valid on shard 0, other shards will error.
+#[derive(Clone, Copy, Debug)]
+pub struct GetDbSizeRequest {
+    pub read_lsn: ReadLsn,
+    pub db_oid: Oid,
+}
+
+impl TryFrom<proto::GetDbSizeRequest> for GetDbSizeRequest {
+    type Error = ProtocolError;
+
+    fn try_from(pb: proto::GetDbSizeRequest) -> Result<Self, Self::Error> {
+        Ok(Self {
+            read_lsn: pb
+                .read_lsn
+                .ok_or(ProtocolError::Missing("read_lsn"))?
+                .try_into()?,
+            db_oid: pb.db_oid,
+        })
+    }
+}
+
+impl TryFrom<GetDbSizeRequest> for proto::GetDbSizeRequest {
+    type Error = ProtocolError;
+
+    fn try_from(request: GetDbSizeRequest) -> Result<Self, Self::Error> {
+        Ok(Self {
+            read_lsn: Some(request.read_lsn.try_into()?),
+            db_oid: request.db_oid,
+        })
+    }
+}
+
+pub type GetDbSizeResponse = u64;
+
+impl From<proto::GetDbSizeResponse> for GetDbSizeResponse {
+    fn from(pb: proto::GetDbSizeResponse) -> Self {
+        pb.num_bytes
+    }
+}
+
+impl From<GetDbSizeResponse> for proto::GetDbSizeResponse {
+    fn from(num_bytes: GetDbSizeResponse) -> Self {
+        Self { num_bytes }
+    }
+}
+
+/// Requests one or more pages.
+#[derive(Clone, Debug)]
+pub struct GetPageRequest {
+    /// A request ID. Will be included in the response. Should be unique for in-flight requests on
+    /// the stream.
+    pub request_id: RequestID,
+    /// The request class.
+    pub request_class: GetPageClass,
+    /// The LSN to read at.
+    pub read_lsn: ReadLsn,
+    /// The relation to read from.
+    pub rel: RelTag,
+    /// Page numbers to read. Must belong to the remote shard.
+    ///
+    /// Multiple pages will be executed as a single batch by the Pageserver, amortizing layer access
+    /// costs and parallelizing them. This may increase the latency of any individual request, but
+    /// improves the overall latency and throughput of the batch as a whole.
+    pub block_numbers: SmallVec<[u32; 1]>,
+}
+
+impl TryFrom<proto::GetPageRequest> for GetPageRequest {
+    type Error = ProtocolError;
+
+    fn try_from(pb: proto::GetPageRequest) -> Result<Self, Self::Error> {
+        if pb.block_number.is_empty() {
+            return Err(ProtocolError::Missing("block_number"));
+        }
+        Ok(Self {
+            request_id: pb.request_id,
+            request_class: pb.request_class.into(),
+            read_lsn: pb
+                .read_lsn
+                .ok_or(ProtocolError::Missing("read_lsn"))?
+                .try_into()?,
+            rel: pb.rel.ok_or(ProtocolError::Missing("rel"))?.try_into()?,
+            block_numbers: pb.block_number.into(),
+        })
+    }
+}
+
+impl TryFrom<GetPageRequest> for proto::GetPageRequest {
+    type Error = ProtocolError;
+
+    fn try_from(request: GetPageRequest) -> Result<Self, Self::Error> {
+        if request.block_numbers.is_empty() {
+            return Err(ProtocolError::Missing("block_number"));
+        }
+        Ok(Self {
+            request_id: request.request_id,
+            request_class: request.request_class.into(),
+            read_lsn: Some(request.read_lsn.try_into()?),
+            rel: Some(request.rel.into()),
+            block_number: request.block_numbers.into_vec(),
+        })
+    }
+}
+
+/// A GetPage request ID.
+pub type RequestID = u64;
+
+/// A GetPage request class.
+#[derive(Clone, Copy, Debug)]
+pub enum GetPageClass {
+    /// Unknown class. For backwards compatibility: used when an older client version sends a class
+    /// that a newer server version has removed.
+    Unknown,
+    /// A normal request. This is the default.
+    Normal,
+    /// A prefetch request. NB: can only be classified on pg < 18.
+    Prefetch,
+    /// A background request (e.g. vacuum).
+    Background,
+}
+
+impl From<proto::GetPageClass> for GetPageClass {
+    fn from(pb: proto::GetPageClass) -> Self {
+        match pb {
+            proto::GetPageClass::Unknown => Self::Unknown,
+            proto::GetPageClass::Normal => Self::Normal,
+            proto::GetPageClass::Prefetch => Self::Prefetch,
+            proto::GetPageClass::Background => Self::Background,
+        }
+    }
+}
+
+impl From<i32> for GetPageClass {
+    fn from(class: i32) -> Self {
+        proto::GetPageClass::try_from(class)
+            .unwrap_or(proto::GetPageClass::Unknown)
+            .into()
+    }
+}
+
+impl From<GetPageClass> for proto::GetPageClass {
+    fn from(class: GetPageClass) -> Self {
+        match class {
+            GetPageClass::Unknown => Self::Unknown,
+            GetPageClass::Normal => Self::Normal,
+            GetPageClass::Prefetch => Self::Prefetch,
+            GetPageClass::Background => Self::Background,
+        }
+    }
+}
+
+impl From<GetPageClass> for i32 {
+    fn from(class: GetPageClass) -> Self {
+        proto::GetPageClass::from(class).into()
+    }
+}
+
+/// A GetPage response.
+///
+/// A batch response will contain all of the requested pages. We could eagerly emit individual pages
+/// as soon as they are ready, but on a readv() Postgres holds buffer pool locks on all pages in the
+/// batch and we'll only return once the entire batch is ready, so no one can make use of the
+/// individual pages.
+#[derive(Clone, Debug)]
+pub struct GetPageResponse {
+    /// The original request's ID.
+    pub request_id: RequestID,
+    /// The response status code.
+    pub status_code: GetPageStatusCode,
+    /// A string describing the status, if any.
+    pub reason: Option<String>,
+    /// The 8KB page images, in the same order as the request. Empty if status_code != OK.
+    pub page_images: SmallVec<[Bytes; 1]>,
+}
+
+impl From<proto::GetPageResponse> for GetPageResponse {
+    fn from(pb: proto::GetPageResponse) -> Self {
+        Self {
+            request_id: pb.request_id,
+            status_code: pb.status_code.into(),
+            reason: Some(pb.reason).filter(|r| !r.is_empty()),
+            page_images: pb.page_image.into(),
+        }
+    }
+}
+
+impl From<GetPageResponse> for proto::GetPageResponse {
+    fn from(response: GetPageResponse) -> Self {
+        Self {
+            request_id: response.request_id,
+            status_code: response.status_code.into(),
+            reason: response.reason.unwrap_or_default(),
+            page_image: response.page_images.into_vec(),
+        }
+    }
+}
+
+/// A GetPage response status code.
+///
+/// These are effectively equivalent to gRPC statuses. However, we use a bidirectional stream
+/// (potentially shared by many backends), and a gRPC status response would terminate the stream so
+/// we send GetPageResponse messages with these codes instead.
+#[derive(Clone, Copy, Debug)]
+pub enum GetPageStatusCode {
+    /// Unknown status. For forwards compatibility: used when an older client version receives a new
+    /// status code from a newer server version.
+    Unknown,
+    /// The request was successful.
+    Ok,
+    /// The page did not exist. The tenant/timeline/shard has already been validated during stream
+    /// setup.
+    NotFound,
+    /// The request was invalid.
+    InvalidRequest,
+    /// The request failed due to an internal server error.
+    InternalError,
+    /// The tenant is rate limited. Slow down and retry later.
+    SlowDown,
+}
+
+impl From<proto::GetPageStatusCode> for GetPageStatusCode {
+    fn from(pb: proto::GetPageStatusCode) -> Self {
+        match pb {
+            proto::GetPageStatusCode::Unknown => Self::Unknown,
+            proto::GetPageStatusCode::Ok => Self::Ok,
+            proto::GetPageStatusCode::NotFound => Self::NotFound,
+            proto::GetPageStatusCode::InvalidRequest => Self::InvalidRequest,
+            proto::GetPageStatusCode::InternalError => Self::InternalError,
+            proto::GetPageStatusCode::SlowDown => Self::SlowDown,
+        }
+    }
+}
+
+impl From<i32> for GetPageStatusCode {
+    fn from(status_code: i32) -> Self {
+        proto::GetPageStatusCode::try_from(status_code)
+            .unwrap_or(proto::GetPageStatusCode::Unknown)
+            .into()
+    }
+}
+
+impl From<GetPageStatusCode> for proto::GetPageStatusCode {
+    fn from(status_code: GetPageStatusCode) -> Self {
+        match status_code {
+            GetPageStatusCode::Unknown => Self::Unknown,
+            GetPageStatusCode::Ok => Self::Ok,
+            GetPageStatusCode::NotFound => Self::NotFound,
+            GetPageStatusCode::InvalidRequest => Self::InvalidRequest,
+            GetPageStatusCode::InternalError => Self::InternalError,
+            GetPageStatusCode::SlowDown => Self::SlowDown,
+        }
+    }
+}
+
+impl From<GetPageStatusCode> for i32 {
+    fn from(status_code: GetPageStatusCode) -> Self {
+        proto::GetPageStatusCode::from(status_code).into()
+    }
+}
+
+/// Fetches the size of a relation at a given LSN, as # of blocks. Only valid on shard 0, other
+/// shards will error.
+pub struct GetRelSizeRequest {
+    pub read_lsn: ReadLsn,
+    pub rel: RelTag,
+}
+
+impl TryFrom<proto::GetRelSizeRequest> for GetRelSizeRequest {
+    type Error = ProtocolError;
+
+    fn try_from(proto: proto::GetRelSizeRequest) -> Result<Self, Self::Error> {
+        Ok(Self {
+            read_lsn: proto
+                .read_lsn
+                .ok_or(ProtocolError::Missing("read_lsn"))?
+                .try_into()?,
+            rel: proto.rel.ok_or(ProtocolError::Missing("rel"))?.try_into()?,
+        })
+    }
+}
+
+impl TryFrom<GetRelSizeRequest> for proto::GetRelSizeRequest {
+    type Error = ProtocolError;
+
+    fn try_from(request: GetRelSizeRequest) -> Result<Self, Self::Error> {
+        Ok(Self {
+            read_lsn: Some(request.read_lsn.try_into()?),
+            rel: Some(request.rel.into()),
+        })
+    }
+}
+
+pub type GetRelSizeResponse = u32;
+
+impl From<proto::GetRelSizeResponse> for GetRelSizeResponse {
+    fn from(proto: proto::GetRelSizeResponse) -> Self {
+        proto.num_blocks
+    }
+}
+
+impl From<GetRelSizeResponse> for proto::GetRelSizeResponse {
+    fn from(num_blocks: GetRelSizeResponse) -> Self {
+        Self { num_blocks }
+    }
+}
+
+/// Requests an SLRU segment. Only valid on shard 0, other shards will error.
+pub struct GetSlruSegmentRequest {
+    pub read_lsn: ReadLsn,
+    pub kind: SlruKind,
+    pub segno: u32,
+}
+
+impl TryFrom<proto::GetSlruSegmentRequest> for GetSlruSegmentRequest {
+    type Error = ProtocolError;
+
+    fn try_from(pb: proto::GetSlruSegmentRequest) -> Result<Self, Self::Error> {
+        Ok(Self {
+            read_lsn: pb
+                .read_lsn
+                .ok_or(ProtocolError::Missing("read_lsn"))?
+                .try_into()?,
+            kind: u8::try_from(pb.kind)
+                .ok()
+                .and_then(SlruKind::from_repr)
+                .ok_or_else(|| ProtocolError::invalid("slru_kind", pb.kind))?,
+            segno: pb.segno,
+        })
+    }
+}
+
+impl TryFrom<GetSlruSegmentRequest> for proto::GetSlruSegmentRequest {
+    type Error = ProtocolError;
+
+    fn try_from(request: GetSlruSegmentRequest) -> Result<Self, Self::Error> {
+        Ok(Self {
+            read_lsn: Some(request.read_lsn.try_into()?),
+            kind: request.kind as u32,
+            segno: request.segno,
+        })
+    }
+}
+
+pub type GetSlruSegmentResponse = Bytes;
+
+impl TryFrom<proto::GetSlruSegmentResponse> for GetSlruSegmentResponse {
+    type Error = ProtocolError;
+
+    fn try_from(pb: proto::GetSlruSegmentResponse) -> Result<Self, Self::Error> {
+        if pb.segment.is_empty() {
+            return Err(ProtocolError::Missing("segment"));
+        }
+        Ok(pb.segment)
+    }
+}
+
+impl TryFrom<GetSlruSegmentResponse> for proto::GetSlruSegmentResponse {
+    type Error = ProtocolError;
+
+    fn try_from(segment: GetSlruSegmentResponse) -> Result<Self, Self::Error> {
+        if segment.is_empty() {
+            return Err(ProtocolError::Missing("segment"));
+        }
+        Ok(Self { segment })
+    }
+}
+
+// SlruKind is defined in pageserver_api::reltag.
+pub type SlruKind = pageserver_api::reltag::SlruKind;
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -21,6 +21,7 @@ use pageserver::config::{PageServerConf, PageserverIdentity, ignored_fields};
 use pageserver::controller_upcall_client::StorageControllerUpcallClient;
 use pageserver::deletion_queue::DeletionQueue;
 use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
+use pageserver::feature_resolver::FeatureResolver;
 use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
 use pageserver::task_mgr::{
    BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
@@ -522,6 +523,12 @@ fn start_pageserver(
    // Set up remote storage client
    let remote_storage = BACKGROUND_RUNTIME.block_on(create_remote_storage_client(conf))?;

+    let feature_resolver = create_feature_resolver(
+        conf,
+        shutdown_pageserver.clone(),
+        BACKGROUND_RUNTIME.handle(),
+    )?;
+
    // Set up deletion queue
    let (deletion_queue, deletion_workers) = DeletionQueue::new(
        remote_storage.clone(),
@@ -575,6 +582,7 @@ fn start_pageserver(
            deletion_queue_client,
            l0_flush_global_state,
            basebackup_prepare_sender,
+            feature_resolver,
        },
        order,
        shutdown_pageserver.clone(),
@@ -849,6 +857,14 @@ fn start_pageserver(
    })
 }

+fn create_feature_resolver(
+    conf: &'static PageServerConf,
+    shutdown_pageserver: CancellationToken, // handed on to the resolver's background loop
+    handle: &tokio::runtime::Handle, // runtime handle the background loop is spawned with
+) -> anyhow::Result<FeatureResolver> {
+    FeatureResolver::spawn(conf, shutdown_pageserver, handle) // forwards all arguments unchanged
+}
+
 async fn create_remote_storage_client(
    conf: &'static PageServerConf,
 ) -> anyhow::Result<GenericRemoteStorage> {
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -14,7 +14,7 @@ use std::time::Duration;
 use anyhow::{Context, bail, ensure};
 use camino::{Utf8Path, Utf8PathBuf};
 use once_cell::sync::OnceCell;
-use pageserver_api::config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes};
+use pageserver_api::config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes, PostHogConfig};
 use pageserver_api::models::ImageCompressionAlgorithm;
 use pageserver_api::shard::TenantShardId;
 use pem::Pem;
@@ -238,6 +238,9 @@ pub struct PageServerConf {
    /// This is insecure and should only be used in development environments.
    pub dev_mode: bool,

+    /// PostHog integration config.
+    pub posthog_config: Option<PostHogConfig>,
+
    pub timeline_import_config: pageserver_api::config::TimelineImportConfig,

    pub basebackup_cache_config: Option<pageserver_api::config::BasebackupCacheConfig>,
@@ -421,6 +424,7 @@ impl PageServerConf {
            tracing,
            enable_tls_page_service_api,
            dev_mode,
+            posthog_config,
            timeline_import_config,
            basebackup_cache_config,
        } = config_toml;
@@ -536,6 +540,7 @@ impl PageServerConf {
                }
                None => Vec::new(),
            },
+            posthog_config,
        };

        // ------------------------------------------------------------
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -837,7 +837,30 @@ async fn collect_eviction_candidates(
                continue;
            }
            let info = tl.get_local_layers_for_disk_usage_eviction().await;
-            debug!(tenant_id=%tl.tenant_shard_id.tenant_id, shard_id=%tl.tenant_shard_id.shard_slug(), timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len());
+            debug!(
+                tenant_id=%tl.tenant_shard_id.tenant_id,
+                shard_id=%tl.tenant_shard_id.shard_slug(),
+                timeline_id=%tl.timeline_id,
+                "timeline resident layers count: {}", info.resident_layers.len()
+            );
+
+            tenant_candidates.extend(info.resident_layers.into_iter());
+            max_layer_size = max_layer_size.max(info.max_layer_size.unwrap_or(0));
+
+            if cancel.is_cancelled() {
+                return Ok(EvictionCandidates::Cancelled);
+            }
+        }
+
+        // Also consider layers of timelines being imported for eviction
+        for tl in tenant.list_importing_timelines() {
+            let info = tl.timeline.get_local_layers_for_disk_usage_eviction().await;
+            debug!(
+                tenant_id=%tl.timeline.tenant_shard_id.tenant_id,
+                shard_id=%tl.timeline.tenant_shard_id.shard_slug(),
+                timeline_id=%tl.timeline.timeline_id,
+                "timeline resident layers count: {}", info.resident_layers.len()
+            );

            tenant_candidates.extend(info.resident_layers.into_iter());
            max_layer_size = max_layer_size.max(info.max_layer_size.unwrap_or(0));
--- a/pageserver/src/feature_resolver.rs
+++ b/pageserver/src/feature_resolver.rs
@@ -0,0 +1,104 @@
+use std::{collections::HashMap, sync::Arc, time::Duration};
+
+use posthog_client_lite::{
+    FeatureResolverBackgroundLoop, PostHogClientConfig, PostHogEvaluationError,
+};
+use tokio_util::sync::CancellationToken;
+use utils::id::TenantId;
+
+use crate::config::PageServerConf;
+
+/// Evaluates PostHog feature flags on behalf of tenants.
+///
+/// Clones share a single background loop, since the loop is held behind an `Arc`.
+#[derive(Clone)]
+pub struct FeatureResolver {
+    /// `None` when the PostHog integration is disabled.
+    inner: Option<Arc<FeatureResolverBackgroundLoop>>,
+}
+
+impl FeatureResolver {
+    /// Creates a resolver without a background loop. Every evaluation on it fails with
+    /// `PostHogEvaluationError::NotAvailable`.
+    pub fn new_disabled() -> Self {
+        Self { inner: None }
+    }
+
+    /// Builds the resolver and starts its background loop on `handle`.
+    ///
+    /// Yields a disabled resolver if `conf.posthog_config` is unset. Every path through this
+    /// function currently returns `Ok`.
+    pub fn spawn(
+        conf: &PageServerConf,
+        shutdown_pageserver: CancellationToken,
+        handle: &tokio::runtime::Handle,
+    ) -> anyhow::Result<Self> {
+        // DO NOT block in this function: make it return as fast as possible to avoid startup delays.
+        let Some(posthog_config) = &conf.posthog_config else {
+            // No PostHog configuration: hand out a disabled resolver.
+            return Ok(FeatureResolver { inner: None });
+        };
+        let client_config = PostHogClientConfig {
+            server_api_key: posthog_config.server_api_key.clone(),
+            client_api_key: posthog_config.client_api_key.clone(),
+            project_id: posthog_config.project_id.clone(),
+            private_api_url: posthog_config.private_api_url.clone(),
+            public_api_url: posthog_config.public_api_url.clone(),
+        };
+        let background_loop = Arc::new(FeatureResolverBackgroundLoop::new(
+            client_config,
+            shutdown_pageserver,
+        ));
+        // TODO: make this configurable
+        Arc::clone(&background_loop).spawn(handle, Duration::from_secs(60));
+        Ok(FeatureResolver {
+            inner: Some(background_loop),
+        })
+    }
+
+    /// Returns the background loop, or the `NotAvailable` error that every evaluation method
+    /// reports while the PostHog integration is disabled.
+    fn enabled_loop(&self) -> Result<&Arc<FeatureResolverBackgroundLoop>, PostHogEvaluationError> {
+        self.inner.as_ref().ok_or_else(|| {
+            PostHogEvaluationError::NotAvailable("PostHog integration is not enabled".to_string())
+        })
+    }
+
+    /// Evaluates a multivariate feature flag for `tenant_id`; properties are not supported yet.
+    ///
+    /// Error handling: callers are expected to look at the error and pick a fallback themselves
+    /// (for instance treating an unresolvable flag as disabled). Do *not* propagate the error past
+    /// the place where the flag is resolved.
+    pub fn evaluate_multivariate(
+        &self,
+        flag_key: &str,
+        tenant_id: TenantId,
+    ) -> Result<String, PostHogEvaluationError> {
+        let store = self.enabled_loop()?.feature_store();
+        store.evaluate_multivariate(flag_key, &tenant_id.to_string(), &HashMap::new())
+    }
+
+    /// Evaluates a boolean feature flag for `tenant_id`; properties are not supported yet.
+    ///
+    /// `Ok(())` means the flag evaluated to true; anything else comes back as an error.
+    ///
+    /// Error handling: callers are expected to look at the error and pick a fallback themselves
+    /// (for instance treating an unresolvable flag as disabled). Do *not* propagate the error past
+    /// the place where the flag is resolved.
+    pub fn evaluate_boolean(
+        &self,
+        flag_key: &str,
+        tenant_id: TenantId,
+    ) -> Result<(), PostHogEvaluationError> {
+        let store = self.enabled_loop()?.feature_store();
+        store.evaluate_boolean(flag_key, &tenant_id.to_string(), &HashMap::new())
+    }
+
+    /// Asks the feature store whether `flag_key` is a boolean flag.
+    ///
+    /// Fails with `NotAvailable` while the PostHog integration is disabled.
+    pub fn is_feature_flag_boolean(&self, flag_key: &str) -> Result<bool, PostHogEvaluationError> {
+        let store = self.enabled_loop()?.feature_store();
+        store.is_feature_flag_boolean(flag_key)
+    }
+}
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -353,6 +353,33 @@ paths:
        "200":
          description: OK

+  /v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/mark_invisible:
+    parameters:
+      - name: tenant_shard_id
+        in: path
+        required: true
+        schema:
+          type: string
+      - name: timeline_id
+        in: path
+        required: true
+        schema:
+          type: string
+          format: hex
+    put:
+      requestBody:
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                is_visible:
+                  type: boolean
+                  default: false
+      responses:
+        "200":
+          description: OK
+
  /v1/tenant/{tenant_shard_id}/location_config:
    parameters:
      - name: tenant_shard_id
@@ -626,6 +653,8 @@ paths:
                  format: hex
                pg_version:
                  type: integer
+                read_only:
+                  type: boolean
                existing_initdb_timeline_id:
                  type: string
                  format: hex
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -370,6 +370,18 @@ impl From<crate::tenant::secondary::SecondaryTenantError> for ApiError {
    }
 }

+impl From<crate::tenant::FinalizeTimelineImportError> for ApiError {
+    /// Maps a still-running import to `ResourceUnavailable` and a shutdown to `ShuttingDown`.
+    fn from(err: crate::tenant::FinalizeTimelineImportError) -> ApiError {
+        match err {
+            crate::tenant::FinalizeTimelineImportError::ImportTaskStillRunning => {
+                Self::ResourceUnavailable("Import task still running".into())
+            }
+            crate::tenant::FinalizeTimelineImportError::ShuttingDown => Self::ShuttingDown,
+        }
+    }
+}
+
 // Helper function to construct a TimelineInfo struct for a timeline
 async fn build_timeline_info(
    timeline: &Arc<Timeline>,
@@ -572,6 +584,7 @@ async fn timeline_create_handler(
        TimelineCreateRequestMode::Branch {
            ancestor_timeline_id,
            ancestor_start_lsn,
+            read_only: _,
            pg_version: _,
        } => tenant::CreateTimelineParams::Branch(tenant::CreateTimelineParamsBranch {
            new_timeline_id,
@@ -3532,10 +3545,7 @@ async fn activate_post_import_handler(

        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;

-        tenant
-            .finalize_importing_timeline(timeline_id)
-            .await
-            .map_err(ApiError::InternalServerError)?;
+        tenant.finalize_importing_timeline(timeline_id).await?;

        match tenant.get_timeline(timeline_id, false) {
            Ok(_timeline) => {
@@ -3653,6 +3663,46 @@ async fn read_tar_eof(mut reader: (impl tokio::io::AsyncRead + Unpin)) -> anyhow
    Ok(())
 }

+/// Evaluates a feature flag for the tenant. `as=boolean` and `as=multivariate` select the flag
+/// type, any other `as` value infers it. The evaluation outcome, success or error, is sent back
+/// as a 200 JSON body; a failed type inference is reported as an internal server error instead.
+async fn tenant_evaluate_feature_flag(
+    request: Request<Body>,
+    _cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+
+    let flag: String = must_parse_query_param(&request, "flag")?;
+    let as_type: String = must_parse_query_param(&request, "as")?;
+
+    let state = get_state(&request);
+
+    async {
+        let tenant = state
+            .tenant_manager
+            .get_attached_tenant_shard(tenant_shard_id)?;
+        let resolver = &tenant.feature_resolver;
+        let evaluate_as_boolean = match as_type.as_str() {
+            "boolean" => true,
+            "multivariate" => false,
+            // Auto infer the type of the feature flag.
+            _ => resolver
+                .is_feature_flag_boolean(&flag)
+                .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!("{e}")))?,
+        };
+        if evaluate_as_boolean {
+            let result = resolver.evaluate_boolean(&flag, tenant_shard_id.tenant_id);
+            json_response(StatusCode::OK, result.map(|_| true).map_err(|e| e.to_string()))
+        } else {
+            let result = resolver.evaluate_multivariate(&flag, tenant_shard_id.tenant_id);
+            json_response(StatusCode::OK, result.map_err(|e| e.to_string()))
+        }
+    }
+    .instrument(info_span!("tenant_evaluate_feature_flag", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
+    .await
+}
+
 /// Common functionality of all the HTTP API handlers.
 ///
 /// - Adds a tracing span to each request (by `request_span`)
@@ -4029,5 +4079,8 @@ pub fn make_router(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/activate_post_import",
            |r| api_handler(r, activate_post_import_handler),
        )
+        .get("/v1/tenant/:tenant_shard_id/feature_flag", |r| {
+            api_handler(r, tenant_evaluate_feature_flag)
+        })
        .any(handler_404))
 }
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -10,6 +10,7 @@ pub mod context;
 pub mod controller_upcall_client;
 pub mod deletion_queue;
 pub mod disk_usage_eviction_task;
+pub mod feature_resolver;
 pub mod http;
 pub mod import_datadir;
 pub mod l0_flush;
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1312,11 +1312,44 @@ impl EvictionsWithLowResidenceDuration {
 //
 // Roughly logarithmic scale.
 const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
-    0.000030, // 30 usec
-    0.001000, // 1000 usec
-    0.030,    // 30 ms
-    1.000,    // 1000 ms
-    30.000,   // 30000 ms
+    0.00005,  // 50us
+    0.00006,  // 60us
+    0.00007,  // 70us
+    0.00008,  // 80us
+    0.00009,  // 90us
+    0.0001,   // 100us
+    0.000110, // 110us
+    0.000120, // 120us
+    0.000130, // 130us
+    0.000140, // 140us
+    0.000150, // 150us
+    0.000160, // 160us
+    0.000170, // 170us
+    0.000180, // 180us
+    0.000190, // 190us
+    0.000200, // 200us
+    0.000210, // 210us
+    0.000220, // 220us
+    0.000230, // 230us
+    0.000240, // 240us
+    0.000250, // 250us
+    0.000300, // 300us
+    0.000350, // 350us
+    0.000400, // 400us
+    0.000450, // 450us
+    0.000500, // 500us
+    0.000600, // 600us
+    0.000700, // 700us
+    0.000800, // 800us
+    0.000900, // 900us
+    0.001000, // 1ms
+    0.002000, // 2ms
+    0.003000, // 3ms
+    0.004000, // 4ms
+    0.005000, // 5ms
+    0.01000,  // 10ms
+    0.02000,  // 20ms
+    0.05000,  // 50ms
 ];

 /// VirtualFile fs operation variants.
@@ -2234,8 +2267,10 @@ impl BasebackupQueryTimeOngoingRecording<'_> {
        // If you want to change categorize of a specific error, also change it in `log_query_error`.
        let metric = match res {
            Ok(_) => &self.parent.ok,
-            Err(QueryError::Shutdown) => {
-                // Do not observe ok/err for shutdown
+            Err(QueryError::Shutdown) | Err(QueryError::Reconnect) => {
+                // Do not observe ok/err for shutdown/reconnect.
+                // Reconnect error might be raised when the operation is waiting for LSN and the tenant shutdown interrupts
+                // the operation. A reconnect error will be issued and the client will retry.
                return;
            }
            Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -43,12 +43,14 @@ use strum_macros::IntoStaticStr;
 use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufWriter};
 use tokio::task::JoinHandle;
 use tokio_util::sync::CancellationToken;
+use tonic::service::Interceptor as _;
 use tracing::*;
 use utils::auth::{Claims, Scope, SwappableJwtAuth};
 use utils::failpoint_support;
-use utils::id::{TenantId, TimelineId};
+use utils::id::{TenantId, TenantTimelineId, TimelineId};
 use utils::logging::log_slow;
 use utils::lsn::Lsn;
+use utils::shard::ShardIndex;
 use utils::simple_rcu::RcuReadGuard;
 use utils::sync::gate::{Gate, GateGuard};
 use utils::sync::spsc_fold;
@@ -200,9 +202,9 @@ pub fn spawn_grpc(
        .max_concurrent_streams(Some(GRPC_MAX_CONCURRENT_STREAMS));

    // Main page service.
-    let page_service = proto::PageServiceServer::new(PageServerHandler::new(
+    let page_service_handler = PageServerHandler::new(
        tenant_manager,
-        auth,
+        auth.clone(),
        PageServicePipeliningConfig::Serial, // TODO: unused with gRPC
        conf.get_vectored_concurrent_io,
        ConnectionPerfSpanFields::default(),
@@ -210,7 +212,18 @@ pub fn spawn_grpc(
        ctx,
        cancel.clone(),
        gate.enter().expect("just created"),
-    ));
+    );
+
+    let mut tenant_interceptor = TenantMetadataInterceptor;
+    let mut auth_interceptor = TenantAuthInterceptor::new(auth);
+    let interceptors = move |mut req: tonic::Request<()>| {
+        req = tenant_interceptor.call(req)?;
+        req = auth_interceptor.call(req)?;
+        Ok(req)
+    };
+
+    let page_service =
+        proto::PageServiceServer::with_interceptor(page_service_handler, interceptors);
    let server = server.add_service(page_service);

    // Reflection service for use with e.g. grpcurl.
@@ -756,6 +769,9 @@ struct BatchedGetPageRequest {
    timer: SmgrOpTimer,
    lsn_range: LsnRange,
    ctx: RequestContext,
+    // If the request is perf enabled, this contains a context
+    // with a perf span tracking the time spent waiting for the executor.
+    batch_wait_ctx: Option<RequestContext>,
 }

 #[cfg(feature = "testing")]
@@ -768,6 +784,7 @@ struct BatchedTestRequest {
 /// so that we don't keep the [`Timeline::gate`] open while the batch
 /// is being built up inside the [`spsc_fold`] (pagestream pipelining).
 #[derive(IntoStaticStr)]
+#[allow(clippy::large_enum_variant)]
 enum BatchedFeMessage {
    Exists {
        span: Span,
@@ -1285,6 +1302,22 @@ impl PageServerHandler {
                    }
                };

+                let batch_wait_ctx = if ctx.has_perf_span() {
+                    Some(
+                        RequestContextBuilder::from(&ctx)
+                            .perf_span(|crnt_perf_span| {
+                                info_span!(
+                                    target: PERF_TRACE_TARGET,
+                                    parent: crnt_perf_span,
+                                    "WAIT_EXECUTOR",
+                                )
+                            })
+                            .attached_child(),
+                    )
+                } else {
+                    None
+                };
+
                BatchedFeMessage::GetPage {
                    span,
                    shard: shard.downgrade(),
@@ -1296,6 +1329,7 @@ impl PageServerHandler {
                            request_lsn: req.hdr.request_lsn
                        },
                        ctx,
+                        batch_wait_ctx,
                    }],
                    // The executor grabs the batch when it becomes idle.
                    // Hence, [`GetPageBatchBreakReason::ExecutorSteal`] is the
@@ -1451,7 +1485,7 @@ impl PageServerHandler {
            let mut flush_timers = Vec::with_capacity(handler_results.len());
            for handler_result in &mut handler_results {
                let flush_timer = match handler_result {
-                    Ok((_, timer)) => Some(
+                    Ok((_response, timer, _ctx)) => Some(
                        timer
                            .observe_execution_end(flushing_start_time)
                            .expect("we are the first caller"),
@@ -1471,7 +1505,7 @@ impl PageServerHandler {
        // Some handler errors cause exit from pagestream protocol.
        // Other handler errors are sent back as an error message and we stay in pagestream protocol.
        for (handler_result, flushing_timer) in handler_results.into_iter().zip(flush_timers) {
-            let response_msg = match handler_result {
+            let (response_msg, ctx) = match handler_result {
                Err(e) => match &e.err {
                    PageStreamError::Shutdown => {
                        // If we fail to fulfil a request during shutdown, which may be _because_ of
@@ -1496,15 +1530,30 @@ impl PageServerHandler {
                            error!("error reading relation or page version: {full:#}")
                        });

-                        PagestreamBeMessage::Error(PagestreamErrorResponse {
-                            req: e.req,
-                            message: e.err.to_string(),
-                        })
+                        (
+                            PagestreamBeMessage::Error(PagestreamErrorResponse {
+                                req: e.req,
+                                message: e.err.to_string(),
+                            }),
+                            None,
+                        )
                    }
                },
-                Ok((response_msg, _op_timer_already_observed)) => response_msg,
+                Ok((response_msg, _op_timer_already_observed, ctx)) => (response_msg, Some(ctx)),
            };

+            let ctx = ctx.map(|req_ctx| {
+                RequestContextBuilder::from(&req_ctx)
+                    .perf_span(|crnt_perf_span| {
+                        info_span!(
+                            target: PERF_TRACE_TARGET,
+                            parent: crnt_perf_span,
+                            "FLUSH_RESPONSE",
+                        )
+                    })
+                    .attached_child()
+            });
+
            //
            // marshal & transmit response message
            //
@@ -1527,6 +1576,17 @@ impl PageServerHandler {
                )),
                None => futures::future::Either::Right(flush_fut),
            };
+
+            let flush_fut = if let Some(req_ctx) = ctx.as_ref() {
+                futures::future::Either::Left(
+                    flush_fut.maybe_perf_instrument(req_ctx, |current_perf_span| {
+                        current_perf_span.clone()
+                    }),
+                )
+            } else {
+                futures::future::Either::Right(flush_fut)
+            };
+
            // do it while respecting cancellation
            let _: () = async move {
                tokio::select! {
@@ -1556,7 +1616,7 @@ impl PageServerHandler {
        ctx: &RequestContext,
    ) -> Result<
        (
-            Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>>,
+            Vec<Result<(PagestreamBeMessage, SmgrOpTimer, RequestContext), BatchedPageStreamError>>,
            Span,
        ),
        QueryError,
@@ -1583,7 +1643,7 @@ impl PageServerHandler {
                        self.handle_get_rel_exists_request(&shard, &req, &ctx)
                            .instrument(span.clone())
                            .await
-                            .map(|msg| (msg, timer))
+                            .map(|msg| (msg, timer, ctx))
                            .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
                    ],
                    span,
@@ -1602,7 +1662,7 @@ impl PageServerHandler {
                        self.handle_get_nblocks_request(&shard, &req, &ctx)
                            .instrument(span.clone())
                            .await
-                            .map(|msg| (msg, timer))
+                            .map(|msg| (msg, timer, ctx))
                            .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
                    ],
                    span,
@@ -1649,7 +1709,7 @@ impl PageServerHandler {
                        self.handle_db_size_request(&shard, &req, &ctx)
                            .instrument(span.clone())
                            .await
-                            .map(|msg| (msg, timer))
+                            .map(|msg| (msg, timer, ctx))
                            .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
                    ],
                    span,
@@ -1668,7 +1728,7 @@ impl PageServerHandler {
                        self.handle_get_slru_segment_request(&shard, &req, &ctx)
                            .instrument(span.clone())
                            .await
-                            .map(|msg| (msg, timer))
+                            .map(|msg| (msg, timer, ctx))
                            .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
                    ],
                    span,
@@ -2020,12 +2080,25 @@ impl PageServerHandler {
                            return Ok(());
                        }
                    };
-                    let batch = match batch {
+                    let mut batch = match batch {
                        Ok(batch) => batch,
                        Err(e) => {
                            return Err(e);
                        }
                    };
+
+                    if let BatchedFeMessage::GetPage {
+                        pages,
+                        span: _,
+                        shard: _,
+                        batch_break_reason: _,
+                    } = &mut batch
+                    {
+                        for req in pages {
+                            req.batch_wait_ctx.take();
+                        }
+                    }
+
                    self.pagestream_handle_batched_message(
                        pgb_writer,
                        batch,
@@ -2338,7 +2411,8 @@ impl PageServerHandler {
        io_concurrency: IoConcurrency,
        batch_break_reason: GetPageBatchBreakReason,
        ctx: &RequestContext,
-    ) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>> {
+    ) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer, RequestContext), BatchedPageStreamError>>
+    {
        debug_assert_current_span_has_tenant_and_timeline_id();

        timeline
@@ -2445,6 +2519,7 @@ impl PageServerHandler {
                                page,
                            }),
                            req.timer,
+                            req.ctx,
                        )
                    })
                    .map_err(|e| BatchedPageStreamError {
@@ -2489,7 +2564,8 @@ impl PageServerHandler {
        timeline: &Timeline,
        requests: Vec<BatchedTestRequest>,
        _ctx: &RequestContext,
-    ) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>> {
+    ) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer, RequestContext), BatchedPageStreamError>>
+    {
        // real requests would do something with the timeline
        let mut results = Vec::with_capacity(requests.len());
        for _req in requests.iter() {
@@ -2516,6 +2592,10 @@ impl PageServerHandler {
                                req: req.req.clone(),
                            }),
                            req.timer,
+                            RequestContext::new(
+                                TaskKind::PageRequestHandler,
+                                DownloadBehavior::Warn,
+                            ),
                        )
                    })
                    .map_err(|e| BatchedPageStreamError {
@@ -3290,6 +3370,104 @@ impl From<GetActiveTenantError> for QueryError {
    }
 }

+/// gRPC interceptor that decodes the tenant metadata headers and stores the result as request
+/// extensions of type TenantTimelineId and ShardIndex.
+///
+/// TODO: consider looking up the timeline handle here and storing it.
+#[derive(Clone)]
+struct TenantMetadataInterceptor;
+
+impl TenantMetadataInterceptor {
+    /// Looks up the metadata entry `key` and runs `parse` on its text.
+    ///
+    /// An absent entry is rejected as "missing <key>"; a value that fails `to_str` or `parse` is
+    /// rejected as "invalid <key>". Both are `invalid_argument` statuses.
+    fn decode<T, E>(
+        req: &tonic::Request<()>,
+        key: &str,
+        parse: impl FnOnce(&str) -> Result<T, E>,
+    ) -> Result<T, tonic::Status> {
+        // Both the text conversion and the parser report the same "invalid" status.
+        let invalid = || tonic::Status::invalid_argument(format!("invalid {key}"));
+        let value = req
+            .metadata()
+            .get(key)
+            .ok_or_else(|| tonic::Status::invalid_argument(format!("missing {key}")))?
+            .to_str()
+            .map_err(|_| invalid())?;
+        parse(value).map_err(|_| invalid())
+    }
+}
+
+impl tonic::service::Interceptor for TenantMetadataInterceptor {
+    /// Decodes `neon-tenant-id`, `neon-timeline-id` and `neon-shard-id` from the request metadata.
+    ///
+    /// The request is rejected unless all three are present and well-formed.
+    fn call(&mut self, mut req: tonic::Request<()>) -> Result<tonic::Request<()>, tonic::Status> {
+        // Headers are checked in this order, so the first offending one determines the error.
+        let tenant_id = Self::decode(&req, "neon-tenant-id", TenantId::from_str)?;
+        let timeline_id = Self::decode(&req, "neon-timeline-id", TimelineId::from_str)?;
+        let shard_index = Self::decode(&req, "neon-shard-id", ShardIndex::from_str)?;
+
+        // Stash them in the request.
+        let extensions = req.extensions_mut();
+        extensions.insert(TenantTimelineId::new(tenant_id, timeline_id));
+        extensions.insert(shard_index);
+
+        Ok(req)
+    }
+}
+
+/// Authenticates gRPC page service requests. Must run after TenantMetadataInterceptor.
+#[derive(Clone)]
+struct TenantAuthInterceptor {
+    auth: Option<Arc<SwappableJwtAuth>>,
+}
+
+impl TenantAuthInterceptor {
+    fn new(auth: Option<Arc<SwappableJwtAuth>>) -> Self {
+        Self { auth }
+    }
+
+    /// Pulls the token out of an `authorization: Bearer <token>` metadata entry.
+    fn bearer_token(req: &tonic::Request<()>) -> Result<&str, tonic::Status> {
+        let invalid = || tonic::Status::invalid_argument("invalid authorization header");
+        let header = req
+            .metadata()
+            .get("authorization")
+            .ok_or_else(|| tonic::Status::unauthenticated("no authorization header"))?;
+        let text = header.to_str().map_err(|_| invalid())?;
+        Ok(text.strip_prefix("Bearer ").ok_or_else(invalid)?.trim())
+    }
+}
+
+impl tonic::service::Interceptor for TenantAuthInterceptor {
+    fn call(&mut self, req: tonic::Request<()>) -> Result<tonic::Request<()>, tonic::Status> {
+        // Do nothing if auth is disabled.
+        let Some(auth) = &self.auth else {
+            return Ok(req);
+        };
+
+        // Fetch the tenant ID that's been set by TenantMetadataInterceptor.
+        let tenant_id = req
+            .extensions()
+            .get::<TenantTimelineId>()
+            .expect("TenantMetadataInterceptor must run before TenantAuthInterceptor")
+            .tenant_id;
+
+        // Decode the JWT token, then check that it is valid for this tenant.
+        let token = Self::bearer_token(&req)?;
+        let token_data: TokenData<Claims> = auth
+            .decode(token)
+            .map_err(|err| tonic::Status::invalid_argument(format!("invalid JWT token: {err}")))?;
+        check_permission(&token_data.claims, Some(tenant_id))
+            .map_err(|err| tonic::Status::permission_denied(err.to_string()))?;
+
+        // TODO: consider stashing the claims in the request extensions, if needed.
+        Ok(req)
+    }
+}
+
 #[derive(Debug, thiserror::Error)]
 pub(crate) enum GetActiveTimelineError {
    #[error(transparent)]
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -84,6 +84,7 @@ use crate::context;
 use crate::context::RequestContextBuilder;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};
+use crate::feature_resolver::FeatureResolver;
 use crate::l0_flush::L0FlushGlobalState;
 use crate::metrics::{
    BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, CONCURRENT_INITDBS,
@@ -159,6 +160,7 @@ pub struct TenantSharedResources {
    pub deletion_queue_client: DeletionQueueClient,
    pub l0_flush_global_state: L0FlushGlobalState,
    pub basebackup_prepare_sender: BasebackupPrepareSender,
+    pub feature_resolver: FeatureResolver,
 }

 /// A [`TenantShard`] is really an _attached_ tenant.  The configuration
@@ -298,7 +300,7 @@ pub struct TenantShard {
    ///   as in progress.
    /// * Imported timelines are removed when the storage controller calls the post timeline
    ///   import activation endpoint.
-    timelines_importing: std::sync::Mutex<HashMap<TimelineId, ImportingTimeline>>,
+    timelines_importing: std::sync::Mutex<HashMap<TimelineId, Arc<ImportingTimeline>>>,

    /// The last tenant manifest known to be in remote storage. None if the manifest has not yet
    /// been either downloaded or uploaded. Always Some after tenant attach.
@@ -380,6 +382,8 @@ pub struct TenantShard {
    pub(crate) gc_block: gc_block::GcBlock,

    l0_flush_global_state: L0FlushGlobalState,
+
+    pub(crate) feature_resolver: FeatureResolver,
 }
 impl std::fmt::Debug for TenantShard {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -668,6 +672,7 @@ pub enum MaybeOffloaded {
 pub enum TimelineOrOffloaded {
    Timeline(Arc<Timeline>),
    Offloaded(Arc<OffloadedTimeline>),
+    Importing(Arc<ImportingTimeline>),
 }

 impl TimelineOrOffloaded {
@@ -679,6 +684,9 @@ impl TimelineOrOffloaded {
            TimelineOrOffloaded::Offloaded(offloaded) => {
                TimelineOrOffloadedArcRef::Offloaded(offloaded)
            }
+            TimelineOrOffloaded::Importing(importing) => {
+                TimelineOrOffloadedArcRef::Importing(importing)
+            }
        }
    }
    pub fn tenant_shard_id(&self) -> TenantShardId {
@@ -691,12 +699,16 @@ impl TimelineOrOffloaded {
        match self {
            TimelineOrOffloaded::Timeline(timeline) => &timeline.delete_progress,
            TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.delete_progress,
+            TimelineOrOffloaded::Importing(importing) => &importing.delete_progress,
        }
    }
    fn maybe_remote_client(&self) -> Option<Arc<RemoteTimelineClient>> {
        match self {
            TimelineOrOffloaded::Timeline(timeline) => Some(timeline.remote_client.clone()),
            TimelineOrOffloaded::Offloaded(_offloaded) => None,
+            TimelineOrOffloaded::Importing(importing) => {
+                Some(importing.timeline.remote_client.clone())
+            }
        }
    }
 }
@@ -704,6 +716,7 @@ impl TimelineOrOffloaded {
 pub enum TimelineOrOffloadedArcRef<'a> {
    Timeline(&'a Arc<Timeline>),
    Offloaded(&'a Arc<OffloadedTimeline>),
+    Importing(&'a Arc<ImportingTimeline>),
 }

 impl TimelineOrOffloadedArcRef<'_> {
@@ -711,12 +724,14 @@ impl TimelineOrOffloadedArcRef<'_> {
        match self {
            TimelineOrOffloadedArcRef::Timeline(timeline) => timeline.tenant_shard_id,
            TimelineOrOffloadedArcRef::Offloaded(offloaded) => offloaded.tenant_shard_id,
+            TimelineOrOffloadedArcRef::Importing(importing) => importing.timeline.tenant_shard_id,
        }
    }
    pub fn timeline_id(&self) -> TimelineId {
        match self {
            TimelineOrOffloadedArcRef::Timeline(timeline) => timeline.timeline_id,
            TimelineOrOffloadedArcRef::Offloaded(offloaded) => offloaded.timeline_id,
+            TimelineOrOffloadedArcRef::Importing(importing) => importing.timeline.timeline_id,
        }
    }
 }
@@ -733,6 +748,12 @@ impl<'a> From<&'a Arc<OffloadedTimeline>> for TimelineOrOffloadedArcRef<'a> {
    }
 }

+impl<'a> From<&'a Arc<ImportingTimeline>> for TimelineOrOffloadedArcRef<'a> {
+    fn from(timeline: &'a Arc<ImportingTimeline>) -> Self {
+        Self::Importing(timeline)
+    }
+}
+
 #[derive(Debug, thiserror::Error, PartialEq, Eq)]
 pub enum GetTimelineError {
    #[error("Timeline is shutting down")]
@@ -860,6 +881,14 @@ impl Debug for SetStoppingError {
    }
 }

+#[derive(thiserror::Error, Debug)]
+pub(crate) enum FinalizeTimelineImportError {
+    #[error("Import task not done yet")]
+    ImportTaskStillRunning,
+    #[error("Shutting down")]
+    ShuttingDown,
+}
+
 /// Arguments to [`TenantShard::create_timeline`].
 ///
 /// Not usable as an idempotency key for timeline creation because if [`CreateTimelineParamsBranch::ancestor_start_lsn`]
@@ -1146,10 +1175,20 @@ impl TenantShard {
            ctx,
        )?;
        let disk_consistent_lsn = timeline.get_disk_consistent_lsn();
-        anyhow::ensure!(
-            disk_consistent_lsn.is_valid(),
-            "Timeline {tenant_id}/{timeline_id} has invalid disk_consistent_lsn"
-        );
+
+        if !disk_consistent_lsn.is_valid() {
+            // As opposed to normal timelines which get initialised with a disk consistent LSN
+            // via initdb, imported timelines start from 0. If the import task stops before
+            // it advances disk consistent LSN, allow it to resume.
+            let in_progress_import = import_pgdata
+                .as_ref()
+                .map(|import| !import.is_done())
+                .unwrap_or(false);
+            if !in_progress_import {
+                anyhow::bail!("Timeline {tenant_id}/{timeline_id} has invalid disk_consistent_lsn");
+            }
+        }
+
        assert_eq!(
            disk_consistent_lsn,
            metadata.disk_consistent_lsn(),
@@ -1243,20 +1282,25 @@ impl TenantShard {
                    }
                }

-                // Sanity check: a timeline should have some content.
-                anyhow::ensure!(
-                    ancestor.is_some()
-                        || timeline
-                            .layers
-                            .read()
-                            .await
-                            .layer_map()
-                            .expect("currently loading, layer manager cannot be shutdown already")
-                            .iter_historic_layers()
-                            .next()
-                            .is_some(),
-                    "Timeline has no ancestor and no layer files"
-                );
+                if disk_consistent_lsn.is_valid() {
+                    // Sanity check: a timeline should have some content.
+                    // Exception: importing timelines might not yet have any content.
+                    anyhow::ensure!(
+                        ancestor.is_some()
+                            || timeline
+                                .layers
+                                .read()
+                                .await
+                                .layer_map()
+                                .expect(
+                                    "currently loading, layer manager cannot be shutdown already"
+                                )
+                                .iter_historic_layers()
+                                .next()
+                                .is_some(),
+                        "Timeline has no ancestor and no layer files"
+                    );
+                }

                Ok(TimelineInitAndSyncResult::ReadyToActivate)
            }
@@ -1292,6 +1336,7 @@ impl TenantShard {
            deletion_queue_client,
            l0_flush_global_state,
            basebackup_prepare_sender,
+            feature_resolver,
        } = resources;

        let attach_mode = attached_conf.location.attach_mode;
@@ -1308,6 +1353,7 @@ impl TenantShard {
            deletion_queue_client,
            l0_flush_global_state,
            basebackup_prepare_sender,
+            feature_resolver,
        ));

        // The attach task will carry a GateGuard, so that shutdown() reliably waits for it to drop out if
@@ -1760,20 +1806,25 @@ impl TenantShard {
                    },
                ) => {
                    let timeline_id = timeline.timeline_id;
+                    let import_task_gate = Gate::default();
+                    let import_task_guard = import_task_gate.enter().unwrap();
                    let import_task_handle =
                        tokio::task::spawn(self.clone().create_timeline_import_pgdata_task(
                            timeline.clone(),
                            import_pgdata,
                            guard,
+                            import_task_guard,
                            ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Warn),
                        ));

                    let prev = self.timelines_importing.lock().unwrap().insert(
                        timeline_id,
-                        ImportingTimeline {
+                        Arc::new(ImportingTimeline {
                            timeline: timeline.clone(),
                            import_task_handle,
-                        },
+                            import_task_gate,
+                            delete_progress: TimelineDeleteProgress::default(),
+                        }),
                    );

                    assert!(prev.is_none());
@@ -2391,6 +2442,17 @@ impl TenantShard {
            .collect()
    }

+    /// Lists the timelines this tenant currently tracks as importing (`timelines_importing`).
+    /// It's up to callers to omit certain timelines that are not considered ready for use.
+    pub fn list_importing_timelines(&self) -> Vec<Arc<ImportingTimeline>> {
+        self.timelines_importing
+            .lock()
+            .unwrap()
+            .values()
+            .map(Arc::clone)
+            .collect()
+    }
+
    /// Lists timelines the tenant manages, including offloaded ones.
    ///
    /// It's up to callers to omit certain timelines that are not considered ready for use.
@@ -2824,19 +2886,25 @@ impl TenantShard {

        let (timeline, timeline_create_guard) = uninit_timeline.finish_creation_myself();

+        let import_task_gate = Gate::default();
+        let import_task_guard = import_task_gate.enter().unwrap();
+
        let import_task_handle = tokio::spawn(self.clone().create_timeline_import_pgdata_task(
            timeline.clone(),
            index_part,
            timeline_create_guard,
+            import_task_guard,
            timeline_ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Warn),
        ));

        let prev = self.timelines_importing.lock().unwrap().insert(
            timeline.timeline_id,
-            ImportingTimeline {
+            Arc::new(ImportingTimeline {
                timeline: timeline.clone(),
                import_task_handle,
-            },
+                import_task_gate,
+                delete_progress: TimelineDeleteProgress::default(),
+            }),
        );

        // Idempotency is enforced higher up the stack
@@ -2854,13 +2922,13 @@ impl TenantShard {
    pub(crate) async fn finalize_importing_timeline(
        &self,
        timeline_id: TimelineId,
-    ) -> anyhow::Result<()> {
+    ) -> Result<(), FinalizeTimelineImportError> {
        let timeline = {
            let locked = self.timelines_importing.lock().unwrap();
            match locked.get(&timeline_id) {
                Some(importing_timeline) => {
                    if !importing_timeline.import_task_handle.is_finished() {
-                        return Err(anyhow::anyhow!("Import task not done yet"));
+                        return Err(FinalizeTimelineImportError::ImportTaskStillRunning);
                    }

                    importing_timeline.timeline.clone()
@@ -2873,8 +2941,13 @@ impl TenantShard {

        timeline
            .remote_client
-            .schedule_index_upload_for_import_pgdata_finalize()?;
-        timeline.remote_client.wait_completion().await?;
+            .schedule_index_upload_for_import_pgdata_finalize()
+            .map_err(|_err| FinalizeTimelineImportError::ShuttingDown)?;
+        timeline
+            .remote_client
+            .wait_completion()
+            .await
+            .map_err(|_err| FinalizeTimelineImportError::ShuttingDown)?;

        self.timelines_importing
            .lock()
@@ -2890,6 +2963,7 @@ impl TenantShard {
        timeline: Arc<Timeline>,
        index_part: import_pgdata::index_part_format::Root,
        timeline_create_guard: TimelineCreateGuard,
+        _import_task_guard: GateGuard,
        ctx: RequestContext,
    ) {
        debug_assert_current_span_has_tenant_and_timeline_id();
@@ -3135,11 +3209,18 @@ impl TenantShard {
                        .or_insert_with(|| Arc::new(GcCompactionQueue::new()))
                        .clone()
                };
+                let gc_compaction_strategy = self
+                    .feature_resolver
+                    .evaluate_multivariate("gc-comapction-strategy", self.tenant_shard_id.tenant_id)
+                    .ok();
+                let span = if let Some(gc_compaction_strategy) = gc_compaction_strategy {
+                    info_span!("gc_compact_timeline", timeline_id = %timeline.timeline_id, strategy = %gc_compaction_strategy)
+                } else {
+                    info_span!("gc_compact_timeline", timeline_id = %timeline.timeline_id)
+                };
                outcome = queue
                    .iteration(cancel, ctx, &self.gc_block, &timeline)
-                    .instrument(
-                        info_span!("gc_compact_timeline", timeline_id = %timeline.timeline_id),
-                    )
+                    .instrument(span)
                    .await?;
            }

@@ -3471,8 +3552,9 @@ impl TenantShard {
            let mut timelines_importing = self.timelines_importing.lock().unwrap();
            timelines_importing
                .drain()
-                .for_each(|(_timeline_id, importing_timeline)| {
-                    importing_timeline.shutdown();
+                .for_each(|(timeline_id, importing_timeline)| {
+                    let span = tracing::info_span!("importing_timeline_shutdown", %timeline_id);
+                    js.spawn(async move { importing_timeline.shutdown().instrument(span).await });
                });
        }
        // test_long_timeline_create_then_tenant_delete is leaning on this message
@@ -3793,6 +3875,9 @@ impl TenantShard {
                        .build_timeline_client(offloaded.timeline_id, self.remote_storage.clone());
                    Arc::new(remote_client)
                }
+                TimelineOrOffloadedArcRef::Importing(_) => {
+                    unreachable!("Importing timelines are not included in the iterator")
+                }
            };

            // Shut down the timeline's remote client: this means that the indices we write
@@ -4247,6 +4332,7 @@ impl TenantShard {
        deletion_queue_client: DeletionQueueClient,
        l0_flush_global_state: L0FlushGlobalState,
        basebackup_prepare_sender: BasebackupPrepareSender,
+        feature_resolver: FeatureResolver,
    ) -> TenantShard {
        assert!(!attached_conf.location.generation.is_none());

@@ -4351,6 +4437,7 @@ impl TenantShard {
            gc_block: Default::default(),
            l0_flush_global_state,
            basebackup_prepare_sender,
+            feature_resolver,
        }
    }

@@ -5000,6 +5087,14 @@ impl TenantShard {
                info!("timeline already exists but is offloaded");
                Err(CreateTimelineError::Conflict)
            }
+            Err(TimelineExclusionError::AlreadyExists {
+                existing: TimelineOrOffloaded::Importing(_existing),
+                ..
+            }) => {
+                // If there's a timeline already importing, then we would hit
+                // the [`TimelineExclusionError::AlreadyCreating`] branch above.
+                unreachable!("Importing timelines hold the creation guard")
+            }
            Err(TimelineExclusionError::AlreadyExists {
                existing: TimelineOrOffloaded::Timeline(existing),
                arg,
@@ -5271,6 +5366,7 @@ impl TenantShard {
            l0_compaction_trigger: self.l0_compaction_trigger.clone(),
            l0_flush_global_state: self.l0_flush_global_state.clone(),
            basebackup_prepare_sender: self.basebackup_prepare_sender.clone(),
+            feature_resolver: self.feature_resolver.clone(),
        }
    }

@@ -5736,6 +5832,7 @@ pub(crate) mod harness {
        pub conf: &'static PageServerConf,
        pub tenant_conf: pageserver_api::models::TenantConfig,
        pub tenant_shard_id: TenantShardId,
+        pub shard_identity: ShardIdentity,
        pub generation: Generation,
        pub shard: ShardIndex,
        pub remote_storage: GenericRemoteStorage,
@@ -5803,6 +5900,7 @@ pub(crate) mod harness {
                conf,
                tenant_conf,
                tenant_shard_id,
+                shard_identity,
                generation,
                shard,
                remote_storage,
@@ -5864,8 +5962,7 @@ pub(crate) mod harness {
                    &ShardParameters::default(),
                ))
                .unwrap(),
-                // This is a legacy/test code path: sharding isn't supported here.
-                ShardIdentity::unsharded(),
+                self.shard_identity,
                Some(walredo_mgr),
                self.tenant_shard_id,
                self.remote_storage.clone(),
@@ -5873,6 +5970,7 @@ pub(crate) mod harness {
                // TODO: ideally we should run all unit tests with both configs
                L0FlushGlobalState::new(L0FlushConfig::default()),
                basebackup_requst_sender,
+                FeatureResolver::new_disabled(),
            ));

            let preload = tenant
@@ -5986,6 +6084,7 @@ mod tests {
    use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn};
    use timeline::{CompactOptions, DeltaLayerTestDesc, VersionedKeySpaceQuery};
    use utils::id::TenantId;
+    use utils::shard::{ShardCount, ShardNumber};

    use super::*;
    use crate::DEFAULT_PG_VERSION;
@@ -8314,10 +8413,24 @@ mod tests {
            }

            tline.freeze_and_flush().await?;
+            // Force layers to L1
+            tline
+                .compact(
+                    &cancel,
+                    {
+                        let mut flags = EnumSet::new();
+                        flags.insert(CompactFlags::ForceL0Compaction);
+                        flags
+                    },
+                    &ctx,
+                )
+                .await?;

            if iter % 5 == 0 {
+                let scan_lsn = Lsn(lsn.0 + 1);
+                info!("scanning at {}", scan_lsn);
                let (_, before_delta_file_accessed) =
-                    scan_with_statistics(&tline, &keyspace, lsn, &ctx, io_concurrency.clone())
+                    scan_with_statistics(&tline, &keyspace, scan_lsn, &ctx, io_concurrency.clone())
                        .await?;
                tline
                    .compact(
@@ -8326,13 +8439,14 @@ mod tests {
                            let mut flags = EnumSet::new();
                            flags.insert(CompactFlags::ForceImageLayerCreation);
                            flags.insert(CompactFlags::ForceRepartition);
+                            flags.insert(CompactFlags::ForceL0Compaction);
                            flags
                        },
                        &ctx,
                    )
                    .await?;
                let (_, after_delta_file_accessed) =
-                    scan_with_statistics(&tline, &keyspace, lsn, &ctx, io_concurrency.clone())
+                    scan_with_statistics(&tline, &keyspace, scan_lsn, &ctx, io_concurrency.clone())
                        .await?;
                assert!(
                    after_delta_file_accessed < before_delta_file_accessed,
@@ -8773,6 +8887,8 @@ mod tests {

        let cancel = CancellationToken::new();

+        // Image layer creation happens on the disk_consistent_lsn so we need to force set it now.
+        tline.force_set_disk_consistent_lsn(Lsn(0x40));
        tline
            .compact(
                &cancel,
@@ -8786,8 +8902,7 @@ mod tests {
            )
            .await
            .unwrap();
-
-        // Image layers are created at last_record_lsn
+        // Image layers are created at repartition LSN
        let images = tline
            .inspect_image_layers(Lsn(0x40), &ctx, io_concurrency.clone())
            .await
@@ -9305,6 +9420,77 @@ mod tests {
        Ok(())
    }

+    #[tokio::test]
+    async fn test_failed_flush_should_not_update_disk_consistent_lsn() -> anyhow::Result<()> {
+        //
+        // Setup
+        //
+        let harness = TenantHarness::create_custom(
+            "test_failed_flush_should_not_upload_disk_consistent_lsn",
+            pageserver_api::models::TenantConfig::default(),
+            TenantId::generate(),
+            ShardIdentity::new(ShardNumber(0), ShardCount(4), ShardStripeSize(128)).unwrap(),
+            Generation::new(1),
+        )
+        .await?;
+        let (tenant, ctx) = harness.load().await;
+
+        let timeline = tenant
+            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
+            .await?;
+        assert_eq!(timeline.get_shard_identity().count, ShardCount(4));
+        let mut writer = timeline.writer().await;
+        writer
+            .put(
+                *TEST_KEY,
+                Lsn(0x20),
+                &Value::Image(test_img("foo at 0x20")),
+                &ctx,
+            )
+            .await?;
+        writer.finish_write(Lsn(0x20));
+        drop(writer);
+        timeline.freeze_and_flush().await.unwrap();
+
+        timeline.remote_client.wait_completion().await.unwrap();
+        let disk_consistent_lsn = timeline.get_disk_consistent_lsn();
+        let remote_consistent_lsn = timeline.get_remote_consistent_lsn_projected();
+        assert_eq!(Some(disk_consistent_lsn), remote_consistent_lsn);
+
+        //
+        // Test
+        //
+
+        let mut writer = timeline.writer().await;
+        writer
+            .put(
+                *TEST_KEY,
+                Lsn(0x30),
+                &Value::Image(test_img("foo at 0x30")),
+                &ctx,
+            )
+            .await?;
+        writer.finish_write(Lsn(0x30));
+        drop(writer);
+
+        fail::cfg(
+            "flush-layer-before-update-remote-consistent-lsn",
+            "return()",
+        )
+        .unwrap();
+
+        let flush_res = timeline.freeze_and_flush().await;
+        // if flush failed, the disk/remote consistent LSN should not be updated
+        assert!(flush_res.is_err());
+        assert_eq!(disk_consistent_lsn, timeline.get_disk_consistent_lsn());
+        assert_eq!(
+            remote_consistent_lsn,
+            timeline.get_remote_consistent_lsn_projected()
+        );
+
+        Ok(())
+    }
+
    #[cfg(feature = "testing")]
    #[tokio::test]
    async fn test_simple_bottom_most_compaction_deltas_1() -> anyhow::Result<()> {
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -1348,6 +1348,21 @@ impl RemoteTimelineClient {
        Ok(())
    }

+    pub(crate) fn schedule_unlinking_of_layers_from_index_part<I>(
+        self: &Arc<Self>,
+        names: I,
+    ) -> Result<(), NotInitialized>
+    where
+        I: IntoIterator<Item = LayerName>,
+    {
+        let mut guard = self.upload_queue.lock().unwrap();
+        let upload_queue = guard.initialized_mut()?;
+
+        self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);
+
+        Ok(())
+    }
+
    /// Update the remote index file, removing the to-be-deleted files from the index,
    /// allowing scheduling of actual deletions later.
    fn schedule_unlinking_of_layers_from_index_part0<I>(
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -103,6 +103,7 @@ use crate::context::{
    DownloadBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,
 };
 use crate::disk_usage_eviction_task::{DiskUsageEvictionInfo, EvictionCandidate, finite_f32};
+use crate::feature_resolver::FeatureResolver;
 use crate::keyspace::{KeyPartitioning, KeySpace};
 use crate::l0_flush::{self, L0FlushGlobalState};
 use crate::metrics::{
@@ -198,6 +199,7 @@ pub struct TimelineResources {
    pub l0_compaction_trigger: Arc<Notify>,
    pub l0_flush_global_state: l0_flush::L0FlushGlobalState,
    pub basebackup_prepare_sender: BasebackupPrepareSender,
+    pub feature_resolver: FeatureResolver,
 }

 pub struct Timeline {
@@ -444,6 +446,8 @@ pub struct Timeline {

    /// A channel to send async requests to prepare a basebackup for the basebackup cache.
    basebackup_prepare_sender: BasebackupPrepareSender,
+
+    feature_resolver: FeatureResolver,
 }

 pub(crate) enum PreviousHeatmap {
@@ -3072,6 +3076,8 @@ impl Timeline {
                wait_lsn_log_slow: tokio::sync::Semaphore::new(1),

                basebackup_prepare_sender: resources.basebackup_prepare_sender,
+
+                feature_resolver: resources.feature_resolver,
            };

            result.repartition_threshold =
@@ -4761,7 +4767,10 @@ impl Timeline {
                    || !flushed_to_lsn.is_valid()
            );

-            if flushed_to_lsn < frozen_to_lsn && self.shard_identity.count.count() > 1 {
+            if flushed_to_lsn < frozen_to_lsn
+                && self.shard_identity.count.count() > 1
+                && result.is_ok()
+            {
                // If our layer flushes didn't carry disk_consistent_lsn up to the `to_lsn` advertised
                // to us via layer_flush_start_rx, then advance it here.
                //
@@ -4906,6 +4915,7 @@ impl Timeline {
                    LastImageLayerCreationStatus::Initial,
                    false, // don't yield for L0, we're flushing L0
                )
+                .instrument(info_span!("create_image_layers", mode = %ImageLayerCreationMode::Initial, partition_mode = "initial", lsn = %self.initdb_lsn))
                .await?;
            debug_assert!(
                matches!(is_complete, LastImageLayerCreationStatus::Complete),
@@ -4939,6 +4949,10 @@ impl Timeline {
            return Err(FlushLayerError::Cancelled);
        }

+        fail_point!("flush-layer-before-update-remote-consistent-lsn", |_| {
+            Err(FlushLayerError::Other(anyhow!("failpoint").into()))
+        });
+
        let disk_consistent_lsn = Lsn(lsn_range.end.0 - 1);

        // The new on-disk layers are now in the layer map. We can remove the
@@ -5462,7 +5476,8 @@ impl Timeline {

    /// Returns the image layers generated and an enum indicating whether the process is fully completed.
    /// true = we have generate all image layers, false = we preempt the process for L0 compaction.
-    #[tracing::instrument(skip_all, fields(%lsn, %mode))]
+    ///
+    /// `partition_mode` is only for logging purpose and is not used anywhere in this function.
    async fn create_image_layers(
        self: &Arc<Timeline>,
        partitioning: &KeyPartitioning,
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -206,8 +206,8 @@ pub struct GcCompactionQueue {
 }

 static CONCURRENT_GC_COMPACTION_TASKS: Lazy<Arc<Semaphore>> = Lazy::new(|| {
-    // Only allow two timelines on one pageserver to run gc compaction at a time.
-    Arc::new(Semaphore::new(2))
+    // Only allow one timeline on one pageserver to run gc compaction at a time.
+    Arc::new(Semaphore::new(1))
 });

 impl GcCompactionQueue {
@@ -1278,11 +1278,55 @@ impl Timeline {
        }

        let gc_cutoff = *self.applied_gc_cutoff_lsn.read();
+        let l0_l1_boundary_lsn = {
+            // We do the repartition on the L0-L1 boundary. All data below the boundary
+            // are compacted by L0 with low read amplification, thus making the `repartition`
+            // function run fast.
+            let guard = self.layers.read().await;
+            guard
+                .all_persistent_layers()
+                .iter()
+                .map(|x| {
+                    // Use the end LSN of delta layers OR the start LSN of image layers.
+                    if x.is_delta {
+                        x.lsn_range.end
+                    } else {
+                        x.lsn_range.start
+                    }
+                })
+                .max()
+        };
+
+        let (partition_mode, partition_lsn) = if cfg!(test)
+            || cfg!(feature = "testing")
+            || self
+                .feature_resolver
+                .evaluate_boolean("image-compaction-boundary", self.tenant_shard_id.tenant_id)
+                .is_ok()
+        {
+            let last_repartition_lsn = self.partitioning.read().1;
+            let lsn = match l0_l1_boundary_lsn {
+                Some(boundary) => gc_cutoff
+                    .max(boundary)
+                    .max(last_repartition_lsn)
+                    .max(self.initdb_lsn)
+                    .max(self.ancestor_lsn),
+                None => self.get_last_record_lsn(),
+            };
+            if lsn <= self.initdb_lsn || lsn <= self.ancestor_lsn {
+                // Do not attempt to create image layers below the initdb or ancestor LSN -- no data below it
+                ("l0_l1_boundary", self.get_last_record_lsn())
+            } else {
+                ("l0_l1_boundary", lsn)
+            }
+        } else {
+            ("latest_record", self.get_last_record_lsn())
+        };

        // 2. Repartition and create image layers if necessary
        match self
            .repartition(
-                self.get_last_record_lsn(),
+                partition_lsn,
                self.get_compaction_target_size(),
                options.flags,
                ctx,
@@ -1301,18 +1345,19 @@ impl Timeline {
                    .extend(sparse_partitioning.into_dense().parts);

                // 3. Create new image layers for partitions that have been modified "enough".
+                let mode = if options
+                    .flags
+                    .contains(CompactFlags::ForceImageLayerCreation)
+                {
+                    ImageLayerCreationMode::Force
+                } else {
+                    ImageLayerCreationMode::Try
+                };
                let (image_layers, outcome) = self
                    .create_image_layers(
                        &partitioning,
                        lsn,
-                        if options
-                            .flags
-                            .contains(CompactFlags::ForceImageLayerCreation)
-                        {
-                            ImageLayerCreationMode::Force
-                        } else {
-                            ImageLayerCreationMode::Try
-                        },
+                        mode,
                        &image_ctx,
                        self.last_image_layer_creation_status
                            .load()
@@ -1320,6 +1365,7 @@ impl Timeline {
                            .clone(),
                        options.flags.contains(CompactFlags::YieldForL0),
                    )
+                    .instrument(info_span!("create_image_layers", mode = %mode, partition_mode = %partition_mode, lsn = %lsn))
                    .await
                    .inspect_err(|err| {
                        if let CreateImageLayersError::GetVectoredError(
@@ -1344,7 +1390,8 @@ impl Timeline {
            }

            Ok(_) => {
-                info!("skipping repartitioning due to image compaction LSN being below GC cutoff");
+                // This happens very frequently so we don't want to log it.
+                debug!("skipping repartitioning due to image compaction LSN being below GC cutoff");
            }

            // Suppress errors when cancelled.
--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -121,6 +121,7 @@ async fn remove_maybe_offloaded_timeline_from_tenant(
    // This observes the locking order between timelines and timelines_offloaded
    let mut timelines = tenant.timelines.lock().unwrap();
    let mut timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
+    let mut timelines_importing = tenant.timelines_importing.lock().unwrap();
    let offloaded_children_exist = timelines_offloaded
        .iter()
        .any(|(_, entry)| entry.ancestor_timeline_id == Some(timeline.timeline_id()));
@@ -150,8 +151,12 @@ async fn remove_maybe_offloaded_timeline_from_tenant(
                .expect("timeline that we were deleting was concurrently removed from 'timelines_offloaded' map");
            offloaded_timeline.delete_from_ancestor_with_timelines(&timelines);
        }
+        TimelineOrOffloaded::Importing(importing) => {
+            timelines_importing.remove(&importing.timeline.timeline_id);
+        }
    }

+    drop(timelines_importing);
    drop(timelines_offloaded);
    drop(timelines);

@@ -203,8 +208,17 @@ impl DeleteTimelineFlow {
        guard.mark_in_progress()?;

        // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
-        if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
-            timeline.shutdown(super::ShutdownMode::Hard).await;
+        // TODO(vlad): shut down imported timeline here
+        match &timeline {
+            TimelineOrOffloaded::Timeline(timeline) => {
+                timeline.shutdown(super::ShutdownMode::Hard).await;
+            }
+            TimelineOrOffloaded::Importing(importing) => {
+                importing.shutdown().await;
+            }
+            TimelineOrOffloaded::Offloaded(_offloaded) => {
+                // Nothing to shut down in this case
+            }
        }

        tenant.gc_block.before_delete(&timeline.timeline_id());
@@ -389,10 +403,18 @@ impl DeleteTimelineFlow {
            Err(anyhow::anyhow!("failpoint: timeline-delete-before-rm"))?
        });

-        // Offloaded timelines have no local state
-        // TODO: once we persist offloaded information, delete the timeline from there, too
-        if let TimelineOrOffloaded::Timeline(timeline) = timeline {
-            delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await;
+        match timeline {
+            TimelineOrOffloaded::Timeline(timeline) => {
+                delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await;
+            }
+            TimelineOrOffloaded::Importing(importing) => {
+                delete_local_timeline_directory(conf, tenant.tenant_shard_id, &importing.timeline)
+                    .await;
+            }
+            TimelineOrOffloaded::Offloaded(_offloaded) => {
+                // Offloaded timelines have no local state
+                // TODO: once we persist offloaded information, delete the timeline from there, too
+            }
        }

        fail::fail_point!("timeline-delete-after-rm", |_| {
@@ -451,12 +473,16 @@ pub(super) fn make_timeline_delete_guard(
    // For more context see this discussion: `https://github.com/neondatabase/neon/pull/4552#discussion_r1253437346`
    let timelines = tenant.timelines.lock().unwrap();
    let timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
+    let timelines_importing = tenant.timelines_importing.lock().unwrap();

    let timeline = match timelines.get(&timeline_id) {
        Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)),
        None => match timelines_offloaded.get(&timeline_id) {
            Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
-            None => return Err(DeleteTimelineError::NotFound),
+            None => match timelines_importing.get(&timeline_id) {
+                Some(t) => TimelineOrOffloaded::Importing(Arc::clone(t)),
+                None => return Err(DeleteTimelineError::NotFound),
+            },
        },
    };

--- a/pageserver/src/tenant/timeline/import_pgdata.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata.rs
@@ -8,8 +8,10 @@ use tokio::task::JoinHandle;
 use tokio_util::sync::CancellationToken;
 use tracing::info;
 use utils::lsn::Lsn;
+use utils::pausable_failpoint;
+use utils::sync::gate::Gate;

-use super::Timeline;
+use super::{Timeline, TimelineDeleteProgress};
 use crate::context::RequestContext;
 use crate::controller_upcall_client::{StorageControllerUpcallApi, StorageControllerUpcallClient};
 use crate::tenant::metadata::TimelineMetadata;
@@ -19,14 +21,25 @@ mod importbucket_client;
 mod importbucket_format;
 pub(crate) mod index_part_format;

-pub(crate) struct ImportingTimeline {
+pub struct ImportingTimeline {
    pub import_task_handle: JoinHandle<()>,
+    pub import_task_gate: Gate,
    pub timeline: Arc<Timeline>,
+    pub delete_progress: TimelineDeleteProgress,
+}
+
+/// Compact debug representation that only prints the id of the timeline being imported.
+impl std::fmt::Debug for ImportingTimeline {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let timeline_id = &self.timeline.timeline_id;
+        f.write_fmt(format_args!("ImportingTimeline<{}>", timeline_id))
+    }
+}

 impl ImportingTimeline {
-    pub(crate) fn shutdown(self) {
+    pub async fn shutdown(&self) {
        self.import_task_handle.abort();
+        self.import_task_gate.close().await;
+
+        self.timeline.remote_client.shutdown().await;
    }
 }

@@ -93,6 +106,15 @@ pub async fn doit(
                );
            }

+            tracing::info!("Import plan executed. Flushing remote changes and notifying storcon");
+
+            timeline
+                .remote_client
+                .schedule_index_upload_for_file_changes()?;
+            timeline.remote_client.wait_completion().await?;
+
+            pausable_failpoint!("import-timeline-pre-success-notify-pausable");
+
            // Communicate that shard is done.
            // Ensure at-least-once delivery of the upcall to storage controller
            // before we mark the task as done and never come here again.
--- a/pageserver/src/tenant/timeline/import_pgdata/flow.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata/flow.rs
@@ -11,25 +11,14 @@
 //! - => S3 as the source for the PGDATA instead of local filesystem
 //!
 //! TODOs before productionization:
-//! - ChunkProcessingJob size / ImportJob::total_size does not account for sharding.
-//!   => produced image layers likely too small.
 //! - ChunkProcessingJob should cut up an ImportJob to hit exactly target image layer size.
-//! - asserts / unwraps need to be replaced with errors
-//! - don't trust remote objects will be small (=prevent OOMs in those cases)
-//!     - limit all in-memory buffers in size, or download to disk and read from there
-//! - limit task concurrency
-//! - generally play nice with other tenants in the system
-//!   - importbucket is different bucket than main pageserver storage, so, should be fine wrt S3 rate limits
-//!   - but concerns like network bandwidth, local disk write bandwidth, local disk capacity, etc
-//! - integrate with layer eviction system
-//! - audit for Tenant::cancel nor Timeline::cancel responsivity
-//! - audit for Tenant/Timeline gate holding (we spawn tokio tasks during this flow!)
 //!
 //! An incomplete set of TODOs from the Hackathon:
 //! - version-specific CheckPointData (=> pgv abstraction, already exists for regular walingest)

 use std::collections::HashSet;
 use std::hash::{Hash, Hasher};
+use std::num::NonZeroUsize;
 use std::ops::Range;
 use std::sync::Arc;

@@ -43,7 +32,7 @@ use pageserver_api::key::{
    rel_dir_to_key, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key,
    slru_segment_size_to_key,
 };
-use pageserver_api::keyspace::{contiguous_range_len, is_contiguous_range, singleton_range};
+use pageserver_api::keyspace::{ShardedRange, singleton_range};
 use pageserver_api::models::{ShardImportProgress, ShardImportProgressV1, ShardImportStatus};
 use pageserver_api::reltag::{RelTag, SlruKind};
 use pageserver_api::shard::ShardIdentity;
@@ -100,8 +89,24 @@ async fn run_v1(
        tasks: Vec::default(),
    };

-    let import_config = &timeline.conf.timeline_import_config;
-    let plan = planner.plan(import_config).await?;
+    // Use the job size limit encoded in the progress if we are resuming an import.
+    // This ensures that imports have stable plans even if the pageserver config changes.
+    let import_config = match &import_progress {
+        Some(progress) => {
+            // The persisted limit is external metadata: a zero value means the progress
+            // record is corrupt or predates this field. Fail the import with an error
+            // instead of panicking the task.
+            let import_job_soft_size_limit = NonZeroUsize::new(progress.job_soft_size_limit)
+                .ok_or_else(|| {
+                    anyhow::anyhow!("Import progress contains a zero job soft size limit")
+                })?;
+            let base = &timeline.conf.timeline_import_config;
+            TimelineImportConfig {
+                import_job_soft_size_limit,
+                import_job_concurrency: base.import_job_concurrency,
+                import_job_checkpoint_threshold: base.import_job_checkpoint_threshold,
+            }
+        }
+        // Fresh import: plan with the currently configured limits.
+        None => timeline.conf.timeline_import_config.clone(),
+    };
+
+    let plan = planner.plan(&import_config).await?;

    // Hash the plan and compare with the hash of the plan we got back from the storage controller.
    // If the two match, it means that the planning stage had the same output.
@@ -113,20 +118,28 @@ async fn run_v1(
    let plan_hash = hasher.finish();

    if let Some(progress) = &import_progress {
-        if plan_hash != progress.import_plan_hash {
-            anyhow::bail!("Import plan does not match storcon metadata");
-        }
-
        // Handle collisions on jobs of unequal length
        if progress.jobs != plan.jobs.len() {
            anyhow::bail!("Import plan job length does not match storcon metadata")
        }
+
+        if plan_hash != progress.import_plan_hash {
+            anyhow::bail!("Import plan does not match storcon metadata");
+        }
    }

    pausable_failpoint!("import-timeline-pre-execute-pausable");

+    let jobs_count = import_progress.as_ref().map(|p| p.jobs);
    let start_from_job_idx = import_progress.map(|progress| progress.completed);
-    plan.execute(timeline, start_from_job_idx, plan_hash, import_config, ctx)
+
+    tracing::info!(
+        start_from_job_idx=?start_from_job_idx,
+        jobs=?jobs_count,
+        "Executing import plan"
+    );
+
+    plan.execute(timeline, start_from_job_idx, plan_hash, &import_config, ctx)
        .await
 }

@@ -150,6 +163,7 @@ impl Planner {
    /// This function is and must remain pure: given the same input, it will generate the same import plan.
    async fn plan(mut self, import_config: &TimelineImportConfig) -> anyhow::Result<Plan> {
        let pgdata_lsn = Lsn(self.control_file.control_file_data().checkPoint).align();
+        anyhow::ensure!(pgdata_lsn.is_valid());

        let datadir = PgDataDir::new(&self.storage).await?;

@@ -218,15 +232,36 @@ impl Planner {
                checkpoint_buf,
            )));

+        // Sort the tasks by the key ranges they handle.
+        // The plan being generated here needs to be stable across invocations
+        // of this method.
+        self.tasks.sort_by_key(|task| match task {
+            AnyImportTask::SingleKey(key) => (key.key, key.key.next()),
+            AnyImportTask::RelBlocks(rel_blocks) => {
+                (rel_blocks.key_range.start, rel_blocks.key_range.end)
+            }
+            AnyImportTask::SlruBlocks(slru_blocks) => {
+                (slru_blocks.key_range.start, slru_blocks.key_range.end)
+            }
+        });
+
        // Assigns parts of key space to later parallel jobs
+        // Note: The image layers produced here may have gaps, meaning,
+        //       there is not an image for each key in the layer's key range.
+        //       The read path stops traversal at the first image layer, regardless
+        //       of whether a base image has been found for a key or not.
+        //       (Concept of sparse image layers doesn't exist.)
+        //       This behavior is exactly right for the base image layers we're producing here.
+        //       But, since no other place in the code currently produces image layers with gaps,
+        //       it seems noteworthy.
        let mut last_end_key = Key::MIN;
        let mut current_chunk = Vec::new();
        let mut current_chunk_size: usize = 0;
        let mut jobs = Vec::new();
        for task in std::mem::take(&mut self.tasks).into_iter() {
-            if current_chunk_size + task.total_size()
-                > import_config.import_job_soft_size_limit.into()
-            {
+            let task_size = task.total_size(&self.shard);
+            let projected_chunk_size = current_chunk_size.saturating_add(task_size);
+            if projected_chunk_size > import_config.import_job_soft_size_limit.into() {
                let key_range = last_end_key..task.key_range().start;
                jobs.push(ChunkProcessingJob::new(
                    key_range.clone(),
@@ -236,7 +271,7 @@ impl Planner {
                last_end_key = key_range.end;
                current_chunk_size = 0;
            }
-            current_chunk_size += task.total_size();
+            current_chunk_size = current_chunk_size.saturating_add(task_size);
            current_chunk.push(task);
        }
        jobs.push(ChunkProcessingJob::new(
@@ -426,6 +461,8 @@ impl Plan {
                    }));
                },
                maybe_complete_job_idx = work.next() => {
+                    pausable_failpoint!("import-task-complete-pausable");
+
                    match maybe_complete_job_idx {
                        Some(Ok((job_idx, res))) => {
                            assert!(last_completed_job_idx.checked_add(1).unwrap() == job_idx);
@@ -438,8 +475,12 @@ impl Plan {
                                    jobs: jobs_in_plan,
                                    completed: last_completed_job_idx,
                                    import_plan_hash,
+                                    job_soft_size_limit: import_config.import_job_soft_size_limit.into(),
                                };

+                                timeline.remote_client.schedule_index_upload_for_file_changes()?;
+                                timeline.remote_client.wait_completion().await?;
+
                                storcon_client.put_timeline_import_status(
                                    timeline.tenant_shard_id,
                                    timeline.timeline_id,
@@ -451,6 +492,8 @@ impl Plan {
                                    anyhow::anyhow!("Shut down while putting timeline import status")
                                })?;
                            }
+
+                            tracing::info!(last_completed_job_idx, jobs=%jobs_in_plan, "Checkpointing import status");
                        },
                        Some(Err(_)) => {
                            anyhow::bail!(
@@ -568,18 +611,18 @@ impl PgDataDirDb {
                };

                let path = datadir_path.join(rel_tag.to_segfile_name(segno));
-                assert!(filesize % BLCKSZ as usize == 0); // TODO: this should result in an error
+                anyhow::ensure!(filesize % BLCKSZ as usize == 0);
                let nblocks = filesize / BLCKSZ as usize;

-                PgDataDirDbFile {
+                Ok(PgDataDirDbFile {
                    path,
                    filesize,
                    rel_tag,
                    segno,
                    nblocks: Some(nblocks), // first non-cummulative sizes
-                }
+                })
            })
-            .collect();
+            .collect::<anyhow::Result<_, _>>()?;

        // Set cummulative sizes. Do all of that math here, so that later we could easier
        // parallelize over segments and know with which segments we need to write relsize
@@ -614,12 +657,22 @@ impl PgDataDirDb {
 trait ImportTask {
    fn key_range(&self) -> Range<Key>;

-    fn total_size(&self) -> usize {
-        // TODO: revisit this
-        if is_contiguous_range(&self.key_range()) {
-            contiguous_range_len(&self.key_range()) as usize * 8192
+    fn total_size(&self, shard_identity: &ShardIdentity) -> usize {
+        let range = ShardedRange::new(self.key_range(), shard_identity);
+        let page_count = range.page_count();
+        if page_count == u32::MAX {
+            tracing::warn!(
+                "Import task has non contiguous key range: {}..{}",
+                self.key_range().start,
+                self.key_range().end
+            );
+
+            // Tasks should operate on contiguous ranges. It is unexpected for
+            // ranges to violate this assumption. Calling code handles this by mapping
+            // any task on a non contiguous range to its own image layer.
+            usize::MAX
        } else {
-            u32::MAX as usize
+            page_count as usize * 8192
        }
    }

@@ -640,7 +693,11 @@ impl Hash for ImportSingleKeyTask {
        let ImportSingleKeyTask { key, buf } = self;

        key.hash(state);
-        buf.hash(state);
+        // The key value might not have a stable binary representation.
+        // For instance, the db directory uses an unstable hash-map.
+        // To work around this we are a bit lax here and only hash the
+        // size of the buffer which must be consistent.
+        buf.len().hash(state);
    }
 }

@@ -713,6 +770,8 @@ impl ImportTask for ImportRelBlocksTask {
        layer_writer: &mut ImageLayerWriter,
        ctx: &RequestContext,
    ) -> anyhow::Result<usize> {
+        const MAX_BYTE_RANGE_SIZE: usize = 4 * 1024 * 1024;
+
        debug!("Importing relation file");

        let (rel_tag, start_blk) = self.key_range.start.to_rel_block()?;
@@ -737,7 +796,7 @@ impl ImportTask for ImportRelBlocksTask {
                assert_eq!(key.len(), 1);
                assert!(!acc.is_empty());
                assert!(acc_end > acc_start);
-                if acc_end == start /* TODO additional max range check here, to limit memory consumption per task to X */ {
+                if acc_end == start && end - acc_start <= MAX_BYTE_RANGE_SIZE {
                    acc.push(key.pop().unwrap());
                    Ok((acc, acc_start, end))
                } else {
@@ -752,8 +811,8 @@ impl ImportTask for ImportRelBlocksTask {
                .get_range(&self.path, range_start.into_u64(), range_end.into_u64())
                .await?;
            let mut buf = Bytes::from(range_buf);
-            // TODO: batched writes
            for key in keys {
+                // The writer buffers writes internally
                let image = buf.split_to(8192);
                layer_writer.put_image(key, image, ctx).await?;
                nimages += 1;
@@ -806,6 +865,9 @@ impl ImportTask for ImportSlruBlocksTask {
        debug!("Importing SLRU segment file {}", self.path);
        let buf = self.storage.get(&self.path).await?;

+        // TODO(vlad): Does timestamp to LSN work for imported timelines?
+        // Probably not since we don't append the `xact_time` to it as in
+        // [`WalIngest::ingest_xact_record`].
        let (kind, segno, start_blk) = self.key_range.start.to_slru_block()?;
        let (_kind, _segno, end_blk) = self.key_range.end.to_slru_block()?;
        let mut blknum = start_blk;
@@ -915,7 +977,7 @@ impl ChunkProcessingJob {
                let guard = timeline.layers.read().await;
                let existing_layer = guard.try_get_from_key(&desc.key());
                if let Some(layer) = existing_layer {
-                    if layer.metadata().generation != timeline.generation {
+                    if layer.metadata().generation == timeline.generation {
                        return Err(anyhow::anyhow!(
                            "Import attempted to rewrite layer file in the same generation: {}",
                            layer.local_path()
@@ -942,6 +1004,15 @@ impl ChunkProcessingJob {
            .cloned();
        match existing_layer {
            Some(existing) => {
+                // Unlink the remote layer from the index without scheduling its deletion.
+                // When `existing_layer` drops [`LayerInner::drop`] will schedule its deletion from
+                // remote storage, but that assumes that the layer was unlinked from the index first.
+                timeline
+                    .remote_client
+                    .schedule_unlinking_of_layers_from_index_part(std::iter::once(
+                        existing.layer_desc().layer_name(),
+                    ))?;
+
                guard.open_mut()?.rewrite_layers(
                    &[(existing.clone(), resident_layer.clone())],
                    &[],
--- a/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs
@@ -6,7 +6,7 @@ use bytes::Bytes;
 use postgres_ffi::ControlFileData;
 use remote_storage::{
    Download, DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, Listing,
-    ListingObject, RemotePath,
+    ListingObject, RemotePath, RemoteStorageConfig,
 };
 use serde::de::DeserializeOwned;
 use tokio_util::sync::CancellationToken;
@@ -22,11 +22,9 @@ pub async fn new(
    location: &index_part_format::Location,
    cancel: CancellationToken,
 ) -> Result<RemoteStorageWrapper, anyhow::Error> {
-    // FIXME: we probably want some timeout, and we might be able to assume the max file
-    // size on S3 is 1GiB (postgres segment size). But the problem is that the individual
-    // downloaders don't know enough about concurrent downloads to make a guess on the
-    // expected bandwidth and resulting best timeout.
-    let timeout = std::time::Duration::from_secs(24 * 60 * 60);
+    // Downloads should be reasonably sized. We do ranged reads for relblock raw data
+    // and full reads for SLRU segments which are bounded by Postgres.
+    let timeout = RemoteStorageConfig::DEFAULT_TIMEOUT;
    let location_storage = match location {
        #[cfg(feature = "testing")]
        index_part_format::Location::LocalFs { path } => {
@@ -50,9 +48,12 @@ pub async fn new(
                            .import_pgdata_aws_endpoint_url
                            .clone()
                            .map(|url| url.to_string()), //  by specifying None here, remote_storage/aws-sdk-rust will infer from env
-                        concurrency_limit: 100.try_into().unwrap(), // TODO: think about this
-                        max_keys_per_list_response: Some(1000),     // TODO: think about this
-                        upload_storage_class: None,                 // irrelevant
+                        // This matches the default import job concurrency. This is managed
+                        // separately from the usual S3 client, but the concern here is bandwidth
+                        // usage.
+                        concurrency_limit: 128.try_into().unwrap(),
+                        max_keys_per_list_response: Some(1000),
+                        upload_storage_class: None, // irrelevant
                    },
                    timeout,
                )
--- a/pageserver/src/tenant/timeline/walreceiver.rs
+++ b/pageserver/src/tenant/timeline/walreceiver.rs
@@ -113,7 +113,7 @@ impl WalReceiver {
                }
                connection_manager_state.shutdown().await;
                *loop_status.write().unwrap() = None;
-                debug!("task exits");
+                info!("task exits");
            }
            .instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), timeline_id = %timeline_id))
        });
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -297,6 +297,7 @@ pub(super) async fn handle_walreceiver_connection(
    let mut expected_wal_start = startpoint;
    while let Some(replication_message) = {
        select! {
+            biased;
            _ = cancellation.cancelled() => {
                debug!("walreceiver interrupted");
                None
--- a/paster/Cargo.toml
+++ b/paster/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "paster"
+version = "0.1.0"
+edition.workspace = true
+license.workspace = true
+[dependencies]
+anyhow.workspace = true
+axum-extra = { workspace = true, features = ["cookie", "cookie-private"] }
+axum.workspace = true
+base64.workspace = true
+chrono.workspace = true
+nanoid = { version = "0.4.0", default-features = false }
+rand.workspace = true
+reqwest.workspace = true
+rustls-native-certs.workspace = true
+rustls.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+time = { version = "0.3.36", default-features = false }
+tokio-postgres-rustls.workspace = true
+tokio-postgres.workspace = true
+tokio.workspace = true
+tracing-subscriber.workspace = true
+tracing.workspace = true
+workspace_hack.workspace = true
--- a/paster/migrations/1_initial.sql
+++ b/paster/migrations/1_initial.sql
@@ -0,0 +1,18 @@
+-- Registered users; `sub` must be unique per user.
+CREATE TABLE IF NOT EXISTS users (
+    id SERIAL PRIMARY KEY,
+    sub VARCHAR(100) NOT NULL UNIQUE
+);
+
+-- Login sessions; `user_id` is UNIQUE, so each user has at most one session row.
+CREATE TABLE IF NOT EXISTS sessions (
+    id SERIAL PRIMARY KEY,
+    user_id INT NOT NULL UNIQUE REFERENCES users(id),
+    session_id VARCHAR NOT NULL,
+    expires_at TIMESTAMP WITH TIME ZONE NOT NULL
+);
+
+-- Pastes, each owned by the user who submitted it.
+CREATE TABLE IF NOT EXISTS pastes (
+    id SERIAL PRIMARY KEY,
+    user_id INT NOT NULL REFERENCES users(id),
+    paste text NOT NULL,
+    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
+);
--- a/paster/src/main.rs
+++ b/paster/src/main.rs
@@ -0,0 +1,353 @@
+//! Paster is a service to share logs or code snippets outside of
+//! Slack, not relying on public services
+use anyhow::Result;
+use shortener::google_oauth_gate::{AuthRequest, State, UserId};
+use axum::Form;
+use axum::extract::{FromRef, FromRequestParts, Path, Query, State as AxumStateT};
+use axum::http::StatusCode;
+use axum::response::{Html, IntoResponse};
+use axum::response::{Redirect, Response};
+use axum::routing::get;
+use axum_extra::extract::PrivateCookieJar;
+use axum_extra::extract::cookie::{Cookie, Key};
+use chrono::{Duration, Local, TimeZone, Utc};
+use core::num::NonZeroI32;
+use serde::Deserialize;
+use std::env;
+use std::sync::Arc;
+use tracing::{error, info};
+use tracing_subscriber::layer::SubscriberExt;
+use tracing_subscriber::util::SubscriberInitExt;
+
+const SOCKET: &str = "127.0.0.1:12344";
+const HOST: &str = "http://127.0.0.1:12344";
+const ALLOWED_OAUTH_DOMAIN: &str = "neon.tech";
+
+/// Builds the absolute OAuth redirect URL: `HOST` immediately followed by
+/// `AUTHORIZED_ROUTE`.
+fn oauth_redirect_url() -> String {
+    [HOST, AUTHORIZED_ROUTE].concat()
+}
+
+/// Entry point: initializes logging, connects to Postgres over TLS and serves the
+/// HTTP routes on `SOCKET`.
+///
+/// # Errors
+/// Returns an error if the database connection cannot be established, the listen
+/// socket cannot be bound, or the HTTP server terminates with an I/O error.
+///
+/// # Panics
+/// Panics if `OAUTH_CLIENT_ID`, `OAUTH_CLIENT_SECRET` or `DB_CONNSTR` is not set, or if
+/// the platform certificates cannot be loaded or added to the root store.
+#[tokio::main]
+async fn main() -> Result<()> {
+    // Default to `<crate>=info` when no filter is configured through the environment.
+    tracing_subscriber::registry()
+        .with(
+            tracing_subscriber::EnvFilter::try_from_default_env()
+                .unwrap_or_else(|_| format!("{}=info", env!("CARGO_CRATE_NAME")).into()),
+        )
+        .with(tracing_subscriber::fmt::layer())
+        .init();
+
+    let oauth_client_id = env::var("OAUTH_CLIENT_ID").expect("Missing OAUTH_CLIENT_ID");
+    let oauth_client_secret = env::var("OAUTH_CLIENT_SECRET").expect("Missing OAUTH_CLIENT_SECRET");
+
+    let db_connstr = env::var("DB_CONNSTR").expect("Missing DB_CONNSTR");
+    let mut roots = rustls::RootCertStore::empty();
+    for cert in rustls_native_certs::load_native_certs().expect("could not load platform certs") {
+        roots.add(cert).unwrap();
+    }
+    let config = rustls::ClientConfig::builder()
+        .with_root_certificates(roots)
+        .with_no_client_auth();
+    let tls = tokio_postgres_rustls::MakeRustlsConnect::new(config);
+    info!("initialized TLS");
+
+    let (db_client, db_conn) = tokio_postgres::connect(&db_connstr, tls).await?;
+    // The connection object drives the socket I/O for `db_client`; it only resolves
+    // when the connection ends. Without it the service cannot work, so exit on error.
+    tokio::spawn(async move {
+        if let Err(err) = db_conn.await {
+            error!(%err, "database connection failed");
+            std::process::exit(1);
+        }
+    });
+    info!("connected to database");
+
+    let state = InnerState {
+        db_client,
+        cookie_jar_key: Key::generate(),
+        oauth_client_id,
+        oauth_client_secret,
+    };
+    let router = axum::Router::new()
+        .route("/", get(index).post(paste))
+        .route("/authorize", get(authorize))
+        .route(AUTHORIZED_ROUTE, get(authorized))
+        .route("/{id}", get(view_paste))
+        .with_state(State(Arc::new(state)));
+    // Report bind/serve failures through the `Result` instead of panicking.
+    let listener = tokio::net::TcpListener::bind(SOCKET)
+        .await
+        .map_err(|err| anyhow::Error::new(err).context("failed to bind TcpListener"))?;
+    info!("listening on {SOCKET}");
+    axum::serve(listener, router).await?;
+    Ok(())
+}
+
+/// Non-zero database id of a signed-in user.
+#[derive(Deserialize)]
+pub struct UserId {
+    id: NonZeroI32,
+}
+
+/// Optional extractor that resolves the session cookie to a [`UserId`].
+///
+/// Never rejects the request: it yields `Ok(None)` when the `COOKIE_SID` cookie is
+/// missing, when no unexpired session matches it, or when the lookup fails (the error
+/// is logged).
+impl axum::extract::OptionalFromRequestParts<State> for UserId {
+    type Rejection = Response;
+    async fn from_request_parts(
+        parts: &mut axum::http::request::Parts,
+        state: &State,
+    ) -> Result<Option<Self>, Self::Rejection> {
+        let jar: PrivateCookieJar = PrivateCookieJar::from_request_parts(parts, state)
+            .await
+            .unwrap(); // infallible
+        let Some(session_id) = jar.get(COOKIE_SID).map(|cookie| cookie.value().to_owned()) else {
+            return Ok(None);
+        };
+
+        // Only accept sessions whose `expires_at` is still in the future, so that a
+        // stale session row cannot authenticate a request.
+        let client = &state.db_client;
+        let query = client
+            .query_opt(
+                "SELECT user_id FROM sessions WHERE session_id = $1 AND expires_at > now()",
+                &[&session_id],
+            )
+            .await;
+        let id = match query {
+            Ok(Some(row)) => row.get::<usize, i32>(0),
+            Ok(None) => return Ok(None),
+            Err(err) => {
+                error!(%err, "querying user session");
+                return Ok(None);
+            }
+        };
+        // A zero id cannot name a user; treat it as "not signed in" rather than panicking.
+        Ok(NonZeroI32::new(id).map(|id| Self { id }))
+    }
+}
+
+/// Form payload of a paste submission; `paste` matches the `name` of the textarea
+/// rendered by `paste_form`.
+#[derive(Deserialize)]
+struct Paste {
+    paste: String,
+}
+
+/// Renders the HTML form (a textarea plus a submit button) used to create a paste.
+fn paste_form() -> Html<String> {
+    let markup = r#"
+            <form method="post">
+                <textarea name="paste" style="width:100%;height:80%"></textarea>
+                <input type="submit" value="Paste" style="margin-top:10px">
+            </form>"#;
+    Html(String::from(markup))
+}
+
+/// Returns an HTML anchor pointing at `/authorize`, carrying `paste_id` as a query
+/// parameter.
+fn authorize_link(paste_id: i32) -> String {
+    format!(r#"<a href="/authorize?paste_id={paste_id}">Authorize</a>"#)
+}
+
+/// Landing page: the paste form for signed-in users, otherwise an authorize link
+/// (with paste id 0).
+async fn index(user: Option<UserId>) -> Html<String> {
+    match user {
+        Some(_) => paste_form(),
+        None => Html(authorize_link(0)),
+    }
+}
+
+/// Stores a submitted paste and redirects to its page.
+///
+/// Responds with 403 when no user is signed in, re-renders the form for an empty
+/// paste, and responds with 500 (after logging) when the insert fails.
+async fn paste(
+    state: AxumState,
+    user: Option<UserId>,
+    Form(Paste { paste }): Form<Paste>,
+) -> Response {
+    let Some(user) = user else {
+        return StatusCode::FORBIDDEN.into_response();
+    };
+    let user_id = user.id;
+    if paste.is_empty() {
+        return paste_form().into_response();
+    }
+
+    let inserted = state
+        .db_client
+        .query_one(
+            "INSERT INTO pastes (user_id, paste) VALUES ($1, $2) RETURNING id",
+            &[&user_id.get(), &paste],
+        )
+        .await;
+    match inserted {
+        Ok(row) => {
+            let id = row.get::<usize, i32>(0);
+            Redirect::to(&format!("/{id}")).into_response()
+        }
+        Err(err) => {
+            error!(%err, "inserting paste");
+            StatusCode::INTERNAL_SERVER_ERROR.into_response()
+        }
+    }
+}
+
+/// Returns the text of the paste with the given id to signed-in users.
+///
+/// Anonymous visitors get an authorize link carrying the paste id; an unknown id
+/// yields 404 and a failed query yields 500 (after logging).
+async fn view_paste(state: AxumState, user: Option<UserId>, Path(paste_id): Path<i32>) -> Response {
+    let Some(user) = user else {
+        return Html(authorize_link(paste_id)).into_response();
+    };
+    // Only used to attribute a failed query in the log.
+    let user_id = user.id;
+
+    let lookup = state
+        .db_client
+        .query_opt("SELECT paste FROM pastes WHERE id = $1", &[&paste_id])
+        .await;
+    match lookup {
+        Ok(Some(row)) => row.get::<usize, String>(0).into_response(),
+        Ok(None) => StatusCode::NOT_FOUND.into_response(),
+        Err(err) => {
+            error!(%err, %paste_id, %user_id, "querying paste");
+            StatusCode::INTERNAL_SERVER_ERROR.into_response()
+        }
+    }
+}
+
+/// Query parameters read from the OAuth callback request handled by `authorized`.
+///
+/// Only `code` is captured here; no other callback parameter (such as `state`)
+/// is part of this struct.
+#[derive(Deserialize)]
+struct AuthRequest {
+    // Authorization code, forwarded as the `code` field of the token exchange request.
+    code: String,
+}
+
+/// JSON body returned by the token endpoint (`OAUTH_TOKEN_URL`) in `authorized`.
+#[derive(Deserialize)]
+struct AuthResponse {
+    // Stored as the session id in the `sessions` table and as the session cookie value.
+    access_token: String,
+    // Dot-separated token whose second segment is decoded by `decode_id_token`.
+    id_token: String,
+    // Token lifetime; `authorized` interprets it as a number of seconds.
+    expires_in: u64,
+}
+
+/// Claims extracted from the id token payload by `decode_id_token`.
+///
+/// Both fields are required: a payload missing either one fails to deserialize.
+#[derive(Deserialize)]
+struct UserInfo {
+    // Compared against `ALLOWED_OAUTH_DOMAIN` in `authorized`
+    // (presumably the provider's hosted-domain claim -- confirm against provider docs).
+    hd: String,
+    // Subject identifier; stored in `users.sub` to identify the account.
+    sub: String,
+}
+
+/// Extracts the claims from an id token (JWT) without verifying its signature.
+///
+/// The token is split on `.` and the second segment (the payload) is decoded
+/// and deserialized into [`UserInfo`]. Returns `None` when the token has no
+/// second segment, the segment is not valid base64url, or the JSON lacks the
+/// fields `UserInfo` requires.
+///
+/// JWT segments are base64url-encoded without padding (RFC 7515), so the
+/// URL-safe alphabet must be used: with the standard alphabet any payload
+/// containing `-` or `_` would fail to decode.
+///
+/// NOTE(review): no signature check happens here; callers must only pass
+/// tokens obtained from a trusted channel (the only caller in this file passes
+/// the token endpoint's response).
+fn decode_id_token(token: String) -> Option<UserInfo> {
+    let payload = token.split('.').nth(1)?;
+    let decoded = base64::decode_config(payload, base64::URL_SAFE_NO_PAD).ok()?;
+    serde_json::from_slice::<UserInfo>(&decoded).ok()
+}
+
+/// Query parameters accepted by the `authorize` handler.
+#[derive(Deserialize)]
+struct AuthorizeQuery {
+    // Paste to return to after login; saved in the redirect cookie by `authorize`.
+    // `index` passes 0 here, which `authorized` maps to the root page.
+    paste_id: i32,
+}
+
+/// Produces a random token of `num_bytes` random bytes, encoded as unpadded
+/// URL-safe base64 so it can be placed in a query string as-is.
+fn generate_csrf_token(num_bytes: u32) -> String {
+    use rand::{Rng, thread_rng};
+    let mut rng = thread_rng();
+    let mut random_bytes = Vec::new();
+    for _ in 0..num_bytes {
+        random_bytes.push(rng.r#gen::<u8>());
+    }
+    base64::encode_config(&random_bytes, base64::URL_SAFE_NO_PAD)
+}
+
+/// Starts the OAuth flow by redirecting the browser to the provider.
+///
+/// Two cookies are added to the private jar before redirecting:
+/// - `COOKIE_REDIRECT` holds the paste id to return to afterwards,
+/// - `COOKIE_CSRF` holds the freshly generated token that is also sent to the
+///   provider as the `state` parameter.
+async fn authorize(
+    state: AxumState,
+    jar: PrivateCookieJar,
+    Query(AuthorizeQuery { paste_id }): Query<AuthorizeQuery>,
+) -> (PrivateCookieJar, Redirect) {
+    use axum_extra::extract::cookie::SameSite;
+
+    let csrf_token = generate_csrf_token(16);
+    let client_id = &state.oauth_client_id;
+    let redirect_uri = oauth_redirect_url();
+    let auth_url = format!(
+        "{OAUTH_BASE_URL}?response_type=code\
+        &client_id={client_id}\
+        &state={csrf_token}\
+        &redirect_uri={redirect_uri}\
+        &scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email"
+    );
+
+    // TODO: both cookies should be `secure(true)` with an explicit domain
+    // (COOKIE_DOMAIN); that is disabled because it does not hold on localhost.
+    let redirect_cookie = Cookie::build((COOKIE_REDIRECT, paste_id.to_string()))
+        .http_only(true)
+        .same_site(SameSite::Lax)
+        .secure(false)
+        .path("/")
+        .build();
+    let csrf_cookie = Cookie::build((COOKIE_CSRF, csrf_token))
+        .http_only(true)
+        .same_site(SameSite::Lax)
+        .secure(false)
+        .path("/")
+        .build();
+
+    (
+        jar.add(redirect_cookie).add(csrf_cookie),
+        Redirect::to(&auth_url),
+    )
+}
+
+/// OAuth callback: exchanges the authorization code for tokens, checks the
+/// account's domain, records a session and signs the user in.
+///
+/// On success the CSRF and redirect cookies are cleared, the session cookie is
+/// set, and the browser is redirected to the paste remembered by `authorize`
+/// (or to `/` when none was remembered or the remembered id is `0`).
+///
+/// # Errors
+/// - `400 Bad Request` when the CSRF cookie set by `authorize` is missing.
+/// - `401 Unauthorized` when the id token cannot be decoded or its `hd` claim
+///   differs from `ALLOWED_OAUTH_DOMAIN`.
+/// - `500 Internal Server Error` when the token exchange, its response body,
+///   the reported token lifetime, or the session upsert is invalid or fails.
+async fn authorized(
+    state: AxumState,
+    jar: PrivateCookieJar,
+    Query(auth_request): Query<AuthRequest>,
+) -> Result<(PrivateCookieJar, Redirect), Response> {
+    // `authorize` sets this cookie before sending the browser to the provider.
+    // A callback arriving without it did not start there, so reject it up
+    // front instead of panicking after the session has already been written.
+    // NOTE(review): the `state` query parameter returned by the provider is
+    // never compared with this cookie's value; `AuthRequest` would need a
+    // `state` field for that check -- TODO.
+    let Some(csrf_cookie) = jar.get(COOKIE_CSRF) else {
+        error!("Missing CSRF cookie on OAuth callback");
+        return Err(StatusCode::BAD_REQUEST.into_response());
+    };
+
+    let params = [
+        ("grant_type", "authorization_code"),
+        ("redirect_uri", &oauth_redirect_url()),
+        ("code", &auth_request.code),
+        ("client_id", &state.oauth_client_id),
+        ("client_secret", &state.oauth_client_secret),
+    ];
+    let auth_response = reqwest::Client::new()
+        .post(OAUTH_TOKEN_URL)
+        .form(&params)
+        .send()
+        .await
+        .map_err(|err| {
+            error!(%err, "exchanging oauth code for token");
+            StatusCode::INTERNAL_SERVER_ERROR.into_response()
+        })?
+        .json::<AuthResponse>()
+        .await
+        .map_err(|err| {
+            error!(%err, "deserializing access token response");
+            StatusCode::INTERNAL_SERVER_ERROR.into_response()
+        })?;
+    let Some(UserInfo { hd, sub }) = decode_id_token(auth_response.id_token) else {
+        error!("Failed to decode response id token");
+        return Err(StatusCode::UNAUTHORIZED.into_response());
+    };
+    if hd != ALLOWED_OAUTH_DOMAIN {
+        error!(hd, "Domain doesn't match {ALLOWED_OAUTH_DOMAIN}");
+        return Err(StatusCode::UNAUTHORIZED.into_response());
+    }
+
+    // `expires_in` comes from the upstream response: convert it with checked
+    // operations so an out-of-range value is reported rather than wrapping or
+    // panicking.
+    let expires_in = auth_response.expires_in;
+    let session_expiry = i64::try_from(expires_in)
+        .ok()
+        .and_then(Duration::try_seconds)
+        .and_then(|lifetime| {
+            // The naive timestamp is tagged as UTC below, so it must be taken
+            // in UTC (`naive_utc`), not in the server's local time zone;
+            // otherwise the expiry is skewed by the local UTC offset.
+            let expiry = Local::now().naive_utc().checked_add_signed(lifetime)?;
+            Some((lifetime, Utc.from_utc_datetime(&expiry)))
+        });
+    let Some((token_duration, expires_at)) = session_expiry else {
+        error!(expires_in, "Token lifetime is out of range");
+        return Err(StatusCode::INTERNAL_SERVER_ERROR.into_response());
+    };
+    let cookie_max_age = time::Duration::new(token_duration.num_seconds(), 0);
+
+    // TODO: should be `secure(true)` with an explicit domain (COOKIE_DOMAIN);
+    // that is disabled because it does not hold on localhost.
+    let session_cookie = Cookie::build((COOKIE_SID, auth_response.access_token.clone()))
+        .path("/")
+        .secure(false)
+        .same_site(axum_extra::extract::cookie::SameSite::Lax)
+        .http_only(true)
+        .max_age(cookie_max_age)
+        .build();
+
+    // Upsert the user by `sub`, then upsert that user's single session row.
+    state
+        .db_client
+        .query(
+            "WITH user_insert AS (\
+                INSERT INTO users (sub) VALUES ($1) \
+                ON CONFLICT (sub) DO UPDATE SET sub = excluded.sub RETURNING id)\
+        INSERT INTO sessions (user_id, session_id, expires_at) \
+        SELECT id, $2, $3 FROM user_insert \
+        ON CONFLICT (user_id) DO UPDATE SET \
+            session_id = excluded.session_id, \
+             expires_at = excluded.expires_at",
+            &[&sub, &auth_response.access_token, &expires_at],
+        )
+        .await
+        .map_err(|err| {
+            error!(%err, %sub, "updating session");
+            StatusCode::INTERNAL_SERVER_ERROR.into_response()
+        })?;
+
+    let jar = jar.remove(csrf_cookie).add(session_cookie);
+    match jar.get(COOKIE_REDIRECT) {
+        Some(redirect_cookie) => {
+            // Paste id 0 is the placeholder used by `index`; it maps to the root page.
+            let target = match redirect_cookie.value_trimmed() {
+                "0" => "",
+                other => other,
+            };
+            let redirect_url = format!("/{target}");
+            Ok((jar.remove(redirect_cookie), Redirect::to(&redirect_url)))
+        }
+        None => Ok((jar, Redirect::to("/"))),
+    }
+}
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -155,8 +155,9 @@ WalProposerCreate(WalProposerConfig *config, walproposer_api api)
 			int			written = 0;

 			written = snprintf((char *) &sk->conninfo, MAXCONNINFO,
-							   "host=%s port=%s dbname=replication options='-c timeline_id=%s tenant_id=%s'",
-							   sk->host, sk->port, wp->config->neon_timeline, wp->config->neon_tenant);
+							   "%s host=%s port=%s dbname=replication options='-c timeline_id=%s tenant_id=%s'",
+							   wp->config->safekeeper_conninfo_options, sk->host, sk->port,
+							   wp->config->neon_timeline, wp->config->neon_tenant);
 			if (written > MAXCONNINFO || written < 0)
 				wp_log(FATAL, "could not create connection string for safekeeper %s:%s", sk->host, sk->port);
 		}
--- a/pgxn/neon/walproposer.h
+++ b/pgxn/neon/walproposer.h
@@ -714,6 +714,9 @@ typedef struct WalProposerConfig
 	 */
 	char	   *safekeepers_list;

+	/* libpq connection info options. */
+	char	   *safekeeper_conninfo_options;
+
 	/*
 	 * WalProposer reconnects to offline safekeepers once in this interval.
 	 * Time is in milliseconds.
--- a/pgxn/neon/walproposer_pg.c
+++ b/pgxn/neon/walproposer_pg.c
@@ -64,6 +64,7 @@ char	   *wal_acceptors_list = "";
 int			wal_acceptor_reconnect_timeout = 1000;
 int			wal_acceptor_connection_timeout = 10000;
 int			safekeeper_proto_version = 3;
+char	   *safekeeper_conninfo_options = "";

 /* Set to true in the walproposer bgw. */
 static bool am_walproposer;
@@ -119,6 +120,7 @@ init_walprop_config(bool syncSafekeepers)
 	walprop_config.neon_timeline = neon_timeline;
 	/* WalProposerCreate scribbles directly on it, so pstrdup */
 	walprop_config.safekeepers_list = pstrdup(wal_acceptors_list);
+	walprop_config.safekeeper_conninfo_options = pstrdup(safekeeper_conninfo_options);
 	walprop_config.safekeeper_reconnect_timeout = wal_acceptor_reconnect_timeout;
 	walprop_config.safekeeper_connection_timeout = wal_acceptor_connection_timeout;
 	walprop_config.wal_segment_size = wal_segment_size;
@@ -203,6 +205,16 @@ nwp_register_gucs(void)
 												 * GUC_LIST_QUOTE */
 							   NULL, assign_neon_safekeepers, NULL);

+	DefineCustomStringVariable(
+							   "neon.safekeeper_conninfo_options",
+							   "libpq keyword parameters and values to apply to safekeeper connections",
+							   NULL,
+							   &safekeeper_conninfo_options,
+							   "",
+							   PGC_POSTMASTER,
+							   0,
+							   NULL, NULL, NULL);
+
 	DefineCustomIntVariable(
 							"neon.safekeeper_reconnect_timeout",
 							"Walproposer reconnects to offline safekeepers once in this interval.",
--- a/proxy/src/auth/backend/classic.rs
+++ b/proxy/src/auth/backend/classic.rs
@@ -17,35 +17,27 @@ pub(super) async fn authenticate(
    config: &'static AuthenticationConfig,
    secret: AuthSecret,
 ) -> auth::Result<ComputeCredentials> {
-    let flow = AuthFlow::new(client);
    let scram_keys = match secret {
        #[cfg(any(test, feature = "testing"))]
        AuthSecret::Md5(_) => {
            debug!("auth endpoint chooses MD5");
-            return Err(auth::AuthError::bad_auth_method("MD5"));
+            return Err(auth::AuthError::MalformedPassword("MD5 not supported"));
        }
        AuthSecret::Scram(secret) => {
            debug!("auth endpoint chooses SCRAM");
            let scram = auth::Scram(&secret, ctx);

-            let auth_outcome = tokio::time::timeout(
-                config.scram_protocol_timeout,
-                async {
-
-                    flow.begin(scram).await.map_err(|error| {
-                        warn!(?error, "error sending scram acknowledgement");
-                        error
-                    })?.authenticate().await.map_err(|error| {
+            let auth_outcome = tokio::time::timeout(config.scram_protocol_timeout, async {
+                AuthFlow::new(client, scram)
+                    .authenticate()
+                    .await
+                    .inspect_err(|error| {
                        warn!(?error, "error processing scram messages");
-                        error
                    })
-                }
-            )
+            })
            .await
-            .map_err(|e| {
-                warn!("error processing scram messages error = authentication timed out, execution time exceeded {} seconds", config.scram_protocol_timeout.as_secs());
-                auth::AuthError::user_timeout(e)
-            })??;
+            .inspect_err(|_| warn!("error processing scram messages error = authentication timed out, execution time exceeded {} seconds", config.scram_protocol_timeout.as_secs()))
+            .map_err(auth::AuthError::user_timeout)??;

            let client_key = match auth_outcome {
                sasl::Outcome::Success(key) => key,
--- a/proxy/src/auth/backend/console_redirect.rs
+++ b/proxy/src/auth/backend/console_redirect.rs
@@ -2,7 +2,6 @@ use std::fmt;

 use async_trait::async_trait;
 use postgres_client::config::SslMode;
-use pq_proto::BeMessage as Be;
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{info, info_span};
@@ -16,6 +15,7 @@ use crate::context::RequestContext;
 use crate::control_plane::client::cplane_proxy_v1;
 use crate::control_plane::{self, CachedNodeInfo, NodeInfo};
 use crate::error::{ReportableError, UserFacingError};
+use crate::pqproto::BeMessage;
 use crate::proxy::NeonOptions;
 use crate::proxy::connect_compute::ComputeConnectBackend;
 use crate::stream::PqStream;
@@ -154,11 +154,13 @@ async fn authenticate(

    // Give user a URL to spawn a new database.
    info!(parent: &span, "sending the auth URL to the user");
-    client
-        .write_message_noflush(&Be::AuthenticationOk)?
-        .write_message_noflush(&Be::CLIENT_ENCODING)?
-        .write_message(&Be::NoticeResponse(&greeting))
-        .await?;
+    client.write_message(BeMessage::AuthenticationOk);
+    client.write_message(BeMessage::ParameterStatus {
+        name: b"client_encoding",
+        value: b"UTF8",
+    });
+    client.write_message(BeMessage::NoticeResponse(&greeting));
+    client.flush().await?;

    // Wait for console response via control plane (see `mgmt`).
    info!(parent: &span, "waiting for console's reply...");
@@ -188,7 +190,7 @@ async fn authenticate(
        }
    }

-    client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;
+    client.write_message(BeMessage::NoticeResponse("Connecting to database."));

    // This config should be self-contained, because we won't
    // take username or dbname from client's startup message.
--- a/proxy/src/auth/backend/hacks.rs
+++ b/proxy/src/auth/backend/hacks.rs
@@ -24,23 +24,25 @@ pub(crate) async fn authenticate_cleartext(
    debug!("cleartext auth flow override is enabled, proceeding");
    ctx.set_auth_method(crate::context::AuthMethod::Cleartext);

-    // pause the timer while we communicate with the client
-    let paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
-
    let ep = EndpointIdInt::from(&info.endpoint);

-    let auth_flow = AuthFlow::new(client)
-        .begin(auth::CleartextPassword {
+    let auth_flow = AuthFlow::new(
+        client,
+        auth::CleartextPassword {
            secret,
            endpoint: ep,
            pool: config.thread_pool.clone(),
-        })
-        .await?;
-    drop(paused);
-    // cleartext auth is only allowed to the ws/http protocol.
-    // If we're here, we already received the password in the first message.
-    // Scram protocol will be executed on the proxy side.
-    let auth_outcome = auth_flow.authenticate().await?;
+        },
+    );
+    let auth_outcome = {
+        // pause the timer while we communicate with the client
+        let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
+
+        // cleartext auth is only allowed to the ws/http protocol.
+        // If we're here, we already received the password in the first message.
+        // Scram protocol will be executed on the proxy side.
+        auth_flow.authenticate().await?
+    };

    let keys = match auth_outcome {
        sasl::Outcome::Success(key) => key,
@@ -67,9 +69,7 @@ pub(crate) async fn password_hack_no_authentication(
    // pause the timer while we communicate with the client
    let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);

-    let payload = AuthFlow::new(client)
-        .begin(auth::PasswordHack)
-        .await?
+    let payload = AuthFlow::new(client, auth::PasswordHack)
        .get_password()
        .await?;

--- a/proxy/src/auth/backend/mod.rs
+++ b/proxy/src/auth/backend/mod.rs
@@ -4,37 +4,31 @@ mod hacks;
 pub mod jwt;
 pub mod local;

-use std::net::IpAddr;
 use std::sync::Arc;

 pub use console_redirect::ConsoleRedirectBackend;
 pub(crate) use console_redirect::ConsoleRedirectError;
-use ipnet::{Ipv4Net, Ipv6Net};
 use local::LocalBackend;
 use postgres_client::config::AuthKeys;
 use serde::{Deserialize, Serialize};
 use tokio::io::{AsyncRead, AsyncWrite};
-use tracing::{debug, info, warn};
+use tracing::{debug, info};

-use crate::auth::credentials::check_peer_addr_is_in_list;
-use crate::auth::{
-    self, AuthError, ComputeUserInfoMaybeEndpoint, IpPattern, validate_password_and_exchange,
-};
+use crate::auth::{self, AuthError, ComputeUserInfoMaybeEndpoint, validate_password_and_exchange};
 use crate::cache::Cached;
 use crate::config::AuthenticationConfig;
 use crate::context::RequestContext;
 use crate::control_plane::client::ControlPlaneClient;
 use crate::control_plane::errors::GetAuthInfoError;
 use crate::control_plane::{
-    self, AccessBlockerFlags, AuthSecret, CachedAccessBlockerFlags, CachedAllowedIps,
-    CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi,
+    self, AccessBlockerFlags, AuthSecret, CachedNodeInfo, ControlPlaneApi, EndpointAccessControl,
+    RoleAccessControl,
 };
 use crate::intern::EndpointIdInt;
-use crate::metrics::Metrics;
-use crate::protocol2::ConnectionInfoExtra;
+use crate::pqproto::BeMessage;
 use crate::proxy::NeonOptions;
 use crate::proxy::connect_compute::ComputeConnectBackend;
-use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter};
+use crate::rate_limiter::EndpointRateLimiter;
 use crate::stream::Stream;
 use crate::types::{EndpointCacheKey, EndpointId, RoleName};
 use crate::{scram, stream};
@@ -200,78 +194,6 @@ impl TryFrom<ComputeUserInfoMaybeEndpoint> for ComputeUserInfo {
    }
 }

-#[derive(PartialEq, PartialOrd, Hash, Eq, Ord, Debug, Copy, Clone)]
-pub struct MaskedIp(IpAddr);
-
-impl MaskedIp {
-    fn new(value: IpAddr, prefix: u8) -> Self {
-        match value {
-            IpAddr::V4(v4) => Self(IpAddr::V4(
-                Ipv4Net::new(v4, prefix).map_or(v4, |x| x.trunc().addr()),
-            )),
-            IpAddr::V6(v6) => Self(IpAddr::V6(
-                Ipv6Net::new(v6, prefix).map_or(v6, |x| x.trunc().addr()),
-            )),
-        }
-    }
-}
-
-// This can't be just per IP because that would limit some PaaS that share IP addresses
-pub type AuthRateLimiter = BucketRateLimiter<(EndpointIdInt, MaskedIp)>;
-
-impl AuthenticationConfig {
-    pub(crate) fn check_rate_limit(
-        &self,
-        ctx: &RequestContext,
-        secret: AuthSecret,
-        endpoint: &EndpointId,
-        is_cleartext: bool,
-    ) -> auth::Result<AuthSecret> {
-        // we have validated the endpoint exists, so let's intern it.
-        let endpoint_int = EndpointIdInt::from(endpoint.normalize());
-
-        // only count the full hash count if password hack or websocket flow.
-        // in other words, if proxy needs to run the hashing
-        let password_weight = if is_cleartext {
-            match &secret {
-                #[cfg(any(test, feature = "testing"))]
-                AuthSecret::Md5(_) => 1,
-                AuthSecret::Scram(s) => s.iterations + 1,
-            }
-        } else {
-            // validating scram takes just 1 hmac_sha_256 operation.
-            1
-        };
-
-        let limit_not_exceeded = self.rate_limiter.check(
-            (
-                endpoint_int,
-                MaskedIp::new(ctx.peer_addr(), self.rate_limit_ip_subnet),
-            ),
-            password_weight,
-        );
-
-        if !limit_not_exceeded {
-            warn!(
-                enabled = self.rate_limiter_enabled,
-                "rate limiting authentication"
-            );
-            Metrics::get().proxy.requests_auth_rate_limits_total.inc();
-            Metrics::get()
-                .proxy
-                .endpoints_auth_rate_limits
-                .get_metric()
-                .measure(endpoint);
-
-            if self.rate_limiter_enabled {
-                return Err(auth::AuthError::too_many_connections());
-            }
-        }
-
-        Ok(secret)
-    }
-}
-
 /// True to its name, this function encapsulates our current auth trade-offs.
 /// Here, we choose the appropriate auth flow based on circumstances.
 ///
@@ -284,7 +206,7 @@ async fn auth_quirks(
    allow_cleartext: bool,
    config: &'static AuthenticationConfig,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-) -> auth::Result<(ComputeCredentials, Option<Vec<IpPattern>>)> {
+) -> auth::Result<ComputeCredentials> {
    // If there's no project so far, that entails that client doesn't
    // support SNI or other means of passing the endpoint (project) name.
    // We now expect to see a very specific payload in the place of password.
@@ -300,55 +222,27 @@ async fn auth_quirks(

    debug!("fetching authentication info and allowlists");

-    // check allowed list
-    let allowed_ips = if config.ip_allowlist_check_enabled {
-        let allowed_ips = api.get_allowed_ips(ctx, &info).await?;
-        if !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) {
-            return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
-        }
-        allowed_ips
-    } else {
-        Cached::new_uncached(Arc::new(vec![]))
-    };
+    let access_controls = api
+        .get_endpoint_access_control(ctx, &info.endpoint, &info.user)
+        .await?;

-    // check if a VPC endpoint ID is coming in and if yes, if it's allowed
-    let access_blocks = api.get_block_public_or_vpc_access(ctx, &info).await?;
-    if config.is_vpc_acccess_proxy {
-        if access_blocks.vpc_access_blocked {
-            return Err(AuthError::NetworkNotAllowed);
-        }
+    access_controls.check(
+        ctx,
+        config.ip_allowlist_check_enabled,
+        config.is_vpc_acccess_proxy,
+    )?;

-        let incoming_vpc_endpoint_id = match ctx.extra() {
-            None => return Err(AuthError::MissingEndpointName),
-            Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(),
-            Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
-        };
-        let allowed_vpc_endpoint_ids = api.get_allowed_vpc_endpoint_ids(ctx, &info).await?;
-        // TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
-        if !allowed_vpc_endpoint_ids.is_empty()
-            && !allowed_vpc_endpoint_ids.contains(&incoming_vpc_endpoint_id)
-        {
-            return Err(AuthError::vpc_endpoint_id_not_allowed(
-                incoming_vpc_endpoint_id,
-            ));
-        }
-    } else if access_blocks.public_access_blocked {
-        return Err(AuthError::NetworkNotAllowed);
-    }
-
-    if !endpoint_rate_limiter.check(info.endpoint.clone().into(), 1) {
+    let endpoint = EndpointIdInt::from(&info.endpoint);
+    let rate_limit_config = None;
+    if !endpoint_rate_limiter.check(endpoint, rate_limit_config, 1) {
        return Err(AuthError::too_many_connections());
    }
-    let cached_secret = api.get_role_secret(ctx, &info).await?;
-    let (cached_entry, secret) = cached_secret.take_value();
+    let role_access = api
+        .get_role_access_control(ctx, &info.endpoint, &info.user)
+        .await?;

-    let secret = if let Some(secret) = secret {
-        config.check_rate_limit(
-            ctx,
-            secret,
-            &info.endpoint,
-            unauthenticated_password.is_some() || allow_cleartext,
-        )?
+    let secret = if let Some(secret) = role_access.secret {
+        secret
    } else {
        // If we don't have an authentication secret, we mock one to
        // prevent malicious probing (possible due to missing protocol steps).
@@ -368,14 +262,8 @@ async fn auth_quirks(
    )
    .await
    {
-        Ok(keys) => Ok((keys, Some(allowed_ips.as_ref().clone()))),
-        Err(e) => {
-            if e.is_password_failed() {
-                // The password could have been changed, so we invalidate the cache.
-                cached_entry.invalidate();
-            }
-            Err(e)
-        }
+        Ok(keys) => Ok(keys),
+        Err(e) => Err(e),
    }
 }

@@ -402,7 +290,7 @@ async fn authenticate_with_secret(
        };

        // we have authenticated the password
-        client.write_message_noflush(&pq_proto::BeMessage::AuthenticationOk)?;
+        client.write_message(BeMessage::AuthenticationOk);

        return Ok(ComputeCredentials { info, keys });
    }
@@ -438,7 +326,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
        allow_cleartext: bool,
        config: &'static AuthenticationConfig,
        endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-    ) -> auth::Result<(Backend<'a, ComputeCredentials>, Option<Vec<IpPattern>>)> {
+    ) -> auth::Result<Backend<'a, ComputeCredentials>> {
        let res = match self {
            Self::ControlPlane(api, user_info) => {
                debug!(
@@ -447,17 +335,35 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
                    "performing authentication using the console"
                );

-                let (credentials, ip_allowlist) = auth_quirks(
+                let auth_res = auth_quirks(
                    ctx,
                    &*api,
-                    user_info,
+                    user_info.clone(),
                    client,
                    allow_cleartext,
                    config,
                    endpoint_rate_limiter,
                )
-                .await?;
-                Ok((Backend::ControlPlane(api, credentials), ip_allowlist))
+                .await;
+                match auth_res {
+                    Ok(credentials) => Ok(Backend::ControlPlane(api, credentials)),
+                    Err(e) => {
+                        // The password could have been changed, so we invalidate the cache.
+                        // We should only invalidate the cache if the TTL might have expired.
+                        if e.is_password_failed() {
+                            #[allow(irrefutable_let_patterns)]
+                            if let ControlPlaneClient::ProxyV1(api) = &*api {
+                                if let Some(ep) = &user_info.endpoint_id {
+                                    api.caches
+                                        .project_info
+                                        .maybe_invalidate_role_secret(ep, &user_info.user);
+                                }
+                            }
+                        }
+
+                        Err(e)
+                    }
+                }
            }
            Self::Local(_) => {
                return Err(auth::AuthError::bad_auth_method("invalid for local proxy"));
@@ -474,44 +380,30 @@ impl Backend<'_, ComputeUserInfo> {
    pub(crate) async fn get_role_secret(
        &self,
        ctx: &RequestContext,
-    ) -> Result<CachedRoleSecret, GetAuthInfoError> {
-        match self {
-            Self::ControlPlane(api, user_info) => api.get_role_secret(ctx, user_info).await,
-            Self::Local(_) => Ok(Cached::new_uncached(None)),
-        }
-    }
-
-    pub(crate) async fn get_allowed_ips(
-        &self,
-        ctx: &RequestContext,
-    ) -> Result<CachedAllowedIps, GetAuthInfoError> {
-        match self {
-            Self::ControlPlane(api, user_info) => api.get_allowed_ips(ctx, user_info).await,
-            Self::Local(_) => Ok(Cached::new_uncached(Arc::new(vec![]))),
-        }
-    }
-
-    pub(crate) async fn get_allowed_vpc_endpoint_ids(
-        &self,
-        ctx: &RequestContext,
-    ) -> Result<CachedAllowedVpcEndpointIds, GetAuthInfoError> {
+    ) -> Result<RoleAccessControl, GetAuthInfoError> {
        match self {
            Self::ControlPlane(api, user_info) => {
-                api.get_allowed_vpc_endpoint_ids(ctx, user_info).await
+                api.get_role_access_control(ctx, &user_info.endpoint, &user_info.user)
+                    .await
            }
-            Self::Local(_) => Ok(Cached::new_uncached(Arc::new(vec![]))),
+            Self::Local(_) => Ok(RoleAccessControl { secret: None }),
        }
    }

-    pub(crate) async fn get_block_public_or_vpc_access(
+    pub(crate) async fn get_endpoint_access_control(
        &self,
        ctx: &RequestContext,
-    ) -> Result<CachedAccessBlockerFlags, GetAuthInfoError> {
+    ) -> Result<EndpointAccessControl, GetAuthInfoError> {
        match self {
            Self::ControlPlane(api, user_info) => {
-                api.get_block_public_or_vpc_access(ctx, user_info).await
+                api.get_endpoint_access_control(ctx, &user_info.endpoint, &user_info.user)
+                    .await
            }
-            Self::Local(_) => Ok(Cached::new_uncached(AccessBlockerFlags::default())),
+            Self::Local(_) => Ok(EndpointAccessControl {
+                allowed_ips: Arc::new(vec![]),
+                allowed_vpce: Arc::new(vec![]),
+                flags: AccessBlockerFlags::default(),
+            }),
        }
    }
 }
@@ -540,9 +432,7 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials> {
 mod tests {
    #![allow(clippy::unimplemented, clippy::unwrap_used)]

-    use std::net::IpAddr;
    use std::sync::Arc;
-    use std::time::Duration;

    use bytes::BytesMut;
    use control_plane::AuthSecret;
@@ -553,18 +443,16 @@ mod tests {
    use postgres_protocol::message::frontend;
    use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt};

+    use super::auth_quirks;
    use super::jwt::JwkCache;
-    use super::{AuthRateLimiter, auth_quirks};
-    use crate::auth::backend::MaskedIp;
    use crate::auth::{ComputeUserInfoMaybeEndpoint, IpPattern};
    use crate::config::AuthenticationConfig;
    use crate::context::RequestContext;
    use crate::control_plane::{
-        self, AccessBlockerFlags, CachedAccessBlockerFlags, CachedAllowedIps,
-        CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret,
+        self, AccessBlockerFlags, CachedNodeInfo, EndpointAccessControl, RoleAccessControl,
    };
    use crate::proxy::NeonOptions;
-    use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo};
+    use crate::rate_limiter::EndpointRateLimiter;
    use crate::scram::ServerSecret;
    use crate::scram::threadpool::ThreadPool;
    use crate::stream::{PqStream, Stream};
@@ -577,46 +465,34 @@ mod tests {
    }

    impl control_plane::ControlPlaneApi for Auth {
-        async fn get_role_secret(
+        async fn get_role_access_control(
            &self,
            _ctx: &RequestContext,
-            _user_info: &super::ComputeUserInfo,
-        ) -> Result<CachedRoleSecret, control_plane::errors::GetAuthInfoError> {
-            Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone())))
+            _endpoint: &crate::types::EndpointId,
+            _role: &crate::types::RoleName,
+        ) -> Result<RoleAccessControl, control_plane::errors::GetAuthInfoError> {
+            Ok(RoleAccessControl {
+                secret: Some(self.secret.clone()),
+            })
        }

-        async fn get_allowed_ips(
+        async fn get_endpoint_access_control(
            &self,
            _ctx: &RequestContext,
-            _user_info: &super::ComputeUserInfo,
-        ) -> Result<CachedAllowedIps, control_plane::errors::GetAuthInfoError> {
-            Ok(CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())))
-        }
-
-        async fn get_allowed_vpc_endpoint_ids(
-            &self,
-            _ctx: &RequestContext,
-            _user_info: &super::ComputeUserInfo,
-        ) -> Result<CachedAllowedVpcEndpointIds, control_plane::errors::GetAuthInfoError> {
-            Ok(CachedAllowedVpcEndpointIds::new_uncached(Arc::new(
-                self.vpc_endpoint_ids.clone(),
-            )))
-        }
-
-        async fn get_block_public_or_vpc_access(
-            &self,
-            _ctx: &RequestContext,
-            _user_info: &super::ComputeUserInfo,
-        ) -> Result<CachedAccessBlockerFlags, control_plane::errors::GetAuthInfoError> {
-            Ok(CachedAccessBlockerFlags::new_uncached(
-                self.access_blocker_flags.clone(),
-            ))
+            _endpoint: &crate::types::EndpointId,
+            _role: &crate::types::RoleName,
+        ) -> Result<EndpointAccessControl, control_plane::errors::GetAuthInfoError> {
+            Ok(EndpointAccessControl {
+                allowed_ips: Arc::new(self.ips.clone()),
+                allowed_vpce: Arc::new(self.vpc_endpoint_ids.clone()),
+                flags: self.access_blocker_flags,
+            })
        }

        async fn get_endpoint_jwks(
            &self,
            _ctx: &RequestContext,
-            _endpoint: crate::types::EndpointId,
+            _endpoint: &crate::types::EndpointId,
        ) -> Result<Vec<super::jwt::AuthRule>, control_plane::errors::GetEndpointJwksError>
        {
            unimplemented!()
@@ -635,9 +511,6 @@ mod tests {
        jwks_cache: JwkCache::default(),
        thread_pool: ThreadPool::new(1),
        scram_protocol_timeout: std::time::Duration::from_secs(5),
-        rate_limiter_enabled: true,
-        rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET),
-        rate_limit_ip_subnet: 64,
        ip_allowlist_check_enabled: true,
        is_vpc_acccess_proxy: false,
        is_auth_broker: false,
@@ -654,55 +527,10 @@ mod tests {
        }
    }

-    #[test]
-    fn masked_ip() {
-        let ip_a = IpAddr::V4([127, 0, 0, 1].into());
-        let ip_b = IpAddr::V4([127, 0, 0, 2].into());
-        let ip_c = IpAddr::V4([192, 168, 1, 101].into());
-        let ip_d = IpAddr::V4([192, 168, 1, 102].into());
-        let ip_e = IpAddr::V6("abcd:abcd:abcd:abcd:abcd:abcd:abcd:abcd".parse().unwrap());
-        let ip_f = IpAddr::V6("abcd:abcd:abcd:abcd:1234:abcd:abcd:abcd".parse().unwrap());
-
-        assert_ne!(MaskedIp::new(ip_a, 64), MaskedIp::new(ip_b, 64));
-        assert_ne!(MaskedIp::new(ip_a, 32), MaskedIp::new(ip_b, 32));
-        assert_eq!(MaskedIp::new(ip_a, 30), MaskedIp::new(ip_b, 30));
-        assert_eq!(MaskedIp::new(ip_c, 30), MaskedIp::new(ip_d, 30));
-
-        assert_ne!(MaskedIp::new(ip_e, 128), MaskedIp::new(ip_f, 128));
-        assert_eq!(MaskedIp::new(ip_e, 64), MaskedIp::new(ip_f, 64));
-    }
-
-    #[test]
-    fn test_default_auth_rate_limit_set() {
-        // these values used to exceed u32::MAX
-        assert_eq!(
-            RateBucketInfo::DEFAULT_AUTH_SET,
-            [
-                RateBucketInfo {
-                    interval: Duration::from_secs(1),
-                    max_rpi: 1000 * 4096,
-                },
-                RateBucketInfo {
-                    interval: Duration::from_secs(60),
-                    max_rpi: 600 * 4096 * 60,
-                },
-                RateBucketInfo {
-                    interval: Duration::from_secs(600),
-                    max_rpi: 300 * 4096 * 600,
-                }
-            ]
-        );
-
-        for x in RateBucketInfo::DEFAULT_AUTH_SET {
-            let y = x.to_string().parse().unwrap();
-            assert_eq!(x, y);
-        }
-    }
-
    #[tokio::test]
    async fn auth_quirks_scram() {
        let (mut client, server) = tokio::io::duplex(1024);
-        let mut stream = PqStream::new(Stream::from_raw(server));
+        let mut stream = PqStream::new_skip_handshake(Stream::from_raw(server));

        let ctx = RequestContext::test();
        let api = Auth {
@@ -784,7 +612,7 @@ mod tests {
    #[tokio::test]
    async fn auth_quirks_cleartext() {
        let (mut client, server) = tokio::io::duplex(1024);
-        let mut stream = PqStream::new(Stream::from_raw(server));
+        let mut stream = PqStream::new_skip_handshake(Stream::from_raw(server));

        let ctx = RequestContext::test();
        let api = Auth {
@@ -838,7 +666,7 @@ mod tests {
    #[tokio::test]
    async fn auth_quirks_password_hack() {
        let (mut client, server) = tokio::io::duplex(1024);
-        let mut stream = PqStream::new(Stream::from_raw(server));
+        let mut stream = PqStream::new_skip_handshake(Stream::from_raw(server));

        let ctx = RequestContext::test();
        let api = Auth {
@@ -887,7 +715,7 @@ mod tests {
        .await
        .unwrap();

-        assert_eq!(creds.0.info.endpoint, "my-endpoint");
+        assert_eq!(creds.info.endpoint, "my-endpoint");

        handle.await.unwrap();
    }
--- a/proxy/src/auth/credentials.rs
+++ b/proxy/src/auth/credentials.rs
@@ -5,7 +5,6 @@ use std::net::IpAddr;
 use std::str::FromStr;

 use itertools::Itertools;
-use pq_proto::StartupMessageParams;
 use thiserror::Error;
 use tracing::{debug, warn};

@@ -13,6 +12,7 @@ use crate::auth::password_hack::parse_endpoint_param;
 use crate::context::RequestContext;
 use crate::error::{ReportableError, UserFacingError};
 use crate::metrics::{Metrics, SniGroup, SniKind};
+use crate::pqproto::StartupMessageParams;
 use crate::proxy::NeonOptions;
 use crate::serverless::{AUTH_BROKER_SNI, SERVERLESS_DRIVER_SNI};
 use crate::types::{EndpointId, RoleName};
--- a/proxy/src/auth/flow.rs
+++ b/proxy/src/auth/flow.rs
@@ -1,10 +1,8 @@
 //! Main authentication flow.

-use std::io;
 use std::sync::Arc;

 use postgres_protocol::authentication::sasl::{SCRAM_SHA_256, SCRAM_SHA_256_PLUS};
-use pq_proto::{BeAuthenticationSaslMessage, BeMessage, BeMessage as Be};
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::info;

@@ -13,35 +11,26 @@ use super::{AuthError, PasswordHackPayload};
 use crate::context::RequestContext;
 use crate::control_plane::AuthSecret;
 use crate::intern::EndpointIdInt;
+use crate::pqproto::{BeAuthenticationSaslMessage, BeMessage};
 use crate::sasl;
 use crate::scram::threadpool::ThreadPool;
 use crate::scram::{self};
 use crate::stream::{PqStream, Stream};
 use crate::tls::TlsServerEndPoint;

-/// Every authentication selector is supposed to implement this trait.
-pub(crate) trait AuthMethod {
-    /// Any authentication selector should provide initial backend message
-    /// containing auth method name and parameters, e.g. md5 salt.
-    fn first_message(&self, channel_binding: bool) -> BeMessage<'_>;
-}
-
-/// Initial state of [`AuthFlow`].
-pub(crate) struct Begin;
-
 /// Use [SCRAM](crate::scram)-based auth in [`AuthFlow`].
 pub(crate) struct Scram<'a>(
    pub(crate) &'a scram::ServerSecret,
    pub(crate) &'a RequestContext,
 );

-impl AuthMethod for Scram<'_> {
+impl Scram<'_> {
    #[inline(always)]
    fn first_message(&self, channel_binding: bool) -> BeMessage<'_> {
        if channel_binding {
-            Be::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(scram::METHODS))
+            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(scram::METHODS))
        } else {
-            Be::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(
+            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(
                scram::METHODS_WITHOUT_PLUS,
            ))
        }
@@ -52,13 +41,6 @@ impl AuthMethod for Scram<'_> {
 /// <https://github.com/neondatabase/cloud/issues/1620#issuecomment-1165332290>.
 pub(crate) struct PasswordHack;

-impl AuthMethod for PasswordHack {
-    #[inline(always)]
-    fn first_message(&self, _channel_binding: bool) -> BeMessage<'_> {
-        Be::AuthenticationCleartextPassword
-    }
-}
-
 /// Use clear-text password auth called `password` in docs
 /// <https://www.postgresql.org/docs/current/auth-password.html>
 pub(crate) struct CleartextPassword {
@@ -67,53 +49,37 @@ pub(crate) struct CleartextPassword {
    pub(crate) secret: AuthSecret,
 }

-impl AuthMethod for CleartextPassword {
-    #[inline(always)]
-    fn first_message(&self, _channel_binding: bool) -> BeMessage<'_> {
-        Be::AuthenticationCleartextPassword
-    }
-}
-
 /// This wrapper for [`PqStream`] performs client authentication.
 #[must_use]
 pub(crate) struct AuthFlow<'a, S, State> {
    /// The underlying stream which implements libpq's protocol.
    stream: &'a mut PqStream<Stream<S>>,
-    /// State might contain ancillary data (see [`Self::begin`]).
+    /// State might contain ancillary data.
    state: State,
    tls_server_end_point: TlsServerEndPoint,
 }

 /// Initial state of the stream wrapper.
-impl<'a, S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'a, S, Begin> {
+impl<'a, S: AsyncRead + AsyncWrite + Unpin, M> AuthFlow<'a, S, M> {
    /// Create a new wrapper for client authentication.
-    pub(crate) fn new(stream: &'a mut PqStream<Stream<S>>) -> Self {
+    pub(crate) fn new(stream: &'a mut PqStream<Stream<S>>, method: M) -> Self {
        let tls_server_end_point = stream.get_ref().tls_server_end_point();

        Self {
            stream,
-            state: Begin,
+            state: method,
            tls_server_end_point,
        }
    }
-
-    /// Move to the next step by sending auth method's name & params to client.
-    pub(crate) async fn begin<M: AuthMethod>(self, method: M) -> io::Result<AuthFlow<'a, S, M>> {
-        self.stream
-            .write_message(&method.first_message(self.tls_server_end_point.supported()))
-            .await?;
-
-        Ok(AuthFlow {
-            stream: self.stream,
-            state: method,
-            tls_server_end_point: self.tls_server_end_point,
-        })
-    }
 }

 impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, PasswordHack> {
    /// Perform user authentication. Raise an error in case authentication failed.
    pub(crate) async fn get_password(self) -> super::Result<PasswordHackPayload> {
+        self.stream
+            .write_message(BeMessage::AuthenticationCleartextPassword);
+        self.stream.flush().await?;
+
        let msg = self.stream.read_password_message().await?;
        let password = msg
            .strip_suffix(&[0])
@@ -133,6 +99,10 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, PasswordHack> {
 impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, CleartextPassword> {
    /// Perform user authentication. Raise an error in case authentication failed.
    pub(crate) async fn authenticate(self) -> super::Result<sasl::Outcome<ComputeCredentialKeys>> {
+        self.stream
+            .write_message(BeMessage::AuthenticationCleartextPassword);
+        self.stream.flush().await?;
+
        let msg = self.stream.read_password_message().await?;
        let password = msg
            .strip_suffix(&[0])
@@ -147,7 +117,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, CleartextPassword> {
        .await?;

        if let sasl::Outcome::Success(_) = &outcome {
-            self.stream.write_message_noflush(&Be::AuthenticationOk)?;
+            self.stream.write_message(BeMessage::AuthenticationOk);
        }

        Ok(outcome)
@@ -159,42 +129,36 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {
    /// Perform user authentication. Raise an error in case authentication failed.
    pub(crate) async fn authenticate(self) -> super::Result<sasl::Outcome<scram::ScramKey>> {
        let Scram(secret, ctx) = self.state;
+        let channel_binding = self.tls_server_end_point;

-        // pause the timer while we communicate with the client
-        let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
+        // send sasl message.
+        {
+            // pause the timer while we communicate with the client
+            let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);

-        // Initial client message contains the chosen auth method's name.
-        let msg = self.stream.read_password_message().await?;
-        let sasl = sasl::FirstMessage::parse(&msg)
-            .ok_or(AuthError::MalformedPassword("bad sasl message"))?;
-
-        // Currently, the only supported SASL method is SCRAM.
-        if !scram::METHODS.contains(&sasl.method) {
-            return Err(super::AuthError::bad_auth_method(sasl.method));
+            let sasl = self.state.first_message(channel_binding.supported());
+            self.stream.write_message(sasl);
+            self.stream.flush().await?;
        }

-        match sasl.method {
-            SCRAM_SHA_256 => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256),
-            SCRAM_SHA_256_PLUS => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256Plus),
-            _ => {}
-        }
+        // complete sasl handshake.
+        sasl::authenticate(ctx, self.stream, |method| {
+            // Currently, the only supported SASL method is SCRAM.
+            match method {
+                SCRAM_SHA_256 => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256),
+                SCRAM_SHA_256_PLUS => {
+                    ctx.set_auth_method(crate::context::AuthMethod::ScramSha256Plus);
+                }
+                method => return Err(sasl::Error::BadAuthMethod(method.into())),
+            }

-        // TODO: make this a metric instead
-        info!("client chooses {}", sasl.method);
+            // TODO: make this a metric instead
+            info!("client chooses {}", method);

-        let outcome = sasl::SaslStream::new(self.stream, sasl.message)
-            .authenticate(scram::Exchange::new(
-                secret,
-                rand::random,
-                self.tls_server_end_point,
-            ))
-            .await?;
-
-        if let sasl::Outcome::Success(_) = &outcome {
-            self.stream.write_message_noflush(&Be::AuthenticationOk)?;
-        }
-
-        Ok(outcome)
+            Ok(scram::Exchange::new(secret, rand::random, channel_binding))
+        })
+        .await
+        .map_err(AuthError::Sasl)
    }
 }

--- a/proxy/src/binary/local_proxy.rs
+++ b/proxy/src/binary/local_proxy.rs
@@ -32,9 +32,7 @@ use crate::ext::TaskExt;
 use crate::http::health_server::AppMetrics;
 use crate::intern::RoleNameInt;
 use crate::metrics::{Metrics, ThreadPoolMetrics};
-use crate::rate_limiter::{
-    BucketRateLimiter, EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo,
-};
+use crate::rate_limiter::{EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo};
 use crate::scram::threadpool::ThreadPool;
 use crate::serverless::cancel_set::CancelSet;
 use crate::serverless::{self, GlobalConnPoolOptions};
@@ -69,15 +67,6 @@ struct LocalProxyCliArgs {
    /// Can be given multiple times for different bucket sizes.
    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]
    user_rps_limit: Vec<RateBucketInfo>,
-    /// Whether the auth rate limiter actually takes effect (for testing)
-    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
-    auth_rate_limit_enabled: bool,
-    /// Authentication rate limiter max number of hashes per second.
-    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)]
-    auth_rate_limit: Vec<RateBucketInfo>,
-    /// The IP subnet to use when considering whether two IP addresses are considered the same.
-    #[clap(long, default_value_t = 64)]
-    auth_rate_limit_ip_subnet: u8,
    /// Whether to retry the connection to the compute node
    #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)]
    connect_to_compute_retry: String,
@@ -282,9 +271,6 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
            jwks_cache: JwkCache::default(),
            thread_pool: ThreadPool::new(0),
            scram_protocol_timeout: Duration::from_secs(10),
-            rate_limiter_enabled: false,
-            rate_limiter: BucketRateLimiter::new(vec![]),
-            rate_limit_ip_subnet: 64,
            ip_allowlist_check_enabled: true,
            is_vpc_acccess_proxy: false,
            is_auth_broker: false,
--- a/proxy/src/binary/pg_sni_router.rs
+++ b/proxy/src/binary/pg_sni_router.rs
@@ -4,8 +4,9 @@
 //! This allows connecting to pods/services running in the same Kubernetes cluster from
 //! the outside. Similar to an ingress controller for HTTPS.

+use std::net::SocketAddr;
 use std::path::Path;
-use std::{net::SocketAddr, sync::Arc};
+use std::sync::Arc;

 use anyhow::{Context, anyhow, bail, ensure};
 use clap::Arg;
@@ -17,6 +18,7 @@ use rustls::pki_types::{DnsName, PrivateKeyDer};
 use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
 use tokio::net::TcpListener;
 use tokio_rustls::TlsConnector;
+use tokio_rustls::server::TlsStream;
 use tokio_util::sync::CancellationToken;
 use tracing::{Instrument, error, info};
 use utils::project_git_version;
@@ -24,10 +26,12 @@ use utils::sentry_init::init_sentry;

 use crate::context::RequestContext;
 use crate::metrics::{Metrics, ThreadPoolMetrics};
+use crate::pqproto::FeStartupPacket;
 use crate::protocol2::ConnectionInfo;
-use crate::proxy::{ErrorSource, copy_bidirectional_client_compute, run_until_cancelled};
+use crate::proxy::{
+    ErrorSource, TlsRequired, copy_bidirectional_client_compute, run_until_cancelled,
+};
 use crate::stream::{PqStream, Stream};
-use crate::tls::TlsServerEndPoint;

 project_git_version!(GIT_VERSION);

@@ -84,7 +88,7 @@ pub async fn run() -> anyhow::Result<()> {
        .parse()?;

    // Configure TLS
-    let (tls_config, tls_server_end_point): (Arc<rustls::ServerConfig>, TlsServerEndPoint) = match (
+    let tls_config = match (
        args.get_one::<String>("tls-key"),
        args.get_one::<String>("tls-cert"),
    ) {
@@ -117,7 +121,6 @@ pub async fn run() -> anyhow::Result<()> {
        dest.clone(),
        tls_config.clone(),
        None,
-        tls_server_end_point,
        proxy_listener,
        cancellation_token.clone(),
    ))
@@ -127,7 +130,6 @@ pub async fn run() -> anyhow::Result<()> {
        dest,
        tls_config,
        Some(compute_tls_config),
-        tls_server_end_point,
        proxy_listener_compute_tls,
        cancellation_token.clone(),
    ))
@@ -154,7 +156,7 @@ pub async fn run() -> anyhow::Result<()> {
 pub(super) fn parse_tls(
    key_path: &Path,
    cert_path: &Path,
-) -> anyhow::Result<(Arc<rustls::ServerConfig>, TlsServerEndPoint)> {
+) -> anyhow::Result<Arc<rustls::ServerConfig>> {
    let key = {
        let key_bytes = std::fs::read(key_path).context("TLS key file")?;

@@ -187,10 +189,6 @@ pub(super) fn parse_tls(
            })?
    };

-    // needed for channel bindings
-    let first_cert = cert_chain.first().context("missing certificate")?;
-    let tls_server_end_point = TlsServerEndPoint::new(first_cert)?;
-
    let tls_config =
        rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider()))
            .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])
@@ -199,14 +197,13 @@ pub(super) fn parse_tls(
            .with_single_cert(cert_chain, key)?
            .into();

-    Ok((tls_config, tls_server_end_point))
+    Ok(tls_config)
 }

 pub(super) async fn task_main(
    dest_suffix: Arc<String>,
    tls_config: Arc<rustls::ServerConfig>,
    compute_tls_config: Option<Arc<rustls::ClientConfig>>,
-    tls_server_end_point: TlsServerEndPoint,
    listener: tokio::net::TcpListener,
    cancellation_token: CancellationToken,
 ) -> anyhow::Result<()> {
@@ -242,15 +239,7 @@ pub(super) async fn task_main(
                    crate::metrics::Protocol::SniRouter,
                    "sni",
                );
-                handle_client(
-                    ctx,
-                    dest_suffix,
-                    tls_config,
-                    compute_tls_config,
-                    tls_server_end_point,
-                    socket,
-                )
-                .await
+                handle_client(ctx, dest_suffix, tls_config, compute_tls_config, socket).await
            }
            .unwrap_or_else(|e| {
                // Acknowledge that the task has finished with an error.
@@ -269,55 +258,26 @@ pub(super) async fn task_main(
    Ok(())
 }

-const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
-
 async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
    ctx: &RequestContext,
    raw_stream: S,
    tls_config: Arc<rustls::ServerConfig>,
-    tls_server_end_point: TlsServerEndPoint,
-) -> anyhow::Result<Stream<S>> {
-    let mut stream = PqStream::new(Stream::from_raw(raw_stream));
-
-    let msg = stream.read_startup_packet().await?;
-    use pq_proto::FeStartupPacket::SslRequest;
-
+) -> anyhow::Result<TlsStream<S>> {
+    let (mut stream, msg) = PqStream::parse_startup(Stream::from_raw(raw_stream)).await?;
    match msg {
-        SslRequest { direct: false } => {
-            stream
-                .write_message(&pq_proto::BeMessage::EncryptionResponse(true))
-                .await?;
+        FeStartupPacket::SslRequest { direct: None } => {
+            let raw = stream.accept_tls().await?;

-            // Upgrade raw stream into a secure TLS-backed stream.
-            // NOTE: We've consumed `tls`; this fact will be used later.
-
-            let (raw, read_buf) = stream.into_inner();
-            // TODO: Normally, client doesn't send any data before
-            // server says TLS handshake is ok and read_buf is empty.
-            // However, you could imagine pipelining of postgres
-            // SSLRequest + TLS ClientHello in one hunk similar to
-            // pipelining in our node js driver. We should probably
-            // support that by chaining read_buf with the stream.
-            if !read_buf.is_empty() {
-                bail!("data is sent before server replied with EncryptionResponse");
-            }
-
-            Ok(Stream::Tls {
-                tls: Box::new(
-                    raw.upgrade(tls_config, !ctx.has_private_peer_addr())
-                        .await?,
-                ),
-                tls_server_end_point,
-            })
+            Ok(raw
+                .upgrade(tls_config, !ctx.has_private_peer_addr())
+                .await?)
        }
        unexpected => {
            info!(
                ?unexpected,
                "unexpected startup packet, rejecting connection"
            );
-            stream
-                .throw_error_str(ERR_INSECURE_CONNECTION, crate::error::ErrorKind::User, None)
-                .await?
+            Err(stream.throw_error(TlsRequired, None).await)?
        }
    }
 }
@@ -327,15 +287,18 @@ async fn handle_client(
    dest_suffix: Arc<String>,
    tls_config: Arc<rustls::ServerConfig>,
    compute_tls_config: Option<Arc<rustls::ClientConfig>>,
-    tls_server_end_point: TlsServerEndPoint,
    stream: impl AsyncRead + AsyncWrite + Unpin,
 ) -> anyhow::Result<()> {
-    let mut tls_stream = ssl_handshake(&ctx, stream, tls_config, tls_server_end_point).await?;
+    let mut tls_stream = ssl_handshake(&ctx, stream, tls_config).await?;

    // Cut off first part of the SNI domain
    // We receive required destination details in the format of
    //   `{k8s_service_name}--{k8s_namespace}--{port}.non-sni-domain`
-    let sni = tls_stream.sni_hostname().ok_or(anyhow!("SNI missing"))?;
+    let sni = tls_stream
+        .get_ref()
+        .1
+        .server_name()
+        .ok_or(anyhow!("SNI missing"))?;
    let dest: Vec<&str> = sni
        .split_once('.')
        .context("invalid SNI")?
--- a/proxy/src/binary/proxy.rs
+++ b/proxy/src/binary/proxy.rs
@@ -20,7 +20,7 @@ use utils::sentry_init::init_sentry;
 use utils::{project_build_tag, project_git_version};

 use crate::auth::backend::jwt::JwkCache;
-use crate::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned};
+use crate::auth::backend::{ConsoleRedirectBackend, MaybeOwned};
 use crate::cancellation::{CancellationHandler, handle_cancel_messages};
 use crate::config::{
    self, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, ProjectInfoCacheOptions,
@@ -29,9 +29,7 @@ use crate::config::{
 use crate::context::parquet::ParquetUploadArgs;
 use crate::http::health_server::AppMetrics;
 use crate::metrics::Metrics;
-use crate::rate_limiter::{
-    EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo, WakeComputeRateLimiter,
-};
+use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
 use crate::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
 use crate::redis::kv_ops::RedisKVClient;
 use crate::redis::{elasticache, notifications};
@@ -154,15 +152,6 @@ struct ProxyCliArgs {
    /// Wake compute rate limiter max number of requests per second.
    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
    wake_compute_limit: Vec<RateBucketInfo>,
-    /// Whether the auth rate limiter actually takes effect (for testing)
-    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
-    auth_rate_limit_enabled: bool,
-    /// Authentication rate limiter max number of hashes per second.
-    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)]
-    auth_rate_limit: Vec<RateBucketInfo>,
-    /// The IP subnet to use when considering whether two IP addresses are considered the same.
-    #[clap(long, default_value_t = 64)]
-    auth_rate_limit_ip_subnet: u8,
    /// Redis rate limiter max number of requests per second.
    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_REDIS_SET)]
    redis_rps_limit: Vec<RateBucketInfo>,
@@ -410,22 +399,9 @@ pub async fn run() -> anyhow::Result<()> {
        Some(tx_cancel),
    ));

-    // bit of a hack - find the min rps and max rps supported and turn it into
-    // leaky bucket config instead
-    let max = args
-        .endpoint_rps_limit
-        .iter()
-        .map(|x| x.rps())
-        .max_by(f64::total_cmp)
-        .unwrap_or(EndpointRateLimiter::DEFAULT.max);
-    let rps = args
-        .endpoint_rps_limit
-        .iter()
-        .map(|x| x.rps())
-        .min_by(f64::total_cmp)
-        .unwrap_or(EndpointRateLimiter::DEFAULT.rps);
    let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(
-        LeakyBucketConfig { rps, max },
+        RateBucketInfo::to_leaky_bucket(&args.endpoint_rps_limit)
+            .unwrap_or(EndpointRateLimiter::DEFAULT),
        64,
    ));

@@ -476,8 +452,7 @@ pub async fn run() -> anyhow::Result<()> {
        let key_path = args.tls_key.expect("already asserted it is set");
        let cert_path = args.tls_cert.expect("already asserted it is set");

-        let (tls_config, tls_server_end_point) =
-            super::pg_sni_router::parse_tls(&key_path, &cert_path)?;
+        let tls_config = super::pg_sni_router::parse_tls(&key_path, &cert_path)?;

        let dest = Arc::new(dest);

@@ -485,7 +460,6 @@ pub async fn run() -> anyhow::Result<()> {
            dest.clone(),
            tls_config.clone(),
            None,
-            tls_server_end_point,
            listen,
            cancellation_token.clone(),
        ));
@@ -494,7 +468,6 @@ pub async fn run() -> anyhow::Result<()> {
            dest,
            tls_config,
            Some(config.connect_to_compute.tls.clone()),
-            tls_server_end_point,
            listen_tls,
            cancellation_token.clone(),
        ));
@@ -681,9 +654,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        jwks_cache: JwkCache::default(),
        thread_pool,
        scram_protocol_timeout: args.scram_protocol_timeout,
-        rate_limiter_enabled: args.auth_rate_limit_enabled,
-        rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
-        rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
        ip_allowlist_check_enabled: !args.is_private_access_proxy,
        is_vpc_acccess_proxy: args.is_private_access_proxy,
        is_auth_broker: args.is_auth_broker,
--- a/proxy/src/cache/project_info.rs
+++ b/proxy/src/cache/project_info.rs
@@ -1,30 +1,25 @@
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet, hash_map};
 use std::convert::Infallible;
-use std::sync::Arc;
 use std::sync::atomic::AtomicU64;
 use std::time::Duration;

 use async_trait::async_trait;
 use clashmap::ClashMap;
+use clashmap::mapref::one::Ref;
 use rand::{Rng, thread_rng};
-use smol_str::SmolStr;
 use tokio::sync::Mutex;
 use tokio::time::Instant;
 use tracing::{debug, info};

-use super::{Cache, Cached};
-use crate::auth::IpPattern;
 use crate::config::ProjectInfoCacheOptions;
-use crate::control_plane::{AccessBlockerFlags, AuthSecret};
+use crate::control_plane::{EndpointAccessControl, RoleAccessControl};
 use crate::intern::{AccountIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
 use crate::types::{EndpointId, RoleName};

 #[async_trait]
 pub(crate) trait ProjectInfoCache {
-    fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt);
-    fn invalidate_allowed_vpc_endpoint_ids_for_projects(&self, project_ids: Vec<ProjectIdInt>);
-    fn invalidate_allowed_vpc_endpoint_ids_for_org(&self, account_id: AccountIdInt);
-    fn invalidate_block_public_or_vpc_access_for_project(&self, project_id: ProjectIdInt);
+    fn invalidate_endpoint_access_for_project(&self, project_id: ProjectIdInt);
+    fn invalidate_endpoint_access_for_org(&self, account_id: AccountIdInt);
    fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt);
    async fn decrement_active_listeners(&self);
    async fn increment_active_listeners(&self);
@@ -42,6 +37,10 @@ impl<T> Entry<T> {
            value,
        }
    }
+
+    pub(crate) fn get(&self, valid_since: Instant) -> Option<&T> {
+        (valid_since < self.created_at).then_some(&self.value)
+    }
 }

 impl<T> From<T> for Entry<T> {
@@ -50,101 +49,32 @@ impl<T> From<T> for Entry<T> {
    }
 }

-#[derive(Default)]
 struct EndpointInfo {
-    secret: std::collections::HashMap<RoleNameInt, Entry<Option<AuthSecret>>>,
-    allowed_ips: Option<Entry<Arc<Vec<IpPattern>>>>,
-    block_public_or_vpc_access: Option<Entry<AccessBlockerFlags>>,
-    allowed_vpc_endpoint_ids: Option<Entry<Arc<Vec<String>>>>,
+    role_controls: HashMap<RoleNameInt, Entry<RoleAccessControl>>,
+    controls: Option<Entry<EndpointAccessControl>>,
 }

 impl EndpointInfo {
-    fn check_ignore_cache(ignore_cache_since: Option<Instant>, created_at: Instant) -> bool {
-        match ignore_cache_since {
-            None => false,
-            Some(t) => t < created_at,
-        }
-    }
    pub(crate) fn get_role_secret(
        &self,
        role_name: RoleNameInt,
        valid_since: Instant,
-        ignore_cache_since: Option<Instant>,
-    ) -> Option<(Option<AuthSecret>, bool)> {
-        if let Some(secret) = self.secret.get(&role_name) {
-            if valid_since < secret.created_at {
-                return Some((
-                    secret.value.clone(),
-                    Self::check_ignore_cache(ignore_cache_since, secret.created_at),
-                ));
-            }
-        }
-        None
+    ) -> Option<RoleAccessControl> {
+        let controls = self.role_controls.get(&role_name)?;
+        controls.get(valid_since).cloned()
    }

-    pub(crate) fn get_allowed_ips(
-        &self,
-        valid_since: Instant,
-        ignore_cache_since: Option<Instant>,
-    ) -> Option<(Arc<Vec<IpPattern>>, bool)> {
-        if let Some(allowed_ips) = &self.allowed_ips {
-            if valid_since < allowed_ips.created_at {
-                return Some((
-                    allowed_ips.value.clone(),
-                    Self::check_ignore_cache(ignore_cache_since, allowed_ips.created_at),
-                ));
-            }
-        }
-        None
-    }
-    pub(crate) fn get_allowed_vpc_endpoint_ids(
-        &self,
-        valid_since: Instant,
-        ignore_cache_since: Option<Instant>,
-    ) -> Option<(Arc<Vec<String>>, bool)> {
-        if let Some(allowed_vpc_endpoint_ids) = &self.allowed_vpc_endpoint_ids {
-            if valid_since < allowed_vpc_endpoint_ids.created_at {
-                return Some((
-                    allowed_vpc_endpoint_ids.value.clone(),
-                    Self::check_ignore_cache(
-                        ignore_cache_since,
-                        allowed_vpc_endpoint_ids.created_at,
-                    ),
-                ));
-            }
-        }
-        None
-    }
-    pub(crate) fn get_block_public_or_vpc_access(
-        &self,
-        valid_since: Instant,
-        ignore_cache_since: Option<Instant>,
-    ) -> Option<(AccessBlockerFlags, bool)> {
-        if let Some(block_public_or_vpc_access) = &self.block_public_or_vpc_access {
-            if valid_since < block_public_or_vpc_access.created_at {
-                return Some((
-                    block_public_or_vpc_access.value.clone(),
-                    Self::check_ignore_cache(
-                        ignore_cache_since,
-                        block_public_or_vpc_access.created_at,
-                    ),
-                ));
-            }
-        }
-        None
+    pub(crate) fn get_controls(&self, valid_since: Instant) -> Option<EndpointAccessControl> {
+        let controls = self.controls.as_ref()?;
+        controls.get(valid_since).cloned()
    }

-    pub(crate) fn invalidate_allowed_ips(&mut self) {
-        self.allowed_ips = None;
-    }
-    pub(crate) fn invalidate_allowed_vpc_endpoint_ids(&mut self) {
-        self.allowed_vpc_endpoint_ids = None;
-    }
-    pub(crate) fn invalidate_block_public_or_vpc_access(&mut self) {
-        self.block_public_or_vpc_access = None;
+    pub(crate) fn invalidate_endpoint(&mut self) {
+        self.controls = None;
    }
+
    pub(crate) fn invalidate_role_secret(&mut self, role_name: RoleNameInt) {
-        self.secret.remove(&role_name);
+        self.role_controls.remove(&role_name);
    }
 }

@@ -170,34 +100,22 @@ pub struct ProjectInfoCacheImpl {

 #[async_trait]
 impl ProjectInfoCache for ProjectInfoCacheImpl {
-    fn invalidate_allowed_vpc_endpoint_ids_for_projects(&self, project_ids: Vec<ProjectIdInt>) {
-        info!(
-            "invalidating allowed vpc endpoint ids for projects `{}`",
-            project_ids
-                .iter()
-                .map(|id| id.to_string())
-                .collect::<Vec<_>>()
-                .join(", ")
-        );
-        for project_id in project_ids {
-            let endpoints = self
-                .project2ep
-                .get(&project_id)
-                .map(|kv| kv.value().clone())
-                .unwrap_or_default();
-            for endpoint_id in endpoints {
-                if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
-                    endpoint_info.invalidate_allowed_vpc_endpoint_ids();
-                }
+    fn invalidate_endpoint_access_for_project(&self, project_id: ProjectIdInt) {
+        info!("invalidating endpoint access for project `{project_id}`");
+        let endpoints = self
+            .project2ep
+            .get(&project_id)
+            .map(|kv| kv.value().clone())
+            .unwrap_or_default();
+        for endpoint_id in endpoints {
+            if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
+                endpoint_info.invalidate_endpoint();
            }
        }
    }

-    fn invalidate_allowed_vpc_endpoint_ids_for_org(&self, account_id: AccountIdInt) {
-        info!(
-            "invalidating allowed vpc endpoint ids for org `{}`",
-            account_id
-        );
+    fn invalidate_endpoint_access_for_org(&self, account_id: AccountIdInt) {
+        info!("invalidating endpoint access for org `{account_id}`");
        let endpoints = self
            .account2ep
            .get(&account_id)
@@ -205,41 +123,11 @@ impl ProjectInfoCache for ProjectInfoCacheImpl {
            .unwrap_or_default();
        for endpoint_id in endpoints {
            if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
-                endpoint_info.invalidate_allowed_vpc_endpoint_ids();
+                endpoint_info.invalidate_endpoint();
            }
        }
    }

-    fn invalidate_block_public_or_vpc_access_for_project(&self, project_id: ProjectIdInt) {
-        info!(
-            "invalidating block public or vpc access for project `{}`",
-            project_id
-        );
-        let endpoints = self
-            .project2ep
-            .get(&project_id)
-            .map(|kv| kv.value().clone())
-            .unwrap_or_default();
-        for endpoint_id in endpoints {
-            if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
-                endpoint_info.invalidate_block_public_or_vpc_access();
-            }
-        }
-    }
-
-    fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt) {
-        info!("invalidating allowed ips for project `{}`", project_id);
-        let endpoints = self
-            .project2ep
-            .get(&project_id)
-            .map(|kv| kv.value().clone())
-            .unwrap_or_default();
-        for endpoint_id in endpoints {
-            if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
-                endpoint_info.invalidate_allowed_ips();
-            }
-        }
-    }
    fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt) {
        info!(
            "invalidating role secret for project_id `{}` and role_name `{}`",
@@ -256,6 +144,7 @@ impl ProjectInfoCache for ProjectInfoCacheImpl {
            }
        }
    }
+
    async fn decrement_active_listeners(&self) {
        let mut listeners_guard = self.active_listeners_lock.lock().await;
        if *listeners_guard == 0 {
@@ -293,155 +182,71 @@ impl ProjectInfoCacheImpl {
        }
    }

+    fn get_endpoint_cache(
+        &self,
+        endpoint_id: &EndpointId,
+    ) -> Option<Ref<'_, EndpointIdInt, EndpointInfo>> {
+        let endpoint_id = EndpointIdInt::get(endpoint_id)?;
+        self.cache.get(&endpoint_id)
+    }
+
    pub(crate) fn get_role_secret(
        &self,
        endpoint_id: &EndpointId,
        role_name: &RoleName,
-    ) -> Option<Cached<&Self, Option<AuthSecret>>> {
-        let endpoint_id = EndpointIdInt::get(endpoint_id)?;
+    ) -> Option<RoleAccessControl> {
+        let valid_since = self.get_cache_times();
        let role_name = RoleNameInt::get(role_name)?;
-        let (valid_since, ignore_cache_since) = self.get_cache_times();
-        let endpoint_info = self.cache.get(&endpoint_id)?;
-        let (value, ignore_cache) =
-            endpoint_info.get_role_secret(role_name, valid_since, ignore_cache_since)?;
-        if !ignore_cache {
-            let cached = Cached {
-                token: Some((
-                    self,
-                    CachedLookupInfo::new_role_secret(endpoint_id, role_name),
-                )),
-                value,
-            };
-            return Some(cached);
-        }
-        Some(Cached::new_uncached(value))
-    }
-    pub(crate) fn get_allowed_ips(
-        &self,
-        endpoint_id: &EndpointId,
-    ) -> Option<Cached<&Self, Arc<Vec<IpPattern>>>> {
-        let endpoint_id = EndpointIdInt::get(endpoint_id)?;
-        let (valid_since, ignore_cache_since) = self.get_cache_times();
-        let endpoint_info = self.cache.get(&endpoint_id)?;
-        let value = endpoint_info.get_allowed_ips(valid_since, ignore_cache_since);
-        let (value, ignore_cache) = value?;
-        if !ignore_cache {
-            let cached = Cached {
-                token: Some((self, CachedLookupInfo::new_allowed_ips(endpoint_id))),
-                value,
-            };
-            return Some(cached);
-        }
-        Some(Cached::new_uncached(value))
-    }
-    pub(crate) fn get_allowed_vpc_endpoint_ids(
-        &self,
-        endpoint_id: &EndpointId,
-    ) -> Option<Cached<&Self, Arc<Vec<String>>>> {
-        let endpoint_id = EndpointIdInt::get(endpoint_id)?;
-        let (valid_since, ignore_cache_since) = self.get_cache_times();
-        let endpoint_info = self.cache.get(&endpoint_id)?;
-        let value = endpoint_info.get_allowed_vpc_endpoint_ids(valid_since, ignore_cache_since);
-        let (value, ignore_cache) = value?;
-        if !ignore_cache {
-            let cached = Cached {
-                token: Some((
-                    self,
-                    CachedLookupInfo::new_allowed_vpc_endpoint_ids(endpoint_id),
-                )),
-                value,
-            };
-            return Some(cached);
-        }
-        Some(Cached::new_uncached(value))
-    }
-    pub(crate) fn get_block_public_or_vpc_access(
-        &self,
-        endpoint_id: &EndpointId,
-    ) -> Option<Cached<&Self, AccessBlockerFlags>> {
-        let endpoint_id = EndpointIdInt::get(endpoint_id)?;
-        let (valid_since, ignore_cache_since) = self.get_cache_times();
-        let endpoint_info = self.cache.get(&endpoint_id)?;
-        let value = endpoint_info.get_block_public_or_vpc_access(valid_since, ignore_cache_since);
-        let (value, ignore_cache) = value?;
-        if !ignore_cache {
-            let cached = Cached {
-                token: Some((
-                    self,
-                    CachedLookupInfo::new_block_public_or_vpc_access(endpoint_id),
-                )),
-                value,
-            };
-            return Some(cached);
-        }
-        Some(Cached::new_uncached(value))
+        let endpoint_info = self.get_endpoint_cache(endpoint_id)?;
+        endpoint_info.get_role_secret(role_name, valid_since)
    }

-    pub(crate) fn insert_role_secret(
+    pub(crate) fn get_endpoint_access(
        &self,
-        project_id: ProjectIdInt,
-        endpoint_id: EndpointIdInt,
-        role_name: RoleNameInt,
-        secret: Option<AuthSecret>,
-    ) {
-        if self.cache.len() >= self.config.size {
-            // If there are too many entries, wait until the next gc cycle.
-            return;
-        }
-        self.insert_project2endpoint(project_id, endpoint_id);
-        let mut entry = self.cache.entry(endpoint_id).or_default();
-        if entry.secret.len() < self.config.max_roles {
-            entry.secret.insert(role_name, secret.into());
-        }
+        endpoint_id: &EndpointId,
+    ) -> Option<EndpointAccessControl> {
+        let valid_since = self.get_cache_times();
+        let endpoint_info = self.get_endpoint_cache(endpoint_id)?;
+        endpoint_info.get_controls(valid_since)
    }
-    pub(crate) fn insert_allowed_ips(
-        &self,
-        project_id: ProjectIdInt,
-        endpoint_id: EndpointIdInt,
-        allowed_ips: Arc<Vec<IpPattern>>,
-    ) {
-        if self.cache.len() >= self.config.size {
-            // If there are too many entries, wait until the next gc cycle.
-            return;
-        }
-        self.insert_project2endpoint(project_id, endpoint_id);
-        self.cache.entry(endpoint_id).or_default().allowed_ips = Some(allowed_ips.into());
-    }
-    pub(crate) fn insert_allowed_vpc_endpoint_ids(
+
+    pub(crate) fn insert_endpoint_access(
        &self,
        account_id: Option<AccountIdInt>,
        project_id: ProjectIdInt,
        endpoint_id: EndpointIdInt,
-        allowed_vpc_endpoint_ids: Arc<Vec<String>>,
+        role_name: RoleNameInt,
+        controls: EndpointAccessControl,
+        role_controls: RoleAccessControl,
    ) {
-        if self.cache.len() >= self.config.size {
-            // If there are too many entries, wait until the next gc cycle.
-            return;
-        }
        if let Some(account_id) = account_id {
            self.insert_account2endpoint(account_id, endpoint_id);
        }
        self.insert_project2endpoint(project_id, endpoint_id);
-        self.cache
-            .entry(endpoint_id)
-            .or_default()
-            .allowed_vpc_endpoint_ids = Some(allowed_vpc_endpoint_ids.into());
-    }
-    pub(crate) fn insert_block_public_or_vpc_access(
-        &self,
-        project_id: ProjectIdInt,
-        endpoint_id: EndpointIdInt,
-        access_blockers: AccessBlockerFlags,
-    ) {
+
        if self.cache.len() >= self.config.size {
            // If there are too many entries, wait until the next gc cycle.
            return;
        }
-        self.insert_project2endpoint(project_id, endpoint_id);
-        self.cache
-            .entry(endpoint_id)
-            .or_default()
-            .block_public_or_vpc_access = Some(access_blockers.into());
+
+        let controls = Entry::from(controls);
+        let role_controls = Entry::from(role_controls);
+
+        match self.cache.entry(endpoint_id) {
+            clashmap::Entry::Vacant(e) => {
+                e.insert(EndpointInfo {
+                    role_controls: HashMap::from_iter([(role_name, role_controls)]),
+                    controls: Some(controls),
+                });
+            }
+            clashmap::Entry::Occupied(mut e) => {
+                let ep = e.get_mut();
+                ep.controls = Some(controls);
+                if ep.role_controls.len() < self.config.max_roles {
+                    ep.role_controls.insert(role_name, role_controls);
+                }
+            }
+        }
    }

    fn insert_project2endpoint(&self, project_id: ProjectIdInt, endpoint_id: EndpointIdInt) {
@@ -452,6 +257,7 @@ impl ProjectInfoCacheImpl {
                .insert(project_id, HashSet::from([endpoint_id]));
        }
    }
+
    fn insert_account2endpoint(&self, account_id: AccountIdInt, endpoint_id: EndpointIdInt) {
        if let Some(mut endpoints) = self.account2ep.get_mut(&account_id) {
            endpoints.insert(endpoint_id);
@@ -460,21 +266,57 @@ impl ProjectInfoCacheImpl {
                .insert(account_id, HashSet::from([endpoint_id]));
        }
    }
-    fn get_cache_times(&self) -> (Instant, Option<Instant>) {
-        let mut valid_since = Instant::now() - self.config.ttl;
-        // Only ignore cache if ttl is disabled.
+
+    fn ignore_ttl_since(&self) -> Option<Instant> {
        let ttl_disabled_since_us = self
            .ttl_disabled_since_us
            .load(std::sync::atomic::Ordering::Relaxed);
-        let ignore_cache_since = if ttl_disabled_since_us == u64::MAX {
-            None
-        } else {
-            let ignore_cache_since = self.start_time + Duration::from_micros(ttl_disabled_since_us);
+
+        if ttl_disabled_since_us == u64::MAX {
+            return None;
+        }
+
+        Some(self.start_time + Duration::from_micros(ttl_disabled_since_us))
+    }
+
+    fn get_cache_times(&self) -> Instant {
+        let mut valid_since = Instant::now() - self.config.ttl;
+        if let Some(ignore_ttl_since) = self.ignore_ttl_since() {
            // We are fine if entry is not older than ttl or was added before we are getting notifications.
-            valid_since = valid_since.min(ignore_cache_since);
-            Some(ignore_cache_since)
+            valid_since = valid_since.min(ignore_ttl_since);
+        }
+        valid_since
+    }
+
+    pub fn maybe_invalidate_role_secret(&self, endpoint_id: &EndpointId, role_name: &RoleName) {
+        let Some(endpoint_id) = EndpointIdInt::get(endpoint_id) else {
+            return;
        };
-        (valid_since, ignore_cache_since)
+        let Some(role_name) = RoleNameInt::get(role_name) else {
+            return;
+        };
+
+        let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) else {
+            return;
+        };
+
+        let entry = endpoint_info.role_controls.entry(role_name);
+        let hash_map::Entry::Occupied(role_controls) = entry else {
+            return;
+        };
+
+        let created_at = role_controls.get().created_at;
+        let expire = match self.ignore_ttl_since() {
+            // if ignoring TTL, we should still try and roll the password if it's old
+            // and the client gave an incorrect password. There could be some lag on the redis channel.
+            Some(_) => created_at + self.config.ttl < Instant::now(),
+            // edge case: redis is down, let's be generous and invalidate the cache immediately.
+            None => true,
+        };
+
+        if expire {
+            role_controls.remove();
+        }
    }

    pub async fn gc_worker(&self) -> anyhow::Result<Infallible> {
@@ -509,84 +351,12 @@ impl ProjectInfoCacheImpl {
    }
 }

-/// Lookup info for project info cache.
-/// This is used to invalidate cache entries.
-pub(crate) struct CachedLookupInfo {
-    /// Search by this key.
-    endpoint_id: EndpointIdInt,
-    lookup_type: LookupType,
-}
-
-impl CachedLookupInfo {
-    pub(self) fn new_role_secret(endpoint_id: EndpointIdInt, role_name: RoleNameInt) -> Self {
-        Self {
-            endpoint_id,
-            lookup_type: LookupType::RoleSecret(role_name),
-        }
-    }
-    pub(self) fn new_allowed_ips(endpoint_id: EndpointIdInt) -> Self {
-        Self {
-            endpoint_id,
-            lookup_type: LookupType::AllowedIps,
-        }
-    }
-    pub(self) fn new_allowed_vpc_endpoint_ids(endpoint_id: EndpointIdInt) -> Self {
-        Self {
-            endpoint_id,
-            lookup_type: LookupType::AllowedVpcEndpointIds,
-        }
-    }
-    pub(self) fn new_block_public_or_vpc_access(endpoint_id: EndpointIdInt) -> Self {
-        Self {
-            endpoint_id,
-            lookup_type: LookupType::BlockPublicOrVpcAccess,
-        }
-    }
-}
-
-enum LookupType {
-    RoleSecret(RoleNameInt),
-    AllowedIps,
-    AllowedVpcEndpointIds,
-    BlockPublicOrVpcAccess,
-}
-
-impl Cache for ProjectInfoCacheImpl {
-    type Key = SmolStr;
-    // Value is not really used here, but we need to specify it.
-    type Value = SmolStr;
-
-    type LookupInfo<Key> = CachedLookupInfo;
-
-    fn invalidate(&self, key: &Self::LookupInfo<SmolStr>) {
-        match &key.lookup_type {
-            LookupType::RoleSecret(role_name) => {
-                if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
-                    endpoint_info.invalidate_role_secret(*role_name);
-                }
-            }
-            LookupType::AllowedIps => {
-                if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
-                    endpoint_info.invalidate_allowed_ips();
-                }
-            }
-            LookupType::AllowedVpcEndpointIds => {
-                if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
-                    endpoint_info.invalidate_allowed_vpc_endpoint_ids();
-                }
-            }
-            LookupType::BlockPublicOrVpcAccess => {
-                if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
-                    endpoint_info.invalidate_block_public_or_vpc_access();
-                }
-            }
-        }
-    }
-}
-
 #[cfg(test)]
 mod tests {
+    use std::sync::Arc;
+
    use super::*;
+    use crate::control_plane::{AccessBlockerFlags, AuthSecret};
    use crate::scram::ServerSecret;
    use crate::types::ProjectId;

@@ -601,6 +371,8 @@ mod tests {
        });
        let project_id: ProjectId = "project".into();
        let endpoint_id: EndpointId = "endpoint".into();
+        let account_id: Option<AccountIdInt> = None;
+
        let user1: RoleName = "user1".into();
        let user2: RoleName = "user2".into();
        let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
@@ -609,183 +381,73 @@ mod tests {
            "127.0.0.1".parse().unwrap(),
            "127.0.0.2".parse().unwrap(),
        ]);
-        cache.insert_role_secret(
+
+        cache.insert_endpoint_access(
+            account_id,
            (&project_id).into(),
            (&endpoint_id).into(),
            (&user1).into(),
-            secret1.clone(),
+            EndpointAccessControl {
+                allowed_ips: allowed_ips.clone(),
+                allowed_vpce: Arc::new(vec![]),
+                flags: AccessBlockerFlags::default(),
+            },
+            RoleAccessControl {
+                secret: secret1.clone(),
+            },
        );
-        cache.insert_role_secret(
+
+        cache.insert_endpoint_access(
+            account_id,
            (&project_id).into(),
            (&endpoint_id).into(),
            (&user2).into(),
-            secret2.clone(),
-        );
-        cache.insert_allowed_ips(
-            (&project_id).into(),
-            (&endpoint_id).into(),
-            allowed_ips.clone(),
+            EndpointAccessControl {
+                allowed_ips: allowed_ips.clone(),
+                allowed_vpce: Arc::new(vec![]),
+                flags: AccessBlockerFlags::default(),
+            },
+            RoleAccessControl {
+                secret: secret2.clone(),
+            },
        );

        let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
-        assert!(cached.cached());
-        assert_eq!(cached.value, secret1);
+        assert_eq!(cached.secret, secret1);
+
        let cached = cache.get_role_secret(&endpoint_id, &user2).unwrap();
-        assert!(cached.cached());
-        assert_eq!(cached.value, secret2);
+        assert_eq!(cached.secret, secret2);

        // Shouldn't add more than 2 roles.
        let user3: RoleName = "user3".into();
        let secret3 = Some(AuthSecret::Scram(ServerSecret::mock([3; 32])));
-        cache.insert_role_secret(
+
+        cache.insert_endpoint_access(
+            account_id,
            (&project_id).into(),
            (&endpoint_id).into(),
            (&user3).into(),
-            secret3.clone(),
+            EndpointAccessControl {
+                allowed_ips: allowed_ips.clone(),
+                allowed_vpce: Arc::new(vec![]),
+                flags: AccessBlockerFlags::default(),
+            },
+            RoleAccessControl {
+                secret: secret3.clone(),
+            },
        );
+
        assert!(cache.get_role_secret(&endpoint_id, &user3).is_none());

-        let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
-        assert!(cached.cached());
-        assert_eq!(cached.value, allowed_ips);
+        let cached = cache.get_endpoint_access(&endpoint_id).unwrap();
+        assert_eq!(cached.allowed_ips, allowed_ips);

        tokio::time::advance(Duration::from_secs(2)).await;
        let cached = cache.get_role_secret(&endpoint_id, &user1);
        assert!(cached.is_none());
        let cached = cache.get_role_secret(&endpoint_id, &user2);
        assert!(cached.is_none());
-        let cached = cache.get_allowed_ips(&endpoint_id);
+        let cached = cache.get_endpoint_access(&endpoint_id);
        assert!(cached.is_none());
    }
-
-    #[tokio::test]
-    async fn test_project_info_cache_invalidations() {
-        tokio::time::pause();
-        let cache = Arc::new(ProjectInfoCacheImpl::new(ProjectInfoCacheOptions {
-            size: 2,
-            max_roles: 2,
-            ttl: Duration::from_secs(1),
-            gc_interval: Duration::from_secs(600),
-        }));
-        cache.clone().increment_active_listeners().await;
-        tokio::time::advance(Duration::from_secs(2)).await;
-
-        let project_id: ProjectId = "project".into();
-        let endpoint_id: EndpointId = "endpoint".into();
-        let user1: RoleName = "user1".into();
-        let user2: RoleName = "user2".into();
-        let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
-        let secret2 = Some(AuthSecret::Scram(ServerSecret::mock([2; 32])));
-        let allowed_ips = Arc::new(vec![
-            "127.0.0.1".parse().unwrap(),
-            "127.0.0.2".parse().unwrap(),
-        ]);
-        cache.insert_role_secret(
-            (&project_id).into(),
-            (&endpoint_id).into(),
-            (&user1).into(),
-            secret1.clone(),
-        );
-        cache.insert_role_secret(
-            (&project_id).into(),
-            (&endpoint_id).into(),
-            (&user2).into(),
-            secret2.clone(),
-        );
-        cache.insert_allowed_ips(
-            (&project_id).into(),
-            (&endpoint_id).into(),
-            allowed_ips.clone(),
-        );
-
-        tokio::time::advance(Duration::from_secs(2)).await;
-        // Nothing should be invalidated.
-
-        let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
-        // TTL is disabled, so it should be impossible to invalidate this value.
-        assert!(!cached.cached());
-        assert_eq!(cached.value, secret1);
-
-        cached.invalidate(); // Shouldn't do anything.
-        let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
-        assert_eq!(cached.value, secret1);
-
-        let cached = cache.get_role_secret(&endpoint_id, &user2).unwrap();
-        assert!(!cached.cached());
-        assert_eq!(cached.value, secret2);
-
-        // The only way to invalidate this value is to invalidate via the api.
-        cache.invalidate_role_secret_for_project((&project_id).into(), (&user2).into());
-        assert!(cache.get_role_secret(&endpoint_id, &user2).is_none());
-
-        let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
-        assert!(!cached.cached());
-        assert_eq!(cached.value, allowed_ips);
-    }
-
-    #[tokio::test]
-    async fn test_increment_active_listeners_invalidate_added_before() {
-        tokio::time::pause();
-        let cache = Arc::new(ProjectInfoCacheImpl::new(ProjectInfoCacheOptions {
-            size: 2,
-            max_roles: 2,
-            ttl: Duration::from_secs(1),
-            gc_interval: Duration::from_secs(600),
-        }));
-
-        let project_id: ProjectId = "project".into();
-        let endpoint_id: EndpointId = "endpoint".into();
-        let user1: RoleName = "user1".into();
-        let user2: RoleName = "user2".into();
-        let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
-        let secret2 = Some(AuthSecret::Scram(ServerSecret::mock([2; 32])));
-        let allowed_ips = Arc::new(vec![
-            "127.0.0.1".parse().unwrap(),
-            "127.0.0.2".parse().unwrap(),
-        ]);
-        cache.insert_role_secret(
-            (&project_id).into(),
-            (&endpoint_id).into(),
-            (&user1).into(),
-            secret1.clone(),
-        );
-        cache.clone().increment_active_listeners().await;
-        tokio::time::advance(Duration::from_millis(100)).await;
-        cache.insert_role_secret(
-            (&project_id).into(),
-            (&endpoint_id).into(),
-            (&user2).into(),
-            secret2.clone(),
-        );
-
-        // Added before ttl was disabled + ttl should be still cached.
-        let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
-        assert!(cached.cached());
-        let cached = cache.get_role_secret(&endpoint_id, &user2).unwrap();
-        assert!(cached.cached());
-
-        tokio::time::advance(Duration::from_secs(1)).await;
-        // Added before ttl was disabled + ttl should expire.
-        assert!(cache.get_role_secret(&endpoint_id, &user1).is_none());
-        assert!(cache.get_role_secret(&endpoint_id, &user2).is_none());
-
-        // Added after ttl was disabled + ttl should not be cached.
-        cache.insert_allowed_ips(
-            (&project_id).into(),
-            (&endpoint_id).into(),
-            allowed_ips.clone(),
-        );
-        let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
-        assert!(!cached.cached());
-
-        tokio::time::advance(Duration::from_secs(1)).await;
-        // Added before ttl was disabled + ttl still should expire.
-        assert!(cache.get_role_secret(&endpoint_id, &user1).is_none());
-        assert!(cache.get_role_secret(&endpoint_id, &user2).is_none());
-        // Shouldn't be invalidated.
-
-        let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
-        assert!(!cached.cached());
-        assert_eq!(cached.value, allowed_ips);
-    }
 }
--- a/proxy/src/cancellation.rs
+++ b/proxy/src/cancellation.rs
@@ -5,7 +5,6 @@ use anyhow::{Context, anyhow};
 use ipnet::{IpNet, Ipv4Net, Ipv6Net};
 use postgres_client::CancelToken;
 use postgres_client::tls::MakeTlsConnect;
-use pq_proto::CancelKeyData;
 use redis::{Cmd, FromRedisValue, Value};
 use serde::{Deserialize, Serialize};
 use thiserror::Error;
@@ -13,15 +12,15 @@ use tokio::net::TcpStream;
 use tokio::sync::{mpsc, oneshot};
 use tracing::{debug, error, info, warn};

+use crate::auth::AuthError;
 use crate::auth::backend::ComputeUserInfo;
-use crate::auth::{AuthError, check_peer_addr_is_in_list};
 use crate::config::ComputeConfig;
 use crate::context::RequestContext;
 use crate::control_plane::ControlPlaneApi;
 use crate::error::ReportableError;
 use crate::ext::LockExt;
 use crate::metrics::{CancelChannelSizeGuard, CancellationRequest, Metrics, RedisMsgKind};
-use crate::protocol2::ConnectionInfoExtra;
+use crate::pqproto::CancelKeyData;
 use crate::rate_limiter::LeakyBucketRateLimiter;
 use crate::redis::keys::KeyPrefix;
 use crate::redis::kv_ops::RedisKVClient;
@@ -272,13 +271,7 @@ pub(crate) enum CancelError {
    #[error("rate limit exceeded")]
    RateLimit,

-    #[error("IP is not allowed")]
-    IpNotAllowed,
-
-    #[error("VPC endpoint id is not allowed to connect")]
-    VpcEndpointIdNotAllowed,
-
-    #[error("Authentication backend error")]
+    #[error("Authentication error")]
    AuthError(#[from] AuthError),

    #[error("key not found")]
@@ -297,10 +290,7 @@ impl ReportableError for CancelError {
            }
            CancelError::Postgres(_) => crate::error::ErrorKind::Compute,
            CancelError::RateLimit => crate::error::ErrorKind::RateLimit,
-            CancelError::IpNotAllowed
-            | CancelError::VpcEndpointIdNotAllowed
-            | CancelError::NotFound => crate::error::ErrorKind::User,
-            CancelError::AuthError(_) => crate::error::ErrorKind::ControlPlane,
+            CancelError::NotFound | CancelError::AuthError(_) => crate::error::ErrorKind::User,
            CancelError::InternalError => crate::error::ErrorKind::Service,
        }
    }
@@ -422,7 +412,13 @@ impl CancellationHandler {
            IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use defaut mask here
            IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),
        };
-        if !self.limiter.lock_propagate_poison().check(subnet_key, 1) {
+
+        let allowed = {
+            let rate_limit_config = None;
+            let limiter = self.limiter.lock_propagate_poison();
+            limiter.check(subnet_key, rate_limit_config, 1)
+        };
+        if !allowed {
            // log only the subnet part of the IP address to know which subnet is rate limited
            tracing::warn!("Rate limit exceeded. Skipping cancellation message, {subnet_key}");
            Metrics::get()
@@ -450,52 +446,13 @@ impl CancellationHandler {
            return Err(CancelError::NotFound);
        };

-        if check_ip_allowed {
-            let ip_allowlist = auth_backend
-                .get_allowed_ips(&ctx, &cancel_closure.user_info)
-                .await
-                .map_err(|e| CancelError::AuthError(e.into()))?;
-
-            if !check_peer_addr_is_in_list(&ctx.peer_addr(), &ip_allowlist) {
-                // log it here since cancel_session could be spawned in a task
-                tracing::warn!(
-                    "IP is not allowed to cancel the query: {key}, address: {}",
-                    ctx.peer_addr()
-                );
-                return Err(CancelError::IpNotAllowed);
-            }
-        }
-
-        // check if a VPC endpoint ID is coming in and if yes, if it's allowed
-        let access_blocks = auth_backend
-            .get_block_public_or_vpc_access(&ctx, &cancel_closure.user_info)
+        let info = &cancel_closure.user_info;
+        let access_controls = auth_backend
+            .get_endpoint_access_control(&ctx, &info.endpoint, &info.user)
            .await
            .map_err(|e| CancelError::AuthError(e.into()))?;

-        if check_vpc_allowed {
-            if access_blocks.vpc_access_blocked {
-                return Err(CancelError::AuthError(AuthError::NetworkNotAllowed));
-            }
-
-            let incoming_vpc_endpoint_id = match ctx.extra() {
-                None => return Err(CancelError::AuthError(AuthError::MissingVPCEndpointId)),
-                Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(),
-                Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
-            };
-
-            let allowed_vpc_endpoint_ids = auth_backend
-                .get_allowed_vpc_endpoint_ids(&ctx, &cancel_closure.user_info)
-                .await
-                .map_err(|e| CancelError::AuthError(e.into()))?;
-            // TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
-            if !allowed_vpc_endpoint_ids.is_empty()
-                && !allowed_vpc_endpoint_ids.contains(&incoming_vpc_endpoint_id)
-            {
-                return Err(CancelError::VpcEndpointIdNotAllowed);
-            }
-        } else if access_blocks.public_access_blocked {
-            return Err(CancelError::VpcEndpointIdNotAllowed);
-        }
+        access_controls.check(&ctx, check_ip_allowed, check_vpc_allowed)?;

        Metrics::get()
            .proxy
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -8,7 +8,6 @@ use itertools::Itertools;
 use postgres_client::tls::MakeTlsConnect;
 use postgres_client::{CancelToken, RawConnection};
 use postgres_protocol::message::backend::NoticeResponseBody;
-use pq_proto::StartupMessageParams;
 use rustls::pki_types::InvalidDnsNameError;
 use thiserror::Error;
 use tokio::net::{TcpStream, lookup_host};
@@ -24,6 +23,7 @@ use crate::control_plane::errors::WakeComputeError;
 use crate::control_plane::messages::MetricsAuxInfo;
 use crate::error::{ReportableError, UserFacingError};
 use crate::metrics::{Metrics, NumDbConnectionsGuard};
+use crate::pqproto::StartupMessageParams;
 use crate::proxy::neon_option;
 use crate::tls::postgres_rustls::MakeRustlsConnect;
 use crate::types::Host;
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -7,7 +7,6 @@ use arc_swap::ArcSwapOption;
 use clap::ValueEnum;
 use remote_storage::RemoteStorageConfig;

-use crate::auth::backend::AuthRateLimiter;
 use crate::auth::backend::jwt::JwkCache;
 use crate::control_plane::locks::ApiLocks;
 use crate::rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig};
@@ -65,9 +64,6 @@ pub struct HttpConfig {
 pub struct AuthenticationConfig {
    pub thread_pool: Arc<ThreadPool>,
    pub scram_protocol_timeout: tokio::time::Duration,
-    pub rate_limiter_enabled: bool,
-    pub rate_limiter: AuthRateLimiter,
-    pub rate_limit_ip_subnet: u8,
    pub ip_allowlist_check_enabled: bool,
    pub is_vpc_acccess_proxy: bool,
    pub jwks_cache: JwkCache,
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -1,7 +1,7 @@
 use std::sync::Arc;

 use futures::{FutureExt, TryFutureExt};
-use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
+use tokio::io::{AsyncRead, AsyncWrite};
 use tokio_util::sync::CancellationToken;
 use tracing::{Instrument, debug, error, info};

@@ -221,12 +221,10 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
        .await
    {
        Ok(auth_result) => auth_result,
-        Err(e) => {
-            return stream.throw_error(e, Some(ctx)).await?;
-        }
+        Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
    };

-    let mut node = connect_to_compute(
+    let node = connect_to_compute(
        ctx,
        &TcpMechanism {
            user_info,
@@ -238,7 +236,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
        config.wake_compute_retry_config,
        &config.connect_to_compute,
    )
-    .or_else(|e| stream.throw_error(e, Some(ctx)))
+    .or_else(|e| async { Err(stream.throw_error(e, Some(ctx)).await) })
    .await?;

    let cancellation_handler_clone = Arc::clone(&cancellation_handler);
@@ -246,14 +244,8 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(

    session.write_cancel_key(node.cancel_closure.clone())?;

-    prepare_client_connection(&node, *session.key(), &mut stream).await?;
-
-    // Before proxy passing, forward to compute whatever data is left in the
-    // PqStream input buffer. Normally there is none, but our serverless npm
-    // driver in pipeline mode sends startup, password and first query
-    // immediately after opening the connection.
-    let (stream, read_buf) = stream.into_inner();
-    node.stream.write_all(&read_buf).await?;
+    prepare_client_connection(&node, *session.key(), &mut stream);
+    let stream = stream.flush_and_into_inner().await?;

    Ok(Some(ProxyPassthrough {
        client: stream,
--- a/proxy/src/context/mod.rs
+++ b/proxy/src/context/mod.rs
@@ -4,7 +4,6 @@ use std::net::IpAddr;

 use chrono::Utc;
 use once_cell::sync::OnceCell;
-use pq_proto::StartupMessageParams;
 use smol_str::SmolStr;
 use tokio::sync::mpsc;
 use tracing::field::display;
@@ -20,6 +19,7 @@ use crate::metrics::{
    ConnectOutcome, InvalidEndpointsGroup, LatencyAccumulated, LatencyTimer, Metrics, Protocol,
    Waiting,
 };
+use crate::pqproto::StartupMessageParams;
 use crate::protocol2::{ConnectionInfo, ConnectionInfoExtra};
 use crate::types::{DbName, EndpointId, RoleName};

@@ -370,6 +370,18 @@ impl RequestContext {
        }
    }

+    pub(crate) fn latency_timer_pause_at(
+        &self,
+        at: tokio::time::Instant,
+        waiting_for: Waiting,
+    ) -> LatencyTimerPause<'_> {
+        LatencyTimerPause {
+            ctx: self,
+            start: at,
+            waiting_for,
+        }
+    }
+
    pub(crate) fn get_proxy_latency(&self) -> LatencyAccumulated {
        self.0
            .try_lock()
--- a/proxy/src/context/parquet.rs
+++ b/proxy/src/context/parquet.rs
@@ -11,7 +11,6 @@ use parquet::file::metadata::RowGroupMetaDataPtr;
 use parquet::file::properties::{DEFAULT_PAGE_SIZE, WriterProperties, WriterPropertiesPtr};
 use parquet::file::writer::SerializedFileWriter;
 use parquet::record::RecordWriter;
-use pq_proto::StartupMessageParams;
 use remote_storage::{GenericRemoteStorage, RemotePath, RemoteStorageConfig, TimeoutOrCancel};
 use serde::ser::SerializeMap;
 use tokio::sync::mpsc;
@@ -24,6 +23,7 @@ use super::{LOG_CHAN, RequestContextInner};
 use crate::config::remote_storage_from_toml;
 use crate::context::LOG_CHAN_DISCONNECT;
 use crate::ext::TaskExt;
+use crate::pqproto::StartupMessageParams;

 #[derive(clap::Args, Clone, Debug)]
 pub struct ParquetUploadArgs {
--- a/proxy/src/control_plane/client/cplane_proxy_v1.rs
+++ b/proxy/src/control_plane/client/cplane_proxy_v1.rs
@@ -15,7 +15,6 @@ use tracing::{Instrument, debug, info, info_span, warn};
 use super::super::messages::{ControlPlaneErrorMessage, GetEndpointAccessControl, WakeCompute};
 use crate::auth::backend::ComputeUserInfo;
 use crate::auth::backend::jwt::AuthRule;
-use crate::cache::Cached;
 use crate::context::RequestContext;
 use crate::control_plane::caches::ApiCaches;
 use crate::control_plane::errors::{
@@ -24,12 +23,12 @@ use crate::control_plane::errors::{
 use crate::control_plane::locks::ApiLocks;
 use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, Reason};
 use crate::control_plane::{
-    AccessBlockerFlags, AuthInfo, AuthSecret, CachedAccessBlockerFlags, CachedAllowedIps,
-    CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret, NodeInfo,
+    AccessBlockerFlags, AuthInfo, AuthSecret, CachedNodeInfo, EndpointAccessControl, NodeInfo,
+    RoleAccessControl,
 };
-use crate::metrics::{CacheOutcome, Metrics};
+use crate::metrics::Metrics;
 use crate::rate_limiter::WakeComputeRateLimiter;
-use crate::types::{EndpointCacheKey, EndpointId};
+use crate::types::{EndpointCacheKey, EndpointId, RoleName};
 use crate::{compute, http, scram};

 pub(crate) const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");
@@ -66,65 +65,34 @@ impl NeonControlPlaneClient {
        self.endpoint.url().as_str()
    }

-    async fn do_get_auth_info(
-        &self,
-        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<AuthInfo, GetAuthInfoError> {
-        if !self
-            .caches
-            .endpoints_cache
-            .is_valid(ctx, &user_info.endpoint.normalize())
-        {
-            // TODO: refactor this because it's weird
-            // this is a failure to authenticate but we return Ok.
-            info!("endpoint is not valid, skipping the request");
-            return Ok(AuthInfo::default());
-        }
-        self.do_get_auth_req(user_info, &ctx.session_id(), Some(ctx))
-            .await
-    }
-
    async fn do_get_auth_req(
        &self,
-        user_info: &ComputeUserInfo,
-        session_id: &uuid::Uuid,
-        ctx: Option<&RequestContext>,
+        ctx: &RequestContext,
+        endpoint: &EndpointId,
+        role: &RoleName,
    ) -> Result<AuthInfo, GetAuthInfoError> {
-        let request_id: String = session_id.to_string();
-        let application_name = if let Some(ctx) = ctx {
-            ctx.console_application_name()
-        } else {
-            "auth_cancellation".to_string()
-        };
-
        async {
            let request = self
                .endpoint
                .get_path("get_endpoint_access_control")
-                .header(X_REQUEST_ID, &request_id)
+                .header(X_REQUEST_ID, ctx.session_id().to_string())
                .header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
-                .query(&[("session_id", session_id)])
+                .query(&[("session_id", ctx.session_id())])
                .query(&[
-                    ("application_name", application_name.as_str()),
-                    ("endpointish", user_info.endpoint.as_str()),
-                    ("role", user_info.user.as_str()),
+                    ("application_name", ctx.console_application_name().as_str()),
+                    ("endpointish", endpoint.as_str()),
+                    ("role", role.as_str()),
                ])
                .build()?;

            debug!(url = request.url().as_str(), "sending http request");
            let start = Instant::now();
-            let response = match ctx {
-                Some(ctx) => {
-                    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
-                    let rsp = self.endpoint.execute(request).await;
-                    drop(pause);
-                    rsp?
-                }
-                None => self.endpoint.execute(request).await?,
+            let response = {
+                let _pause = ctx.latency_timer_pause_at(start, crate::metrics::Waiting::Cplane);
+                self.endpoint.execute(request).await?
            };
-
            info!(duration = ?start.elapsed(), "received http response");
+
            let body = match parse_body::<GetEndpointAccessControl>(response).await {
                Ok(body) => body,
                // Error 404 is special: it's ok not to have a secret.
@@ -180,7 +148,7 @@ impl NeonControlPlaneClient {
    async fn do_get_endpoint_jwks(
        &self,
        ctx: &RequestContext,
-        endpoint: EndpointId,
+        endpoint: &EndpointId,
    ) -> Result<Vec<AuthRule>, GetEndpointJwksError> {
        if !self
            .caches
@@ -313,225 +281,104 @@ impl NeonControlPlaneClient {

 impl super::ControlPlaneApi for NeonControlPlaneClient {
    #[tracing::instrument(skip_all)]
-    async fn get_role_secret(
+    async fn get_role_access_control(
        &self,
        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedRoleSecret, GetAuthInfoError> {
-        let normalized_ep = &user_info.endpoint.normalize();
-        let user = &user_info.user;
-        if let Some(role_secret) = self
+        endpoint: &EndpointId,
+        role: &RoleName,
+    ) -> Result<RoleAccessControl, crate::control_plane::errors::GetAuthInfoError> {
+        let normalized_ep = &endpoint.normalize();
+        if let Some(secret) = self
            .caches
            .project_info
-            .get_role_secret(normalized_ep, user)
+            .get_role_secret(normalized_ep, role)
        {
-            return Ok(role_secret);
+            return Ok(secret);
        }
-        let auth_info = self.do_get_auth_info(ctx, user_info).await?;
-        let account_id = auth_info.account_id;
+
+        if !self.caches.endpoints_cache.is_valid(ctx, normalized_ep) {
+            info!("endpoint is not valid, skipping the request");
+            return Err(GetAuthInfoError::UnknownEndpoint);
+        }
+
+        let auth_info = self.do_get_auth_req(ctx, endpoint, role).await?;
+
+        let control = EndpointAccessControl {
+            allowed_ips: Arc::new(auth_info.allowed_ips),
+            allowed_vpce: Arc::new(auth_info.allowed_vpc_endpoint_ids),
+            flags: auth_info.access_blocker_flags,
+        };
+        let role_control = RoleAccessControl {
+            secret: auth_info.secret,
+        };
+
        if let Some(project_id) = auth_info.project_id {
            let normalized_ep_int = normalized_ep.into();
-            self.caches.project_info.insert_role_secret(
+
+            self.caches.project_info.insert_endpoint_access(
+                auth_info.account_id,
                project_id,
                normalized_ep_int,
-                user.into(),
-                auth_info.secret.clone(),
-            );
-            self.caches.project_info.insert_allowed_ips(
-                project_id,
-                normalized_ep_int,
-                Arc::new(auth_info.allowed_ips),
-            );
-            self.caches.project_info.insert_allowed_vpc_endpoint_ids(
-                account_id,
-                project_id,
-                normalized_ep_int,
-                Arc::new(auth_info.allowed_vpc_endpoint_ids),
-            );
-            self.caches.project_info.insert_block_public_or_vpc_access(
-                project_id,
-                normalized_ep_int,
-                auth_info.access_blocker_flags,
+                role.into(),
+                control,
+                role_control.clone(),
            );
            ctx.set_project_id(project_id);
        }
-        // When we just got a secret, we don't need to invalidate it.
-        Ok(Cached::new_uncached(auth_info.secret))
+
+        Ok(role_control)
    }

-    async fn get_allowed_ips(
+    #[tracing::instrument(skip_all)]
+    async fn get_endpoint_access_control(
        &self,
        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedAllowedIps, GetAuthInfoError> {
-        let normalized_ep = &user_info.endpoint.normalize();
-        if let Some(allowed_ips) = self.caches.project_info.get_allowed_ips(normalized_ep) {
-            Metrics::get()
-                .proxy
-                .allowed_ips_cache_misses // TODO SR: Should we rename this variable to something like allowed_ip_cache_stats?
-                .inc(CacheOutcome::Hit);
-            return Ok(allowed_ips);
+        endpoint: &EndpointId,
+        role: &RoleName,
+    ) -> Result<EndpointAccessControl, GetAuthInfoError> {
+        let normalized_ep = &endpoint.normalize();
+        if let Some(control) = self.caches.project_info.get_endpoint_access(normalized_ep) {
+            return Ok(control);
        }
-        Metrics::get()
-            .proxy
-            .allowed_ips_cache_misses
-            .inc(CacheOutcome::Miss);
-        let auth_info = self.do_get_auth_info(ctx, user_info).await?;
-        let allowed_ips = Arc::new(auth_info.allowed_ips);
-        let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
-        let access_blocker_flags = auth_info.access_blocker_flags;
-        let user = &user_info.user;
-        let account_id = auth_info.account_id;
+
+        if !self.caches.endpoints_cache.is_valid(ctx, normalized_ep) {
+            info!("endpoint is not valid, skipping the request");
+            return Err(GetAuthInfoError::UnknownEndpoint);
+        }
+
+        let auth_info = self.do_get_auth_req(ctx, endpoint, role).await?;
+
+        let control = EndpointAccessControl {
+            allowed_ips: Arc::new(auth_info.allowed_ips),
+            allowed_vpce: Arc::new(auth_info.allowed_vpc_endpoint_ids),
+            flags: auth_info.access_blocker_flags,
+        };
+        let role_control = RoleAccessControl {
+            secret: auth_info.secret,
+        };
+
        if let Some(project_id) = auth_info.project_id {
            let normalized_ep_int = normalized_ep.into();
-            self.caches.project_info.insert_role_secret(
+
+            self.caches.project_info.insert_endpoint_access(
+                auth_info.account_id,
                project_id,
                normalized_ep_int,
-                user.into(),
-                auth_info.secret.clone(),
-            );
-            self.caches.project_info.insert_allowed_ips(
-                project_id,
-                normalized_ep_int,
-                allowed_ips.clone(),
-            );
-            self.caches.project_info.insert_allowed_vpc_endpoint_ids(
-                account_id,
-                project_id,
-                normalized_ep_int,
-                allowed_vpc_endpoint_ids.clone(),
-            );
-            self.caches.project_info.insert_block_public_or_vpc_access(
-                project_id,
-                normalized_ep_int,
-                access_blocker_flags,
+                role.into(),
+                control.clone(),
+                role_control,
            );
            ctx.set_project_id(project_id);
        }
-        Ok(Cached::new_uncached(allowed_ips))
-    }

-    async fn get_allowed_vpc_endpoint_ids(
-        &self,
-        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedAllowedVpcEndpointIds, GetAuthInfoError> {
-        let normalized_ep = &user_info.endpoint.normalize();
-        if let Some(allowed_vpc_endpoint_ids) = self
-            .caches
-            .project_info
-            .get_allowed_vpc_endpoint_ids(normalized_ep)
-        {
-            Metrics::get()
-                .proxy
-                .vpc_endpoint_id_cache_stats
-                .inc(CacheOutcome::Hit);
-            return Ok(allowed_vpc_endpoint_ids);
-        }
-
-        Metrics::get()
-            .proxy
-            .vpc_endpoint_id_cache_stats
-            .inc(CacheOutcome::Miss);
-
-        let auth_info = self.do_get_auth_info(ctx, user_info).await?;
-        let allowed_ips = Arc::new(auth_info.allowed_ips);
-        let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
-        let access_blocker_flags = auth_info.access_blocker_flags;
-        let user = &user_info.user;
-        let account_id = auth_info.account_id;
-        if let Some(project_id) = auth_info.project_id {
-            let normalized_ep_int = normalized_ep.into();
-            self.caches.project_info.insert_role_secret(
-                project_id,
-                normalized_ep_int,
-                user.into(),
-                auth_info.secret.clone(),
-            );
-            self.caches.project_info.insert_allowed_ips(
-                project_id,
-                normalized_ep_int,
-                allowed_ips.clone(),
-            );
-            self.caches.project_info.insert_allowed_vpc_endpoint_ids(
-                account_id,
-                project_id,
-                normalized_ep_int,
-                allowed_vpc_endpoint_ids.clone(),
-            );
-            self.caches.project_info.insert_block_public_or_vpc_access(
-                project_id,
-                normalized_ep_int,
-                access_blocker_flags,
-            );
-            ctx.set_project_id(project_id);
-        }
-        Ok(Cached::new_uncached(allowed_vpc_endpoint_ids))
-    }
-
-    async fn get_block_public_or_vpc_access(
-        &self,
-        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedAccessBlockerFlags, GetAuthInfoError> {
-        let normalized_ep = &user_info.endpoint.normalize();
-        if let Some(access_blocker_flags) = self
-            .caches
-            .project_info
-            .get_block_public_or_vpc_access(normalized_ep)
-        {
-            Metrics::get()
-                .proxy
-                .access_blocker_flags_cache_stats
-                .inc(CacheOutcome::Hit);
-            return Ok(access_blocker_flags);
-        }
-
-        Metrics::get()
-            .proxy
-            .access_blocker_flags_cache_stats
-            .inc(CacheOutcome::Miss);
-
-        let auth_info = self.do_get_auth_info(ctx, user_info).await?;
-        let allowed_ips = Arc::new(auth_info.allowed_ips);
-        let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
-        let access_blocker_flags = auth_info.access_blocker_flags;
-        let user = &user_info.user;
-        let account_id = auth_info.account_id;
-        if let Some(project_id) = auth_info.project_id {
-            let normalized_ep_int = normalized_ep.into();
-            self.caches.project_info.insert_role_secret(
-                project_id,
-                normalized_ep_int,
-                user.into(),
-                auth_info.secret.clone(),
-            );
-            self.caches.project_info.insert_allowed_ips(
-                project_id,
-                normalized_ep_int,
-                allowed_ips.clone(),
-            );
-            self.caches.project_info.insert_allowed_vpc_endpoint_ids(
-                account_id,
-                project_id,
-                normalized_ep_int,
-                allowed_vpc_endpoint_ids.clone(),
-            );
-            self.caches.project_info.insert_block_public_or_vpc_access(
-                project_id,
-                normalized_ep_int,
-                access_blocker_flags.clone(),
-            );
-            ctx.set_project_id(project_id);
-        }
-        Ok(Cached::new_uncached(access_blocker_flags))
+        Ok(control)
    }

    #[tracing::instrument(skip_all)]
    async fn get_endpoint_jwks(
        &self,
        ctx: &RequestContext,
-        endpoint: EndpointId,
+        endpoint: &EndpointId,
    ) -> Result<Vec<AuthRule>, GetEndpointJwksError> {
        self.do_get_endpoint_jwks(ctx, endpoint).await
    }
--- a/proxy/src/control_plane/client/mock.rs
+++ b/proxy/src/control_plane/client/mock.rs
@@ -15,14 +15,14 @@ use crate::auth::backend::ComputeUserInfo;
 use crate::auth::backend::jwt::AuthRule;
 use crate::cache::Cached;
 use crate::context::RequestContext;
-use crate::control_plane::client::{
-    CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedRoleSecret,
-};
 use crate::control_plane::errors::{
    ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError,
 };
 use crate::control_plane::messages::MetricsAuxInfo;
-use crate::control_plane::{AccessBlockerFlags, AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo};
+use crate::control_plane::{
+    AccessBlockerFlags, AuthInfo, AuthSecret, CachedNodeInfo, EndpointAccessControl, NodeInfo,
+    RoleAccessControl,
+};
 use crate::intern::RoleNameInt;
 use crate::types::{BranchId, EndpointId, ProjectId, RoleName};
 use crate::url::ApiUrl;
@@ -66,7 +66,8 @@ impl MockControlPlane {

    async fn do_get_auth_info(
        &self,
-        user_info: &ComputeUserInfo,
+        endpoint: &EndpointId,
+        role: &RoleName,
    ) -> Result<AuthInfo, GetAuthInfoError> {
        let (secret, allowed_ips) = async {
            // Perhaps we could persist this connection, but then we'd have to
@@ -80,7 +81,7 @@ impl MockControlPlane {
            let secret = if let Some(entry) = get_execute_postgres_query(
                &client,
                "select rolpassword from pg_catalog.pg_authid where rolname = $1",
-                &[&&*user_info.user],
+                &[&role.as_str()],
                "rolpassword",
            )
            .await?
@@ -89,7 +90,7 @@ impl MockControlPlane {
                let secret = scram::ServerSecret::parse(&entry).map(AuthSecret::Scram);
                secret.or_else(|| parse_md5(&entry).map(AuthSecret::Md5))
            } else {
-                warn!("user '{}' does not exist", user_info.user);
+                warn!("user '{role}' does not exist");
                None
            };

@@ -97,7 +98,7 @@ impl MockControlPlane {
                match get_execute_postgres_query(
                    &client,
                    "select allowed_ips from neon_control_plane.endpoints where endpoint_id = $1",
-                    &[&user_info.endpoint.as_str()],
+                    &[&endpoint.as_str()],
                    "allowed_ips",
                )
                .await?
@@ -133,7 +134,7 @@ impl MockControlPlane {

    async fn do_get_endpoint_jwks(
        &self,
-        endpoint: EndpointId,
+        endpoint: &EndpointId,
    ) -> Result<Vec<AuthRule>, GetEndpointJwksError> {
        let (client, connection) =
            tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;
@@ -222,53 +223,36 @@ async fn get_execute_postgres_query(
 }

 impl super::ControlPlaneApi for MockControlPlane {
-    #[tracing::instrument(skip_all)]
-    async fn get_role_secret(
+    async fn get_endpoint_access_control(
        &self,
        _ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedRoleSecret, GetAuthInfoError> {
-        Ok(CachedRoleSecret::new_uncached(
-            self.do_get_auth_info(user_info).await?.secret,
-        ))
+        endpoint: &EndpointId,
+        role: &RoleName,
+    ) -> Result<EndpointAccessControl, GetAuthInfoError> {
+        let info = self.do_get_auth_info(endpoint, role).await?;
+        Ok(EndpointAccessControl {
+            allowed_ips: Arc::new(info.allowed_ips),
+            allowed_vpce: Arc::new(info.allowed_vpc_endpoint_ids),
+            flags: info.access_blocker_flags,
+        })
    }

-    async fn get_allowed_ips(
+    async fn get_role_access_control(
        &self,
        _ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedAllowedIps, GetAuthInfoError> {
-        Ok(Cached::new_uncached(Arc::new(
-            self.do_get_auth_info(user_info).await?.allowed_ips,
-        )))
-    }
-
-    async fn get_allowed_vpc_endpoint_ids(
-        &self,
-        _ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedAllowedVpcEndpointIds, super::errors::GetAuthInfoError> {
-        Ok(Cached::new_uncached(Arc::new(
-            self.do_get_auth_info(user_info)
-                .await?
-                .allowed_vpc_endpoint_ids,
-        )))
-    }
-
-    async fn get_block_public_or_vpc_access(
-        &self,
-        _ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<super::CachedAccessBlockerFlags, super::errors::GetAuthInfoError> {
-        Ok(Cached::new_uncached(
-            self.do_get_auth_info(user_info).await?.access_blocker_flags,
-        ))
+        endpoint: &EndpointId,
+        role: &RoleName,
+    ) -> Result<RoleAccessControl, GetAuthInfoError> {
+        let info = self.do_get_auth_info(endpoint, role).await?;
+        Ok(RoleAccessControl {
+            secret: info.secret,
+        })
    }

    async fn get_endpoint_jwks(
        &self,
        _ctx: &RequestContext,
-        endpoint: EndpointId,
+        endpoint: &EndpointId,
    ) -> Result<Vec<AuthRule>, GetEndpointJwksError> {
        self.do_get_endpoint_jwks(endpoint).await
    }
--- a/proxy/src/control_plane/client/mod.rs
+++ b/proxy/src/control_plane/client/mod.rs
@@ -16,15 +16,14 @@ use crate::cache::endpoints::EndpointsCache;
 use crate::cache::project_info::ProjectInfoCacheImpl;
 use crate::config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions};
 use crate::context::RequestContext;
-use crate::control_plane::{
-    CachedAccessBlockerFlags, CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedNodeInfo,
-    CachedRoleSecret, ControlPlaneApi, NodeInfoCache, errors,
-};
+use crate::control_plane::{CachedNodeInfo, ControlPlaneApi, NodeInfoCache, errors};
 use crate::error::ReportableError;
 use crate::metrics::ApiLockMetrics;
 use crate::rate_limiter::{DynamicLimiter, Outcome, RateLimiterConfig, Token};
 use crate::types::EndpointId;

+use super::{EndpointAccessControl, RoleAccessControl};
+
 #[non_exhaustive]
 #[derive(Clone)]
 pub enum ControlPlaneClient {
@@ -40,68 +39,42 @@ pub enum ControlPlaneClient {
 }

 impl ControlPlaneApi for ControlPlaneClient {
-    async fn get_role_secret(
+    async fn get_role_access_control(
        &self,
        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedRoleSecret, errors::GetAuthInfoError> {
+        endpoint: &EndpointId,
+        role: &crate::types::RoleName,
+    ) -> Result<RoleAccessControl, errors::GetAuthInfoError> {
        match self {
-            Self::ProxyV1(api) => api.get_role_secret(ctx, user_info).await,
+            Self::ProxyV1(api) => api.get_role_access_control(ctx, endpoint, role).await,
            #[cfg(any(test, feature = "testing"))]
-            Self::PostgresMock(api) => api.get_role_secret(ctx, user_info).await,
+            Self::PostgresMock(api) => api.get_role_access_control(ctx, endpoint, role).await,
            #[cfg(test)]
-            Self::Test(_) => {
+            Self::Test(_api) => {
                unreachable!("this function should never be called in the test backend")
            }
        }
    }

-    async fn get_allowed_ips(
+    async fn get_endpoint_access_control(
        &self,
        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedAllowedIps, errors::GetAuthInfoError> {
+        endpoint: &EndpointId,
+        role: &crate::types::RoleName,
+    ) -> Result<EndpointAccessControl, errors::GetAuthInfoError> {
        match self {
-            Self::ProxyV1(api) => api.get_allowed_ips(ctx, user_info).await,
+            Self::ProxyV1(api) => api.get_endpoint_access_control(ctx, endpoint, role).await,
            #[cfg(any(test, feature = "testing"))]
-            Self::PostgresMock(api) => api.get_allowed_ips(ctx, user_info).await,
+            Self::PostgresMock(api) => api.get_endpoint_access_control(ctx, endpoint, role).await,
            #[cfg(test)]
-            Self::Test(api) => api.get_allowed_ips(),
-        }
-    }
-
-    async fn get_allowed_vpc_endpoint_ids(
-        &self,
-        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError> {
-        match self {
-            Self::ProxyV1(api) => api.get_allowed_vpc_endpoint_ids(ctx, user_info).await,
-            #[cfg(any(test, feature = "testing"))]
-            Self::PostgresMock(api) => api.get_allowed_vpc_endpoint_ids(ctx, user_info).await,
-            #[cfg(test)]
-            Self::Test(api) => api.get_allowed_vpc_endpoint_ids(),
-        }
-    }
-
-    async fn get_block_public_or_vpc_access(
-        &self,
-        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError> {
-        match self {
-            Self::ProxyV1(api) => api.get_block_public_or_vpc_access(ctx, user_info).await,
-            #[cfg(any(test, feature = "testing"))]
-            Self::PostgresMock(api) => api.get_block_public_or_vpc_access(ctx, user_info).await,
-            #[cfg(test)]
-            Self::Test(api) => api.get_block_public_or_vpc_access(),
+            Self::Test(api) => api.get_access_control(),
        }
    }

    async fn get_endpoint_jwks(
        &self,
        ctx: &RequestContext,
-        endpoint: EndpointId,
+        endpoint: &EndpointId,
    ) -> Result<Vec<AuthRule>, errors::GetEndpointJwksError> {
        match self {
            Self::ProxyV1(api) => api.get_endpoint_jwks(ctx, endpoint).await,
@@ -131,15 +104,7 @@ impl ControlPlaneApi for ControlPlaneClient {
 pub(crate) trait TestControlPlaneClient: Send + Sync + 'static {
    fn wake_compute(&self) -> Result<CachedNodeInfo, errors::WakeComputeError>;

-    fn get_allowed_ips(&self) -> Result<CachedAllowedIps, errors::GetAuthInfoError>;
-
-    fn get_allowed_vpc_endpoint_ids(
-        &self,
-    ) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError>;
-
-    fn get_block_public_or_vpc_access(
-        &self,
-    ) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError>;
+    fn get_access_control(&self) -> Result<EndpointAccessControl, errors::GetAuthInfoError>;

    fn dyn_clone(&self) -> Box<dyn TestControlPlaneClient>;
 }
@@ -309,7 +274,7 @@ impl FetchAuthRules for ControlPlaneClient {
        ctx: &RequestContext,
        endpoint: EndpointId,
    ) -> Result<Vec<AuthRule>, FetchAuthRulesError> {
-        self.get_endpoint_jwks(ctx, endpoint)
+        self.get_endpoint_jwks(ctx, &endpoint)
            .await
            .map_err(FetchAuthRulesError::GetEndpointJwks)
    }
--- a/proxy/src/control_plane/errors.rs
+++ b/proxy/src/control_plane/errors.rs
@@ -99,6 +99,10 @@ pub(crate) enum GetAuthInfoError {

    #[error(transparent)]
    ApiError(ControlPlaneError),
+
+    /// Proxy does not know about the endpoint in advance
+    #[error("endpoint not found in endpoint cache")]
+    UnknownEndpoint,
 }

 // This allows more useful interactions than `#[from]`.
@@ -115,6 +119,8 @@ impl UserFacingError for GetAuthInfoError {
            Self::BadSecret => REQUEST_FAILED.to_owned(),
            // However, API might return a meaningful error.
            Self::ApiError(e) => e.to_string_client(),
+            // pretend like control plane returned an error.
+            Self::UnknownEndpoint => REQUEST_FAILED.to_owned(),
        }
    }
 }
@@ -124,6 +130,8 @@ impl ReportableError for GetAuthInfoError {
        match self {
            Self::BadSecret => crate::error::ErrorKind::ControlPlane,
            Self::ApiError(_) => crate::error::ErrorKind::ControlPlane,
+            // we only apply endpoint filtering if control plane is under high load.
+            Self::UnknownEndpoint => crate::error::ErrorKind::ServiceRateLimit,
        }
    }
 }
--- a/proxy/src/control_plane/mod.rs
+++ b/proxy/src/control_plane/mod.rs
@@ -11,16 +11,16 @@ pub(crate) mod errors;

 use std::sync::Arc;

-use crate::auth::IpPattern;
 use crate::auth::backend::jwt::AuthRule;
 use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo};
-use crate::cache::project_info::ProjectInfoCacheImpl;
+use crate::auth::{AuthError, IpPattern, check_peer_addr_is_in_list};
 use crate::cache::{Cached, TimedLru};
 use crate::config::ComputeConfig;
 use crate::context::RequestContext;
 use crate::control_plane::messages::{ControlPlaneErrorMessage, MetricsAuxInfo};
 use crate::intern::{AccountIdInt, ProjectIdInt};
-use crate::types::{EndpointCacheKey, EndpointId};
+use crate::protocol2::ConnectionInfoExtra;
+use crate::types::{EndpointCacheKey, EndpointId, RoleName};
 use crate::{compute, scram};

 /// Various cache-related types.
@@ -101,7 +101,7 @@ impl NodeInfo {
    }
 }

-#[derive(Clone, Default, Eq, PartialEq, Debug)]
+#[derive(Copy, Clone, Default)]
 pub(crate) struct AccessBlockerFlags {
    pub public_access_blocked: bool,
    pub vpc_access_blocked: bool,
@@ -110,47 +110,78 @@ pub(crate) struct AccessBlockerFlags {
 pub(crate) type NodeInfoCache =
    TimedLru<EndpointCacheKey, Result<NodeInfo, Box<ControlPlaneErrorMessage>>>;
 pub(crate) type CachedNodeInfo = Cached<&'static NodeInfoCache, NodeInfo>;
-pub(crate) type CachedRoleSecret = Cached<&'static ProjectInfoCacheImpl, Option<AuthSecret>>;
-pub(crate) type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc<Vec<IpPattern>>>;
-pub(crate) type CachedAllowedVpcEndpointIds =
-    Cached<&'static ProjectInfoCacheImpl, Arc<Vec<String>>>;
-pub(crate) type CachedAccessBlockerFlags =
-    Cached<&'static ProjectInfoCacheImpl, AccessBlockerFlags>;
+
+#[derive(Clone)]
+pub struct RoleAccessControl {
+    pub secret: Option<AuthSecret>,
+}
+
+#[derive(Clone)]
+pub struct EndpointAccessControl {
+    pub allowed_ips: Arc<Vec<IpPattern>>,
+    pub allowed_vpce: Arc<Vec<String>>,
+    pub flags: AccessBlockerFlags,
+}
+
+impl EndpointAccessControl {
+    pub fn check(
+        &self,
+        ctx: &RequestContext,
+        check_ip_allowed: bool,
+        check_vpc_allowed: bool,
+    ) -> Result<(), AuthError> {
+        if check_ip_allowed && !check_peer_addr_is_in_list(&ctx.peer_addr(), &self.allowed_ips) {
+            return Err(AuthError::IpAddressNotAllowed(ctx.peer_addr()));
+        }
+
+        // check if a VPC endpoint ID is coming in and if yes, if it's allowed
+        if check_vpc_allowed {
+            if self.flags.vpc_access_blocked {
+                return Err(AuthError::NetworkNotAllowed);
+            }
+
+            let incoming_vpc_endpoint_id = match ctx.extra() {
+                None => return Err(AuthError::MissingVPCEndpointId),
+                Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(),
+                Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
+            };
+
+            let vpce = &self.allowed_vpce;
+            // TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
+            if !vpce.is_empty() && !vpce.contains(&incoming_vpc_endpoint_id) {
+                return Err(AuthError::vpc_endpoint_id_not_allowed(
+                    incoming_vpc_endpoint_id,
+                ));
+            }
+        } else if self.flags.public_access_blocked {
+            return Err(AuthError::NetworkNotAllowed);
+        }
+
+        Ok(())
+    }
+}

 /// This will allocate per each call, but the http requests alone
 /// already require a few allocations, so it should be fine.
 pub(crate) trait ControlPlaneApi {
-    /// Get the client's auth secret for authentication.
-    /// Returns option because user not found situation is special.
-    /// We still have to mock the scram to avoid leaking information that user doesn't exist.
-    async fn get_role_secret(
+    async fn get_role_access_control(
        &self,
        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedRoleSecret, errors::GetAuthInfoError>;
+        endpoint: &EndpointId,
+        role: &RoleName,
+    ) -> Result<RoleAccessControl, errors::GetAuthInfoError>;

-    async fn get_allowed_ips(
+    async fn get_endpoint_access_control(
        &self,
        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedAllowedIps, errors::GetAuthInfoError>;
-
-    async fn get_allowed_vpc_endpoint_ids(
-        &self,
-        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError>;
-
-    async fn get_block_public_or_vpc_access(
-        &self,
-        ctx: &RequestContext,
-        user_info: &ComputeUserInfo,
-    ) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError>;
+        endpoint: &EndpointId,
+        role: &RoleName,
+    ) -> Result<EndpointAccessControl, errors::GetAuthInfoError>;

    async fn get_endpoint_jwks(
        &self,
        ctx: &RequestContext,
-        endpoint: EndpointId,
+        endpoint: &EndpointId,
    ) -> Result<Vec<AuthRule>, errors::GetEndpointJwksError>;

    /// Wake up the compute node and return the corresponding connection info.
--- a/proxy/src/lib.rs
+++ b/proxy/src/lib.rs
@@ -92,6 +92,7 @@ mod logging;
 mod metrics;
 mod parse;
 mod pglb;
+mod pqproto;
 mod protocol2;
 mod proxy;
 mod rate_limiter;
--- a/proxy/src/pqproto.rs
+++ b/proxy/src/pqproto.rs
@@ -0,0 +1,693 @@
+//! Postgres protocol codec
+//!
+//! <https://www.postgresql.org/docs/current/protocol-message-formats.html>
+
+use std::fmt;
+use std::io::{self, Cursor};
+
+use bytes::{Buf, BufMut};
+use itertools::Itertools;
+use rand::distributions::{Distribution, Standard};
+use tokio::io::{AsyncRead, AsyncReadExt};
+use zerocopy::{FromBytes, Immutable, IntoBytes, big_endian};
+
+pub type ErrorCode = [u8; 5];
+
+pub const FE_PASSWORD_MESSAGE: u8 = b'p';
+
+pub const SQLSTATE_INTERNAL_ERROR: [u8; 5] = *b"XX000";
+
+/// The protocol version number.
+///
+/// The most significant 16 bits are the major version number (3 for the protocol described here).
+/// The least significant 16 bits are the minor version number (0 for the protocol described here).
+/// <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-STARTUPMESSAGE>
+#[derive(Clone, Copy, PartialEq, PartialOrd, FromBytes, IntoBytes, Immutable)]
+#[repr(C)]
+pub struct ProtocolVersion {
+    major: big_endian::U16,
+    minor: big_endian::U16,
+}
+
+impl ProtocolVersion {
+    pub const fn new(major: u16, minor: u16) -> Self {
+        Self {
+            major: big_endian::U16::new(major),
+            minor: big_endian::U16::new(minor),
+        }
+    }
+    pub const fn minor(self) -> u16 {
+        self.minor.get()
+    }
+    pub const fn major(self) -> u16 {
+        self.major.get()
+    }
+}
+
+impl fmt::Debug for ProtocolVersion {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_list()
+            .entry(&self.major())
+            .entry(&self.minor())
+            .finish()
+    }
+}
+
+/// read the type from the stream using zerocopy.
+///
+/// not cancel safe.
+macro_rules! read {
+    ($s:expr => $t:ty) => {{
+        // cannot be implemented as a function due to lack of const-generic-expr
+        let mut buf = [0; size_of::<$t>()];
+        $s.read_exact(&mut buf).await?;
+        let res: $t = zerocopy::transmute!(buf);
+        res
+    }};
+}
+
+pub async fn read_startup<S>(stream: &mut S) -> io::Result<FeStartupPacket>
+where
+    S: AsyncRead + Unpin,
+{
+    /// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L118>
+    const MAX_STARTUP_PACKET_LENGTH: usize = 10000;
+    const RESERVED_INVALID_MAJOR_VERSION: u16 = 1234;
+    /// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L132>
+    const CANCEL_REQUEST_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5678);
+    /// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L166>
+    const NEGOTIATE_SSL_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5679);
+    /// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L167>
+    const NEGOTIATE_GSS_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5680);
+
+    /// This first reads the startup message header, which is 8 bytes.
+    /// The first 4 bytes is a big-endian message length, and the next 4 bytes is a version number.
+    ///
+    /// The length value is inclusive of the header. For example,
+    /// an empty message will always have length 8.
+    #[derive(Clone, Copy, FromBytes, IntoBytes, Immutable)]
+    #[repr(C)]
+    struct StartupHeader {
+        len: big_endian::U32,
+        version: ProtocolVersion,
+    }
+
+    let header = read!(stream => StartupHeader);
+
+    // <https://github.com/postgres/postgres/blob/04bcf9e19a4261fe9c7df37c777592c2e10c32a7/src/backend/tcop/backend_startup.c#L378-L382>
+    // First byte indicates standard SSL handshake message
+    // (It can't be a Postgres startup length because in network byte order
+    // that would be a startup packet hundreds of megabytes long)
+    if header.as_bytes()[0] == 0x16 {
+        return Ok(FeStartupPacket::SslRequest {
+            // The bytes we read for the header are actually part of a TLS ClientHello.
+            // In theory, if the ClientHello was < 8 bytes we would fail with EOF before we get here.
+            // In practice though, I see no world where a ClientHello is less than 8 bytes
+            // since it includes ephemeral keys etc.
+            direct: Some(zerocopy::transmute!(header)),
+        });
+    }
+
+    let Some(len) = (header.len.get() as usize).checked_sub(8) else {
+        return Err(io::Error::other(format!(
+            "invalid startup message length {}, must be at least 8.",
+            header.len,
+        )));
+    };
+
+    // TODO: add a histogram for startup packet lengths
+    if len > MAX_STARTUP_PACKET_LENGTH {
+        tracing::warn!("large startup message detected: {len} bytes");
+        return Err(io::Error::other(format!(
+            "invalid startup message length {len}"
+        )));
+    }
+
+    match header.version {
+        // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-CANCELREQUEST>
+        CANCEL_REQUEST_CODE => {
+            if len != 8 {
+                return Err(io::Error::other(
+                    "CancelRequest message is malformed, backend PID / secret key missing",
+                ));
+            }
+
+            Ok(FeStartupPacket::CancelRequest(
+                read!(stream => CancelKeyData),
+            ))
+        }
+        // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-SSLREQUEST>
+        NEGOTIATE_SSL_CODE => {
+            // Requested upgrade to SSL (aka TLS)
+            Ok(FeStartupPacket::SslRequest { direct: None })
+        }
+        NEGOTIATE_GSS_CODE => {
+            // Requested upgrade to GSSAPI
+            Ok(FeStartupPacket::GssEncRequest)
+        }
+        version if version.major() == RESERVED_INVALID_MAJOR_VERSION => Err(io::Error::other(
+            format!("Unrecognized request code {version:?}"),
+        )),
+        // StartupMessage
+        version => {
+            // The protocol version number is followed by one or more pairs of parameter name and value strings.
+            // A zero byte is required as a terminator after the last name/value pair.
+            // Parameters can appear in any order. user is required, others are optional.
+
+            let mut buf = vec![0; len];
+            stream.read_exact(&mut buf).await?;
+
+            if buf.pop() != Some(b'\0') {
+                return Err(io::Error::other(
+                    "StartupMessage params: missing null terminator",
+                ));
+            }
+
+            // TODO: Don't do this.
+            // There's no guarantee that these messages are utf8,
+            // but they usually happen to be simple ascii.
+            let params = String::from_utf8(buf)
+                .map_err(|_| io::Error::other("StartupMessage params: invalid utf-8"))?;
+
+            Ok(FeStartupPacket::StartupMessage {
+                version,
+                params: StartupMessageParams { params },
+            })
+        }
+    }
+}
+
+/// Read a raw postgres packet, rejecting any message whose body is longer than `max` bytes.
+///
+/// This returns the message tag, as well as the message body. The message
+/// body is written into `buf`, whose previous contents are completely overwritten.
+///
+/// This is not cancel safe.
+pub async fn read_message<'a, S>(
+    stream: &mut S,
+    buf: &'a mut Vec<u8>,
+    max: usize,
+) -> io::Result<(u8, &'a mut [u8])>
+where
+    S: AsyncRead + Unpin,
+{
+    /// This first reads the header, which for regular messages in the 3.0 protocol is 5 bytes.
+    /// The first byte is a message tag, and the next 4 bytes is a big-endian length.
+    ///
+    /// Awkwardly, the length value is inclusive of itself, but not of the tag. For example,
+    /// an empty message will always have length 4.
+    #[derive(Clone, Copy, FromBytes)]
+    #[repr(C)]
+    struct Header {
+        tag: u8,
+        len: big_endian::U32,
+    }
+
+    let header = read!(stream => Header);
+
+    // as described above, the length must be at least 4 (this is a regular message, not a startup one).
+    let Some(len) = (header.len.get() as usize).checked_sub(4) else {
+        return Err(io::Error::other(format!(
+            "invalid message length {}, must be at least 4.",
+            header.len,
+        )));
+    };
+
+    // TODO: add a histogram for message lengths
+
+    // check if the message body (length field excluded) exceeds our desired max.
+    if len > max {
+        tracing::warn!("large postgres message detected: {len} bytes");
+        return Err(io::Error::other(format!("invalid message length {len}")));
+    }
+
+    // read in our entire message; resize first so `read_exact` fills exactly `len` bytes.
+    buf.resize(len, 0);
+    stream.read_exact(buf).await?;
+
+    Ok((header.tag, buf))
+}
+
+pub struct WriteBuf(Cursor<Vec<u8>>);
+
+impl Buf for WriteBuf {
+    #[inline]
+    fn remaining(&self) -> usize {
+        self.0.remaining()
+    }
+
+    #[inline]
+    fn chunk(&self) -> &[u8] {
+        self.0.chunk()
+    }
+
+    #[inline]
+    fn advance(&mut self, cnt: usize) {
+        self.0.advance(cnt);
+    }
+}
+
+impl WriteBuf {
+    pub const fn new() -> Self {
+        Self(Cursor::new(Vec::new()))
+    }
+
+    /// Use a heuristic to determine if we should shrink the write buffer.
+    #[inline]
+    fn should_shrink(&self) -> bool {
+        let n = self.0.position() as usize;
+        let len = self.0.get_ref().len();
+
+        // the consumed prefix (n) is larger than the unread remainder (len - n).
+        n + n > len
+    }
+
+    /// Shrink the write buffer so that subsequent writes have more spare capacity.
+    #[cold]
+    fn shrink(&mut self) {
+        let n = self.0.position() as usize;
+        let buf = self.0.get_mut();
+
+        // buf repr:
+        // [----unused------|-----filled-----|-----uninit-----]
+        //                  ^ n              ^ buf.len()      ^ buf.capacity()
+        let filled = n..buf.len();
+        let filled_len = filled.len();
+        buf.copy_within(filled, 0);
+        buf.truncate(filled_len);
+        self.0.set_position(0);
+    }
+
+    /// clear the write buffer.
+    pub fn reset(&mut self) {
+        let buf = self.0.get_mut();
+        buf.clear();
+        self.0.set_position(0);
+    }
+
+    /// Write a raw message to the internal buffer.
+    ///
+    /// The size_hint value is only a hint for reserving space. It's ok if it's incorrect, since
+    /// we calculate the length after the fact.
+    pub fn write_raw(&mut self, size_hint: usize, tag: u8, f: impl FnOnce(&mut Vec<u8>)) {
+        if self.should_shrink() {
+            self.shrink();
+        }
+
+        let buf = self.0.get_mut();
+        buf.reserve(5 + size_hint);
+
+        buf.push(tag);
+        let start = buf.len();
+        buf.extend_from_slice(&[0, 0, 0, 0]);
+
+        f(buf);
+
+        let end = buf.len();
+        let len = (end - start) as u32;
+        buf[start..start + 4].copy_from_slice(&len.to_be_bytes());
+    }
+
+    /// Write an encryption response message.
+    pub fn encryption(&mut self, m: u8) {
+        self.0.get_mut().push(m);
+    }
+
+    pub fn write_error(&mut self, msg: &str, error_code: ErrorCode) {
+        self.shrink();
+
+        // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-ERRORRESPONSE>
+        // <https://www.postgresql.org/docs/current/protocol-error-fields.html>
+        // "SERROR\0CXXXXX\0M\0\0".len() == 17
+        self.write_raw(17 + msg.len(), b'E', |buf| {
+            // Severity: ERROR
+            buf.put_slice(b"SERROR\0");
+
+            // Code: error_code
+            buf.put_u8(b'C');
+            buf.put_slice(&error_code);
+            buf.put_u8(0);
+
+            // Message: msg
+            buf.put_u8(b'M');
+            buf.put_slice(msg.as_bytes());
+            buf.put_u8(0);
+
+            // End.
+            buf.put_u8(0);
+        });
+    }
+}
+
+#[derive(Debug)]
+pub enum FeStartupPacket {
+    CancelRequest(CancelKeyData),
+    SslRequest {
+        direct: Option<[u8; 8]>,
+    },
+    GssEncRequest,
+    StartupMessage {
+        version: ProtocolVersion,
+        params: StartupMessageParams,
+    },
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct StartupMessageParams {
+    pub params: String,
+}
+
+impl StartupMessageParams {
+    /// Get parameter's value by its name.
+    pub fn get(&self, name: &str) -> Option<&str> {
+        self.iter().find_map(|(k, v)| (k == name).then_some(v))
+    }
+
+    /// Split command-line options according to PostgreSQL's logic,
+    /// taking into account all escape sequences but leaving them as-is.
+    /// [`None`] means that there's no `options` in [`Self`].
+    pub fn options_raw(&self) -> Option<impl Iterator<Item = &str>> {
+        self.get("options").map(Self::parse_options_raw)
+    }
+
+    /// Split command-line options according to PostgreSQL's logic,
+    /// taking into account all escape sequences but leaving them as-is.
+    pub fn parse_options_raw(input: &str) -> impl Iterator<Item = &str> {
+        // See `postgres: pg_split_opts`.
+        let mut last_was_escape = false;
+        input
+            .split(move |c: char| {
+                // We split by non-escaped whitespace symbols.
+                let should_split = c.is_ascii_whitespace() && !last_was_escape;
+                last_was_escape = c == '\\' && !last_was_escape;
+                should_split
+            })
+            .filter(|s| !s.is_empty())
+    }
+
+    /// Iterate through key-value pairs in an arbitrary order.
+    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {
+        self.params.split_terminator('\0').tuples()
+    }
+
+    // This function is mostly useful in tests.
+    #[cfg(test)]
+    pub fn new<'a, const N: usize>(pairs: [(&'a str, &'a str); N]) -> Self {
+        let mut b = Self {
+            params: String::new(),
+        };
+        for (k, v) in pairs {
+            b.insert(k, v);
+        }
+        b
+    }
+
+    /// Set parameter's value by its name.
+    /// name and value must not contain a \0 byte
+    pub fn insert(&mut self, name: &str, value: &str) {
+        self.params.reserve(name.len() + value.len() + 2);
+        self.params.push_str(name);
+        self.params.push('\0');
+        self.params.push_str(value);
+        self.params.push('\0');
+    }
+}
+
+/// Cancel keys usually are represented as PID+SecretKey, but to proxy they're just
+/// opaque bytes.
+#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy, FromBytes, IntoBytes, Immutable)]
+pub struct CancelKeyData(pub big_endian::U64);
+
+pub fn id_to_cancel_key(id: u64) -> CancelKeyData {
+    CancelKeyData(big_endian::U64::new(id))
+}
+
+impl fmt::Display for CancelKeyData {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let id = self.0;
+        f.debug_tuple("CancelKeyData")
+            .field(&format_args!("{id:x}"))
+            .finish()
+    }
+}
+impl Distribution<CancelKeyData> for Standard {
+    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> CancelKeyData {
+        id_to_cancel_key(rng.r#gen())
+    }
+}
+
+pub enum BeMessage<'a> {
+    AuthenticationOk,
+    AuthenticationSasl(BeAuthenticationSaslMessage<'a>),
+    AuthenticationCleartextPassword,
+    BackendKeyData(CancelKeyData),
+    ParameterStatus {
+        name: &'a [u8],
+        value: &'a [u8],
+    },
+    ReadyForQuery,
+    NoticeResponse(&'a str),
+    NegotiateProtocolVersion {
+        version: ProtocolVersion,
+        options: &'a [&'a str],
+    },
+}
+
+#[derive(Debug)]
+pub enum BeAuthenticationSaslMessage<'a> {
+    Methods(&'a [&'a str]),
+    Continue(&'a [u8]),
+    Final(&'a [u8]),
+}
+
+impl BeMessage<'_> {
+    /// Serialize the message into `buf`; the size hints given to `write_raw` count payload bytes.
+    pub fn write_message(self, buf: &mut WriteBuf) {
+        match self {
+            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONOK>
+            BeMessage::AuthenticationOk => {
+                buf.write_raw(4, b'R', |buf| buf.put_i32(0));
+            }
+            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONCLEARTEXTPASSWORD>
+            BeMessage::AuthenticationCleartextPassword => {
+                buf.write_raw(4, b'R', |buf| buf.put_i32(3));
+            }
+
+            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONSASL>
+            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(methods)) => {
+                let len: usize = methods.iter().map(|m| m.len() + 1).sum();
+                buf.write_raw(len + 5, b'R', |buf| {
+                    buf.put_i32(10); // Specifies that SASL auth method is used.
+                    for method in methods {
+                        buf.put_slice(method.as_bytes());
+                        buf.put_u8(0);
+                    }
+                    buf.put_u8(0); // zero terminator for the list
+                });
+            }
+            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONSASL>
+            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Continue(extra)) => {
+                buf.write_raw(extra.len() + 4, b'R', |buf| {
+                    buf.put_i32(11); // Continue SASL auth.
+                    buf.put_slice(extra);
+                });
+            }
+            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONSASL>
+            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Final(extra)) => {
+                buf.write_raw(extra.len() + 4, b'R', |buf| {
+                    buf.put_i32(12); // Send final SASL message.
+                    buf.put_slice(extra);
+                });
+            }
+
+            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-BACKENDKEYDATA>
+            BeMessage::BackendKeyData(key_data) => {
+                buf.write_raw(8, b'K', |buf| buf.put_slice(key_data.as_bytes()));
+            }
+
+            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-NOTICERESPONSE>
+            // <https://www.postgresql.org/docs/current/protocol-error-fields.html>
+            BeMessage::NoticeResponse(msg) => {
+                // 'N' signalizes NoticeResponse messages
+                buf.write_raw(18 + msg.len(), b'N', |buf| {
+                    // Severity: NOTICE
+                    buf.put_slice(b"SNOTICE\0");
+
+                    // Code: XX000 (ignored for notice, but still required)
+                    buf.put_slice(b"CXX000\0");
+
+                    // Message: msg
+                    buf.put_u8(b'M');
+                    buf.put_slice(msg.as_bytes());
+                    buf.put_u8(0);
+
+                    // End notice.
+                    buf.put_u8(0);
+                });
+            }
+
+            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-PARAMETERSTATUS>
+            BeMessage::ParameterStatus { name, value } => {
+                buf.write_raw(name.len() + value.len() + 2, b'S', |buf| {
+                    buf.put_slice(name.as_bytes());
+                    buf.put_u8(0);
+                    buf.put_slice(value.as_bytes());
+                    buf.put_u8(0);
+                });
+            }
+
+            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-READYFORQUERY>
+            BeMessage::ReadyForQuery => {
+                buf.write_raw(1, b'Z', |buf| buf.put_u8(b'I'));
+            }
+
+            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-NEGOTIATEPROTOCOLVERSION>
+            BeMessage::NegotiateProtocolVersion { version, options } => {
+                let len: usize = options.iter().map(|o| o.len() + 1).sum();
+                buf.write_raw(8 + len, b'v', |buf| {
+                    buf.put_slice(version.as_bytes());
+                    buf.put_u32(options.len() as u32);
+                    for option in options {
+                        buf.put_slice(option.as_bytes());
+                        buf.put_u8(0);
+                    }
+                });
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::Cursor;
+
+    use tokio::io::{AsyncWriteExt, duplex};
+    use zerocopy::IntoBytes;
+
+    use crate::pqproto::{FeStartupPacket, read_message, read_startup};
+
+    use super::ProtocolVersion;
+
+    #[tokio::test]
+    async fn reject_large_startup() {
+        // we're going to define a v3.0 startup message with far too many parameters.
+        let mut payload = vec![];
+        // 10001 + 8 bytes.
+        payload.extend_from_slice(&10009_u32.to_be_bytes());
+        payload.extend_from_slice(ProtocolVersion::new(3, 0).as_bytes());
+        payload.resize(10009, b'a');
+
+        let (mut server, mut client) = duplex(128);
+        #[rustfmt::skip]
+        let (server, client) = tokio::join!(
+            async move { read_startup(&mut server).await.unwrap_err() },
+            async move { client.write_all(&payload).await.unwrap_err() },
+        );
+
+        assert_eq!(server.to_string(), "invalid startup message length 10001");
+        assert_eq!(client.to_string(), "broken pipe");
+    }
+
+    #[tokio::test]
+    async fn reject_large_password() {
+        // we're going to define a password message that is far too long.
+        let mut payload = vec![];
+        payload.push(b'p');
+        payload.extend_from_slice(&517_u32.to_be_bytes());
+        payload.resize(518, b'a');
+
+        let (mut server, mut client) = duplex(128);
+        #[rustfmt::skip]
+        let (server, client) = tokio::join!(
+            async move { read_message(&mut server, &mut vec![], 512).await.unwrap_err() },
+            async move { client.write_all(&payload).await.unwrap_err() },
+        );
+
+        assert_eq!(server.to_string(), "invalid message length 513");
+        assert_eq!(client.to_string(), "broken pipe");
+    }
+
+    #[tokio::test]
+    async fn read_startup_message() {
+        let mut payload = vec![];
+        payload.extend_from_slice(&17_u32.to_be_bytes());
+        payload.extend_from_slice(ProtocolVersion::new(3, 0).as_bytes());
+        payload.extend_from_slice(b"abc\0def\0\0");
+
+        let startup = read_startup(&mut Cursor::new(&payload)).await.unwrap();
+        let FeStartupPacket::StartupMessage { version, params } = startup else {
+            panic!("unexpected startup message: {startup:?}");
+        };
+
+        assert_eq!(version.major(), 3);
+        assert_eq!(version.minor(), 0);
+        assert_eq!(params.params, "abc\0def\0");
+    }
+
+    #[tokio::test]
+    async fn read_ssl_message() {
+        let mut payload = vec![];
+        payload.extend_from_slice(&8_u32.to_be_bytes());
+        payload.extend_from_slice(ProtocolVersion::new(1234, 5679).as_bytes());
+
+        let startup = read_startup(&mut Cursor::new(&payload)).await.unwrap();
+        let FeStartupPacket::SslRequest { direct: None } = startup else {
+            panic!("unexpected startup message: {startup:?}");
+        };
+    }
+
+    #[tokio::test]
+    async fn read_tls_message() {
+        // sample client hello taken from <https://tls13.xargs.org/#client-hello>
+        let client_hello = [
+            0x16, 0x03, 0x01, 0x00, 0xf8, 0x01, 0x00, 0x00, 0xf4, 0x03, 0x03, 0x00, 0x01, 0x02,
+            0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
+            0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e,
+            0x1f, 0x20, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb,
+            0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9,
+            0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x08, 0x13, 0x02, 0x13, 0x03, 0x13, 0x01,
+            0x00, 0xff, 0x01, 0x00, 0x00, 0xa3, 0x00, 0x00, 0x00, 0x18, 0x00, 0x16, 0x00, 0x00,
+            0x13, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x75, 0x6c, 0x66, 0x68, 0x65,
+            0x69, 0x6d, 0x2e, 0x6e, 0x65, 0x74, 0x00, 0x0b, 0x00, 0x04, 0x03, 0x00, 0x01, 0x02,
+            0x00, 0x0a, 0x00, 0x16, 0x00, 0x14, 0x00, 0x1d, 0x00, 0x17, 0x00, 0x1e, 0x00, 0x19,
+            0x00, 0x18, 0x01, 0x00, 0x01, 0x01, 0x01, 0x02, 0x01, 0x03, 0x01, 0x04, 0x00, 0x23,
+            0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x1e,
+            0x00, 0x1c, 0x04, 0x03, 0x05, 0x03, 0x06, 0x03, 0x08, 0x07, 0x08, 0x08, 0x08, 0x09,
+            0x08, 0x0a, 0x08, 0x0b, 0x08, 0x04, 0x08, 0x05, 0x08, 0x06, 0x04, 0x01, 0x05, 0x01,
+            0x06, 0x01, 0x00, 0x2b, 0x00, 0x03, 0x02, 0x03, 0x04, 0x00, 0x2d, 0x00, 0x02, 0x01,
+            0x01, 0x00, 0x33, 0x00, 0x26, 0x00, 0x24, 0x00, 0x1d, 0x00, 0x20, 0x35, 0x80, 0x72,
+            0xd6, 0x36, 0x58, 0x80, 0xd1, 0xae, 0xea, 0x32, 0x9a, 0xdf, 0x91, 0x21, 0x38, 0x38,
+            0x51, 0xed, 0x21, 0xa2, 0x8e, 0x3b, 0x75, 0xe9, 0x65, 0xd0, 0xd2, 0xcd, 0x16, 0x62,
+            0x54,
+        ];
+
+        let mut cursor = Cursor::new(&client_hello);
+
+        let startup = read_startup(&mut cursor).await.unwrap();
+        let FeStartupPacket::SslRequest {
+            direct: Some(prefix),
+        } = startup
+        else {
+            panic!("unexpected startup message: {startup:?}");
+        };
+
+        // check that no data is lost.
+        assert_eq!(prefix, [0x16, 0x03, 0x01, 0x00, 0xf8, 0x01, 0x00, 0x00]);
+        assert_eq!(cursor.position(), 8);
+    }
+
+    #[tokio::test]
+    async fn read_message_success() {
+        let query = b"Q\0\0\0\x0cSELECT 1Q\0\0\0\x0cSELECT 2";
+        let mut cursor = Cursor::new(&query);
+
+        let mut buf = vec![];
+        let (tag, message) = read_message(&mut cursor, &mut buf, 100).await.unwrap();
+        assert_eq!(tag, b'Q');
+        assert_eq!(message, b"SELECT 1");
+
+        let (tag, message) = read_message(&mut cursor, &mut buf, 100).await.unwrap();
+        assert_eq!(tag, b'Q');
+        assert_eq!(message, b"SELECT 2");
+    }
+}
--- a/proxy/src/proxy/connect_compute.rs
+++ b/proxy/src/proxy/connect_compute.rs
@@ -1,5 +1,4 @@
 use async_trait::async_trait;
-use pq_proto::StartupMessageParams;
 use tokio::time;
 use tracing::{debug, info, warn};

@@ -15,6 +14,7 @@ use crate::error::ReportableError;
 use crate::metrics::{
    ConnectOutcome, ConnectionFailureKind, Metrics, RetriesMetricGroup, RetryType,
 };
+use crate::pqproto::StartupMessageParams;
 use crate::proxy::retry::{CouldRetry, retry_after, should_retry};
 use crate::proxy::wake_compute::wake_compute;
 use crate::types::Host;
--- a/proxy/src/proxy/handshake.rs
+++ b/proxy/src/proxy/handshake.rs
@@ -1,8 +1,3 @@
-use bytes::Buf;
-use pq_proto::framed::Framed;
-use pq_proto::{
-    BeMessage as Be, CancelKeyData, FeStartupPacket, ProtocolVersion, StartupMessageParams,
-};
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{debug, info, warn};
@@ -12,7 +7,10 @@ use crate::config::TlsConfig;
 use crate::context::RequestContext;
 use crate::error::ReportableError;
 use crate::metrics::Metrics;
-use crate::proxy::ERR_INSECURE_CONNECTION;
+use crate::pqproto::{
+    BeMessage, CancelKeyData, FeStartupPacket, ProtocolVersion, StartupMessageParams,
+};
+use crate::proxy::TlsRequired;
 use crate::stream::{PqStream, Stream, StreamUpgradeError};
 use crate::tls::PG_ALPN_PROTOCOL;

@@ -71,33 +69,25 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
    const PG_PROTOCOL_EARLIEST: ProtocolVersion = ProtocolVersion::new(3, 0);
    const PG_PROTOCOL_LATEST: ProtocolVersion = ProtocolVersion::new(3, 0);

-    let mut stream = PqStream::new(Stream::from_raw(stream));
+    let (mut stream, mut msg) = PqStream::parse_startup(Stream::from_raw(stream)).await?;
    loop {
-        let msg = stream.read_startup_packet().await?;
        match msg {
            FeStartupPacket::SslRequest { direct } => match stream.get_ref() {
                Stream::Raw { .. } if !tried_ssl => {
                    tried_ssl = true;

-                    // We can't perform TLS handshake without a config
-                    let have_tls = tls.is_some();
-                    if !direct {
-                        stream
-                            .write_message(&Be::EncryptionResponse(have_tls))
-                            .await?;
-                    } else if !have_tls {
-                        return Err(HandshakeError::ProtocolViolation);
-                    }
-
                    if let Some(tls) = tls.take() {
                        // Upgrade raw stream into a secure TLS-backed stream.
                        // NOTE: We've consumed `tls`; this fact will be used later.

-                        let Framed {
-                            stream: raw,
-                            read_buf,
-                            write_buf,
-                        } = stream.framed;
+                        let mut read_buf;
+                        let raw = if let Some(direct) = &direct {
+                            read_buf = &direct[..];
+                            stream.accept_direct_tls()
+                        } else {
+                            read_buf = &[];
+                            stream.accept_tls().await?
+                        };

                        let Stream::Raw { raw } = raw else {
                            return Err(HandshakeError::StreamUpgradeError(
@@ -105,12 +95,11 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                            ));
                        };

-                        let mut read_buf = read_buf.reader();
                        let mut res = Ok(());
                        let accept = tokio_rustls::TlsAcceptor::from(tls.pg_config.clone())
                            .accept_with(raw, |session| {
                                // push the early data to the tls session
-                                while !read_buf.get_ref().is_empty() {
+                                while !read_buf.is_empty() {
                                    match session.read_tls(&mut read_buf) {
                                        Ok(_) => {}
                                        Err(e) => {
@@ -123,7 +112,6 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(

                        res?;

-                        let read_buf = read_buf.into_inner();
                        if !read_buf.is_empty() {
                            return Err(HandshakeError::EarlyData);
                        }
@@ -157,16 +145,17 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                        let (_, tls_server_end_point) =
                            tls.cert_resolver.resolve(conn_info.server_name());

-                        stream = PqStream {
-                            framed: Framed {
-                                stream: Stream::Tls {
-                                    tls: Box::new(tls_stream),
-                                    tls_server_end_point,
-                                },
-                                read_buf,
-                                write_buf,
-                            },
+                        let tls = Stream::Tls {
+                            tls: Box::new(tls_stream),
+                            tls_server_end_point,
                        };
+                        (stream, msg) = PqStream::parse_startup(tls).await?;
+                    } else {
+                        if direct.is_some() {
+                            // client sent us a ClientHello already, we can't do anything with it.
+                            return Err(HandshakeError::ProtocolViolation);
+                        }
+                        msg = stream.reject_encryption().await?;
                    }
                }
                _ => return Err(HandshakeError::ProtocolViolation),
@@ -176,7 +165,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                    tried_gss = true;

                    // Currently, we don't support GSSAPI
-                    stream.write_message(&Be::EncryptionResponse(false)).await?;
+                    msg = stream.reject_encryption().await?;
                }
                _ => return Err(HandshakeError::ProtocolViolation),
            },
@@ -186,13 +175,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                // Check that the config has been consumed during upgrade
                // OR we didn't provide it at all (for dev purposes).
                if tls.is_some() {
-                    return stream
-                        .throw_error_str(
-                            ERR_INSECURE_CONNECTION,
-                            crate::error::ErrorKind::User,
-                            None,
-                        )
-                        .await?;
+                    Err(stream.throw_error(TlsRequired, None).await)?;
                }

                // This log highlights the start of the connection.
@@ -214,20 +197,21 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                // no protocol extensions are supported.
                // <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/backend/tcop/backend_startup.c#L744-L753>
                let mut unsupported = vec![];
-                for (k, _) in params.iter() {
+                let mut supported = StartupMessageParams::default();
+
+                for (k, v) in params.iter() {
                    if k.starts_with("_pq_.") {
                        unsupported.push(k);
+                    } else {
+                        supported.insert(k, v);
                    }
                }

-                // TODO: remove unsupported options so we don't send them to compute.
-
-                stream
-                    .write_message(&Be::NegotiateProtocolVersion {
-                        version: PG_PROTOCOL_LATEST,
-                        options: &unsupported,
-                    })
-                    .await?;
+                stream.write_message(BeMessage::NegotiateProtocolVersion {
+                    version: PG_PROTOCOL_LATEST,
+                    options: &unsupported,
+                });
+                stream.flush().await?;

                info!(
                    ?version,
@@ -235,7 +219,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                    session_type = "normal",
                    "successful handshake; unsupported minor version requested"
                );
-                break Ok(HandshakeData::Startup(stream, params));
+                break Ok(HandshakeData::Startup(stream, supported));
            }
            FeStartupPacket::StartupMessage { version, params } => {
                warn!(
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -10,15 +10,14 @@ pub(crate) mod wake_compute;
 use std::sync::Arc;

 pub use copy_bidirectional::{ErrorSource, copy_bidirectional_client_compute};
-use futures::{FutureExt, TryFutureExt};
+use futures::FutureExt;
 use itertools::Itertools;
 use once_cell::sync::OnceCell;
-use pq_proto::{BeMessage as Be, CancelKeyData, StartupMessageParams};
 use regex::Regex;
 use serde::{Deserialize, Serialize};
 use smol_str::{SmolStr, ToSmolStr, format_smolstr};
 use thiserror::Error;
-use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
+use tokio::io::{AsyncRead, AsyncWrite};
 use tokio_util::sync::CancellationToken;
 use tracing::{Instrument, debug, error, info, warn};

@@ -27,8 +26,9 @@ use self::passthrough::ProxyPassthrough;
 use crate::cancellation::{self, CancellationHandler};
 use crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig};
 use crate::context::RequestContext;
-use crate::error::ReportableError;
+use crate::error::{ReportableError, UserFacingError};
 use crate::metrics::{Metrics, NumClientConnectionsGuard};
+use crate::pqproto::{BeMessage, CancelKeyData, StartupMessageParams};
 use crate::protocol2::{ConnectHeader, ConnectionInfo, ConnectionInfoExtra, read_proxy_protocol};
 use crate::proxy::handshake::{HandshakeData, handshake};
 use crate::rate_limiter::EndpointRateLimiter;
@@ -38,6 +38,18 @@ use crate::{auth, compute};

 const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";

+#[derive(Error, Debug)]
+#[error("{ERR_INSECURE_CONNECTION}")]
+pub struct TlsRequired;
+
+impl ReportableError for TlsRequired {
+    fn get_error_kind(&self) -> crate::error::ErrorKind {
+        crate::error::ErrorKind::User
+    }
+}
+
+impl UserFacingError for TlsRequired {}
+
 pub async fn run_until_cancelled<F: std::future::Future>(
    f: F,
    cancellation_token: &CancellationToken,
@@ -329,11 +341,11 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(

    let user_info = match result {
        Ok(user_info) => user_info,
-        Err(e) => stream.throw_error(e, Some(ctx)).await?,
+        Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
    };

    let user = user_info.get_user().to_owned();
-    let (user_info, _ip_allowlist) = match user_info
+    let user_info = match user_info
        .authenticate(
            ctx,
            &mut stream,
@@ -349,10 +361,10 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
            let app = params.get("application_name");
            let params_span = tracing::info_span!("", ?user, ?db, ?app);

-            return stream
+            return Err(stream
                .throw_error(e, Some(ctx))
                .instrument(params_span)
-                .await?;
+                .await)?;
        }
    };

@@ -365,7 +377,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
        .get(NeonOptions::PARAMS_COMPAT)
        .is_some();

-    let mut node = connect_to_compute(
+    let res = connect_to_compute(
        ctx,
        &TcpMechanism {
            user_info: compute_user_info.clone(),
@@ -377,22 +389,19 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
        config.wake_compute_retry_config,
        &config.connect_to_compute,
    )
-    .or_else(|e| stream.throw_error(e, Some(ctx)))
-    .await?;
+    .await;
+
+    let node = match res {
+        Ok(node) => node,
+        Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
+    };

    let cancellation_handler_clone = Arc::clone(&cancellation_handler);
    let session = cancellation_handler_clone.get_key();

    session.write_cancel_key(node.cancel_closure.clone())?;
-
-    prepare_client_connection(&node, *session.key(), &mut stream).await?;
-
-    // Before proxy passing, forward to compute whatever data is left in the
-    // PqStream input buffer. Normally there is none, but our serverless npm
-    // driver in pipeline mode sends startup, password and first query
-    // immediately after opening the connection.
-    let (stream, read_buf) = stream.into_inner();
-    node.stream.write_all(&read_buf).await?;
+    prepare_client_connection(&node, *session.key(), &mut stream);
+    let stream = stream.flush_and_into_inner().await?;

    let private_link_id = match ctx.extra() {
        Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),
@@ -413,31 +422,28 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
 }

 /// Finish client connection initialization: confirm auth success, send params, etc.
-#[tracing::instrument(skip_all)]
-pub(crate) async fn prepare_client_connection(
+pub(crate) fn prepare_client_connection(
    node: &compute::PostgresConnection,
    cancel_key_data: CancelKeyData,
    stream: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
-) -> Result<(), std::io::Error> {
+) {
    // Forward all deferred notices to the client.
    for notice in &node.delayed_notice {
-        stream.write_message_noflush(&Be::Raw(b'N', notice.as_bytes()))?;
+        stream.write_raw(notice.as_bytes().len(), b'N', |buf| {
+            buf.extend_from_slice(notice.as_bytes());
+        });
    }

    // Forward all postgres connection params to the client.
    for (name, value) in &node.params {
-        stream.write_message_noflush(&Be::ParameterStatus {
+        stream.write_message(BeMessage::ParameterStatus {
            name: name.as_bytes(),
            value: value.as_bytes(),
-        })?;
+        });
    }

-    stream
-        .write_message_noflush(&Be::BackendKeyData(cancel_key_data))?
-        .write_message(&Be::ReadyForQuery)
-        .await?;
-
-    Ok(())
+    stream.write_message(BeMessage::BackendKeyData(cancel_key_data));
+    stream.write_message(BeMessage::ReadyForQuery);
 }

 #[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
--- a/proxy/src/proxy/retry.rs
+++ b/proxy/src/proxy/retry.rs
@@ -125,9 +125,10 @@ pub(crate) fn retry_after(num_retries: u32, config: RetryConfig) -> time::Durati

 #[cfg(test)]
 mod tests {
-    use super::ShouldRetryWakeCompute;
    use postgres_client::error::{DbError, SqlState};

+    use super::ShouldRetryWakeCompute;
+
    #[test]
    fn should_retry_wake_compute_for_db_error() {
        // These SQLStates should NOT trigger a wake_compute retry.
--- a/proxy/src/proxy/tests/mitm.rs
+++ b/proxy/src/proxy/tests/mitm.rs
@@ -10,7 +10,7 @@ use bytes::{Bytes, BytesMut};
 use futures::{SinkExt, StreamExt};
 use postgres_client::tls::TlsConnect;
 use postgres_protocol::message::frontend;
-use tokio::io::{AsyncReadExt, DuplexStream};
+use tokio::io::{AsyncReadExt, AsyncWriteExt, DuplexStream};
 use tokio_util::codec::{Decoder, Encoder};

 use super::*;
@@ -49,15 +49,14 @@ async fn proxy_mitm(
        };

        let mut end_server = tokio_util::codec::Framed::new(end_server, PgFrame);
-        let (end_client, buf) = end_client.framed.into_inner();
-        assert!(buf.is_empty());
+        let end_client = end_client.flush_and_into_inner().await.unwrap();
        let mut end_client = tokio_util::codec::Framed::new(end_client, PgFrame);

        // give the end_server the startup parameters
        let mut buf = BytesMut::new();
        frontend::startup_message(
            &postgres_protocol::message::frontend::StartupMessageParams {
-                params: startup.params.into(),
+                params: startup.params.as_bytes().into(),
            },
            &mut buf,
        )
--- a/proxy/src/proxy/tests/mod.rs
+++ b/proxy/src/proxy/tests/mod.rs
@@ -26,9 +26,7 @@ use crate::auth::backend::{
 use crate::config::{ComputeConfig, RetryConfig};
 use crate::control_plane::client::{ControlPlaneClient, TestControlPlaneClient};
 use crate::control_plane::messages::{ControlPlaneErrorMessage, Details, MetricsAuxInfo, Status};
-use crate::control_plane::{
-    self, CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedNodeInfo, NodeInfo, NodeInfoCache,
-};
+use crate::control_plane::{self, CachedNodeInfo, NodeInfo, NodeInfoCache};
 use crate::error::ErrorKind;
 use crate::tls::client_config::compute_client_config_with_certs;
 use crate::tls::postgres_rustls::MakeRustlsConnect;
@@ -128,7 +126,7 @@ trait TestAuth: Sized {
        self,
        stream: &mut PqStream<Stream<S>>,
    ) -> anyhow::Result<()> {
-        stream.write_message_noflush(&Be::AuthenticationOk)?;
+        stream.write_message(BeMessage::AuthenticationOk);
        Ok(())
    }
 }
@@ -157,9 +155,7 @@ impl TestAuth for Scram {
        self,
        stream: &mut PqStream<Stream<S>>,
    ) -> anyhow::Result<()> {
-        let outcome = auth::AuthFlow::new(stream)
-            .begin(auth::Scram(&self.0, &RequestContext::test()))
-            .await?
+        let outcome = auth::AuthFlow::new(stream, auth::Scram(&self.0, &RequestContext::test()))
            .authenticate()
            .await?;

@@ -185,10 +181,12 @@ async fn dummy_proxy(

    auth.authenticate(&mut stream).await?;

-    stream
-        .write_message_noflush(&Be::CLIENT_ENCODING)?
-        .write_message(&Be::ReadyForQuery)
-        .await?;
+    stream.write_message(BeMessage::ParameterStatus {
+        name: b"client_encoding",
+        value: b"UTF8",
+    });
+    stream.write_message(BeMessage::ReadyForQuery);
+    stream.flush().await?;

    Ok(())
 }
@@ -547,20 +545,9 @@ impl TestControlPlaneClient for TestConnectMechanism {
        }
    }

-    fn get_allowed_ips(&self) -> Result<CachedAllowedIps, control_plane::errors::GetAuthInfoError> {
-        unimplemented!("not used in tests")
-    }
-
-    fn get_allowed_vpc_endpoint_ids(
+    fn get_access_control(
        &self,
-    ) -> Result<CachedAllowedVpcEndpointIds, control_plane::errors::GetAuthInfoError> {
-        unimplemented!("not used in tests")
-    }
-
-    fn get_block_public_or_vpc_access(
-        &self,
-    ) -> Result<control_plane::CachedAccessBlockerFlags, control_plane::errors::GetAuthInfoError>
-    {
+    ) -> Result<control_plane::EndpointAccessControl, control_plane::errors::GetAuthInfoError> {
        unimplemented!("not used in tests")
    }

--- a/proxy/src/rate_limiter/leaky_bucket.rs
+++ b/proxy/src/rate_limiter/leaky_bucket.rs
@@ -15,7 +15,7 @@ pub type EndpointRateLimiter = LeakyBucketRateLimiter<EndpointIdInt>;

 pub struct LeakyBucketRateLimiter<Key> {
    map: ClashMap<Key, LeakyBucketState, RandomState>,
-    config: utils::leaky_bucket::LeakyBucketConfig,
+    default_config: utils::leaky_bucket::LeakyBucketConfig,
    access_count: AtomicUsize,
 }

@@ -28,15 +28,17 @@ impl<K: Hash + Eq> LeakyBucketRateLimiter<K> {
    pub fn new_with_shards(config: LeakyBucketConfig, shards: usize) -> Self {
        Self {
            map: ClashMap::with_hasher_and_shard_amount(RandomState::new(), shards),
-            config: config.into(),
+            default_config: config.into(),
            access_count: AtomicUsize::new(0),
        }
    }

    /// Check that number of connections to the endpoint is below `max_rps` rps.
-    pub(crate) fn check(&self, key: K, n: u32) -> bool {
+    pub(crate) fn check(&self, key: K, config: Option<LeakyBucketConfig>, n: u32) -> bool {
        let now = Instant::now();

+        let config = config.map_or(self.default_config, Into::into);
+
        if self.access_count.fetch_add(1, Ordering::AcqRel) % 2048 == 0 {
            self.do_gc(now);
        }
@@ -46,7 +48,7 @@ impl<K: Hash + Eq> LeakyBucketRateLimiter<K> {
            .entry(key)
            .or_insert_with(|| LeakyBucketState { empty_at: now });

-        entry.add_tokens(&self.config, now, n as f64).is_ok()
+        entry.add_tokens(&config, now, n as f64).is_ok()
    }

    fn do_gc(&self, now: Instant) {
--- a/proxy/src/rate_limiter/limiter.rs
+++ b/proxy/src/rate_limiter/limiter.rs
@@ -15,6 +15,8 @@ use tracing::info;
 use crate::ext::LockExt;
 use crate::intern::EndpointIdInt;

+use super::LeakyBucketConfig;
+
 pub struct GlobalRateLimiter {
    data: Vec<RateBucket>,
    info: Vec<RateBucketInfo>,
@@ -144,19 +146,6 @@ impl RateBucketInfo {
        Self::new(50_000, Duration::from_secs(10)),
    ];

-    /// All of these are per endpoint-maskedip pair.
-    /// Context: 4096 rounds of pbkdf2 take about 1ms of cpu time to execute (1 milli-cpu-second or 1mcpus).
-    ///
-    /// First bucket: 1000mcpus total per endpoint-ip pair
-    /// * 4096000 requests per second with 1 hash rounds.
-    /// * 1000 requests per second with 4096 hash rounds.
-    /// * 6.8 requests per second with 600000 hash rounds.
-    pub const DEFAULT_AUTH_SET: [Self; 3] = [
-        Self::new(1000 * 4096, Duration::from_secs(1)),
-        Self::new(600 * 4096, Duration::from_secs(60)),
-        Self::new(300 * 4096, Duration::from_secs(600)),
-    ];
-
    pub fn rps(&self) -> f64 {
        (self.max_rpi as f64) / self.interval.as_secs_f64()
    }
@@ -184,6 +173,21 @@ impl RateBucketInfo {
            max_rpi: ((max_rps as u64) * (interval.as_millis() as u64) / 1000) as u32,
        }
    }
+
+    pub fn to_leaky_bucket(this: &[Self]) -> Option<LeakyBucketConfig> {
+        // bit of a hack - find the min rps and max rps supported and turn it into
+        // leaky bucket config instead
+
+        let mut iter = this.iter().map(|info| info.rps());
+        let first = iter.next()?;
+
+        let (min, max) = (first, first);
+        let (min, max) = iter.fold((min, max), |(min, max), rps| {
+            (f64::min(min, rps), f64::max(max, rps))
+        });
+
+        Some(LeakyBucketConfig { rps: min, max })
+    }
 }

 impl<K: Hash + Eq> BucketRateLimiter<K> {
--- a/proxy/src/rate_limiter/mod.rs
+++ b/proxy/src/rate_limiter/mod.rs
@@ -8,4 +8,4 @@ pub(crate) use limit_algorithm::aimd::Aimd;
 pub(crate) use limit_algorithm::{
    DynamicLimiter, Outcome, RateLimitAlgorithm, RateLimiterConfig, Token,
 };
-pub use limiter::{BucketRateLimiter, GlobalRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
+pub use limiter::{GlobalRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
--- a/proxy/src/redis/cancellation_publisher.rs
+++ b/proxy/src/redis/cancellation_publisher.rs
@@ -1,10 +1,11 @@
 use core::net::IpAddr;
 use std::sync::Arc;

-use pq_proto::CancelKeyData;
 use tokio::sync::Mutex;
 use uuid::Uuid;

+use crate::pqproto::CancelKeyData;
+
 pub trait CancellationPublisherMut: Send + Sync + 'static {
    #[allow(async_fn_in_trait)]
    async fn try_publish(
--- a/proxy/src/redis/keys.rs
+++ b/proxy/src/redis/keys.rs
@@ -1,16 +1,15 @@
 use std::io::ErrorKind;

 use anyhow::Ok;
-use pq_proto::{CancelKeyData, id_to_cancel_key};
-use serde::{Deserialize, Serialize};
+
+use crate::pqproto::{CancelKeyData, id_to_cancel_key};

 pub mod keyspace {
    pub const CANCEL_PREFIX: &str = "cancel";
 }

-#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
+#[derive(Clone, Debug, Eq, PartialEq)]
 pub(crate) enum KeyPrefix {
-    #[serde(untagged)]
    Cancel(CancelKeyData),
 }

@@ -18,9 +17,7 @@ impl KeyPrefix {
    pub(crate) fn build_redis_key(&self) -> String {
        match self {
            KeyPrefix::Cancel(key) => {
-                let hi = (key.backend_pid as u64) << 32;
-                let lo = (key.cancel_key as u64) & 0xffff_ffff;
-                let id = hi | lo;
+                let id = key.0.get();
                let keyspace = keyspace::CANCEL_PREFIX;
                format!("{keyspace}:{id:x}")
            }
@@ -63,10 +60,7 @@ mod tests {

    #[test]
    fn test_build_redis_key() {
-        let cancel_key: KeyPrefix = KeyPrefix::Cancel(CancelKeyData {
-            backend_pid: 12345,
-            cancel_key: 54321,
-        });
+        let cancel_key: KeyPrefix = KeyPrefix::Cancel(id_to_cancel_key(12345 << 32 | 54321));

        let redis_key = cancel_key.build_redis_key();
        assert_eq!(redis_key, "cancel:30390000d431");
@@ -77,10 +71,7 @@ mod tests {
        let redis_key = "cancel:30390000d431";
        let key: KeyPrefix = parse_redis_key(redis_key).expect("Failed to parse key");

-        let ref_key = CancelKeyData {
-            backend_pid: 12345,
-            cancel_key: 54321,
-        };
+        let ref_key = id_to_cancel_key(12345 << 32 | 54321);

        assert_eq!(key.as_str(), KeyPrefix::Cancel(ref_key).as_str());
        let KeyPrefix::Cancel(cancel_key) = key;
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -2,11 +2,9 @@ use std::convert::Infallible;
 use std::sync::Arc;

 use futures::StreamExt;
-use pq_proto::CancelKeyData;
 use redis::aio::PubSub;
 use serde::{Deserialize, Serialize};
 use tokio_util::sync::CancellationToken;
-use uuid::Uuid;

 use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
 use crate::cache::project_info::ProjectInfoCache;
@@ -100,14 +98,6 @@ pub(crate) struct PasswordUpdate {
    role_name: RoleNameInt,
 }

-#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
-pub(crate) struct CancelSession {
-    pub(crate) region_id: Option<String>,
-    pub(crate) cancel_key_data: CancelKeyData,
-    pub(crate) session_id: Uuid,
-    pub(crate) peer_addr: Option<std::net::IpAddr>,
-}
-
 fn deserialize_json_string<'de, D, T>(deserializer: D) -> Result<T, D::Error>
 where
    T: for<'de2> serde::Deserialize<'de2>,
@@ -243,29 +233,30 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {

 fn invalidate_cache<C: ProjectInfoCache>(cache: Arc<C>, msg: Notification) {
    match msg {
-        Notification::AllowedIpsUpdate { allowed_ips_update } => {
-            cache.invalidate_allowed_ips_for_project(allowed_ips_update.project_id);
+        Notification::AllowedIpsUpdate {
+            allowed_ips_update: AllowedIpsUpdate { project_id },
        }
-        Notification::BlockPublicOrVpcAccessUpdated {
-            block_public_or_vpc_access_updated,
-        } => cache.invalidate_block_public_or_vpc_access_for_project(
-            block_public_or_vpc_access_updated.project_id,
-        ),
+        | Notification::BlockPublicOrVpcAccessUpdated {
+            block_public_or_vpc_access_updated: BlockPublicOrVpcAccessUpdated { project_id },
+        } => cache.invalidate_endpoint_access_for_project(project_id),
        Notification::AllowedVpcEndpointsUpdatedForOrg {
-            allowed_vpc_endpoints_updated_for_org,
-        } => cache.invalidate_allowed_vpc_endpoint_ids_for_org(
-            allowed_vpc_endpoints_updated_for_org.account_id,
-        ),
+            allowed_vpc_endpoints_updated_for_org: AllowedVpcEndpointsUpdatedForOrg { account_id },
+        } => cache.invalidate_endpoint_access_for_org(account_id),
        Notification::AllowedVpcEndpointsUpdatedForProjects {
-            allowed_vpc_endpoints_updated_for_projects,
-        } => cache.invalidate_allowed_vpc_endpoint_ids_for_projects(
-            allowed_vpc_endpoints_updated_for_projects.project_ids,
-        ),
-        Notification::PasswordUpdate { password_update } => cache
-            .invalidate_role_secret_for_project(
-                password_update.project_id,
-                password_update.role_name,
-            ),
+            allowed_vpc_endpoints_updated_for_projects:
+                AllowedVpcEndpointsUpdatedForProjects { project_ids },
+        } => {
+            for project in project_ids {
+                cache.invalidate_endpoint_access_for_project(project);
+            }
+        }
+        Notification::PasswordUpdate {
+            password_update:
+                PasswordUpdate {
+                    project_id,
+                    role_name,
+                },
+        } => cache.invalidate_role_secret_for_project(project_id, role_name),
        Notification::UnknownTopic => unreachable!(),
    }
 }
--- a/proxy/src/sasl/messages.rs
+++ b/proxy/src/sasl/messages.rs
@@ -1,7 +1,5 @@
 //! Definitions for SASL messages.

-use pq_proto::{BeAuthenticationSaslMessage, BeMessage};
-
 use crate::parse::split_cstr;

 /// SASL-specific payload of [`PasswordMessage`](pq_proto::FeMessage::PasswordMessage).
@@ -30,26 +28,6 @@ impl<'a> FirstMessage<'a> {
    }
 }

-/// A single SASL message.
-/// This struct is deliberately decoupled from lower-level
-/// [`BeAuthenticationSaslMessage`].
-#[derive(Debug)]
-pub(super) enum ServerMessage<T> {
-    /// We expect to see more steps.
-    Continue(T),
-    /// This is the final step.
-    Final(T),
-}
-
-impl<'a> ServerMessage<&'a str> {
-    pub(super) fn to_reply(&self) -> BeMessage<'a> {
-        BeMessage::AuthenticationSasl(match self {
-            ServerMessage::Continue(s) => BeAuthenticationSaslMessage::Continue(s.as_bytes()),
-            ServerMessage::Final(s) => BeAuthenticationSaslMessage::Final(s.as_bytes()),
-        })
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/proxy/src/sasl/mod.rs
+++ b/proxy/src/sasl/mod.rs
@@ -14,7 +14,7 @@ use std::io;

 pub(crate) use channel_binding::ChannelBinding;
 pub(crate) use messages::FirstMessage;
-pub(crate) use stream::{Outcome, SaslStream};
+pub(crate) use stream::{Outcome, authenticate};
 use thiserror::Error;

 use crate::error::{ReportableError, UserFacingError};
@@ -22,6 +22,9 @@ use crate::error::{ReportableError, UserFacingError};
 /// Fine-grained auth errors help in writing tests.
 #[derive(Error, Debug)]
 pub(crate) enum Error {
+    #[error("Unsupported authentication method: {0}")]
+    BadAuthMethod(Box<str>),
+
    #[error("Channel binding failed: {0}")]
    ChannelBindingFailed(&'static str),

@@ -54,6 +57,7 @@ impl UserFacingError for Error {
 impl ReportableError for Error {
    fn get_error_kind(&self) -> crate::error::ErrorKind {
        match self {
+            Error::BadAuthMethod(_) => crate::error::ErrorKind::User,
            Error::ChannelBindingFailed(_) => crate::error::ErrorKind::User,
            Error::ChannelBindingBadMethod(_) => crate::error::ErrorKind::User,
            Error::BadClientMessage(_) => crate::error::ErrorKind::User,
--- a/proxy/src/sasl/stream.rs
+++ b/proxy/src/sasl/stream.rs
@@ -3,61 +3,12 @@
 use std::io;

 use tokio::io::{AsyncRead, AsyncWrite};
-use tracing::info;

-use super::Mechanism;
-use super::messages::ServerMessage;
+use super::{Mechanism, Step};
+use crate::context::RequestContext;
+use crate::pqproto::{BeAuthenticationSaslMessage, BeMessage};
 use crate::stream::PqStream;

-/// Abstracts away all peculiarities of the libpq's protocol.
-pub(crate) struct SaslStream<'a, S> {
-    /// The underlying stream.
-    stream: &'a mut PqStream<S>,
-    /// Current password message we received from client.
-    current: bytes::Bytes,
-    /// First SASL message produced by client.
-    first: Option<&'a str>,
-}
-
-impl<'a, S> SaslStream<'a, S> {
-    pub(crate) fn new(stream: &'a mut PqStream<S>, first: &'a str) -> Self {
-        Self {
-            stream,
-            current: bytes::Bytes::new(),
-            first: Some(first),
-        }
-    }
-}
-
-impl<S: AsyncRead + Unpin> SaslStream<'_, S> {
-    // Receive a new SASL message from the client.
-    async fn recv(&mut self) -> io::Result<&str> {
-        if let Some(first) = self.first.take() {
-            return Ok(first);
-        }
-
-        self.current = self.stream.read_password_message().await?;
-        let s = std::str::from_utf8(&self.current)
-            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "bad encoding"))?;
-
-        Ok(s)
-    }
-}
-
-impl<S: AsyncWrite + Unpin> SaslStream<'_, S> {
-    // Send a SASL message to the client.
-    async fn send(&mut self, msg: &ServerMessage<&str>) -> io::Result<()> {
-        self.stream.write_message(&msg.to_reply()).await?;
-        Ok(())
-    }
-
-    // Queue a SASL message for the client.
-    fn send_noflush(&mut self, msg: &ServerMessage<&str>) -> io::Result<()> {
-        self.stream.write_message_noflush(&msg.to_reply())?;
-        Ok(())
-    }
-}
-
 /// SASL authentication outcome.
 /// It's much easier to match on those two variants
 /// than to peek into a noisy protocol error type.
@@ -69,33 +20,62 @@ pub(crate) enum Outcome<R> {
    Failure(&'static str),
 }

-impl<S: AsyncRead + AsyncWrite + Unpin> SaslStream<'_, S> {
-    /// Perform SASL message exchange according to the underlying algorithm
-    /// until user is either authenticated or denied access.
-    pub(crate) async fn authenticate<M: Mechanism>(
-        mut self,
-        mut mechanism: M,
-    ) -> super::Result<Outcome<M::Output>> {
-        loop {
-            let input = self.recv().await?;
-            let step = mechanism.exchange(input).map_err(|error| {
-                info!(?error, "error during SASL exchange");
-                error
-            })?;
+pub async fn authenticate<S, F, M>(
+    ctx: &RequestContext,
+    stream: &mut PqStream<S>,
+    mechanism: F,
+) -> super::Result<Outcome<M::Output>>
+where
+    S: AsyncRead + AsyncWrite + Unpin,
+    F: FnOnce(&str) -> super::Result<M>,
+    M: Mechanism,
+{
+    let sasl = {
+        // pause the timer while we communicate with the client
+        let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);

-            use super::Step;
-            return Ok(match step {
-                Step::Continue(moved_mechanism, reply) => {
-                    self.send(&ServerMessage::Continue(&reply)).await?;
-                    mechanism = moved_mechanism;
-                    continue;
-                }
-                Step::Success(result, reply) => {
-                    self.send_noflush(&ServerMessage::Final(&reply))?;
-                    Outcome::Success(result)
-                }
-                Step::Failure(reason) => Outcome::Failure(reason),
-            });
+        // Initial client message contains the chosen auth method's name.
+        let msg = stream.read_password_message().await?;
+        super::FirstMessage::parse(msg).ok_or(super::Error::BadClientMessage("bad sasl message"))?
+    };
+
+    let mut mechanism = mechanism(sasl.method)?;
+    let mut input = sasl.message;
+    loop {
+        let step = mechanism
+            .exchange(input)
+            .inspect_err(|error| tracing::info!(?error, "error during SASL exchange"))?;
+
+        match step {
+            Step::Continue(moved_mechanism, reply) => {
+                mechanism = moved_mechanism;
+
+                // pause the timer while we communicate with the client
+                let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
+
+                // write reply
+                let sasl_msg = BeAuthenticationSaslMessage::Continue(reply.as_bytes());
+                stream.write_message(BeMessage::AuthenticationSasl(sasl_msg));
+
+                // get next input
+                stream.flush().await?;
+                let msg = stream.read_password_message().await?;
+                input = std::str::from_utf8(msg)
+                    .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "bad encoding"))?;
+            }
+            Step::Success(result, reply) => {
+                // pause the timer while we communicate with the client
+                let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
+
+                // write reply
+                let sasl_msg = BeAuthenticationSaslMessage::Final(reply.as_bytes());
+                stream.write_message(BeMessage::AuthenticationSasl(sasl_msg));
+                stream.write_message(BeMessage::AuthenticationOk);
+                // exit with success
+                break Ok(Outcome::Success(result));
+            }
+            // exit with failure
+            Step::Failure(reason) => break Ok(Outcome::Failure(reason)),
        }
    }
 }
--- a/proxy/src/serverless/backend.rs
+++ b/proxy/src/serverless/backend.rs
@@ -22,7 +22,7 @@ use super::http_conn_pool::{self, HttpConnPool, Send, poll_http2_client};
 use super::local_conn_pool::{self, EXT_NAME, EXT_SCHEMA, EXT_VERSION, LocalConnPool};
 use crate::auth::backend::local::StaticAuthRules;
 use crate::auth::backend::{ComputeCredentials, ComputeUserInfo};
-use crate::auth::{self, AuthError, check_peer_addr_is_in_list};
+use crate::auth::{self, AuthError};
 use crate::compute;
 use crate::compute_ctl::{
    ComputeCtlError, ExtensionInstallRequest, Privilege, SetRoleGrantsRequest,
@@ -35,7 +35,6 @@ use crate::control_plane::errors::{GetAuthInfoError, WakeComputeError};
 use crate::control_plane::locks::ApiLocks;
 use crate::error::{ErrorKind, ReportableError, UserFacingError};
 use crate::intern::EndpointIdInt;
-use crate::protocol2::ConnectionInfoExtra;
 use crate::proxy::connect_compute::ConnectMechanism;
 use crate::proxy::retry::{CouldRetry, ShouldRetryWakeCompute};
 use crate::rate_limiter::EndpointRateLimiter;
@@ -63,63 +62,24 @@ impl PoolingBackend {

        let user_info = user_info.clone();
        let backend = self.auth_backend.as_ref().map(|()| user_info.clone());
-        let allowed_ips = backend.get_allowed_ips(ctx).await?;
+        let access_control = backend.get_endpoint_access_control(ctx).await?;
+        access_control.check(
+            ctx,
+            self.config.authentication_config.ip_allowlist_check_enabled,
+            self.config.authentication_config.is_vpc_acccess_proxy,
+        )?;

-        if self.config.authentication_config.ip_allowlist_check_enabled
-            && !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips)
-        {
-            return Err(AuthError::ip_address_not_allowed(ctx.peer_addr()));
-        }
-
-        let access_blocker_flags = backend.get_block_public_or_vpc_access(ctx).await?;
-        if self.config.authentication_config.is_vpc_acccess_proxy {
-            if access_blocker_flags.vpc_access_blocked {
-                return Err(AuthError::NetworkNotAllowed);
-            }
-
-            let extra = ctx.extra();
-            let incoming_endpoint_id = match extra {
-                None => String::new(),
-                Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(),
-                Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
-            };
-
-            if incoming_endpoint_id.is_empty() {
-                return Err(AuthError::MissingVPCEndpointId);
-            }
-
-            let allowed_vpc_endpoint_ids = backend.get_allowed_vpc_endpoint_ids(ctx).await?;
-            // TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
-            if !allowed_vpc_endpoint_ids.is_empty()
-                && !allowed_vpc_endpoint_ids.contains(&incoming_endpoint_id)
-            {
-                return Err(AuthError::vpc_endpoint_id_not_allowed(incoming_endpoint_id));
-            }
-        } else if access_blocker_flags.public_access_blocked {
-            return Err(AuthError::NetworkNotAllowed);
-        }
-
-        if !self
-            .endpoint_rate_limiter
-            .check(user_info.endpoint.clone().into(), 1)
-        {
+        let ep = EndpointIdInt::from(&user_info.endpoint);
+        let rate_limit_config = None;
+        if !self.endpoint_rate_limiter.check(ep, rate_limit_config, 1) {
            return Err(AuthError::too_many_connections());
        }
-        let cached_secret = backend.get_role_secret(ctx).await?;
-        let secret = match cached_secret.value.clone() {
-            Some(secret) => self.config.authentication_config.check_rate_limit(
-                ctx,
-                secret,
-                &user_info.endpoint,
-                true,
-            )?,
-            None => {
-                // If we don't have an authentication secret, for the http flow we can just return an error.
-                info!("authentication info not found");
-                return Err(AuthError::password_failed(&*user_info.user));
-            }
+        let role_access = backend.get_role_secret(ctx).await?;
+        let Some(secret) = role_access.secret else {
+            // If we don't have an authentication secret, for the http flow we can just return an error.
+            info!("authentication info not found");
+            return Err(AuthError::password_failed(&*user_info.user));
        };
-        let ep = EndpointIdInt::from(&user_info.endpoint);
        let auth_outcome = crate::auth::validate_password_and_exchange(
            &self.config.authentication_config.thread_pool,
            ep,
--- a/proxy/src/serverless/sql_over_http.rs
+++ b/proxy/src/serverless/sql_over_http.rs
@@ -17,7 +17,6 @@ use postgres_client::error::{DbError, ErrorPosition, SqlState};
 use postgres_client::{
    GenericClient, IsolationLevel, NoTls, ReadyForQueryStatus, RowStream, Transaction,
 };
-use pq_proto::StartupMessageParamsBuilder;
 use serde::Serialize;
 use serde_json::Value;
 use serde_json::value::RawValue;
@@ -41,6 +40,7 @@ use crate::context::RequestContext;
 use crate::error::{ErrorKind, ReportableError, UserFacingError};
 use crate::http::{ReadBodyError, read_body_with_limit};
 use crate::metrics::{HttpDirection, Metrics, SniGroup, SniKind};
+use crate::pqproto::StartupMessageParams;
 use crate::proxy::{NeonOptions, run_until_cancelled};
 use crate::serverless::backend::HttpConnError;
 use crate::types::{DbName, RoleName};
@@ -219,7 +219,7 @@ fn get_conn_info(

    let mut options = Option::None;

-    let mut params = StartupMessageParamsBuilder::default();
+    let mut params = StartupMessageParams::default();
    params.insert("user", &username);
    params.insert("database", &dbname);
    for (key, value) in pairs {
--- a/proxy/src/stream.rs
+++ b/proxy/src/stream.rs
@@ -2,19 +2,17 @@ use std::pin::Pin;
 use std::sync::Arc;
 use std::{io, task};

-use bytes::BytesMut;
-use pq_proto::framed::{ConnectionError, Framed};
-use pq_proto::{BeMessage, FeMessage, FeStartupPacket, ProtocolError};
 use rustls::ServerConfig;
-use serde::{Deserialize, Serialize};
 use thiserror::Error;
-use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
+use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, ReadBuf};
 use tokio_rustls::server::TlsStream;
-use tracing::debug;

-use crate::control_plane::messages::ColdStartInfo;
 use crate::error::{ErrorKind, ReportableError, UserFacingError};
 use crate::metrics::Metrics;
+use crate::pqproto::{
+    BeMessage, FE_PASSWORD_MESSAGE, FeStartupPacket, SQLSTATE_INTERNAL_ERROR, WriteBuf,
+    read_message, read_startup,
+};
 use crate::tls::TlsServerEndPoint;

 /// Stream wrapper which implements libpq's protocol.
@@ -23,58 +21,77 @@ use crate::tls::TlsServerEndPoint;
 /// or [`AsyncWrite`] to prevent subtle errors (e.g. trying
 /// to pass random malformed bytes through the connection).
 pub struct PqStream<S> {
-    pub(crate) framed: Framed<S>,
+    stream: S,
+    read: Vec<u8>,
+    write: WriteBuf,
 }

 impl<S> PqStream<S> {
-    /// Construct a new libpq protocol wrapper.
-    pub fn new(stream: S) -> Self {
+    pub fn get_ref(&self) -> &S {
+        &self.stream
+    }
+
+    /// Construct a new libpq protocol wrapper over a stream without the first startup message.
+    #[cfg(test)]
+    pub fn new_skip_handshake(stream: S) -> Self {
        Self {
-            framed: Framed::new(stream),
+            stream,
+            read: Vec::new(),
+            write: WriteBuf::new(),
        }
    }
-
-    /// Extract the underlying stream and read buffer.
-    pub fn into_inner(self) -> (S, BytesMut) {
-        self.framed.into_inner()
-    }
-
-    /// Get a shared reference to the underlying stream.
-    pub(crate) fn get_ref(&self) -> &S {
-        self.framed.get_ref()
-    }
 }

-fn err_connection() -> io::Error {
-    io::Error::new(io::ErrorKind::ConnectionAborted, "connection is lost")
+impl<S: AsyncRead + AsyncWrite + Unpin> PqStream<S> {
+    /// Construct a new libpq protocol wrapper and read the first startup message.
+    ///
+    /// This is not cancel safe.
+    pub async fn parse_startup(mut stream: S) -> io::Result<(Self, FeStartupPacket)> {
+        let startup = read_startup(&mut stream).await?;
+        Ok((
+            Self {
+                stream,
+                read: Vec::new(),
+                write: WriteBuf::new(),
+            },
+            startup,
+        ))
+    }
+
+    /// Tell the client that encryption is not supported.
+    ///
+    /// This is not cancel safe
+    pub async fn reject_encryption(&mut self) -> io::Result<FeStartupPacket> {
+        // N for No.
+        self.write.encryption(b'N');
+        self.flush().await?;
+        read_startup(&mut self.stream).await
+    }
 }

 impl<S: AsyncRead + Unpin> PqStream<S> {
-    /// Receive [`FeStartupPacket`], which is a first packet sent by a client.
-    pub async fn read_startup_packet(&mut self) -> io::Result<FeStartupPacket> {
-        self.framed
-            .read_startup_message()
-            .await
-            .map_err(ConnectionError::into_io_error)?
-            .ok_or_else(err_connection)
-    }
-
-    async fn read_message(&mut self) -> io::Result<FeMessage> {
-        self.framed
-            .read_message()
-            .await
-            .map_err(ConnectionError::into_io_error)?
-            .ok_or_else(err_connection)
-    }
-
-    pub(crate) async fn read_password_message(&mut self) -> io::Result<bytes::Bytes> {
-        match self.read_message().await? {
-            FeMessage::PasswordMessage(msg) => Ok(msg),
-            bad => Err(io::Error::new(
-                io::ErrorKind::InvalidData,
-                format!("unexpected message type: {bad:?}"),
-            )),
+    /// Read a raw postgres packet, which will respect the max length requested.
+    /// This is not cancel safe.
+    async fn read_raw_expect(&mut self, tag: u8, max: usize) -> io::Result<&mut [u8]> {
+        let (actual_tag, msg) = read_message(&mut self.stream, &mut self.read, max).await?;
+        if actual_tag != tag {
+            return Err(io::Error::other(format!(
+                "incorrect message tag, expected {:?}, got {:?}",
+                tag as char, actual_tag as char,
+            )));
        }
+        Ok(msg)
+    }
+
+    /// Read a postgres password message, which will respect the max length requested.
+    /// This is not cancel safe.
+    pub async fn read_password_message(&mut self) -> io::Result<&mut [u8]> {
+        // passwords are usually pretty short
+        // and SASL SCRAM messages are no longer than 256 bytes in my testing
+        // (a few hashes and random bytes, encoded into base64).
+        const MAX_PASSWORD_LENGTH: usize = 512;
+        self.read_raw_expect(FE_PASSWORD_MESSAGE, MAX_PASSWORD_LENGTH)
+            .await
    }
 }

@@ -84,6 +101,16 @@ pub struct ReportedError {
    error_kind: ErrorKind,
 }

+impl ReportedError {
+    pub fn new(e: (impl UserFacingError + Into<anyhow::Error>)) -> Self {
+        let error_kind = e.get_error_kind();
+        Self {
+            source: e.into(),
+            error_kind,
+        }
+    }
+}
+
 impl std::fmt::Display for ReportedError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.source.fmt(f)
@@ -102,109 +129,65 @@ impl ReportableError for ReportedError {
    }
 }

-#[derive(Serialize, Deserialize, Debug)]
-enum ErrorTag {
-    #[serde(rename = "proxy")]
-    Proxy,
-    #[serde(rename = "compute")]
-    Compute,
-    #[serde(rename = "client")]
-    Client,
-    #[serde(rename = "controlplane")]
-    ControlPlane,
-    #[serde(rename = "other")]
-    Other,
-}
-
-impl From<ErrorKind> for ErrorTag {
-    fn from(error_kind: ErrorKind) -> Self {
-        match error_kind {
-            ErrorKind::User => Self::Client,
-            ErrorKind::ClientDisconnect => Self::Client,
-            ErrorKind::RateLimit => Self::Proxy,
-            ErrorKind::ServiceRateLimit => Self::Proxy, // considering rate limit as proxy error for SLI
-            ErrorKind::Quota => Self::Proxy,
-            ErrorKind::Service => Self::Proxy,
-            ErrorKind::ControlPlane => Self::ControlPlane,
-            ErrorKind::Postgres => Self::Other,
-            ErrorKind::Compute => Self::Compute,
-        }
-    }
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-#[serde(rename_all = "snake_case")]
-struct ProbeErrorData {
-    tag: ErrorTag,
-    msg: String,
-    cold_start_info: Option<ColdStartInfo>,
-}
-
 impl<S: AsyncWrite + Unpin> PqStream<S> {
-    /// Write the message into an internal buffer, but don't flush the underlying stream.
-    pub(crate) fn write_message_noflush(
-        &mut self,
-        message: &BeMessage<'_>,
-    ) -> io::Result<&mut Self> {
-        self.framed
-            .write_message(message)
-            .map_err(ProtocolError::into_io_error)?;
-        Ok(self)
+    /// Tell the client that we are willing to accept SSL.
+    /// This is not cancel safe
+    pub async fn accept_tls(mut self) -> io::Result<S> {
+        // S for SSL.
+        self.write.encryption(b'S');
+        self.flush().await?;
+        Ok(self.stream)
    }

-    /// Write the message into an internal buffer and flush it.
-    pub async fn write_message(&mut self, message: &BeMessage<'_>) -> io::Result<&mut Self> {
-        self.write_message_noflush(message)?;
-        self.flush().await?;
-        Ok(self)
+    /// Hand back the raw stream without writing any reply; the caller asserts direct TLS is in use.
+    pub fn accept_direct_tls(self) -> S {
+        self.stream
+    }
+
+    /// Buffer a raw message with the given `tag`; nothing reaches the stream until [`Self::flush`].
+    pub fn write_raw(&mut self, size_hint: usize, tag: u8, f: impl FnOnce(&mut Vec<u8>)) {
+        self.write.write_raw(size_hint, tag, f);
+    }
+
+    /// Write the message into an internal buffer; it is only sent on the next [`Self::flush`].
+    pub fn write_message(&mut self, message: BeMessage<'_>) {
+        message.write_message(&mut self.write);
+    }

    /// Flush the output buffer into the underlying stream.
-    pub(crate) async fn flush(&mut self) -> io::Result<&mut Self> {
-        self.framed.flush().await?;
-        Ok(self)
+    ///
+    /// This is cancel safe.
+    pub async fn flush(&mut self) -> io::Result<()> {
+        self.stream.write_all_buf(&mut self.write).await?;
+        self.write.reset();
+
+        self.stream.flush().await?;
+
+        Ok(())
    }

-    /// Writes message with the given error kind to the stream.
-    /// Used only for probe queries
-    async fn write_format_message(
-        &mut self,
-        msg: &str,
-        error_kind: ErrorKind,
-        ctx: Option<&crate::context::RequestContext>,
-    ) -> String {
-        let formatted_msg = match ctx {
-            Some(ctx) if ctx.get_testodrome_id().is_some() => {
-                serde_json::to_string(&ProbeErrorData {
-                    tag: ErrorTag::from(error_kind),
-                    msg: msg.to_string(),
-                    cold_start_info: Some(ctx.cold_start_info()),
-                })
-                .unwrap_or_default()
-            }
-            _ => msg.to_string(),
-        };
-
-        // already error case, ignore client IO error
-        self.write_message(&BeMessage::ErrorResponse(&formatted_msg, None))
-            .await
-            .inspect_err(|e| debug!("write_message failed: {e}"))
-            .ok();
-
-        formatted_msg
+    /// Flush the output buffer, then hand back the underlying stream.
+    ///
+    /// This is cancel safe (NOTE(review): `self` is consumed, so cancelling drops the stream — confirm).
+    pub async fn flush_and_into_inner(mut self) -> io::Result<S> {
+        self.flush().await?;
+        Ok(self.stream)
    }

-    /// Write the error message using [`Self::write_format_message`], then re-throw it.
-    /// Allowing string literals is safe under the assumption they might not contain any runtime info.
-    /// This method exists due to `&str` not implementing `Into<anyhow::Error>`.
+    /// Write the error message to the client, then return it wrapped in a [`ReportedError`].
+    ///
+    /// Trait [`UserFacingError`] acts as an allowlist for error types.
    /// If `ctx` is provided and has testodrome_id set, error messages will be prefixed according to error kind.
-    pub async fn throw_error_str<T>(
+    pub(crate) async fn throw_error<E>(
        &mut self,
-        msg: &'static str,
-        error_kind: ErrorKind,
+        error: E,
        ctx: Option<&crate::context::RequestContext>,
-    ) -> Result<T, ReportedError> {
-        self.write_format_message(msg, error_kind, ctx).await;
+    ) -> ReportedError
+    where
+        E: UserFacingError + Into<anyhow::Error>,
+    {
+        let error_kind = error.get_error_kind();
+        let msg = error.to_string_client();

        if error_kind != ErrorKind::RateLimit && error_kind != ErrorKind::User {
            tracing::info!(
@@ -214,39 +197,39 @@ impl<S: AsyncWrite + Unpin> PqStream<S> {
            );
        }

-        Err(ReportedError {
-            source: anyhow::anyhow!(msg),
-            error_kind,
-        })
-    }
-
-    /// Write the error message using [`Self::write_format_message`], then re-throw it.
-    /// Trait [`UserFacingError`] acts as an allowlist for error types.
-    /// If `ctx` is provided and has testodrome_id set, error messages will be prefixed according to error kind.
-    pub(crate) async fn throw_error<T, E>(
-        &mut self,
-        error: E,
-        ctx: Option<&crate::context::RequestContext>,
-    ) -> Result<T, ReportedError>
-    where
-        E: UserFacingError + Into<anyhow::Error>,
-    {
-        let error_kind = error.get_error_kind();
-        let msg = error.to_string_client();
-        self.write_format_message(&msg, error_kind, ctx).await;
-        if error_kind != ErrorKind::RateLimit && error_kind != ErrorKind::User {
-            tracing::info!(
-                kind=error_kind.to_metric_label(),
-                error=%error,
-                msg,
-                "forwarding error to user",
-            );
+        let probe_msg;
+        let mut msg = &*msg;
+        if let Some(ctx) = ctx {
+            if ctx.get_testodrome_id().is_some() {
+                let tag = match error_kind {
+                    ErrorKind::User => "client",
+                    ErrorKind::ClientDisconnect => "client",
+                    ErrorKind::RateLimit => "proxy",
+                    ErrorKind::ServiceRateLimit => "proxy",
+                    ErrorKind::Quota => "proxy",
+                    ErrorKind::Service => "proxy",
+                    ErrorKind::ControlPlane => "controlplane",
+                    ErrorKind::Postgres => "other",
+                    ErrorKind::Compute => "compute",
+                };
+                probe_msg = typed_json::json!({
+                    "tag": tag,
+                    "msg": msg,
+                    "cold_start_info": ctx.cold_start_info(),
+                })
+                .to_string();
+                msg = &probe_msg;
+            }
        }

-        Err(ReportedError {
-            source: anyhow::anyhow!(error),
-            error_kind,
-        })
+        // TODO: either preserve the error code from postgres, or assign error codes to proxy errors.
+        self.write.write_error(msg, SQLSTATE_INTERNAL_ERROR);
+
+        self.flush()
+            .await
+            .unwrap_or_else(|e| tracing::debug!("write_message failed: {e}"));
+
+        ReportedError::new(error)
    }
 }

--- a/safekeeper/src/timelines_global_map.rs
+++ b/safekeeper/src/timelines_global_map.rs
@@ -44,6 +44,7 @@ struct GlobalTimelinesState {
    // on-demand timeline creation from recreating deleted timelines.  This is only soft-enforced, as
    // this map is dropped on restart.
    tombstones: HashMap<TenantTimelineId, Instant>,
+    tenant_tombstones: HashMap<TenantId, Instant>,

    conf: Arc<SafeKeeperConf>,
    broker_active_set: Arc<TimelinesSet>,
@@ -81,10 +82,25 @@ impl GlobalTimelinesState {
        }
    }

+    fn has_tombstone(&self, ttid: &TenantTimelineId) -> bool {
+        self.tombstones.contains_key(ttid) || self.tenant_tombstones.contains_key(&ttid.tenant_id)
+    }
+
+    /// Removes every tombstone blocking the given timeline: its own and its tenant's.
+    /// Returns `true` if anything was removed; `|` (not `||`) so both removals always run.
+    fn remove_tombstone(&mut self, ttid: &TenantTimelineId) -> bool {
+        self.tombstones.remove(ttid).is_some()
+            | self.tenant_tombstones.remove(&ttid.tenant_id).is_some()
+    }
+
    fn delete(&mut self, ttid: TenantTimelineId) {
        self.timelines.remove(&ttid);
        self.tombstones.insert(ttid, Instant::now());
    }
+
+    fn add_tenant_tombstone(&mut self, tenant_id: TenantId) {
+        self.tenant_tombstones.insert(tenant_id, Instant::now());
+    }
 }

 /// A struct used to manage access to the global timelines map.
@@ -99,6 +115,7 @@ impl GlobalTimelines {
            state: Mutex::new(GlobalTimelinesState {
                timelines: HashMap::new(),
                tombstones: HashMap::new(),
+                tenant_tombstones: HashMap::new(),
                conf,
                broker_active_set: Arc::new(TimelinesSet::default()),
                global_rate_limiter: RateLimiter::new(1, 1),
@@ -245,7 +262,7 @@ impl GlobalTimelines {
                return Ok(timeline);
            }

-            if state.tombstones.contains_key(&ttid) {
+            if state.has_tombstone(&ttid) {
                anyhow::bail!("Timeline {ttid} is deleted, refusing to recreate");
            }

@@ -295,13 +312,14 @@ impl GlobalTimelines {
                _ => {}
            }
            if check_tombstone {
-                if state.tombstones.contains_key(&ttid) {
+                if state.has_tombstone(&ttid) {
                    anyhow::bail!("timeline {ttid} is deleted, refusing to recreate");
                }
            } else {
                // We may be have been asked to load a timeline that was previously deleted (e.g. from `pull_timeline.rs`).  We trust
                // that the human doing this manual intervention knows what they are doing, and remove its tombstone.
-                if state.tombstones.remove(&ttid).is_some() {
+                // It's also possible that we enter this when the tenant has been deleted, even if the timeline itself has never existed.
+                if state.remove_tombstone(&ttid) {
                    warn!("un-deleted timeline {ttid}");
                }
            }
@@ -482,6 +500,7 @@ impl GlobalTimelines {
        let tli_res = {
            let state = self.state.lock().unwrap();

+            // Do NOT check tenant tombstones here: a tenant delete adds its tombstone before deleting its timelines.
            if state.tombstones.contains_key(ttid) {
                // Presence of a tombstone guarantees that a previous deletion has completed and there is no work to do.
                info!("Timeline {ttid} was already deleted");
@@ -557,6 +576,10 @@ impl GlobalTimelines {
        action: DeleteOrExclude,
    ) -> Result<HashMap<TenantTimelineId, TimelineDeleteResult>> {
        info!("deleting all timelines for tenant {}", tenant_id);
+
+        // Adding a tombstone before getting the timelines to prevent new timeline additions
+        self.state.lock().unwrap().add_tenant_tombstone(*tenant_id);
+
        let to_delete = self.get_all_for_tenant(*tenant_id);

        let mut err = None;
@@ -600,6 +623,9 @@ impl GlobalTimelines {
        state
            .tombstones
            .retain(|_, v| now.duration_since(*v) < *tombstone_ttl);
+        state
+            .tenant_tombstones
+            .retain(|_, v| now.duration_since(*v) < *tombstone_ttl);
    }
 }

--- a/safekeeper/tests/walproposer_sim/simulation.rs
+++ b/safekeeper/tests/walproposer_sim/simulation.rs
@@ -87,6 +87,7 @@ impl WalProposer {
        let config = Config {
            ttid,
            safekeepers_list: addrs,
+            safekeeper_conninfo_options: String::new(),
            safekeeper_reconnect_timeout: 1000,
            safekeeper_connection_timeout: 5000,
            sync_safekeepers,
--- a/Show More
+++ b/Show More