Compare commits


8 Commits

Author · SHA1 · Message · Date

Stas Kelvich · 8fbecb1565 · bump lsn a bit · 2024-01-27 13:48:06 +02:00
Stas Kelvich · 715772bdd6 · new values · 2024-01-27 13:34:28 +02:00
Heikki Linnakangas · 1626a1f333 · Fix the test · 2024-01-27 13:21:13 +02:00
    Now it passes on my laptop at least
Heikki Linnakangas · 12e39001ce · Apply the hack for all timelines of the target tenant · 2024-01-27 13:21:13 +02:00
    This gives us more flexibility to try it on a branch first
Heikki Linnakangas · 65cd16de86 · Fix the test · 2024-01-27 13:21:13 +02:00
Heikki Linnakangas · b308be20df · Fix typos and formatting in test, per 'ruff' · 2024-01-27 13:21:13 +02:00
Heikki Linnakangas · a2d08cfc97 · Fix formatting · 2024-01-27 13:21:13 +02:00
Heikki Linnakangas · 4ee11d9dfc · Retroactively fix the nextXid on a known broken timeline · 2024-01-27 13:21:13 +02:00
    This one particular timeline in production hit the nextXid bug. Add a
    one-off hack that will fix the nextXid on that particular timeline.
102 changed files with 1531 additions and 4189 deletions

View File

@@ -1,28 +1,27 @@
*
# Files
!Cargo.lock
!Cargo.toml
!Makefile
!rust-toolchain.toml
!scripts/combine_control_files.py
!scripts/ninstall.sh
!vm-cgconfig.conf
!Cargo.toml
!Cargo.lock
!Makefile
# Directories
!.cargo/
!.config/
!compute_tools/
!control_plane/
!compute_tools/
!libs/
!neon_local/
!pageserver/
!patches/
!pgxn/
!proxy/
!s3_scrubber/
!safekeeper/
!s3_scrubber/
!storage_broker/
!trace/
!vendor/postgres-*/
!vendor/postgres-v14/
!vendor/postgres-v15/
!vendor/postgres-v16/
!workspace_hack/
!neon_local/
!scripts/ninstall.sh
!scripts/combine_control_files.py
!vm-cgconfig.conf

View File

@@ -508,7 +508,7 @@ jobs:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: std-fs
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones

Cargo.lock generated
View File

@@ -285,6 +285,7 @@ dependencies = [
"metrics",
"pageserver_api",
"pageserver_client",
"postgres_backend",
"postgres_connection",
"serde",
"serde_json",
@@ -2735,12 +2736,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "libm"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "linux-raw-sys"
version = "0.1.4"
@@ -2811,15 +2806,6 @@ version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "memoffset"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
dependencies = [
"autocfg",
]
[[package]]
name = "memoffset"
version = "0.8.0"
@@ -2846,9 +2832,6 @@ dependencies = [
"libc",
"once_cell",
"prometheus",
"rand 0.8.5",
"rand_distr",
"twox-hash",
"workspace_hack",
]
@@ -2952,19 +2935,6 @@ dependencies = [
"libc",
]
[[package]]
name = "nix"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
dependencies = [
"bitflags 1.3.2",
"cfg-if",
"libc",
"memoffset 0.7.1",
"pin-utils",
]
[[package]]
name = "nix"
version = "0.27.1"
@@ -3087,7 +3057,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
"libm",
]
[[package]]
@@ -4102,8 +4071,6 @@ dependencies = [
"sync_wrapper",
"task-local-extensions",
"thiserror",
"tikv-jemalloc-ctl",
"tikv-jemallocator",
"tls-listener",
"tokio",
"tokio-postgres",
@@ -4204,16 +4171,6 @@ dependencies = [
"getrandom 0.2.11",
]
[[package]]
name = "rand_distr"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
dependencies = [
"num-traits",
"rand 0.8.5",
]
[[package]]
name = "rand_hc"
version = "0.2.0"
@@ -5554,37 +5511,6 @@ dependencies = [
"ordered-float 2.10.1",
]
[[package]]
name = "tikv-jemalloc-ctl"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "619bfed27d807b54f7f776b9430d4f8060e66ee138a28632ca898584d462c31c"
dependencies = [
"libc",
"paste",
"tikv-jemalloc-sys",
]
[[package]]
name = "tikv-jemalloc-sys"
version = "0.5.4+5.3.0-patched"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "tikv-jemallocator"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca"
dependencies = [
"libc",
"tikv-jemalloc-sys",
]
[[package]]
name = "time"
version = "0.3.21"
@@ -5684,10 +5610,9 @@ dependencies = [
[[package]]
name = "tokio-epoll-uring"
version = "0.1.0"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#0e1af4ccddf2f01805cfc9eaefa97ee13c04b52d"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#0dd3a2f8bf3239d34a19719ef1a74146c093126f"
dependencies = [
"futures",
"nix 0.26.4",
"once_cell",
"scopeguard",
"thiserror",
@@ -6209,7 +6134,7 @@ dependencies = [
[[package]]
name = "uring-common"
version = "0.1.0"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#0e1af4ccddf2f01805cfc9eaefa97ee13c04b52d"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#0dd3a2f8bf3239d34a19719ef1a74146c093126f"
dependencies = [
"io-uring",
"libc",

View File

@@ -149,8 +149,6 @@ tar = "0.4"
task-local-extensions = "0.1.4"
test-context = "0.1"
thiserror = "1.0"
tikv-jemallocator = "0.5"
tikv-jemalloc-ctl = "0.5"
tls-listener = { version = "0.7", features = ["rustls", "hyper-h1"] }
tokio = { version = "1.17", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
@@ -167,7 +165,6 @@ tracing = "0.1"
tracing-error = "0.2.0"
tracing-opentelemetry = "0.20.0"
tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
twox-hash = { version = "1.6.3", default-features = false }
url = "2.2"
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
walkdir = "2.3.2"

View File

@@ -53,7 +53,6 @@ RUN set -e \
--bin pagectl \
--bin safekeeper \
--bin storage_broker \
--bin attachment_service \
--bin proxy \
--bin neon_local \
--locked --release \
@@ -81,7 +80,6 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pagectl /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/attachment_service /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local /usr/local/bin

View File

@@ -241,12 +241,9 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
FROM build-deps AS vector-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY patches/pgvector.patch /pgvector.patch
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.6.0.tar.gz -O pgvector.tar.gz && \
echo "b0cf4ba1ab016335ac8fb1cada0d2106235889a194fffeece217c5bda90b2f19 pgvector.tar.gz" | sha256sum --check && \
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.5.1.tar.gz -O pgvector.tar.gz && \
echo "cc7a8e034a96e30a819911ac79d32f6bc47bdd1aa2de4d7d4904e26b83209dc8 pgvector.tar.gz" | sha256sum --check && \
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
patch -p1 < /pgvector.patch && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
@@ -523,7 +520,8 @@ RUN apt-get update && \
libboost-regex1.74-dev \
libboost-serialization1.74-dev \
libboost-system1.74-dev \
libeigen3-dev
libeigen3-dev \
libfreetype6-dev
ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
@@ -549,7 +547,6 @@ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.
-D PostgreSQL_LIBRARY_DIR=`pg_config --libdir` \
-D RDK_INSTALL_INTREE=OFF \
-D RDK_INSTALL_COMIC_FONTS=OFF \
-D RDK_BUILD_FREETYPE_SUPPORT=OFF \
-D CMAKE_BUILD_TYPE=Release \
. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -904,7 +901,7 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
# libxml2, libxslt1.1 for xml2
# libzstd1 for zstd
# libboost* for rdkit
# libboost*, libfreetype6, and zlib1g for rdkit
# ca-certificates for communicating with s3 by compute_ctl
RUN apt update && \
apt install --no-install-recommends -y \
@@ -917,6 +914,7 @@ RUN apt update && \
libboost-serialization1.74.0 \
libboost-system1.74.0 \
libossp-uuid16 \
libfreetype6 \
libgeos-c1v5 \
libgdal28 \
libproj19 \
@@ -928,6 +926,7 @@ RUN apt update && \
libcurl4-openssl-dev \
locales \
procps \
zlib1g \
ca-certificates && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8

View File

@@ -758,14 +758,6 @@ BEGIN
END LOOP;
END $$;
"#,
r#"
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
END IF;
END
$$;"#,
];
let mut query = "CREATE SCHEMA IF NOT EXISTS neon_migration";

View File

@@ -21,6 +21,10 @@ tokio.workspace = true
tokio-util.workspace = true
tracing.workspace = true
# TODO: remove this after DB persistence is added, it is only used for
# a parsing function when loading pageservers from neon_local LocalEnv
postgres_backend.workspace = true
diesel = { version = "2.1.4", features = ["serde_json", "postgres"] }
utils = { path = "../../libs/utils/" }

View File

@@ -2,17 +2,13 @@ use crate::reconciler::ReconcileError;
use crate::service::{Service, STARTUP_RECONCILE_TIMEOUT};
use hyper::{Body, Request, Response};
use hyper::{StatusCode, Uri};
use pageserver_api::models::{
TenantCreateRequest, TenantLocationConfigRequest, TimelineCreateRequest,
};
use pageserver_api::models::{TenantCreateRequest, TimelineCreateRequest};
use pageserver_api::shard::TenantShardId;
use pageserver_client::mgmt_api;
use std::sync::Arc;
use std::time::{Duration, Instant};
use utils::auth::SwappableJwtAuth;
use utils::http::endpoint::{auth_middleware, request_span};
use utils::http::request::parse_request_param;
use utils::id::{TenantId, TimelineId};
use utils::id::TenantId;
use utils::{
http::{
@@ -116,78 +112,6 @@ async fn handle_tenant_create(
json_response(StatusCode::OK, service.tenant_create(create_req).await?)
}
// For tenant and timeline deletions, which both implement an "initially return 202, then 404 once
// we're done" semantic, we wrap with a retry loop to expose a simpler API upstream. This avoids
// needing to track a "deleting" state for tenants.
async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
where
R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
{
let started_at = Instant::now();
// To keep deletion reasonably snappy for small tenants, initially check after 1 second if deletion
// completed.
let mut retry_period = Duration::from_secs(1);
// On subsequent retries, wait longer.
let max_retry_period = Duration::from_secs(5);
// Enable callers with a 30 second request timeout to reliably get a response
let max_wait = Duration::from_secs(25);
loop {
let status = f(service.clone()).await?;
match status {
StatusCode::ACCEPTED => {
tracing::info!("Deletion accepted, waiting to try again...");
tokio::time::sleep(retry_period).await;
retry_period = max_retry_period;
}
StatusCode::NOT_FOUND => {
tracing::info!("Deletion complete");
return json_response(StatusCode::OK, ());
}
_ => {
tracing::warn!("Unexpected status {status}");
return json_response(status, ());
}
}
let now = Instant::now();
if now + retry_period > started_at + max_wait {
tracing::info!("Deletion timed out waiting for 404");
// REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
// the pageserver's swagger definition for this endpoint, and has the same desired
// effect of causing the control plane to retry later.
return json_response(StatusCode::CONFLICT, ());
}
}
}
async fn handle_tenant_location_config(
service: Arc<Service>,
mut req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let config_req = json_request::<TenantLocationConfigRequest>(&mut req).await?;
json_response(
StatusCode::OK,
service
.tenant_location_config(tenant_id, config_req)
.await?,
)
}
async fn handle_tenant_delete(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
deletion_wrapper(service, move |service| async move {
service.tenant_delete(tenant_id).await
})
.await
}
async fn handle_tenant_timeline_create(
service: Arc<Service>,
mut req: Request<Body>,
@@ -202,63 +126,6 @@ async fn handle_tenant_timeline_create(
)
}
async fn handle_tenant_timeline_delete(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
deletion_wrapper(service, move |service| async move {
service.tenant_timeline_delete(tenant_id, timeline_id).await
})
.await
}
async fn handle_tenant_timeline_passthrough(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let Some(path) = req.uri().path_and_query() else {
// This should never happen, our request router only calls us if there is a path
return Err(ApiError::BadRequest(anyhow::anyhow!("Missing path")));
};
tracing::info!("Proxying request for tenant {} ({})", tenant_id, path);
// Find the node that holds shard zero
let (base_url, tenant_shard_id) = service.tenant_shard0_baseurl(tenant_id)?;
// Callers will always pass an unsharded tenant ID. Before proxying, we must
// rewrite this to a shard-aware shard zero ID.
let path = format!("{}", path);
let tenant_str = tenant_id.to_string();
let tenant_shard_str = format!("{}", tenant_shard_id);
let path = path.replace(&tenant_str, &tenant_shard_str);
let client = mgmt_api::Client::new(base_url, service.get_config().jwt_token.as_deref());
let resp = client.get_raw(path).await.map_err(|_e|
// FIXME: give ApiError a proper Unavailable variant. We return 503 here because
// if we can't successfully send a request to the pageserver, we aren't available.
ApiError::ShuttingDown)?;
// We have a reqwest::Response, and would like an http::Response
let mut builder = hyper::Response::builder()
.status(resp.status())
.version(resp.version());
for (k, v) in resp.headers() {
builder = builder.header(k, v);
}
let response = builder
.body(Body::wrap_stream(resp.bytes_stream()))
.map_err(|e| ApiError::InternalServerError(e.into()))?;
Ok(response)
}
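
The passthrough above hinges on a plain string replace: the unsharded tenant ID in the URL is swapped for shard zero's TenantShardId before proxying. A minimal standalone sketch of that rewrite (the `-0104` shard suffix is a hypothetical rendering, not necessarily TenantShardId's real Display format):

```rust
// Standalone sketch of the path rewrite in handle_tenant_timeline_passthrough.
// The shard suffix below is hypothetical; only the replace mechanics matter.
fn rewrite_to_shard_zero(path: &str, tenant_id: &str, tenant_shard_id: &str) -> String {
    path.replace(tenant_id, tenant_shard_id)
}

fn main() {
    let tenant = "3aa8fcc61f7e4d4d98184a208f4302b0";
    let shard0 = "3aa8fcc61f7e4d4d98184a208f4302b0-0104"; // hypothetical suffix
    assert_eq!(
        rewrite_to_shard_zero(&format!("/v1/tenant/{tenant}/timeline"), tenant, shard0),
        format!("/v1/tenant/{shard0}/timeline")
    );
}
```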
async fn handle_tenant_locate(
service: Arc<Service>,
req: Request<Body>,
@@ -274,11 +141,6 @@ async fn handle_node_register(mut req: Request<Body>) -> Result<Response<Body>,
json_response(StatusCode::OK, ())
}
async fn handle_node_list(req: Request<Body>) -> Result<Response<Body>, ApiError> {
let state = get_state(&req);
json_response(StatusCode::OK, state.service.node_list().await?)
}
async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let node_id: NodeId = parse_request_param(&req, "node_id")?;
let config_req = json_request::<NodeConfigureRequest>(&mut req).await?;
@@ -364,64 +226,26 @@ pub fn make_router(
router
.data(Arc::new(HttpState::new(service, auth)))
// Non-prefixed generic endpoints (status, metrics)
.get("/status", |r| request_span(r, handle_status))
// Upcalls for the pageserver: point the pageserver's `control_plane_api` config to this prefix
.post("/upcall/v1/re-attach", |r| {
request_span(r, handle_re_attach)
})
.post("/upcall/v1/validate", |r| request_span(r, handle_validate))
// Test/dev/debug endpoints
.post("/debug/v1/attach-hook", |r| {
request_span(r, handle_attach_hook)
})
.post("/debug/v1/inspect", |r| request_span(r, handle_inspect))
.get("/control/v1/tenant/:tenant_id/locate", |r| {
tenant_service_handler(r, handle_tenant_locate)
})
// Node operations
.post("/control/v1/node", |r| {
request_span(r, handle_node_register)
})
.get("/control/v1/node", |r| request_span(r, handle_node_list))
.put("/control/v1/node/:node_id/config", |r| {
.post("/re-attach", |r| request_span(r, handle_re_attach))
.post("/validate", |r| request_span(r, handle_validate))
.post("/attach-hook", |r| request_span(r, handle_attach_hook))
.post("/inspect", |r| request_span(r, handle_inspect))
.post("/node", |r| request_span(r, handle_node_register))
.put("/node/:node_id/config", |r| {
request_span(r, handle_node_configure)
})
// Tenant Shard operations
.put("/control/v1/tenant/:tenant_shard_id/migrate", |r| {
tenant_service_handler(r, handle_tenant_shard_migrate)
})
// Tenant operations
// The ^/v1/ endpoints act as a "Virtual Pageserver", enabling shard-naive clients to call into
// this service to manage tenants that actually consist of many tenant shards, as if they are a single entity.
.post("/v1/tenant", |r| {
tenant_service_handler(r, handle_tenant_create)
})
.delete("/v1/tenant/:tenant_id", |r| {
tenant_service_handler(r, handle_tenant_delete)
})
.put("/v1/tenant/:tenant_id/location_config", |r| {
tenant_service_handler(r, handle_tenant_location_config)
})
// Tenant Shard operations (low level/maintenance)
.put("/tenant/:tenant_shard_id/migrate", |r| {
tenant_service_handler(r, handle_tenant_shard_migrate)
})
// Timeline operations
.delete("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
tenant_service_handler(r, handle_tenant_timeline_delete)
})
.post("/v1/tenant/:tenant_id/timeline", |r| {
tenant_service_handler(r, handle_tenant_timeline_create)
})
// Tenant detail GET passthrough to shard zero
.get("/v1/tenant/:tenant_id*", |r| {
tenant_service_handler(r, handle_tenant_timeline_passthrough)
.get("/tenant/:tenant_id/locate", |r| {
tenant_service_handler(r, handle_tenant_locate)
})
// Timeline GET passthrough to shard zero. Note that the `*` in the URL is a wildcard: any future
// timeline GET APIs will be implicitly included.
.get("/v1/tenant/:tenant_id/timeline*", |r| {
tenant_service_handler(r, handle_tenant_timeline_passthrough)
.put("/tenant/:tenant_shard_id/migrate", |r| {
tenant_service_handler(r, handle_tenant_shard_migrate)
})
// Path aliases for tests_forward_compatibility
// TODO: remove these in future PR

View File

@@ -39,7 +39,7 @@ struct Cli {
/// Path to the .json file to store state (will be created if it doesn't exist)
#[arg(short, long)]
path: Option<Utf8PathBuf>,
path: Utf8PathBuf,
/// URL to connect to postgres, like postgresql://localhost:1234/attachment_service
#[arg(long)]
@@ -62,7 +62,7 @@ async fn main() -> anyhow::Result<()> {
GIT_VERSION,
launch_ts.to_string(),
BUILD_TAG,
args.path.as_ref().unwrap_or(&Utf8PathBuf::from("<none>")),
args.path,
args.listen
);
@@ -70,7 +70,11 @@ async fn main() -> anyhow::Result<()> {
jwt_token: args.jwt_token,
};
let json_path = args.path;
let json_path = if args.path.as_os_str().is_empty() {
None
} else {
Some(args.path)
};
let persistence = Arc::new(Persistence::new(args.database_url, json_path.clone()));
let service = Service::spawn(config, persistence.clone()).await?;

View File

@@ -9,6 +9,7 @@ use diesel::prelude::*;
use diesel::Connection;
use pageserver_api::models::TenantConfig;
use pageserver_api::shard::{ShardCount, ShardNumber, TenantShardId};
use postgres_connection::parse_host_port;
use serde::{Deserialize, Serialize};
use utils::generation::Generation;
use utils::id::{NodeId, TenantId};
@@ -128,11 +129,51 @@ impl Persistence {
})
.await?;
if nodes.is_empty() {
return self.list_nodes_local_env().await;
}
tracing::info!("list_nodes: loaded {} nodes", nodes.len());
Ok(nodes)
}
/// Shim for automated compatibility tests: load nodes from LocalEnv instead of database
pub(crate) async fn list_nodes_local_env(&self) -> DatabaseResult<Vec<Node>> {
// Enable test_backward_compatibility to work by populating our list of
// nodes from LocalEnv when it is not present in persistent storage. Otherwise at
// first startup in the compat test, we may have shards but no nodes.
use control_plane::local_env::LocalEnv;
let env = LocalEnv::load_config().map_err(|e| DatabaseError::Logical(format!("{e}")))?;
tracing::info!(
"Loading {} pageserver nodes from LocalEnv",
env.pageservers.len()
);
let mut nodes = Vec::new();
for ps_conf in env.pageservers {
let (pg_host, pg_port) =
parse_host_port(&ps_conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
let (http_host, http_port) = parse_host_port(&ps_conf.listen_http_addr)
.expect("Unable to parse listen_http_addr");
let node = Node {
id: ps_conf.id,
listen_pg_addr: pg_host.to_string(),
listen_pg_port: pg_port.unwrap_or(5432),
listen_http_addr: http_host.to_string(),
listen_http_port: http_port.unwrap_or(80),
availability: NodeAvailability::Active,
scheduling: NodeSchedulingPolicy::Active,
};
// Synchronize database with what we learn from LocalEnv
self.insert_node(&node).await?;
nodes.push(node);
}
Ok(nodes)
}
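
The shim's node records come from splitting `listen_pg_addr`/`listen_http_addr` into a host and an optional port, defaulting to 5432 and 80. A self-contained stand-in for that parsing (the real `parse_host_port` lives in the `postgres_connection` crate and handles more syntax, e.g. bracketed IPv6 hosts):

```rust
// Illustrative stand-in for parse_host_port as used in list_nodes_local_env.
fn split_host_port(addr: &str) -> (String, Option<u16>) {
    match addr.rsplit_once(':') {
        Some((host, port)) => (host.to_string(), port.parse().ok()),
        None => (addr.to_string(), None),
    }
}

fn main() {
    let (host, port) = split_host_port("127.0.0.1:64000");
    assert_eq!((host.as_str(), port.unwrap_or(5432)), ("127.0.0.1", 64000));
    let (host, port) = split_host_port("pageserver"); // no port: fall back to 5432
    assert_eq!((host.as_str(), port.unwrap_or(5432)), ("pageserver", 5432));
}
```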
/// At startup, load the high level state for shards, such as their config + policy. This will
/// be enriched at runtime with state discovered on pageservers.
pub(crate) async fn list_tenant_shards(&self) -> DatabaseResult<Vec<TenantShardPersistence>> {

View File

@@ -21,7 +21,6 @@ use pageserver_api::{
models,
models::{
LocationConfig, LocationConfigMode, ShardParameters, TenantConfig, TenantCreateRequest,
TenantLocationConfigRequest, TenantLocationConfigResponse, TenantShardLocation,
TimelineCreateRequest, TimelineInfo,
},
shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize, TenantShardId},
@@ -31,14 +30,14 @@ use utils::{
completion::Barrier,
generation::Generation,
http::error::ApiError,
id::{NodeId, TenantId, TimelineId},
id::{NodeId, TenantId},
seqwait::SeqWait,
};
use crate::{
compute_hook::ComputeHook,
node::Node,
persistence::{DatabaseError, NodePersistence, Persistence, TenantShardPersistence},
persistence::{DatabaseError, Persistence, TenantShardPersistence},
scheduler::Scheduler,
tenant_state::{
IntentState, ObservedState, ObservedStateLocation, ReconcileResult, ReconcileWaitError,
@@ -636,7 +635,7 @@ impl Service {
shard_number: tenant_shard_id.shard_number.0 as i32,
shard_count: tenant_shard_id.shard_count.0 as i32,
shard_stripe_size: create_req.shard_parameters.stripe_size.0 as i32,
generation: create_req.generation.map(|g| g as i32).unwrap_or(0),
generation: 0,
generation_pageserver: i64::MAX,
placement_policy: serde_json::to_string(&placement_policy).unwrap(),
config: serde_json::to_string(&create_req.config).unwrap(),
@@ -678,7 +677,6 @@ impl Service {
})?;
response_shards.push(TenantCreateResponseShard {
shard_id: tenant_shard_id,
node_id: entry
.get()
.intent
@@ -711,7 +709,6 @@ impl Service {
})?;
response_shards.push(TenantCreateResponseShard {
shard_id: tenant_shard_id,
node_id: state
.intent
.attached
@@ -745,257 +742,14 @@ impl Service {
(waiters, response_shards)
};
self.await_waiters(waiters).await?;
Ok(TenantCreateResponse {
shards: response_shards,
})
}
/// Helper for functions that reconcile a number of shards, and would like to do a timeout-bounded
/// wait for reconciliation to complete before responding.
async fn await_waiters(
&self,
waiters: Vec<ReconcilerWaiter>,
) -> Result<(), ReconcileWaitError> {
let deadline = Instant::now().checked_add(Duration::from_secs(30)).unwrap();
let deadline = Instant::now().checked_add(Duration::from_secs(5)).unwrap();
for waiter in waiters {
let timeout = deadline.duration_since(Instant::now());
waiter.wait_timeout(timeout).await?;
}
Ok(())
}
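
Note that await_waiters gives all waiters one shared deadline rather than a per-waiter timeout, so the handler responds within a single budget regardless of how many shards are reconciling. A minimal self-contained sketch of the same pattern (assumes a tokio runtime with the `time` and `macros` features):

```rust
use std::time::{Duration, Instant};

// Shared-deadline pattern: each task is awaited with only the time that
// remains, so N tasks together stay within one 5-second budget.
#[tokio::main]
async fn main() {
    let deadline = Instant::now() + Duration::from_secs(5);
    let tasks = vec![
        tokio::time::sleep(Duration::from_secs(1)),
        tokio::time::sleep(Duration::from_secs(1)),
    ];
    for task in tasks {
        let remaining = deadline.saturating_duration_since(Instant::now());
        // timeout() plays the role of waiter.wait_timeout() in the real code.
        tokio::time::timeout(remaining, task).await.expect("deadline exceeded");
    }
}
```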
/// This API is used by the cloud control plane to do coarse-grained control of tenants:
/// - Call with mode Attached* to upsert the tenant.
/// - Call with mode Detached to switch to PolicyMode::Detached
///
/// In future, calling with mode Secondary may switch to a detach-lite mode in which a tenant only has
/// secondary locations.
pub(crate) async fn tenant_location_config(
&self,
tenant_id: TenantId,
req: TenantLocationConfigRequest,
) -> Result<TenantLocationConfigResponse, ApiError> {
if req.tenant_id.shard_count.0 > 1 {
return Err(ApiError::BadRequest(anyhow::anyhow!(
"This API is for importing single-sharded or unsharded tenants"
)));
}
let mut waiters = Vec::new();
let mut result = TenantLocationConfigResponse { shards: Vec::new() };
let maybe_create = {
let mut locked = self.inner.write().unwrap();
let result_tx = locked.result_tx.clone();
let compute_hook = locked.compute_hook.clone();
let pageservers = locked.nodes.clone();
let mut scheduler = Scheduler::new(&locked.tenants, &locked.nodes);
// Maybe we have existing shards
let mut create = true;
for (shard_id, shard) in locked
.tenants
.range_mut(TenantShardId::tenant_range(tenant_id))
{
// Saw an existing shard: this is not a creation
create = false;
// Note that for existing tenants we do _not_ respect the generation in the request: this is likely
// to be stale. Once a tenant is created in this service, our view of generation is authoritative, and
// callers' generations may be ignored. This represents a one-way migration of tenants from the outer
// cloud control plane into this service.
// Use location config mode as an indicator of policy: if they ask for
// attached we go to default HA attached mode. If they ask for secondary
// we go to secondary-only mode. If they ask for detached we detach.
match req.config.mode {
LocationConfigMode::Detached => {
shard.policy = PlacementPolicy::Detached;
}
LocationConfigMode::Secondary => {
// TODO: implement secondary-only mode.
todo!();
}
LocationConfigMode::AttachedMulti
| LocationConfigMode::AttachedSingle
| LocationConfigMode::AttachedStale => {
// TODO: persistence for changes in policy
if pageservers.len() > 1 {
shard.policy = PlacementPolicy::Double(1)
} else {
// Convenience for dev/test: if we just have one pageserver, import
// tenants into Single mode so that scheduling will succeed.
shard.policy = PlacementPolicy::Single
}
}
}
shard.schedule(&mut scheduler)?;
let maybe_waiter = shard.maybe_reconcile(
result_tx.clone(),
&pageservers,
&compute_hook,
&self.config,
&self.persistence,
);
if let Some(waiter) = maybe_waiter {
waiters.push(waiter);
}
if let Some(node_id) = shard.intent.attached {
result.shards.push(TenantShardLocation {
shard_id: *shard_id,
node_id,
})
}
}
if create {
// Validate request mode
match req.config.mode {
LocationConfigMode::Detached | LocationConfigMode::Secondary => {
// When using this API to onboard an existing tenant to this service, it must start in
// an attached state, because we need the request to come with a generation
return Err(ApiError::BadRequest(anyhow::anyhow!(
"Imported tenant must be in attached mode"
)));
}
LocationConfigMode::AttachedMulti
| LocationConfigMode::AttachedSingle
| LocationConfigMode::AttachedStale => {
// Pass
}
}
// Validate request generation
let Some(generation) = req.config.generation else {
// We can only import attached tenants, because we need the request to come with a generation
return Err(ApiError::BadRequest(anyhow::anyhow!(
"Generation is mandatory when importing tenant"
)));
};
// Synthesize a creation request
Some(TenantCreateRequest {
new_tenant_id: TenantShardId::unsharded(tenant_id),
generation: Some(generation),
shard_parameters: ShardParameters {
// Must preserve the incoming shard_count to distinguish unsharded (0)
// from single-sharded (1): this distinction appears in the S3 keys of the tenant.
count: req.tenant_id.shard_count,
// We only import un-sharded or single-sharded tenants, so stripe
// size can be made up arbitrarily here.
stripe_size: ShardParameters::DEFAULT_STRIPE_SIZE,
},
config: req.config.tenant_conf,
})
} else {
None
}
};
if let Some(create_req) = maybe_create {
let create_resp = self.tenant_create(create_req).await?;
result.shards = create_resp
.shards
.into_iter()
.map(|s| TenantShardLocation {
node_id: s.node_id,
shard_id: s.shard_id,
})
.collect();
} else {
// This was an update, wait for reconciliation
self.await_waiters(waiters).await?;
}
Ok(result)
}
pub(crate) async fn tenant_delete(&self, tenant_id: TenantId) -> Result<StatusCode, ApiError> {
// TODO: refactor into helper
let targets = {
let locked = self.inner.read().unwrap();
let mut targets = Vec::new();
for (tenant_shard_id, shard) in
locked.tenants.range(TenantShardId::tenant_range(tenant_id))
{
let node_id = shard.intent.attached.ok_or_else(|| {
ApiError::InternalServerError(anyhow::anyhow!("Shard not scheduled"))
})?;
let node = locked
.nodes
.get(&node_id)
.expect("Pageservers may not be deleted while referenced");
targets.push((*tenant_shard_id, node.clone()));
}
targets
};
// TODO: error out if the tenant is not attached anywhere.
// Phase 1: delete on the pageservers
let mut any_pending = false;
for (tenant_shard_id, node) in targets {
let client = mgmt_api::Client::new(node.base_url(), self.config.jwt_token.as_deref());
// TODO: this, like many other places, requires proper retry handling for 503, timeout: those should not
// surface immediately as an error to our caller.
let status = client.tenant_delete(tenant_shard_id).await.map_err(|e| {
ApiError::InternalServerError(anyhow::anyhow!(
"Error deleting shard {tenant_shard_id} on node {}: {e}",
node.id
))
})?;
tracing::info!(
"Shard {tenant_shard_id} on node {}, delete returned {}",
node.id,
status
);
if status == StatusCode::ACCEPTED {
any_pending = true;
}
}
if any_pending {
// Caller should call us again later. When we eventually see 404s from
// all the shards, we may proceed to delete our records of the tenant.
tracing::info!(
"Tenant {} has some shards pending deletion, returning 202",
tenant_id
);
return Ok(StatusCode::ACCEPTED);
}
// Fall through: deletion of the tenant on pageservers is complete, we may proceed to drop
// our in-memory state and database state.
// Ordering: we delete persistent state first: if we then
// crash, we will drop the in-memory state.
// Drop persistent state.
self.persistence.delete_tenant(tenant_id).await?;
// Drop in-memory state
{
let mut locked = self.inner.write().unwrap();
locked
.tenants
.retain(|tenant_shard_id, _shard| tenant_shard_id.tenant_id != tenant_id);
tracing::info!(
"Deleted tenant {tenant_id}, now have {} tenants",
locked.tenants.len()
);
};
// Success is represented as 404, to imitate the existing pageserver deletion API
Ok(StatusCode::NOT_FOUND)
Ok(TenantCreateResponse {
shards: response_shards,
})
}
pub(crate) async fn tenant_timeline_create(
@@ -1005,15 +759,25 @@ impl Service {
) -> Result<TimelineInfo, ApiError> {
let mut timeline_info = None;
tracing::info!(
"Creating timeline {}/{}",
tenant_id,
create_req.new_timeline_id,
);
let ensure_waiters = {
let locked = self.inner.write().unwrap();
tracing::info!(
"Creating timeline {}/{}, have {} pageservers",
tenant_id,
create_req.new_timeline_id,
locked.nodes.len()
);
self.ensure_attached_wait(tenant_id).await?;
self.ensure_attached(locked, tenant_id)
.map_err(ApiError::InternalServerError)?
};
let deadline = Instant::now().checked_add(Duration::from_secs(5)).unwrap();
for waiter in ensure_waiters {
let timeout = deadline.duration_since(Instant::now());
waiter.wait_timeout(timeout).await?;
}
// TODO: refuse to do this if shard splitting is in progress
let targets = {
let locked = self.inner.read().unwrap();
let mut targets = Vec::new();
@@ -1084,111 +848,6 @@ impl Service {
Ok(timeline_info.expect("targets cannot be empty"))
}
pub(crate) async fn tenant_timeline_delete(
&self,
tenant_id: TenantId,
timeline_id: TimelineId,
) -> Result<StatusCode, ApiError> {
tracing::info!("Deleting timeline {}/{}", tenant_id, timeline_id,);
self.ensure_attached_wait(tenant_id).await?;
// TODO: refuse to do this if shard splitting is in progress
let targets = {
let locked = self.inner.read().unwrap();
let mut targets = Vec::new();
for (tenant_shard_id, shard) in
locked.tenants.range(TenantShardId::tenant_range(tenant_id))
{
let node_id = shard.intent.attached.ok_or_else(|| {
ApiError::InternalServerError(anyhow::anyhow!("Shard not scheduled"))
})?;
let node = locked
.nodes
.get(&node_id)
.expect("Pageservers may not be deleted while referenced");
targets.push((*tenant_shard_id, node.clone()));
}
targets
};
if targets.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
}
// TODO: call into shards concurrently
let mut any_pending = false;
for (tenant_shard_id, node) in targets {
let client = mgmt_api::Client::new(node.base_url(), self.config.jwt_token.as_deref());
tracing::info!(
"Deleting timeline on shard {}/{}, attached to node {}",
tenant_shard_id,
timeline_id,
node.id
);
let status = client
.timeline_delete(tenant_shard_id, timeline_id)
.await
.map_err(|e| {
ApiError::InternalServerError(anyhow::anyhow!(
"Error deleting timeline {timeline_id} on {tenant_shard_id} on node {}: {e}",
node.id
))
})?;
if status == StatusCode::ACCEPTED {
any_pending = true;
}
}
if any_pending {
Ok(StatusCode::ACCEPTED)
} else {
Ok(StatusCode::NOT_FOUND)
}
}
/// When you need to send an HTTP request to the pageserver that holds shard0 of a tenant, this
/// function looks it up and returns the url. If the tenant isn't found, returns Err(ApiError::NotFound)
pub(crate) fn tenant_shard0_baseurl(
&self,
tenant_id: TenantId,
) -> Result<(String, TenantShardId), ApiError> {
let locked = self.inner.read().unwrap();
let Some((tenant_shard_id, shard)) = locked
.tenants
.range(TenantShardId::tenant_range(tenant_id))
.next()
else {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant {tenant_id} not found").into(),
));
};
// TODO: should use the ID last published to compute_hook, rather than the intent: the intent might
// point to somewhere we haven't attached yet.
let Some(node_id) = shard.intent.attached else {
return Err(ApiError::Conflict(
"Cannot call timeline API on non-attached tenant".to_string(),
));
};
let Some(node) = locked.nodes.get(&node_id) else {
// This should never happen
return Err(ApiError::InternalServerError(anyhow::anyhow!(
"Shard refers to nonexistent node"
)));
};
Ok((node.base_url(), *tenant_shard_id))
}
pub(crate) fn tenant_locate(
&self,
tenant_id: TenantId,
@@ -1334,20 +993,6 @@ impl Service {
Ok(TenantShardMigrateResponse {})
}
pub(crate) async fn node_list(&self) -> Result<Vec<NodePersistence>, ApiError> {
// It is convenient to avoid taking the big lock and converting Node to a serializable
// structure, by fetching from storage instead of reading in-memory state.
let nodes = self
.persistence
.list_nodes()
.await?
.into_iter()
.map(|n| n.to_persistent())
.collect();
Ok(nodes)
}
pub(crate) async fn node_register(
&self,
register_req: NodeRegisterRequest,
@@ -1521,7 +1166,7 @@ impl Service {
/// Helper for methods that will try and call pageserver APIs for
/// a tenant, such as timeline CRUD: they cannot proceed unless the tenant
/// is attached somewhere.
fn ensure_attached_schedule(
fn ensure_attached(
&self,
mut locked: std::sync::RwLockWriteGuard<'_, ServiceState>,
tenant_id: TenantId,
@@ -1551,23 +1196,6 @@ impl Service {
Ok(waiters)
}
async fn ensure_attached_wait(&self, tenant_id: TenantId) -> Result<(), ApiError> {
let ensure_waiters = {
let locked = self.inner.write().unwrap();
self.ensure_attached_schedule(locked, tenant_id)
.map_err(ApiError::InternalServerError)?
};
let deadline = Instant::now().checked_add(Duration::from_secs(5)).unwrap();
for waiter in ensure_waiters {
let timeout = deadline.duration_since(Instant::now());
waiter.wait_timeout(timeout).await?;
}
Ok(())
}
/// Check all tenants for pending reconciliation work, and reconcile those in need
///
/// Returns how many reconciliation tasks were started

View File

@@ -17,7 +17,6 @@ use serde::{de::DeserializeOwned, Deserialize, Serialize};
use std::{env, str::FromStr};
use tokio::process::Command;
use tracing::instrument;
use url::Url;
use utils::{
auth::{Claims, Scope},
id::{NodeId, TenantId},
@@ -60,7 +59,6 @@ pub struct InspectResponse {
#[derive(Serialize, Deserialize)]
pub struct TenantCreateResponseShard {
pub shard_id: TenantShardId,
pub node_id: NodeId,
pub generation: u32,
}
@@ -525,15 +523,13 @@ impl AttachmentService {
RQ: Serialize + Sized,
RS: DeserializeOwned + Sized,
{
// The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
// for general purpose API access.
let listen_url = self.env.control_plane_api.clone().unwrap();
let url = Url::from_str(&format!(
"http://{}:{}/{path}",
listen_url.host_str().unwrap(),
listen_url.port().unwrap()
))
.unwrap();
let url = self
.env
.control_plane_api
.clone()
.unwrap()
.join(&path)
.unwrap();
let mut builder = self.client.request(method, url);
if let Some(body) = body {
@@ -570,7 +566,7 @@ impl AttachmentService {
let response = self
.dispatch::<_, AttachHookResponse>(
Method::POST,
"debug/v1/attach-hook".to_string(),
"attach-hook".to_string(),
Some(request),
)
.await?;
@@ -586,11 +582,7 @@ impl AttachmentService {
let request = InspectRequest { tenant_shard_id };
let response = self
.dispatch::<_, InspectResponse>(
Method::POST,
"debug/v1/inspect".to_string(),
Some(request),
)
.dispatch::<_, InspectResponse>(Method::POST, "inspect".to_string(), Some(request))
.await?;
Ok(response.attachment)
@@ -607,12 +599,8 @@ impl AttachmentService {
#[instrument(skip(self))]
pub async fn tenant_locate(&self, tenant_id: TenantId) -> anyhow::Result<TenantLocateResponse> {
self.dispatch::<(), _>(
Method::GET,
format!("control/v1/tenant/{tenant_id}/locate"),
None,
)
.await
self.dispatch::<(), _>(Method::GET, format!("tenant/{tenant_id}/locate"), None)
.await
}
#[instrument(skip(self))]
@@ -634,7 +622,7 @@ impl AttachmentService {
#[instrument(skip_all, fields(node_id=%req.node_id))]
pub async fn node_register(&self, req: NodeRegisterRequest) -> anyhow::Result<()> {
self.dispatch::<_, ()>(Method::POST, "control/v1/node".to_string(), Some(req))
self.dispatch::<_, ()>(Method::POST, "node".to_string(), Some(req))
.await
}
@@ -642,7 +630,7 @@ impl AttachmentService {
pub async fn node_configure(&self, req: NodeConfigureRequest) -> anyhow::Result<()> {
self.dispatch::<_, ()>(
Method::PUT,
format!("control/v1/node/{}/config", req.node_id),
format!("node/{}/config", req.node_id),
Some(req),
)
.await
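
The rewritten dispatch composes URLs with `Url::join`, whose semantics depend on the base ending in a slash; that is also why neon_local's DEFAULT_PAGESERVER_CONTROL_PLANE_API (next file) can carry or drop the `/upcall/v1/` prefix without further code changes. A quick sketch of the relevant `url` crate behavior:

```rust
use url::Url;

// Url::join keeps the base path only when the base ends with '/'.
fn main() {
    let base = Url::parse("http://127.0.0.1:1234/").unwrap();
    assert_eq!(
        base.join("attach-hook").unwrap().as_str(),
        "http://127.0.0.1:1234/attach-hook"
    );
    let prefixed = Url::parse("http://127.0.0.1:1234/upcall/v1/").unwrap();
    assert_eq!(
        prefixed.join("attach-hook").unwrap().as_str(),
        "http://127.0.0.1:1234/upcall/v1/attach-hook"
    );
}
```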

View File

@@ -51,7 +51,7 @@ project_git_version!(GIT_VERSION);
const DEFAULT_PG_VERSION: &str = "15";
const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/";
fn default_conf(num_pageservers: u16) -> String {
let mut template = format!(

View File

@@ -395,11 +395,6 @@ impl PageServerNode {
.transpose()
.context("Failed to parse 'gc_feedback' as bool")?,
heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
lazy_slru_download: settings
.remove("lazy_slru_download")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'lazy_slru_download' as bool")?,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
@@ -500,11 +495,6 @@ impl PageServerNode {
.transpose()
.context("Failed to parse 'gc_feedback' as bool")?,
heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
lazy_slru_download: settings
.remove("lazy_slru_download")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'lazy_slru_download' as bool")?,
}
};

View File

@@ -9,10 +9,5 @@ prometheus.workspace = true
libc.workspace = true
once_cell.workspace = true
chrono.workspace = true
twox-hash.workspace = true
workspace_hack.workspace = true
[dev-dependencies]
rand = "0.8"
rand_distr = "0.4.3"

View File

@@ -1,523 +0,0 @@
//! HyperLogLog is an algorithm for the count-distinct problem,
//! approximating the number of distinct elements in a multiset.
//! Calculating the exact cardinality of the distinct elements
//! of a multiset requires an amount of memory proportional to
//! the cardinality, which is impractical for very large data sets.
//! Probabilistic cardinality estimators, such as the HyperLogLog algorithm,
//! use significantly less memory than this, but can only approximate the cardinality.
use std::{
collections::HashMap,
hash::{BuildHasher, BuildHasherDefault, Hash, Hasher},
sync::{atomic::AtomicU8, Arc, RwLock},
};
use prometheus::{
core::{self, Describer},
proto, Opts,
};
use twox_hash::xxh3;
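
As orientation before the implementation, a minimal usage sketch of the API this (now removed) module exported; the metric and label names are made up, and the shard count N must be a power of two:

```rust
// Hypothetical usage of HyperLogLogVec as defined below: estimate the
// number of distinct user IDs seen per endpoint, with 32 HLL shards.
fn example() -> prometheus::Result<()> {
    let per_endpoint = HyperLogLogVec::<32>::new(
        Opts::new("distinct_users", "distinct users per endpoint"),
        &["endpoint"],
    )?;
    for user_id in [1u64, 2, 2, 3] {
        // Duplicate measurements do not inflate the estimate.
        per_endpoint.with_label_values(&["/login"]).measure(&user_id);
    }
    Ok(())
}
```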
/// Create a [`HyperLogLogVec`] and register it to the default registry.
#[macro_export(local_inner_macros)]
macro_rules! register_hll_vec {
($N:literal, $OPTS:expr, $LABELS_NAMES:expr $(,)?) => {{
let hll_vec = $crate::HyperLogLogVec::<$N>::new($OPTS, $LABELS_NAMES).unwrap();
$crate::register(Box::new(hll_vec.clone())).map(|_| hll_vec)
}};
($N:literal, $NAME:expr, $HELP:expr, $LABELS_NAMES:expr $(,)?) => {{
$crate::register_hll_vec!($N, $crate::opts!($NAME, $HELP), $LABELS_NAMES)
}};
}
/// Create a [`HyperLogLog`] and register it to the default registry.
#[macro_export(local_inner_macros)]
macro_rules! register_hll {
($N:literal, $OPTS:expr $(,)?) => {{
let hll = $crate::HyperLogLog::<$N>::with_opts($OPTS).unwrap();
$crate::register(Box::new(hll.clone())).map(|_| hll)
}};
($N:literal, $NAME:expr, $HELP:expr $(,)?) => {{
$crate::register_hll!($N, $crate::opts!($NAME, $HELP))
}};
}
/// HLL is a probabilistic cardinality measure.
///
/// How to use this time-series for a metric name `my_metrics_total_hll`:
///
/// ```promql
/// # harmonic mean
/// 1 / (
/// sum (
/// 2 ^ -(
/// # HLL merge operation
/// max (my_metrics_total_hll{}) by (hll_shard, other_labels...)
/// )
/// ) without (hll_shard)
/// )
/// * alpha
/// * shards_count
/// * shards_count
/// ```
///
/// If you want an estimate over time, you can use the following query:
///
/// ```promql
/// # harmonic mean
/// 1 / (
/// sum (
/// 2 ^ -(
/// # HLL merge operation
/// max (
/// max_over_time(my_metrics_total_hll{}[$__rate_interval])
/// ) by (hll_shard, other_labels...)
/// )
/// ) without (hll_shard)
/// )
/// * alpha
/// * shards_count
/// * shards_count
/// ```
///
/// In the case of low cardinality, you might want to use the linear counting approximation:
///
/// ```promql
/// # LinearCounting(m, V) = m log (m / V)
/// shards_count * ln(shards_count /
/// # calculate V = how many shards contain a 0
/// count(max (proxy_connecting_endpoints{}) by (hll_shard, protocol) == 0) without (hll_shard)
/// )
/// ```
///
/// See <https://en.wikipedia.org/wiki/HyperLogLog#Practical_considerations> for estimates on alpha
#[derive(Clone)]
pub struct HyperLogLogVec<const N: usize> {
core: Arc<HyperLogLogVecCore<N>>,
}
struct HyperLogLogVecCore<const N: usize> {
pub children: RwLock<HashMap<u64, HyperLogLog<N>, BuildHasherDefault<xxh3::Hash64>>>,
pub desc: core::Desc,
pub opts: Opts,
}
impl<const N: usize> core::Collector for HyperLogLogVec<N> {
fn desc(&self) -> Vec<&core::Desc> {
vec![&self.core.desc]
}
fn collect(&self) -> Vec<proto::MetricFamily> {
let mut m = proto::MetricFamily::default();
m.set_name(self.core.desc.fq_name.clone());
m.set_help(self.core.desc.help.clone());
m.set_field_type(proto::MetricType::GAUGE);
let mut metrics = Vec::new();
for child in self.core.children.read().unwrap().values() {
child.core.collect_into(&mut metrics);
}
m.set_metric(metrics);
vec![m]
}
}
impl<const N: usize> HyperLogLogVec<N> {
/// Create a new [`HyperLogLogVec`] based on the provided
/// [`Opts`] and partitioned by the given label names. At least one label name must be
/// provided.
pub fn new(opts: Opts, label_names: &[&str]) -> prometheus::Result<Self> {
assert!(N.is_power_of_two());
let variable_names = label_names.iter().map(|s| (*s).to_owned()).collect();
let opts = opts.variable_labels(variable_names);
let desc = opts.describe()?;
let v = HyperLogLogVecCore {
children: RwLock::new(HashMap::default()),
desc,
opts,
};
Ok(Self { core: Arc::new(v) })
}
/// `get_metric_with_label_values` returns the [`HyperLogLog<P>`] for the given slice
/// of label values (same order as the VariableLabels in Desc). If that combination of
/// label values is accessed for the first time, a new [`HyperLogLog<P>`] is created.
///
/// An error is returned if the number of label values is not the same as the
/// number of VariableLabels in Desc.
pub fn get_metric_with_label_values(
&self,
vals: &[&str],
) -> prometheus::Result<HyperLogLog<N>> {
self.core.get_metric_with_label_values(vals)
}
/// `with_label_values` works as `get_metric_with_label_values`, but panics if an error
/// occurs.
pub fn with_label_values(&self, vals: &[&str]) -> HyperLogLog<N> {
self.get_metric_with_label_values(vals).unwrap()
}
}
impl<const N: usize> HyperLogLogVecCore<N> {
pub fn get_metric_with_label_values(
&self,
vals: &[&str],
) -> prometheus::Result<HyperLogLog<N>> {
let h = self.hash_label_values(vals)?;
if let Some(metric) = self.children.read().unwrap().get(&h).cloned() {
return Ok(metric);
}
self.get_or_create_metric(h, vals)
}
pub(crate) fn hash_label_values(&self, vals: &[&str]) -> prometheus::Result<u64> {
if vals.len() != self.desc.variable_labels.len() {
return Err(prometheus::Error::InconsistentCardinality {
expect: self.desc.variable_labels.len(),
got: vals.len(),
});
}
let mut h = xxh3::Hash64::default();
for val in vals {
h.write(val.as_bytes());
}
Ok(h.finish())
}
fn get_or_create_metric(
&self,
hash: u64,
label_values: &[&str],
) -> prometheus::Result<HyperLogLog<N>> {
let mut children = self.children.write().unwrap();
// Check exist first.
if let Some(metric) = children.get(&hash).cloned() {
return Ok(metric);
}
let metric = HyperLogLog::with_opts_and_label_values(&self.opts, label_values)?;
children.insert(hash, metric.clone());
Ok(metric)
}
}
/// HLL is a probabilistic cardinality measure.
///
/// How to use this time-series for a metric name `my_metrics_total_hll`:
///
/// ```promql
/// # harmonic mean
/// 1 / (
/// sum (
/// 2 ^ -(
/// # HLL merge operation
/// max (my_metrics_total_hll{}) by (hll_shard, other_labels...)
/// )
/// ) without (hll_shard)
/// )
/// * alpha
/// * shards_count
/// * shards_count
/// ```
///
/// If you want an estimate over time, you can use the following query:
///
/// ```promql
/// # harmonic mean
/// 1 / (
/// sum (
/// 2 ^ -(
/// # HLL merge operation
/// max (
/// max_over_time(my_metrics_total_hll{}[$__rate_interval])
/// ) by (hll_shard, other_labels...)
/// )
/// ) without (hll_shard)
/// )
/// * alpha
/// * shards_count
/// * shards_count
/// ```
///
/// In the case of low cardinality, you might want to use the linear counting approximation:
///
/// ```promql
/// # LinearCounting(m, V) = m log (m / V)
/// shards_count * ln(shards_count /
/// # calculate V = how many shards contain a 0
/// count(max (proxy_connecting_endpoints{}) by (hll_shard, protocol) == 0) without (hll_shard)
/// )
/// ```
///
/// See <https://en.wikipedia.org/wiki/HyperLogLog#Practical_considerations> for estimates on alpha
#[derive(Clone)]
pub struct HyperLogLog<const N: usize> {
core: Arc<HyperLogLogCore<N>>,
}
impl<const N: usize> HyperLogLog<N> {
/// Create a [`HyperLogLog`] with the `name` and `help` arguments.
pub fn new<S1: Into<String>, S2: Into<String>>(name: S1, help: S2) -> prometheus::Result<Self> {
assert!(N.is_power_of_two());
let opts = Opts::new(name, help);
Self::with_opts(opts)
}
/// Create a [`HyperLogLog`] with the `opts` options.
pub fn with_opts(opts: Opts) -> prometheus::Result<Self> {
Self::with_opts_and_label_values(&opts, &[])
}
fn with_opts_and_label_values(opts: &Opts, label_values: &[&str]) -> prometheus::Result<Self> {
let desc = opts.describe()?;
let labels = make_label_pairs(&desc, label_values)?;
let v = HyperLogLogCore {
shards: [0; N].map(AtomicU8::new),
desc,
labels,
};
Ok(Self { core: Arc::new(v) })
}
pub fn measure(&self, item: &impl Hash) {
// changing the hasher will break compatibility with previous measurements.
self.record(BuildHasherDefault::<xxh3::Hash64>::default().hash_one(item));
}
fn record(&self, hash: u64) {
let p = N.ilog2() as u8;
let j = hash & (N as u64 - 1);
let rho = (hash >> p).leading_zeros() as u8 + 1 - p;
self.core.shards[j as usize].fetch_max(rho, std::sync::atomic::Ordering::Relaxed);
}
}
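
record packs two quantities out of one 64-bit hash: the low p bits select a shard, and the leading zeros of the remaining bits give the rank. A standalone check of that arithmetic for N = 32 (so p = 5):

```rust
// Worked example of the bucket math in record() for N = 32.
fn main() {
    let n: u64 = 32;
    let p = n.ilog2() as u8; // 5
    let hash: u64 = (1 << 62) | 7; // bit 62 set, low bits select bucket 7
    let j = hash & (n - 1);
    let rho = (hash >> p).leading_zeros() as u8 + 1 - p;
    assert_eq!(j, 7); // shard index = low 5 bits
    assert_eq!(rho, 2); // hash >> 5 has 6 leading zeros: 6 + 1 - 5 = 2
}
```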
struct HyperLogLogCore<const N: usize> {
shards: [AtomicU8; N],
desc: core::Desc,
labels: Vec<proto::LabelPair>,
}
impl<const N: usize> core::Collector for HyperLogLog<N> {
fn desc(&self) -> Vec<&core::Desc> {
vec![&self.core.desc]
}
fn collect(&self) -> Vec<proto::MetricFamily> {
let mut m = proto::MetricFamily::default();
m.set_name(self.core.desc.fq_name.clone());
m.set_help(self.core.desc.help.clone());
m.set_field_type(proto::MetricType::GAUGE);
let mut metrics = Vec::new();
self.core.collect_into(&mut metrics);
m.set_metric(metrics);
vec![m]
}
}
impl<const N: usize> HyperLogLogCore<N> {
fn collect_into(&self, metrics: &mut Vec<proto::Metric>) {
self.shards.iter().enumerate().for_each(|(i, x)| {
let mut shard_label = proto::LabelPair::default();
shard_label.set_name("hll_shard".to_owned());
shard_label.set_value(format!("{i}"));
// We reset the counter to 0 so we can perform a cardinality measure over any time slice in prometheus.
// This seems like it would be a race condition,
// but HLL is not impacted by a write in one shard happening in between.
// This is because in PromQL we will be implementing a harmonic mean of all buckets.
// we will also merge samples in a time series using `max by (hll_shard)`.
// TODO: maybe we shouldn't reset this on every collect, instead, only after a time window.
// this would mean that a dev port-forwarding the metrics url won't break the sampling.
let v = x.swap(0, std::sync::atomic::Ordering::Relaxed);
let mut m = proto::Metric::default();
let mut c = proto::Gauge::default();
c.set_value(v as f64);
m.set_gauge(c);
let mut labels = Vec::with_capacity(self.labels.len() + 1);
labels.extend_from_slice(&self.labels);
labels.push(shard_label);
m.set_label(labels);
metrics.push(m);
})
}
}
fn make_label_pairs(
desc: &core::Desc,
label_values: &[&str],
) -> prometheus::Result<Vec<proto::LabelPair>> {
if desc.variable_labels.len() != label_values.len() {
return Err(prometheus::Error::InconsistentCardinality {
expect: desc.variable_labels.len(),
got: label_values.len(),
});
}
let total_len = desc.variable_labels.len() + desc.const_label_pairs.len();
if total_len == 0 {
return Ok(vec![]);
}
if desc.variable_labels.is_empty() {
return Ok(desc.const_label_pairs.clone());
}
let mut label_pairs = Vec::with_capacity(total_len);
for (i, n) in desc.variable_labels.iter().enumerate() {
let mut label_pair = proto::LabelPair::default();
label_pair.set_name(n.clone());
label_pair.set_value(label_values[i].to_owned());
label_pairs.push(label_pair);
}
for label_pair in &desc.const_label_pairs {
label_pairs.push(label_pair.clone());
}
label_pairs.sort();
Ok(label_pairs)
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use prometheus::{proto, Opts};
use rand::{rngs::StdRng, Rng, SeedableRng};
use rand_distr::{Distribution, Zipf};
use crate::HyperLogLogVec;
fn collect(hll: &HyperLogLogVec<32>) -> Vec<proto::Metric> {
let mut metrics = vec![];
hll.core
.children
.read()
.unwrap()
.values()
.for_each(|c| c.core.collect_into(&mut metrics));
metrics
}
fn get_cardinality(metrics: &[proto::Metric], filter: impl Fn(&proto::Metric) -> bool) -> f64 {
let mut buckets = [0.0; 32];
for metric in metrics.chunks_exact(32) {
if filter(&metric[0]) {
for (i, m) in metric.iter().enumerate() {
buckets[i] = f64::max(buckets[i], m.get_gauge().get_value());
}
}
}
buckets
.into_iter()
.map(|f| 2.0f64.powf(-f))
.sum::<f64>()
.recip()
* 0.697
* 32.0
* 32.0
}
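
The closing arithmetic is the standard HyperLogLog estimator E = alpha_m * m^2 / sum_j 2^(-M_j), with m = 32 shards and alpha_32 ≈ 0.697; restated as a small helper for readability:

```rust
// The tail of get_cardinality, restated: harmonic mean of 2^buckets,
// scaled by the m = 32 bias-correction constant alpha_32 = 0.697.
fn hll_estimate(buckets: &[f64; 32]) -> f64 {
    let m = buckets.len() as f64;
    0.697 * m * m / buckets.iter().map(|f| 2.0f64.powf(-*f)).sum::<f64>()
}
```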
fn test_cardinality(n: usize, dist: impl Distribution<f64>) -> ([usize; 3], [f64; 3]) {
let hll = HyperLogLogVec::<32>::new(Opts::new("foo", "bar"), &["x"]).unwrap();
let mut iter = StdRng::seed_from_u64(0x2024_0112).sample_iter(dist);
let mut set_a = HashSet::new();
let mut set_b = HashSet::new();
for x in iter.by_ref().take(n) {
set_a.insert(x.to_bits());
hll.with_label_values(&["a"]).measure(&x.to_bits());
}
for x in iter.by_ref().take(n) {
set_b.insert(x.to_bits());
hll.with_label_values(&["b"]).measure(&x.to_bits());
}
let merge = &set_a | &set_b;
let metrics = collect(&hll);
let len = get_cardinality(&metrics, |_| true);
let len_a = get_cardinality(&metrics, |l| l.get_label()[0].get_value() == "a");
let len_b = get_cardinality(&metrics, |l| l.get_label()[0].get_value() == "b");
([merge.len(), set_a.len(), set_b.len()], [len, len_a, len_b])
}
#[test]
fn test_cardinality_small() {
let (actual, estimate) = test_cardinality(100, Zipf::new(100, 1.2f64).unwrap());
assert_eq!(actual, [46, 30, 32]);
assert!(51.3 < estimate[0] && estimate[0] < 51.4);
assert!(44.0 < estimate[1] && estimate[1] < 44.1);
assert!(39.0 < estimate[2] && estimate[2] < 39.1);
}
#[test]
fn test_cardinality_medium() {
let (actual, estimate) = test_cardinality(10000, Zipf::new(10000, 1.2f64).unwrap());
assert_eq!(actual, [2529, 1618, 1629]);
assert!(2309.1 < estimate[0] && estimate[0] < 2309.2);
assert!(1566.6 < estimate[1] && estimate[1] < 1566.7);
assert!(1629.5 < estimate[2] && estimate[2] < 1629.6);
}
#[test]
fn test_cardinality_large() {
let (actual, estimate) = test_cardinality(1_000_000, Zipf::new(1_000_000, 1.2f64).unwrap());
assert_eq!(actual, [129077, 79579, 79630]);
assert!(126067.2 < estimate[0] && estimate[0] < 126067.3);
assert!(83076.8 < estimate[1] && estimate[1] < 83076.9);
assert!(64251.2 < estimate[2] && estimate[2] < 64251.3);
}
#[test]
fn test_cardinality_small2() {
let (actual, estimate) = test_cardinality(100, Zipf::new(200, 0.8f64).unwrap());
assert_eq!(actual, [92, 58, 60]);
assert!(116.1 < estimate[0] && estimate[0] < 116.2);
assert!(81.7 < estimate[1] && estimate[1] < 81.8);
assert!(69.3 < estimate[2] && estimate[2] < 69.4);
}
#[test]
fn test_cardinality_medium2() {
let (actual, estimate) = test_cardinality(10000, Zipf::new(20000, 0.8f64).unwrap());
assert_eq!(actual, [8201, 5131, 5051]);
assert!(6846.4 < estimate[0] && estimate[0] < 6846.5);
assert!(5239.1 < estimate[1] && estimate[1] < 5239.2);
assert!(4292.8 < estimate[2] && estimate[2] < 4292.9);
}
#[test]
fn test_cardinality_large2() {
let (actual, estimate) = test_cardinality(1_000_000, Zipf::new(2_000_000, 0.8f64).unwrap());
assert_eq!(actual, [777847, 482069, 482246]);
assert!(699437.4 < estimate[0] && estimate[0] < 699437.5);
assert!(374948.9 < estimate[1] && estimate[1] < 374949.0);
assert!(434609.7 < estimate[2] && estimate[2] < 434609.8);
}
}

View File

@@ -28,9 +28,7 @@ use prometheus::{Registry, Result};
pub mod launch_timestamp;
mod wrappers;
pub use wrappers::{CountedReader, CountedWriter};
mod hll;
pub mod metric_vec_duration;
pub use hll::{HyperLogLog, HyperLogLogVec};
pub type UIntGauge = GenericGauge<AtomicU64>;
pub type UIntGaugeVec = GenericGaugeVec<AtomicU64>;

View File

@@ -8,7 +8,6 @@ use std::{
};
use byteorder::{BigEndian, ReadBytesExt};
use postgres_ffi::BLCKSZ;
use serde::{Deserialize, Serialize};
use serde_with::serde_as;
use strum_macros;
@@ -272,7 +271,6 @@ pub struct TenantConfig {
pub evictions_low_residence_duration_metric_threshold: Option<String>,
pub gc_feedback: Option<bool>,
pub heatmap_period: Option<String>,
pub lazy_slru_download: Option<bool>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
@@ -366,19 +364,6 @@ pub struct TenantLocationConfigRequest {
pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct TenantShardLocation {
pub shard_id: TenantShardId,
pub node_id: NodeId,
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct TenantLocationConfigResponse {
pub shards: Vec<TenantShardLocation>,
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct TenantConfigRequest {
@@ -648,7 +633,6 @@ pub enum PagestreamFeMessage {
Nblocks(PagestreamNblocksRequest),
GetPage(PagestreamGetPageRequest),
DbSize(PagestreamDbSizeRequest),
GetSlruSegment(PagestreamGetSlruSegmentRequest),
}
// Wrapped in libpq CopyData
@@ -659,7 +643,6 @@ pub enum PagestreamBeMessage {
GetPage(PagestreamGetPageResponse),
Error(PagestreamErrorResponse),
DbSize(PagestreamDbSizeResponse),
GetSlruSegment(PagestreamGetSlruSegmentResponse),
}
// Keep in sync with `pagestore_client.h`
@@ -670,7 +653,6 @@ enum PagestreamBeMessageTag {
GetPage = 102,
Error = 103,
DbSize = 104,
GetSlruSegment = 105,
}
impl TryFrom<u8> for PagestreamBeMessageTag {
type Error = u8;
@@ -681,7 +663,6 @@ impl TryFrom<u8> for PagestreamBeMessageTag {
102 => Ok(PagestreamBeMessageTag::GetPage),
103 => Ok(PagestreamBeMessageTag::Error),
104 => Ok(PagestreamBeMessageTag::DbSize),
105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
_ => Err(value),
}
}
@@ -716,14 +697,6 @@ pub struct PagestreamDbSizeRequest {
pub dbnode: u32,
}
#[derive(Debug, PartialEq, Eq)]
pub struct PagestreamGetSlruSegmentRequest {
pub latest: bool,
pub lsn: Lsn,
pub kind: u8,
pub segno: u32,
}
#[derive(Debug)]
pub struct PagestreamExistsResponse {
pub exists: bool,
@@ -739,11 +712,6 @@ pub struct PagestreamGetPageResponse {
pub page: Bytes,
}
#[derive(Debug)]
pub struct PagestreamGetSlruSegmentResponse {
pub segment: Bytes,
}
#[derive(Debug)]
pub struct PagestreamErrorResponse {
pub message: String,
@@ -807,14 +775,6 @@ impl PagestreamFeMessage {
bytes.put_u64(req.lsn.0);
bytes.put_u32(req.dbnode);
}
Self::GetSlruSegment(req) => {
bytes.put_u8(4);
bytes.put_u8(u8::from(req.latest));
bytes.put_u64(req.lsn.0);
bytes.put_u8(req.kind);
bytes.put_u32(req.segno);
}
}
bytes.into()
@@ -865,14 +825,6 @@ impl PagestreamFeMessage {
lsn: Lsn::from(body.read_u64::<BigEndian>()?),
dbnode: body.read_u32::<BigEndian>()?,
})),
4 => Ok(PagestreamFeMessage::GetSlruSegment(
PagestreamGetSlruSegmentRequest {
latest: body.read_u8()? != 0,
lsn: Lsn::from(body.read_u64::<BigEndian>()?),
kind: body.read_u8()?,
segno: body.read_u32::<BigEndian>()?,
},
)),
_ => bail!("unknown smgr message tag: {:?}", msg_tag),
}
}
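
For reference, the wire layout of the removed GetSlruSegment request, read directly off the put_*/read_* calls above (integers big-endian, the whole message wrapped in libpq CopyData):

offset 0:  tag    u8  = 4
offset 1:  latest u8  (0 or 1)
offset 2:  lsn    u64 (big-endian)
offset 10: kind   u8  (SlruKind repr)
offset 11: segno  u32 (big-endian)

15 bytes in total.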
@@ -908,12 +860,6 @@ impl PagestreamBeMessage {
bytes.put_u8(Tag::DbSize as u8);
bytes.put_i64(resp.db_size);
}
Self::GetSlruSegment(resp) => {
bytes.put_u8(Tag::GetSlruSegment as u8);
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
bytes.put(&resp.segment[..]);
}
}
bytes.into()
@@ -954,14 +900,6 @@ impl PagestreamBeMessage {
let db_size = buf.read_i64::<BigEndian>()?;
Self::DbSize(PagestreamDbSizeResponse { db_size })
}
Tag::GetSlruSegment => {
let n_blocks = buf.read_u32::<BigEndian>()?;
let mut segment = vec![0; n_blocks as usize * BLCKSZ as usize];
buf.read_exact(&mut segment)?;
Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {
segment: segment.into(),
})
}
};
let remaining = buf.into_inner();
if !remaining.is_empty() {
@@ -980,7 +918,6 @@ impl PagestreamBeMessage {
Self::GetPage(_) => "GetPage",
Self::Error(_) => "Error",
Self::DbSize(_) => "DbSize",
Self::GetSlruSegment(_) => "GetSlruSegment",
}
}
}

View File

@@ -123,11 +123,9 @@ impl RelTag {
PartialOrd,
Ord,
strum_macros::EnumIter,
strum_macros::FromRepr,
)]
#[repr(u8)]
pub enum SlruKind {
Clog = 0,
Clog,
MultiXactMembers,
MultiXactOffsets,
}
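
For reference, the FromRepr derive dropped here is what let a raw byte be decoded back into the enum, e.g. SlruKind::from_repr(0) == Some(SlruKind::Clog); that is exactly how the removed pagestream handler below decoded req.kind. With the GetSlruSegment message gone, neither the derive nor the explicit Clog = 0 discriminant is needed.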

View File

@@ -207,16 +207,10 @@ pub fn find_end_of_wal(
let seg_offs = curr_lsn.segment_offset(wal_seg_size);
segment.seek(SeekFrom::Start(seg_offs as u64))?;
// loop inside segment
while curr_lsn.segment_number(wal_seg_size) == segno {
loop {
let bytes_read = segment.read(&mut buf)?;
if bytes_read == 0 {
debug!(
"find_end_of_wal reached end at {:?}, EOF in segment {:?} at offset {}",
result,
seg_file_path,
curr_lsn.segment_offset(wal_seg_size)
);
return Ok(result);
break; // EOF
}
curr_lsn += bytes_read as u64;
decoder.feed_bytes(&buf[0..bytes_read]);

View File

@@ -646,7 +646,7 @@ impl RemoteStorage for S3Bucket {
let timestamp = DateTime::from(timestamp);
let done_if_after = DateTime::from(done_if_after);
tracing::info!("Target time: {timestamp:?}, done_if_after {done_if_after:?}");
tracing::trace!("Target time: {timestamp:?}, done_if_after {done_if_after:?}");
// get the passed prefix or if it is not set use prefix_in_bucket value
let prefix = prefix
@@ -657,108 +657,75 @@ impl RemoteStorage for S3Bucket {
let max_retries = 10;
let is_permanent = |_e: &_| false;
let mut key_marker = None;
let mut version_id_marker = None;
let mut versions_and_deletes = Vec::new();
let list = backoff::retry(
|| async {
Ok(self
.client
.list_object_versions()
.bucket(self.bucket_name.clone())
.set_prefix(prefix.clone())
.send()
.await?)
},
is_permanent,
warn_threshold,
max_retries,
"listing object versions for time_travel_recover",
backoff::Cancel::new(cancel.clone(), || anyhow!("Cancelled")),
)
.await?;
loop {
let response = backoff::retry(
|| async {
Ok(self
.client
.list_object_versions()
.bucket(self.bucket_name.clone())
.set_prefix(prefix.clone())
.set_key_marker(key_marker.clone())
.set_version_id_marker(version_id_marker.clone())
.send()
.await?)
},
is_permanent,
warn_threshold,
max_retries,
"listing object versions for time_travel_recover",
backoff::Cancel::new(cancel.clone(), || anyhow!("Cancelled")),
)
.await?;
tracing::trace!(
" Got List response version_id_marker={:?}, key_marker={:?}",
response.version_id_marker,
response.key_marker
);
let versions = response
.versions
.unwrap_or_default()
.into_iter()
.map(VerOrDelete::from_version);
let deletes = response
.delete_markers
.unwrap_or_default()
.into_iter()
.map(VerOrDelete::from_delete_marker);
itertools::process_results(versions.chain(deletes), |n_vds| {
versions_and_deletes.extend(n_vds)
})?;
fn none_if_empty(v: Option<String>) -> Option<String> {
v.filter(|v| !v.is_empty())
}
version_id_marker = none_if_empty(response.next_version_id_marker);
key_marker = none_if_empty(response.next_key_marker);
if version_id_marker.is_none() {
// The final response is not supposed to be truncated
if response.is_truncated.unwrap_or_default() {
anyhow::bail!(
"Received truncated ListObjectVersions response for prefix={prefix:?}"
);
}
break;
}
// Limit the number of versions/deletions, mostly so that we don't
// keep requesting forever if the list is too long, as we'd hold the
// whole list in RAM.
// Building a list of 100k entries that reaches the limit roughly takes
// 40 seconds, and roughly corresponds to tenants of 2 TiB physical size.
const COMPLEXITY_LIMIT: usize = 100_000;
if versions_and_deletes.len() >= COMPLEXITY_LIMIT {
anyhow::bail!(
"Limit for number of versions/deletions exceeded for prefix={prefix:?}"
);
}
if list.is_truncated().unwrap_or_default() {
anyhow::bail!("Received truncated ListObjectVersions response for prefix={prefix:?}");
}
// Work on the list of references instead of the objects directly,
// otherwise we get lifetime errors in the sort_by_key call below.
let mut versions_and_deletes = versions_and_deletes.iter().collect::<Vec<_>>();
let mut versions_deletes = list
.versions()
.iter()
.map(VerOrDelete::Version)
.chain(list.delete_markers().iter().map(VerOrDelete::DeleteMarker))
.collect::<Vec<_>>();
versions_and_deletes.sort_by_key(|vd| (&vd.key, &vd.last_modified));
versions_deletes.sort_by_key(|vd| (vd.key(), vd.last_modified()));
let mut vds_for_key = HashMap::<_, Vec<_>>::new();
for vd in &versions_and_deletes {
let VerOrDelete {
version_id, key, ..
} = &vd;
for vd in versions_deletes {
let last_modified = vd.last_modified();
let version_id = vd.version_id();
let key = vd.key();
let (Some(last_modified), Some(version_id), Some(key)) =
(last_modified, version_id, key)
else {
anyhow::bail!(
"One (or more) of last_modified, key, and id is None. \
Is versioning enabled in the bucket? last_modified={:?} key={:?} version_id={:?}",
last_modified, key, version_id,
);
};
if version_id == "null" {
anyhow::bail!("Received ListVersions response for key={key} with version_id='null', \
indicating either disabled versioning, or legacy objects with null version id values");
}
tracing::trace!(
"Parsing version key={key} version_id={version_id} kind={:?}",
vd.kind
"Parsing version key={key} version_id={version_id} is_delete={}",
matches!(vd, VerOrDelete::DeleteMarker(_))
);
vds_for_key.entry(key).or_default().push(vd);
vds_for_key
.entry(key)
.or_default()
.push((vd, last_modified, version_id));
}
for (key, versions) in vds_for_key {
let last_vd = versions.last().unwrap();
if last_vd.last_modified > done_if_after {
let (last_vd, last_last_modified, _version_id) = versions.last().unwrap();
if last_last_modified > &&done_if_after {
tracing::trace!("Key {key} has version later than done_if_after, skipping");
continue;
}
// the version we want to restore to.
let version_to_restore_to =
match versions.binary_search_by_key(&timestamp, |tpl| tpl.last_modified) {
match versions.binary_search_by_key(&timestamp, |tpl| *tpl.1) {
Ok(v) => v,
Err(e) => e,
};
@@ -776,11 +743,7 @@ impl RemoteStorage for S3Bucket {
do_delete = true;
} else {
match &versions[version_to_restore_to - 1] {
VerOrDelete {
kind: VerOrDeleteKind::Version,
version_id,
..
} => {
(VerOrDelete::Version(_), _last_modified, version_id) => {
tracing::trace!("Copying old version {version_id} for {key}...");
// Restore the state to the last version by copying
let source_id =
@@ -805,16 +768,13 @@ impl RemoteStorage for S3Bucket {
)
.await?;
}
VerOrDelete {
kind: VerOrDeleteKind::DeleteMarker,
..
} => {
(VerOrDelete::DeleteMarker(_), _last_modified, _version_id) => {
do_delete = true;
}
}
};
if do_delete {
if matches!(last_vd.kind, VerOrDeleteKind::DeleteMarker) {
if matches!(last_vd, VerOrDelete::DeleteMarker(_)) {
// Key has since been deleted (but there was some history), no need to do anything
tracing::trace!("Key {key} already deleted, skipping.");
} else {
@@ -851,59 +811,29 @@ fn start_measuring_requests(
})
}
// Save RAM and only store the needed data instead of the entire ObjectVersion/DeleteMarkerEntry
struct VerOrDelete {
kind: VerOrDeleteKind,
last_modified: DateTime,
version_id: String,
key: String,
enum VerOrDelete<'a> {
Version(&'a ObjectVersion),
DeleteMarker(&'a DeleteMarkerEntry),
}
#[derive(Debug)]
enum VerOrDeleteKind {
Version,
DeleteMarker,
}
impl VerOrDelete {
fn with_kind(
kind: VerOrDeleteKind,
last_modified: Option<DateTime>,
version_id: Option<String>,
key: Option<String>,
) -> anyhow::Result<Self> {
let lvk = (last_modified, version_id, key);
let (Some(last_modified), Some(version_id), Some(key)) = lvk else {
anyhow::bail!(
"One (or more) of last_modified, key, and id is None. \
Is versioning enabled in the bucket? last_modified={:?}, version_id={:?}, key={:?}",
lvk.0,
lvk.1,
lvk.2,
);
};
Ok(Self {
kind,
last_modified,
version_id,
key,
})
impl<'a> VerOrDelete<'a> {
fn last_modified(&self) -> Option<&'a DateTime> {
match self {
VerOrDelete::Version(v) => v.last_modified(),
VerOrDelete::DeleteMarker(v) => v.last_modified(),
}
}
fn from_version(v: ObjectVersion) -> anyhow::Result<Self> {
Self::with_kind(
VerOrDeleteKind::Version,
v.last_modified,
v.version_id,
v.key,
)
fn version_id(&self) -> Option<&'a str> {
match self {
VerOrDelete::Version(v) => v.version_id(),
VerOrDelete::DeleteMarker(v) => v.version_id(),
}
}
fn from_delete_marker(v: DeleteMarkerEntry) -> anyhow::Result<Self> {
Self::with_kind(
VerOrDeleteKind::DeleteMarker,
v.last_modified,
v.version_id,
v.key,
)
fn key(&self) -> Option<&'a str> {
match self {
VerOrDelete::Version(v) => v.key(),
VerOrDelete::DeleteMarker(v) => v.key(),
}
}
}

View File

@@ -112,55 +112,6 @@ pub async fn fsync_async(path: impl AsRef<Utf8Path>) -> Result<(), std::io::Erro
tokio::fs::File::open(path.as_ref()).await?.sync_all().await
}
pub async fn fsync_async_opt(
path: impl AsRef<Utf8Path>,
do_fsync: bool,
) -> Result<(), std::io::Error> {
if do_fsync {
fsync_async(path.as_ref()).await?;
}
Ok(())
}
/// Like postgres' durable_rename, renames the file, issuing fsyncs to make it
/// durable. After return, both the file and the rename are guaranteed to be persisted.
///
/// Unlike postgres, it only fsyncs 1) the file to be renamed, to make its
/// contents durable; 2) its directory entry, to make the rename durable; 3) the
/// already-renamed file once more, which standards don't require but postgres
/// does, so let's stick to that. Postgres additionally fsyncs newpath *before*
/// the rename if it exists, to ensure that at least one of the files survives,
/// but current callers don't need that.
///
/// virtual_file.rs has similar code, but it doesn't use vfs.
///
/// Useful links: <https://lwn.net/Articles/457667/>
/// <https://www.postgresql.org/message-id/flat/56583BDD.9060302%402ndquadrant.com>
/// <https://thunk.org/tytso/blog/2009/03/15/dont-fear-the-fsync/>
pub async fn durable_rename(
old_path: impl AsRef<Utf8Path>,
new_path: impl AsRef<Utf8Path>,
do_fsync: bool,
) -> io::Result<()> {
// first fsync the file
fsync_async_opt(old_path.as_ref(), do_fsync).await?;
// Time to do the real deal.
tokio::fs::rename(old_path.as_ref(), new_path.as_ref()).await?;
// Postgres'ish fsync of renamed file.
fsync_async_opt(new_path.as_ref(), do_fsync).await?;
// Now fsync the parent
let parent = match new_path.as_ref().parent() {
Some(p) => p,
None => Utf8Path::new("./"), // assume current dir if there is no parent
};
fsync_async_opt(parent, do_fsync).await?;
Ok(())
}
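A sketch of the intended call pattern (the publish helper and temp-path convention are illustrative, not part of this diff): fully write a temporary file, then atomically move it into place, with durability guaranteed on return.

// Hypothetical caller: atomically publish a fully written temp file.
async fn publish(tmp: &Utf8Path, dst: &Utf8Path) -> std::io::Result<()> {
    durable_rename(tmp, dst, /* do_fsync */ true).await
}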
#[cfg(test)]
mod tests {

View File

@@ -1,10 +1,4 @@
use std::{
sync::{
atomic::{AtomicBool, Ordering},
Arc,
},
time::Duration,
};
use std::{sync::Arc, time::Duration};
/// Gates are a concurrency helper, primarily used for implementing safe shutdown.
///
@@ -12,70 +6,62 @@ use std::{
/// the resource calls `close()` when they want to ensure that all holders of guards
/// have released them, and that no future guards will be issued.
pub struct Gate {
inner: Arc<GateInner>,
/// Each caller of enter() takes one unit from the semaphore. In close(), we
/// take all the units to ensure all GateGuards are destroyed.
sem: Arc<tokio::sync::Semaphore>,
/// For observability only: a name that will be used to log warnings if a particular
/// gate is holding up shutdown
name: String,
}
impl std::fmt::Debug for Gate {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Gate")
// use this for identification
.field("ptr", &Arc::as_ptr(&self.inner))
.field("inner", &self.inner)
.finish()
}
}
struct GateInner {
sem: tokio::sync::Semaphore,
closing: std::sync::atomic::AtomicBool,
}
impl std::fmt::Debug for GateInner {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let avail = self.sem.available_permits();
let guards = u32::try_from(avail)
.ok()
// the sem only supports 32-bit ish amount, but lets play it safe
.and_then(|x| Gate::MAX_UNITS.checked_sub(x));
let closing = self.closing.load(Ordering::Relaxed);
if let Some(guards) = guards {
f.debug_struct("Gate")
.field("remaining_guards", &guards)
.field("closing", &closing)
.finish()
} else {
f.debug_struct("Gate")
.field("avail_permits", &avail)
.field("closing", &closing)
.finish()
}
write!(f, "Gate<{}>", self.name)
}
}
/// RAII guard for a [`Gate`]: as long as this exists, calls to [`Gate::close`] will
/// not complete.
#[derive(Debug)]
pub struct GateGuard {
// Record the span where the gate was entered, so that we can identify who was blocking Gate::close
span_at_enter: tracing::Span,
gate: Arc<GateInner>,
}
pub struct GateGuard(tokio::sync::OwnedSemaphorePermit);
impl Drop for GateGuard {
fn drop(&mut self) {
if self.gate.closing.load(Ordering::Relaxed) {
self.span_at_enter.in_scope(
|| tracing::info!(gate = ?Arc::as_ptr(&self.gate), "kept the gate from closing"),
);
/// Observability helper: every `warn_period`, emit a log warning that we're still waiting on this gate
async fn warn_if_stuck<Fut: std::future::Future>(
fut: Fut,
name: &str,
warn_period: std::time::Duration,
) -> <Fut as std::future::Future>::Output {
let started = std::time::Instant::now();
let mut fut = std::pin::pin!(fut);
let mut warned = false;
let ret = loop {
match tokio::time::timeout(warn_period, &mut fut).await {
Ok(ret) => break ret,
Err(_) => {
tracing::warn!(
gate = name,
elapsed_ms = started.elapsed().as_millis(),
"still waiting, taking longer than expected..."
);
warned = true;
}
}
};
// when the permit was acquired, it was forgotten to allow us to manage its lifecycle
// manually, so "return" the permit now.
self.gate.sem.add_permits(1);
// If we emitted a warning for slowness, also emit a message when we complete, so that
// someone debugging a shutdown can know for sure whether we have moved past this operation.
if warned {
tracing::info!(
gate = name,
elapsed_ms = started.elapsed().as_millis(),
"completed, after taking longer than expected"
)
}
ret
}
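In close() below this is wired up as warn_if_stuck(self.do_close(), &self.name, Duration::from_millis(1000)).await, so a stuck shutdown logs the gate's name once per second until do_close() resolves.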
#[derive(Debug)]
@@ -83,20 +69,16 @@ pub enum GateError {
GateClosed,
}
impl Default for Gate {
fn default() -> Self {
Self {
inner: Arc::new(GateInner {
sem: tokio::sync::Semaphore::new(Self::MAX_UNITS as usize),
closing: AtomicBool::new(false),
}),
}
}
}
impl Gate {
const MAX_UNITS: u32 = u32::MAX;
pub fn new(name: String) -> Self {
Self {
sem: Arc::new(tokio::sync::Semaphore::new(Self::MAX_UNITS as usize)),
name,
}
}
/// Acquire a guard that will prevent close() calls from completing. If close()
/// was already called, this will return an error which should be interpreted
/// as "shutting down".
@@ -106,23 +88,11 @@ impl Gate {
/// to avoid blocking close() indefinitely: typically types that contain a Gate will
/// also contain a CancellationToken.
pub fn enter(&self) -> Result<GateGuard, GateError> {
let permit = self
.inner
.sem
.try_acquire()
.map_err(|_| GateError::GateClosed)?;
// we now have the permit, let's disable the normal raii functionality and leave
// "returning" the permit to our GateGuard::drop.
//
// this is done to avoid the need for multiple Arcs (one for semaphore, next for other
// fields).
permit.forget();
Ok(GateGuard {
span_at_enter: tracing::Span::current(),
gate: self.inner.clone(),
})
self.sem
.clone()
.try_acquire_owned()
.map(GateGuard)
.map_err(|_| GateError::GateClosed)
}
/// Types with a shutdown() method and a gate should call this method at the
@@ -132,88 +102,48 @@ impl Gate {
/// important that the holders of such guards are respecting a CancellationToken which has
/// been cancelled before entering this function.
pub async fn close(&self) {
let started_at = std::time::Instant::now();
let mut do_close = std::pin::pin!(self.do_close());
let nag_after = Duration::from_secs(1);
let Err(_timeout) = tokio::time::timeout(nag_after, &mut do_close).await else {
return;
};
tracing::info!(
gate = ?self.as_ptr(),
elapsed_ms = started_at.elapsed().as_millis(),
"closing is taking longer than expected"
);
// close operation is not trying to be cancellation safe as pageserver does not need it.
//
// note: "closing" is not checked in Gate::enter -- it exists just for observability,
// dropping of GateGuard after this will log who they were.
self.inner.closing.store(true, Ordering::Relaxed);
do_close.await;
tracing::info!(
gate = ?self.as_ptr(),
elapsed_ms = started_at.elapsed().as_millis(),
"close completed"
);
}
/// Used as an identity of a gate. This identity will be resolved to something useful when
/// it's actually closed in a hopefully sensible `tracing::Span` which will describe it even
/// more.
///
/// `GateGuard::drop` also logs this pointer when it has realized it has been keeping the gate
/// open for too long.
fn as_ptr(&self) -> *const GateInner {
Arc::as_ptr(&self.inner)
warn_if_stuck(self.do_close(), &self.name, Duration::from_millis(1000)).await
}
/// Check if [`Self::close()`] has finished waiting for all [`Self::enter()`] users to finish. This
/// is usually analogous to "Did shutdown finish?" for types that include a Gate, whereas checking
/// the CancellationToken on such types is analogous to "Did shutdown start?"
pub fn close_complete(&self) -> bool {
self.inner.sem.is_closed()
self.sem.is_closed()
}
#[tracing::instrument(level = tracing::Level::DEBUG, skip_all, fields(gate = ?self.as_ptr()))]
async fn do_close(&self) {
tracing::debug!("Closing Gate...");
match self.inner.sem.acquire_many(Self::MAX_UNITS).await {
Ok(_permit) => {
tracing::debug!(gate = self.name, "Closing Gate...");
match self.sem.acquire_many(Self::MAX_UNITS).await {
Ok(_units) => {
// While holding all units, close the semaphore. All subsequent calls to enter() will fail.
self.inner.sem.close();
self.sem.close();
}
Err(_closed) => {
Err(_) => {
// Semaphore closed: we are the only function that can do this, so it indicates a double-call.
// This is legal. Timeline::shutdown for example is not protected from being called more than
// once.
tracing::debug!("Double close")
tracing::debug!(gate = self.name, "Double close")
}
}
tracing::debug!("Closed Gate.")
tracing::debug!(gate = self.name, "Closed Gate.")
}
}
#[cfg(test)]
mod tests {
use futures::FutureExt;
use super::*;
#[tokio::test]
async fn close_unused() {
// Having taken no guards, we should not be blocked in close
let gate = Gate::default();
async fn test_idle_gate() {
// Having taken no gates, we should not be blocked in close
let gate = Gate::new("test".to_string());
gate.close().await;
}
#[tokio::test]
async fn close_idle() {
// If a guard is dropped before entering, close should not be blocked
let gate = Gate::default();
let gate = Gate::new("test".to_string());
let guard = gate.enter().unwrap();
drop(guard);
gate.close().await;
@@ -222,30 +152,25 @@ mod tests {
gate.enter().expect_err("enter should fail after close");
}
#[tokio::test(start_paused = true)]
async fn close_busy_gate() {
let gate = Gate::default();
let forever = Duration::from_secs(24 * 7 * 365);
#[tokio::test]
async fn test_busy_gate() {
let gate = Gate::new("test".to_string());
let guard =
tracing::info_span!("i am holding back the gate").in_scope(|| gate.enter().unwrap());
let guard = gate.enter().unwrap();
let mut close_fut = std::pin::pin!(gate.close());
// Close should be waiting for guards to drop
tokio::time::timeout(forever, &mut close_fut)
.await
.unwrap_err();
// Close should be blocked
assert!(close_fut.as_mut().now_or_never().is_none());
// Attempting to enter() should fail, even though close isn't done yet.
gate.enter()
.expect_err("enter should fail after entering close");
// this will now log, which we cannot verify except manually
drop(guard);
// Guard is gone, close should finish
close_fut.await;
assert!(close_fut.as_mut().now_or_never().is_some());
// Attempting to enter() is still forbidden
gate.enter().expect_err("enter should fail finishing close");

View File

@@ -69,25 +69,6 @@ impl Client {
resp.json().await.map_err(Error::ReceiveBody)
}
/// Get an arbitrary path, returning a streaming Response. This function is suitable
/// for pass-through/proxy use cases where we don't care what the response content looks
/// like.
///
/// Use/add one of the properly typed methods below if you know you aren't proxying,
/// and know what kind of response you expect.
pub async fn get_raw(&self, path: String) -> Result<reqwest::Response> {
debug_assert!(path.starts_with('/'));
let uri = format!("{}{}", self.mgmt_api_endpoint, path);
let req = self.client.request(Method::GET, uri);
let req = if let Some(value) = &self.authorization_header {
req.header(reqwest::header::AUTHORIZATION, value)
} else {
req
};
req.send().await.map_err(Error::ReceiveBody)
}
pub async fn tenant_details(
&self,
tenant_shard_id: TenantShardId,
@@ -190,25 +171,6 @@ impl Client {
.map_err(Error::ReceiveBody)
}
/// The tenant deletion API can return 202 if deletion is incomplete, or
/// 404 if it is complete. Callers are responsible for checking the status
/// code and retrying. Error codes other than 404 will return Err().
pub async fn tenant_delete(&self, tenant_shard_id: TenantShardId) -> Result<StatusCode> {
let uri = format!("{}/v1/tenant/{tenant_shard_id}", self.mgmt_api_endpoint);
match self.request(Method::DELETE, &uri, ()).await {
Err(Error::ApiError(status_code, msg)) => {
if status_code == StatusCode::NOT_FOUND {
Ok(StatusCode::NOT_FOUND)
} else {
Err(Error::ApiError(status_code, msg))
}
}
Err(e) => Err(e),
Ok(response) => Ok(response.status()),
}
}
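A hypothetical caller-side loop over this contract (a sketch; the sleep interval is illustrative):

// Poll until the 404 that signals the deletion has completed.
loop {
    match client.tenant_delete(tenant_shard_id).await? {
        StatusCode::NOT_FOUND => break,
        _still_deleting => tokio::time::sleep(Duration::from_millis(500)).await,
    }
}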
pub async fn tenant_config(&self, req: &TenantConfigRequest) -> Result<()> {
let uri = format!("{}/v1/tenant/config", self.mgmt_api_endpoint);
self.request(Method::PUT, &uri, req).await?;
@@ -272,32 +234,6 @@ impl Client {
.map_err(Error::ReceiveBody)
}
/// The timeline deletion API can return 202 if deletion is incomplete, or
/// 404 if it is complete. Callers are responsible for checking the status
/// code and retrying. Error codes other than 404 will return Err().
pub async fn timeline_delete(
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
) -> Result<StatusCode> {
let uri = format!(
"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}",
self.mgmt_api_endpoint
);
match self.request(Method::DELETE, &uri, ()).await {
Err(Error::ApiError(status_code, msg)) => {
if status_code == StatusCode::NOT_FOUND {
Ok(StatusCode::NOT_FOUND)
} else {
Err(Error::ApiError(status_code, msg))
}
}
Err(e) => Err(e),
Ok(response) => Ok(response.status()),
}
}
pub async fn tenant_reset(&self, tenant_shard_id: TenantShardId) -> Result<()> {
let uri = format!(
"{}/v1/tenant/{}/reset",

View File

@@ -156,8 +156,7 @@ impl PagestreamClient {
PagestreamBeMessage::Error(e) => anyhow::bail!("Error: {:?}", e),
PagestreamBeMessage::Exists(_)
| PagestreamBeMessage::Nblocks(_)
| PagestreamBeMessage::DbSize(_)
| PagestreamBeMessage::GetSlruSegment(_) => {
| PagestreamBeMessage::DbSize(_) => {
anyhow::bail!(
"unexpected be message kind in response to getpage request: {}",
msg.kind()

View File

@@ -66,10 +66,13 @@ impl serde::Serialize for LatencyPercentiles {
{
use serde::ser::SerializeMap;
let mut ser = serializer.serialize_map(Some(LATENCY_PERCENTILES.len()))?;
for (p, v) in LATENCY_PERCENTILES.iter().zip(&self.latency_percentiles) {
for p in LATENCY_PERCENTILES {
ser.serialize_entry(
&format!("p{p}"),
&format!("{}", humantime::format_duration(*v)),
&format!(
"{}",
&humantime::format_duration(self.latency_percentiles[0])
),
)?;
}
ser.end()

View File

@@ -222,8 +222,6 @@ where
async fn send_tarball(mut self) -> anyhow::Result<()> {
// TODO include checksum
let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup;
// Create pgdata subdirs structure
for dir in PGDATA_SUBDIRS.iter() {
let header = new_tar_header_dir(dir)?;
@@ -250,29 +248,29 @@ where
.context("could not add config file to basebackup tarball")?;
}
}
if !lazy_slru_download {
// Gather non-relational files from object storage pages.
let slru_partitions = self
// Gather non-relational files from object storage pages.
let slru_partitions = self
.timeline
.get_slru_keyspace(Version::Lsn(self.lsn), self.ctx)
.await?
.partition(Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64);
let mut slru_builder = SlruSegmentsBuilder::new(&mut self.ar);
for part in slru_partitions.parts {
let blocks = self
.timeline
.get_slru_keyspace(Version::Lsn(self.lsn), self.ctx)
.await?
.partition(Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64);
.get_vectored(&part.ranges, self.lsn, self.ctx)
.await?;
let mut slru_builder = SlruSegmentsBuilder::new(&mut self.ar);
for part in slru_partitions.parts {
let blocks = self
.timeline
.get_vectored(&part.ranges, self.lsn, self.ctx)
.await?;
for (key, block) in blocks {
slru_builder.add_block(&key, block?).await?;
}
for (key, block) in blocks {
slru_builder.add_block(&key, block?).await?;
}
slru_builder.finish().await?;
}
slru_builder.finish().await?;
let mut min_restart_lsn: Lsn = Lsn::MAX;
// Create tablespace directories
for ((spcnode, dbnode), has_relmap_file) in

View File

@@ -33,10 +33,12 @@ use pageserver::{
use postgres_backend::AuthType;
use utils::failpoint_support;
use utils::logging::TracingErrorLayerEnablement;
use utils::signals::ShutdownSignals;
use utils::{
auth::{JwtAuth, SwappableJwtAuth},
logging, project_build_tag, project_git_version,
sentry_init::init_sentry,
signals::Signal,
tcp_listener,
};
@@ -654,42 +656,34 @@ fn start_pageserver(
let mut shutdown_pageserver = Some(shutdown_pageserver.drop_guard());
// All started up! Now just sit and wait for shutdown signal.
{
use signal_hook::consts::*;
let signal_handler = BACKGROUND_RUNTIME.spawn_blocking(move || {
let mut signals =
signal_hook::iterator::Signals::new([SIGINT, SIGTERM, SIGQUIT]).unwrap();
return signals
.forever()
.next()
.expect("forever() never returns None unless explicitly closed");
});
let signal = BACKGROUND_RUNTIME
.block_on(signal_handler)
.expect("join error");
match signal {
SIGQUIT => {
info!("Got signal {signal}. Terminating in immediate shutdown mode",);
std::process::exit(111);
}
SIGINT | SIGTERM => {
info!("Got signal {signal}. Terminating gracefully in fast shutdown mode",);
// This cancels the `shutdown_pageserver` cancellation tree.
// Right now that tree doesn't reach very far, and `task_mgr` is used instead.
// The plan is to change that over time.
shutdown_pageserver.take();
let bg_remote_storage = remote_storage.clone();
let bg_deletion_queue = deletion_queue.clone();
BACKGROUND_RUNTIME.block_on(pageserver::shutdown_pageserver(
bg_remote_storage.map(|_| bg_deletion_queue),
0,
));
unreachable!()
}
_ => unreachable!(),
ShutdownSignals::handle(|signal| match signal {
Signal::Quit => {
info!(
"Got {}. Terminating in immediate shutdown mode",
signal.name()
);
std::process::exit(111);
}
}
Signal::Interrupt | Signal::Terminate => {
info!(
"Got {}. Terminating gracefully in fast shutdown mode",
signal.name()
);
// This cancels the `shutdown_pageserver` cancellation tree.
// Right now that tree doesn't reach very far, and `task_mgr` is used instead.
// The plan is to change that over time.
shutdown_pageserver.take();
let bg_remote_storage = remote_storage.clone();
let bg_deletion_queue = deletion_queue.clone();
BACKGROUND_RUNTIME.block_on(pageserver::shutdown_pageserver(
bg_remote_storage.map(|_| bg_deletion_queue),
0,
));
unreachable!()
}
})
}
fn create_remote_storage_client(

View File

@@ -97,86 +97,23 @@ pub enum EvictionOrder {
/// Order the layers to be evicted by how recently they have been accessed relatively within
/// the set of resident layers of a tenant.
///
/// This strategy will evict layers more fairly but is untested.
RelativeAccessed {
/// Determines if the tenant with most layers should lose first.
///
/// Having this enabled is currently the only reasonable option, because the order in which
/// we read tenants is deterministic. If we find the need to use this as `false`, we need
/// to ensure nondeterminism by adding in a random number to break the
/// `relative_last_activity==0.0` ties.
#[serde(default = "default_highest_layer_count_loses_first")]
#[serde(default)]
highest_layer_count_loses_first: bool,
},
}
fn default_highest_layer_count_loses_first() -> bool {
true
}
impl EvictionOrder {
fn sort(&self, candidates: &mut [(MinResidentSizePartition, EvictionCandidate)]) {
use EvictionOrder::*;
/// Return true if, with [`Self::RelativeAccessed`] ordering, the tenants with the highest layer
/// counts should be the first to have their layers evicted.
fn highest_layer_count_loses_first(&self) -> bool {
match self {
AbsoluteAccessed => {
candidates.sort_unstable_by_key(|(partition, candidate)| {
(*partition, candidate.last_activity_ts)
});
}
RelativeAccessed { .. } => candidates.sort_unstable_by_key(|(partition, candidate)| {
(*partition, candidate.relative_last_activity)
}),
}
}
/// Called to fill in the [`EvictionCandidate::relative_last_activity`] while iterating a tenant's
/// layers in **most** recently used order.
fn relative_last_activity(&self, total: usize, index: usize) -> finite_f32::FiniteF32 {
use EvictionOrder::*;
match self {
AbsoluteAccessed => finite_f32::FiniteF32::ZERO,
RelativeAccessed {
EvictionOrder::AbsoluteAccessed => false,
EvictionOrder::RelativeAccessed {
highest_layer_count_loses_first,
} => {
// keeping the -1 or not decides whether every tenant should lose its least recently accessed
// layer, OR whether this should happen in order of highest layer count:
let fudge = if *highest_layer_count_loses_first {
// relative_last_activity vs. tenant layer count:
// - 0.1..=1.0 (10 layers)
// - 0.01..=1.0 (100 layers)
// - 0.001..=1.0 (1000 layers)
//
// leading to evicting less of the smallest tenants.
0
} else {
// use full 0.0..=1.0 range, which means even the smallest tenants could always lose a
// layer. the actual ordering is unspecified: for 10k tenants on a pageserver it could
// be that less than 10k layer evictions is enough, so we would not need to evict from
// all tenants.
//
// as the tenant ordering is now deterministic this could hit the same tenants
// disproportionately on multiple invocations. An alternative could be to remember how many
// layers we evicted last time from this tenant, and inject that as an additional
// fudge here.
1
};
let total = total.checked_sub(fudge).filter(|&x| x > 1).unwrap_or(1);
let divider = total as f32;
// most recently used is always (total - 0) / divider == 1.0
// least recently used depends on the fudge:
// - (total - 1) - (total - 1) / total => 0 / total
// - total - (total - 1) / total => 1 / total
let distance = (total - index) as f32;
finite_f32::FiniteF32::try_from_normalized(distance / divider)
.unwrap_or_else(|val| {
tracing::warn!(%fudge, "calculated invalid relative_last_activity for i={index}, total={total}: {val}");
finite_f32::FiniteF32::ZERO
})
}
} => *highest_layer_count_loses_first,
}
}
}
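
For intuition, the removed formula worked by hand for ten layers (these are exactly the bounds asserted by the relative_equal_bounds and relative_spare_bounds tests removed further down):

// highest_layer_count_loses_first == true  => fudge = 0, divider = 10:
//   index 0 => (10 - 0) / 10 = 1.0   (most recently used, evicted last)
//   index 9 => (10 - 9) / 10 = 0.1   (least recently used)
// highest_layer_count_loses_first == false => fudge = 1, divider = 9:
//   index 0 => (9 - 0) / 9 = 1.0
//   index 9 => (9 - 9) / 9 = 0.0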
@@ -452,6 +389,52 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
let selection = select_victims(&candidates, usage_pre);
let mut candidates = candidates;
let selection = if matches!(eviction_order, EvictionOrder::RelativeAccessed { .. }) {
// we currently have the layers ordered by AbsoluteAccessed so that we can get the summary
// for comparison here. this is a temporary measure to develop alternatives.
use std::fmt::Write;
let mut summary_buf = String::with_capacity(256);
{
let absolute_summary = candidates
.iter()
.take(selection.amount)
.map(|(_, candidate)| candidate)
.collect::<summary::EvictionSummary>();
write!(summary_buf, "{absolute_summary}").expect("string grows");
info!("absolute accessed selection summary: {summary_buf}");
}
candidates.sort_unstable_by_key(|(partition, candidate)| {
(*partition, candidate.relative_last_activity)
});
let selection = select_victims(&candidates, usage_pre);
{
summary_buf.clear();
let relative_summary = candidates
.iter()
.take(selection.amount)
.map(|(_, candidate)| candidate)
.collect::<summary::EvictionSummary>();
write!(summary_buf, "{relative_summary}").expect("string grows");
info!("relative accessed selection summary: {summary_buf}");
}
selection
} else {
selection
};
let (evicted_amount, usage_planned) = selection.into_amount_and_planned();
// phase2: evict layers
@@ -852,12 +835,54 @@ async fn collect_eviction_candidates(
.sort_unstable_by_key(|layer_info| std::cmp::Reverse(layer_info.last_activity_ts));
let mut cumsum: i128 = 0;
let total = tenant_candidates.len();
// keeping the -1 or not decides whether every tenant should lose its least recently accessed
// layer, OR whether this should happen in order of highest layer count:
let fudge = if eviction_order.highest_layer_count_loses_first() {
// relative_age vs. tenant layer count:
// - 0.1..=1.0 (10 layers)
// - 0.01..=1.0 (100 layers)
// - 0.001..=1.0 (1000 layers)
//
// leading to evicting less of the smallest tenants.
0
} else {
// use full 0.0..=1.0 range, which means even the smallest tenants could always lose a
// layer. the actual ordering is unspecified: for 10k tenants on a pageserver it could
// be that less than 10k layer evictions is enough, so we would not need to evict from
// all tenants.
//
// as the tenant ordering is now deterministic this could hit the same tenants
// disproportionately on multiple invocations. An alternative could be to remember how many
// layers we evicted last time from this tenant, and inject that as an additional
// fudge here.
1
};
let total = tenant_candidates
.len()
.checked_sub(fudge)
.filter(|&x| x > 0)
// support 0 or 1 resident layer tenants as well
.unwrap_or(1);
let divider = total as f32;
for (i, mut candidate) in tenant_candidates.into_iter().enumerate() {
// as we iterate this reverse-sorted list, the most recently accessed layer always
// maps to 1.0, so that we evict it last.
candidate.relative_last_activity = eviction_order.relative_last_activity(total, i);
candidate.relative_last_activity = if matches!(
eviction_order,
EvictionOrder::RelativeAccessed { .. }
) {
// another possibility: use buckets, like (256.0 * relative_last_activity) as u8 or
// similarly for u16. unsure how it would help.
finite_f32::FiniteF32::try_from_normalized((total - i) as f32 / divider)
.unwrap_or_else(|val| {
tracing::warn!(%fudge, "calculated invalid relative_last_activity for i={i}, total={total}: {val}");
finite_f32::FiniteF32::ZERO
})
} else {
finite_f32::FiniteF32::ZERO
};
let partition = if cumsum > min_resident_size as i128 {
MinResidentSizePartition::Above
@@ -902,7 +927,10 @@ async fn collect_eviction_candidates(
debug_assert!(MinResidentSizePartition::Above < MinResidentSizePartition::Below,
"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first");
eviction_order.sort(&mut candidates);
// always behave as if AbsoluteAccessed was selected. if RelativeAccessed is in use, we
// will sort later by candidate.relative_last_activity to compare evictions.
candidates
.sort_unstable_by_key(|(partition, candidate)| (*partition, candidate.last_activity_ts));
Ok(EvictionCandidates::Finished(candidates))
}
@@ -1042,12 +1070,6 @@ pub(crate) mod finite_f32 {
}
}
impl From<FiniteF32> for f32 {
fn from(value: FiniteF32) -> f32 {
value.0
}
}
impl FiniteF32 {
pub const ZERO: FiniteF32 = FiniteF32(0.0);
@@ -1060,9 +1082,136 @@ pub(crate) mod finite_f32 {
Err(value)
}
}
}
}
pub fn into_inner(self) -> f32 {
self.into()
mod summary {
use super::finite_f32::FiniteF32;
use super::{EvictionCandidate, LayerCount};
use pageserver_api::shard::TenantShardId;
use std::collections::{BTreeMap, HashMap};
use std::time::SystemTime;
#[derive(Debug, Default)]
pub(super) struct EvictionSummary {
evicted_per_tenant: HashMap<TenantShardId, LayerCount>,
total: LayerCount,
last_absolute: Option<SystemTime>,
last_relative: Option<FiniteF32>,
}
impl<'a> FromIterator<&'a EvictionCandidate> for EvictionSummary {
fn from_iter<T: IntoIterator<Item = &'a EvictionCandidate>>(iter: T) -> Self {
let mut summary = EvictionSummary::default();
for item in iter {
let counts = summary
.evicted_per_tenant
.entry(*item.layer.get_tenant_shard_id())
.or_default();
let sz = item.layer.get_file_size();
counts.file_sizes += sz;
counts.count += 1;
summary.total.file_sizes += sz;
summary.total.count += 1;
summary.last_absolute = Some(item.last_activity_ts);
summary.last_relative = Some(item.relative_last_activity);
}
summary
}
}
struct SiBytesAmount(u64);
impl std::fmt::Display for SiBytesAmount {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.0 < 1024 {
return write!(f, "{}B", self.0);
}
let mut tmp = self.0;
let mut ch = 0;
let suffixes = b"KMGTPE";
while tmp > 1024 * 1024 && ch < suffixes.len() - 1 {
tmp /= 1024;
ch += 1;
}
let ch = suffixes[ch] as char;
write!(f, "{:.1}{ch}iB", tmp as f64 / 1024.0)
}
}
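Worked by hand from the code above: SiBytesAmount(512) renders as "512B", SiBytesAmount(2048) as "2.0KiB", and SiBytesAmount(5 << 20) as "5.0MiB".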
impl std::fmt::Display for EvictionSummary {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// wasteful, but it's for testing
let mut sorted: BTreeMap<usize, Vec<(TenantShardId, u64)>> = BTreeMap::new();
for (tenant_shard_id, count) in &self.evicted_per_tenant {
sorted
.entry(count.count)
.or_default()
.push((*tenant_shard_id, count.file_sizes));
}
let total_file_sizes = SiBytesAmount(self.total.file_sizes);
writeln!(
f,
"selected {} layers of {total_file_sizes} up to ({:?}, {:.2?}):",
self.total.count, self.last_absolute, self.last_relative,
)?;
for (count, per_tenant) in sorted.iter().rev().take(10) {
write!(f, "- {count} layers: ")?;
if per_tenant.len() < 3 {
for (i, (tenant_shard_id, bytes)) in per_tenant.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
let bytes = SiBytesAmount(*bytes);
write!(f, "{tenant_shard_id} ({bytes})")?;
}
} else {
let num_tenants = per_tenant.len();
let total_bytes = per_tenant.iter().map(|(_id, bytes)| bytes).sum::<u64>();
let total_bytes = SiBytesAmount(total_bytes);
let layers = num_tenants * count;
write!(
f,
"{num_tenants} tenants {total_bytes} in total {layers} layers",
)?;
}
writeln!(f)?;
}
if sorted.len() > 10 {
let (rem_count, rem_bytes) = sorted
.iter()
.rev()
.map(|(count, per_tenant)| {
(
count,
per_tenant.iter().map(|(_id, bytes)| bytes).sum::<u64>(),
)
})
.fold((0, 0), |acc, next| (acc.0 + next.0, acc.1 + next.1));
let rem_bytes = SiBytesAmount(rem_bytes);
writeln!(f, "- rest of tenants ({}) not shown ({rem_count} layers or {:.1}%, {rem_bytes} or {:.1}% bytes)", sorted.len() - 10, 100.0 * rem_count as f64 / self.total.count as f64, 100.0 * rem_bytes.0 as f64 / self.total.file_sizes as f64)?;
}
Ok(())
}
}
}
@@ -1187,40 +1336,3 @@ mod filesystem_level_usage {
assert!(!usage.has_pressure());
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn relative_equal_bounds() {
let order = EvictionOrder::RelativeAccessed {
highest_layer_count_loses_first: false,
};
let len = 10;
let v = (0..len)
.map(|i| order.relative_last_activity(len, i).into_inner())
.collect::<Vec<_>>();
assert_eq!(v.first(), Some(&1.0));
assert_eq!(v.last(), Some(&0.0));
assert!(v.windows(2).all(|slice| slice[0] > slice[1]));
}
#[test]
fn relative_spare_bounds() {
let order = EvictionOrder::RelativeAccessed {
highest_layer_count_loses_first: true,
};
let len = 10;
let v = (0..len)
.map(|i| order.relative_last_activity(len, i).into_inner())
.collect::<Vec<_>>();
assert_eq!(v.first(), Some(&1.0));
assert_eq!(v.last(), Some(&0.1));
assert!(v.windows(2).all(|slice| slice[0] > slice[1]));
}
}

View File

@@ -419,6 +419,12 @@ paths:
type: string
format: date-time
description: A timestamp to get the LSN
- name: version
in: query
required: false
schema:
type: integer
description: The version of the endpoint to use
responses:
"200":
description: OK
@@ -668,10 +674,6 @@ paths:
responses:
"200":
description: Tenant is now in requested state
content:
application/json:
schema:
$ref: "#/components/schemas/TenantLocationConfigResponse"
"503":
description: Tenant's state cannot be changed right now. Wait a few seconds and retry.
content:
@@ -1424,27 +1426,6 @@ components:
$ref: '#/components/schemas/SecondaryConfig'
tenant_conf:
$ref: '#/components/schemas/TenantConfig'
TenantLocationConfigResponse:
type: object
required:
- shards
properties:
shards:
description: Pageservers where this tenant's shards are attached. Not populated for secondary locations.
type: array
items:
$ref: "#/components/schemas/TenantShardLocation"
TenantShardLocation:
type: object
required:
- node_id
- shard_id
properties:
node_id:
description: Pageserver node ID where this shard is attached
type: integer
shard_id:
description: Tenant shard ID of the shard
type: string
SecondaryConfig:
type: object
properties:

View File

@@ -17,8 +17,6 @@ use metrics::launch_timestamp::LaunchTimestamp;
use pageserver_api::models::LocationConfigListResponse;
use pageserver_api::models::ShardParameters;
use pageserver_api::models::TenantDetails;
use pageserver_api::models::TenantLocationConfigResponse;
use pageserver_api::models::TenantShardLocation;
use pageserver_api::models::TenantState;
use pageserver_api::models::{
DownloadRemoteLayersTaskSpawnRequest, LocationConfigMode, TenantAttachRequest,
@@ -1358,7 +1356,7 @@ async fn put_tenant_location_config_handler(
let location_conf =
LocationConf::try_from(&request_data.config).map_err(ApiError::BadRequest)?;
let attached = state
state
.tenant_manager
.upsert_location(
tenant_shard_id,
@@ -1367,8 +1365,7 @@ async fn put_tenant_location_config_handler(
tenant::SpawnMode::Normal,
&ctx,
)
.await?
.is_some();
.await?;
if let Some(_flush_ms) = flush {
match state
@@ -1387,18 +1384,7 @@ async fn put_tenant_location_config_handler(
tracing::info!("No flush requested when configuring");
}
// This API returns a vector of pageservers where the tenant is attached: this is
// primarily for use in the sharding service. For compatibility, we also return this
// when called directly on a pageserver, but the payload is always zero or one shards.
let mut response = TenantLocationConfigResponse { shards: Vec::new() };
if attached {
response.shards.push(TenantShardLocation {
shard_id: tenant_shard_id,
node_id: state.conf.id,
})
}
json_response(StatusCode::OK, response)
json_response(StatusCode::OK, ())
}
async fn list_location_config_handler(

View File

@@ -1043,7 +1043,6 @@ pub enum SmgrQueryType {
GetRelSize,
GetPageAtLsn,
GetDbSize,
GetSlruSegment,
}
#[derive(Debug)]
@@ -1160,12 +1159,11 @@ mod smgr_query_time_tests {
#[test]
fn op_label_name() {
use super::SmgrQueryType::*;
let expect: [(super::SmgrQueryType, &'static str); 5] = [
let expect: [(super::SmgrQueryType, &'static str); 4] = [
(GetRelExists, "get_rel_exists"),
(GetRelSize, "get_rel_size"),
(GetPageAtLsn, "get_page_at_lsn"),
(GetDbSize, "get_db_size"),
(GetSlruSegment, "get_slru_segment"),
];
for (op, expect) in expect {
let actual: &'static str = op.into();

View File

@@ -22,8 +22,7 @@ use pageserver_api::models::{
PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
PagestreamGetSlruSegmentRequest, PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest,
PagestreamNblocksResponse,
PagestreamNblocksRequest, PagestreamNblocksResponse,
};
use pageserver_api::shard::ShardIndex;
use pageserver_api::shard::{ShardCount, ShardNumber};
@@ -75,8 +74,8 @@ use crate::tenant::GetTimelineError;
use crate::tenant::PageReconstructError;
use crate::tenant::Timeline;
use crate::trace::Tracer;
use pageserver_api::key::rel_block_to_key;
use pageserver_api::reltag::SlruKind;
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
use postgres_ffi::BLCKSZ;
@@ -369,16 +368,6 @@ impl From<WaitLsnError> for PageStreamError {
}
}
impl From<WaitLsnError> for QueryError {
fn from(value: WaitLsnError) -> Self {
match value {
e @ WaitLsnError::Timeout(_) => Self::Other(anyhow::Error::new(e)),
WaitLsnError::Shutdown => Self::Shutdown,
WaitLsnError::BadState => Self::Reconnect,
}
}
}
impl PageServerHandler {
pub fn new(
conf: &'static PageServerConf,
@@ -648,15 +637,6 @@ impl PageServerHandler {
span,
)
}
PagestreamFeMessage::GetSlruSegment(req) => {
let span = tracing::info_span!("handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.lsn);
(
self.handle_get_slru_segment_request(tenant_id, timeline_id, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
};
match response {
@@ -1147,33 +1127,6 @@ impl PageServerHandler {
}))
}
async fn handle_get_slru_segment_request(
&mut self,
tenant_id: TenantId,
timeline_id: TimelineId,
req: &PagestreamGetSlruSegmentRequest,
ctx: &RequestContext,
) -> Result<PagestreamBeMessage, PageStreamError> {
let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?;
let _timer = timeline
.query_metrics
.start_timer(metrics::SmgrQueryType::GetSlruSegment);
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
let lsn =
Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
.await?;
let kind = SlruKind::from_repr(req.kind)
.ok_or(PageStreamError::BadRequest("invalid SLRU kind".into()))?;
let segment = timeline.get_slru_segment(kind, req.segno, lsn, ctx).await?;
Ok(PagestreamBeMessage::GetSlruSegment(
PagestreamGetSlruSegmentResponse { segment },
))
}
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all, fields(?lsn, ?prev_lsn, %full_backup))]
async fn handle_basebackup_request<IO>(
@@ -1186,7 +1139,7 @@ impl PageServerHandler {
full_backup: bool,
gzip: bool,
ctx: RequestContext,
) -> Result<(), QueryError>
) -> anyhow::Result<()>
where
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
{
@@ -1451,7 +1404,7 @@ where
)
.await?;
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
Result::<(), QueryError>::Ok(())
anyhow::Ok(())
},
)
.await?;
@@ -1725,7 +1678,6 @@ impl From<GetActiveTenantError> for QueryError {
| GetActiveTenantError::WillNotBecomeActive(TenantState::Stopping { .. }) => {
QueryError::Shutdown
}
e @ GetActiveTenantError::NotFound(_) => QueryError::NotFound(format!("{e}").into()),
e => QueryError::Other(anyhow::anyhow!(e)),
}
}

View File

@@ -12,7 +12,7 @@ use crate::keyspace::{KeySpace, KeySpaceAccum};
use crate::repository::*;
use crate::walrecord::NeonWalRecord;
use anyhow::{ensure, Context};
use bytes::{Buf, Bytes, BytesMut};
use bytes::{Buf, Bytes};
use pageserver_api::key::{
dbdir_key_range, is_rel_block_key, is_slru_block_key, rel_block_to_key, rel_dir_to_key,
rel_key_range, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key,
@@ -321,27 +321,6 @@ impl Timeline {
}
}
/// Get the whole SLRU segment
pub(crate) async fn get_slru_segment(
&self,
kind: SlruKind,
segno: u32,
lsn: Lsn,
ctx: &RequestContext,
) -> Result<Bytes, PageReconstructError> {
let n_blocks = self
.get_slru_segment_size(kind, segno, Version::Lsn(lsn), ctx)
.await?;
let mut segment = BytesMut::with_capacity(n_blocks as usize * BLCKSZ as usize);
for blkno in 0..n_blocks {
let block = self
.get_slru_page_at_lsn(kind, segno, blkno, lsn, ctx)
.await?;
segment.extend_from_slice(&block[..BLCKSZ as usize]);
}
Ok(segment.freeze())
}
/// Look up given SLRU page version.
pub(crate) async fn get_slru_page_at_lsn(
&self,

View File

@@ -1020,7 +1020,6 @@ impl Tenant {
Some(remote_timeline_client),
self.deletion_queue_client.clone(),
)
.instrument(tracing::info_span!("timeline_delete", %timeline_id))
.await
.context("resume_deletion")
.map_err(LoadLocalTimelineError::ResumeDeletion)?;
@@ -2094,10 +2093,7 @@ impl Tenant {
let timelines = self.timelines.lock().unwrap();
timelines.values().for_each(|timeline| {
let timeline = Arc::clone(timeline);
let timeline_id = timeline.timeline_id;
let span =
tracing::info_span!("timeline_shutdown", %timeline_id, ?freeze_and_flush);
let span = Span::current();
js.spawn(async move {
if freeze_and_flush {
timeline.flush_and_shutdown().instrument(span).await
@@ -2697,7 +2693,7 @@ impl Tenant {
activate_now_sem: tokio::sync::Semaphore::new(0),
delete_progress: Arc::new(tokio::sync::Mutex::new(DeleteTenantFlow::default())),
cancel: CancellationToken::default(),
gate: Gate::default(),
gate: Gate::new(format!("Tenant<{tenant_shard_id}>")),
}
}
@@ -3782,11 +3778,6 @@ async fn run_initdb(
.env_clear()
.env("LD_LIBRARY_PATH", &initdb_lib_dir)
.env("DYLD_LIBRARY_PATH", &initdb_lib_dir)
.stdin(std::process::Stdio::null())
// stdout invocation produces the same output every time, we don't need it
.stdout(std::process::Stdio::null())
// we would be interested in the stderr output, if there was any
.stderr(std::process::Stdio::piped())
.spawn()?;
// Ideally we'd select here with the cancellation token, but the problem is that
@@ -3907,7 +3898,6 @@ pub(crate) mod harness {
),
gc_feedback: Some(tenant_conf.gc_feedback),
heatmap_period: Some(tenant_conf.heatmap_period),
lazy_slru_download: Some(tenant_conf.lazy_slru_download),
}
}
}
@@ -5230,7 +5220,7 @@ mod tests {
let raw_tline = tline.raw_timeline().unwrap();
raw_tline
.shutdown()
.instrument(info_span!("test_shutdown", tenant_id=%raw_tline.tenant_shard_id, timeline_id=%TIMELINE_ID))
.instrument(info_span!("test_shutdown", tenant_id=%raw_tline.tenant_shard_id))
.await;
std::mem::forget(tline);
}

View File

@@ -345,9 +345,6 @@ pub struct TenantConf {
/// may be disabled if a Tenant will not have secondary locations: only secondary
/// locations will use the heatmap uploaded by attached locations.
pub heatmap_period: Duration,
/// If true, SLRU segments are downloaded on demand; if false, SLRU segments are included in the basebackup
pub lazy_slru_download: bool,
}
/// Same as TenantConf, but this struct preserves the information about
@@ -433,10 +430,6 @@ pub struct TenantConfOpt {
#[serde(with = "humantime_serde")]
#[serde(default)]
pub heatmap_period: Option<Duration>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub lazy_slru_download: Option<bool>,
}
impl TenantConfOpt {
@@ -482,9 +475,6 @@ impl TenantConfOpt {
.unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold),
gc_feedback: self.gc_feedback.unwrap_or(global_conf.gc_feedback),
heatmap_period: self.heatmap_period.unwrap_or(global_conf.heatmap_period),
lazy_slru_download: self
.lazy_slru_download
.unwrap_or(global_conf.lazy_slru_download),
}
}
}
@@ -523,7 +513,6 @@ impl Default for TenantConf {
.expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
gc_feedback: false,
heatmap_period: Duration::ZERO,
lazy_slru_download: false,
}
}
}
@@ -595,7 +584,6 @@ impl From<TenantConfOpt> for models::TenantConfig {
.map(humantime),
gc_feedback: value.gc_feedback,
heatmap_period: value.heatmap_period.map(humantime),
lazy_slru_download: value.lazy_slru_download,
}
}
}

View File

@@ -136,11 +136,7 @@ async fn schedule_ordered_timeline_deletions(
let mut already_running_deletions = vec![];
for (timeline_id, _) in sorted.into_iter().rev() {
let span = tracing::info_span!("timeline_delete", %timeline_id);
let res = DeleteTimelineFlow::run(tenant, timeline_id, true)
.instrument(span)
.await;
if let Err(e) = res {
if let Err(e) = DeleteTimelineFlow::run(tenant, timeline_id, true).await {
match e {
DeleteTimelineError::NotFound => {
// Timeline deletion finished after call to clone above but before call

View File

@@ -51,10 +51,7 @@ use crate::keyspace::KeyPartitioning;
use crate::repository::Key;
use crate::tenant::storage_layer::InMemoryLayer;
use anyhow::Result;
use pageserver_api::keyspace::KeySpaceAccum;
use std::cmp::Ordering;
use std::collections::{BTreeMap, VecDeque};
use std::iter::Peekable;
use std::collections::VecDeque;
use std::ops::Range;
use std::sync::Arc;
use utils::lsn::Lsn;
@@ -147,221 +144,11 @@ impl Drop for BatchedUpdates<'_> {
}
/// Return value of LayerMap::search
#[derive(Eq, PartialEq, Debug)]
pub struct SearchResult {
pub layer: Arc<PersistentLayerDesc>,
pub lsn_floor: Lsn,
}
pub struct OrderedSearchResult(SearchResult);
impl Ord for OrderedSearchResult {
fn cmp(&self, other: &Self) -> Ordering {
self.0.lsn_floor.cmp(&other.0.lsn_floor)
}
}
impl PartialOrd for OrderedSearchResult {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl PartialEq for OrderedSearchResult {
fn eq(&self, other: &Self) -> bool {
self.0.lsn_floor == other.0.lsn_floor
}
}
impl Eq for OrderedSearchResult {}
pub struct RangeSearchResult {
pub found: BTreeMap<OrderedSearchResult, KeySpaceAccum>,
pub not_found: KeySpaceAccum,
}
impl RangeSearchResult {
fn new() -> Self {
Self {
found: BTreeMap::new(),
not_found: KeySpaceAccum::new(),
}
}
}
/// Collector for results of range search queries on the LayerMap.
/// It should be provided with two iterators for the delta and image coverage
/// that contain all the changes for layers which intersect the range.
struct RangeSearchCollector<Iter>
where
Iter: Iterator<Item = (i128, Option<Arc<PersistentLayerDesc>>)>,
{
delta_coverage: Peekable<Iter>,
image_coverage: Peekable<Iter>,
key_range: Range<Key>,
end_lsn: Lsn,
current_delta: Option<Arc<PersistentLayerDesc>>,
current_image: Option<Arc<PersistentLayerDesc>>,
result: RangeSearchResult,
}
#[derive(Debug)]
enum NextLayerType {
Delta(i128),
Image(i128),
Both(i128),
}
impl NextLayerType {
fn next_change_at_key(&self) -> Key {
match self {
NextLayerType::Delta(at) => Key::from_i128(*at),
NextLayerType::Image(at) => Key::from_i128(*at),
NextLayerType::Both(at) => Key::from_i128(*at),
}
}
}
impl<Iter> RangeSearchCollector<Iter>
where
Iter: Iterator<Item = (i128, Option<Arc<PersistentLayerDesc>>)>,
{
fn new(
key_range: Range<Key>,
end_lsn: Lsn,
delta_coverage: Iter,
image_coverage: Iter,
) -> Self {
Self {
delta_coverage: delta_coverage.peekable(),
image_coverage: image_coverage.peekable(),
key_range,
end_lsn,
current_delta: None,
current_image: None,
result: RangeSearchResult::new(),
}
}
/// Run the collector. Collection is implemented via a two-pointer algorithm.
/// One pointer tracks the start of the current range and the other tracks
/// the beginning of the next range which will overlap with the next change
/// in coverage across both image and delta.
fn collect(mut self) -> RangeSearchResult {
let next_layer_type = self.choose_next_layer_type();
let mut current_range_start = match next_layer_type {
None => {
// No changes for the range
self.pad_range(self.key_range.clone());
return self.result;
}
Some(layer_type) if self.key_range.end <= layer_type.next_change_at_key() => {
// Changes only after the end of the range
self.pad_range(self.key_range.clone());
return self.result;
}
Some(layer_type) => {
// Changes for the range exist. Record anything before the first
// coverage change as not found.
let coverage_start = layer_type.next_change_at_key();
let range_before = self.key_range.start..coverage_start;
self.pad_range(range_before);
self.advance(&layer_type);
coverage_start
}
};
while current_range_start < self.key_range.end {
let next_layer_type = self.choose_next_layer_type();
match next_layer_type {
Some(t) => {
let current_range_end = t.next_change_at_key();
self.add_range(current_range_start..current_range_end);
current_range_start = current_range_end;
self.advance(&t);
}
None => {
self.add_range(current_range_start..self.key_range.end);
current_range_start = self.key_range.end;
}
}
}
self.result
}
/// Mark a range as not found (i.e. no layers intersect it)
fn pad_range(&mut self, key_range: Range<Key>) {
if !key_range.is_empty() {
self.result.not_found.add_range(key_range);
}
}
/// Select the appropriate layer for the given range and update
/// the collector.
fn add_range(&mut self, covered_range: Range<Key>) {
let selected = LayerMap::select_layer(
self.current_delta.clone(),
self.current_image.clone(),
self.end_lsn,
);
match selected {
Some(search_result) => self
.result
.found
.entry(OrderedSearchResult(search_result))
.or_default()
.add_range(covered_range),
None => self.pad_range(covered_range),
}
}
/// Move to the next coverage change.
fn advance(&mut self, layer_type: &NextLayerType) {
match layer_type {
NextLayerType::Delta(_) => {
let (_, layer) = self.delta_coverage.next().unwrap();
self.current_delta = layer;
}
NextLayerType::Image(_) => {
let (_, layer) = self.image_coverage.next().unwrap();
self.current_image = layer;
}
NextLayerType::Both(_) => {
let (_, image_layer) = self.image_coverage.next().unwrap();
let (_, delta_layer) = self.delta_coverage.next().unwrap();
self.current_image = image_layer;
self.current_delta = delta_layer;
}
}
}
/// Pick the next coverage change: the one at the lesser key, or both if they're aligned.
fn choose_next_layer_type(&mut self) -> Option<NextLayerType> {
let next_delta_at = self.delta_coverage.peek().map(|(key, _)| key);
let next_image_at = self.image_coverage.peek().map(|(key, _)| key);
match (next_delta_at, next_image_at) {
(None, None) => None,
(Some(next_delta_at), None) => Some(NextLayerType::Delta(*next_delta_at)),
(None, Some(next_image_at)) => Some(NextLayerType::Image(*next_image_at)),
(Some(next_delta_at), Some(next_image_at)) if next_image_at < next_delta_at => {
Some(NextLayerType::Image(*next_image_at))
}
(Some(next_delta_at), Some(next_image_at)) if next_delta_at < next_image_at => {
Some(NextLayerType::Delta(*next_delta_at))
}
(Some(next_delta_at), Some(_)) => Some(NextLayerType::Both(*next_delta_at)),
}
}
}
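// Editorial sketch, not part of this diff: the two-pointer discipline above in
// isolation. Merge two ascending streams of coverage-change keys, always
// consuming the side (or both sides) holding the smaller key, mirroring
// NextLayerType::{Delta, Image, Both}. All names here are illustrative.
fn merge_change_points(
    deltas: impl Iterator<Item = i128>,
    images: impl Iterator<Item = i128>,
) -> Vec<(i128, &'static str)> {
    let mut deltas = deltas.peekable();
    let mut images = images.peekable();
    let mut out = Vec::new();
    loop {
        match (deltas.peek().copied(), images.peek().copied()) {
            (None, None) => break,
            (Some(d), None) => { deltas.next(); out.push((d, "delta")); }
            (None, Some(i)) => { images.next(); out.push((i, "image")); }
            (Some(d), Some(i)) if d < i => { deltas.next(); out.push((d, "delta")); }
            (Some(d), Some(i)) if i < d => { images.next(); out.push((i, "image")); }
            (Some(d), Some(_)) => { deltas.next(); images.next(); out.push((d, "both")); }
        }
    }
    out
}
// merge_change_points([10, 35].into_iter(), [15, 35].into_iter()) yields
// [(10, "delta"), (15, "image"), (35, "both")].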
impl LayerMap {
///
/// Find the latest layer (by lsn.end) that covers the given
@@ -399,18 +186,7 @@ impl LayerMap {
let latest_delta = version.delta_coverage.query(key.to_i128());
let latest_image = version.image_coverage.query(key.to_i128());
Self::select_layer(latest_delta, latest_image, end_lsn)
}
fn select_layer(
delta_layer: Option<Arc<PersistentLayerDesc>>,
image_layer: Option<Arc<PersistentLayerDesc>>,
end_lsn: Lsn,
) -> Option<SearchResult> {
assert!(delta_layer.as_ref().map_or(true, |l| l.is_delta()));
assert!(image_layer.as_ref().map_or(true, |l| !l.is_delta()));
match (delta_layer, image_layer) {
match (latest_delta, latest_image) {
(None, None) => None,
(None, Some(image)) => {
let lsn_floor = image.get_lsn_range().start;
@@ -447,17 +223,6 @@ impl LayerMap {
}
}
pub fn range_search(&self, key_range: Range<Key>, end_lsn: Lsn) -> Option<RangeSearchResult> {
let version = self.historic.get().unwrap().get_version(end_lsn.0 - 1)?;
let raw_range = key_range.start.to_i128()..key_range.end.to_i128();
let delta_changes = version.delta_coverage.range_overlaps(&raw_range);
let image_changes = version.image_coverage.range_overlaps(&raw_range);
let collector = RangeSearchCollector::new(key_range, end_lsn, delta_changes, image_changes);
Some(collector.collect())
}
/// Start a batch of updates, applied on drop
pub fn batch_update(&mut self) -> BatchedUpdates<'_> {
BatchedUpdates { layer_map: self }
@@ -866,126 +631,3 @@ impl LayerMap {
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[derive(Clone)]
struct LayerDesc {
key_range: Range<Key>,
lsn_range: Range<Lsn>,
is_delta: bool,
}
fn create_layer_map(layers: Vec<LayerDesc>) -> LayerMap {
let mut layer_map = LayerMap::default();
for layer in layers {
layer_map.insert_historic_noflush(PersistentLayerDesc::new_test(
layer.key_range,
layer.lsn_range,
layer.is_delta,
));
}
layer_map.flush_updates();
layer_map
}
fn assert_range_search_result_eq(lhs: RangeSearchResult, rhs: RangeSearchResult) {
assert_eq!(lhs.not_found.to_keyspace(), rhs.not_found.to_keyspace());
let lhs: Vec<_> = lhs
.found
.into_iter()
.map(|(search_result, accum)| (search_result.0, accum.to_keyspace()))
.collect();
let rhs: Vec<_> = rhs
.found
.into_iter()
.map(|(search_result, accum)| (search_result.0, accum.to_keyspace()))
.collect();
assert_eq!(lhs, rhs);
}
fn brute_force_range_search(
layer_map: &LayerMap,
key_range: Range<Key>,
end_lsn: Lsn,
) -> RangeSearchResult {
let mut range_search_result = RangeSearchResult::new();
let mut key = key_range.start;
while key != key_range.end {
let res = layer_map.search(key, end_lsn);
match res {
Some(res) => {
range_search_result
.found
.entry(OrderedSearchResult(res))
.or_default()
.add_key(key);
}
None => {
range_search_result.not_found.add_key(key);
}
}
key = key.next();
}
range_search_result
}
#[test]
fn ranged_search_on_empty_layer_map() {
let layer_map = LayerMap::default();
let range = Key::from_i128(100)..Key::from_i128(200);
let res = layer_map.range_search(range, Lsn(100));
assert!(res.is_none());
}
#[test]
fn ranged_search() {
let layers = vec![
LayerDesc {
key_range: Key::from_i128(15)..Key::from_i128(50),
lsn_range: Lsn(0)..Lsn(5),
is_delta: false,
},
LayerDesc {
key_range: Key::from_i128(10)..Key::from_i128(20),
lsn_range: Lsn(5)..Lsn(20),
is_delta: true,
},
LayerDesc {
key_range: Key::from_i128(15)..Key::from_i128(25),
lsn_range: Lsn(20)..Lsn(30),
is_delta: true,
},
LayerDesc {
key_range: Key::from_i128(35)..Key::from_i128(40),
lsn_range: Lsn(25)..Lsn(35),
is_delta: true,
},
LayerDesc {
key_range: Key::from_i128(35)..Key::from_i128(40),
lsn_range: Lsn(35)..Lsn(40),
is_delta: false,
},
];
let layer_map = create_layer_map(layers.clone());
for start in 0..60 {
for end in (start + 1)..60 {
let range = Key::from_i128(start)..Key::from_i128(end);
let result = layer_map.range_search(range.clone(), Lsn(100)).unwrap();
let expected = brute_force_range_search(&layer_map, range, Lsn(100));
assert_range_search_result_eq(result, expected);
}
}
}
}

View File

@@ -129,42 +129,6 @@ impl<Value: Clone> LayerCoverage<Value> {
.map(|(k, v)| (*k, v.as_ref().map(|x| x.1.clone())))
}
/// Returns an iterator which includes all coverage changes for layers that intersect
/// with the provided range.
pub fn range_overlaps(
&self,
key_range: &Range<i128>,
) -> impl Iterator<Item = (i128, Option<Value>)> + '_
where
Value: Eq,
{
let first_change = self.query(key_range.start);
match first_change {
Some(change) => {
// If the start of the range is covered, we have to deal with two cases:
// 1. Start of the range is aligned with the start of a layer.
// In this case the return of `self.range` will contain the layer which aligns with the start of the key range.
// We advance said iterator to avoid duplicating the first change.
// 2. Start of the range is not aligned with the start of a layer:
// the first change returned by `self.range` then lies strictly inside the range, so nothing needs to be skipped.
let range = key_range.start..key_range.end;
let mut range_coverage = self.range(range).peekable();
if range_coverage
.peek()
.is_some_and(|c| c.1.as_ref() == Some(&change))
{
range_coverage.next();
}
itertools::Either::Left(
std::iter::once((key_range.start, Some(change))).chain(range_coverage),
)
}
None => {
let range = key_range.start..key_range.end;
let coverage = self.range(range);
itertools::Either::Right(coverage)
}
}
}
/// O(1) clone
pub fn clone(&self) -> Self {
Self {
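
Aside: the alignment subtlety documented in range_overlaps above, shown in
isolation. Given a sorted map of change points, report the value in effect at
`start` plus every change strictly inside (start, end); excluding `start` from
the tail query is what avoids emitting an aligned change twice. This is a
hedged, slightly simplified sketch (it always emits the boundary entry, even
when nothing covers `start`); the names are illustrative, not from the diff.

fn overlaps<'a, V>(
    m: &'a std::collections::BTreeMap<i128, V>,
    start: i128,
    end: i128,
) -> impl Iterator<Item = (i128, Option<&'a V>)> {
    use std::ops::Bound::Excluded;
    // Value in effect at `start`: the change at the greatest key <= start.
    let at_start = m.range(..=start).next_back().map(|(_, v)| v);
    std::iter::once((start, at_start))
        // (start, end) exclusive on both sides: an aligned change at `start`
        // was already reported by the entry above.
        .chain(m.range((Excluded(start), Excluded(end))).map(|(k, v)| (*k, Some(v))))
}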

View File

@@ -1311,7 +1311,6 @@ impl TenantManager {
tenant_shard_id: TenantShardId,
activation_timeout: Duration,
) -> Result<(), DeleteTenantError> {
super::span::debug_assert_current_span_has_tenant_id();
// We acquire a SlotGuard during this function to protect against concurrent
// changes while the ::prepare phase of DeleteTenantFlow executes, but then
// have to return the Tenant to the map while the background deletion runs.

View File

@@ -112,7 +112,7 @@ impl SecondaryTenant {
// on shutdown we walk the tenants and fire their
// individual cancellations?
cancel: CancellationToken::new(),
gate: Gate::default(),
gate: Gate::new(format!("SecondaryTenant {tenant_shard_id}")),
shard_identity,
tenant_conf: std::sync::Mutex::new(tenant_conf),

View File

@@ -55,13 +55,13 @@ impl PersistentLayerDesc {
}
#[cfg(test)]
pub fn new_test(key_range: Range<Key>, lsn_range: Range<Lsn>, is_delta: bool) -> Self {
pub fn new_test(key_range: Range<Key>) -> Self {
Self {
tenant_shard_id: TenantShardId::unsharded(TenantId::generate()),
timeline_id: TimelineId::generate(),
key_range,
lsn_range,
is_delta,
lsn_range: Lsn(0)..Lsn(1),
is_delta: false,
file_size: 0,
}
}

View File

@@ -457,21 +457,6 @@ pub(crate) enum GetVectoredError {
InvalidLsn(Lsn),
}
#[derive(thiserror::Error, Debug)]
pub(crate) enum GetReadyAncestorError {
#[error("ancestor timeline {0} is being stopped")]
AncestorStopping(TimelineId),
#[error("Ancestor LSN wait error: {0}")]
AncestorLsnTimeout(#[from] WaitLsnError),
#[error("Cancelled")]
Cancelled,
#[error(transparent)]
Other(#[from] anyhow::Error),
}
#[derive(Clone, Copy)]
pub enum LogicalSizeCalculationCause {
Initial,
@@ -550,18 +535,6 @@ impl From<GetVectoredError> for CreateImageLayersError {
}
}
impl From<GetReadyAncestorError> for PageReconstructError {
fn from(e: GetReadyAncestorError) -> Self {
use GetReadyAncestorError::*;
match e {
AncestorStopping(tid) => PageReconstructError::AncestorStopping(tid),
AncestorLsnTimeout(wait_err) => PageReconstructError::AncestorLsnTimeout(wait_err),
Cancelled => PageReconstructError::Cancelled,
Other(other) => PageReconstructError::Other(other),
}
}
}
/// Public interface functions
impl Timeline {
/// Get the LSN where this branch was created
@@ -1087,6 +1060,7 @@ impl Timeline {
/// also to remote storage. This method can easily take multiple seconds for a busy timeline.
///
/// While we are flushing, we continue to accept read I/O.
#[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
pub(crate) async fn flush_and_shutdown(&self) {
debug_assert_current_span_has_tenant_and_timeline_id();
@@ -1135,8 +1109,6 @@ impl Timeline {
/// Shut down immediately, without waiting for any open layers to flush to disk. This is a subset of
/// the graceful [`Timeline::flush_and_shutdown`] function.
pub(crate) async fn shutdown(&self) {
span::debug_assert_current_span_has_tenant_and_timeline_id();
// Signal any subscribers to our cancellation token to drop out
tracing::debug!("Cancelling CancellationToken");
self.cancel.cancel();
@@ -1315,13 +1287,6 @@ const REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE: u64 = 10;
// Private functions
impl Timeline {
pub fn get_lazy_slru_download(&self) -> bool {
let tenant_conf = self.tenant_conf.read().unwrap().tenant_conf;
tenant_conf
.lazy_slru_download
.unwrap_or(self.conf.default_tenant_conf.lazy_slru_download)
}
fn get_checkpoint_distance(&self) -> u64 {
let tenant_conf = self.tenant_conf.read().unwrap().tenant_conf;
tenant_conf
@@ -1530,7 +1495,7 @@ impl Timeline {
delete_progress: Arc::new(tokio::sync::Mutex::new(DeleteTimelineFlow::default())),
cancel,
gate: Gate::default(),
gate: Gate::new(format!("Timeline<{tenant_shard_id}/{timeline_id}>")),
compaction_lock: tokio::sync::Mutex::default(),
gc_lock: tokio::sync::Mutex::default(),
@@ -2427,8 +2392,60 @@ impl Timeline {
timeline.ancestor_lsn,
cont_lsn
);
let ancestor = match timeline.get_ancestor_timeline() {
Ok(timeline) => timeline,
Err(e) => return Err(PageReconstructError::from(e)),
};
timeline_owned = timeline.get_ready_ancestor_timeline(ctx).await?;
// It's possible that the ancestor timeline isn't active yet, or
// is active but hasn't yet caught up to the branch point. Wait
// for it.
//
// This cannot happen while the pageserver is running normally,
// because you cannot create a branch from a point that isn't
// present in the pageserver yet. However, we don't wait for the
// branch point to be uploaded to cloud storage before creating
// a branch. I.e., the branch LSN need not be remote consistent
// for the branching operation to succeed.
//
// Hence, if we try to load a tenant in such a state where
// 1. the existence of the branch was persisted (in IndexPart and/or locally)
// 2. but the ancestor state is behind branch_lsn because it was not yet persisted
// then we will need to wait for the ancestor timeline to
// re-stream WAL up to branch_lsn before we access it.
//
// How can a tenant get in such a state?
// - ungraceful pageserver process exit
// - detach+attach => this is a bug, https://github.com/neondatabase/neon/issues/4219
//
// NB: this could be avoided by requiring
// branch_lsn >= remote_consistent_lsn
// during branch creation.
match ancestor.wait_to_become_active(ctx).await {
Ok(()) => {}
Err(TimelineState::Stopping) => {
return Err(PageReconstructError::AncestorStopping(ancestor.timeline_id));
}
Err(state) => {
return Err(PageReconstructError::Other(anyhow::anyhow!(
"Timeline {} will not become active. Current state: {:?}",
ancestor.timeline_id,
&state,
)));
}
}
ancestor
.wait_lsn(timeline.ancestor_lsn, ctx)
.await
.map_err(|e| match e {
e @ WaitLsnError::Timeout(_) => PageReconstructError::AncestorLsnTimeout(e),
WaitLsnError::Shutdown => PageReconstructError::Cancelled,
e @ WaitLsnError::BadState => {
PageReconstructError::Other(anyhow::anyhow!(e))
}
})?;
timeline_owned = ancestor;
timeline = &*timeline_owned;
prev_lsn = Lsn(u64::MAX);
continue 'outer;
@@ -2558,66 +2575,6 @@ impl Timeline {
Some((lsn, img))
}
async fn get_ready_ancestor_timeline(
&self,
ctx: &RequestContext,
) -> Result<Arc<Timeline>, GetReadyAncestorError> {
let ancestor = match self.get_ancestor_timeline() {
Ok(timeline) => timeline,
Err(e) => return Err(GetReadyAncestorError::from(e)),
};
// It's possible that the ancestor timeline isn't active yet, or
// is active but hasn't yet caught up to the branch point. Wait
// for it.
//
// This cannot happen while the pageserver is running normally,
// because you cannot create a branch from a point that isn't
// present in the pageserver yet. However, we don't wait for the
// branch point to be uploaded to cloud storage before creating
// a branch. I.e., the branch LSN need not be remote consistent
// for the branching operation to succeed.
//
// Hence, if we try to load a tenant in such a state where
// 1. the existence of the branch was persisted (in IndexPart and/or locally)
// 2. but the ancestor state is behind branch_lsn because it was not yet persisted
// then we will need to wait for the ancestor timeline to
// re-stream WAL up to branch_lsn before we access it.
//
// How can a tenant get in such a state?
// - ungraceful pageserver process exit
// - detach+attach => this is a bug, https://github.com/neondatabase/neon/issues/4219
//
// NB: this could be avoided by requiring
// branch_lsn >= remote_consistent_lsn
// during branch creation.
match ancestor.wait_to_become_active(ctx).await {
Ok(()) => {}
Err(TimelineState::Stopping) => {
return Err(GetReadyAncestorError::AncestorStopping(
ancestor.timeline_id,
));
}
Err(state) => {
return Err(GetReadyAncestorError::Other(anyhow::anyhow!(
"Timeline {} will not become active. Current state: {:?}",
ancestor.timeline_id,
&state,
)));
}
}
ancestor
.wait_lsn(self.ancestor_lsn, ctx)
.await
.map_err(|e| match e {
e @ WaitLsnError::Timeout(_) => GetReadyAncestorError::AncestorLsnTimeout(e),
WaitLsnError::Shutdown => GetReadyAncestorError::Cancelled,
e @ WaitLsnError::BadState => GetReadyAncestorError::Other(anyhow::anyhow!(e)),
})?;
Ok(ancestor)
}
fn get_ancestor_timeline(&self) -> anyhow::Result<Arc<Timeline>> {
let ancestor = self.ancestor_timeline.as_ref().with_context(|| {
format!(
@@ -2828,12 +2785,12 @@ impl Timeline {
}
/// Flush one frozen in-memory layer to disk, as a new delta layer.
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id, layer=%frozen_layer))]
async fn flush_frozen_layer(
self: &Arc<Self>,
frozen_layer: Arc<InMemoryLayer>,
ctx: &RequestContext,
) -> Result<(), FlushLayerError> {
span::debug_assert_current_span_has_tenant_and_timeline_id();
// As a special case, when we have just imported an image into the repository,
// instead of writing out a L0 delta layer, we directly write out image layer
// files instead. This is possible as long as *all* the data imported into the

View File

@@ -356,14 +356,12 @@ impl DeleteTimelineFlow {
// NB: If this fails half-way through, and is retried, the retry will go through
// all the same steps again. Make sure the code here is idempotent, and don't
// error out if some of the shutdown tasks have already been completed!
#[instrument(skip_all, fields(%inplace))]
#[instrument(skip(tenant), fields(tenant_id=%tenant.tenant_shard_id.tenant_id, shard_id=%tenant.tenant_shard_id.shard_slug()))]
pub async fn run(
tenant: &Arc<Tenant>,
timeline_id: TimelineId,
inplace: bool,
) -> Result<(), DeleteTimelineError> {
super::debug_assert_current_span_has_tenant_and_timeline_id();
let (timeline, mut guard) = Self::prepare(tenant, timeline_id)?;
guard.mark_in_progress()?;

View File

@@ -26,8 +26,11 @@ use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};
use std::str::FromStr;
use anyhow::{bail, Context, Result};
use bytes::{Buf, Bytes, BytesMut};
use hex::FromHex;
use tracing::*;
use utils::failpoint_support;
@@ -44,9 +47,10 @@ use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::v14::nonrelfile_utils::mx_offset_to_member_segment;
use postgres_ffi::v14::xlog_utils::*;
use postgres_ffi::v14::CheckPoint;
use postgres_ffi::v14::{bindings::FullTransactionId, CheckPoint};
use postgres_ffi::TransactionId;
use postgres_ffi::BLCKSZ;
use utils::id::TenantId;
use utils::lsn::Lsn;
pub struct WalIngest {
@@ -109,6 +113,55 @@ impl WalIngest {
self.checkpoint_modified = true;
}
// BEGIN ONE-OFF HACK
//
// We had a bug where we incorrectly passed 0 to update_next_xid(). That was
// harmless as long as nextXid was < 2^31, because 0 looked like a very old
// XID. But once nextXid reaches 2^31, 0 starts to look like a very new XID, and
// we incorrectly bumped up nextXid to the next epoch, to value '1:1024'
//
// We have one known timeline in production where that happened. This is a one-off
// fix to repair that damage. The last WAL record on that timeline as of this writing
// is this:
//
// rmgr: Standby len (rec/tot): 50/ 50, tx: 0, lsn: 35A/E32D86D8, prev 35A/E32D86B0, desc: RUNNING_XACTS nextXid 2325447052 latestCompletedXid 2325447051 oldestRunningXid 2325447052
//
// So on that particular timeline, before that LSN, fix the incorrectly set
// nextXid to the nextXid value from that record, plus 1000 to give some safety
// margin.
// For testing this hack, this failpoint temporarily re-introduces the bug that
// was fixed.
fn reintroduce_bug_failpoint_activated() -> bool {
fail::fail_point!("reintroduce-nextxid-update-bug", |_| { true });
false
}
if decoded.xl_xid == pg_constants::INVALID_TRANSACTION_ID
&& reintroduce_bug_failpoint_activated()
&& self.checkpoint.update_next_xid(decoded.xl_xid)
{
info!(
"failpoint: Incorrectly updated nextXid at LSN {} to {}",
lsn, self.checkpoint.nextXid.value
);
self.checkpoint_modified = true;
}
if self.checkpoint.nextXid.value == 4294968320 && // 1:1024, the incorrect value
modification.tline.tenant_shard_id.tenant_id == TenantId::from_hex("df254570a4f603805528b46b0d45a76c").unwrap() &&
lsn < Lsn::from_str("367/C7409300").unwrap() &&
!reintroduce_bug_failpoint_activated()
{
// This is the last nextXid value from the last RUNNING_XACTS record, at the
// end of the WAL as of this writing.
self.checkpoint.nextXid = FullTransactionId {
value: 2399949836 + 1000,
};
self.checkpoint_modified = true;
warn!("nextXid fixed by one-off hack at LSN {}", lsn);
}
// END ONE-OFF HACK
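// Editorial sketch, not part of this fix: the "looks newer" test behind the bug
// is circular XID comparison modulo 2^32. While nextXid < 2^31, xid 0 compares
// as very old; once nextXid crosses 2^31, the signed difference flips and 0
// compares as very new, which is what let update_next_xid(0) advance the epoch.
// The helper below is illustrative only.
fn xid_follows(a: u32, b: u32) -> bool {
    // true iff `a` is logically newer than `b` in modulo-2^32 XID order
    a.wrapping_sub(b) as i32 > 0
}
// xid_follows(0, 0x7FFF_FFFF) == false: 0 looks ancient below the 2^31 midpoint.
// xid_follows(0, 0x8000_0001) == true: 0 looks brand new once past the midpoint.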
match decoded.xl_rmid {
pg_constants::RM_HEAP_ID | pg_constants::RM_HEAP2_ID => {
// Heap AM records need some special handling, because they modify VM pages
@@ -1363,22 +1416,16 @@ impl WalIngest {
self.checkpoint.nextMultiOffset = xlrec.moff + xlrec.nmembers;
self.checkpoint_modified = true;
}
let max_mbr_xid = xlrec.members.iter().fold(None, |acc, mbr| {
if let Some(max_xid) = acc {
if mbr.xid.wrapping_sub(max_xid) as i32 > 0 {
Some(mbr.xid)
} else {
acc
}
let max_mbr_xid = xlrec.members.iter().fold(0u32, |acc, mbr| {
if mbr.xid.wrapping_sub(acc) as i32 > 0 {
mbr.xid
} else {
Some(mbr.xid)
acc
}
});
if let Some(max_xid) = max_mbr_xid {
if self.checkpoint.update_next_xid(max_xid) {
self.checkpoint_modified = true;
}
if self.checkpoint.update_next_xid(max_mbr_xid) {
self.checkpoint_modified = true;
}
Ok(())
}

View File

@@ -1,56 +0,0 @@
From 5518a806a70e7f40d5054a762ccda7d5e6b0d31c Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Tue, 30 Jan 2024 14:33:00 +0200
Subject: [PATCH] Make v0.6.0 work with Neon
Now that the WAL-logging happens as a separate step at the end of the
build, we need a few neon-specific hints to make it work.
---
src/hnswbuild.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/src/hnswbuild.c b/src/hnswbuild.c
index 680789ba9044900eac9321844ee2a808a4a2ed12..41c5b709bcb2367ac8b8c498788ecac4c1148b74 100644
--- a/src/hnswbuild.c
+++ b/src/hnswbuild.c
@@ -1089,13 +1089,41 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
SeedRandom(42);
#endif
+#ifdef NEON_SMGR
+ smgr_start_unlogged_build(index->rd_smgr);
+#endif
+
InitBuildState(buildstate, heap, index, indexInfo, forkNum);
BuildGraph(buildstate, forkNum);
+#ifdef NEON_SMGR
+ smgr_finish_unlogged_build_phase_1(index->rd_smgr);
+#endif
+
if (RelationNeedsWAL(index))
+ {
log_newpage_range(index, forkNum, 0, RelationGetNumberOfBlocks(index), true);
+#ifdef NEON_SMGR
+ {
+#if PG_VERSION_NUM >= 160000
+ RelFileLocator rlocator = index->rd_smgr->smgr_rlocator.locator;
+#else
+ RelFileNode rlocator = index->rd_smgr->smgr_rnode.node;
+#endif
+
+ SetLastWrittenLSNForBlockRange(XactLastRecEnd, rlocator,
+ MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
+ SetLastWrittenLSNForRelation(XactLastRecEnd, rlocator, MAIN_FORKNUM);
+ }
+#endif
+ }
+
+#ifdef NEON_SMGR
+ smgr_end_unlogged_build(index->rd_smgr);
+#endif
+
FreeBuildState(buildstate);
}

View File

@@ -328,14 +328,18 @@ pageserver_connect(shardno_t shard_no, int elevel)
now = GetCurrentTimestamp();
us_since_last_connect = now - last_connect_time;
if (us_since_last_connect < MAX_RECONNECT_INTERVAL_USEC)
if (us_since_last_connect < delay_us)
{
pg_usleep(delay_us);
pg_usleep(delay_us - us_since_last_connect);
delay_us *= 2;
if (delay_us > MAX_RECONNECT_INTERVAL_USEC)
delay_us = MAX_RECONNECT_INTERVAL_USEC;
last_connect_time = GetCurrentTimestamp();
}
else
{
delay_us = MIN_RECONNECT_INTERVAL_USEC;
last_connect_time = now;
}
/*
@@ -362,7 +366,6 @@ pageserver_connect(shardno_t shard_no, int elevel)
values[n] = NULL;
n++;
conn = PQconnectdbParams(keywords, values, 1);
last_connect_time = GetCurrentTimestamp();
if (PQstatus(conn) == CONNECTION_BAD)
{
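
Aside: a hedged Rust model of the new reconnect pacing in the hunk above.
Rather than always sleeping a full fixed delay when the last attempt was
recent, sleep only the remainder of the current delay and then double it up to
a cap; if attempts are already spaced out, reset to the minimum. Constants and
names below are illustrative, not taken from the diff.

const MIN_RECONNECT_US: u64 = 1_000;
const MAX_RECONNECT_US: u64 = 1_000_000;

struct Backoff {
    delay_us: u64,
    last_attempt_us: u64,
}

impl Backoff {
    /// How long to sleep before the next attempt made at time `now_us`.
    fn pace(&mut self, now_us: u64) -> u64 {
        let since_last = now_us.saturating_sub(self.last_attempt_us);
        if since_last < self.delay_us {
            // Recent attempt: sleep out the remainder, then back off further.
            let sleep = self.delay_us - since_last;
            self.delay_us = (self.delay_us * 2).min(MAX_RECONNECT_US);
            self.last_attempt_us = now_us + sleep; // attempt happens after sleeping
            sleep
        } else {
            // Attempts already spaced out: reset the backoff.
            self.delay_us = MIN_RECONNECT_US;
            self.last_attempt_us = now_us;
            0
        }
    }
}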

View File

@@ -15,7 +15,6 @@
#include "neon_pgversioncompat.h"
#include "access/slru.h"
#include "access/xlogdefs.h"
#include RELFILEINFO_HDR
#include "lib/stringinfo.h"
@@ -35,7 +34,6 @@ typedef enum
T_NeonNblocksRequest,
T_NeonGetPageRequest,
T_NeonDbSizeRequest,
T_NeonGetSlruSegmentRequest,
/* pagestore -> pagestore_client */
T_NeonExistsResponse = 100,
@@ -43,7 +41,6 @@ typedef enum
T_NeonGetPageResponse,
T_NeonErrorResponse,
T_NeonDbSizeResponse,
T_NeonGetSlruSegmentResponse,
} NeonMessageTag;
/* base struct for c-style inheritance */
@@ -62,13 +59,6 @@ typedef struct
(errmsg(NEON_TAG "[shard %d] " fmt, shard_no, ##__VA_ARGS__), \
errhidestmt(true), errhidecontext(true), errposition(0), internalerrposition(0)))
/* SLRUs downloadable from page server */
typedef enum {
SLRU_CLOG,
SLRU_MULTIXACT_MEMBERS,
SLRU_MULTIXACT_OFFSETS
} SlruKind;
/*
* supertype of all the Neon*Request structs below
*
@@ -111,13 +101,6 @@ typedef struct
BlockNumber blkno;
} NeonGetPageRequest;
typedef struct
{
NeonRequest req;
SlruKind kind;
int segno;
} NeonGetSlruSegmentRequest;
/* supertype of all the Neon*Response structs below */
typedef struct
{
@@ -157,14 +140,6 @@ typedef struct
* message */
} NeonErrorResponse;
typedef struct
{
NeonMessageTag tag;
int n_blocks;
char data[BLCKSZ * SLRU_PAGES_PER_SEGMENT];
} NeonGetSlruSegmentResponse;
extern StringInfoData nm_pack_request(NeonRequest *msg);
extern NeonResponse *nm_unpack_response(StringInfo s);
extern char *nm_to_string(NeonMessage *msg);

View File

@@ -1043,25 +1043,12 @@ nm_pack_request(NeonRequest *msg)
break;
}
case T_NeonGetSlruSegmentRequest:
{
NeonGetSlruSegmentRequest *msg_req = (NeonGetSlruSegmentRequest *) msg;
pq_sendbyte(&s, msg_req->req.latest);
pq_sendint64(&s, msg_req->req.lsn);
pq_sendbyte(&s, msg_req->kind);
pq_sendint32(&s, msg_req->segno);
break;
}
/* pagestore -> pagestore_client. We never need to create these. */
case T_NeonExistsResponse:
case T_NeonNblocksResponse:
case T_NeonGetPageResponse:
case T_NeonErrorResponse:
case T_NeonDbSizeResponse:
case T_NeonGetSlruSegmentResponse:
default:
neon_log(ERROR, "unexpected neon message tag 0x%02x", msg->tag);
break;
@@ -1148,20 +1135,6 @@ nm_unpack_response(StringInfo s)
break;
}
case T_NeonGetSlruSegmentResponse:
{
NeonGetSlruSegmentResponse *msg_resp;
int n_blocks = pq_getmsgint(s, 4);
msg_resp = palloc(sizeof(NeonGetSlruSegmentResponse));
msg_resp->tag = tag;
msg_resp->n_blocks = n_blocks;
memcpy(msg_resp->data, pq_getmsgbytes(s, n_blocks * BLCKSZ), n_blocks * BLCKSZ);
pq_getmsgend(s);
resp = (NeonResponse *) msg_resp;
break;
}
/*
* pagestore_client -> pagestore
*
@@ -1171,7 +1144,6 @@ nm_unpack_response(StringInfo s)
case T_NeonNblocksRequest:
case T_NeonGetPageRequest:
case T_NeonDbSizeRequest:
case T_NeonGetSlruSegmentRequest:
default:
neon_log(ERROR, "unexpected neon message tag 0x%02x", tag);
break;
@@ -1241,18 +1213,7 @@ nm_to_string(NeonMessage *msg)
appendStringInfoChar(&s, '}');
break;
}
case T_NeonGetSlruSegmentRequest:
{
NeonGetSlruSegmentRequest *msg_req = (NeonGetSlruSegmentRequest *) msg;
appendStringInfoString(&s, "{\"type\": \"NeonGetSlruSegmentRequest\"");
appendStringInfo(&s, ", \"kind\": %u", msg_req->kind);
appendStringInfo(&s, ", \"segno\": %u", msg_req->segno);
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest);
appendStringInfoChar(&s, '}');
break;
}
/* pagestore -> pagestore_client */
case T_NeonExistsResponse:
{
@@ -1306,17 +1267,6 @@ nm_to_string(NeonMessage *msg)
msg_resp->db_size);
appendStringInfoChar(&s, '}');
break;
}
case T_NeonGetSlruSegmentResponse:
{
NeonGetSlruSegmentResponse *msg_resp = (NeonGetSlruSegmentResponse *) msg;
appendStringInfoString(&s, "{\"type\": \"NeonGetSlruSegmentResponse\"");
appendStringInfo(&s, ", \"n_blocks\": %u}",
msg_resp->n_blocks);
appendStringInfoChar(&s, '}');
break;
}
@@ -2789,74 +2739,6 @@ neon_end_unlogged_build(SMgrRelation reln)
unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
}
#define STRPREFIX(str, prefix) (strncmp(str, prefix, strlen(prefix)) == 0)
static int
neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer)
{
XLogRecPtr request_lsn;
/*
* GetRedoStartLsn() returns the LSN of the basebackup.
* We need to download SLRU segments only once, after node startup;
* after that, SLRUs are maintained locally.
*/
request_lsn = GetRedoStartLsn();
request_lsn = nm_adjust_lsn(request_lsn);
SlruKind kind;
if (STRPREFIX(path, "pg_xact"))
kind = SLRU_CLOG;
else if (STRPREFIX(path, "pg_multixact/members"))
kind = SLRU_MULTIXACT_MEMBERS;
else if (STRPREFIX(path, "pg_multixact/offsets"))
kind = SLRU_MULTIXACT_OFFSETS;
else
return -1;
NeonResponse *resp;
NeonGetSlruSegmentRequest request = {
.req.tag = T_NeonGetSlruSegmentRequest,
.req.latest = false,
.req.lsn = request_lsn,
.kind = kind,
.segno = segno
};
int n_blocks;
shardno_t shard_no = 0; /* All SLRUs are at shard 0 */
do
{
while (!page_server->send(shard_no, &request.req) || !page_server->flush(shard_no));
consume_prefetch_responses();
resp = page_server->receive(shard_no);
} while (resp == NULL);
switch (resp->tag)
{
case T_NeonGetSlruSegmentResponse:
n_blocks = ((NeonGetSlruSegmentResponse *) resp)->n_blocks;
memcpy(buffer, ((NeonGetSlruSegmentResponse *) resp)->data, n_blocks*BLCKSZ);
break;
case T_NeonErrorResponse:
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "could not read SLRU %d segment %d at lsn %X/%08X",
kind,
segno,
LSN_FORMAT_ARGS(request_lsn)),
errdetail("page server returned error: %s",
((NeonErrorResponse *) resp)->message)));
break;
default:
neon_log(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
}
pfree(resp);
return n_blocks;
}
static void
AtEOXact_neon(XactEvent event, void *arg)
{
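
Aside: the removed neon_read_slru_segment above dispatches on the SLRU
directory prefix. A hedged Rust sketch of just that mapping (the SlruKind
names follow the removed header; everything else is illustrative):

enum SlruKind { Clog, MultiXactMembers, MultiXactOffsets }

fn slru_kind(path: &str) -> Option<SlruKind> {
    if path.starts_with("pg_xact") {
        Some(SlruKind::Clog)
    } else if path.starts_with("pg_multixact/members") {
        Some(SlruKind::MultiXactMembers)
    } else if path.starts_with("pg_multixact/offsets") {
        Some(SlruKind::MultiXactOffsets)
    } else {
        None // not an SLRU the pageserver can serve
    }
}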
@@ -2915,8 +2797,6 @@ static const struct f_smgr neon_smgr =
.smgr_start_unlogged_build = neon_start_unlogged_build,
.smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1,
.smgr_end_unlogged_build = neon_end_unlogged_build,
.smgr_read_slru_segment = neon_read_slru_segment,
};
const f_smgr *

View File

@@ -1804,21 +1804,11 @@ walprop_pg_finish_sync_safekeepers(WalProposer *wp, XLogRecPtr lsn)
static void
GetLatestNeonFeedback(PageserverFeedback *rf, WalProposer *wp)
{
int latest_safekeeper = -1;
int latest_safekeeper = 0;
XLogRecPtr last_received_lsn = InvalidXLogRecPtr;
for (int i = 0; i < wp->n_safekeepers; i++)
{
/*
* Non-zero shards don't know the timeline size and send zero.
* TODO: right now we ignore all feedback from non-zero shards. We
* should make reporting shard-aware instead and do per-shard
* aggregation, as any lagging shard should trigger backpressure.
*/
if (wp->safekeeper[i].appendResponse.rf.currentClusterSize == 0)
continue;
if (wp->safekeeper[i].appendResponse.rf.last_received_lsn > last_received_lsn)
{
latest_safekeeper = i;
@@ -1826,10 +1816,6 @@ GetLatestNeonFeedback(PageserverFeedback *rf, WalProposer *wp)
}
}
/* no feedback yet */
if (latest_safekeeper == -1)
return;
rf->currentClusterSize = wp->safekeeper[latest_safekeeper].appendResponse.rf.currentClusterSize;
rf->last_received_lsn = wp->safekeeper[latest_safekeeper].appendResponse.rf.last_received_lsn;
rf->disk_consistent_lsn = wp->safekeeper[latest_safekeeper].appendResponse.rf.disk_consistent_lsn;
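
Aside: a minimal Rust rendering of the stricter side of this hunk's
GetLatestNeonFeedback: among all safekeepers, take the feedback with the
highest last_received_lsn, skipping shards that report a zero cluster size
(non-zero shards don't know the timeline size). Types and names here are
illustrative, not from the diff.

#[derive(Clone, Copy)]
struct Feedback {
    current_cluster_size: u64,
    last_received_lsn: u64,
}

fn latest_feedback(safekeepers: &[Feedback]) -> Option<Feedback> {
    safekeepers
        .iter()
        .filter(|f| f.current_cluster_size != 0) // skip non-zero-shard feedback
        .max_by_key(|f| f.last_received_lsn)
        .copied()
}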

poetry.lock (generated; 157 lines changed)
View File

@@ -2,87 +2,87 @@
[[package]]
name = "aiohttp"
version = "3.9.2"
version = "3.9.0"
description = "Async http client/server framework (asyncio)"
optional = false
python-versions = ">=3.8"
files = [
{file = "aiohttp-3.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:772fbe371788e61c58d6d3d904268e48a594ba866804d08c995ad71b144f94cb"},
{file = "aiohttp-3.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:edd4f1af2253f227ae311ab3d403d0c506c9b4410c7fc8d9573dec6d9740369f"},
{file = "aiohttp-3.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cfee9287778399fdef6f8a11c9e425e1cb13cc9920fd3a3df8f122500978292b"},
{file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc158466f6a980a6095ee55174d1de5730ad7dec251be655d9a6a9dd7ea1ff9"},
{file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54ec82f45d57c9a65a1ead3953b51c704f9587440e6682f689da97f3e8defa35"},
{file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abeb813a18eb387f0d835ef51f88568540ad0325807a77a6e501fed4610f864e"},
{file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc91d07280d7d169f3a0f9179d8babd0ee05c79d4d891447629ff0d7d8089ec2"},
{file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b65e861f4bebfb660f7f0f40fa3eb9f2ab9af10647d05dac824390e7af8f75b7"},
{file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:04fd8ffd2be73d42bcf55fd78cde7958eeee6d4d8f73c3846b7cba491ecdb570"},
{file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3d8d962b439a859b3ded9a1e111a4615357b01620a546bc601f25b0211f2da81"},
{file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:8ceb658afd12b27552597cf9a65d9807d58aef45adbb58616cdd5ad4c258c39e"},
{file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0e4ee4df741670560b1bc393672035418bf9063718fee05e1796bf867e995fad"},
{file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2dec87a556f300d3211decf018bfd263424f0690fcca00de94a837949fbcea02"},
{file = "aiohttp-3.9.2-cp310-cp310-win32.whl", hash = "sha256:3e1a800f988ce7c4917f34096f81585a73dbf65b5c39618b37926b1238cf9bc4"},
{file = "aiohttp-3.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:ea510718a41b95c236c992b89fdfc3d04cc7ca60281f93aaada497c2b4e05c46"},
{file = "aiohttp-3.9.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6aaa6f99256dd1b5756a50891a20f0d252bd7bdb0854c5d440edab4495c9f973"},
{file = "aiohttp-3.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a27d8c70ad87bcfce2e97488652075a9bdd5b70093f50b10ae051dfe5e6baf37"},
{file = "aiohttp-3.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:54287bcb74d21715ac8382e9de146d9442b5f133d9babb7e5d9e453faadd005e"},
{file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb3d05569aa83011fcb346b5266e00b04180105fcacc63743fc2e4a1862a891"},
{file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c8534e7d69bb8e8d134fe2be9890d1b863518582f30c9874ed7ed12e48abe3c4"},
{file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bd9d5b989d57b41e4ff56ab250c5ddf259f32db17159cce630fd543376bd96b"},
{file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa6904088e6642609981f919ba775838ebf7df7fe64998b1a954fb411ffb4663"},
{file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bda42eb410be91b349fb4ee3a23a30ee301c391e503996a638d05659d76ea4c2"},
{file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:193cc1ccd69d819562cc7f345c815a6fc51d223b2ef22f23c1a0f67a88de9a72"},
{file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b9f1cb839b621f84a5b006848e336cf1496688059d2408e617af33e3470ba204"},
{file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:d22a0931848b8c7a023c695fa2057c6aaac19085f257d48baa24455e67df97ec"},
{file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4112d8ba61fbd0abd5d43a9cb312214565b446d926e282a6d7da3f5a5aa71d36"},
{file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c4ad4241b52bb2eb7a4d2bde060d31c2b255b8c6597dd8deac2f039168d14fd7"},
{file = "aiohttp-3.9.2-cp311-cp311-win32.whl", hash = "sha256:ee2661a3f5b529f4fc8a8ffee9f736ae054adfb353a0d2f78218be90617194b3"},
{file = "aiohttp-3.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:4deae2c165a5db1ed97df2868ef31ca3cc999988812e82386d22937d9d6fed52"},
{file = "aiohttp-3.9.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:6f4cdba12539215aaecf3c310ce9d067b0081a0795dd8a8805fdb67a65c0572a"},
{file = "aiohttp-3.9.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:84e843b33d5460a5c501c05539809ff3aee07436296ff9fbc4d327e32aa3a326"},
{file = "aiohttp-3.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8008d0f451d66140a5aa1c17e3eedc9d56e14207568cd42072c9d6b92bf19b52"},
{file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61c47ab8ef629793c086378b1df93d18438612d3ed60dca76c3422f4fbafa792"},
{file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc71f748e12284312f140eaa6599a520389273174b42c345d13c7e07792f4f57"},
{file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a1c3a4d0ab2f75f22ec80bca62385db2e8810ee12efa8c9e92efea45c1849133"},
{file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a87aa0b13bbee025faa59fa58861303c2b064b9855d4c0e45ec70182bbeba1b"},
{file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2cc0d04688b9f4a7854c56c18aa7af9e5b0a87a28f934e2e596ba7e14783192"},
{file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1956e3ac376b1711c1533266dec4efd485f821d84c13ce1217d53e42c9e65f08"},
{file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:114da29f39eccd71b93a0fcacff178749a5c3559009b4a4498c2c173a6d74dff"},
{file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:3f17999ae3927d8a9a823a1283b201344a0627272f92d4f3e3a4efe276972fe8"},
{file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:f31df6a32217a34ae2f813b152a6f348154f948c83213b690e59d9e84020925c"},
{file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7a75307ffe31329928a8d47eae0692192327c599113d41b278d4c12b54e1bd11"},
{file = "aiohttp-3.9.2-cp312-cp312-win32.whl", hash = "sha256:972b63d589ff8f305463593050a31b5ce91638918da38139b9d8deaba9e0fed7"},
{file = "aiohttp-3.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:200dc0246f0cb5405c80d18ac905c8350179c063ea1587580e3335bfc243ba6a"},
{file = "aiohttp-3.9.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:158564d0d1020e0d3fe919a81d97aadad35171e13e7b425b244ad4337fc6793a"},
{file = "aiohttp-3.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:da1346cd0ccb395f0ed16b113ebb626fa43b7b07fd7344fce33e7a4f04a8897a"},
{file = "aiohttp-3.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:eaa9256de26ea0334ffa25f1913ae15a51e35c529a1ed9af8e6286dd44312554"},
{file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1543e7fb00214fb4ccead42e6a7d86f3bb7c34751ec7c605cca7388e525fd0b4"},
{file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:186e94570433a004e05f31f632726ae0f2c9dee4762a9ce915769ce9c0a23d89"},
{file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d52d20832ac1560f4510d68e7ba8befbc801a2b77df12bd0cd2bcf3b049e52a4"},
{file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c45e4e815ac6af3b72ca2bde9b608d2571737bb1e2d42299fc1ffdf60f6f9a1"},
{file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa906b9bdfd4a7972dd0628dbbd6413d2062df5b431194486a78f0d2ae87bd55"},
{file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:68bbee9e17d66f17bb0010aa15a22c6eb28583edcc8b3212e2b8e3f77f3ebe2a"},
{file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4c189b64bd6d9a403a1a3f86a3ab3acbc3dc41a68f73a268a4f683f89a4dec1f"},
{file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8a7876f794523123bca6d44bfecd89c9fec9ec897a25f3dd202ee7fc5c6525b7"},
{file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:d23fba734e3dd7b1d679b9473129cd52e4ec0e65a4512b488981a56420e708db"},
{file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b141753be581fab842a25cb319f79536d19c2a51995d7d8b29ee290169868eab"},
{file = "aiohttp-3.9.2-cp38-cp38-win32.whl", hash = "sha256:103daf41ff3b53ba6fa09ad410793e2e76c9d0269151812e5aba4b9dd674a7e8"},
{file = "aiohttp-3.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:328918a6c2835861ff7afa8c6d2c70c35fdaf996205d5932351bdd952f33fa2f"},
{file = "aiohttp-3.9.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5264d7327c9464786f74e4ec9342afbbb6ee70dfbb2ec9e3dfce7a54c8043aa3"},
{file = "aiohttp-3.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:07205ae0015e05c78b3288c1517afa000823a678a41594b3fdc870878d645305"},
{file = "aiohttp-3.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae0a1e638cffc3ec4d4784b8b4fd1cf28968febc4bd2718ffa25b99b96a741bd"},
{file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d43302a30ba1166325974858e6ef31727a23bdd12db40e725bec0f759abce505"},
{file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16a967685907003765855999af11a79b24e70b34dc710f77a38d21cd9fc4f5fe"},
{file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6fa3ee92cd441d5c2d07ca88d7a9cef50f7ec975f0117cd0c62018022a184308"},
{file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b500c5ad9c07639d48615a770f49618130e61be36608fc9bc2d9bae31732b8f"},
{file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c07327b368745b1ce2393ae9e1aafed7073d9199e1dcba14e035cc646c7941bf"},
{file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cc7d6502c23a0ec109687bf31909b3fb7b196faf198f8cff68c81b49eb316ea9"},
{file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:07be2be7071723c3509ab5c08108d3a74f2181d4964e869f2504aaab68f8d3e8"},
{file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:122468f6fee5fcbe67cb07014a08c195b3d4c41ff71e7b5160a7bcc41d585a5f"},
{file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:00a9abcea793c81e7f8778ca195a1714a64f6d7436c4c0bb168ad2a212627000"},
{file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a9825fdd64ecac5c670234d80bb52bdcaa4139d1f839165f548208b3779c6c6"},
{file = "aiohttp-3.9.2-cp39-cp39-win32.whl", hash = "sha256:5422cd9a4a00f24c7244e1b15aa9b87935c85fb6a00c8ac9b2527b38627a9211"},
{file = "aiohttp-3.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:7d579dcd5d82a86a46f725458418458fa43686f6a7b252f2966d359033ffc8ab"},
{file = "aiohttp-3.9.2.tar.gz", hash = "sha256:b0ad0a5e86ce73f5368a164c10ada10504bf91869c05ab75d982c6048217fbf7"},
{file = "aiohttp-3.9.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6896b8416be9ada4d22cd359d7cb98955576ce863eadad5596b7cdfbf3e17c6c"},
{file = "aiohttp-3.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1736d87dad8ef46a8ec9cddd349fa9f7bd3a064c47dd6469c0d6763d3d49a4fc"},
{file = "aiohttp-3.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c9e5f4d7208cda1a2bb600e29069eecf857e6980d0ccc922ccf9d1372c16f4b"},
{file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8488519aa05e636c5997719fe543c8daf19f538f4fa044f3ce94bee608817cff"},
{file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ab16c254e2312efeb799bc3c06897f65a133b38b69682bf75d1f1ee1a9c43a9"},
{file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7a94bde005a8f926d0fa38b88092a03dea4b4875a61fbcd9ac6f4351df1b57cd"},
{file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b777c9286b6c6a94f50ddb3a6e730deec327e9e2256cb08b5530db0f7d40fd8"},
{file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:571760ad7736b34d05597a1fd38cbc7d47f7b65deb722cb8e86fd827404d1f6b"},
{file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:deac0a32aec29608eb25d730f4bc5a261a65b6c48ded1ed861d2a1852577c932"},
{file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4ee1b4152bc3190cc40ddd6a14715e3004944263ea208229ab4c297712aa3075"},
{file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:3607375053df58ed6f23903aa10cf3112b1240e8c799d243bbad0f7be0666986"},
{file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:65b0a70a25456d329a5e1426702dde67be0fb7a4ead718005ba2ca582d023a94"},
{file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a2eb5311a37fe105aa35f62f75a078537e1a9e4e1d78c86ec9893a3c97d7a30"},
{file = "aiohttp-3.9.0-cp310-cp310-win32.whl", hash = "sha256:2cbc14a13fb6b42d344e4f27746a4b03a2cb0c1c3c5b932b0d6ad8881aa390e3"},
{file = "aiohttp-3.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ac9669990e2016d644ba8ae4758688534aabde8dbbc81f9af129c3f5f01ca9cd"},
{file = "aiohttp-3.9.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f8e05f5163528962ce1d1806fce763ab893b1c5b7ace0a3538cd81a90622f844"},
{file = "aiohttp-3.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4afa8f71dba3a5a2e1e1282a51cba7341ae76585345c43d8f0e624882b622218"},
{file = "aiohttp-3.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f929f4c9b9a00f3e6cc0587abb95ab9c05681f8b14e0fe1daecfa83ea90f8318"},
{file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28185e36a78d247c55e9fbea2332d16aefa14c5276a582ce7a896231c6b1c208"},
{file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a486ddf57ab98b6d19ad36458b9f09e6022de0381674fe00228ca7b741aacb2f"},
{file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70e851f596c00f40a2f00a46126c95c2e04e146015af05a9da3e4867cfc55911"},
{file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5b7bf8fe4d39886adc34311a233a2e01bc10eb4e842220235ed1de57541a896"},
{file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c67a51ea415192c2e53e4e048c78bab82d21955b4281d297f517707dc836bf3d"},
{file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:694df243f394629bcae2d8ed94c589a181e8ba8604159e6e45e7b22e58291113"},
{file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3dd8119752dd30dd7bca7d4bc2a92a59be6a003e4e5c2cf7e248b89751b8f4b7"},
{file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:eb6dfd52063186ac97b4caa25764cdbcdb4b10d97f5c5f66b0fa95052e744eb7"},
{file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:d97c3e286d0ac9af6223bc132dc4bad6540b37c8d6c0a15fe1e70fb34f9ec411"},
{file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:816f4db40555026e4cdda604a1088577c1fb957d02f3f1292e0221353403f192"},
{file = "aiohttp-3.9.0-cp311-cp311-win32.whl", hash = "sha256:3abf0551874fecf95f93b58f25ef4fc9a250669a2257753f38f8f592db85ddea"},
{file = "aiohttp-3.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:e18d92c3e9e22553a73e33784fcb0ed484c9874e9a3e96c16a8d6a1e74a0217b"},
{file = "aiohttp-3.9.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:99ae01fb13a618b9942376df77a1f50c20a281390dad3c56a6ec2942e266220d"},
{file = "aiohttp-3.9.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:05857848da443c8c12110d99285d499b4e84d59918a21132e45c3f0804876994"},
{file = "aiohttp-3.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:317719d7f824eba55857fe0729363af58e27c066c731bc62cd97bc9c3d9c7ea4"},
{file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1e3b3c107ccb0e537f309f719994a55621acd2c8fdf6d5ce5152aed788fb940"},
{file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:45820ddbb276113ead8d4907a7802adb77548087ff5465d5c554f9aa3928ae7d"},
{file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:05a183f1978802588711aed0dea31e697d760ce9055292db9dc1604daa9a8ded"},
{file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a4cd44788ea0b5e6bb8fa704597af3a30be75503a7ed1098bc5b8ffdf6c982"},
{file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:673343fbc0c1ac44d0d2640addc56e97a052504beacd7ade0dc5e76d3a4c16e8"},
{file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e8a3b79b6d186a9c99761fd4a5e8dd575a48d96021f220ac5b5fa856e5dd029"},
{file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6777a390e41e78e7c45dab43a4a0196c55c3b8c30eebe017b152939372a83253"},
{file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7ae5f99a32c53731c93ac3075abd3e1e5cfbe72fc3eaac4c27c9dd64ba3b19fe"},
{file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:f1e4f254e9c35d8965d377e065c4a8a55d396fe87c8e7e8429bcfdeeb229bfb3"},
{file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:11ca808f9a6b63485059f5f6e164ef7ec826483c1212a44f268b3653c91237d8"},
{file = "aiohttp-3.9.0-cp312-cp312-win32.whl", hash = "sha256:de3cc86f4ea8b4c34a6e43a7306c40c1275e52bfa9748d869c6b7d54aa6dad80"},
{file = "aiohttp-3.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:ca4fddf84ac7d8a7d0866664936f93318ff01ee33e32381a115b19fb5a4d1202"},
{file = "aiohttp-3.9.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f09960b5bb1017d16c0f9e9f7fc42160a5a49fa1e87a175fd4a2b1a1833ea0af"},
{file = "aiohttp-3.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8303531e2c17b1a494ffaeba48f2da655fe932c4e9a2626c8718403c83e5dd2b"},
{file = "aiohttp-3.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4790e44f46a4aa07b64504089def5744d3b6780468c4ec3a1a36eb7f2cae9814"},
{file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1d7edf74a36de0e5ca50787e83a77cf352f5504eb0ffa3f07000a911ba353fb"},
{file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:94697c7293199c2a2551e3e3e18438b4cba293e79c6bc2319f5fd652fccb7456"},
{file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a1b66dbb8a7d5f50e9e2ea3804b01e766308331d0cac76eb30c563ac89c95985"},
{file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9623cfd9e85b76b83ef88519d98326d4731f8d71869867e47a0b979ffec61c73"},
{file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f32c86dc967ab8c719fd229ce71917caad13cc1e8356ee997bf02c5b368799bf"},
{file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f50b4663c3e0262c3a361faf440761fbef60ccdde5fe8545689a4b3a3c149fb4"},
{file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dcf71c55ec853826cd70eadb2b6ac62ec577416442ca1e0a97ad875a1b3a0305"},
{file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:42fe4fd9f0dfcc7be4248c162d8056f1d51a04c60e53366b0098d1267c4c9da8"},
{file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:76a86a9989ebf82ee61e06e2bab408aec4ea367dc6da35145c3352b60a112d11"},
{file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f9e09a1c83521d770d170b3801eea19b89f41ccaa61d53026ed111cb6f088887"},
{file = "aiohttp-3.9.0-cp38-cp38-win32.whl", hash = "sha256:a00ce44c21612d185c5275c5cba4bab8d7c1590f248638b667ed8a782fa8cd6f"},
{file = "aiohttp-3.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:d5b9345ab92ebe6003ae11d8092ce822a0242146e6fa270889b9ba965457ca40"},
{file = "aiohttp-3.9.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98d21092bf2637c5fa724a428a69e8f5955f2182bff61f8036827cf6ce1157bf"},
{file = "aiohttp-3.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:35a68cd63ca6aaef5707888f17a70c36efe62b099a4e853d33dc2e9872125be8"},
{file = "aiohttp-3.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d7f6235c7475658acfc1769d968e07ab585c79f6ca438ddfecaa9a08006aee2"},
{file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db04d1de548f7a62d1dd7e7cdf7c22893ee168e22701895067a28a8ed51b3735"},
{file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:536b01513d67d10baf6f71c72decdf492fb7433c5f2f133e9a9087379d4b6f31"},
{file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c8b0a6487e8109427ccf638580865b54e2e3db4a6e0e11c02639231b41fc0f"},
{file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7276fe0017664414fdc3618fca411630405f1aaf0cc3be69def650eb50441787"},
{file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23170247ef89ffa842a02bbfdc425028574d9e010611659abeb24d890bc53bb8"},
{file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b1a2ea8252cacc7fd51df5a56d7a2bb1986ed39be9397b51a08015727dfb69bd"},
{file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2d71abc15ff7047412ef26bf812dfc8d0d1020d664617f4913df2df469f26b76"},
{file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:2d820162c8c2bdbe97d328cd4f417c955ca370027dce593345e437b2e9ffdc4d"},
{file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:2779f5e7c70f7b421915fd47db332c81de365678180a9f3ab404088f87ba5ff9"},
{file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:366bc870d7ac61726f32a489fbe3d1d8876e87506870be66b01aeb84389e967e"},
{file = "aiohttp-3.9.0-cp39-cp39-win32.whl", hash = "sha256:1df43596b826022b14998f0460926ce261544fedefe0d2f653e1b20f49e96454"},
{file = "aiohttp-3.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:9c196b30f1b1aa3363a69dd69079ae9bec96c2965c4707eaa6914ba099fb7d4f"},
{file = "aiohttp-3.9.0.tar.gz", hash = "sha256:09f23292d29135025e19e8ff4f0a68df078fe4ee013bca0105b2e803989de92d"},
]
[package.dependencies]
@@ -2043,7 +2043,6 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -2669,4 +2668,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
content-hash = "e99954cbbfef8dcc5e13cea7103c87657639a192f2372983bdb8c5d624c2e447"
content-hash = "9cf2734cafd5b6963165d398f1b24621193d5284d0bc7cc26a720a014f523860"

View File

@@ -62,8 +62,6 @@ socket2.workspace = true
sync_wrapper.workspace = true
task-local-extensions.workspace = true
thiserror.workspace = true
tikv-jemallocator.workspace = true
tikv-jemalloc-ctl = { workspace = true, features = ["use_std"] }
tls-listener.workspace = true
tokio-postgres.workspace = true
tokio-rustls.workspace = true

View File

@@ -9,7 +9,7 @@ use crate::auth::credentials::check_peer_addr_is_in_list;
use crate::auth::validate_password_and_exchange;
use crate::cache::Cached;
use crate::console::errors::GetAuthInfoError;
use crate::console::provider::{CachedRoleSecret, ConsoleBackend};
use crate::console::provider::ConsoleBackend;
use crate::console::AuthSecret;
use crate::context::RequestMonitoring;
use crate::proxy::connect_compute::handle_try_wake;
@@ -34,6 +34,8 @@ use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{error, info, warn};
use super::IpPattern;
/// This type serves two purposes:
///
/// * When `T` is `()`, it's just a regular auth backend selector
@@ -54,9 +56,7 @@ pub enum BackendType<'a, T> {
pub trait TestBackend: Send + Sync + 'static {
fn wake_compute(&self) -> Result<CachedNodeInfo, console::errors::WakeComputeError>;
fn get_allowed_ips_and_secret(
&self,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>;
fn get_allowed_ips(&self) -> Result<Vec<IpPattern>, console::errors::GetAuthInfoError>;
}
impl std::fmt::Display for BackendType<'_, ()> {
@@ -190,26 +190,20 @@ async fn auth_quirks(
Err(info) => {
let res = hacks::password_hack_no_authentication(info, client, &mut ctx.latency_timer)
.await?;
ctx.set_endpoint_id(res.info.endpoint.clone());
tracing::Span::current().record("ep", &tracing::field::display(&res.info.endpoint));
ctx.set_endpoint_id(Some(res.info.endpoint.clone()));
(res.info, Some(res.keys))
}
Ok(info) => (info, None),
};
info!("fetching user's authentication info");
let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?;
let allowed_ips = api.get_allowed_ips(ctx, &info).await?;
// check allowed list
if !check_peer_addr_is_in_list(&ctx.peer_addr, &allowed_ips) {
return Err(auth::AuthError::ip_address_not_allowed());
}
let cached_secret = match maybe_secret {
Some(secret) => secret,
None => api.get_role_secret(ctx, &info).await?,
};
let cached_secret = api.get_role_secret(ctx, &info).await?;
let secret = cached_secret.value.clone().unwrap_or_else(|| {
// If we don't have an authentication secret, we mock one to
@@ -277,12 +271,19 @@ async fn authenticate_with_secret(
classic::authenticate(info, client, config, &mut ctx.latency_timer, secret).await
}
/// wake a compute (or retrieve an existing compute session from cache)
async fn wake_compute(
/// Authenticate the user and then wake a compute (or retrieve an existing compute session from cache)
/// only if authentication was successful.
async fn auth_and_wake_compute(
ctx: &mut RequestMonitoring,
api: &impl console::Api,
compute_credentials: ComputeCredentials<ComputeCredentialKeys>,
user_info: ComputeUserInfoMaybeEndpoint,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool,
config: &'static AuthenticationConfig,
) -> auth::Result<(CachedNodeInfo, ComputeUserInfo)> {
let compute_credentials =
auth_quirks(ctx, api, user_info, client, allow_cleartext, config).await?;
let mut num_retries = 0;
let mut node = loop {
let wake_res = api.wake_compute(ctx, &compute_credentials.info).await;
@@ -357,16 +358,16 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint> {
"performing authentication using the console"
);
let compute_credentials =
auth_quirks(ctx, &*api, user_info, client, allow_cleartext, config).await?;
let (cache_info, user_info) = wake_compute(ctx, &*api, compute_credentials).await?;
let (cache_info, user_info) =
auth_and_wake_compute(ctx, &*api, user_info, client, allow_cleartext, config)
.await?;
(cache_info, BackendType::Console(api, user_info))
}
// NOTE: this auth backend doesn't use client credentials.
Link(url) => {
info!("performing link authentication");
let node_info = link::authenticate(ctx, &url, client).await?;
let node_info = link::authenticate(&url, client).await?;
(
CachedNodeInfo::new_uncached(node_info),
@@ -385,16 +386,16 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint> {
}
impl BackendType<'_, ComputeUserInfo> {
pub async fn get_allowed_ips_and_secret(
pub async fn get_allowed_ips(
&self,
ctx: &mut RequestMonitoring,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
) -> Result<CachedAllowedIps, GetAuthInfoError> {
use BackendType::*;
match self {
Console(api, user_info) => api.get_allowed_ips_and_secret(ctx, user_info).await,
Link(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
Console(api, user_info) => api.get_allowed_ips(ctx, user_info).await,
Link(_) => Ok(Cached::new_uncached(Arc::new(vec![]))),
#[cfg(test)]
Test(x) => x.get_allowed_ips_and_secret(),
Test(x) => Ok(Cached::new_uncached(Arc::new(x.get_allowed_ips()?))),
}
}
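Aside: the `check_peer_addr_is_in_list` call above reduces to matching the peer address against the endpoint's configured patterns, with an empty list meaning no restriction (which is why the Link backend can return an empty vec). A minimal sketch of that shape, using a hypothetical simplified pattern type rather than the proxy's real `IpPattern`:

use std::net::IpAddr;

// Hypothetical stand-in for the proxy's IpPattern: one exact address or a wildcard.
enum IpPatternSketch {
    Exact(IpAddr),
    Any,
}

fn peer_addr_is_allowed(peer: &IpAddr, allowed: &[IpPatternSketch]) -> bool {
    // An empty allow-list means the endpoint has no IP restrictions at all.
    allowed.is_empty()
        || allowed.iter().any(|p| match p {
            IpPatternSketch::Exact(ip) => ip == peer,
            IpPatternSketch::Any => true,
        })
}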

View File

@@ -1,7 +1,6 @@
use crate::{
auth, compute,
console::{self, provider::NodeInfo},
context::RequestMonitoring,
error::UserFacingError,
stream::PqStream,
waiters,
@@ -55,7 +54,6 @@ pub fn new_psql_session_id() -> String {
}
pub(super) async fn authenticate(
ctx: &mut RequestMonitoring,
link_uri: &reqwest::Url,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
) -> auth::Result<NodeInfo> {
@@ -96,10 +94,6 @@ pub(super) async fn authenticate(
.dbname(&db_info.dbname)
.user(&db_info.user);
ctx.set_user(db_info.user.into());
ctx.set_project(db_info.aux.clone());
tracing::Span::current().record("ep", &tracing::field::display(&db_info.aux.endpoint_id));
// Backwards compatibility. pg_sni_proxy uses "--" in domain names
// while direct connections do not. Once we migrate to pg_sni_proxy
// everywhere, we can remove this.

View File

@@ -2,8 +2,7 @@
use crate::{
auth::password_hack::parse_endpoint_param, context::RequestMonitoring, error::UserFacingError,
metrics::NUM_CONNECTION_ACCEPTED_BY_SNI, proxy::NeonOptions, serverless::SERVERLESS_DRIVER_SNI,
EndpointId, RoleName,
metrics::NUM_CONNECTION_ACCEPTED_BY_SNI, proxy::NeonOptions, EndpointId, RoleName,
};
use itertools::Itertools;
use pq_proto::StartupMessageParams;
@@ -55,10 +54,10 @@ impl ComputeUserInfoMaybeEndpoint {
}
}
pub fn endpoint_sni(
sni: &str,
pub fn endpoint_sni<'a>(
sni: &'a str,
common_names: &HashSet<String>,
) -> Result<Option<EndpointId>, ComputeUserInfoParseError> {
) -> Result<&'a str, ComputeUserInfoParseError> {
let Some((subdomain, common_name)) = sni.split_once('.') else {
return Err(ComputeUserInfoParseError::UnknownCommonName { cn: sni.into() });
};
@@ -67,10 +66,7 @@ pub fn endpoint_sni(
cn: common_name.into(),
});
}
if subdomain == SERVERLESS_DRIVER_SNI {
return Ok(None);
}
Ok(Some(EndpointId::from(subdomain)))
Ok(subdomain)
}
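The function above is essentially a guarded `split_once`: the first DNS label of the SNI is the endpoint, and the remaining suffix must be one of the proxy's configured common names. A stripped-down sketch of that logic (error handling elided, names hypothetical):

use std::collections::HashSet;

// Sketch: for an SNI like "ep-foo.region.provider.example", return "ep-foo"
// only when the suffix after the first dot is a known common name.
fn endpoint_from_sni<'a>(sni: &'a str, common_names: &HashSet<String>) -> Option<&'a str> {
    let (subdomain, common_name) = sni.split_once('.')?;
    common_names.contains(common_name).then_some(subdomain)
}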
impl ComputeUserInfoMaybeEndpoint {
@@ -89,6 +85,7 @@ impl ComputeUserInfoMaybeEndpoint {
// record the values if we have them
ctx.set_application(params.get("application_name").map(SmolStr::from));
ctx.set_user(user.clone());
ctx.set_endpoint_id(sni.map(EndpointId::from));
// Project name might be passed via PG's command-line options.
let endpoint_option = params
@@ -106,7 +103,7 @@ impl ComputeUserInfoMaybeEndpoint {
let endpoint_from_domain = if let Some(sni_str) = sni {
if let Some(cn) = common_names {
endpoint_sni(sni_str, cn)?
Some(EndpointId::from(endpoint_sni(sni_str, cn)?))
} else {
None
}
@@ -120,18 +117,13 @@ impl ComputeUserInfoMaybeEndpoint {
Some(Err(InconsistentProjectNames { domain, option }))
}
// Invariant: project name may not contain certain characters.
(a, b) => a.or(b).map(|name| match project_name_valid(name.as_ref()) {
(a, b) => a.or(b).map(|name| match project_name_valid(&name) {
false => Err(MalformedProjectName(name)),
true => Ok(name),
}),
}
.transpose()?;
if let Some(ep) = &endpoint {
ctx.set_endpoint_id(ep.clone());
tracing::Span::current().record("ep", &tracing::field::display(ep));
}
info!(%user, project = endpoint.as_deref(), "credentials");
if sni.is_some() {
info!("Connection with sni");
@@ -154,7 +146,7 @@ impl ComputeUserInfoMaybeEndpoint {
Ok(Self {
user,
endpoint_id: endpoint,
endpoint_id: endpoint.map(EndpointId::from),
options,
})
}

View File

@@ -272,5 +272,5 @@ async fn handle_client(
let client = tokio::net::TcpStream::connect(destination).await?;
let metrics_aux: MetricsAuxInfo = Default::default();
proxy::proxy::passthrough::proxy_pass(ctx, tls_stream, client, metrics_aux).await
proxy::proxy::proxy_pass(ctx, tls_stream, client, metrics_aux).await
}

View File

@@ -32,9 +32,6 @@ project_build_tag!(BUILD_TAG);
use clap::{Parser, ValueEnum};
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[derive(Clone, Debug, ValueEnum)]
enum AuthBackend {
Console,
@@ -190,13 +187,6 @@ async fn main() -> anyhow::Result<()> {
info!("Build_tag: {BUILD_TAG}");
::metrics::set_build_info_metric(GIT_VERSION, BUILD_TAG);
match proxy::jemalloc::MetricRecorder::new(prometheus::default_registry()) {
Ok(t) => {
t.start();
}
Err(e) => tracing::error!(error = ?e, "could not start jemalloc metrics loop"),
}
let args = ProxyCliArgs::parse();
let config = build_config(&args)?;

View File

@@ -1,7 +1,7 @@
use anyhow::Context;
use anyhow::{bail, Context};
use dashmap::DashMap;
use pq_proto::CancelKeyData;
use std::{net::SocketAddr, sync::Arc};
use std::net::SocketAddr;
use tokio::net::TcpStream;
use tokio_postgres::{CancelToken, NoTls};
use tracing::info;
@@ -25,31 +25,39 @@ impl CancelMap {
}
/// Run async action within an ephemeral session identified by [`CancelKeyData`].
pub fn get_session(self: Arc<Self>) -> Session {
pub async fn with_session<'a, F, R, V>(&'a self, f: F) -> anyhow::Result<V>
where
F: FnOnce(Session<'a>) -> R,
R: std::future::Future<Output = anyhow::Result<V>>,
{
// HACK: We'd rather get the real backend_pid but tokio_postgres doesn't
// expose it and we don't want to do another roundtrip to query
// for it. The client will be able to notice that this is not the
// actual backend_pid, but backend_pid is not used for anything
// so it doesn't matter.
        let key = loop {
            let key = rand::random();
            // Random key collisions are unlikely to happen here, but they're still possible,
            // which is why we have to take care not to rewrite an existing key.
            match self.0.entry(key) {
                dashmap::mapref::entry::Entry::Occupied(_) => continue,
                dashmap::mapref::entry::Entry::Vacant(e) => {
                    e.insert(None);
                }
            }
            break key;
        };
        let key = rand::random();
        // Random key collisions are unlikely to happen here, but they're still possible,
        // which is why we have to take care not to rewrite an existing key.
        match self.0.entry(key) {
            dashmap::mapref::entry::Entry::Occupied(_) => {
                bail!("query cancellation key already exists: {key}")
            }
            dashmap::mapref::entry::Entry::Vacant(e) => {
                e.insert(None);
            }
        }
// This will guarantee that the session gets dropped
// as soon as the future is finished.
scopeguard::defer! {
self.0.remove(&key);
info!("dropped query cancellation key {key}");
}
info!("registered new query cancellation key {key}");
Session {
key,
cancel_map: self,
}
let session = Session::new(key, self);
f(session).await
}
#[cfg(test)]
@@ -90,17 +98,23 @@ impl CancelClosure {
}
/// Helper for registering query cancellation tokens.
pub struct Session {
pub struct Session<'a> {
/// The user-facing key identifying this session.
key: CancelKeyData,
/// The [`CancelMap`] this session belongs to.
cancel_map: Arc<CancelMap>,
cancel_map: &'a CancelMap,
}
impl Session {
impl<'a> Session<'a> {
fn new(key: CancelKeyData, cancel_map: &'a CancelMap) -> Self {
Self { key, cancel_map }
}
}
impl Session<'_> {
/// Store the cancel token for the given session.
/// This enables query cancellation in `crate::proxy::prepare_client_connection`.
pub fn enable_query_cancellation(&self, cancel_closure: CancelClosure) -> CancelKeyData {
pub fn enable_query_cancellation(self, cancel_closure: CancelClosure) -> CancelKeyData {
info!("enabling query cancellation for this session");
self.cancel_map.0.insert(self.key, Some(cancel_closure));
@@ -108,26 +122,37 @@ impl Session {
}
}
impl Drop for Session {
fn drop(&mut self) {
self.cancel_map.0.remove(&self.key);
info!("dropped query cancellation key {}", &self.key);
}
}
#[cfg(test)]
mod tests {
use super::*;
use once_cell::sync::Lazy;
#[tokio::test]
async fn check_session_drop() -> anyhow::Result<()> {
let cancel_map: Arc<CancelMap> = Default::default();
static CANCEL_MAP: Lazy<CancelMap> = Lazy::new(Default::default);
let (tx, rx) = tokio::sync::oneshot::channel();
let task = tokio::spawn(CANCEL_MAP.with_session(|session| async move {
assert!(CANCEL_MAP.contains(&session));
tx.send(()).expect("failed to send");
futures::future::pending::<()>().await; // sleep forever
Ok(())
}));
// Wait until the task has been spawned.
rx.await.context("failed to hear from the task")?;
// Drop the session's entry by cancelling the task.
task.abort();
let error = task.await.expect_err("task should have failed");
if !error.is_cancelled() {
anyhow::bail!(error);
}
let session = cancel_map.clone().get_session();
assert!(cancel_map.contains(&session));
drop(session);
// Check that the session has been dropped.
assert!(cancel_map.is_empty());
assert!(CANCEL_MAP.is_empty());
Ok(())
}
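The property this test exercises — the cancellation key disappearing even when the task is aborted mid-await — falls out of the `scopeguard::defer!` in `with_session`: dropping the future unwinds the frame and runs the guard. A self-contained sketch of the same pattern, assuming only the `scopeguard` crate:

use std::collections::HashSet;
use std::sync::Mutex;

// Sketch: register a key for the lifetime of an async closure; removal is
// guaranteed on every exit path, including cancellation of the future.
async fn with_key<F, Fut, T>(keys: &Mutex<HashSet<u64>>, key: u64, f: F) -> T
where
    F: FnOnce() -> Fut,
    Fut: std::future::Future<Output = T>,
{
    keys.lock().unwrap().insert(key);
    // Runs when this frame is dropped, whether by a normal return or because
    // the caller dropped the future before it completed.
    scopeguard::defer! {
        keys.lock().unwrap().remove(&key);
    }
    f().await
}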

View File

@@ -250,11 +250,11 @@ pub trait Api {
user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, errors::GetAuthInfoError>;
async fn get_allowed_ips_and_secret(
async fn get_allowed_ips(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>;
) -> Result<CachedAllowedIps, errors::GetAuthInfoError>;
/// Wake up the compute node and return the corresponding connection info.
async fn wake_compute(
@@ -288,16 +288,16 @@ impl Api for ConsoleBackend {
}
}
async fn get_allowed_ips_and_secret(
async fn get_allowed_ips(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError> {
) -> Result<CachedAllowedIps, errors::GetAuthInfoError> {
use ConsoleBackend::*;
match self {
Console(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
Console(api) => api.get_allowed_ips(ctx, user_info).await,
#[cfg(feature = "testing")]
Postgres(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
Postgres(api) => api.get_allowed_ips(ctx, user_info).await,
}
}

View File

@@ -157,17 +157,14 @@ impl super::Api for Api {
))
}
async fn get_allowed_ips_and_secret(
async fn get_allowed_ips(
&self,
_ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
Ok((
Cached::new_uncached(Arc::new(
self.do_get_auth_info(user_info).await?.allowed_ips,
)),
None,
))
) -> Result<CachedAllowedIps, GetAuthInfoError> {
Ok(Cached::new_uncached(Arc::new(
self.do_get_auth_info(user_info).await?.allowed_ips,
)))
}
#[tracing::instrument(skip_all)]

View File

@@ -194,17 +194,17 @@ impl super::Api for Api {
Ok(Cached::new_uncached(auth_info.secret))
}
async fn get_allowed_ips_and_secret(
async fn get_allowed_ips(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
) -> Result<CachedAllowedIps, GetAuthInfoError> {
let ep = &user_info.endpoint;
if let Some(allowed_ips) = self.caches.project_info.get_allowed_ips(ep) {
ALLOWED_IPS_BY_CACHE_OUTCOME
.with_label_values(&["hit"])
.inc();
return Ok((allowed_ips, None));
return Ok(allowed_ips);
}
ALLOWED_IPS_BY_CACHE_OUTCOME
.with_label_values(&["miss"])
@@ -223,10 +223,7 @@ impl super::Api for Api {
.project_info
.insert_allowed_ips(&project_id, ep, allowed_ips.clone());
}
Ok((
Cached::new_uncached(allowed_ips),
Some(Cached::new_uncached(auth_info.secret)),
))
Ok(Cached::new_uncached(allowed_ips))
}
#[tracing::instrument(skip_all)]

View File

@@ -89,11 +89,8 @@ impl RequestMonitoring {
self.project = Some(x.project_id);
}
pub fn set_endpoint_id(&mut self, endpoint_id: EndpointId) {
crate::metrics::CONNECTING_ENDPOINTS
.with_label_values(&[self.protocol])
.measure(&endpoint_id);
self.endpoint_id = Some(endpoint_id);
pub fn set_endpoint_id(&mut self, endpoint_id: Option<EndpointId>) {
self.endpoint_id = endpoint_id.or_else(|| self.endpoint_id.clone());
}
pub fn set_application(&mut self, app: Option<SmolStr>) {

View File

@@ -1,100 +0,0 @@
use std::time::Duration;
use metrics::IntGauge;
use prometheus::{register_int_gauge_with_registry, Registry};
use tikv_jemalloc_ctl::{config, epoch, epoch_mib, stats, version};
pub struct MetricRecorder {
epoch: epoch_mib,
active: stats::active_mib,
active_gauge: IntGauge,
allocated: stats::allocated_mib,
allocated_gauge: IntGauge,
mapped: stats::mapped_mib,
mapped_gauge: IntGauge,
metadata: stats::metadata_mib,
metadata_gauge: IntGauge,
resident: stats::resident_mib,
resident_gauge: IntGauge,
retained: stats::retained_mib,
retained_gauge: IntGauge,
}
impl MetricRecorder {
pub fn new(registry: &Registry) -> Result<Self, anyhow::Error> {
tracing::info!(
config = config::malloc_conf::read()?,
version = version::read()?,
"starting jemalloc recorder"
);
Ok(Self {
epoch: epoch::mib()?,
active: stats::active::mib()?,
active_gauge: register_int_gauge_with_registry!(
"jemalloc_active_bytes",
"Total number of bytes in active pages allocated by the process",
registry
)?,
allocated: stats::allocated::mib()?,
allocated_gauge: register_int_gauge_with_registry!(
"jemalloc_allocated_bytes",
"Total number of bytes allocated by the process",
registry
)?,
mapped: stats::mapped::mib()?,
mapped_gauge: register_int_gauge_with_registry!(
"jemalloc_mapped_bytes",
"Total number of bytes in active extents mapped by the allocator",
registry
)?,
metadata: stats::metadata::mib()?,
metadata_gauge: register_int_gauge_with_registry!(
"jemalloc_metadata_bytes",
"Total number of bytes dedicated to jemalloc metadata",
registry
)?,
resident: stats::resident::mib()?,
resident_gauge: register_int_gauge_with_registry!(
"jemalloc_resident_bytes",
"Total number of bytes in physically resident data pages mapped by the allocator",
registry
)?,
retained: stats::retained::mib()?,
retained_gauge: register_int_gauge_with_registry!(
"jemalloc_retained_bytes",
"Total number of bytes in virtual memory mappings that were retained rather than being returned to the operating system",
registry
)?,
})
}
fn _poll(&self) -> Result<(), anyhow::Error> {
self.epoch.advance()?;
self.active_gauge.set(self.active.read()? as i64);
self.allocated_gauge.set(self.allocated.read()? as i64);
self.mapped_gauge.set(self.mapped.read()? as i64);
self.metadata_gauge.set(self.metadata.read()? as i64);
self.resident_gauge.set(self.resident.read()? as i64);
self.retained_gauge.set(self.retained.read()? as i64);
Ok(())
}
#[inline]
pub fn poll(&self) {
if let Err(error) = self._poll() {
tracing::warn!(%error, "Failed to poll jemalloc stats");
}
}
pub fn start(self) -> tokio::task::JoinHandle<()> {
tokio::task::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_secs(15));
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
loop {
self.poll();
interval.tick().await;
}
})
}
}

View File

@@ -16,7 +16,6 @@ pub mod console;
pub mod context;
pub mod error;
pub mod http;
pub mod jemalloc;
pub mod logging;
pub mod metrics;
pub mod parse;

View File

@@ -1,7 +1,10 @@
use ::metrics::{
exponential_buckets, register_histogram, register_histogram_vec, register_hll_vec,
register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge_vec, Histogram,
HistogramVec, HyperLogLogVec, IntCounterPairVec, IntCounterVec, IntGaugeVec,
exponential_buckets, register_int_counter_pair_vec, register_int_counter_vec,
IntCounterPairVec, IntCounterVec,
};
use prometheus::{
register_histogram, register_histogram_vec, register_int_gauge_vec, Histogram, HistogramVec,
IntGaugeVec,
};
use once_cell::sync::Lazy;
@@ -233,13 +236,3 @@ pub const fn bool_to_str(x: bool) -> &'static str {
"false"
}
}
pub static CONNECTING_ENDPOINTS: Lazy<HyperLogLogVec<32>> = Lazy::new(|| {
register_hll_vec!(
32,
"proxy_connecting_endpoints",
"HLL approximate cardinality of endpoints that are connecting",
&["protocol"],
)
.unwrap()
});
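For context on the removed metric: a HyperLogLog estimates the number of distinct values (here, connecting endpoints) in a few kilobytes instead of keeping a per-value set. A toy version of the estimator — illustrative only, not the metrics crate's `HyperLogLogVec`:

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Toy HyperLogLog with 2^B registers; relative error is roughly 1.04 / sqrt(2^B).
struct Hll<const B: usize> {
    registers: Vec<u8>,
}

impl<const B: usize> Hll<B> {
    fn new() -> Self {
        Self { registers: vec![0; 1usize << B] }
    }

    fn insert<T: Hash>(&mut self, item: &T) {
        let mut h = DefaultHasher::new();
        item.hash(&mut h);
        let x = h.finish();
        let idx = (x >> (64 - B)) as usize; // top B bits pick a register
        let rank = (x << B).leading_zeros() as u8 + 1; // leading zeros of the rest
        self.registers[idx] = self.registers[idx].max(rank);
    }

    fn estimate(&self) -> f64 {
        let m = self.registers.len() as f64;
        let sum: f64 = self.registers.iter().map(|&r| 2f64.powi(-(r as i32))).sum();
        // alpha_m * m^2 / sum; small- and large-range corrections omitted.
        0.7213 / (1.0 + 1.079 / m) * m * m / sum
    }
}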

View File

@@ -2,34 +2,37 @@
mod tests;
pub mod connect_compute;
pub mod handshake;
pub mod passthrough;
pub mod retry;
use crate::{
auth,
cancellation::{self, CancelMap},
compute,
config::{ProxyConfig, TlsConfig},
config::{AuthenticationConfig, ProxyConfig, TlsConfig},
console::messages::MetricsAuxInfo,
context::RequestMonitoring,
metrics::{NUM_CLIENT_CONNECTION_GAUGE, NUM_CONNECTION_REQUESTS_GAUGE},
metrics::{
NUM_BYTES_PROXIED_COUNTER, NUM_BYTES_PROXIED_PER_CLIENT_COUNTER,
NUM_CLIENT_CONNECTION_GAUGE, NUM_CONNECTION_REQUESTS_GAUGE,
},
protocol2::WithClientIp,
proxy::{handshake::handshake, passthrough::proxy_pass},
rate_limiter::EndpointRateLimiter,
stream::{PqStream, Stream},
usage_metrics::{Ids, USAGE_METRICS},
EndpointCacheKey,
};
use anyhow::{bail, Context};
use futures::TryFutureExt;
use itertools::Itertools;
use once_cell::sync::OnceCell;
use pq_proto::{BeMessage as Be, StartupMessageParams};
use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams};
use regex::Regex;
use smol_str::{format_smolstr, SmolStr};
use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
use tokio_util::sync::CancellationToken;
use tracing::{error, info, info_span, Instrument};
use utils::measured_stream::MeasuredStream;
use self::connect_compute::{connect_to_compute, TcpMechanism};
@@ -77,13 +80,6 @@ pub async fn task_main(
let cancel_map = Arc::clone(&cancel_map);
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
let session_span = info_span!(
"handle_client",
?session_id,
peer_addr = tracing::field::Empty,
ep = tracing::field::Empty,
);
connections.spawn(
async move {
info!("accepted postgres client connection");
@@ -107,18 +103,22 @@ pub async fn task_main(
handle_client(
config,
&mut ctx,
cancel_map,
&cancel_map,
socket,
ClientMode::Tcp,
endpoint_rate_limiter,
)
.await
}
.instrument(info_span!(
"handle_client",
?session_id,
peer_addr = tracing::field::Empty
))
.unwrap_or_else(move |e| {
// Acknowledge that the task has finished with an error.
error!("per-client task finished with an error: {e:#}");
})
.instrument(session_span),
error!(?session_id, "per-client task finished with an error: {e:#}");
}),
);
}
@@ -171,7 +171,7 @@ impl ClientMode {
pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
config: &'static ProxyConfig,
ctx: &mut RequestMonitoring,
cancel_map: Arc<CancelMap>,
cancel_map: &CancelMap,
stream: S,
mode: ClientMode,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
@@ -192,88 +192,138 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
let tls = config.tls_config.as_ref();
let pause = ctx.latency_timer.pause();
let do_handshake = handshake(stream, mode.handshake_tls(tls), &cancel_map);
let do_handshake = handshake(stream, mode.handshake_tls(tls), cancel_map);
let (mut stream, params) = match do_handshake.await? {
Some(x) => x,
None => return Ok(()), // it's a cancellation request
};
drop(pause);
let hostname = mode.hostname(stream.get_ref());
let common_names = tls.map(|tls| &tls.common_names);
// Extract credentials which we're going to use for auth.
let result = config
.auth_backend
.as_ref()
.map(|_| auth::ComputeUserInfoMaybeEndpoint::parse(ctx, &params, hostname, common_names))
.transpose();
let user_info = {
let hostname = mode.hostname(stream.get_ref());
let user_info = match result {
Ok(user_info) => user_info,
Err(e) => stream.throw_error(e).await?,
let common_names = tls.map(|tls| &tls.common_names);
let result = config
.auth_backend
.as_ref()
.map(|_| {
auth::ComputeUserInfoMaybeEndpoint::parse(ctx, &params, hostname, common_names)
})
.transpose();
match result {
Ok(user_info) => user_info,
Err(e) => stream.throw_error(e).await?,
}
};
// check rate limit
if let Some(ep) = user_info.get_endpoint() {
if !endpoint_rate_limiter.check(ep) {
return stream
.throw_error(auth::AuthError::too_many_connections())
                .await;
        }
    }
ctx.set_endpoint_id(user_info.get_endpoint());
let client = Client::new(
stream,
user_info,
&params,
mode.allow_self_signed_compute(config),
endpoint_rate_limiter,
);
cancel_map
.with_session(|session| {
client.connect_to_db(ctx, session, mode, &config.authentication_config)
})
.await
}
/// Establish a (most probably, secure) connection with the client.
/// For better testing experience, `stream` can be any object satisfying the traits.
/// It's easier to work with owned `stream` here as we need to upgrade it to TLS;
/// we also take extra care to propagate only select handshake errors to the client.
#[tracing::instrument(skip_all)]
async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
stream: S,
mut tls: Option<&TlsConfig>,
cancel_map: &CancelMap,
) -> anyhow::Result<Option<(PqStream<Stream<S>>, StartupMessageParams)>> {
// Client may try upgrading to each protocol only once
let (mut tried_ssl, mut tried_gss) = (false, false);
let mut stream = PqStream::new(Stream::from_raw(stream));
loop {
let msg = stream.read_startup_packet().await?;
info!("received {msg:?}");
use FeStartupPacket::*;
match msg {
SslRequest => match stream.get_ref() {
Stream::Raw { .. } if !tried_ssl => {
tried_ssl = true;
// We can't perform TLS handshake without a config
let enc = tls.is_some();
stream.write_message(&Be::EncryptionResponse(enc)).await?;
if let Some(tls) = tls.take() {
// Upgrade raw stream into a secure TLS-backed stream.
// NOTE: We've consumed `tls`; this fact will be used later.
let (raw, read_buf) = stream.into_inner();
// TODO: Normally, client doesn't send any data before
                        // server says TLS handshake is ok and read_buf is empty.
// However, you could imagine pipelining of postgres
// SSLRequest + TLS ClientHello in one hunk similar to
// pipelining in our node js driver. We should probably
// support that by chaining read_buf with the stream.
if !read_buf.is_empty() {
bail!("data is sent before server replied with EncryptionResponse");
}
let tls_stream = raw.upgrade(tls.to_server_config()).await?;
let (_, tls_server_end_point) = tls
.cert_resolver
.resolve(tls_stream.get_ref().1.server_name())
.context("missing certificate")?;
stream = PqStream::new(Stream::Tls {
tls: Box::new(tls_stream),
tls_server_end_point,
});
}
}
_ => bail!(ERR_PROTO_VIOLATION),
},
GssEncRequest => match stream.get_ref() {
Stream::Raw { .. } if !tried_gss => {
tried_gss = true;
// Currently, we don't support GSSAPI
stream.write_message(&Be::EncryptionResponse(false)).await?;
}
_ => bail!(ERR_PROTO_VIOLATION),
},
StartupMessage { params, .. } => {
// Check that the config has been consumed during upgrade
// OR we didn't provide it at all (for dev purposes).
if tls.is_some() {
stream.throw_error_str(ERR_INSECURE_CONNECTION).await?;
}
info!(session_type = "normal", "successful handshake");
break Ok(Some((stream, params)));
}
CancelRequest(cancel_key_data) => {
cancel_map.cancel_session(cancel_key_data).await?;
info!(session_type = "cancellation", "successful handshake");
break Ok(None);
}
}
}
let user = user_info.get_user().to_owned();
let (mut node_info, user_info) = match user_info
.authenticate(
ctx,
&mut stream,
mode.allow_cleartext(),
&config.authentication_config,
)
.await
{
Ok(auth_result) => auth_result,
Err(e) => {
let db = params.get("database");
let app = params.get("application_name");
let params_span = tracing::info_span!("", ?user, ?db, ?app);
return stream.throw_error(e).instrument(params_span).await;
}
};
node_info.allow_self_signed_compute = mode.allow_self_signed_compute(config);
let aux = node_info.aux.clone();
let mut node = connect_to_compute(
ctx,
&TcpMechanism { params: &params },
node_info,
&user_info,
)
.or_else(|e| stream.throw_error(e))
.await?;
let session = cancel_map.get_session();
prepare_client_connection(&node, &session, &mut stream).await?;
// Before proxy passing, forward to compute whatever data is left in the
// PqStream input buffer. Normally there is none, but our serverless npm
// driver in pipeline mode sends startup, password and first query
// immediately after opening the connection.
let (stream, read_buf) = stream.into_inner();
node.stream.write_all(&read_buf).await?;
proxy_pass(ctx, stream, node.stream, aux).await
}
/// Finish client connection initialization: confirm auth success, send params, etc.
#[tracing::instrument(skip_all)]
async fn prepare_client_connection(
node: &compute::PostgresConnection,
session: &cancellation::Session,
session: cancellation::Session<'_>,
stream: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
) -> anyhow::Result<()> {
// Register compute's query cancellation token and produce a new, unique one.
@@ -299,6 +349,151 @@ async fn prepare_client_connection(
Ok(())
}
/// Forward bytes in both directions (client <-> compute).
#[tracing::instrument(skip_all)]
pub async fn proxy_pass(
ctx: &mut RequestMonitoring,
client: impl AsyncRead + AsyncWrite + Unpin,
compute: impl AsyncRead + AsyncWrite + Unpin,
aux: MetricsAuxInfo,
) -> anyhow::Result<()> {
ctx.set_success();
ctx.log();
let usage = USAGE_METRICS.register(Ids {
endpoint_id: aux.endpoint_id.clone(),
branch_id: aux.branch_id.clone(),
});
let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["tx"]);
let m_sent2 = NUM_BYTES_PROXIED_PER_CLIENT_COUNTER.with_label_values(&aux.traffic_labels("tx"));
let mut client = MeasuredStream::new(
client,
|_| {},
|cnt| {
// Number of bytes we sent to the client (outbound).
m_sent.inc_by(cnt as u64);
m_sent2.inc_by(cnt as u64);
usage.record_egress(cnt as u64);
},
);
let m_recv = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["rx"]);
let m_recv2 = NUM_BYTES_PROXIED_PER_CLIENT_COUNTER.with_label_values(&aux.traffic_labels("rx"));
let mut compute = MeasuredStream::new(
compute,
|_| {},
|cnt| {
// Number of bytes the client sent to the compute node (inbound).
m_recv.inc_by(cnt as u64);
m_recv2.inc_by(cnt as u64);
},
);
// Starting from here we only proxy the client's traffic.
info!("performing the proxy pass...");
let _ = tokio::io::copy_bidirectional(&mut client, &mut compute).await?;
Ok(())
}
/// Thin connection context.
struct Client<'a, S> {
/// The underlying libpq protocol stream.
stream: PqStream<Stream<S>>,
/// Client credentials that we care about.
user_info: auth::BackendType<'a, auth::ComputeUserInfoMaybeEndpoint>,
/// KV-dictionary with PostgreSQL connection params.
params: &'a StartupMessageParams,
/// Allow self-signed certificates (for testing).
allow_self_signed_compute: bool,
/// Rate limiter for endpoints
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
}
impl<'a, S> Client<'a, S> {
/// Construct a new connection context.
fn new(
stream: PqStream<Stream<S>>,
user_info: auth::BackendType<'a, auth::ComputeUserInfoMaybeEndpoint>,
params: &'a StartupMessageParams,
allow_self_signed_compute: bool,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> Self {
Self {
stream,
user_info,
params,
allow_self_signed_compute,
endpoint_rate_limiter,
}
}
}
impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
/// Let the client authenticate and connect to the designated compute node.
// Instrumentation logs endpoint name everywhere. Doesn't work for link
    // auth; strictly speaking we don't know the endpoint name in that case.
#[tracing::instrument(name = "", fields(ep = %self.user_info.get_endpoint().unwrap_or_default()), skip_all)]
async fn connect_to_db(
self,
ctx: &mut RequestMonitoring,
session: cancellation::Session<'_>,
mode: ClientMode,
config: &'static AuthenticationConfig,
) -> anyhow::Result<()> {
let Self {
mut stream,
user_info,
params,
allow_self_signed_compute,
endpoint_rate_limiter,
} = self;
// check rate limit
if let Some(ep) = user_info.get_endpoint() {
if !endpoint_rate_limiter.check(ep) {
return stream
.throw_error(auth::AuthError::too_many_connections())
.await;
}
}
let user = user_info.get_user().to_owned();
let auth_result = match user_info
.authenticate(ctx, &mut stream, mode.allow_cleartext(), config)
.await
{
Ok(auth_result) => auth_result,
Err(e) => {
let db = params.get("database");
let app = params.get("application_name");
let params_span = tracing::info_span!("", ?user, ?db, ?app);
return stream.throw_error(e).instrument(params_span).await;
}
};
let (mut node_info, user_info) = auth_result;
node_info.allow_self_signed_compute = allow_self_signed_compute;
let aux = node_info.aux.clone();
let mut node = connect_to_compute(ctx, &TcpMechanism { params }, node_info, &user_info)
.or_else(|e| stream.throw_error(e))
.await?;
prepare_client_connection(&node, session, &mut stream).await?;
// Before proxy passing, forward to compute whatever data is left in the
// PqStream input buffer. Normally there is none, but our serverless npm
// driver in pipeline mode sends startup, password and first query
// immediately after opening the connection.
let (stream, read_buf) = stream.into_inner();
node.stream.write_all(&read_buf).await?;
proxy_pass(ctx, stream, node.stream, aux).await
}
}
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct NeonOptions(Vec<(SmolStr, SmolStr)>);

View File

@@ -1,96 +0,0 @@
use anyhow::{bail, Context};
use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams};
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::info;
use crate::{
cancellation::CancelMap,
config::TlsConfig,
proxy::{ERR_INSECURE_CONNECTION, ERR_PROTO_VIOLATION},
stream::{PqStream, Stream},
};
/// Establish a (most probably, secure) connection with the client.
/// For better testing experience, `stream` can be any object satisfying the traits.
/// It's easier to work with owned `stream` here as we need to upgrade it to TLS;
/// we also take extra care to propagate only select handshake errors to the client.
#[tracing::instrument(skip_all)]
pub async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
stream: S,
mut tls: Option<&TlsConfig>,
cancel_map: &CancelMap,
) -> anyhow::Result<Option<(PqStream<Stream<S>>, StartupMessageParams)>> {
// Client may try upgrading to each protocol only once
let (mut tried_ssl, mut tried_gss) = (false, false);
let mut stream = PqStream::new(Stream::from_raw(stream));
loop {
let msg = stream.read_startup_packet().await?;
info!("received {msg:?}");
use FeStartupPacket::*;
match msg {
SslRequest => match stream.get_ref() {
Stream::Raw { .. } if !tried_ssl => {
tried_ssl = true;
// We can't perform TLS handshake without a config
let enc = tls.is_some();
stream.write_message(&Be::EncryptionResponse(enc)).await?;
if let Some(tls) = tls.take() {
// Upgrade raw stream into a secure TLS-backed stream.
// NOTE: We've consumed `tls`; this fact will be used later.
let (raw, read_buf) = stream.into_inner();
// TODO: Normally, client doesn't send any data before
                        // server says TLS handshake is ok and read_buf is empty.
// However, you could imagine pipelining of postgres
// SSLRequest + TLS ClientHello in one hunk similar to
// pipelining in our node js driver. We should probably
// support that by chaining read_buf with the stream.
if !read_buf.is_empty() {
bail!("data is sent before server replied with EncryptionResponse");
}
let tls_stream = raw.upgrade(tls.to_server_config()).await?;
let (_, tls_server_end_point) = tls
.cert_resolver
.resolve(tls_stream.get_ref().1.server_name())
.context("missing certificate")?;
stream = PqStream::new(Stream::Tls {
tls: Box::new(tls_stream),
tls_server_end_point,
});
}
}
_ => bail!(ERR_PROTO_VIOLATION),
},
GssEncRequest => match stream.get_ref() {
Stream::Raw { .. } if !tried_gss => {
tried_gss = true;
// Currently, we don't support GSSAPI
stream.write_message(&Be::EncryptionResponse(false)).await?;
}
_ => bail!(ERR_PROTO_VIOLATION),
},
StartupMessage { params, .. } => {
// Check that the config has been consumed during upgrade
// OR we didn't provide it at all (for dev purposes).
if tls.is_some() {
stream.throw_error_str(ERR_INSECURE_CONNECTION).await?;
}
info!(session_type = "normal", "successful handshake");
break Ok(Some((stream, params)));
}
CancelRequest(cancel_key_data) => {
cancel_map.cancel_session(cancel_key_data).await?;
info!(session_type = "cancellation", "successful handshake");
break Ok(None);
}
}
}
}

View File

@@ -1,57 +0,0 @@
use crate::{
console::messages::MetricsAuxInfo,
context::RequestMonitoring,
metrics::{NUM_BYTES_PROXIED_COUNTER, NUM_BYTES_PROXIED_PER_CLIENT_COUNTER},
usage_metrics::{Ids, USAGE_METRICS},
};
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::info;
use utils::measured_stream::MeasuredStream;
/// Forward bytes in both directions (client <-> compute).
#[tracing::instrument(skip_all)]
pub async fn proxy_pass(
ctx: &mut RequestMonitoring,
client: impl AsyncRead + AsyncWrite + Unpin,
compute: impl AsyncRead + AsyncWrite + Unpin,
aux: MetricsAuxInfo,
) -> anyhow::Result<()> {
ctx.set_success();
ctx.log();
let usage = USAGE_METRICS.register(Ids {
endpoint_id: aux.endpoint_id.clone(),
branch_id: aux.branch_id.clone(),
});
let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["tx"]);
let m_sent2 = NUM_BYTES_PROXIED_PER_CLIENT_COUNTER.with_label_values(&aux.traffic_labels("tx"));
let mut client = MeasuredStream::new(
client,
|_| {},
|cnt| {
// Number of bytes we sent to the client (outbound).
m_sent.inc_by(cnt as u64);
m_sent2.inc_by(cnt as u64);
usage.record_egress(cnt as u64);
},
);
let m_recv = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["rx"]);
let m_recv2 = NUM_BYTES_PROXIED_PER_CLIENT_COUNTER.with_label_values(&aux.traffic_labels("rx"));
let mut compute = MeasuredStream::new(
compute,
|_| {},
|cnt| {
// Number of bytes the client sent to the compute node (inbound).
m_recv.inc_by(cnt as u64);
m_recv2.inc_by(cnt as u64);
},
);
// Starting from here we only proxy the client's traffic.
info!("performing the proxy pass...");
let _ = tokio::io::copy_bidirectional(&mut client, &mut compute).await?;
Ok(())
}

View File

@@ -6,8 +6,8 @@ use super::connect_compute::ConnectMechanism;
use super::retry::ShouldRetry;
use super::*;
use crate::auth::backend::{ComputeUserInfo, TestBackend};
use crate::auth::IpPattern;
use crate::config::CertResolver;
use crate::console::provider::{CachedAllowedIps, CachedRoleSecret};
use crate::console::{self, CachedNodeInfo, NodeInfo};
use crate::proxy::retry::{retry_after, NUM_RETRIES_CONNECT};
use crate::{auth, http, sasl, scram};
@@ -471,10 +471,7 @@ impl TestBackend for TestConnectMechanism {
}
}
fn get_allowed_ips_and_secret(
&self,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>
{
fn get_allowed_ips(&self) -> Result<Vec<IpPattern>, console::errors::GetAuthInfoError> {
unimplemented!("not used in tests")
}
}

View File

@@ -41,8 +41,6 @@ use tokio_util::sync::CancellationToken;
use tracing::{error, info, info_span, warn, Instrument};
use utils::http::{error::ApiError, json::json_response};
pub const SERVERLESS_DRIVER_SNI: &str = "api";
pub async fn task_main(
config: &'static ProxyConfig,
ws_listener: TcpListener,
@@ -230,7 +228,7 @@ async fn request_handler(
config,
&mut ctx,
websocket,
cancel_map,
&cancel_map,
host,
endpoint_rate_limiter,
)

View File

@@ -540,7 +540,7 @@ async fn connect_to_compute(
.map(|_| conn_info.user_info.clone());
if !config.disable_ip_check_for_http {
let (allowed_ips, _) = backend.get_allowed_ips_and_secret(ctx).await?;
let allowed_ips = backend.get_allowed_ips(ctx).await?;
if !check_peer_addr_is_in_list(&ctx.peer_addr, &allowed_ips) {
return Err(auth::AuthError::ip_address_not_allowed().into());
}

View File

@@ -1,7 +1,6 @@
use std::sync::Arc;
use anyhow::bail;
use anyhow::Context;
use futures::pin_mut;
use futures::StreamExt;
use hyper::body::HttpBody;
@@ -36,11 +35,11 @@ use crate::config::TlsConfig;
use crate::context::RequestMonitoring;
use crate::metrics::NUM_CONNECTION_REQUESTS_GAUGE;
use crate::proxy::NeonOptions;
use crate::EndpointId;
use crate::RoleName;
use super::conn_pool::ConnInfo;
use super::conn_pool::GlobalConnPool;
use super::SERVERLESS_DRIVER_SNI;
#[derive(serde::Deserialize)]
struct QueryData {
@@ -62,6 +61,7 @@ enum Payload {
const MAX_RESPONSE_SIZE: usize = 10 * 1024 * 1024; // 10 MiB
const MAX_REQUEST_SIZE: u64 = 10 * 1024 * 1024; // 10 MiB
const SERVERLESS_DRIVER_SNI_HOSTNAME_FIRST_PART: &str = "api";
static RAW_TEXT_OUTPUT: HeaderName = HeaderName::from_static("neon-raw-text-output");
static ARRAY_MODE: HeaderName = HeaderName::from_static("neon-array-mode");
@@ -188,8 +188,10 @@ fn get_conn_info(
}
}
let endpoint = endpoint_sni(hostname, &tls.common_names)?.context("malformed endpoint")?;
ctx.set_endpoint_id(endpoint.clone());
let endpoint = endpoint_sni(hostname, &tls.common_names)?;
let endpoint: EndpointId = endpoint.into();
ctx.set_endpoint_id(Some(endpoint.clone()));
let pairs = connection_url.query_pairs();
@@ -225,7 +227,8 @@ fn check_matches(sni_hostname: &str, hostname: &str) -> Result<bool, anyhow::Err
let (_, hostname_rest) = hostname
.split_once('.')
.ok_or_else(|| anyhow::anyhow!("Unexpected hostname format."))?;
Ok(sni_hostname_rest == hostname_rest && sni_hostname_first == SERVERLESS_DRIVER_SNI)
Ok(sni_hostname_rest == hostname_rest
&& sni_hostname_first == SERVERLESS_DRIVER_SNI_HOSTNAME_FIRST_PART)
}
// TODO: return different http error codes

View File

@@ -133,7 +133,7 @@ pub async fn serve_websocket(
config: &'static ProxyConfig,
ctx: &mut RequestMonitoring,
websocket: HyperWebsocket,
cancel_map: Arc<CancelMap>,
cancel_map: &CancelMap,
hostname: Option<String>,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> anyhow::Result<()> {

View File

@@ -33,7 +33,7 @@ psutil = "^5.9.4"
types-psutil = "^5.9.5.12"
types-toml = "^0.10.8.6"
pytest-httpserver = "^1.0.8"
aiohttp = "3.9.2"
aiohttp = "3.9.0"
pytest-rerunfailures = "^13.0"
types-pytest-lazy-fixture = "^0.6.3.3"
pytest-split = "^0.8.1"

View File

@@ -3,9 +3,8 @@
use anyhow::{bail, ensure, Context, Result};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use camino::Utf8PathBuf;
use tokio::fs::File;
use tokio::fs::{self, File};
use tokio::io::AsyncWriteExt;
use utils::crashsafe::durable_rename;
use std::io::Read;
use std::ops::Deref;
@@ -204,8 +203,35 @@ impl Storage for FileStorage {
)
})?;
// fsync the file
if !self.conf.no_sync {
control_partial.sync_all().await.with_context(|| {
format!(
"failed to sync partial control file at {}",
control_partial_path
)
})?;
}
let control_path = self.timeline_dir.join(CONTROL_FILE_NAME);
durable_rename(&control_partial_path, &control_path, !self.conf.no_sync).await?;
// rename should be atomic
fs::rename(&control_partial_path, &control_path).await?;
// this sync is not required by any standard but postgres does this (see durable_rename)
if !self.conf.no_sync {
let new_f = File::open(&control_path).await?;
new_f
.sync_all()
.await
.with_context(|| format!("failed to sync control file at: {}", &control_path))?;
// fsync the directory (linux specific)
let tli_dir = File::open(&self.timeline_dir).await?;
tli_dir
.sync_all()
.await
.context("failed to sync control file directory")?;
}
// update internal state
self.state = s.clone();
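The `durable_rename` used above is the classic crash-safe replacement recipe, the same one the removed code spells out by hand: fsync the temp file, rename it into place, then fsync the new name and its directory. A rough illustrative sketch — the real helper lives in `utils::crashsafe` and may differ in detail:

use std::io;
use std::path::Path;
use tokio::fs::{rename, File};

async fn durable_rename_sketch(tmp: &Path, dst: &Path, do_sync: bool) -> io::Result<()> {
    if do_sync {
        // 1. Flush the temp file's contents before it becomes visible under dst.
        File::open(tmp).await?.sync_all().await?;
    }
    // 2. Atomically replace the destination.
    rename(tmp, dst).await?;
    if do_sync {
        // 3. Not required by any standard, but postgres fsyncs the renamed file too.
        File::open(dst).await?.sync_all().await?;
        // 4. fsync the directory so the rename itself survives a crash (linux-specific).
        let dir = dst.parent().expect("destination must have a parent directory");
        File::open(dir).await?.sync_all().await?;
    }
    Ok(())
}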
@@ -223,7 +249,6 @@ mod test {
use super::*;
use crate::SafeKeeperConf;
use anyhow::Result;
use tokio::fs;
use utils::{id::TenantTimelineId, lsn::Lsn};
fn stub_conf() -> SafeKeeperConf {

View File

@@ -28,7 +28,7 @@ use crate::safekeeper::Term;
use crate::safekeeper::{ServerInfo, TermLsn};
use crate::send_wal::WalSenderState;
use crate::timeline::PeerInfo;
use crate::{copy_timeline, debug_dump, patch_control_file, pull_timeline};
use crate::{copy_timeline, debug_dump, pull_timeline};
use crate::timelines_global_map::TimelineDeleteForceResult;
use crate::GlobalTimelines;
@@ -465,26 +465,6 @@ async fn dump_debug_handler(mut request: Request<Body>) -> Result<Response<Body>
Ok(response)
}
async fn patch_control_file_handler(
mut request: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permission(&request, None)?;
let ttid = TenantTimelineId::new(
parse_request_param(&request, "tenant_id")?,
parse_request_param(&request, "timeline_id")?,
);
let tli = GlobalTimelines::get(ttid).map_err(ApiError::from)?;
let patch_request: patch_control_file::Request = json_request(&mut request).await?;
let response = patch_control_file::handle_request(tli, patch_request)
.await
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::OK, response)
}
/// Safekeeper http router.
pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError> {
let mut router = endpoint::make_router();
@@ -546,10 +526,6 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
"/v1/tenant/:tenant_id/timeline/:source_timeline_id/copy",
|r| request_span(r, timeline_copy_handler),
)
.patch(
"/v1/tenant/:tenant_id/timeline/:timeline_id/control_file",
|r| request_span(r, patch_control_file_handler),
)
// for tests
.post("/v1/record_safekeeper_info/:tenant_id/:timeline_id", |r| {
request_span(r, record_safekeeper_info)

View File

@@ -22,7 +22,6 @@ pub mod handler;
pub mod http;
pub mod json_ctrl;
pub mod metrics;
pub mod patch_control_file;
pub mod pull_timeline;
pub mod receive_wal;
pub mod recovery;

View File

@@ -1,85 +0,0 @@
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use tracing::info;
use crate::{state::TimelinePersistentState, timeline::Timeline};
#[derive(Deserialize, Debug, Clone)]
pub struct Request {
/// JSON object with fields to update
pub updates: serde_json::Value,
/// List of fields to apply
pub apply_fields: Vec<String>,
}
#[derive(Serialize)]
pub struct Response {
pub old_control_file: TimelinePersistentState,
pub new_control_file: TimelinePersistentState,
}
/// Patch control file with given request. Will update the persistent state using
/// fields from the request and persist the new state on disk.
pub async fn handle_request(tli: Arc<Timeline>, request: Request) -> anyhow::Result<Response> {
let response = tli
.map_control_file(|state| {
let old_control_file = state.clone();
let new_control_file = state_apply_diff(&old_control_file, &request)?;
info!(
"patching control file, old: {:?}, new: {:?}, patch: {:?}",
old_control_file, new_control_file, request
);
*state = new_control_file.clone();
Ok(Response {
old_control_file,
new_control_file,
})
})
.await?;
Ok(response)
}
fn state_apply_diff(
state: &TimelinePersistentState,
request: &Request,
) -> anyhow::Result<TimelinePersistentState> {
let mut json_value = serde_json::to_value(state)?;
if let Value::Object(a) = &mut json_value {
if let Value::Object(b) = &request.updates {
json_apply_diff(a, b, &request.apply_fields)?;
} else {
anyhow::bail!("request.updates is not a json object")
}
} else {
anyhow::bail!("TimelinePersistentState is not a json object")
}
let new_state: TimelinePersistentState = serde_json::from_value(json_value)?;
Ok(new_state)
}
fn json_apply_diff(
object: &mut serde_json::Map<String, Value>,
updates: &serde_json::Map<String, Value>,
apply_keys: &Vec<String>,
) -> anyhow::Result<()> {
for key in apply_keys {
if let Some(new_value) = updates.get(key) {
if let Some(existing_value) = object.get_mut(key) {
*existing_value = new_value.clone();
} else {
anyhow::bail!("key not found in original object: {}", key);
}
} else {
anyhow::bail!("key not found in request.updates: {}", key);
}
}
Ok(())
}

View File

@@ -265,9 +265,9 @@ impl WalSendersShared {
/// Update aggregated pageserver feedback. LSNs (last_received,
/// disk_consistent, remote_consistent) and reply timestamp are just
    /// maximized; timeline_size is taken from feedback with the highest
    /// last_received lsn, except when it is 0 (sent from non-zero shards). This
/// is generally reasonable, but we might want to implement other policies
/// once multiple pageservers start to be actively used.
/// last_received lsn. This is generally reasonable, but we might want to
/// implement other policies once multiple pageservers start to be actively
/// used.
fn update_ps_feedback(&mut self) {
let init = PageserverFeedback::empty();
let acc =
@@ -276,9 +276,7 @@ impl WalSendersShared {
.flatten()
.fold(init, |mut acc, ws_state| match ws_state.feedback {
ReplicationFeedback::Pageserver(feedback) => {
if feedback.current_timeline_size != 0
&& feedback.last_received_lsn > acc.last_received_lsn
{
if feedback.last_received_lsn > acc.last_received_lsn {
acc.current_timeline_size = feedback.current_timeline_size;
}
acc.last_received_lsn =

View File

@@ -901,20 +901,6 @@ impl Timeline {
file_open,
}
}
/// Apply a function to the control file state and persist it.
pub async fn map_control_file<T>(
&self,
f: impl FnOnce(&mut TimelinePersistentState) -> Result<T>,
) -> Result<T> {
let mut state = self.write_shared_state().await;
let mut persistent_state = state.sk.state.start_change();
// If f returns error, we abort the change and don't persist anything.
let res = f(&mut persistent_state)?;
// If persisting fails, we abort the change and return error.
state.sk.state.finish_change(&persistent_state).await?;
Ok(res)
}
}
/// Deletes directory and its contents. Returns false if directory does not exist.

View File

@@ -21,7 +21,6 @@ use tokio::fs::{self, remove_file, File, OpenOptions};
use tokio::io::{AsyncRead, AsyncWriteExt};
use tokio::io::{AsyncReadExt, AsyncSeekExt};
use tracing::*;
use utils::crashsafe::durable_rename;
use crate::metrics::{time_io_closure, WalStorageMetrics, REMOVED_WAL_SEGMENTS};
use crate::state::TimelinePersistentState;
@@ -197,6 +196,15 @@ impl PhysicalStorage {
Ok(())
}
/// Call fsync if config requires so.
async fn fsync_file(&mut self, file: &File) -> Result<()> {
if !self.conf.no_sync {
self.metrics
.observe_flush_seconds(time_io_closure(file.sync_all()).await?);
}
Ok(())
}
/// Open or create WAL segment file. Caller must call seek to the wanted position.
/// Returns `file` and `is_partial`.
async fn open_or_create(&mut self, segno: XLogSegNo) -> Result<(File, bool)> {
@@ -215,33 +223,15 @@ impl PhysicalStorage {
Ok((file, true))
} else {
// Create and fill new partial file
//
// We're using fdatasync during WAL writing, so file size must not
            // change; to this end it is filled with zeros here. To avoid using a
            // half-initialized segment, first bake it under a tmp filename and
            // then rename.
let tmp_path = self.timeline_dir.join("waltmp");
let mut file = OpenOptions::new()
.create(true)
.write(true)
.open(&tmp_path)
.open(&wal_file_partial_path)
.await
.with_context(|| format!("Failed to open tmp wal file {:?}", &tmp_path))?;
.with_context(|| format!("Failed to open log file {:?}", &wal_file_path))?;
write_zeroes(&mut file, self.wal_seg_size).await?;
// Note: this doesn't get into observe_flush_seconds metric. But
// segment init should be separate metric, if any.
if let Err(e) =
durable_rename(&tmp_path, &wal_file_partial_path, !self.conf.no_sync).await
{
// Probably rename succeeded, but fsync of it failed. Remove
// the file then to avoid using it.
remove_file(wal_file_partial_path)
.await
.or_else(utils::fs_ext::ignore_not_found)?;
return Err(e.into());
}
self.fsync_file(&file).await?;
Ok((file, true))
}
}
@@ -728,11 +718,6 @@ const ZERO_BLOCK: &[u8] = &[0u8; XLOG_BLCKSZ];
/// Helper for filling file with zeroes.
async fn write_zeroes(file: &mut File, mut count: usize) -> Result<()> {
fail::fail_point!("sk-write-zeroes", |_| {
info!("write_zeroes hit failpoint");
Err(anyhow::anyhow!("failpoint: sk-write-zeroes"))
});
while count >= XLOG_BLCKSZ {
file.write_all(ZERO_BLOCK).await?;
count -= XLOG_BLCKSZ;

View File

@@ -993,20 +993,13 @@ class NeonEnv:
self.initial_tenant = config.initial_tenant
self.initial_timeline = config.initial_timeline
# Find two adjacent ports for attachment service and its postgres DB. This
# loop would eventually throw from get_port() if we run out of ports (extremely
# unlikely): usually we find two adjacent free ports on the first iteration.
while True:
self.attachment_service_port = self.port_distributor.get_port()
attachment_service_pg_port = self.port_distributor.get_port()
if attachment_service_pg_port == self.attachment_service_port + 1:
break
# The URL for the pageserver to use as its control_plane_api config
self.control_plane_api: str = f"http://127.0.0.1:{self.attachment_service_port}/upcall/v1"
# The base URL of the attachment service
self.attachment_service_api: str = f"http://127.0.0.1:{self.attachment_service_port}"
attachment_service_port = self.port_distributor.get_port()
# Reserve the next port after attachment service for use by its postgres: this
# will assert out if the next port wasn't free.
attachment_service_pg_port = self.port_distributor.get_port()
assert attachment_service_pg_port == attachment_service_port + 1
self.control_plane_api: str = f"http://127.0.0.1:{attachment_service_port}"
self.attachment_service: NeonAttachmentService = NeonAttachmentService(
self, config.auth_enabled
)
@@ -1921,14 +1914,6 @@ class NeonAttachmentService:
self.running = False
return self
def pageserver_api(self) -> PageserverHttpClient:
"""
The attachment service implements a subset of the pageserver REST API, for mapping
per-tenant actions into per-shard actions (e.g. timeline creation). Tests should invoke those
functions via the HttpClient, as an implicit check that these APIs remain compatible.
"""
return PageserverHttpClient(self.env.attachment_service_port, lambda: True)
def request(self, method, *args, **kwargs) -> requests.Response:
kwargs["headers"] = self.headers()
return requests.request(method, *args, **kwargs)
@@ -1946,7 +1931,7 @@ class NeonAttachmentService:
) -> int:
response = self.request(
"POST",
f"{self.env.attachment_service_api}/debug/v1/attach-hook",
f"{self.env.control_plane_api}/attach-hook",
json={"tenant_shard_id": str(tenant_shard_id), "node_id": pageserver_id},
headers=self.headers(),
)
@@ -1958,7 +1943,7 @@ class NeonAttachmentService:
def attach_hook_drop(self, tenant_shard_id: Union[TenantId, TenantShardId]):
response = self.request(
"POST",
f"{self.env.attachment_service_api}/debug/v1/attach-hook",
f"{self.env.control_plane_api}/attach-hook",
json={"tenant_shard_id": str(tenant_shard_id), "node_id": None},
headers=self.headers(),
)
@@ -1970,7 +1955,7 @@ class NeonAttachmentService:
"""
response = self.request(
"POST",
f"{self.env.attachment_service_api}/debug/v1/inspect",
f"{self.env.control_plane_api}/inspect",
json={"tenant_shard_id": str(tenant_shard_id)},
headers=self.headers(),
)
@@ -1991,27 +1976,7 @@ class NeonAttachmentService:
}
log.info(f"node_register({body})")
self.request(
"POST",
f"{self.env.attachment_service_api}/control/v1/node",
json=body,
headers=self.headers(),
).raise_for_status()
def node_list(self):
response = self.request(
"GET", f"{self.env.attachment_service_api}/control/v1/node", headers=self.headers()
)
response.raise_for_status()
return response.json()
def node_configure(self, node_id, body: dict[str, Any]):
log.info(f"node_configure({node_id}, {body})")
body["node_id"] = node_id
self.request(
"PUT",
f"{self.env.attachment_service_api}/control/v1/node/{node_id}/config",
json=body,
headers=self.headers(),
"POST", f"{self.env.control_plane_api}/node", json=body, headers=self.headers()
).raise_for_status()
def tenant_create(
@@ -2021,9 +1986,6 @@ class NeonAttachmentService:
shard_stripe_size: Optional[int] = None,
tenant_config: Optional[Dict[Any, Any]] = None,
):
"""
Use this rather than pageserver_api() when you need to include shard parameters
"""
body: Dict[str, Any] = {"new_tenant_id": str(tenant_id)}
if shard_count is not None:
@@ -2037,18 +1999,22 @@ class NeonAttachmentService:
for k, v in tenant_config.items():
body[k] = v
response = self.request("POST", f"{self.env.attachment_service_api}/v1/tenant", json=body)
response = self.request("POST", f"{self.env.control_plane_api}/tenant", json=body)
response.raise_for_status()
log.info(f"tenant_create success: {response.json()}")
def locate(self, tenant_id: TenantId) -> list[dict[str, Any]]:
"""
:return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr: str, "listen_http_port: int}
"""
def tenant_timeline_create(self, tenant_id: TenantId, timeline_id: TimelineId):
body: Dict[str, Any] = {"new_timeline_id": str(timeline_id)}
response = self.request(
"GET", f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/locate"
"POST", f"{self.env.control_plane_api}/tenant/{tenant_id}/timeline", json=body
)
response.raise_for_status()
log.info(f"tenant_timeline_create success: {response.json()}")
def locate(self, tenant_id: TenantId) -> list[dict[str, Any]]:
response = self.request("GET", f"{self.env.control_plane_api}/tenant/{tenant_id}/locate")
response.raise_for_status()
body = response.json()
shards: list[dict[str, Any]] = body["shards"]
return shards
@@ -2056,7 +2022,7 @@ class NeonAttachmentService:
def tenant_shard_split(self, tenant_id: TenantId, shard_count: int) -> list[TenantShardId]:
response = self.request(
"PUT",
f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/shard_split",
f"{self.env.control_plane_api}/tenant/{tenant_id}/shard_split",
json={"new_shard_count": shard_count},
)
response.raise_for_status()
@@ -2068,7 +2034,7 @@ class NeonAttachmentService:
def tenant_shard_migrate(self, tenant_shard_id: TenantShardId, dest_ps_id: int):
response = self.request(
"PUT",
f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_shard_id}/migrate",
f"{self.env.control_plane_api}/tenant/{tenant_shard_id}/migrate",
json={"tenant_shard_id": str(tenant_shard_id), "node_id": dest_ps_id},
)
response.raise_for_status()
@@ -3096,17 +3062,6 @@ class Endpoint(PgProtocol):
return self
def edit_hba(self, hba: List[str]):
"""Prepend hba lines into pg_hba.conf file."""
with open(os.path.join(self.pg_data_dir_path(), "pg_hba.conf"), "r+") as conf_file:
data = conf_file.read()
conf_file.seek(0)
conf_file.write("\n".join(hba) + "\n")
conf_file.write(data)
if self.running:
self.safe_psql("SELECT pg_reload_conf()")
def reconfigure(self, pageserver_id: Optional[int] = None):
assert self.endpoint_id is not None
self.env.neon_cli.endpoint_reconfigure(self.endpoint_id, self.tenant_id, pageserver_id)
@@ -3205,6 +3160,23 @@ class Endpoint(PgProtocol):
):
self.stop()
def log_contains(self, pattern: str) -> Optional[str]:
"""Check that the compute log contains a line that matches the given regex"""
logfile = self.endpoint_path() / "compute.log"
if not logfile.exists():
log.warning(f"Skipping log check: {logfile} does not exist")
return None
contains_re = re.compile(pattern)
with logfile.open("r") as f:
for line in f:
if contains_re.search(line):
# found it!
return line
return None
# Checkpoints running endpoint and returns pg_wal size in MB.
def get_pg_wal_size(self):
log.info(f'checkpointing at LSN {self.safe_psql("select pg_current_wal_lsn()")[0][0]}')
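As a usage sketch for the new log_contains helper added above (the variable name mirrors the nextXid test later in this diff):
# After an endpoint fails to start, assert on the cause in compute.log.
line = endpoint_broken.log_contains('Could not open file "pg_xact/0000"')
assert line is not None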
@@ -3488,24 +3460,6 @@ class SafekeeperHttpClient(requests.Session):
assert isinstance(res_json, dict)
return res_json
def patch_control_file(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
patch: Dict[str, Any],
) -> Dict[str, Any]:
res = self.patch(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/control_file",
json={
"updates": patch,
"apply_fields": list(patch.keys()),
},
)
res.raise_for_status()
res_json = res.json()
assert isinstance(res_json, dict)
return res_json
def pull_timeline(self, body: Dict[str, Any]) -> Dict[str, Any]:
res = self.post(f"http://localhost:{self.port}/v1/pull_timeline", json=body)
res.raise_for_status()
@@ -3980,17 +3934,8 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, endpoint
# list files we're going to compare
assert endpoint.pgdata_dir
pgdata_files = list_files_to_compare(Path(endpoint.pgdata_dir))
restored_files = list_files_to_compare(restored_dir_path)
if pgdata_files != restored_files:
# filter pg_xact and multixact files which are downloaded on demand
pgdata_files = [
f
for f in pgdata_files
if not f.startswith("pg_xact") and not f.startswith("pg_multixact")
]
# check that file sets are equal
assert pgdata_files == restored_files

View File

@@ -549,12 +549,17 @@ class PageserverHttpClient(requests.Session):
tenant_id: Union[TenantId, TenantShardId],
timeline_id: TimelineId,
timestamp,
version: Optional[int] = None,
):
log.info(
f"Requesting lsn by timestamp {timestamp}, tenant {tenant_id}, timeline {timeline_id}"
)
if version is None:
version_str = ""
else:
version_str = f"&version={version}"
res = self.get(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp?timestamp={timestamp}",
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp?timestamp={timestamp}{version_str}",
)
self.verbose_error(res)
res_json = res.json()
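A sketch of the resulting calls, assuming a PageserverHttpClient named client and tenant/timeline ids in scope; the timestamp literal is illustrative:
# Without a version argument the query string is unchanged (old behavior).
res = client.timeline_get_lsn_by_timestamp(tenant_id, timeline_id, "2024-01-27T12:00:00Z")
# Passing version=2 appends "&version=2" to opt in to the newer endpoint.
res_v2 = client.timeline_get_lsn_by_timestamp(tenant_id, timeline_id, "2024-01-27T12:00:00Z", 2)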

View File

@@ -52,7 +52,7 @@ class PgVersion(str, enum.Enum):
return None
DEFAULT_VERSION: PgVersion = PgVersion.V15
DEFAULT_VERSION: PgVersion = PgVersion.V14
def skip_on_postgres(version: PgVersion, reason: str):
@@ -78,13 +78,6 @@ def pytest_addoption(parser: Parser):
)
def run_only_on_default_postgres(reason: str):
return pytest.mark.skipif(
PgVersion(os.environ.get("DEFAULT_PG_VERSION", DEFAULT_VERSION)) is not DEFAULT_VERSION,
reason=reason,
)
def pytest_configure(config: Config):
if config.getoption("--pg-version"):
raise Exception("--pg-version is deprecated, use DEFAULT_PG_VERSION env var instead")

View File

@@ -1,111 +0,0 @@
import pytest
import requests
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.neon_fixtures import NeonEnvBuilder
# Start and measure duration with huge SLRU segments.
# This test is similar to test_startup_simple, but it creates a huge number of transactions
# and records containing those XIDs. Autovacuum is disabled for the table to prevent CLOG truncation.
#
# This test runs pretty quickly and can be informative when used in combination
# with emulated network delay. Some useful delay commands:
#
# 1. Add 2msec delay to all localhost traffic
# `sudo tc qdisc add dev lo root handle 1:0 netem delay 2msec`
#
# 2. Test that it works (you should see 4ms ping)
# `ping localhost`
#
# 3. Revert back to normal
# `sudo tc qdisc del dev lo root netem`
#
# NOTE this test might not represent the real startup time because the basebackup
# for a large database might be larger if there's a lot of transaction metadata,
# or safekeepers might need more syncing, or there might be more operations to
# apply during the config step, like more users, databases, or extensions. By default
# we load extensions 'neon,pg_stat_statements,timescaledb,pg_cron', but in this
# test we only load neon.
@pytest.mark.timeout(1000)
def test_lazy_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
lazy_tenant, _ = env.neon_cli.create_tenant(
conf={
"lazy_slru_download": "true",
}
)
eager_tenant, _ = env.neon_cli.create_tenant(
conf={
"lazy_slru_download": "false",
}
)
tenants = [lazy_tenant, eager_tenant]
slru = "lazy"
for tenant in tenants:
endpoint = env.endpoints.create_start("main", tenant_id=tenant)
endpoint.safe_psql("CREATE TABLE t (pk integer PRIMARY KEY, x integer)")
endpoint.safe_psql("ALTER TABLE t SET (autovacuum_enabled = false)")
endpoint.safe_psql("INSERT INTO t VALUES (1, 0)")
endpoint.safe_psql(
"""
CREATE PROCEDURE updating() as
$$
DECLARE
i integer;
BEGIN
FOR i IN 1..10000000 LOOP
UPDATE t SET x = x + 1 WHERE pk=1;
COMMIT;
END LOOP;
END
$$ LANGUAGE plpgsql
"""
)
endpoint.safe_psql("SET statement_timeout=0")
endpoint.safe_psql("call updating()")
endpoint.stop()
# We do two iterations so we can see if the second startup is faster. It should
# be, because the compute node will already be configured with roles, databases,
# extensions, etc. from the first run.
for i in range(2):
# Start
with zenbenchmark.record_duration(f"{slru}_{i}_start"):
endpoint.start()
with zenbenchmark.record_duration(f"{slru}_{i}_select"):
sum = endpoint.safe_psql("select sum(x) from t")[0][0]
assert sum == 10000000
# Get metrics
metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json()
durations = {
"wait_for_spec_ms": f"{slru}_{i}_wait_for_spec",
"sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers",
"sync_sk_check_ms": f"{slru}_{i}_sync_sk_check",
"basebackup_ms": f"{slru}_{i}_basebackup",
"start_postgres_ms": f"{slru}_{i}_start_postgres",
"config_ms": f"{slru}_{i}_config",
"total_startup_ms": f"{slru}_{i}_total_startup",
}
for key, name in durations.items():
value = metrics[key]
zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)
basebackup_bytes = metrics["basebackup_bytes"]
zenbenchmark.record(
f"{slru}_{i}_basebackup_bytes",
basebackup_bytes,
"bytes",
report=MetricReport.LOWER_IS_BETTER,
)
# Stop so we can restart
endpoint.stop()
# Imitate optimizations that console would do for the second start
endpoint.respec(skip_pg_catalog_updates=True)
slru = "eager"

View File

@@ -173,7 +173,6 @@ def test_fully_custom_config(positive_env: NeonEnv):
"image_creation_threshold": 7,
"pitr_interval": "1m",
"lagging_wal_timeout": "23m",
"lazy_slru_download": True,
"max_lsn_wal_lag": 230000,
"min_resident_size_override": 23,
"trace_read_requests": True,

View File

@@ -3,12 +3,10 @@ import uuid
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.pg_version import run_only_on_default_postgres
from fixtures.utils import wait_until
@pytest.mark.parametrize("level", ["trace", "debug", "info", "warn", "error"])
@run_only_on_default_postgres("it does not use any postgres functionality")
def test_logging_event_count(neon_env_builder: NeonEnvBuilder, level: str):
# self-test: make sure the event is logged (i.e., our testing endpoint works)
log_expected = {

View File

@@ -109,7 +109,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
# Timestamp is in the unreachable past
probe_timestamp = tbl[0][1] - timedelta(hours=10)
result = client.timeline_get_lsn_by_timestamp(
tenant_id, timeline_id_child, f"{probe_timestamp.isoformat()}Z"
tenant_id, timeline_id_child, f"{probe_timestamp.isoformat()}Z", 2
)
assert result["kind"] == "past"
# make sure that we return the minimum lsn here at the start of the range

View File

@@ -18,11 +18,11 @@ def test_migrations(neon_simple_env: NeonEnv):
with endpoint.cursor() as cur:
cur.execute("SELECT id FROM neon_migration.migration_id")
migration_id = cur.fetchall()
assert migration_id[0][0] == 3
assert migration_id[0][0] == 2
with open(log_path, "r") as log_file:
logs = log_file.read()
assert "INFO handle_migrations: Ran 3 migrations" in logs
assert "INFO handle_migrations: Ran 2 migrations" in logs
endpoint.stop()
endpoint.start()
@@ -30,7 +30,7 @@ def test_migrations(neon_simple_env: NeonEnv):
with endpoint.cursor() as cur:
cur.execute("SELECT id FROM neon_migration.migration_id")
migration_id = cur.fetchall()
assert migration_id[0][0] == 3
assert migration_id[0][0] == 2
with open(log_path, "r") as log_file:
logs = log_file.read()

View File

@@ -1,44 +1,26 @@
import time
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
from fixtures.pg_version import PgVersion
def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion):
env = neon_simple_env
env.neon_cli.create_branch("test_neon_superuser_publisher", "empty")
pub = env.endpoints.create("test_neon_superuser_publisher")
env.neon_cli.create_branch("test_neon_superuser_subscriber")
sub = env.endpoints.create("test_neon_superuser_subscriber")
pub.respec(skip_pg_catalog_updates=False, features=["migrations"])
pub.start()
sub.respec(skip_pg_catalog_updates=False, features=["migrations"])
sub.start()
env.neon_cli.create_branch("test_neon_superuser", "empty")
endpoint = env.endpoints.create("test_neon_superuser")
endpoint.respec(skip_pg_catalog_updates=False, features=["migrations"])
endpoint.start()
time.sleep(1) # Sleep to let migrations run
with pub.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(
"CREATE ROLE mr_whiskers WITH PASSWORD 'cat' LOGIN INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser"
)
cur.execute("CREATE DATABASE neondb WITH OWNER mr_whiskers")
cur.execute("GRANT ALL PRIVILEGES ON DATABASE neondb TO neon_superuser")
# If we don't do this, creating the subscription will fail later on PG16
pub.edit_hba(["host all mr_whiskers 0.0.0.0/0 md5"])
with sub.cursor() as cur:
cur.execute(
"CREATE ROLE mr_whiskers WITH PASSWORD 'cat' LOGIN INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser"
)
cur.execute("CREATE DATABASE neondb WITH OWNER mr_whiskers")
cur.execute("GRANT ALL PRIVILEGES ON DATABASE neondb TO neon_superuser")
with pub.cursor(dbname="neondb", user="mr_whiskers", password="cat") as cur:
with endpoint.cursor(dbname="neondb", user="mr_whiskers", password="cat") as cur:
cur.execute("SELECT pg_has_role('mr_whiskers', 'neon_superuser', 'member')")
assert cur.fetchall()[0][0]
cur.execute("SELECT pg_has_role('mr_whiskers', 'neon_superuser', 'usage')")
@@ -50,28 +32,3 @@ def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion):
cur.execute("CREATE PUBLICATION pub FOR ALL TABLES")
cur.execute("CREATE ROLE definitely_not_a_superuser WITH PASSWORD 'nope'")
cur.execute("CREATE DATABASE definitely_a_database")
cur.execute("CREATE TABLE t (a int)")
cur.execute("INSERT INTO t VALUES (10), (20)")
cur.execute("SELECT * from t")
res = cur.fetchall()
assert [r[0] for r in res] == [10, 20]
with sub.cursor(dbname="neondb", user="mr_whiskers", password="cat") as cur:
cur.execute("CREATE TABLE t (a int)")
pub_conn = f"host=localhost port={pub.pg_port} dbname=neondb user=mr_whiskers password=cat"
query = f"CREATE SUBSCRIPTION sub CONNECTION '{pub_conn}' PUBLICATION pub"
log.info(f"Creating subscription: {query}")
cur.execute(query)
with pub.cursor(dbname="neondb", user="mr_whiskers", password="cat") as pcur:
pcur.execute("INSERT INTO t VALUES (30), (40)")
time.sleep(1) # Give the change time to propagate
cur.execute("SELECT * FROM t")
res = cur.fetchall()
log.info(res)
assert len(res) == 4
assert [r[0] for r in res] == [10, 20, 30, 40]

View File

@@ -3,10 +3,12 @@ import os
import time
from pathlib import Path
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_wal_insert_lsn
from fixtures.pageserver.utils import (
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.remote_storage import RemoteStorageKind
from fixtures.types import Lsn, TenantId, TimelineId
@@ -98,7 +100,7 @@ def test_import_at_2bil(
vanilla_pg.safe_psql("CREATE TABLE t (t text);")
vanilla_pg.safe_psql("INSERT INTO t VALUES ('inserted in vanilla')")
endpoint_id = "ep-import_from_vanilla"
branch_name = "import_from_vanilla"
tenant = TenantId.generate()
timeline = TimelineId.generate()
@@ -138,7 +140,7 @@ def test_import_at_2bil(
"--timeline-id",
str(timeline),
"--node-name",
endpoint_id,
branch_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
@@ -157,7 +159,8 @@ def test_import_at_2bil(
wait_for_last_record_lsn(ps_http, tenant, timeline, Lsn(end_lsn))
endpoint = env.endpoints.create_start(
endpoint_id,
branch_name,
endpoint_id="ep-import_from_vanilla",
tenant_id=tenant,
config_lines=[
"log_autovacuum_min_duration = 0",
@@ -166,7 +169,6 @@ def test_import_at_2bil(
)
assert endpoint.safe_psql("select count(*) from t") == [(1,)]
# Ok, consume
conn = endpoint.connect()
cur = conn.cursor()
@@ -203,16 +205,6 @@ def test_import_at_2bil(
$$;
"""
)
# Also create a multi-XID with members past the 2 billion mark
conn2 = endpoint.connect()
cur2 = conn2.cursor()
cur.execute("INSERT INTO t VALUES ('x')")
cur.execute("BEGIN; select * from t WHERE t = 'x' FOR SHARE;")
cur2.execute("BEGIN; select * from t WHERE t = 'x' FOR SHARE;")
cur.execute("COMMIT")
cur2.execute("COMMIT")
# A checkpoint writes a WAL record with xl_xid=0. Many other WAL
# records would have the same effect.
cur.execute("checkpoint")
@@ -227,4 +219,213 @@ def test_import_at_2bil(
conn = endpoint.connect()
cur = conn.cursor()
cur.execute("SELECT count(*) from t")
assert cur.fetchone() == (10000 + 1 + 1,)
assert cur.fetchone() == (10000 + 1,)
# This is a followup to the test_import_at_2bil test.
#
# Use a failpoint to reintroduce the bug that test_import_at_2bil also
# tests. Then, after the damage has been done, clear the failpoint to
# fix the bug. Check that the one-off hack that we added for a particular
# timeline that hit this in production fixes the broken timeline.
def test_one_off_hack_for_nextxid_bug(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
pg_distrib_dir: Path,
pg_bin,
vanilla_pg,
):
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
ps_http = env.pageserver.http_client()
env.pageserver.allowed_errors.append(".*nextXid fixed by one-off hack.*")
# We begin with the old bug still present, to create a broken timeline
ps_http.configure_failpoints(("reintroduce-nextxid-update-bug", "return(true)"))
# Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
# PgBin sets it automatically, but here we need to pipe psql output to the tar command.
psql_env = {"LD_LIBRARY_PATH": str(pg_distrib_dir / "lib")}
# Reset the vanilla Postgres instance to somewhat before 2 billion transactions,
# and to around the same LSN as the production timeline.
pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, "pg_resetwal")
cmd = [
pg_resetwal_path,
"--next-transaction-id=2129920000",
"-l",
"000000010000035A000000E0",
"-D",
str(vanilla_pg.pgdatadir),
]
pg_bin.run_capture(cmd, env=psql_env)
vanilla_pg.start()
vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
vanilla_pg.safe_psql(
"""create table tt as select 'long string to consume some space' || g
from generate_series(1,300000) g"""
)
assert vanilla_pg.safe_psql("select count(*) from tt") == [(300000,)]
vanilla_pg.safe_psql("CREATE TABLE t (t text);")
vanilla_pg.safe_psql("INSERT INTO t VALUES ('inserted in vanilla')")
branch_name = "import_from_vanilla"
# This is the tenant/timeline that the one-off hack targets
tenant = "df254570a4f603805528b46b0d45a76c"
timeline = TimelineId.generate()
env.pageserver.tenant_create(tenant)
# Take basebackup
basebackup_dir = os.path.join(test_output_dir, "basebackup")
base_tar = os.path.join(basebackup_dir, "base.tar")
wal_tar = os.path.join(basebackup_dir, "pg_wal.tar")
os.mkdir(basebackup_dir)
vanilla_pg.safe_psql("CHECKPOINT")
pg_bin.run(
[
"pg_basebackup",
"-F",
"tar",
"-d",
vanilla_pg.connstr(),
"-D",
basebackup_dir,
]
)
# Get start_lsn and end_lsn
with open(os.path.join(basebackup_dir, "backup_manifest")) as f:
manifest = json.load(f)
start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]
def import_tar(base, wal):
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant),
"--timeline-id",
str(timeline),
"--node-name",
branch_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
base,
"--end-lsn",
end_lsn,
"--wal-tarfile",
wal,
"--pg-version",
env.pg_version,
]
)
# Importing correct backup works
import_tar(base_tar, wal_tar)
wait_for_last_record_lsn(ps_http, tenant, timeline, Lsn(end_lsn))
endpoint = env.endpoints.create_start(
branch_name,
endpoint_id="ep-import_from_vanilla",
tenant_id=tenant,
config_lines=[
"log_autovacuum_min_duration = 0",
"autovacuum_naptime='5 s'",
],
)
assert endpoint.safe_psql("select count(*) from t") == [(1,)]
conn = endpoint.connect()
cur = conn.cursor()
# Install extension containing function needed for test
cur.execute("CREATE EXTENSION neon_test_utils")
# Advance nextXid to the target XID, which is somewhat above the 2
# billion mark.
while True:
xid = int(query_scalar(cur, "SELECT txid_current()"))
log.info(f"xid now {xid}")
# Consume XIDs in batches of 50k, then 5k, until we approach the target XID
if xid < (2325447052 - 100000):
cur.execute("select test_consume_xids(50000);")
elif xid < 2325447052 - 10000:
cur.execute("select test_consume_xids(5000);")
else:
break
# Run a bunch of real INSERTs to cross over the 2 billion mark
# Use a begin-exception block to have a separate sub-XID for each insert.
cur.execute(
"""
do $$
begin
for i in 1..10000 loop
-- Use a begin-exception block to generate a new subtransaction on each iteration
begin
insert into t values (i);
exception when others then
raise 'not expected %', sqlerrm;
end;
end loop;
end;
$$;
"""
)
# A checkpoint writes a WAL record with xl_xid=0. Many other WAL
# records would have the same effect.
cur.execute("checkpoint")
# Ok, the nextXid in the pageserver at this LSN should now be incorrectly
# set to 1:1024. Remember this LSN.
broken_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_insert_lsn()"))
# Ensure that the broken checkpoint data has reached permanent storage
ps_http.timeline_checkpoint(tenant, timeline)
wait_for_upload(ps_http, tenant, timeline, broken_lsn)
# Now fix the bug, and generate some WAL with XIDs
ps_http.configure_failpoints(("reintroduce-nextxid-update-bug", "off"))
cur.execute("INSERT INTO t VALUES ('after fix')")
fixed_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_insert_lsn()"))
log.info(f"nextXid was broken by {broken_lsn}, and fixed again by {fixed_lsn}")
# Stop the original endpoint, we don't need it anymore.
endpoint.stop()
# Test that we cannot start a new endpoint at the broken LSN.
env.neon_cli.create_branch(
"at-broken-lsn", branch_name, ancestor_start_lsn=broken_lsn, tenant_id=tenant
)
endpoint_broken = env.endpoints.create(
"at-broken-lsn",
endpoint_id="ep-at-broken-lsn",
tenant_id=tenant,
)
with pytest.raises(RuntimeError, match="Postgres exited unexpectedly with code 1"):
endpoint_broken.start()
assert endpoint_broken.log_contains(
'Could not open file "pg_xact/0000": No such file or directory'
)
# But once the bug is fixed, the one-off hack repairs the timeline, and
# starting at a later LSN works.
env.neon_cli.create_branch(
"at-fixed-lsn", branch_name, ancestor_start_lsn=fixed_lsn, tenant_id=tenant
)
endpoint_fixed = env.endpoints.create_start(
"at-fixed-lsn", endpoint_id="ep-at-fixed-lsn", tenant_id=tenant
)
conn = endpoint_fixed.connect()
cur = conn.cursor()
cur.execute("SELECT count(*) from t")
# One "inserted in vanilla" row, 10000 in the DO-loop, and one "after fix" row
assert cur.fetchone() == (1 + 10000 + 1,)
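The break-then-fix failpoint toggling is the crux of the test above; a condensed sketch of the pattern, with the calls as used in the test body:
# Arm the failpoint: WAL digestion stops updating nextXid (the old bug).
ps_http.configure_failpoints(("reintroduce-nextxid-update-bug", "return(true)"))
# ... import the timeline and burn XIDs while the bug is live ...
# Disarm it: WAL at or after fixed_lsn is digested correctly again, and the
# one-off hack repairs the already-broken nextXid value.
ps_http.configure_failpoints(("reintroduce-nextxid-update-bug", "off"))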

View File

@@ -1,272 +0,0 @@
import time
from collections import defaultdict
from fixtures.neon_fixtures import (
NeonEnvBuilder,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import tenant_delete_wait_completed, timeline_delete_wait_completed
from fixtures.pg_version import PgVersion
from fixtures.types import TenantId, TimelineId
from fixtures.utils import wait_until
def test_sharding_service_smoke(
neon_env_builder: NeonEnvBuilder,
):
"""
Test the basic lifecycle of a sharding service:
- Restarting the service
- Restarting a pageserver
- Creating and deleting tenants and timelines
- Marking a pageserver offline
"""
neon_env_builder.num_pageservers = 3
env = neon_env_builder.init_configs()
# Start services by hand so that we can skip a pageserver (it will start and register later)
env.broker.try_start()
env.attachment_service.start()
env.pageservers[0].start()
env.pageservers[1].start()
for sk in env.safekeepers:
sk.start()
# The pageservers we started should have registered with the sharding service on startup
nodes = env.attachment_service.node_list()
assert len(nodes) == 2
assert set(n["node_id"] for n in nodes) == {env.pageservers[0].id, env.pageservers[1].id}
# Starting an additional pageserver should register successfully
env.pageservers[2].start()
nodes = env.attachment_service.node_list()
assert len(nodes) == 3
assert set(n["node_id"] for n in nodes) == {ps.id for ps in env.pageservers}
# Use a multiple of the pageserver count to get a nice even number of shards on each one
tenant_shard_count = len(env.pageservers) * 4
tenant_count = len(env.pageservers) * 2
shards_per_tenant = tenant_shard_count // tenant_count
tenant_ids = set(TenantId.generate() for i in range(0, tenant_count))
# Creating several tenants should spread out across the pageservers
for tid in tenant_ids:
env.neon_cli.create_tenant(tid, shard_count=shards_per_tenant)
def get_node_shard_counts():
counts: defaultdict[str, int] = defaultdict(int)
for tid in tenant_ids:
for shard in env.attachment_service.locate(tid):
counts[shard["node_id"]] += 1
return counts
for node_id, count in get_node_shard_counts().items():
# we used a multiple of the pageserver count for the total shard count,
# so expect an equal number on all pageservers
assert count == tenant_shard_count / len(
env.pageservers
), f"Node {node_id} has bad count {count}"
# Creating and deleting timelines should work, using identical API to pageserver
timeline_crud_tenant = next(iter(tenant_ids))
timeline_id = TimelineId.generate()
env.attachment_service.pageserver_api().timeline_create(
pg_version=PgVersion.NOT_SET, tenant_id=timeline_crud_tenant, new_timeline_id=timeline_id
)
timelines = env.attachment_service.pageserver_api().timeline_list(timeline_crud_tenant)
assert len(timelines) == 2
assert timeline_id in set(TimelineId(t["timeline_id"]) for t in timelines)
# virtual_ps_http.timeline_delete(tenant_id=timeline_crud_tenant, timeline_id=timeline_id)
timeline_delete_wait_completed(
env.attachment_service.pageserver_api(), timeline_crud_tenant, timeline_id
)
timelines = env.attachment_service.pageserver_api().timeline_list(timeline_crud_tenant)
assert len(timelines) == 1
assert timeline_id not in set(TimelineId(t["timeline_id"]) for t in timelines)
# Marking a pageserver offline should migrate tenants away from it.
env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Offline"})
def node_evacuated(node_id: int):
counts = get_node_shard_counts()
assert counts[node_id] == 0
wait_until(10, 1, lambda: node_evacuated(env.pageservers[0].id))
# Marking the pageserver active should not migrate anything to it
# immediately
env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Active"})
time.sleep(1)
assert get_node_shard_counts()[env.pageservers[0].id] == 0
# Delete all the tenants
for tid in tenant_ids:
tenant_delete_wait_completed(env.attachment_service.pageserver_api(), tid, 10)
# Set a scheduling policy on one node, create all the tenants, observe
# that the scheduling policy is respected.
env.attachment_service.node_configure(env.pageservers[1].id, {"scheduling": "Draining"})
# Create some fresh tenants
tenant_ids = set(TenantId.generate() for i in range(0, tenant_count))
for tid in tenant_ids:
env.neon_cli.create_tenant(tid, shard_count=shards_per_tenant)
counts = get_node_shard_counts()
# Nothing should have been scheduled on the node in Draining
assert counts[env.pageservers[1].id] == 0
assert counts[env.pageservers[0].id] == tenant_shard_count // 2
assert counts[env.pageservers[2].id] == tenant_shard_count // 2
def test_sharding_service_passthrough(
neon_env_builder: NeonEnvBuilder,
):
"""
For simple timeline/tenant GET APIs that don't require coordination across
shards, the sharding service implements a proxy to shard zero. This test
calls those APIs.
"""
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_start()
# We will talk to the attachment service as if it were a pageserver, using the pageserver
# HTTP client
client = PageserverHttpClient(env.attachment_service_port, lambda: True)
timelines = client.timeline_list(tenant_id=env.initial_tenant)
assert len(timelines) == 1
def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
tenant_a = env.initial_tenant
tenant_b = TenantId.generate()
env.attachment_service.tenant_create(tenant_b)
env.pageserver.tenant_detach(tenant_a)
# TODO: extend this test to use multiple pageservers, and check that locations don't move around
# on restart.
# Attachment service restart
env.attachment_service.stop()
env.attachment_service.start()
observed = set(TenantId(tenant["id"]) for tenant in env.pageserver.http_client().tenant_list())
# Tenant A should remain detached
assert tenant_a not in observed
# Tenant B should still be attached
assert tenant_b in observed
# Pageserver restart
env.pageserver.stop()
env.pageserver.start()
# Same assertions as above: restarting either service should not perturb things
observed = set(TenantId(tenant["id"]) for tenant in env.pageserver.http_client().tenant_list())
assert tenant_a not in observed
assert tenant_b in observed
def test_sharding_service_onboarding(
neon_env_builder: NeonEnvBuilder,
):
"""
We onboard tenants to the sharding service by treating it as a 'virtual pageserver'
which provides the /location_config API. This is similar to creating a tenant,
but imports the generation number.
"""
neon_env_builder.num_pageservers = 2
# Start services by hand so that we can skip registration on one of the pageservers
env = neon_env_builder.init_configs()
env.broker.try_start()
env.attachment_service.start()
# This is the pageserver where we'll initially create the tenant
env.pageservers[0].start(register=False)
origin_ps = env.pageservers[0]
# This is the pageserver managed by the sharding service, where the tenant
# will be attached after onboarding
env.pageservers[1].start(register=True)
dest_ps = env.pageservers[1]
virtual_ps_http = PageserverHttpClient(env.attachment_service_port, lambda: True)
for sk in env.safekeepers:
sk.start()
# Create a tenant directly via pageserver HTTP API, skipping the attachment service
tenant_id = TenantId.generate()
generation = 123
origin_ps.http_client().tenant_create(tenant_id, generation=generation)
# As if doing a live migration, first configure origin into stale mode
origin_ps.http_client().tenant_location_conf(
tenant_id,
{
"mode": "AttachedStale",
"secondary_conf": None,
"tenant_conf": {},
"generation": generation,
},
)
# Call into attachment service to onboard the tenant
generation += 1
virtual_ps_http.tenant_location_conf(
tenant_id,
{
"mode": "AttachedMulti",
"secondary_conf": None,
"tenant_conf": {},
"generation": generation,
},
)
# As if doing a live migration, detach the original pageserver
origin_ps.http_client().tenant_location_conf(
tenant_id,
{
"mode": "Detached",
"secondary_conf": None,
"tenant_conf": {},
"generation": None,
},
)
# As if doing a live migration, call into the attachment service to
# set it to AttachedSingle: this is a no-op, but we test it because the
# cloud control plane may call this for symmetry with live migration to
# an individual pageserver
virtual_ps_http.tenant_location_conf(
tenant_id,
{
"mode": "AttachedSingle",
"secondary_conf": None,
"tenant_conf": {},
"generation": generation,
},
)
# We should see the tenant is now attached to the pageserver managed
# by the sharding service
origin_tenants = origin_ps.http_client().tenant_list()
assert len(origin_tenants) == 0
dest_tenants = dest_ps.http_client().tenant_list()
assert len(dest_tenants) == 1
assert TenantId(dest_tenants[0]["id"]) == tenant_id
# sharding service advances generation by 1 when it first attaches
assert dest_tenants[0]["generation"] == generation + 1
# The onboarded tenant should survive a restart of the sharding service
env.attachment_service.stop()
env.attachment_service.start()
# The onboarded tenant should survive a restart of the pageserver
dest_ps.stop()
dest_ps.start()
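A compressed sketch of the generation bookkeeping checked above (names as in the removed test); the service bumps the generation once more when it first attaches the tenant:
generation = 123   # tenant created directly on the origin pageserver
generation += 1    # incremented when onboarding via /location_config
# The sharding service advances it once more on its first attach:
assert dest_tenants[0]["generation"] == generation + 1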

View File

@@ -376,6 +376,11 @@ def test_create_churn_during_restart(neon_env_builder: NeonEnvBuilder):
# so we allow it to log at WARN, even if it is occasionally a false positive.
env.pageserver.allowed_errors.append(".*failed to freeze and flush.*")
# When we shut down a tenant during a timeline creation, initdb is not cancelled; we wait
# for it to complete (since https://github.com/neondatabase/neon/pull/6451). This means
# that shutdown can be delayed by >=1s on debug builds where initdb takes a long time to run.
env.pageserver.allowed_errors.append(".*still waiting, taking longer than expected... gate.*")
def create_bg(delay_ms):
time.sleep(delay_ms / 1000.0)
try:

View File

@@ -1946,51 +1946,3 @@ def test_timeline_copy(neon_env_builder: NeonEnvBuilder, insert_rows: int):
assert orig_digest == new_digest
# TODO: test timelines can start after copy
def test_patch_control_file(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
endpoint = env.endpoints.create_start("main")
# initialize safekeeper
endpoint.safe_psql("create table t(key int, value text)")
# update control file
res = (
env.safekeepers[0]
.http_client()
.patch_control_file(
tenant_id,
timeline_id,
{
"timeline_start_lsn": "0/1",
},
)
)
timeline_start_lsn_before = res["old_control_file"]["timeline_start_lsn"]
timeline_start_lsn_after = res["new_control_file"]["timeline_start_lsn"]
log.info(f"patch_control_file response: {res}")
log.info(
f"updated control file timeline_start_lsn, before {timeline_start_lsn_before}, after {timeline_start_lsn_after}"
)
assert timeline_start_lsn_after == "0/1"
env.safekeepers[0].stop().start()
# wait/check that safekeeper is alive
endpoint.safe_psql("insert into t values (1, 'payload')")
# check that timeline_start_lsn is updated
res = (
env.safekeepers[0]
.http_client()
.debug_dump({"dump_control_file": "true", "timeline_id": str(timeline_id)})
)
log.info(f"dump_control_file response: {res}")
assert res["timelines"][0]["control_file"]["timeline_start_lsn"] == "0/1"

View File

@@ -515,42 +515,6 @@ def test_recovery_uncommitted(neon_env_builder: NeonEnvBuilder):
asyncio.run(run_recovery_uncommitted(env))
async def run_segment_init_failure(env: NeonEnv):
env.neon_cli.create_branch("test_segment_init_failure")
ep = env.endpoints.create_start("test_segment_init_failure")
ep.safe_psql("create table t(key int, value text)")
ep.safe_psql("insert into t select generate_series(1, 100), 'payload'")
sk = env.safekeepers[0]
sk_http = sk.http_client()
sk_http.configure_failpoints([("sk-write-zeroes", "return")])
conn = await ep.connect_async()
ep.safe_psql("select pg_switch_wal()") # jump to the segment boundary
# The next insertion should hang until the failpoint is disabled.
asyncio.create_task(conn.execute("insert into t select generate_series(1,1), 'payload'"))
sleep_sec = 2
await asyncio.sleep(sleep_sec)
# Also restart the endpoint at the segment boundary to make the test more interesting
ep.stop()
# it must still not be finished
# assert not bg_query.done()
# Without segment rename during init (#6402), the previous statement created a
# partially initialized 16MB segment, so the safekeeper restart also triggers #6401.
sk.stop().start()
ep = env.endpoints.create_start("test_segment_init_failure")
ep.safe_psql("insert into t select generate_series(1,1), 'payload'") # should be ok now
# Test (injected) failure during WAL segment init.
# https://github.com/neondatabase/neon/issues/6401
# https://github.com/neondatabase/neon/issues/6402
def test_segment_init_failure(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
asyncio.run(run_segment_init_failure(env))
@dataclass
class RaceConditionTest:
iteration: int

View File

@@ -60,7 +60,6 @@ fn analyze_trace<R: std::io::Read>(mut reader: R) {
match msg {
PagestreamFeMessage::Exists(_) => {}
PagestreamFeMessage::Nblocks(_) => {}
PagestreamFeMessage::GetSlruSegment(_) => {}
PagestreamFeMessage::GetPage(req) => {
total += 1;

Some files were not shown because too many files have changed in this diff.