Turns out the boxing isn't necessary; we just needed to massage the stack usage properly.

Optimise the passthrough calling convention to further reduce memory usage.
Don't box handle_client anymore, and move the spawning of passthrough into handle_client so that we no longer need to move a heavy object in return position.
2026-02-10 14:10:37 +00:00 · 2025-05-30 08:47:44 +01:00 · 2025-05-29 18:35:24 +01:00 · 2025-05-29 18:20:29 +01:00 · 2025-05-29 17:56:25 +01:00 · 2025-05-29 17:52:26 +01:00
40 changed files with 1184 additions and 1214 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4305,7 +4305,6 @@ dependencies = [
 "hashlink",
 "hex",
 "hex-literal",
- "http 1.1.0",
 "http-utils",
 "humantime",
 "humantime-serde",
@@ -4368,7 +4367,6 @@ dependencies = [
 "toml_edit",
 "tonic 0.13.1",
 "tonic-reflection",
- "tower 0.5.2",
 "tracing",
 "tracing-utils",
 "twox-hash",
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -354,6 +354,9 @@ pub struct ShardImportProgressV1 {
    pub completed: usize,
    /// Hash of the plan
    pub import_plan_hash: u64,
+    /// Soft limit for the job size.
+    /// This needs to remain constant throughout the import.
+    pub job_soft_size_limit: usize,
 }

 impl ShardImportStatus {
@@ -1931,7 +1934,7 @@ pub enum PagestreamFeMessage {
 }

 // Wrapped in libpq CopyData
-#[derive(Debug, strum_macros::EnumProperty)]
+#[derive(strum_macros::EnumProperty)]
 pub enum PagestreamBeMessage {
    Exists(PagestreamExistsResponse),
    Nblocks(PagestreamNblocksResponse),
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -34,7 +34,6 @@ fail.workspace = true
 futures.workspace = true
 hashlink.workspace = true
 hex.workspace = true
-http.workspace = true
 http-utils.workspace = true
 humantime-serde.workspace = true
 humantime.workspace = true
@@ -94,7 +93,6 @@ tokio-util.workspace = true
 toml_edit = { workspace = true, features = [ "serde" ] }
 tonic.workspace = true
 tonic-reflection.workspace = true
-tower.workspace = true
 tracing.workspace = true
 tracing-utils.workspace = true
 url.workspace = true
--- a/pageserver/page_api/src/model.rs
+++ b/pageserver/page_api/src/model.rs
@@ -584,7 +584,6 @@ impl TryFrom<GetSlruSegmentResponse> for proto::GetSlruSegmentResponse {
    type Error = ProtocolError;

    fn try_from(segment: GetSlruSegmentResponse) -> Result<Self, Self::Error> {
-        // TODO: can a segment legitimately be empty?
        if segment.is_empty() {
            return Err(ProtocolError::Missing("segment"));
        }
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -804,7 +804,7 @@ fn start_pageserver(
        } else {
            None
        },
-        basebackup_cache,
+        basebackup_cache.clone(),
    );

    // Spawn a Pageserver gRPC server task. It will spawn separate tasks for
@@ -816,10 +816,12 @@ fn start_pageserver(
    let mut page_service_grpc = None;
    if let Some(grpc_listener) = grpc_listener {
        page_service_grpc = Some(page_service::spawn_grpc(
+            conf,
            tenant_manager.clone(),
            grpc_auth,
            otel_guard.as_ref().map(|g| g.dispatch.clone()),
            grpc_listener,
+            basebackup_cache,
        )?);
    }

--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -837,7 +837,30 @@ async fn collect_eviction_candidates(
                continue;
            }
            let info = tl.get_local_layers_for_disk_usage_eviction().await;
-            debug!(tenant_id=%tl.tenant_shard_id.tenant_id, shard_id=%tl.tenant_shard_id.shard_slug(), timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len());
+            debug!(
+                tenant_id=%tl.tenant_shard_id.tenant_id,
+                shard_id=%tl.tenant_shard_id.shard_slug(),
+                timeline_id=%tl.timeline_id,
+                "timeline resident layers count: {}", info.resident_layers.len()
+            );
+
+            tenant_candidates.extend(info.resident_layers.into_iter());
+            max_layer_size = max_layer_size.max(info.max_layer_size.unwrap_or(0));
+
+            if cancel.is_cancelled() {
+                return Ok(EvictionCandidates::Cancelled);
+            }
+        }
+
+        // Layers of timelines that are still being imported are eviction candidates too.
+        for tl in tenant.list_importing_timelines() {
+            let info = tl.timeline.get_local_layers_for_disk_usage_eviction().await;
+            debug!(
+                tenant_id=%tl.timeline.tenant_shard_id.tenant_id,
+                shard_id=%tl.timeline.tenant_shard_id.shard_slug(),
+                timeline_id=%tl.timeline.timeline_id,
+                "timeline resident layers count: {}", info.resident_layers.len()
+            );

            tenant_candidates.extend(info.resident_layers.into_iter());
            max_layer_size = max_layer_size.max(info.max_layer_size.unwrap_or(0));
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -300,7 +300,7 @@ pub struct TenantShard {
    ///   as in progress.
    /// * Imported timelines are removed when the storage controller calls the post timeline
    ///   import activation endpoint.
-    timelines_importing: std::sync::Mutex<HashMap<TimelineId, ImportingTimeline>>,
+    timelines_importing: std::sync::Mutex<HashMap<TimelineId, Arc<ImportingTimeline>>>,

    /// The last tenant manifest known to be in remote storage. None if the manifest has not yet
    /// been either downloaded or uploaded. Always Some after tenant attach.
@@ -672,6 +672,7 @@ pub enum MaybeOffloaded {
 pub enum TimelineOrOffloaded {
    Timeline(Arc<Timeline>),
    Offloaded(Arc<OffloadedTimeline>),
+    Importing(Arc<ImportingTimeline>),
 }

 impl TimelineOrOffloaded {
@@ -683,6 +684,9 @@ impl TimelineOrOffloaded {
            TimelineOrOffloaded::Offloaded(offloaded) => {
                TimelineOrOffloadedArcRef::Offloaded(offloaded)
            }
+            TimelineOrOffloaded::Importing(importing) => {
+                TimelineOrOffloadedArcRef::Importing(importing)
+            }
        }
    }
    pub fn tenant_shard_id(&self) -> TenantShardId {
@@ -695,12 +699,16 @@ impl TimelineOrOffloaded {
        match self {
            TimelineOrOffloaded::Timeline(timeline) => &timeline.delete_progress,
            TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.delete_progress,
+            TimelineOrOffloaded::Importing(importing) => &importing.delete_progress,
        }
    }
    fn maybe_remote_client(&self) -> Option<Arc<RemoteTimelineClient>> {
        match self {
            TimelineOrOffloaded::Timeline(timeline) => Some(timeline.remote_client.clone()),
            TimelineOrOffloaded::Offloaded(_offloaded) => None,
+            TimelineOrOffloaded::Importing(importing) => {
+                Some(importing.timeline.remote_client.clone())
+            }
        }
    }
 }
@@ -708,6 +716,7 @@ impl TimelineOrOffloaded {
 pub enum TimelineOrOffloadedArcRef<'a> {
    Timeline(&'a Arc<Timeline>),
    Offloaded(&'a Arc<OffloadedTimeline>),
+    Importing(&'a Arc<ImportingTimeline>),
 }

 impl TimelineOrOffloadedArcRef<'_> {
@@ -715,12 +724,14 @@ impl TimelineOrOffloadedArcRef<'_> {
        match self {
            TimelineOrOffloadedArcRef::Timeline(timeline) => timeline.tenant_shard_id,
            TimelineOrOffloadedArcRef::Offloaded(offloaded) => offloaded.tenant_shard_id,
+            TimelineOrOffloadedArcRef::Importing(importing) => importing.timeline.tenant_shard_id,
        }
    }
    pub fn timeline_id(&self) -> TimelineId {
        match self {
            TimelineOrOffloadedArcRef::Timeline(timeline) => timeline.timeline_id,
            TimelineOrOffloadedArcRef::Offloaded(offloaded) => offloaded.timeline_id,
+            TimelineOrOffloadedArcRef::Importing(importing) => importing.timeline.timeline_id,
        }
    }
 }
@@ -737,6 +748,12 @@ impl<'a> From<&'a Arc<OffloadedTimeline>> for TimelineOrOffloadedArcRef<'a> {
    }
 }

+impl<'a> From<&'a Arc<ImportingTimeline>> for TimelineOrOffloadedArcRef<'a> {
+    fn from(timeline: &'a Arc<ImportingTimeline>) -> Self {
+        Self::Importing(timeline)
+    }
+}
+
 #[derive(Debug, thiserror::Error, PartialEq, Eq)]
 pub enum GetTimelineError {
    #[error("Timeline is shutting down")]
@@ -1789,20 +1806,25 @@ impl TenantShard {
                    },
                ) => {
                    let timeline_id = timeline.timeline_id;
+                    let import_task_gate = Gate::default();
+                    let import_task_guard = import_task_gate.enter().unwrap();
                    let import_task_handle =
                        tokio::task::spawn(self.clone().create_timeline_import_pgdata_task(
                            timeline.clone(),
                            import_pgdata,
                            guard,
+                            import_task_guard,
                            ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Warn),
                        ));

                    let prev = self.timelines_importing.lock().unwrap().insert(
                        timeline_id,
-                        ImportingTimeline {
+                        Arc::new(ImportingTimeline {
                            timeline: timeline.clone(),
                            import_task_handle,
-                        },
+                            import_task_gate,
+                            delete_progress: TimelineDeleteProgress::default(),
+                        }),
                    );

                    assert!(prev.is_none());
@@ -2420,6 +2442,17 @@ impl TenantShard {
            .collect()
    }

+    /// Lists the timelines that this tenant currently tracks as being imported.
+    /// It's up to callers to omit certain timelines that are not considered ready for use.
+    pub fn list_importing_timelines(&self) -> Vec<Arc<ImportingTimeline>> {
+        self.timelines_importing
+            .lock()
+            .unwrap()
+            .values()
+            .map(Arc::clone)
+            .collect()
+    }
+
    /// Lists timelines the tenant manages, including offloaded ones.
    ///
    /// It's up to callers to omit certain timelines that are not considered ready for use.
@@ -2853,19 +2886,25 @@ impl TenantShard {

        let (timeline, timeline_create_guard) = uninit_timeline.finish_creation_myself();

+        let import_task_gate = Gate::default();
+        let import_task_guard = import_task_gate.enter().unwrap();
+
        let import_task_handle = tokio::spawn(self.clone().create_timeline_import_pgdata_task(
            timeline.clone(),
            index_part,
            timeline_create_guard,
+            import_task_guard,
            timeline_ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Warn),
        ));

        let prev = self.timelines_importing.lock().unwrap().insert(
            timeline.timeline_id,
-            ImportingTimeline {
+            Arc::new(ImportingTimeline {
                timeline: timeline.clone(),
                import_task_handle,
-            },
+                import_task_gate,
+                delete_progress: TimelineDeleteProgress::default(),
+            }),
        );

        // Idempotency is enforced higher up the stack
@@ -2924,6 +2963,7 @@ impl TenantShard {
        timeline: Arc<Timeline>,
        index_part: import_pgdata::index_part_format::Root,
        timeline_create_guard: TimelineCreateGuard,
+        _import_task_guard: GateGuard,
        ctx: RequestContext,
    ) {
        debug_assert_current_span_has_tenant_and_timeline_id();
@@ -3835,6 +3875,9 @@ impl TenantShard {
                        .build_timeline_client(offloaded.timeline_id, self.remote_storage.clone());
                    Arc::new(remote_client)
                }
+                TimelineOrOffloadedArcRef::Importing(_) => {
+                    unreachable!("Importing timelines are not included in the iterator")
+                }
            };

            // Shut down the timeline's remote client: this means that the indices we write
@@ -5044,6 +5087,14 @@ impl TenantShard {
                info!("timeline already exists but is offloaded");
                Err(CreateTimelineError::Conflict)
            }
+            Err(TimelineExclusionError::AlreadyExists {
+                existing: TimelineOrOffloaded::Importing(_existing),
+                ..
+            }) => {
+                // If there's a timeline already importing, then we would hit
+                // the [`TimelineExclusionError::AlreadyCreating`] branch above.
+                unreachable!("Importing timelines hold the creation guard")
+            }
            Err(TimelineExclusionError::AlreadyExists {
                existing: TimelineOrOffloaded::Timeline(existing),
                arg,
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -1348,6 +1348,21 @@ impl RemoteTimelineClient {
        Ok(())
    }

+    pub(crate) fn schedule_unlinking_of_layers_from_index_part<I>(
+        self: &Arc<Self>,
+        names: I,
+    ) -> Result<(), NotInitialized>
+    where
+        I: IntoIterator<Item = LayerName>,
+    {
+        let mut guard = self.upload_queue.lock().unwrap();
+        let upload_queue = guard.initialized_mut()?;
+
+        self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);
+
+        Ok(())
+    }
+
    /// Update the remote index file, removing the to-be-deleted files from the index,
    /// allowing scheduling of actual deletions later.
    fn schedule_unlinking_of_layers_from_index_part0<I>(
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -206,8 +206,8 @@ pub struct GcCompactionQueue {
 }

 static CONCURRENT_GC_COMPACTION_TASKS: Lazy<Arc<Semaphore>> = Lazy::new(|| {
-    // Only allow two timelines on one pageserver to run gc compaction at a time.
-    Arc::new(Semaphore::new(2))
+    // Only allow one timeline on one pageserver to run gc compaction at a time.
+    Arc::new(Semaphore::new(1))
 });

 impl GcCompactionQueue {
--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -121,6 +121,7 @@ async fn remove_maybe_offloaded_timeline_from_tenant(
    // This observes the locking order between timelines and timelines_offloaded
    let mut timelines = tenant.timelines.lock().unwrap();
    let mut timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
+    let mut timelines_importing = tenant.timelines_importing.lock().unwrap();
    let offloaded_children_exist = timelines_offloaded
        .iter()
        .any(|(_, entry)| entry.ancestor_timeline_id == Some(timeline.timeline_id()));
@@ -150,8 +151,12 @@ async fn remove_maybe_offloaded_timeline_from_tenant(
                .expect("timeline that we were deleting was concurrently removed from 'timelines_offloaded' map");
            offloaded_timeline.delete_from_ancestor_with_timelines(&timelines);
        }
+        TimelineOrOffloaded::Importing(importing) => {
+            timelines_importing.remove(&importing.timeline.timeline_id);
+        }
    }

+    drop(timelines_importing);
    drop(timelines_offloaded);
    drop(timelines);

@@ -203,8 +208,17 @@ impl DeleteTimelineFlow {
        guard.mark_in_progress()?;

        // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
-        if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
-            timeline.shutdown(super::ShutdownMode::Hard).await;
+        // Importing timelines are shut down through `ImportingTimeline::shutdown`.
+        match &timeline {
+            TimelineOrOffloaded::Timeline(timeline) => {
+                timeline.shutdown(super::ShutdownMode::Hard).await;
+            }
+            TimelineOrOffloaded::Importing(importing) => {
+                importing.shutdown().await;
+            }
+            TimelineOrOffloaded::Offloaded(_offloaded) => {
+                // Nothing to shut down in this case
+            }
        }

        tenant.gc_block.before_delete(&timeline.timeline_id());
@@ -389,10 +403,18 @@ impl DeleteTimelineFlow {
            Err(anyhow::anyhow!("failpoint: timeline-delete-before-rm"))?
        });

-        // Offloaded timelines have no local state
-        // TODO: once we persist offloaded information, delete the timeline from there, too
-        if let TimelineOrOffloaded::Timeline(timeline) = timeline {
-            delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await;
+        match timeline {
+            TimelineOrOffloaded::Timeline(timeline) => {
+                delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await;
+            }
+            TimelineOrOffloaded::Importing(importing) => {
+                delete_local_timeline_directory(conf, tenant.tenant_shard_id, &importing.timeline)
+                    .await;
+            }
+            TimelineOrOffloaded::Offloaded(_offloaded) => {
+                // Offloaded timelines have no local state
+                // TODO: once we persist offloaded information, delete the timeline from there, too
+            }
        }

        fail::fail_point!("timeline-delete-after-rm", |_| {
@@ -451,12 +473,16 @@ pub(super) fn make_timeline_delete_guard(
    // For more context see this discussion: `https://github.com/neondatabase/neon/pull/4552#discussion_r1253437346`
    let timelines = tenant.timelines.lock().unwrap();
    let timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
+    let timelines_importing = tenant.timelines_importing.lock().unwrap();

    let timeline = match timelines.get(&timeline_id) {
        Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)),
        None => match timelines_offloaded.get(&timeline_id) {
            Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
-            None => return Err(DeleteTimelineError::NotFound),
+            None => match timelines_importing.get(&timeline_id) {
+                Some(t) => TimelineOrOffloaded::Importing(Arc::clone(t)),
+                None => return Err(DeleteTimelineError::NotFound),
+            },
        },
    };

--- a/pageserver/src/tenant/timeline/import_pgdata.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata.rs
@@ -8,8 +8,10 @@ use tokio::task::JoinHandle;
 use tokio_util::sync::CancellationToken;
 use tracing::info;
 use utils::lsn::Lsn;
+use utils::pausable_failpoint;
+use utils::sync::gate::Gate;

-use super::Timeline;
+use super::{Timeline, TimelineDeleteProgress};
 use crate::context::RequestContext;
 use crate::controller_upcall_client::{StorageControllerUpcallApi, StorageControllerUpcallClient};
 use crate::tenant::metadata::TimelineMetadata;
@@ -19,15 +21,23 @@ mod importbucket_client;
 mod importbucket_format;
 pub(crate) mod index_part_format;

-pub(crate) struct ImportingTimeline {
+pub struct ImportingTimeline {
    pub import_task_handle: JoinHandle<()>,
+    pub import_task_gate: Gate,
    pub timeline: Arc<Timeline>,
+    pub delete_progress: TimelineDeleteProgress,
+}
+
+impl std::fmt::Debug for ImportingTimeline {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "ImportingTimeline<{}>", self.timeline.timeline_id)
+    }
 }

 impl ImportingTimeline {
-    pub(crate) async fn shutdown(self) {
+    pub async fn shutdown(&self) {
        self.import_task_handle.abort();
-        let _ = self.import_task_handle.await;
+        self.import_task_gate.close().await;

        self.timeline.remote_client.shutdown().await;
    }
@@ -101,6 +111,8 @@ pub async fn doit(
                .schedule_index_upload_for_file_changes()?;
            timeline.remote_client.wait_completion().await?;

+            pausable_failpoint!("import-timeline-pre-success-notify-pausable");
+
            // Communicate that shard is done.
            // Ensure at-least-once delivery of the upcall to storage controller
            // before we mark the task as done and never come here again.
--- a/pageserver/src/tenant/timeline/import_pgdata/flow.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata/flow.rs
@@ -30,6 +30,7 @@

 use std::collections::HashSet;
 use std::hash::{Hash, Hasher};
+use std::num::NonZeroUsize;
 use std::ops::Range;
 use std::sync::Arc;

@@ -100,8 +101,24 @@ async fn run_v1(
        tasks: Vec::default(),
    };

-    let import_config = &timeline.conf.timeline_import_config;
-    let plan = planner.plan(import_config).await?;
+    // Use the job size limit encoded in the progress if we are resuming an import.
+    // This ensures that imports have stable plans even if the pageserver config changes.
+    let import_config = {
+        match &import_progress {
+            Some(progress) => {
+                let base = &timeline.conf.timeline_import_config;
+                TimelineImportConfig {
+                    import_job_soft_size_limit: NonZeroUsize::new(progress.job_soft_size_limit)
+                        .unwrap(),
+                    import_job_concurrency: base.import_job_concurrency,
+                    import_job_checkpoint_threshold: base.import_job_checkpoint_threshold,
+                }
+            }
+            None => timeline.conf.timeline_import_config.clone(),
+        }
+    };
+
+    let plan = planner.plan(&import_config).await?;

    // Hash the plan and compare with the hash of the plan we got back from the storage controller.
    // If the two match, it means that the planning stage had the same output.
@@ -126,7 +143,7 @@ async fn run_v1(
    pausable_failpoint!("import-timeline-pre-execute-pausable");

    let start_from_job_idx = import_progress.map(|progress| progress.completed);
-    plan.execute(timeline, start_from_job_idx, plan_hash, import_config, ctx)
+    plan.execute(timeline, start_from_job_idx, plan_hash, &import_config, ctx)
        .await
 }

@@ -453,6 +470,7 @@ impl Plan {
                                    jobs: jobs_in_plan,
                                    completed: last_completed_job_idx,
                                    import_plan_hash,
+                                    job_soft_size_limit: import_config.import_job_soft_size_limit.into(),
                                };

                                timeline.remote_client.schedule_index_upload_for_file_changes()?;
@@ -964,6 +982,15 @@ impl ChunkProcessingJob {
            .cloned();
        match existing_layer {
            Some(existing) => {
+                // Unlink the remote layer from the index without scheduling its deletion.
+                // When `existing_layer` drops, [`LayerInner::drop`] will schedule its deletion from
+                // remote storage, but that assumes that the layer was unlinked from the index first.
+                timeline
+                    .remote_client
+                    .schedule_unlinking_of_layers_from_index_part(std::iter::once(
+                        existing.layer_desc().layer_name(),
+                    ))?;
+
                guard.open_mut()?.rewrite_layers(
                    &[(existing.clone(), resident_layer.clone())],
                    &[],
--- a/proxy/src/auth/backend/mod.rs
+++ b/proxy/src/auth/backend/mod.rs
@@ -17,9 +17,7 @@ use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{debug, info, warn};

 use crate::auth::credentials::check_peer_addr_is_in_list;
-use crate::auth::{
-    self, AuthError, ComputeUserInfoMaybeEndpoint, IpPattern, validate_password_and_exchange,
-};
+use crate::auth::{self, AuthError, ComputeUserInfoMaybeEndpoint, validate_password_and_exchange};
 use crate::cache::Cached;
 use crate::config::AuthenticationConfig;
 use crate::context::RequestContext;
@@ -137,16 +135,6 @@ impl<'a, T> Backend<'a, T> {
        }
    }
 }
-impl<'a, T, E> Backend<'a, Result<T, E>> {
-    /// Very similar to [`std::option::Option::transpose`].
-    /// This is most useful for error handling.
-    pub(crate) fn transpose(self) -> Result<Backend<'a, T>, E> {
-        match self {
-            Self::ControlPlane(c, x) => x.map(|x| Backend::ControlPlane(c, x)),
-            Self::Local(l) => Ok(Backend::Local(l)),
-        }
-    }
-}

 pub(crate) struct ComputeCredentials {
    pub(crate) info: ComputeUserInfo,
@@ -284,7 +272,7 @@ async fn auth_quirks(
    allow_cleartext: bool,
    config: &'static AuthenticationConfig,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-) -> auth::Result<(ComputeCredentials, Option<Vec<IpPattern>>)> {
+) -> auth::Result<ComputeCredentials> {
    // If there's no project so far, that entails that client doesn't
    // support SNI or other means of passing the endpoint (project) name.
    // We now expect to see a very specific payload in the place of password.
@@ -301,15 +289,12 @@ async fn auth_quirks(
    debug!("fetching authentication info and allowlists");

    // check allowed list
-    let allowed_ips = if config.ip_allowlist_check_enabled {
+    if config.ip_allowlist_check_enabled {
        let allowed_ips = api.get_allowed_ips(ctx, &info).await?;
        if !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) {
            return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
        }
-        allowed_ips
-    } else {
-        Cached::new_uncached(Arc::new(vec![]))
-    };
+    }

    // check if a VPC endpoint ID is coming in and if yes, if it's allowed
    let access_blocks = api.get_block_public_or_vpc_access(ctx, &info).await?;
@@ -368,7 +353,7 @@ async fn auth_quirks(
    )
    .await
    {
-        Ok(keys) => Ok((keys, Some(allowed_ips.as_ref().clone()))),
+        Ok(keys) => Ok(keys),
        Err(e) => {
            if e.is_password_failed() {
                // The password could have been changed, so we invalidate the cache.
@@ -420,53 +405,39 @@ async fn authenticate_with_secret(
    classic::authenticate(ctx, info, client, config, secret).await
 }

-impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
-    /// Get username from the credentials.
-    pub(crate) fn get_user(&self) -> &str {
-        match self {
-            Self::ControlPlane(_, user_info) => &user_info.user,
-            Self::Local(_) => "local",
-        }
-    }
-
+impl ControlPlaneClient {
    /// Authenticate the client via the requested backend, possibly using credentials.
    #[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]
    pub(crate) async fn authenticate(
-        self,
+        &self,
        ctx: &RequestContext,
        client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
+        user_info: ComputeUserInfoMaybeEndpoint,
        allow_cleartext: bool,
        config: &'static AuthenticationConfig,
        endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-    ) -> auth::Result<(Backend<'a, ComputeCredentials>, Option<Vec<IpPattern>>)> {
-        let res = match self {
-            Self::ControlPlane(api, user_info) => {
-                debug!(
-                    user = &*user_info.user,
-                    project = user_info.endpoint(),
-                    "performing authentication using the console"
-                );
+    ) -> auth::Result<ComputeCredentials> {
+        debug!(
+            user = &*user_info.user,
+            project = user_info.endpoint(),
+            "performing authentication using the console"
+        );

-                let (credentials, ip_allowlist) = auth_quirks(
-                    ctx,
-                    &*api,
-                    user_info,
-                    client,
-                    allow_cleartext,
-                    config,
-                    endpoint_rate_limiter,
-                )
-                .await?;
-                Ok((Backend::ControlPlane(api, credentials), ip_allowlist))
-            }
-            Self::Local(_) => {
-                return Err(auth::AuthError::bad_auth_method("invalid for local proxy"));
-            }
-        };
+        let credentials = auth_quirks(
+            ctx,
+            self,
+            user_info,
+            client,
+            allow_cleartext,
+            config,
+            endpoint_rate_limiter,
+        )
+        .await?;

        // TODO: replace with some metric
        info!("user successfully authenticated");
-        res
+
+        Ok(credentials)
    }
 }

@@ -536,6 +507,25 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials> {
    }
 }

+pub struct ControlPlaneWakeCompute<'a> {
+    pub cplane: &'a ControlPlaneClient,
+    pub creds: ComputeCredentials,
+}
+
+#[async_trait::async_trait]
+impl ComputeConnectBackend for ControlPlaneWakeCompute<'_> {
+    async fn wake_compute(
+        &self,
+        ctx: &RequestContext,
+    ) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
+        self.cplane.wake_compute(ctx, &self.creds.info).await
+    }
+
+    fn get_keys(&self) -> &ComputeCredentialKeys {
+        &self.creds.keys
+    }
+}
+
 #[cfg(test)]
 mod tests {
    #![allow(clippy::unimplemented, clippy::unwrap_used)]
@@ -552,6 +542,7 @@ mod tests {
    use postgres_protocol::message::backend::Message as PgMessage;
    use postgres_protocol::message::frontend;
    use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt};
+    use tokio_util::task::TaskTracker;

    use super::jwt::JwkCache;
    use super::{AuthRateLimiter, auth_quirks};
@@ -702,7 +693,7 @@ mod tests {
    #[tokio::test]
    async fn auth_quirks_scram() {
        let (mut client, server) = tokio::io::duplex(1024);
-        let mut stream = PqStream::new(Stream::from_raw(server));
+        let mut stream = PqStream::new(Stream::from_raw(server), TaskTracker::new().token());

        let ctx = RequestContext::test();
        let api = Auth {
@@ -784,7 +775,7 @@ mod tests {
    #[tokio::test]
    async fn auth_quirks_cleartext() {
        let (mut client, server) = tokio::io::duplex(1024);
-        let mut stream = PqStream::new(Stream::from_raw(server));
+        let mut stream = PqStream::new(Stream::from_raw(server), TaskTracker::new().token());

        let ctx = RequestContext::test();
        let api = Auth {
@@ -838,7 +829,7 @@ mod tests {
    #[tokio::test]
    async fn auth_quirks_password_hack() {
        let (mut client, server) = tokio::io::duplex(1024);
-        let mut stream = PqStream::new(Stream::from_raw(server));
+        let mut stream = PqStream::new(Stream::from_raw(server), TaskTracker::new().token());

        let ctx = RequestContext::test();
        let api = Auth {
@@ -887,7 +878,7 @@ mod tests {
        .await
        .unwrap();

-        assert_eq!(creds.0.info.endpoint, "my-endpoint");
+        assert_eq!(creds.info.endpoint, "my-endpoint");

        handle.await.unwrap();
    }
--- a/proxy/src/auth/mod.rs
+++ b/proxy/src/auth/mod.rs
@@ -1,7 +1,7 @@
 //! Client authentication mechanisms.

 pub mod backend;
-pub use backend::Backend;
+pub use backend::{Backend, ControlPlaneWakeCompute};

 mod credentials;
 pub(crate) use credentials::{
--- a/proxy/src/binary/pg_sni_router.rs
+++ b/proxy/src/binary/pg_sni_router.rs
@@ -18,6 +18,7 @@ use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
 use tokio::net::TcpListener;
 use tokio_rustls::TlsConnector;
 use tokio_util::sync::CancellationToken;
+use tokio_util::task::task_tracker::TaskTrackerToken;
 use tracing::{Instrument, error, info};
 use utils::project_git_version;
 use utils::sentry_init::init_sentry;
@@ -226,7 +227,8 @@ pub(super) async fn task_main(
        let dest_suffix = Arc::clone(&dest_suffix);
        let compute_tls_config = compute_tls_config.clone();

-        connections.spawn(
+        let tracker = connections.token();
+        tokio::spawn(
            async move {
                socket
                    .set_nodelay(true)
@@ -249,6 +251,7 @@ pub(super) async fn task_main(
                    compute_tls_config,
                    tls_server_end_point,
                    socket,
+                    tracker,
                )
                .await
            }
@@ -274,10 +277,11 @@ const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmod
 async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
    ctx: &RequestContext,
    raw_stream: S,
+    tracker: TaskTrackerToken,
    tls_config: Arc<rustls::ServerConfig>,
    tls_server_end_point: TlsServerEndPoint,
-) -> anyhow::Result<Stream<S>> {
-    let mut stream = PqStream::new(Stream::from_raw(raw_stream));
+) -> anyhow::Result<(Stream<S>, TaskTrackerToken)> {
+    let mut stream = PqStream::new(Stream::from_raw(raw_stream), tracker);

    let msg = stream.read_startup_packet().await?;
    use pq_proto::FeStartupPacket::SslRequest;
@@ -291,7 +295,7 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
            // Upgrade raw stream into a secure TLS-backed stream.
            // NOTE: We've consumed `tls`; this fact will be used later.

-            let (raw, read_buf) = stream.into_inner();
+            let (raw, read_buf, tracker) = stream.into_inner();
            // TODO: Normally, client doesn't send any data before
            // server says TLS handshake is ok and read_buf is empty.
            // However, you could imagine pipelining of postgres
@@ -302,13 +306,16 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
                bail!("data is sent before server replied with EncryptionResponse");
            }

-            Ok(Stream::Tls {
-                tls: Box::new(
-                    raw.upgrade(tls_config, !ctx.has_private_peer_addr())
-                        .await?,
-                ),
-                tls_server_end_point,
-            })
+            Ok((
+                Stream::Tls {
+                    tls: Box::new(
+                        raw.upgrade(tls_config, !ctx.has_private_peer_addr())
+                            .await?,
+                    ),
+                    tls_server_end_point,
+                },
+                tracker,
+            ))
        }
        unexpected => {
            info!(
@@ -329,8 +336,10 @@ async fn handle_client(
    compute_tls_config: Option<Arc<rustls::ClientConfig>>,
    tls_server_end_point: TlsServerEndPoint,
    stream: impl AsyncRead + AsyncWrite + Unpin,
+    tracker: TaskTrackerToken,
 ) -> anyhow::Result<()> {
-    let mut tls_stream = ssl_handshake(&ctx, stream, tls_config, tls_server_end_point).await?;
+    let (mut tls_stream, _tracker) =
+        ssl_handshake(&ctx, stream, tracker, tls_config, tls_server_end_point).await?;

    // Cut off first part of the SNI domain
    // We receive required destination details in the format of
--- a/proxy/src/cancellation.rs
+++ b/proxy/src/cancellation.rs
@@ -323,7 +323,7 @@ impl CancellationHandler {
        }
    }

-    pub(crate) fn get_key(self: &Arc<Self>) -> Session {
+    pub(crate) fn get_key(self: Arc<Self>) -> Session {
        // we intentionally generate a random "backend pid" and "secret key" here.
        // we use the corresponding u64 as an identifier for the
        // actual endpoint+pid+secret for postgres/pgbouncer.
@@ -340,7 +340,7 @@ impl CancellationHandler {
        Session {
            key,
            redis_key,
-            cancellation_handler: Arc::clone(self),
+            cancellation_handler: self,
        }
    }

--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -1,8 +1,9 @@
 use std::sync::Arc;

-use futures::{FutureExt, TryFutureExt};
+use futures::TryFutureExt;
 use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
 use tokio_util::sync::CancellationToken;
+use tokio_util::task::task_tracker::TaskTrackerToken;
 use tracing::{Instrument, debug, error, info};

 use crate::auth::backend::ConsoleRedirectBackend;
@@ -14,10 +15,8 @@ use crate::metrics::{Metrics, NumClientConnectionsGuard};
 use crate::protocol2::{ConnectHeader, ConnectionInfo, read_proxy_protocol};
 use crate::proxy::connect_compute::{TcpMechanism, connect_to_compute};
 use crate::proxy::handshake::{HandshakeData, handshake};
-use crate::proxy::passthrough::ProxyPassthrough;
-use crate::proxy::{
-    ClientRequestError, ErrorSource, prepare_client_connection, run_until_cancelled,
-};
+use crate::proxy::passthrough::passthrough;
+use crate::proxy::{ClientRequestError, prepare_client_connection, run_until_cancelled};

 pub async fn task_main(
    config: &'static ProxyConfig,
@@ -35,7 +34,6 @@ pub async fn task_main(
    socket2::SockRef::from(&listener).set_keepalive(true)?;

    let connections = tokio_util::task::task_tracker::TaskTracker::new();
-    let cancellations = tokio_util::task::task_tracker::TaskTracker::new();

    while let Some(accept_result) =
        run_until_cancelled(listener.accept(), &cancellation_token).await
@@ -49,11 +47,11 @@ pub async fn task_main(

        let session_id = uuid::Uuid::new_v4();
        let cancellation_handler = Arc::clone(&cancellation_handler);
-        let cancellations = cancellations.clone();

        debug!(protocol = "tcp", %session_id, "accepted new TCP connection");

-        connections.spawn(async move {
+        let tracker = connections.token();
+        tokio::spawn(async move {
            let (socket, peer_addr) = match read_proxy_protocol(socket).await {
                Err(e) => {
                    error!("per-client task finished with an error: {e:#}");
@@ -103,99 +101,80 @@ pub async fn task_main(
                &config.region,
            );

+            let span = ctx.span();
+            let mut slot = Some(ctx);
            let res = handle_client(
                config,
                backend,
-                &ctx,
+                &mut slot,
                cancellation_handler,
                socket,
                conn_gauge,
-                cancellations,
+                tracker,
            )
-            .instrument(ctx.span())
-            .boxed()
+            .instrument(span)
            .await;

-            match res {
-                Err(e) => {
+            match (slot, res) {
+                (None, _) => {}
+                (Some(ctx), Ok(())) => {
+                    ctx.success();
+                }
+                (Some(ctx), Err(e)) => {
                    ctx.set_error_kind(e.get_error_kind());
-                    error!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
-                }
-                Ok(None) => {
-                    ctx.set_success();
-                }
-                Ok(Some(p)) => {
-                    ctx.set_success();
-                    let _disconnect = ctx.log_connect();
-                    match p.proxy_pass(&config.connect_to_compute).await {
-                        Ok(()) => {}
-                        Err(ErrorSource::Client(e)) => {
-                            error!(
-                                ?session_id,
-                                "per-client task finished with an IO error from the client: {e:#}"
-                            );
-                        }
-                        Err(ErrorSource::Compute(e)) => {
-                            error!(
-                                ?session_id,
-                                "per-client task finished with an IO error from the compute: {e:#}"
-                            );
-                        }
-                    }
+                    tracing::warn!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
                }
            }
        });
    }

    connections.close();
-    cancellations.close();
    drop(listener);

    // Drain connections
    connections.wait().await;
-    cancellations.wait().await;

    Ok(())
 }

 #[allow(clippy::too_many_arguments)]
-pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
+pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send + 'static>(
    config: &'static ProxyConfig,
    backend: &'static ConsoleRedirectBackend,
-    ctx: &RequestContext,
+    ctx_slot: &mut Option<RequestContext>,
    cancellation_handler: Arc<CancellationHandler>,
    stream: S,
    conn_gauge: NumClientConnectionsGuard<'static>,
-    cancellations: tokio_util::task::task_tracker::TaskTracker,
-) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {
-    debug!(
-        protocol = %ctx.protocol(),
-        "handling interactive connection from client"
-    );
+    tracker: TaskTrackerToken,
+) -> Result<(), ClientRequestError> {
+    let protocol = ctx_slot.as_ref().expect("context must be set").protocol();
+    debug!(%protocol, "handling interactive connection from client");

    let metrics = &Metrics::get().proxy;
-    let proto = ctx.protocol();
-    let request_gauge = metrics.connection_requests.guard(proto);
+    let request_gauge = metrics.connection_requests.guard(protocol);

    let tls = config.tls_config.load();
    let tls = tls.as_deref();

-    let record_handshake_error = !ctx.has_private_peer_addr();
-    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
-    let do_handshake = handshake(ctx, stream, tls, record_handshake_error);
+    let data = {
+        let ctx = ctx_slot.as_ref().expect("context must be set");
+        let record_handshake_error = !ctx.has_private_peer_addr();
+        let _pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
+        let do_handshake = handshake(ctx, stream, tracker, tls, record_handshake_error);
+        tokio::time::timeout(config.handshake_timeout, do_handshake).await??
+    };

-    let (mut stream, params) = match tokio::time::timeout(config.handshake_timeout, do_handshake)
-        .await??
-    {
+    let (mut stream, params) = match data {
        HandshakeData::Startup(stream, params) => (stream, params),
-        HandshakeData::Cancel(cancel_key_data) => {
+        HandshakeData::Cancel(cancel_key_data, tracker) => {
            // spawn a task to cancel the session, but don't wait for it
-            cancellations.spawn({
-                let cancellation_handler_clone  = Arc::clone(&cancellation_handler);
-                let ctx = ctx.clone();
+            tokio::spawn({
+                let cancellation_handler_clone = Arc::clone(&cancellation_handler);
+                let ctx = ctx_slot.take().expect("context must be set");
                let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id());
                cancel_span.follows_from(tracing::Span::current());
                async move {
+                    let _tracker = tracker;
                    cancellation_handler_clone
                        .cancel_session(
                            cancel_key_data,
@@ -205,15 +184,17 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
                            backend.get_api(),
                        )
                        .await
-                        .inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok();
-                }.instrument(cancel_span)
+                        .inspect_err(|e| debug!(error = ?e, "cancel_session failed"))
+                        .ok();
+                }
+                .instrument(cancel_span)
            });

-            return Ok(None);
+            return Ok(());
        }
    };
-    drop(pause);

+    let ctx = ctx_slot.as_ref().expect("context must be set");
    ctx.set_db_options(params.clone());

    let (node_info, user_info, _ip_allowlist) = match backend
@@ -228,13 +209,13 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(

    let mut node = connect_to_compute(
        ctx,
-        &TcpMechanism {
+        TcpMechanism {
            user_info,
            params_compat: true,
            params: &params,
            locks: &config.connect_compute_locks,
        },
-        &node_info,
+        node_info,
        config.wake_compute_retry_config,
        &config.connect_to_compute,
    )
@@ -252,17 +233,22 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    // PqStream input buffer. Normally there is none, but our serverless npm
    // driver in pipeline mode sends startup, password and first query
    // immediately after opening the connection.
-    let (stream, read_buf) = stream.into_inner();
+    let (stream, read_buf, tracker) = stream.into_inner();
    node.stream.write_all(&read_buf).await?;

-    Ok(Some(ProxyPassthrough {
-        client: stream,
-        aux: node.aux.clone(),
-        private_link_id: None,
-        compute: node,
-        session_id: ctx.session_id(),
-        cancel: session,
-        _req: request_gauge,
-        _conn: conn_gauge,
-    }))
+    let ctx = ctx_slot.take().expect("context must be set");
+    ctx.set_success();
+
+    tokio::spawn(passthrough(
+        ctx,
+        &config.connect_to_compute,
+        stream,
+        node,
+        session,
+        request_gauge,
+        conn_gauge,
+        tracker,
+    ));
+
+    Ok(())
 }
--- a/proxy/src/context/mod.rs
+++ b/proxy/src/context/mod.rs
@@ -38,7 +38,7 @@ pub struct RequestContext(
    /// I would typically use a RefCell but that would break the `Send` requirements
    /// so we need something with thread-safety. `TryLock` is a cheap alternative
    /// that offers similar semantics to a `RefCell` but with synchronisation.
-    TryLock<RequestContextInner>,
+    TryLock<Box<RequestContextInner>>,
 );

 struct RequestContextInner {
@@ -89,7 +89,7 @@ pub(crate) enum AuthMethod {
 impl Clone for RequestContext {
    fn clone(&self) -> Self {
        let inner = self.0.try_lock().expect("should not deadlock");
-        let new = RequestContextInner {
+        let new = Box::new(RequestContextInner {
            conn_info: inner.conn_info.clone(),
            session_id: inner.session_id,
            protocol: inner.protocol,
@@ -117,7 +117,7 @@ impl Clone for RequestContext {
            disconnect_sender: None,
            latency_timer: LatencyTimer::noop(inner.protocol),
            disconnect_timestamp: inner.disconnect_timestamp,
-        };
+        });

        Self(TryLock::new(new))
    }
@@ -140,7 +140,7 @@ impl RequestContext {
            role = tracing::field::Empty,
        );

-        let inner = RequestContextInner {
+        let inner = Box::new(RequestContextInner {
            conn_info,
            session_id,
            protocol,
@@ -168,7 +168,7 @@ impl RequestContext {
            disconnect_sender: LOG_CHAN_DISCONNECT.get().and_then(|tx| tx.upgrade()),
            latency_timer: LatencyTimer::new(protocol),
            disconnect_timestamp: None,
-        };
+        });

        Self(TryLock::new(inner))
    }
@@ -522,7 +522,7 @@ impl Drop for RequestContextInner {
    }
 }

-pub struct DisconnectLogger(RequestContextInner);
+pub struct DisconnectLogger(Box<RequestContextInner>);

 impl Drop for DisconnectLogger {
    fn drop(&mut self) {
--- a/proxy/src/proxy/connect_compute.rs
+++ b/proxy/src/proxy/connect_compute.rs
@@ -53,6 +53,25 @@ pub(crate) trait ConnectMechanism {
    fn update_connect_config(&self, conf: &mut compute::ConnCfg);
 }

+#[async_trait]
+impl<T: ConnectMechanism + Sync> ConnectMechanism for &T {
+    type Connection = T::Connection;
+    type ConnectError = T::ConnectError;
+    type Error = T::Error;
+    async fn connect_once(
+        &self,
+        ctx: &RequestContext,
+        node_info: &control_plane::CachedNodeInfo,
+        config: &ComputeConfig,
+    ) -> Result<Self::Connection, Self::ConnectError> {
+        T::connect_once(self, ctx, node_info, config).await
+    }
+
+    fn update_connect_config(&self, conf: &mut compute::ConnCfg) {
+        T::update_connect_config(self, conf);
+    }
+}
+
 #[async_trait]
 pub(crate) trait ComputeConnectBackend {
    async fn wake_compute(
@@ -105,8 +124,8 @@ impl ConnectMechanism for TcpMechanism<'_> {
 #[tracing::instrument(skip_all)]
 pub(crate) async fn connect_to_compute<M: ConnectMechanism, B: ComputeConnectBackend>(
    ctx: &RequestContext,
-    mechanism: &M,
-    user_info: &B,
+    mechanism: M,
+    backend: B,
    wake_compute_retry_config: RetryConfig,
    compute: &ComputeConfig,
 ) -> Result<M::Connection, M::Error>
@@ -116,9 +135,9 @@ where
 {
    let mut num_retries = 0;
    let mut node_info =
-        wake_compute(&mut num_retries, ctx, user_info, wake_compute_retry_config).await?;
+        wake_compute(&mut num_retries, ctx, &backend, wake_compute_retry_config).await?;

-    node_info.set_keys(user_info.get_keys());
+    node_info.set_keys(backend.get_keys());
    mechanism.update_connect_config(&mut node_info.config);

    // try once
@@ -159,7 +178,7 @@ where
        let old_node_info = invalidate_cache(node_info);
        // TODO: increment num_retries?
        let mut node_info =
-            wake_compute(&mut num_retries, ctx, user_info, wake_compute_retry_config).await?;
+            wake_compute(&mut num_retries, ctx, &backend, wake_compute_retry_config).await?;
        node_info.reuse_settings(old_node_info);

        mechanism.update_connect_config(&mut node_info.config);
--- a/proxy/src/proxy/copy_bidirectional.rs
+++ b/proxy/src/proxy/copy_bidirectional.rs
@@ -67,7 +67,6 @@ where
    }
 }

-#[tracing::instrument(skip_all)]
 pub async fn copy_bidirectional_client_compute<Client, Compute>(
    client: &mut Client,
    compute: &mut Compute,
--- a/proxy/src/proxy/handshake.rs
+++ b/proxy/src/proxy/handshake.rs
@@ -5,6 +5,7 @@ use pq_proto::{
 };
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
+use tokio_util::task::task_tracker::TaskTrackerToken;
 use tracing::{debug, info, warn};

 use crate::auth::endpoint_sni;
@@ -51,7 +52,7 @@ impl ReportableError for HandshakeError {

 pub(crate) enum HandshakeData<S> {
    Startup(PqStream<Stream<S>>, StartupMessageParams),
-    Cancel(CancelKeyData),
+    Cancel(CancelKeyData, TaskTrackerToken),
 }

 /// Establish a (most probably, secure) connection with the client.
@@ -62,6 +63,7 @@ pub(crate) enum HandshakeData<S> {
 pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
    ctx: &RequestContext,
    stream: S,
+    tracker: TaskTrackerToken,
    mut tls: Option<&TlsConfig>,
    record_handshake_error: bool,
 ) -> Result<HandshakeData<S>, HandshakeError> {
@@ -71,7 +73,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
    const PG_PROTOCOL_EARLIEST: ProtocolVersion = ProtocolVersion::new(3, 0);
    const PG_PROTOCOL_LATEST: ProtocolVersion = ProtocolVersion::new(3, 0);

-    let mut stream = PqStream::new(Stream::from_raw(stream));
+    let mut stream = PqStream::new(Stream::from_raw(stream), tracker);
    loop {
        let msg = stream.read_startup_packet().await?;
        match msg {
@@ -157,15 +159,13 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                        let (_, tls_server_end_point) =
                            tls.cert_resolver.resolve(conn_info.server_name());

-                        stream = PqStream {
-                            framed: Framed {
-                                stream: Stream::Tls {
-                                    tls: Box::new(tls_stream),
-                                    tls_server_end_point,
-                                },
-                                read_buf,
-                                write_buf,
+                        stream.framed = Framed {
+                            stream: Stream::Tls {
+                                tls: Box::new(tls_stream),
+                                tls_server_end_point,
                            },
+                            read_buf,
+                            write_buf,
                        };
                    }
                }
@@ -248,7 +248,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
            }
            FeStartupPacket::CancelRequest(cancel_key_data) => {
                info!(session_type = "cancellation", "successful handshake");
-                break Ok(HandshakeData::Cancel(cancel_key_data));
+                break Ok(HandshakeData::Cancel(cancel_key_data, stream.tracker));
            }
        }
    }
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -10,26 +10,27 @@ pub(crate) mod wake_compute;
 use std::sync::Arc;

 pub use copy_bidirectional::{ErrorSource, copy_bidirectional_client_compute};
-use futures::{FutureExt, TryFutureExt};
+use futures::TryFutureExt;
 use itertools::Itertools;
 use once_cell::sync::OnceCell;
+use passthrough::passthrough;
 use pq_proto::{BeMessage as Be, CancelKeyData, StartupMessageParams};
 use regex::Regex;
 use serde::{Deserialize, Serialize};
-use smol_str::{SmolStr, ToSmolStr, format_smolstr};
+use smol_str::{SmolStr, format_smolstr};
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
 use tokio_util::sync::CancellationToken;
+use tokio_util::task::task_tracker::TaskTrackerToken;
 use tracing::{Instrument, debug, error, info, warn};

 use self::connect_compute::{TcpMechanism, connect_to_compute};
-use self::passthrough::ProxyPassthrough;
 use crate::cancellation::{self, CancellationHandler};
 use crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig};
 use crate::context::RequestContext;
 use crate::error::ReportableError;
 use crate::metrics::{Metrics, NumClientConnectionsGuard};
-use crate::protocol2::{ConnectHeader, ConnectionInfo, ConnectionInfoExtra, read_proxy_protocol};
+use crate::protocol2::{ConnectHeader, ConnectionInfo, read_proxy_protocol};
 use crate::proxy::handshake::{HandshakeData, handshake};
 use crate::rate_limiter::EndpointRateLimiter;
 use crate::stream::{PqStream, Stream};
@@ -70,7 +71,6 @@ pub async fn task_main(
    socket2::SockRef::from(&listener).set_keepalive(true)?;

    let connections = tokio_util::task::task_tracker::TaskTracker::new();
-    let cancellations = tokio_util::task::task_tracker::TaskTracker::new();

    while let Some(accept_result) =
        run_until_cancelled(listener.accept(), &cancellation_token).await
@@ -84,12 +84,12 @@ pub async fn task_main(

        let session_id = uuid::Uuid::new_v4();
        let cancellation_handler = Arc::clone(&cancellation_handler);
-        let cancellations = cancellations.clone();

        debug!(protocol = "tcp", %session_id, "accepted new TCP connection");
        let endpoint_rate_limiter2 = endpoint_rate_limiter.clone();

-        connections.spawn(async move {
+        let tracker = connections.token();
+        tokio::spawn(async move {
            let (socket, conn_info) = match read_proxy_protocol(socket).await {
                Err(e) => {
                    warn!("per-client task finished with an error: {e:#}");
@@ -138,60 +138,41 @@ pub async fn task_main(
                crate::metrics::Protocol::Tcp,
                &config.region,
            );
+            let span = ctx.span();
+            let mut ctx = Some(ctx);

            let res = handle_client(
                config,
                auth_backend,
-                &ctx,
+                &mut ctx,
                cancellation_handler,
                socket,
                ClientMode::Tcp,
                endpoint_rate_limiter2,
                conn_gauge,
-                cancellations,
+                tracker,
            )
-            .instrument(ctx.span())
-            .boxed()
+            .instrument(span)
            .await;

-            match res {
-                Err(e) => {
+            match (ctx, res) {
+                (None, _) => {}
+                (Some(ctx), Ok(())) => {
+                    ctx.success();
+                }
+                (Some(ctx), Err(e)) => {
                    ctx.set_error_kind(e.get_error_kind());
                    warn!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
                }
-                Ok(None) => {
-                    ctx.set_success();
-                }
-                Ok(Some(p)) => {
-                    ctx.set_success();
-                    let _disconnect = ctx.log_connect();
-                    match p.proxy_pass(&config.connect_to_compute).await {
-                        Ok(()) => {}
-                        Err(ErrorSource::Client(e)) => {
-                            warn!(
-                                ?session_id,
-                                "per-client task finished with an IO error from the client: {e:#}"
-                            );
-                        }
-                        Err(ErrorSource::Compute(e)) => {
-                            error!(
-                                ?session_id,
-                                "per-client task finished with an IO error from the compute: {e:#}"
-                            );
-                        }
-                    }
-                }
            }
        });
    }

    connections.close();
-    cancellations.close();
    drop(listener);

    // Drain connections
    connections.wait().await;
-    cancellations.wait().await;

    Ok(())
 }
@@ -258,46 +239,79 @@ impl ReportableError for ClientRequestError {
 }

 #[allow(clippy::too_many_arguments)]
-pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
+pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send + 'static>(
    config: &'static ProxyConfig,
    auth_backend: &'static auth::Backend<'static, ()>,
-    ctx: &RequestContext,
+    ctx_slot: &mut Option<RequestContext>,
    cancellation_handler: Arc<CancellationHandler>,
    stream: S,
    mode: ClientMode,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    conn_gauge: NumClientConnectionsGuard<'static>,
-    cancellations: tokio_util::task::task_tracker::TaskTracker,
-) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {
-    debug!(
-        protocol = %ctx.protocol(),
-        "handling interactive connection from client"
-    );
+    tracker: TaskTrackerToken,
+) -> Result<(), ClientRequestError> {
+    let cplane = match auth_backend {
+        auth::Backend::ControlPlane(cplane, ()) => &**cplane,
+        auth::Backend::Local(_) => unreachable!("local proxy does not run tcp proxy service"),
+    };
+
+    let protocol = ctx_slot.as_ref().expect("context must be set").protocol();
+    debug!(%protocol, "handling interactive connection from client");

    let metrics = &Metrics::get().proxy;
-    let proto = ctx.protocol();
-    let request_gauge = metrics.connection_requests.guard(proto);
+    let request_gauge = metrics.connection_requests.guard(protocol);

-    let tls = config.tls_config.load();
-    let tls = tls.as_deref();
+    let handshake_result: Result<_, ClientRequestError> = async {
+        let tls = config.tls_config.load();
+        let tls = tls.as_deref();

-    let record_handshake_error = !ctx.has_private_peer_addr();
-    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
-    let do_handshake = handshake(ctx, stream, mode.handshake_tls(tls), record_handshake_error);
+        let ctx = ctx_slot.as_ref().expect("context must be set");
+        let record_handshake_error = !ctx.has_private_peer_addr();
+        let data = {
+            let _pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
+            tokio::time::timeout(
+                config.handshake_timeout,
+                handshake(
+                    ctx,
+                    stream,
+                    tracker,
+                    mode.handshake_tls(tls),
+                    record_handshake_error,
+                ),
+            )
+            .await??
+        };

-    let (mut stream, params) = match tokio::time::timeout(config.handshake_timeout, do_handshake)
-        .await??
-    {
-        HandshakeData::Startup(stream, params) => (stream, params),
-        HandshakeData::Cancel(cancel_key_data) => {
-            // spawn a task to cancel the session, but don't wait for it
-            cancellations.spawn({
-                let cancellation_handler_clone = Arc::clone(&cancellation_handler);
-                let ctx = ctx.clone();
-                let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id());
+        match data {
+            HandshakeData::Startup(mut stream, params) => {
+                ctx.set_db_options(params.clone());
+
+                let host = mode.hostname(stream.get_ref());
+                let cn = tls.map(|tls| &tls.common_names);
+
+                // Extract credentials which we're going to use for auth.
+                let result = auth::ComputeUserInfoMaybeEndpoint::parse(ctx, &params, host, cn);
+                let user_info = match result {
+                    Ok(user_info) => user_info,
+                    Err(e) => stream.throw_error(e, Some(ctx)).await?,
+                };
+
+                let session = cancellation_handler.get_key();
+                Ok(Some((stream, params, session, user_info)))
+            }
+            HandshakeData::Cancel(cancel_key_data, tracker) => {
+                let ctx = ctx_slot.take().expect("context must be set");
+                ctx.set_success();
+
+                let cancel_span = tracing::info_span!(parent: None, "cancel_session", session_id = ?ctx.session_id());
                cancel_span.follows_from(tracing::Span::current());
-                async move {
-                    cancellation_handler_clone
+
+                // spawn a task to cancel the session, but don't wait for it
+                tokio::spawn(async move {
+                    // ensure the proxy doesn't shut down until we complete this task.
+                    let _tracker = tracker;
+
+                    cancellation_handler
                        .cancel_session(
                            cancel_key_data,
                            ctx,
@@ -305,111 +319,108 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
                            config.authentication_config.is_vpc_acccess_proxy,
                            auth_backend.get_api(),
                        )
+                        .instrument(cancel_span)
                        .await
-                        .inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok();
-                }.instrument(cancel_span)
-            });
+                        .unwrap_or_else(|e| debug!(error = ?e, "cancel_session failed"));
+                });

-            return Ok(None);
+                Ok(None)
+            }
        }
+    }
+    .await;
+
+    let Some((mut stream, params, session, user_info)) = handshake_result? else {
+        return Ok(());
    };
-    drop(pause);
+    let ctx = ctx_slot.as_ref().expect("context must be set");

-    ctx.set_db_options(params.clone());
+    let auth_result: Result<_, ClientRequestError> = async {
+        let user = user_info.user.clone();

-    let hostname = mode.hostname(stream.get_ref());
+        match cplane
+            .authenticate(
+                ctx,
+                &mut stream,
+                user_info,
+                mode.allow_cleartext(),
+                &config.authentication_config,
+                endpoint_rate_limiter,
+            )
+            .await
+        {
+            Ok(auth_result) => Ok(auth_result),
+            Err(e) => {
+                let db = params.get("database");
+                let app = params.get("application_name");
+                let params_span = tracing::info_span!("", ?user, ?db, ?app);
+                stream
+                    .throw_error(e, Some(ctx))
+                    .instrument(params_span)
+                    .await?
+            }
+        }
+    }
+    .await;

-    let common_names = tls.map(|tls| &tls.common_names);
+    let compute_creds = auth_result?;

-    // Extract credentials which we're going to use for auth.
-    let result = auth_backend
-        .as_ref()
-        .map(|()| auth::ComputeUserInfoMaybeEndpoint::parse(ctx, &params, hostname, common_names))
-        .transpose();
+    let connect_result: Result<_, ClientRequestError> = async {
+        let compute_user_info = compute_creds.info.clone();
+        let params_compat = compute_user_info
+            .options
+            .get(NeonOptions::PARAMS_COMPAT)
+            .is_some();

-    let user_info = match result {
-        Ok(user_info) => user_info,
-        Err(e) => stream.throw_error(e, Some(ctx)).await?,
-    };
-
-    let user = user_info.get_user().to_owned();
-    let (user_info, _ip_allowlist) = match user_info
-        .authenticate(
+        let mut node = connect_to_compute(
            ctx,
-            &mut stream,
-            mode.allow_cleartext(),
-            &config.authentication_config,
-            endpoint_rate_limiter,
+            TcpMechanism {
+                user_info: compute_user_info,
+                params_compat,
+                params: &params,
+                locks: &config.connect_compute_locks,
+            },
+            auth::ControlPlaneWakeCompute {
+                cplane,
+                creds: compute_creds,
+            },
+            config.wake_compute_retry_config,
+            &config.connect_to_compute,
        )
-        .await
-    {
-        Ok(auth_result) => auth_result,
-        Err(e) => {
-            let db = params.get("database");
-            let app = params.get("application_name");
-            let params_span = tracing::info_span!("", ?user, ?db, ?app);
+        .or_else(|e| stream.throw_error(e, Some(ctx)))
+        .await?;

-            return stream
-                .throw_error(e, Some(ctx))
-                .instrument(params_span)
-                .await?;
-        }
-    };
+        session.write_cancel_key(node.cancel_closure.clone())?;
+        prepare_client_connection(&node, *session.key(), &mut stream).await?;

-    let compute_user_info = match &user_info {
-        auth::Backend::ControlPlane(_, info) => &info.info,
-        auth::Backend::Local(_) => unreachable!("local proxy does not run tcp proxy service"),
-    };
-    let params_compat = compute_user_info
-        .options
-        .get(NeonOptions::PARAMS_COMPAT)
-        .is_some();
+        // Before proxy passing, forward to compute whatever data is left in the
+        // PqStream input buffer. Normally there is none, but our serverless npm
+        // driver in pipeline mode sends startup, password and first query
+        // immediately after opening the connection.
+        let (stream, read_buf, tracker) = stream.into_inner();
+        node.stream.write_all(&read_buf).await?;

-    let mut node = connect_to_compute(
+        Ok((node, stream, tracker))
+    }
+    .await;
+
+    let (node, stream, tracker) = connect_result?;
+
+    let ctx = ctx_slot.take().expect("context must be set");
+    ctx.set_success();
+
+    tokio::spawn(passthrough(
        ctx,
-        &TcpMechanism {
-            user_info: compute_user_info.clone(),
-            params_compat,
-            params: &params,
-            locks: &config.connect_compute_locks,
-        },
-        &user_info,
-        config.wake_compute_retry_config,
        &config.connect_to_compute,
-    )
-    .or_else(|e| stream.throw_error(e, Some(ctx)))
-    .await?;
+        stream,
+        node,
+        session,
+        request_gauge,
+        conn_gauge,
+        tracker,
+    ));

-    let cancellation_handler_clone = Arc::clone(&cancellation_handler);
-    let session = cancellation_handler_clone.get_key();
-
-    session.write_cancel_key(node.cancel_closure.clone())?;
-
-    prepare_client_connection(&node, *session.key(), &mut stream).await?;
-
-    // Before proxy passing, forward to compute whatever data is left in the
-    // PqStream input buffer. Normally there is none, but our serverless npm
-    // driver in pipeline mode sends startup, password and first query
-    // immediately after opening the connection.
-    let (stream, read_buf) = stream.into_inner();
-    node.stream.write_all(&read_buf).await?;
-
-    let private_link_id = match ctx.extra() {
-        Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),
-        Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),
-        None => None,
-    };
-
-    Ok(Some(ProxyPassthrough {
-        client: stream,
-        aux: node.aux.clone(),
-        private_link_id,
-        compute: node,
-        session_id: ctx.session_id(),
-        cancel: session,
-        _req: request_gauge,
-        _conn: conn_gauge,
-    }))
+    Ok(())
 }

 /// Finish client connection initialization: confirm auth success, send params, etc.
--- a/proxy/src/proxy/passthrough.rs
+++ b/proxy/src/proxy/passthrough.rs
@@ -1,5 +1,6 @@
-use smol_str::SmolStr;
+use smol_str::{SmolStr, ToSmolStr};
 use tokio::io::{AsyncRead, AsyncWrite};
+use tokio_util::task::task_tracker::TaskTrackerToken;
 use tracing::debug;
 use utils::measured_stream::MeasuredStream;

@@ -7,13 +8,14 @@ use super::copy_bidirectional::ErrorSource;
 use crate::cancellation;
 use crate::compute::PostgresConnection;
 use crate::config::ComputeConfig;
+use crate::context::RequestContext;
 use crate::control_plane::messages::MetricsAuxInfo;
 use crate::metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard};
+use crate::protocol2::ConnectionInfoExtra;
 use crate::stream::Stream;
 use crate::usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS};

 /// Forward bytes in both directions (client <-> compute).
-#[tracing::instrument(skip_all)]
 pub(crate) async fn proxy_pass(
    client: impl AsyncRead + AsyncWrite + Unpin,
    compute: impl AsyncRead + AsyncWrite + Unpin,
@@ -61,41 +63,53 @@ pub(crate) async fn proxy_pass(
    Ok(())
 }

-pub(crate) struct ProxyPassthrough<S> {
-    pub(crate) client: Stream<S>,
-    pub(crate) compute: PostgresConnection,
-    pub(crate) aux: MetricsAuxInfo,
-    pub(crate) session_id: uuid::Uuid,
-    pub(crate) private_link_id: Option<SmolStr>,
-    pub(crate) cancel: cancellation::Session,
+#[allow(clippy::too_many_arguments)]
+pub(crate) async fn passthrough<S: AsyncRead + AsyncWrite + Unpin + Send + 'static>(
+    ctx: RequestContext,
+    compute_config: &'static ComputeConfig,

-    pub(crate) _req: NumConnectionRequestsGuard<'static>,
-    pub(crate) _conn: NumClientConnectionsGuard<'static>,
-}
+    client: Stream<S>,
+    compute: PostgresConnection,
+    cancel: cancellation::Session,

-impl<S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<S> {
-    pub(crate) async fn proxy_pass(
-        self,
-        compute_config: &ComputeConfig,
-    ) -> Result<(), ErrorSource> {
-        let res = proxy_pass(
-            self.client,
-            self.compute.stream,
-            self.aux,
-            self.private_link_id,
-        )
-        .await;
-        if let Err(err) = self
-            .compute
-            .cancel_closure
-            .try_cancel_query(compute_config)
-            .await
-        {
-            tracing::warn!(session_id = ?self.session_id, ?err, "could not cancel the query in the database");
+    _req: NumConnectionRequestsGuard<'static>,
+    _conn: NumClientConnectionsGuard<'static>,
+    _tracker: TaskTrackerToken,
+) {
+    let session_id = ctx.session_id();
+    let private_link_id = match ctx.extra() {
+        Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),
+        Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),
+        None => None,
+    };
+
+    let _disconnect = ctx.log_connect();
+    let res = proxy_pass(client, compute.stream, compute.aux, private_link_id).await;
+
+    match res {
+        Ok(()) => {}
+        Err(ErrorSource::Client(e)) => {
+            tracing::warn!(
+                session_id = ?session_id,
+                "per-client task finished with an IO error from the client: {e:#}"
+            );
+        }
+        Err(ErrorSource::Compute(e)) => {
+            tracing::error!(
+                session_id = ?session_id,
+                "per-client task finished with an IO error from the compute: {e:#}"
+            );
        }
-
-        drop(self.cancel.remove_cancel_key()); // we don't need a result. If the queue is full, we just log the error
-
-        res
    }
+
+    if let Err(err) = compute
+        .cancel_closure
+        .try_cancel_query(compute_config)
+        .await
+    {
+        tracing::warn!(session_id = ?session_id, ?err, "could not cancel the query in the database");
+    }
+
+    // we don't need a result. If the queue is full, we just log the error
+    drop(cancel.remove_cancel_key());
 }
--- a/proxy/src/proxy/tests/mitm.rs
+++ b/proxy/src/proxy/tests/mitm.rs
@@ -38,6 +38,7 @@ async fn proxy_mitm(
        let (end_client, startup) = match handshake(
            &RequestContext::test(),
            client1,
+            TaskTracker::new().token(),
            Some(&server_config1),
            false,
        )
@@ -45,7 +46,7 @@ async fn proxy_mitm(
        .unwrap()
        {
            HandshakeData::Startup(stream, params) => (stream, params),
-            HandshakeData::Cancel(_) => panic!("cancellation not supported"),
+            HandshakeData::Cancel(_, _) => panic!("cancellation not supported"),
        };

        let mut end_server = tokio_util::codec::Framed::new(end_server, PgFrame);
--- a/proxy/src/proxy/tests/mod.rs
+++ b/proxy/src/proxy/tests/mod.rs
@@ -15,6 +15,7 @@ use rstest::rstest;
 use rustls::crypto::ring;
 use rustls::pki_types;
 use tokio::io::DuplexStream;
+use tokio_util::task::TaskTracker;
 use tracing_test::traced_test;

 use super::connect_compute::ConnectMechanism;
@@ -178,10 +179,12 @@ async fn dummy_proxy(
    auth: impl TestAuth + Send,
 ) -> anyhow::Result<()> {
    let (client, _) = read_proxy_protocol(client).await?;
-    let mut stream = match handshake(&RequestContext::test(), client, tls.as_ref(), false).await? {
-        HandshakeData::Startup(stream, _) => stream,
-        HandshakeData::Cancel(_) => bail!("cancellation not supported"),
-    };
+    let t = TaskTracker::new().token();
+    let mut stream =
+        match handshake(&RequestContext::test(), client, t, tls.as_ref(), false).await? {
+            HandshakeData::Startup(stream, _) => stream,
+            HandshakeData::Cancel(_, _) => bail!("cancellation not supported"),
+        };

    auth.authenticate(&mut stream).await?;

@@ -622,7 +625,7 @@ async fn connect_to_compute_success() {
    let mechanism = TestConnectMechanism::new(vec![Wake, Connect]);
    let user_info = helper_create_connect_info(&mechanism);
    let config = config();
-    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
+    connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
        .await
        .unwrap();
    mechanism.verify();
@@ -636,7 +639,7 @@ async fn connect_to_compute_retry() {
    let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Connect]);
    let user_info = helper_create_connect_info(&mechanism);
    let config = config();
-    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
+    connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
        .await
        .unwrap();
    mechanism.verify();
@@ -651,7 +654,7 @@ async fn connect_to_compute_non_retry_1() {
    let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Fail]);
    let user_info = helper_create_connect_info(&mechanism);
    let config = config();
-    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
+    connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
        .await
        .unwrap_err();
    mechanism.verify();
@@ -666,7 +669,7 @@ async fn connect_to_compute_non_retry_2() {
    let mechanism = TestConnectMechanism::new(vec![Wake, Fail, Wake, Connect]);
    let user_info = helper_create_connect_info(&mechanism);
    let config = config();
-    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
+    connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
        .await
        .unwrap();
    mechanism.verify();
@@ -691,7 +694,7 @@ async fn connect_to_compute_non_retry_3() {
    connect_to_compute(
        &ctx,
        &mechanism,
-        &user_info,
+        user_info,
        wake_compute_retry_config,
        &config,
    )
@@ -709,7 +712,7 @@ async fn wake_retry() {
    let mechanism = TestConnectMechanism::new(vec![WakeRetry, Wake, Connect]);
    let user_info = helper_create_connect_info(&mechanism);
    let config = config();
-    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
+    connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
        .await
        .unwrap();
    mechanism.verify();
@@ -724,7 +727,7 @@ async fn wake_non_retry() {
    let mechanism = TestConnectMechanism::new(vec![WakeRetry, WakeFail]);
    let user_info = helper_create_connect_info(&mechanism);
    let config = config();
-    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
+    connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
        .await
        .unwrap_err();
    mechanism.verify();
@@ -743,7 +746,7 @@ async fn fail_but_wake_invalidates_cache() {
    let user = helper_create_connect_info(&mech);
    let cfg = config();

-    connect_to_compute(&ctx, &mech, &user, cfg.retry, &cfg)
+    connect_to_compute(&ctx, &mech, user, cfg.retry, &cfg)
        .await
        .unwrap();

@@ -764,7 +767,7 @@ async fn fail_no_wake_skips_cache_invalidation() {
    let user = helper_create_connect_info(&mech);
    let cfg = config();

-    connect_to_compute(&ctx, &mech, &user, cfg.retry, &cfg)
+    connect_to_compute(&ctx, &mech, user, cfg.retry, &cfg)
        .await
        .unwrap();

@@ -785,7 +788,7 @@ async fn retry_but_wake_invalidates_cache() {
    let user_info = helper_create_connect_info(&mechanism);
    let cfg = config();

-    connect_to_compute(&ctx, &mechanism, &user_info, cfg.retry, &cfg)
+    connect_to_compute(&ctx, &mechanism, user_info, cfg.retry, &cfg)
        .await
        .unwrap();
    mechanism.verify();
@@ -808,7 +811,7 @@ async fn retry_no_wake_skips_invalidation() {
    let user_info = helper_create_connect_info(&mechanism);
    let cfg = config();

-    connect_to_compute(&ctx, &mechanism, &user_info, cfg.retry, &cfg)
+    connect_to_compute(&ctx, &mechanism, user_info, cfg.retry, &cfg)
        .await
        .unwrap_err();
    mechanism.verify();
--- a/proxy/src/serverless/backend.rs
+++ b/proxy/src/serverless/backend.rs
@@ -224,13 +224,13 @@ impl PoolingBackend {
        let backend = self.auth_backend.as_ref().map(|()| keys);
        crate::proxy::connect_compute::connect_to_compute(
            ctx,
-            &TokioMechanism {
+            TokioMechanism {
                conn_id,
                conn_info,
                pool: self.pool.clone(),
                locks: &self.config.connect_compute_locks,
            },
-            &backend,
+            backend,
            self.config.wake_compute_retry_config,
            &self.config.connect_to_compute,
        )
@@ -268,13 +268,13 @@ impl PoolingBackend {
        });
        crate::proxy::connect_compute::connect_to_compute(
            ctx,
-            &HyperMechanism {
+            HyperMechanism {
                conn_id,
                conn_info,
                pool: self.http_conn_pool.clone(),
                locks: &self.config.connect_compute_locks,
            },
-            &backend,
+            backend,
            self.config.wake_compute_retry_config,
            &self.config.connect_to_compute,
        )
--- a/proxy/src/serverless/mod.rs
+++ b/proxy/src/serverless/mod.rs
@@ -41,7 +41,7 @@ use tokio::net::{TcpListener, TcpStream};
 use tokio::time::timeout;
 use tokio_rustls::TlsAcceptor;
 use tokio_util::sync::CancellationToken;
-use tokio_util::task::TaskTracker;
+use tokio_util::task::task_tracker::TaskTrackerToken;
 use tracing::{Instrument, info, warn};

 use crate::cancellation::CancellationHandler;
@@ -124,7 +124,6 @@ pub async fn task_main(
    let connections = tokio_util::task::task_tracker::TaskTracker::new();
    connections.close(); // allows `connections.wait to complete`

-    let cancellations = tokio_util::task::task_tracker::TaskTracker::new();
    while let Some(res) = run_until_cancelled(ws_listener.accept(), &cancellation_token).await {
        let (conn, peer_addr) = res.context("could not accept TCP stream")?;
        if let Err(e) = conn.set_nodelay(true) {
@@ -150,11 +149,11 @@ pub async fn task_main(
        let conn_token = cancellation_token.child_token();
        let tls_acceptor = tls_acceptor.clone();
        let backend = backend.clone();
-        let connections2 = connections.clone();
        let cancellation_handler = cancellation_handler.clone();
        let endpoint_rate_limiter = endpoint_rate_limiter.clone();
-        let cancellations = cancellations.clone();
-        connections.spawn(
+
+        let tracker = connections.token();
+        tokio::spawn(
            async move {
                let conn_token2 = conn_token.clone();
                let _cancel_guard = config.http_config.cancel_set.insert(conn_id, conn_token2);
@@ -181,8 +180,7 @@ pub async fn task_main(
                Box::pin(connection_handler(
                    config,
                    backend,
-                    connections2,
-                    cancellations,
+                    tracker,
                    cancellation_handler,
                    endpoint_rate_limiter,
                    conn_token,
@@ -305,8 +303,7 @@ async fn connection_startup(
 async fn connection_handler(
    config: &'static ProxyConfig,
    backend: Arc<PoolingBackend>,
-    connections: TaskTracker,
-    cancellations: TaskTracker,
+    tracker: TaskTrackerToken,
    cancellation_handler: Arc<CancellationHandler>,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    cancellation_token: CancellationToken,
@@ -347,19 +344,17 @@ async fn connection_handler(

            // `request_handler` is not cancel safe. It expects to be cancelled only at specific times.
            // By spawning the future, we ensure it never gets cancelled until it decides to.
-            let cancellations = cancellations.clone();
-            let handler = connections.spawn(
+            let handler = tokio::spawn(
                request_handler(
                    req,
                    config,
                    backend.clone(),
-                    connections.clone(),
+                    tracker.clone(),
                    cancellation_handler.clone(),
                    session_id,
                    conn_info2.clone(),
                    http_request_token,
                    endpoint_rate_limiter.clone(),
-                    cancellations,
                )
                .in_current_span()
                .map_ok_or_else(api_error_into_response, |r| r),
@@ -400,14 +395,13 @@ async fn request_handler(
    mut request: hyper::Request<Incoming>,
    config: &'static ProxyConfig,
    backend: Arc<PoolingBackend>,
-    ws_connections: TaskTracker,
+    tracker: TaskTrackerToken,
    cancellation_handler: Arc<CancellationHandler>,
    session_id: uuid::Uuid,
    conn_info: ConnectionInfo,
    // used to cancel in-flight HTTP requests. not used to cancel websockets
    http_cancellation_token: CancellationToken,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-    cancellations: TaskTracker,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {
    let host = request
        .headers()
@@ -441,10 +435,17 @@ async fn request_handler(
        let (response, websocket) = framed_websockets::upgrade::upgrade(&mut request)
            .map_err(|e| ApiError::BadRequest(e.into()))?;

-        let cancellations = cancellations.clone();
-        ws_connections.spawn(
+        tokio::spawn(
            async move {
-                if let Err(e) = websocket::serve_websocket(
+                let websocket = match websocket.await {
+                    Err(e) => {
+                        warn!("could not upgrade websocket connection: {e:#}");
+                        return;
+                    }
+                    Ok(websocket) => websocket,
+                };
+
+                websocket::serve_websocket(
                    config,
                    backend.auth_backend,
                    ctx,
@@ -452,12 +453,9 @@ async fn request_handler(
                    cancellation_handler,
                    endpoint_rate_limiter,
                    host,
-                    cancellations,
+                    tracker,
                )
-                .await
-                {
-                    warn!("error in websocket connection: {e:#}");
-                }
+                .await;
            }
            .instrument(span),
        );
--- a/proxy/src/serverless/websocket.rs
+++ b/proxy/src/serverless/websocket.rs
@@ -2,14 +2,14 @@ use std::pin::Pin;
 use std::sync::Arc;
 use std::task::{Context, Poll, ready};

-use anyhow::Context as _;
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use framed_websockets::{Frame, OpCode, WebSocketServer};
 use futures::{Sink, Stream};
-use hyper::upgrade::OnUpgrade;
+use hyper::upgrade::Upgraded;
 use hyper_util::rt::TokioIo;
 use pin_project_lite::pin_project;
 use tokio::io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf};
+use tokio_util::task::task_tracker::TaskTrackerToken;
 use tracing::warn;

 use crate::cancellation::CancellationHandler;
@@ -17,7 +17,7 @@ use crate::config::ProxyConfig;
 use crate::context::RequestContext;
 use crate::error::ReportableError;
 use crate::metrics::Metrics;
-use crate::proxy::{ClientMode, ErrorSource, handle_client};
+use crate::proxy::{ClientMode, handle_client};
 use crate::rate_limiter::EndpointRateLimiter;

 pin_project! {
@@ -128,13 +128,12 @@ pub(crate) async fn serve_websocket(
    config: &'static ProxyConfig,
    auth_backend: &'static crate::auth::Backend<'static, ()>,
    ctx: RequestContext,
-    websocket: OnUpgrade,
+    websocket: Upgraded,
    cancellation_handler: Arc<CancellationHandler>,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    hostname: Option<String>,
-    cancellations: tokio_util::task::task_tracker::TaskTracker,
-) -> anyhow::Result<()> {
-    let websocket = websocket.await?;
+    tracker: TaskTrackerToken,
+) {
    let websocket = WebSocketServer::after_handshake(TokioIo::new(websocket));

    let conn_gauge = Metrics::get()
@@ -142,36 +141,28 @@ pub(crate) async fn serve_websocket(
        .client_connections
        .guard(crate::metrics::Protocol::Ws);

-    let res = Box::pin(handle_client(
+    let mut ctx_slot = Some(ctx);
+    let res = handle_client(
        config,
        auth_backend,
-        &ctx,
+        &mut ctx_slot,
        cancellation_handler,
        WebSocketRw::new(websocket),
        ClientMode::Websockets { hostname },
        endpoint_rate_limiter,
        conn_gauge,
-        cancellations,
-    ))
+        tracker,
+    )
    .await;

-    match res {
-        Err(e) => {
+    match (ctx_slot, res) {
+        (None, _) => {}
+        (Some(ctx), Err(e)) => {
            ctx.set_error_kind(e.get_error_kind());
-            Err(e.into())
+            tracing::warn!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
        }
-        Ok(None) => {
+        (Some(ctx), Ok(())) => {
            ctx.set_success();
-            Ok(())
-        }
-        Ok(Some(p)) => {
-            ctx.set_success();
-            ctx.log_connect();
-            match p.proxy_pass(&config.connect_to_compute).await {
-                Ok(()) => Ok(()),
-                Err(ErrorSource::Client(err)) => Err(err).context("client"),
-                Err(ErrorSource::Compute(err)) => Err(err).context("compute"),
-            }
        }
    }
 }
--- a/proxy/src/stream.rs
+++ b/proxy/src/stream.rs
@@ -10,6 +10,7 @@ use serde::{Deserialize, Serialize};
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
 use tokio_rustls::server::TlsStream;
+use tokio_util::task::task_tracker::TaskTrackerToken;
 use tracing::debug;

 use crate::control_plane::messages::ColdStartInfo;
@@ -24,19 +25,22 @@ use crate::tls::TlsServerEndPoint;
 /// to pass random malformed bytes through the connection).
 pub struct PqStream<S> {
    pub(crate) framed: Framed<S>,
+    pub(crate) tracker: TaskTrackerToken,
 }

 impl<S> PqStream<S> {
    /// Construct a new libpq protocol wrapper.
-    pub fn new(stream: S) -> Self {
+    pub fn new(stream: S, tracker: TaskTrackerToken) -> Self {
        Self {
            framed: Framed::new(stream),
+            tracker,
        }
    }

    /// Extract the underlying stream and read buffer.
-    pub fn into_inner(self) -> (S, BytesMut) {
-        self.framed.into_inner()
+    pub fn into_inner(self) -> (S, BytesMut, TaskTrackerToken) {
+        let (stream, read) = self.framed.into_inner();
+        (stream, read, self.tracker)
    }

    /// Get a shared reference to the underlying stream.
--- a/safekeeper/src/timelines_global_map.rs
+++ b/safekeeper/src/timelines_global_map.rs
@@ -44,6 +44,7 @@ struct GlobalTimelinesState {
    // on-demand timeline creation from recreating deleted timelines.  This is only soft-enforced, as
    // this map is dropped on restart.
    tombstones: HashMap<TenantTimelineId, Instant>,
+    tenant_tombstones: HashMap<TenantId, Instant>,

    conf: Arc<SafeKeeperConf>,
    broker_active_set: Arc<TimelinesSet>,
@@ -81,10 +82,25 @@ impl GlobalTimelinesState {
        }
    }

+    fn has_tombstone(&self, ttid: &TenantTimelineId) -> bool {
+        self.tombstones.contains_key(ttid) || self.tenant_tombstones.contains_key(&ttid.tenant_id)
+    }
+
+    /// Removes both the timeline and tenant tombstones for `ttid`, never short-circuiting.
+    /// Returns `true` if there have been actual changes.
+    fn remove_tombstone(&mut self, ttid: &TenantTimelineId) -> bool {
+        let timeline_removed = self.tombstones.remove(ttid).is_some();
+        self.tenant_tombstones.remove(&ttid.tenant_id).is_some() || timeline_removed
+    }
+
    fn delete(&mut self, ttid: TenantTimelineId) {
        self.timelines.remove(&ttid);
        self.tombstones.insert(ttid, Instant::now());
    }
+
+    fn add_tenant_tombstone(&mut self, tenant_id: TenantId) {
+        self.tenant_tombstones.insert(tenant_id, Instant::now());
+    }
 }

 /// A struct used to manage access to the global timelines map.
@@ -99,6 +115,7 @@ impl GlobalTimelines {
            state: Mutex::new(GlobalTimelinesState {
                timelines: HashMap::new(),
                tombstones: HashMap::new(),
+                tenant_tombstones: HashMap::new(),
                conf,
                broker_active_set: Arc::new(TimelinesSet::default()),
                global_rate_limiter: RateLimiter::new(1, 1),
@@ -245,7 +262,7 @@ impl GlobalTimelines {
                return Ok(timeline);
            }

-            if state.tombstones.contains_key(&ttid) {
+            if state.has_tombstone(&ttid) {
                anyhow::bail!("Timeline {ttid} is deleted, refusing to recreate");
            }

@@ -295,13 +312,14 @@ impl GlobalTimelines {
                _ => {}
            }
            if check_tombstone {
-                if state.tombstones.contains_key(&ttid) {
+                if state.has_tombstone(&ttid) {
                    anyhow::bail!("timeline {ttid} is deleted, refusing to recreate");
                }
            } else {
                // We may be have been asked to load a timeline that was previously deleted (e.g. from `pull_timeline.rs`).  We trust
                // that the human doing this manual intervention knows what they are doing, and remove its tombstone.
-                if state.tombstones.remove(&ttid).is_some() {
+                // It's also possible that we enter this when the tenant has been deleted, even if the timeline itself has never existed.
+                if state.remove_tombstone(&ttid) {
                    warn!("un-deleted timeline {ttid}");
                }
            }
@@ -482,6 +500,7 @@ impl GlobalTimelines {
        let tli_res = {
            let state = self.state.lock().unwrap();

+            // Do NOT check tenant tombstones here: those were set earlier
            if state.tombstones.contains_key(ttid) {
                // Presence of a tombstone guarantees that a previous deletion has completed and there is no work to do.
                info!("Timeline {ttid} was already deleted");
@@ -557,6 +576,10 @@ impl GlobalTimelines {
        action: DeleteOrExclude,
    ) -> Result<HashMap<TenantTimelineId, TimelineDeleteResult>> {
        info!("deleting all timelines for tenant {}", tenant_id);
+
+        // Adding a tombstone before getting the timelines to prevent new timeline additions
+        self.state.lock().unwrap().add_tenant_tombstone(*tenant_id);
+
        let to_delete = self.get_all_for_tenant(*tenant_id);

        let mut err = None;
@@ -600,6 +623,9 @@ impl GlobalTimelines {
        state
            .tombstones
            .retain(|_, v| now.duration_since(*v) < *tombstone_ttl);
+        state
+            .tenant_tombstones
+            .retain(|_, v| now.duration_since(*v) < *tombstone_ttl);
    }
 }

--- a/storage_controller/src/http.rs
+++ b/storage_controller/src/http.rs
@@ -482,6 +482,10 @@ async fn handle_tenant_timeline_delete(
        ForwardOutcome::NotForwarded(_req) => {}
    };

+    service
+        .maybe_delete_timeline_import(tenant_id, timeline_id)
+        .await?;
+
    // For timeline deletions, which both implement an "initially return 202, then 404 once
    // we're done" semantic, we wrap with a retry loop to expose a simpler API upstream.
    async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -99,8 +99,8 @@ use crate::tenant_shard::{
    ScheduleOptimization, ScheduleOptimizationAction, TenantShard,
 };
 use crate::timeline_import::{
-    ImportResult, ShardImportStatuses, TimelineImport, TimelineImportFinalizeError,
-    TimelineImportState, UpcallClient,
+    FinalizingImport, ImportResult, ShardImportStatuses, TimelineImport,
+    TimelineImportFinalizeError, TimelineImportState, UpcallClient,
 };

 const WAITER_FILL_DRAIN_POLL_TIMEOUT: Duration = Duration::from_millis(500);
@@ -232,6 +232,9 @@ struct ServiceState {

    /// Queue of tenants who are waiting for concurrency limits to permit them to reconcile
    delayed_reconcile_rx: tokio::sync::mpsc::Receiver<TenantShardId>,
+
+    /// Tracks ongoing timeline import finalization tasks
+    imports_finalizing: BTreeMap<(TenantId, TimelineId), FinalizingImport>,
 }

 /// Transform an error from a pageserver into an error to return to callers of a storage
@@ -308,6 +311,7 @@ impl ServiceState {
            scheduler,
            ongoing_operation: None,
            delayed_reconcile_rx,
+            imports_finalizing: Default::default(),
        }
    }

@@ -4097,13 +4101,58 @@ impl Service {
    ///
    /// If this method gets pre-empted by shut down, it will be called again at start-up (on-going
    /// imports are stored in the database).
+    ///
+    /// # Cancel-Safety
+    /// Not cancel safe.
+    /// If the caller stops polling, the import will not be removed from
+    /// [`ServiceState::imports_finalizing`].
    #[instrument(skip_all, fields(
        tenant_id=%import.tenant_id,
        timeline_id=%import.timeline_id,
    ))]
+
    async fn finalize_timeline_import(
        self: &Arc<Self>,
        import: TimelineImport,
+    ) -> Result<(), TimelineImportFinalizeError> {
+        let tenant_timeline = (import.tenant_id, import.timeline_id);
+
+        let (_finalize_import_guard, cancel) = {
+            let mut locked = self.inner.write().unwrap();
+            let gate = Gate::default();
+            let cancel = CancellationToken::default();
+
+            let guard = gate.enter().unwrap();
+
+            locked.imports_finalizing.insert(
+                tenant_timeline,
+                FinalizingImport {
+                    gate,
+                    cancel: cancel.clone(),
+                },
+            );
+
+            (guard, cancel)
+        };
+
+        let res = tokio::select! {
+            res = self.finalize_timeline_import_impl(import) => {
+                res
+            },
+            _ = cancel.cancelled() => {
+                Err(TimelineImportFinalizeError::Cancelled)
+            }
+        };
+
+        let mut locked = self.inner.write().unwrap();
+        locked.imports_finalizing.remove(&tenant_timeline);
+
+        res
+    }
+
+    async fn finalize_timeline_import_impl(
+        self: &Arc<Self>,
+        import: TimelineImport,
    ) -> Result<(), TimelineImportFinalizeError> {
        tracing::info!("Finalizing timeline import");

@@ -4303,6 +4352,46 @@ impl Service {
        .await;
    }

+    /// Delete a timeline import if it exists
+    ///
+    /// Firstly, delete the entry from the database. Any updates
+    /// from pageservers after the deletion will fail with a 404, so the
+    /// import cannot progress into finalizing state if it's not there already.
+    /// Secondly, cancel the finalization if one is in progress.
+    pub(crate) async fn maybe_delete_timeline_import(
+        self: &Arc<Self>,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> Result<(), DatabaseError> {
+        let tenant_has_ongoing_import = {
+            let locked = self.inner.read().unwrap();
+            locked
+                .tenants
+                .range(TenantShardId::tenant_range(tenant_id))
+                .any(|(_tid, shard)| shard.importing == TimelineImportState::Importing)
+        };
+
+        if !tenant_has_ongoing_import {
+            return Ok(());
+        }
+
+        self.persistence
+            .delete_timeline_import(tenant_id, timeline_id)
+            .await?;
+
+        let maybe_finalizing = {
+            let mut locked = self.inner.write().unwrap();
+            locked.imports_finalizing.remove(&(tenant_id, timeline_id))
+        };
+
+        if let Some(finalizing) = maybe_finalizing {
+            finalizing.cancel.cancel();
+            finalizing.gate.close().await;
+        }
+
+        Ok(())
+    }
+
    pub(crate) async fn tenant_timeline_archival_config(
        &self,
        tenant_id: TenantId,
@@ -8538,8 +8627,9 @@ impl Service {
        Some(ShardCount(new_shard_count))
    }

-    /// Fetches the top tenant shards from every node, in descending order of
-    /// max logical size. Any node errors will be logged and ignored.
+    /// Fetches the top tenant shards from every available node, in descending order of
+    /// max logical size. Offline nodes are skipped, and any errors from available nodes
+    /// will be logged and ignored.
    async fn get_top_tenant_shards(
        &self,
        request: &TopTenantShardsRequest,
@@ -8550,6 +8640,7 @@ impl Service {
            .unwrap()
            .nodes
            .values()
+            .filter(|node| node.is_available())
            .cloned()
            .collect_vec();

--- a/storage_controller/src/timeline_import.rs
+++ b/storage_controller/src/timeline_import.rs
@@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize};

 use pageserver_api::models::{ShardImportProgress, ShardImportStatus};
 use tokio_util::sync::CancellationToken;
+use utils::sync::gate::Gate;
 use utils::{
    id::{TenantId, TimelineId},
    shard::ShardIndex,
@@ -55,6 +56,8 @@ pub(crate) enum TimelineImportUpdateFollowUp {
 pub(crate) enum TimelineImportFinalizeError {
    #[error("Shut down interrupted import finalize")]
    ShuttingDown,
+    #[error("Import finalization was cancelled")]
+    Cancelled,
    #[error("Mismatched shard detected during import finalize: {0}")]
    MismatchedShards(ShardIndex),
 }
@@ -164,6 +167,11 @@ impl TimelineImport {
    }
 }

+pub(crate) struct FinalizingImport {
+    pub(crate) gate: Gate,
+    pub(crate) cancel: CancellationToken,
+}
+
 pub(crate) type ImportResult = Result<(), String>;

 pub(crate) struct UpcallClient {
--- a/test_runner/fixtures/fast_import.py
+++ b/test_runner/fixtures/fast_import.py
@@ -1,3 +1,4 @@
+import json
 import os
 import shutil
 import subprocess
@@ -11,6 +12,7 @@ from _pytest.config import Config

 from fixtures.log_helper import log
 from fixtures.neon_cli import AbstractNeonCli
+from fixtures.neon_fixtures import Endpoint, VanillaPostgres
 from fixtures.pg_version import PgVersion
 from fixtures.remote_storage import MockS3Server

@@ -161,3 +163,57 @@ def fast_import(
            f.write(fi.cmd.stderr)

        log.info("Written logs to %s", test_output_dir)
+
+
+def mock_import_bucket(vanilla_pg: VanillaPostgres, path: Path):
+    """
+    Mock the import S3 bucket into a local directory for a provided vanilla PG instance.
+    """
+    assert not vanilla_pg.is_running()
+
+    path.mkdir()
+    # what cplane writes before scheduling fast_import
+    specpath = path / "spec.json"
+    specpath.write_text(json.dumps({"branch_id": "somebranch", "project_id": "someproject"}))
+    # what fast_import writes
+    vanilla_pg.pgdatadir.rename(path / "pgdata")
+    statusdir = path / "status"
+    statusdir.mkdir()
+    (statusdir / "pgdata").write_text(json.dumps({"done": True}))
+    (statusdir / "fast_import").write_text(json.dumps({"command": "pgdata", "done": True}))
+
+
+def populate_vanilla_pg(vanilla_pg: VanillaPostgres, target_relblock_size: int) -> int:
+    assert vanilla_pg.is_running()
+
+    vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
+    # fillfactor so we don't need to produce that much data
+    # 900 bytes per row is > 10% => 1 row per page
+    vanilla_pg.safe_psql("""create table t (data char(900)) with (fillfactor = 10)""")
+
+    nrows = 0
+    while True:
+        relblock_size = vanilla_pg.safe_psql_scalar("select pg_relation_size('t')")
+        log.info(
+            f"relblock size: {relblock_size / 8192} pages (target: {target_relblock_size // 8192}) pages"
+        )
+        if relblock_size >= target_relblock_size:
+            break
+        addrows = int((target_relblock_size - relblock_size) // 8192)
+        assert addrows >= 1, "forward progress"
+        vanilla_pg.safe_psql(
+            f"insert into t select generate_series({nrows + 1}, {nrows + addrows})"
+        )
+        nrows += addrows
+
+    return nrows
+
+
+def validate_import_from_vanilla_pg(endpoint: Endpoint, nrows: int):
+    assert endpoint.safe_psql_many(
+        [
+            "set effective_io_concurrency=32;",
+            "SET statement_timeout='300s';",
+            "select count(*), sum(data::bigint)::bigint from t",
+        ]
+    ) == [[], [], [(nrows, nrows * (nrows + 1) // 2)]]
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -2337,6 +2337,22 @@ class NeonStorageController(MetricsGetter, LogUtils):
            headers=self.headers(TokenScope.ADMIN),
        )

+    def import_status(
+        self, tenant_shard_id: TenantShardId, timeline_id: TimelineId, generation: int
+    ):
+        payload = {
+            "tenant_shard_id": str(tenant_shard_id),
+            "timeline_id": str(timeline_id),
+            "generation": generation,
+        }
+
+        self.request(
+            "GET",
+            f"{self.api}/upcall/v1/timeline_import_status",
+            headers=self.headers(TokenScope.GENERATIONS_API),
+            json=payload,
+        )
+
    def reconcile_all(self):
        r = self.request(
            "POST",
@@ -2813,6 +2829,11 @@ class NeonPageserver(PgProtocol, LogUtils):
        if self.running:
            self.http_client().configure_failpoints([(name, action)])

+    def clear_persistent_failpoint(self, name: str):
+        del self._persistent_failpoints[name]
+        if self.running:
+            self.http_client().configure_failpoints([(name, "off")])
+
    def timeline_dir(
        self,
        tenant_shard_id: TenantId | TenantShardId,
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -675,7 +675,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_delete(
        self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, **kwargs
-    ):
+    ) -> int:
        """
        Note that deletion is not instant, it is scheduled and performed mostly in the background.
        So if you need to wait for it to complete use `timeline_delete_wait_completed`.
@@ -688,6 +688,8 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        res_json = res.json()
        assert res_json is None

+        return res.status_code
+
    def timeline_gc(
        self,
        tenant_id: TenantId | TenantShardId,
--- a/test_runner/regress/test_disk_usage_eviction.py
+++ b/test_runner/regress/test_disk_usage_eviction.py
@@ -1,31 +1,41 @@
 from __future__ import annotations

 import enum
+import json
 import time
 from collections import Counter
 from dataclasses import dataclass
 from enum import StrEnum
+from threading import Event
 from typing import TYPE_CHECKING

 import pytest
 from fixtures.common_types import Lsn, TenantId, TimelineId
+from fixtures.fast_import import mock_import_bucket, populate_vanilla_pg
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
    NeonEnv,
    NeonEnvBuilder,
    NeonPageserver,
    PgBin,
+    VanillaPostgres,
    wait_for_last_flush_lsn,
 )
+from fixtures.pageserver.http import (
+    ImportPgdataIdemptencyKey,
+)
 from fixtures.pageserver.utils import wait_for_upload_queue_empty
 from fixtures.remote_storage import RemoteStorageKind
-from fixtures.utils import human_bytes, wait_until
+from fixtures.utils import human_bytes, run_only_on_default_postgres, wait_until
+from werkzeug.wrappers.response import Response

 if TYPE_CHECKING:
    from collections.abc import Iterable
    from typing import Any

    from fixtures.pageserver.http import PageserverHttpClient
+    from pytest_httpserver import HTTPServer
+    from werkzeug.wrappers.request import Request


 GLOBAL_LRU_LOG_LINE = "tenant_min_resident_size-respecting LRU would not relieve pressure, evicting more following global LRU policy"
@@ -164,6 +174,7 @@ class EvictionEnv:
        min_avail_bytes,
        mock_behavior,
        eviction_order: EvictionOrder,
+        wait_logical_size: bool = True,
    ):
        """
        Starts pageserver up with mocked statvfs setup. The startup is
@@ -201,11 +212,12 @@ class EvictionEnv:
        pageserver.start()

        # we now do initial logical size calculation on startup, which on debug builds can fight with disk usage based eviction
-        for tenant_id, timeline_id in self.timelines:
-            tenant_ps = self.neon_env.get_tenant_pageserver(tenant_id)
-            # Pageserver may be none if we are currently not attached anywhere, e.g. during secondary eviction test
-            if tenant_ps is not None:
-                tenant_ps.http_client().timeline_wait_logical_size(tenant_id, timeline_id)
+        if wait_logical_size:
+            for tenant_id, timeline_id in self.timelines:
+                tenant_ps = self.neon_env.get_tenant_pageserver(tenant_id)
+                # Pageserver may be none if we are currently not attached anywhere, e.g. during secondary eviction test
+                if tenant_ps is not None:
+                    tenant_ps.http_client().timeline_wait_logical_size(tenant_id, timeline_id)

        def statvfs_called():
            pageserver.assert_log_contains(".*running mocked statvfs.*")
@@ -882,3 +894,121 @@ def test_secondary_mode_eviction(eviction_env_ha: EvictionEnv):
    assert total_size - post_eviction_total_size >= evict_bytes, (
        "we requested at least evict_bytes worth of free space"
    )
+
+
+@run_only_on_default_postgres(reason="PG version is irrelevant here")
+def test_import_timeline_disk_pressure_eviction(
+    neon_env_builder: NeonEnvBuilder,
+    vanilla_pg: VanillaPostgres,
+    make_httpserver: HTTPServer,
+    pg_bin: PgBin,
+):
+    """
+    Validate that disk usage based eviction relieves pressure while an import is in progress.
+    """
+    # Set up mock control plane HTTP server to listen for import completions
+    import_completion_signaled = Event()
+
+    def handler(request: Request) -> Response:
+        log.info(f"control plane /import_complete request: {request.json}")
+        import_completion_signaled.set()
+        return Response(json.dumps({}), status=200)
+
+    cplane_mgmt_api_server = make_httpserver
+    cplane_mgmt_api_server.expect_request(
+        "/storage/api/v1/import_complete", method="PUT"
+    ).respond_with_handler(handler)
+
+    # Plug the cplane mock in
+    neon_env_builder.control_plane_hooks_api = (
+        f"http://{cplane_mgmt_api_server.host}:{cplane_mgmt_api_server.port}/storage/api/v1/"
+    )
+
+    # The import will specify a local filesystem path mocking remote storage
+    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
+
+    vanilla_pg.start()
+    target_relblock_size = 1024 * 1024 * 128
+    populate_vanilla_pg(vanilla_pg, target_relblock_size)
+    vanilla_pg.stop()
+
+    env = neon_env_builder.init_configs()
+    env.start()
+
+    importbucket_path = neon_env_builder.repo_dir / "test_import_completion_bucket"
+    mock_import_bucket(vanilla_pg, importbucket_path)
+
+    tenant_id = TenantId.generate()
+    timeline_id = TimelineId.generate()
+    idempotency = ImportPgdataIdemptencyKey.random()
+
+    eviction_env = EvictionEnv(
+        timelines=[(tenant_id, timeline_id)],
+        neon_env=env,
+        pageserver_http=env.pageserver.http_client(),
+        layer_size=5 * 1024 * 1024,  # Doesn't apply here
+        pg_bin=pg_bin,  # Not used here
+        pgbench_init_lsns={},  # Not used here
+    )
+
+    # Pause before delivering the final notification to storcon.
+    # This keeps the import in progress.
+    failpoint_name = "import-timeline-pre-success-notify-pausable"
+    env.pageserver.add_persistent_failpoint(failpoint_name, "pause")
+
+    env.storage_controller.tenant_create(tenant_id)
+    env.storage_controller.timeline_create(
+        tenant_id,
+        {
+            "new_timeline_id": str(timeline_id),
+            "import_pgdata": {
+                "idempotency_key": str(idempotency),
+                "location": {"LocalFs": {"path": str(importbucket_path.absolute())}},
+            },
+        },
+    )
+
+    def hit_failpoint():
+        log.info("Checking log for pattern...")
+        try:
+            assert env.pageserver.log_contains(f".*at failpoint {failpoint_name}.*")
+        except Exception:
+            log.exception("Failed to find pattern in log")
+            raise
+
+    wait_until(hit_failpoint)
+    assert not import_completion_signaled.is_set()
+
+    env.pageserver.stop()
+
+    total_size, _, _ = eviction_env.timelines_du(env.pageserver)
+    blocksize = 512
+    total_blocks = (total_size + (blocksize - 1)) // blocksize
+
+    eviction_env.pageserver_start_with_disk_usage_eviction(
+        env.pageserver,
+        period="1s",
+        max_usage_pct=33,
+        min_avail_bytes=0,
+        mock_behavior={
+            "type": "Success",
+            "blocksize": blocksize,
+            "total_blocks": total_blocks,
+            # Only count layer files towards used bytes in the mock_statvfs.
+            # This avoids accounting for metadata files & tenant conf in the tests.
+            "name_filter": ".*__.*",
+        },
+        eviction_order=EvictionOrder.RELATIVE_ORDER_SPARE,
+        wait_logical_size=False,
+    )
+
+    wait_until(lambda: env.pageserver.assert_log_contains(".*disk usage pressure relieved"))
+
+    env.pageserver.clear_persistent_failpoint(failpoint_name)
+
+    def cplane_notified():
+        assert import_completion_signaled.is_set()
+
+    wait_until(cplane_notified)
+
+    env.pageserver.allowed_errors.append(r".* running disk usage based eviction due to pressure.*")
--- a/test_runner/regress/test_import_pgdata.py
+++ b/test_runner/regress/test_import_pgdata.py
@@ -12,13 +12,19 @@ import psycopg2
 import psycopg2.errors
 import pytest
 from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineId
-from fixtures.fast_import import FastImport
+from fixtures.fast_import import (
+    FastImport,
+    mock_import_bucket,
+    populate_vanilla_pg,
+    validate_import_from_vanilla_pg,
+)
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
    NeonEnvBuilder,
    PageserverImportConfig,
    PgBin,
    PgProtocol,
+    StorageControllerApiException,
    StorageControllerMigrationConfig,
    VanillaPostgres,
 )
@@ -59,24 +65,6 @@ smoke_params = [
 ]


-def mock_import_bucket(vanilla_pg: VanillaPostgres, path: Path):
-    """
-    Mock the import S3 bucket into a local directory for a provided vanilla PG instance.
-    """
-    assert not vanilla_pg.is_running()
-
-    path.mkdir()
-    # what cplane writes before scheduling fast_import
-    specpath = path / "spec.json"
-    specpath.write_text(json.dumps({"branch_id": "somebranch", "project_id": "someproject"}))
-    # what fast_import writes
-    vanilla_pg.pgdatadir.rename(path / "pgdata")
-    statusdir = path / "status"
-    statusdir.mkdir()
-    (statusdir / "pgdata").write_text(json.dumps({"done": True}))
-    (statusdir / "fast_import").write_text(json.dumps({"command": "pgdata", "done": True}))
-
-
@skip_in_debug_build("MULTIPLE_RELATION_SEGMENTS has non trivial amount of data")
@pytest.mark.parametrize("shard_count,stripe_size,rel_block_size", smoke_params)
 def test_pgdata_import_smoke(
@@ -131,10 +119,6 @@ def test_pgdata_import_smoke(
    # Put data in vanilla pg
    #

-    vanilla_pg.start()
-    vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
-
-    log.info("create relblock data")
    if rel_block_size == RelBlockSize.ONE_STRIPE_SIZE:
        target_relblock_size = stripe_size * 8192
    elif rel_block_size == RelBlockSize.TWO_STRPES_PER_SHARD:
@@ -145,45 +129,8 @@ def test_pgdata_import_smoke(
    else:
        raise ValueError

-    # fillfactor so we don't need to produce that much data
-    # 900 byte per row is > 10% => 1 row per page
-    vanilla_pg.safe_psql("""create table t (data char(900)) with (fillfactor = 10)""")
-
-    nrows = 0
-    while True:
-        relblock_size = vanilla_pg.safe_psql_scalar("select pg_relation_size('t')")
-        log.info(
-            f"relblock size: {relblock_size / 8192} pages (target: {target_relblock_size // 8192}) pages"
-        )
-        if relblock_size >= target_relblock_size:
-            break
-        addrows = int((target_relblock_size - relblock_size) // 8192)
-        assert addrows >= 1, "forward progress"
-        vanilla_pg.safe_psql(
-            f"insert into t select generate_series({nrows + 1}, {nrows + addrows})"
-        )
-        nrows += addrows
-    expect_nrows = nrows
-    expect_sum = (
-        (nrows) * (nrows + 1) // 2
-    )  # https://stackoverflow.com/questions/43901484/sum-of-the-integers-from-1-to-n
-
-    def validate_vanilla_equivalence(ep):
-        # TODO: would be nicer to just compare pgdump
-
-        # Enable IO concurrency for batching on large sequential scan, to avoid making
-        # this test unnecessarily onerous on CPU. Especially on debug mode, it's still
-        # pretty onerous though, so increase statement_timeout to avoid timeouts.
-        assert ep.safe_psql_many(
-            [
-                "set effective_io_concurrency=32;",
-                "SET statement_timeout='300s';",
-                "select count(*), sum(data::bigint)::bigint from t",
-            ]
-        ) == [[], [], [(expect_nrows, expect_sum)]]
-
-    validate_vanilla_equivalence(vanilla_pg)
-
+    vanilla_pg.start()
+    rows_inserted = populate_vanilla_pg(vanilla_pg, target_relblock_size)
    vanilla_pg.stop()

    #
@@ -274,14 +221,14 @@ def test_pgdata_import_smoke(
        config_lines=ep_config,
    )

-    validate_vanilla_equivalence(ro_endpoint)
+    validate_import_from_vanilla_pg(ro_endpoint, rows_inserted)

    # ensure the import survives restarts
    ro_endpoint.stop()
    env.pageserver.stop(immediate=True)
    env.pageserver.start()
    ro_endpoint.start()
-    validate_vanilla_equivalence(ro_endpoint)
+    validate_import_from_vanilla_pg(ro_endpoint, rows_inserted)

    #
    # validate the layer files in each shard only have the shard-specific data
@@ -321,7 +268,7 @@ def test_pgdata_import_smoke(
    child_workload = workload.branch(timeline_id=child_timeline_id, branch_name="br-tip")
    child_workload.validate()

-    validate_vanilla_equivalence(child_workload.endpoint())
+    validate_import_from_vanilla_pg(child_workload.endpoint(), rows_inserted)

    # ... at the initdb lsn
    _ = env.create_branch(
@@ -336,7 +283,7 @@ def test_pgdata_import_smoke(
        tenant_id=tenant_id,
        config_lines=ep_config,
    )
-    validate_vanilla_equivalence(br_initdb_endpoint)
+    validate_import_from_vanilla_pg(br_initdb_endpoint, rows_inserted)
    with pytest.raises(psycopg2.errors.UndefinedTable):
        br_initdb_endpoint.safe_psql(f"select * from {workload.table}")

@@ -423,8 +370,12 @@ def test_import_completion_on_restart(


@run_only_on_default_postgres(reason="PG version is irrelevant here")
-def test_import_respects_tenant_shutdown(
-    neon_env_builder: NeonEnvBuilder, vanilla_pg: VanillaPostgres, make_httpserver: HTTPServer
+@pytest.mark.parametrize("action", ["restart", "delete"])
+def test_import_respects_timeline_lifecycle(
+    neon_env_builder: NeonEnvBuilder,
+    vanilla_pg: VanillaPostgres,
+    make_httpserver: HTTPServer,
+    action: str,
 ):
    """
    Validate that importing timelines respect the usual timeline life cycle:
@@ -492,16 +443,33 @@ def test_import_respects_tenant_shutdown(
    wait_until(hit_failpoint)
    assert not import_completion_signaled.is_set()

-    # Restart the pageserver while an import job is in progress.
-    # This clears the failpoint and we expect that the import starts up afresh
-    # after the restart and eventually completes.
-    env.pageserver.stop()
-    env.pageserver.start()
+    if action == "restart":
+        # Restart the pageserver while an import job is in progress.
+        # This clears the failpoint and we expect that the import starts up afresh
+        # after the restart and eventually completes.
+        env.pageserver.stop()
+        env.pageserver.start()

-    def cplane_notified():
-        assert import_completion_signaled.is_set()
+        def cplane_notified():
+            assert import_completion_signaled.is_set()

-    wait_until(cplane_notified)
+        wait_until(cplane_notified)
+    elif action == "delete":
+        status = env.storage_controller.pageserver_api().timeline_delete(tenant_id, timeline_id)
+        assert status == 200
+
+        timeline_path = env.pageserver.timeline_dir(tenant_id, timeline_id)
+        assert not timeline_path.exists(), "Timeline dir exists after deletion"
+
+        shard_zero = TenantShardId(tenant_id, 0, 0)
+        location = env.storage_controller.inspect(shard_zero)
+        assert location is not None
+        generation = location[0]
+
+        with pytest.raises(StorageControllerApiException, match="not found"):
+            env.storage_controller.import_status(shard_zero, timeline_id, generation)
+    else:
+        raise RuntimeError(f"{action} param not recognized")


@skip_in_debug_build("Validation query takes too long in debug builds")
@@ -556,23 +524,8 @@ def test_import_chaos(
    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)

    vanilla_pg.start()
-    vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
-    vanilla_pg.safe_psql("""create table t (data char(900)) with (fillfactor = 10)""")

-    nrows = 0
-    while True:
-        relblock_size = vanilla_pg.safe_psql_scalar("select pg_relation_size('t')")
-        log.info(
-            f"relblock size: {relblock_size / 8192} pages (target: {TARGET_RELBOCK_SIZE // 8192}) pages"
-        )
-        if relblock_size >= TARGET_RELBOCK_SIZE:
-            break
-        addrows = int((TARGET_RELBOCK_SIZE - relblock_size) // 8192)
-        assert addrows >= 1, "forward progress"
-        vanilla_pg.safe_psql(
-            f"insert into t select generate_series({nrows + 1}, {nrows + addrows})"
-        )
-        nrows += addrows
+    inserted_rows = populate_vanilla_pg(vanilla_pg, TARGET_RELBOCK_SIZE)

    vanilla_pg.stop()

@@ -740,13 +693,7 @@ def test_import_chaos(
    endpoint = env.endpoints.create_start(branch_name=import_branch_name, tenant_id=tenant_id)

    # Validate the imported data is legit
-    assert endpoint.safe_psql_many(
-        [
-            "set effective_io_concurrency=32;",
-            "SET statement_timeout='300s';",
-            "select count(*), sum(data::bigint)::bigint from t",
-        ]
-    ) == [[], [], [(nrows, nrows * (nrows + 1) // 2)]]
+    validate_import_from_vanilla_pg(endpoint, inserted_rows)

    endpoint.stop()

--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -4192,10 +4192,10 @@ def test_storcon_create_delete_sk_down(
    # ensure the safekeeper deleted the timeline
    def timeline_deleted_on_active_sks():
        env.safekeepers[0].assert_log_contains(
-            f"deleting timeline {tenant_id}/{child_timeline_id} from disk"
+            f"((deleting timeline|Timeline) {tenant_id}/{child_timeline_id} (from disk|was already deleted)|DELETE.*tenant/{tenant_id} .*status: 200 OK)"
        )
        env.safekeepers[2].assert_log_contains(
-            f"deleting timeline {tenant_id}/{child_timeline_id} from disk"
+            f"((deleting timeline|Timeline) {tenant_id}/{child_timeline_id} (from disk|was already deleted)|DELETE.*tenant/{tenant_id} .*status: 200 OK)"
        )

    wait_until(timeline_deleted_on_active_sks)
@@ -4210,7 +4210,7 @@ def test_storcon_create_delete_sk_down(
    # ensure that there is log msgs for the third safekeeper too
    def timeline_deleted_on_sk():
        env.safekeepers[1].assert_log_contains(
-            f"deleting timeline {tenant_id}/{child_timeline_id} from disk"
+            f"((deleting timeline|Timeline) {tenant_id}/{child_timeline_id} (from disk|was already deleted)|DELETE.*tenant/{tenant_id} .*status: 200 OK)"
        )

    wait_until(timeline_deleted_on_sk)
Author	SHA1	Message	Date
Conrad Ludgate	631139ceeb	turns out the boxing isn't necessary, we just needed to massage the stack usage properly	2025-05-30 08:47:44 +01:00
Conrad Ludgate	fd43058bd7	optimise passthrough calling convention to further reduce memory	2025-05-29 18:35:24 +01:00
Conrad Ludgate	cf07c5b5f9	dont box handle_client anymore and move spawning passthrough into handle_client so we don't need to move a heavy object in return position anymore	2025-05-29 18:20:29 +01:00
Conrad Ludgate	11bb84c38d	save 1000 bytes by removing instrument	2025-05-29 17:56:25 +01:00
Conrad Ludgate	219c72c24c	optimise proxy_pass memory size a little, also boxing requestcontext since it is large	2025-05-29 17:52:26 +01:00
Conrad Ludgate	0633cd6385	small changes to connect compute mechanism/backend handling	2025-05-29 16:21:55 +01:00
Conrad Ludgate	0cdb0c5704	reuse the same tracker token for websockets and http	2025-05-29 16:04:14 +01:00
Conrad Ludgate	eefac5d78b	box the connect to compute task	2025-05-29 15:58:28 +01:00
Conrad Ludgate	7d1c908b1b	box authenticate task	2025-05-29 15:55:17 +01:00
Conrad Ludgate	cfa2813446	remove unnecessary aux field from passthrough	2025-05-29 15:51:57 +01:00
Conrad Ludgate	034bdb1552	move more work inside handshake	2025-05-29 15:50:10 +01:00
Conrad Ludgate	8b1ffa1718	simplify cplane authentication	2025-05-29 15:46:40 +01:00
Conrad Ludgate	2d3ea77953	box the handshake task	2025-05-29 15:39:33 +01:00
Conrad Ludgate	3124729f53	spawn passthrough as a separate task to reduce influence from the handshake task	2025-05-29 15:21:54 +01:00
Conrad Ludgate	6463eb38be	manually handle task tracker tokens	2025-05-29 15:19:03 +01:00
Conrad Ludgate	ae506fd791	proxy: remove unused ip return value	2025-05-29 15:04:40 +01:00
Vlad Lazar	8a6fc6fd8c	pageserver: hook importing timelines up into disk usage eviction (#12038 ) ## Problem Disk usage eviction isn't sensitive to layers of imported timelines. ## Summary of changes Hook importing timelines up into eviction and add a test for it. I don't think we need any special eviction logic for this. These layers will all be visible and their access time will be their creation time. Hence, we'll remove covered layers first and get to the imported layers if there's still disk pressure.	2025-05-29 13:01:10 +00:00
Vlad Lazar	51639cd6af	pageserver: allow for deletion of importing timelines (#12033 ) ## Problem Importing timelines can't currently be deleted. This is problematic because: 1. Cplane cannot delete failed imports and we leave the timeline behind. 2. The flow does not support user driven cancellation of the import ## Summary of changes On the pageserver: I've taken the path of least resistance, extended `TimelineOrOffloaded` with a new variant and added handling in the right places. I'm open to thoughts here, but I think it turned out better than I was envisioning. On the storage controller: Again, fairly simple business: when a DELETE timeline request is received, we remove the import from the DB and stop any finalization tasks/futures. In order to stop finalizations, we track them in-memory. For each finalizing import, we associate a gate and a cancellation token. Note that we delete the entry from the database before cancelling any finalizations. This is such that a concurrent request can't progress the import into finalize state and race with the deletion. This concern about deleting an import with on-going finalization is theoretical in the near future. We are only going to delete importing timelines after the storage controller reports the failure to cplane. Alas, the design works for user driven cancellation too. Closes https://github.com/neondatabase/neon/issues/11897	2025-05-29 11:13:52 +00:00
devin-ai-integration[bot]	529d661532	storcon: skip offline nodes in get_top_tenant_shards (#12057 ) ## Summary The optimiser background loop could get delayed a lot by waiting for timeouts trying to talk to offline nodes. Fixes: #12056 ## Solution - Skip offline nodes in `get_top_tenant_shards` Link to Devin run: https://app.devin.ai/sessions/065afd6756734d33bbd4d012428c4b6e Requested by: John Spray (john@neon.tech) Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: John Spray <john@neon.tech>	2025-05-29 11:07:09 +00:00
Alex Chi Z.	9e4cf52949	pageserver: reduce concurrency for gc-compaction (#12054 ) ## Problem Temporarily reduce the concurrency of gc-compaction to 1 job at a time. We are going to roll out in the largest AWS region next week. Having one job running at a time makes it easier to identify what tenant causes problem if it's not running well and pause gc-compaction for that specific tenant. (We can make this configurable via pageserver config in the future!) ## Summary of changes Reduce `CONCURRENT_GC_COMPACTION_TASKS` from 2 to 1. Signed-off-by: Alex Chi Z <chi@neon.tech>	2025-05-29 09:32:19 +00:00
Arpad Müller	831f2a4ba7	Fix flakiness of test_storcon_create_delete_sk_down (#12040 ) The `test_storcon_create_delete_sk_down` test is still flaky. This patch addresses two possible causes for flakiness. Both causes are related to deletion racing with `pull_timeline` which hasn't finished yet. * the first cause is timeline deletion racing with `pull_timeline`: * the first deletion attempt doesn't contain the line because the timeline doesn't exist yet * the subsequent deletion attempts don't contain it either, only a note that the timeline is already deleted. * so this patch adds the note that the timeline is already deleted to the regex * the second cause is about tenant deletion racing with `pull_timeline`: * there were no tenant specific tombstones so if a tenant was deleted, we only added tombstones for the specific timelines being deleted, not for the tenant itself. * This patch changes this, so we now have tenant specific tombstones as well as timeline specific ones, and creation of a timeline checks both. * we also don't see any retries of the tenant deletion in the logs. once it's done it's done. so extend the regex to contain the tenant deletion message as well. One could wonder why the regex and why not using the API to check whether the timeline is just "gone". The issue with the API is that it doesn't allow one to distinguish between "deleted" and "has never existed", and the latter case might race with `pull_timeline`. I.e. the second case flakiness helped in the discovery of a real bug (no tenant tombstones), so the more precise check was helpful. Before, I could easily reproduce 2-9 occurrences of flakiness when running the test with an additional `range(128)` parameter (i.e. 218 times 4 times). With this patch, I ran it three times, not a single failure. Fixes #11838	2025-05-28 18:20:38 +00:00
Vlad Lazar	eadabeddb8	pageserver: use the same job size throughout the import lifetime (#12026 ) ## Problem Import planning takes a job size limit as its input. Previously, the job size came from a pageserver config field. This field may change while imports are in progress. If this happens, plans will no longer be identical and the import would fail permanently. ## Summary of Changes Bake the job size into the import progress reported to the storage controller. For new imports, use the value from the pageserver config, and, for existing imports, use the value present in the shard progress. This value is identical for all shards, but we want it to be versioned since future versions of the planner might split the jobs up differently. Hence, it ends up in `ShardImportProgress`. Closes https://github.com/neondatabase/neon/issues/11983	2025-05-28 15:19:41 +00:00