From 82960b2175211c0f666b91b5258c5e2253a245c7 Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 14 May 2024 16:39:17 +0100 Subject: [PATCH] pageserver: skip waiting for logical size on shard >0 (#7744) ## Problem Shards with number >0 could hang waiting for `await_initial_logical_size`, as we don't calculate logical size on these shards. This causes them to hold onto semaphore units and starve other tenants out from proceeding with warmup activation. That doesn't hurt availability (we still have on-demand activation), but it does mean that some background tasks like consumption metrics would omit some tenants. ## Summary of changes - Skip waiting for logical size calculation on shards >0 - Upgrade unexpected code paths to use debug_assert!(), which acts as an implicit regression test for this issue, and make the info() one into a warn() --- .../src/tenant/remote_timeline_client.rs | 5 +++++ pageserver/src/tenant/timeline.rs | 19 ++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 9103760388..630ade5c13 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -1127,6 +1127,11 @@ impl RemoteTimelineClient { Ok(()) } + pub(crate) fn is_deleting(&self) -> bool { + let mut locked = self.upload_queue.lock().unwrap(); + locked.stopped_mut().is_ok() + } + pub(crate) async fn preserve_initdb_archive( self: &Arc, tenant_id: &TenantId, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 9ee24a4ff0..ca34b4fadc 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2631,6 +2631,7 @@ impl Timeline { // Don't make noise. } else { warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work"); + debug_assert!(false); } } }; @@ -4355,6 +4356,21 @@ impl Timeline { /// this Timeline is shut down. Calling this function will cause the initial /// logical size calculation to skip waiting for the background jobs barrier. pub(crate) async fn await_initial_logical_size(self: Arc) { + if !self.shard_identity.is_shard_zero() { + // We don't populate logical size on shard >0: skip waiting for it. + return; + } + + if self + .remote_client + .as_ref() + .map(|c| c.is_deleting()) + .unwrap_or(false) + { + // The timeline was created in a deletion-resume state, we don't expect logical size to be populated + return; + } + if let Some(await_bg_cancel) = self .current_logical_size .cancel_wait_for_background_loop_concurrency_limit_semaphore @@ -4366,9 +4382,10 @@ impl Timeline { // the logical size cancellation to skip the concurrency limit semaphore. // TODO: this is an unexpected case. We should restructure so that it // can't happen. - tracing::info!( + tracing::warn!( "await_initial_logical_size: can't get semaphore cancel token, skipping" ); + debug_assert!(false); } tokio::select!(