From 931f8c43003275a4f90c8ca1b439e92455881830 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 15 Apr 2025 11:16:16 -0400 Subject: [PATCH] fix(pageserver): check if cancelled before waiting logical size (2/2) (#11575) ## Problem close https://github.com/neondatabase/neon/issues/11486, proceeding https://github.com/neondatabase/neon/pull/11531 ## Summary of changes This patch fixes the rest 50% of instability of `test_create_churn_during_restart`. During tenant warmup, we'll request logical size; however, if the startup gets cancelled, we won't be able to spawn the initial logical size calculation task that sets the `cancel_wait_for_background_loop_concurrency_limit_semaphore`. Therefore, we check `cancelled` before proceeding to get `cancel_wait_for_background_loop_concurrency_limit_semaphore`. There will still be a race if the timeline shutdown happens after L5710 and before L5711, but it should be enough to reduce the flakiness of the test. Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index c27a4b62da..613834dc88 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -5702,6 +5702,12 @@ impl Timeline { return; } + if self.cancel.is_cancelled() { + // We already requested stopping the tenant, so we cannot wait for the logical size + // calculation to complete given the task might have been already cancelled. + return; + } + if let Some(await_bg_cancel) = self .current_logical_size .cancel_wait_for_background_loop_concurrency_limit_semaphore