From 3b04f3a7490562999397ed4263fb7cd0a817addb Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Mon, 11 Dec 2023 23:27:53 +0200 Subject: [PATCH] fix: accidential return Ok (#6106) Error indicating request cancellation OR timeline shutdown was deemed as a reason to exit the background worker that calculated synthetic size. Fix it to only be considered for avoiding logging such of such errors. --- pageserver/src/consumption_metrics.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/pageserver/src/consumption_metrics.rs b/pageserver/src/consumption_metrics.rs index bb13bdd5e5..8f2b88d191 100644 --- a/pageserver/src/consumption_metrics.rs +++ b/pageserver/src/consumption_metrics.rs @@ -287,14 +287,20 @@ async fn calculate_synthetic_size_worker( // By using the same limiter, we centralize metrics collection for "start" and "finished" counters, // which turns out is really handy to understand the system. if let Err(e) = tenant.calculate_synthetic_size(cause, cancel, ctx).await { - if let Some(PageReconstructError::Cancelled) = - e.downcast_ref::() - { - return Ok(()); - } - error!( - "failed to calculate synthetic size for tenant {tenant_shard_id}: {e:#}" + // this error can be returned if timeline is shutting down, but it does not + // mean the synthetic size worker should terminate. we do not need any checks + // in this function because `mgr::get_tenant` will error out after shutdown has + // progressed to shutting down tenants. + let is_cancelled = matches!( + e.downcast_ref::(), + Some(PageReconstructError::Cancelled) ); + + if !is_cancelled { + error!( + "failed to calculate synthetic size for tenant {tenant_shard_id}: {e:#}" + ); + } } } } @@ -307,7 +313,7 @@ async fn calculate_synthetic_size_worker( let res = tokio::time::timeout_at( started_at + synthetic_size_calculation_interval, - task_mgr::shutdown_token().cancelled(), + cancel.cancelled(), ) .await; if res.is_ok() {