From 3b3f040be339efd89dd7057ebdded172f753d8ae Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Sun, 19 Nov 2023 15:16:31 +0100 Subject: [PATCH] fix(background_tasks): first backoff, compaction error stacktraces (#5881) The first compaction/gc error backoff starts from 0, which is less than the 2s it was before #5672. This is now fixed to be the intended 2**n. Additionally, noticed that `compaction_iteration` creating an `anyhow::Error` via `into()` always captures a stacktrace, even if we already had a stacktraceful anyhow error within the CompactionError, because there is no stable API for querying that. --- pageserver/src/tenant.rs | 14 ++++---------- pageserver/src/tenant/tasks.rs | 4 ++-- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 758f8b15a1..4025e93f66 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1649,22 +1649,16 @@ impl Tenant { /// This function is periodically called by compactor task. /// Also it can be explicitly requested per timeline through page server /// api's 'compact' command. - pub async fn compaction_iteration( + async fn compaction_iteration( &self, cancel: &CancellationToken, ctx: &RequestContext, - ) -> anyhow::Result<()> { - // Don't start doing work during shutdown - if let TenantState::Stopping { .. } = self.current_state() { + ) -> anyhow::Result<(), timeline::CompactionError> { + // Don't start doing work during shutdown, or when broken, we do not need those in the logs + if !self.is_active() { return Ok(()); } - // We should only be called once the tenant has activated. 
- anyhow::ensure!( - self.is_active(), - "Cannot run compaction iteration on inactive tenant" - ); - { let conf = self.tenant_conf.read().unwrap(); if !conf.location.may_delete_layers_hint() || !conf.location.may_upload_layers_hint() { diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index eb77f7c83a..381d731b79 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -180,7 +180,7 @@ async fn compaction_loop(tenant: Arc, cancel: CancellationToken) { // Run compaction if let Err(e) = tenant.compaction_iteration(&cancel, &ctx).await { let wait_duration = backoff::exponential_backoff_duration_seconds( - error_run_count, + error_run_count + 1, 1.0, MAX_BACKOFF_SECS, ); @@ -261,7 +261,7 @@ async fn gc_loop(tenant: Arc, cancel: CancellationToken) { .await; if let Err(e) = res { let wait_duration = backoff::exponential_backoff_duration_seconds( - error_run_count, + error_run_count + 1, 1.0, MAX_BACKOFF_SECS, );