fix(background_tasks): first backoff, compaction error stacktraces (#5881)

First compaction/gc error backoff starts from 0 which is less than 2s
what it was before #5672. This is now fixed to be the intended 2**n.

Additionally noticed the `compaction_iteration` creating an
`anyhow::Error` via `into()` always captures a stacktrace even if we had
a stacktraceful anyhow error within the CompactionError because there is
no stable api for querying that.
This commit is contained in:
Joonas Koivunen
2023-11-19 15:16:31 +01:00
committed by GitHub
parent cad0dca4b8
commit 3b3f040be3
2 changed files with 6 additions and 12 deletions

View File

@@ -1649,22 +1649,16 @@ impl Tenant {
/// This function is periodically called by compactor task.
/// Also it can be explicitly requested per timeline through page server
/// api's 'compact' command.
pub async fn compaction_iteration(
async fn compaction_iteration(
&self,
cancel: &CancellationToken,
ctx: &RequestContext,
) -> anyhow::Result<()> {
// Don't start doing work during shutdown
if let TenantState::Stopping { .. } = self.current_state() {
) -> anyhow::Result<(), timeline::CompactionError> {
// Don't start doing work during shutdown, or when broken, we do not need those in the logs
if !self.is_active() {
return Ok(());
}
// We should only be called once the tenant has activated.
anyhow::ensure!(
self.is_active(),
"Cannot run compaction iteration on inactive tenant"
);
{
let conf = self.tenant_conf.read().unwrap();
if !conf.location.may_delete_layers_hint() || !conf.location.may_upload_layers_hint() {

View File

@@ -180,7 +180,7 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
// Run compaction
if let Err(e) = tenant.compaction_iteration(&cancel, &ctx).await {
let wait_duration = backoff::exponential_backoff_duration_seconds(
error_run_count,
error_run_count + 1,
1.0,
MAX_BACKOFF_SECS,
);
@@ -261,7 +261,7 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
.await;
if let Err(e) = res {
let wait_duration = backoff::exponential_backoff_duration_seconds(
error_run_count,
error_run_count + 1,
1.0,
MAX_BACKOFF_SECS,
);