From 47f5bcf2bcef94e6cdc9a9ec1b07f8bf94bf1c11 Mon Sep 17 00:00:00 2001
From: Erik Grinaker
Date: Wed, 2 Apr 2025 14:55:15 +0200
Subject: [PATCH] pageserver: don't periodically flush layers for stale attachments (#11317)

## Problem

Tenants in attachment state `Stale` can't upload layers, and don't run
compaction, but still do periodic L0 layer flushes in the tenant
housekeeping loop. If the tenant remains stuck in stale mode, this
causes a large buildup of L0 layers, causing logging, metrics increases,
and possibly alerts.

Resolves #11245.

## Summary of changes

Don't perform periodic layer flushes in stale attachment state.
---
 pageserver/src/tenant.rs | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index f1dbb274b9..15853133d6 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -3248,17 +3248,23 @@ impl Tenant {
     async fn housekeeping(&self) {
         // Call through to all timelines to freeze ephemeral layers as needed. This usually happens
         // during ingest, but we don't want idle timelines to hold open layers for too long.
-        let timelines = self
-            .timelines
-            .lock()
-            .unwrap()
-            .values()
-            .filter(|tli| tli.is_active())
-            .cloned()
-            .collect_vec();
+        //
+        // We don't do this if the tenant can't upload layers (i.e. it's in stale attachment mode).
+        // We don't run compaction in this case either, and don't want to keep flushing tiny L0
+        // layers that won't be compacted down.
+        if self.tenant_conf.load().location.may_upload_layers_hint() {
+            let timelines = self
+                .timelines
+                .lock()
+                .unwrap()
+                .values()
+                .filter(|tli| tli.is_active())
+                .cloned()
+                .collect_vec();
 
-        for timeline in timelines {
-            timeline.maybe_freeze_ephemeral_layer().await;
+            for timeline in timelines {
+                timeline.maybe_freeze_ephemeral_layer().await;
+            }
         }
 
         // Shut down walredo if idle.