From 7588983168dbc2da7e025684180012e036f9b1b7 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 11 Mar 2025 10:33:35 -0400 Subject: [PATCH] fix(scrubber): log even if no refs are found (#11160) ## Problem Investigate https://github.com/neondatabase/neon/issues/11159 ## Summary of changes This doesn't fix the issue, but at least we can narrow down the cause next time it happens by logging ancestor referenced layer cnt even if it's 0. Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 3 ++- storage_scrubber/src/pageserver_physical_gc.rs | 12 +++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 42b36f7252..123079804b 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1091,7 +1091,7 @@ impl Timeline { let latest_gc_cutoff = self.get_applied_gc_cutoff_lsn(); tracing::info!( - "latest_gc_cutoff: {}, pitr cutoff {}", + "starting shard ancestor compaction, latest_gc_cutoff: {}, pitr cutoff {}", *latest_gc_cutoff, self.gc_info.read().unwrap().cutoffs.time ); @@ -1120,6 +1120,7 @@ impl Timeline { // Expensive, exhaustive check of keys in this layer: this guards against ShardedRange's calculations being // wrong. If ShardedRange claims the local page count is zero, then no keys in this layer // should be !is_key_disposable() + // TODO: exclude sparse keyspace from this check, otherwise it will infinitely loop. let range = layer_desc.get_key_range(); let mut key = range.start; while key < range.end { diff --git a/storage_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs index c956b1abbc..f14341c7bc 100644 --- a/storage_scrubber/src/pageserver_physical_gc.rs +++ b/storage_scrubber/src/pageserver_physical_gc.rs @@ -152,10 +152,8 @@ impl TenantRefAccumulator { } } - if !ancestor_refs.is_empty() { - tracing::info!(%ttid, "Found {} ancestor refs", ancestor_refs.len()); - self.ancestor_ref_shards.update(ttid, ancestor_refs); - } + tracing::info!(%ttid, "Found {} ancestor refs", ancestor_refs.len()); + self.ancestor_ref_shards.update(ttid, ancestor_refs); } /// Consume Self and return a vector of ancestor tenant shards that should be GC'd, and map of referenced ancestor layers to preserve @@ -779,7 +777,7 @@ pub async fn pageserver_physical_gc( let mut summary = GcSummary::default(); { - let timelines = std::pin::pin!(timelines.try_buffered(CONCURRENCY)); + let timelines = timelines.try_buffered(CONCURRENCY); let timelines = timelines.try_flatten(); let timelines = timelines.map_ok(|(ttid, tenant_manifest_arc)| { @@ -793,8 +791,8 @@ pub async fn pageserver_physical_gc( tenant_manifest_arc, ) }); - let mut timelines = std::pin::pin!(timelines.try_buffered(CONCURRENCY)); - + let timelines = timelines.try_buffered(CONCURRENCY); + let mut timelines = std::pin::pin!(timelines); // Drain futures for per-shard GC, populating accumulator as a side effect while let Some(i) = timelines.next().await { summary.merge(i?);