From 307e1e64c8f9edf641ae92e920821af4eb013b09 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Mon, 12 May 2025 17:17:35 +0800 Subject: [PATCH] fix(scrubber): more logs wrt relic timelines (#11895) ## Problem Further investigation on https://github.com/neondatabase/neon/issues/11159 reveals that the list_tenant function can find all the shards of the tenant, but then the shard gets missing during the gc timeline list blob. One reason could be that in some ways the timeline gets recognized as a relic timeline. ## Summary of changes Add logging to help identify the issue. Signed-off-by: Alex Chi Z --- storage_scrubber/src/checks.rs | 3 ++- storage_scrubber/src/pageserver_physical_gc.rs | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs index b151b612bf..40f3523a7e 100644 --- a/storage_scrubber/src/checks.rs +++ b/storage_scrubber/src/checks.rs @@ -355,6 +355,7 @@ pub(crate) async fn list_timeline_blobs( match res { ListTimelineBlobsResult::Ready(data) => Ok(data), ListTimelineBlobsResult::MissingIndexPart(_) => { + tracing::warn!("listing raced with removal of an index, retrying"); // Retry if listing raced with removal of an index let data = list_timeline_blobs_impl(remote_client, id, root_target) .await? @@ -441,7 +442,7 @@ async fn list_timeline_blobs_impl( } if index_part_keys.is_empty() && s3_layers.is_empty() { - tracing::debug!("Timeline is empty: expected post-deletion state."); + tracing::info!("Timeline is empty: expected post-deletion state."); if initdb_archive { tracing::info!("Timeline is post deletion but initdb archive is still present."); } diff --git a/storage_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs index e1a4095a3c..49ab192285 100644 --- a/storage_scrubber/src/pageserver_physical_gc.rs +++ b/storage_scrubber/src/pageserver_physical_gc.rs @@ -593,6 +593,7 @@ async fn gc_timeline( index_part_snapshot_time: _, } => (index_part, *index_part_generation, data.unused_index_keys), BlobDataParseResult::Relic => { + tracing::info!("Skipping timeline {ttid}, it is a relic"); // Post-deletion tenant location: don't try and GC it. return Ok(summary); }