fix(scrubber): log download error (#11833)

## Problem

We use `head_object` to determine whether an object exists or not.
However, it does not always error due to a missing object.

## Summary of changes

Log the error so that we can have a better idea what's going on with the
scrubber errors in prod.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2025-05-07 17:21:17 +08:00
committed by GitHub
parent 0ef6851219
commit 608afc3055
3 changed files with 14 additions and 7 deletions

View File

@@ -165,16 +165,17 @@ pub(crate) async fn branch_cleanup_and_check_errors(
.head_object(&path, &CancellationToken::new())
.await;
if response.is_err() {
if let Err(e) = response {
// Object is not present.
let is_l0 = LayerMap::is_l0(layer.key_range(), layer.is_delta());
let msg = format!(
"index_part.json contains a layer {}{} (shard {}) that is not present in remote storage (layer_is_l0: {})",
"index_part.json contains a layer {}{} (shard {}) that is not present in remote storage (layer_is_l0: {}) with error: {}",
layer,
metadata.generation.get_suffix(),
metadata.shard,
is_l0,
e,
);
if is_l0 || ignore_error {

View File

@@ -137,11 +137,10 @@ struct TenantRefAccumulator {
impl TenantRefAccumulator {
fn update(&mut self, ttid: TenantShardTimelineId, index_part: &IndexPart) {
let this_shard_idx = ttid.tenant_shard_id.to_index();
(*self
.shards_seen
self.shards_seen
.entry(ttid.tenant_shard_id.tenant_id)
.or_default())
.insert(this_shard_idx);
.or_default()
.insert(this_shard_idx);
let mut ancestor_refs = Vec::new();
for (layer_name, layer_metadata) in &index_part.layer_metadata {
@@ -767,10 +766,13 @@ pub async fn pageserver_physical_gc(
stream_tenant_timelines(remote_client_ref, target_ref, tenant_shard_id).await?,
);
Ok(try_stream! {
let mut cnt = 0;
while let Some(ttid_res) = timelines.next().await {
let ttid = ttid_res?;
cnt += 1;
yield (ttid, tenant_manifest_arc.clone());
}
tracing::info!(%tenant_shard_id, "Found {} timelines", cnt);
})
}
});
@@ -790,6 +792,7 @@ pub async fn pageserver_physical_gc(
&accumulator,
tenant_manifest_arc,
)
.instrument(info_span!("gc_timeline", %ttid))
});
let timelines = timelines.try_buffered(CONCURRENCY);
let mut timelines = std::pin::pin!(timelines);

View File

@@ -153,7 +153,10 @@ pub async fn scan_pageserver_metadata(
const CONCURRENCY: usize = 32;
// Generate a stream of TenantTimelineId
let timelines = tenants.map_ok(|t| stream_tenant_timelines(&remote_client, &target, t));
let timelines = tenants.map_ok(|t| {
tracing::info!("Found tenant: {}", t);
stream_tenant_timelines(&remote_client, &target, t)
});
let timelines = timelines.try_buffered(CONCURRENCY);
let timelines = timelines.try_flatten();