mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
storage_scrubber: retry on index deletion failures (#9204)
## Problem In automated tests running on AWS S3, we frequently see scrubber failures when it can't delete an index. `location_conf_churn`: https://neon-github-public-dev.s3.amazonaws.com/reports/main/11076221056/index.html#/testresult/f89b1916b6a693e2 `scrubber_physical_gc`: https://neon-github-public-dev.s3.amazonaws.com/reports/pr-9178/11074269153/index.html#/testresult/9885ed5aa0fe38b6 ## Summary of changes Wrap index deletion in a backoff::retry --------- Co-authored-by: Arpad Müller <arpad-m@users.noreply.github.com>
This commit is contained in:
@@ -4,7 +4,7 @@ use std::time::Duration;
|
||||
|
||||
use crate::checks::{list_timeline_blobs, BlobDataParseResult};
|
||||
use crate::metadata_stream::{stream_tenant_timelines, stream_tenants};
|
||||
use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId};
|
||||
use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId, MAX_RETRIES};
|
||||
use futures_util::{StreamExt, TryStreamExt};
|
||||
use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
|
||||
use pageserver::tenant::remote_timeline_client::{parse_remote_index_path, remote_layer_path};
|
||||
@@ -18,6 +18,7 @@ use serde::Serialize;
|
||||
use storage_controller_client::control_api;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{info_span, Instrument};
|
||||
use utils::backoff;
|
||||
use utils::generation::Generation;
|
||||
use utils::id::{TenantId, TenantTimelineId};
|
||||
|
||||
@@ -326,15 +327,25 @@ async fn maybe_delete_index(
|
||||
}
|
||||
|
||||
// All validations passed: erase the object
|
||||
match remote_client
|
||||
.delete(&obj.key, &CancellationToken::new())
|
||||
let cancel = CancellationToken::new();
|
||||
match backoff::retry(
|
||||
|| remote_client.delete(&obj.key, &cancel),
|
||||
|_| false,
|
||||
3,
|
||||
MAX_RETRIES as u32,
|
||||
"maybe_delete_index",
|
||||
&cancel,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => {
|
||||
None => {
|
||||
unreachable!("Using a dummy cancellation token");
|
||||
}
|
||||
Some(Ok(_)) => {
|
||||
tracing::info!("Successfully deleted index");
|
||||
summary.indices_deleted += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
Some(Err(e)) => {
|
||||
tracing::warn!("Failed to delete index: {e}");
|
||||
summary.remote_storage_errors += 1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user