fix(storage-scrubber): wrong trial condition (#8905)

ref https://github.com/neondatabase/neon/issues/8872

## Summary of changes

We saw stuck storage scrubber in staging caused by infinite retries. I
believe here we should use `min` instead of `max` to avoid getting
minutes or hours of retry backoff.

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2024-09-04 05:39:56 +08:00
committed by GitHub
parent 3d9001d83f
commit ecfa3d9de9

View File

@@ -422,7 +422,7 @@ fn stream_objects_with_retries<'a>(
let yield_err = if err.is_permanent() {
true
} else {
let backoff_time = 1 << trial.max(5);
let backoff_time = 1 << trial.min(5);
tokio::time::sleep(Duration::from_secs(backoff_time)).await;
trial += 1;
trial == MAX_RETRIES - 1
@@ -473,7 +473,7 @@ async fn list_objects_with_retries(
s3_target.delimiter,
DisplayErrorContext(e),
);
let backoff_time = 1 << trial.max(5);
let backoff_time = 1 << trial.min(5);
tokio::time::sleep(Duration::from_secs(backoff_time)).await;
}
}
@@ -492,7 +492,7 @@ async fn download_object_with_retries(
Ok(response) => response,
Err(e) => {
error!("Failed to download object for key {key}: {e}");
let backoff_time = 1 << trial.max(5);
let backoff_time = 1 << trial.min(5);
tokio::time::sleep(Duration::from_secs(backoff_time)).await;
continue;
}
@@ -508,7 +508,7 @@ async fn download_object_with_retries(
}
Err(e) => {
error!("Failed to stream object body for key {key}: {e}");
let backoff_time = 1 << trial.max(5);
let backoff_time = 1 << trial.min(5);
tokio::time::sleep(Duration::from_secs(backoff_time)).await;
}
}