mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-23 06:09:59 +00:00
fix(scrubber): remote_storage error causes layers to be deleted as orphans (#11924)
## Problem

Closes https://github.com/neondatabase/neon/issues/11159. We occasionally see layer files that are still in use being wrongly deleted, along with spurious errors in staging. This patch fixes that.

Example error:

```
Timeline metadata errors: ["index_part.json contains a layer .... (shard 0000) that is not present in remote storage (layer_is_l0: false) with error: Failed to download a remote file: s3 head object\n\nCaused by:\n 0: dispatch failure\n 1: timeout\n 2: error trying to connect: HTTP connect timeout occurred after 3.1s\n
```

This error should not be raised: the file may well exist, but we cannot tell whether it does because the HEAD request itself failed.

## Summary of changes

Only report a "layer is not present in remote storage" error when `head_object` fails with `NotFound`; any other failure is logged as a warning instead.

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
@@ -13,7 +13,7 @@ use pageserver::tenant::remote_timeline_client::{
|
|||||||
};
|
};
|
||||||
use pageserver::tenant::storage_layer::LayerName;
|
use pageserver::tenant::storage_layer::LayerName;
|
||||||
use pageserver_api::shard::ShardIndex;
|
use pageserver_api::shard::ShardIndex;
|
||||||
use remote_storage::{GenericRemoteStorage, ListingObject, RemotePath};
|
use remote_storage::{DownloadError, GenericRemoteStorage, ListingObject, RemotePath};
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
use utils::generation::Generation;
|
use utils::generation::Generation;
|
||||||
@@ -165,23 +165,34 @@ pub(crate) async fn branch_cleanup_and_check_errors(
|
|||||||
.head_object(&path, &CancellationToken::new())
|
.head_object(&path, &CancellationToken::new())
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
if let Err(e) = response {
|
match response {
|
||||||
// Object is not present.
|
Ok(_) => {}
|
||||||
let is_l0 = LayerMap::is_l0(layer.key_range(), layer.is_delta());
|
Err(DownloadError::NotFound) => {
|
||||||
|
// Object is not present.
|
||||||
|
let is_l0 =
|
||||||
|
LayerMap::is_l0(layer.key_range(), layer.is_delta());
|
||||||
|
|
||||||
let msg = format!(
|
let msg = format!(
|
||||||
"index_part.json contains a layer {}{} (shard {}) that is not present in remote storage (layer_is_l0: {}) with error: {}",
|
"index_part.json contains a layer {}{} (shard {}) that is not present in remote storage (layer_is_l0: {})",
|
||||||
layer,
|
layer,
|
||||||
metadata.generation.get_suffix(),
|
metadata.generation.get_suffix(),
|
||||||
metadata.shard,
|
metadata.shard,
|
||||||
is_l0,
|
is_l0,
|
||||||
e,
|
);
|
||||||
);
|
|
||||||
|
|
||||||
if is_l0 || ignore_error {
|
if is_l0 || ignore_error {
|
||||||
result.warnings.push(msg);
|
result.warnings.push(msg);
|
||||||
} else {
|
} else {
|
||||||
result.errors.push(msg);
|
result.errors.push(msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(
|
||||||
|
"cannot check if the layer {}{} is present in remote storage (error: {})",
|
||||||
|
layer,
|
||||||
|
metadata.generation.get_suffix(),
|
||||||
|
e,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user