scrubber: handle initdb files, fix an issue with prefixes (#6079)

- The code for calculating the prefix in the bucket expected a
trailing slash (as the tests provide), but that's an awkward
expectation to impose in the field: make the code more flexible
by trimming the trailing character only if it is indeed a slash.
- initdb archives were detected by the scrubber as malformed layer
files. Teach it to recognize and ignore them.
Author: John Spray
Date: 2023-12-12 16:53:08 +00:00 (committed by GitHub)
Parent: 6acbee2368
Commit: dfb0a6fdaf
4 changed files with 40 additions and 3 deletions

@@ -142,7 +142,9 @@ pub(crate) async fn branch_cleanup_and_check_errors(
             .collect();
         if !orphan_layers.is_empty() {
-            result.errors.push(format!(
+            // An orphan layer is not an error: it's arguably not even a warning, but it is helpful to report
+            // these as a hint that there is something worth cleaning up here.
+            result.warnings.push(format!(
                 "index_part.json does not contain layers from S3: {:?}",
                 orphan_layers
                     .iter()
@@ -170,6 +172,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
                 ));
             }
         }
+        BlobDataParseResult::Relic => {}
         BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend(
             parse_errors
                 .into_iter()
@@ -215,6 +218,8 @@ pub(crate) enum BlobDataParseResult {
         index_part_generation: Generation,
         s3_layers: HashSet<(LayerFileName, Generation)>,
     },
+    /// The remains of a deleted Timeline (i.e. an initdb archive only)
+    Relic,
     Incorrect(Vec<String>),
 }
@@ -245,6 +250,7 @@ pub(crate) async fn list_timeline_blobs(
     timeline_dir_target.delimiter = String::new();
 
     let mut index_parts: Vec<ObjectIdentifier> = Vec::new();
+    let mut initdb_archive: bool = false;
 
     let stream = stream_listing(s3_client, &timeline_dir_target);
     pin_mut!(stream);
@@ -258,6 +264,10 @@ pub(crate) async fn list_timeline_blobs(
tracing::info!("Index key {key}");
index_parts.push(obj)
}
Some("initdb.tar.zst") => {
tracing::info!("initdb archive {key}");
initdb_archive = true;
}
Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) {
Ok((new_layer, gen)) => {
tracing::info!("Parsed layer key: {} {:?}", new_layer, gen);
@@ -279,6 +289,16 @@ pub(crate) async fn list_timeline_blobs(
         }
     }
 
+    if index_parts.is_empty() && s3_layers.is_empty() && initdb_archive {
+        tracing::info!(
+            "Timeline is empty apart from initdb archive: expected post-deletion state."
+        );
+        return Ok(S3TimelineBlobData {
+            blob_data: BlobDataParseResult::Relic,
+            keys_to_remove: Vec::new(),
+        });
+    }
+
     // Choose the index_part with the highest generation
     let (index_part_object, index_part_generation) = match index_parts
         .iter()
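
For illustration, a minimal compilable sketch of the rule added above, using a hypothetical classify helper and plain strings rather than the scrubber's real S3TimelineBlobData/BlobDataParseResult types: a timeline prefix with no index_part.json and no layer files, but with initdb.tar.zst present, is treated as the expected remains of a deleted timeline rather than as malformed data.

// Hypothetical simplification of the decision made in list_timeline_blobs above.
fn classify(index_parts: &[String], layer_files: &[String], initdb_archive: bool) -> &'static str {
    if index_parts.is_empty() && layer_files.is_empty() && initdb_archive {
        // Corresponds to the new Relic case: nothing left but the initdb archive.
        "relic"
    } else {
        "needs full validation"
    }
}

fn main() {
    // A deleted timeline typically leaves only initdb.tar.zst behind.
    assert_eq!(classify(&[], &[], true), "relic");
    // A live timeline has an index and layers, so it goes through the normal checks.
    assert_eq!(
        classify(&["index_part.json".into()], &["some-layer".into()], false),
        "needs full validation"
    );
}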

@@ -86,7 +86,9 @@ impl S3Target {
         if new_self.prefix_in_bucket.is_empty() {
             new_self.prefix_in_bucket = format!("/{}/", new_segment);
         } else {
-            let _ = new_self.prefix_in_bucket.pop();
+            if new_self.prefix_in_bucket.ends_with('/') {
+                new_self.prefix_in_bucket.pop();
+            }
             new_self.prefix_in_bucket =
                 [&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
         }
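
To show the behavioural change, a minimal standalone sketch (a hypothetical free function, not the S3Target method above): the prefix is normalized the same way whether or not the caller supplied a trailing '/', instead of unconditionally popping the last character.

// Hypothetical mirror of the join logic shown in the hunk above.
fn join_segment(mut prefix_in_bucket: String, new_segment: &str, delimiter: &str) -> String {
    if prefix_in_bucket.is_empty() {
        format!("/{}/", new_segment)
    } else {
        // Only strip the trailing character if it really is a slash.
        if prefix_in_bucket.ends_with('/') {
            prefix_in_bucket.pop();
        }
        [prefix_in_bucket.as_str(), new_segment, ""].join(delimiter)
    }
}

fn main() {
    // With or without a trailing slash on the configured prefix, the result is the same.
    assert_eq!(join_segment("pageserver/v1/".into(), "tenants", "/"), "pageserver/v1/tenants/");
    assert_eq!(join_segment("pageserver/v1".into(), "tenants", "/"), "pageserver/v1/tenants/");
}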

@@ -57,7 +57,7 @@ async fn main() -> anyhow::Result<()> {
     ));
 
     match cli.command {
-        Command::ScanMetadata { json } => match scan_metadata(bucket_config).await {
+        Command::ScanMetadata { json } => match scan_metadata(bucket_config.clone()).await {
             Err(e) => {
                 tracing::error!("Failed: {e}");
                 Err(e)
@@ -70,6 +70,17 @@ async fn main() -> anyhow::Result<()> {
             }
             if summary.is_fatal() {
                 Err(anyhow::anyhow!("Fatal scrub errors detected"))
+            } else if summary.is_empty() {
+                // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
+                // scrubber they were likely expecting to scan something, and if we see no timelines
+                // at all then it's likely due to some configuration issues like a bad prefix
+                Err(anyhow::anyhow!(
+                    "No timelines found in bucket {} prefix {}",
+                    bucket_config.bucket,
+                    bucket_config
+                        .prefix_in_bucket
+                        .unwrap_or("<none>".to_string())
+                ))
             } else {
                 Ok(())
             }

@@ -174,6 +174,10 @@ Timeline layer count: {6}
     pub fn is_fatal(&self) -> bool {
         !self.with_errors.is_empty()
     }
+
+    pub fn is_empty(&self) -> bool {
+        self.count == 0
+    }
 }
 
/// Scan the pageserver metadata in an S3 bucket, reporting errors and statistics.
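
Taken together with the async fn main() hunk above, a compilable sketch of how the scan result now maps to an exit status; the Summary type here is a hypothetical stand-in with only the two fields the predicates need, not the scrubber's real metadata summary, and it uses the anyhow crate as the code above does.

use anyhow::anyhow;

// Hypothetical stand-in: just enough state for is_fatal() and is_empty().
struct Summary {
    count: usize,              // number of timelines scanned
    with_errors: Vec<String>,  // timelines with fatal problems
}

impl Summary {
    fn is_fatal(&self) -> bool {
        !self.with_errors.is_empty()
    }
    fn is_empty(&self) -> bool {
        self.count == 0
    }
}

fn outcome(summary: &Summary, bucket: &str, prefix: Option<&str>) -> anyhow::Result<()> {
    if summary.is_fatal() {
        Err(anyhow!("Fatal scrub errors detected"))
    } else if summary.is_empty() {
        // A scan that found nothing usually points at a misconfigured bucket or prefix.
        Err(anyhow!(
            "No timelines found in bucket {} prefix {}",
            bucket,
            prefix.unwrap_or("<none>")
        ))
    } else {
        Ok(())
    }
}

fn main() {
    let empty = Summary { count: 0, with_errors: Vec::new() };
    // An empty scan is reported as an error rather than silently succeeding.
    assert!(outcome(&empty, "my-bucket", Some("pageserver/v1")).is_err());
}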