Compare commits

...

4 Commits

Author SHA1 Message Date
John Spray
94e1d80a64 scrubber: make scan-metadata enumerate relics & unreadable timelines 2023-12-08 14:48:41 +00:00
John Spray
a28d91e8bc scrubber: report on generation-ful-ness of indices 2023-12-08 13:59:42 +00:00
John Spray
041d610fbe scrubber: handle initdb files 2023-12-08 13:49:40 +00:00
John Spray
ec03c29644 scrubber: only trim prefix if it ends with / 2023-12-08 13:49:40 +00:00
3 changed files with 70 additions and 18 deletions

View File

@@ -142,7 +142,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
.collect(); .collect();
if !orphan_layers.is_empty() { if !orphan_layers.is_empty() {
result.errors.push(format!( result.warnings.push(format!(
"index_part.json does not contain layers from S3: {:?}", "index_part.json does not contain layers from S3: {:?}",
orphan_layers orphan_layers
.iter() .iter()
@@ -170,6 +170,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
)); ));
} }
} }
BlobDataParseResult::Relic => {}
BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend( BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend(
parse_errors parse_errors
.into_iter() .into_iter()
@@ -215,6 +216,8 @@ pub(crate) enum BlobDataParseResult {
index_part_generation: Generation, index_part_generation: Generation,
s3_layers: HashSet<(LayerFileName, Generation)>, s3_layers: HashSet<(LayerFileName, Generation)>,
}, },
/// The remains of a deleted Timeline (i.e. an initdb archive only)
Relic,
Incorrect(Vec<String>), Incorrect(Vec<String>),
} }
@@ -245,6 +248,7 @@ pub(crate) async fn list_timeline_blobs(
timeline_dir_target.delimiter = String::new(); timeline_dir_target.delimiter = String::new();
let mut index_parts: Vec<ObjectIdentifier> = Vec::new(); let mut index_parts: Vec<ObjectIdentifier> = Vec::new();
let mut initdb_archive: bool = false;
let stream = stream_listing(s3_client, &timeline_dir_target); let stream = stream_listing(s3_client, &timeline_dir_target);
pin_mut!(stream); pin_mut!(stream);
@@ -258,6 +262,10 @@ pub(crate) async fn list_timeline_blobs(
tracing::info!("Index key {key}"); tracing::info!("Index key {key}");
index_parts.push(obj) index_parts.push(obj)
} }
Some("initdb.tar.zst") => {
tracing::info!("initdb archive {key}");
initdb_archive = true;
}
Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) { Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) {
Ok((new_layer, gen)) => { Ok((new_layer, gen)) => {
tracing::info!("Parsed layer key: {} {:?}", new_layer, gen); tracing::info!("Parsed layer key: {} {:?}", new_layer, gen);
@@ -279,6 +287,16 @@ pub(crate) async fn list_timeline_blobs(
} }
} }
if index_parts.is_empty() && s3_layers.is_empty() && initdb_archive {
tracing::info!(
"Timeline is empty apart from initdb archive: expected post-deletion state."
);
return Ok(S3TimelineBlobData {
blob_data: BlobDataParseResult::Relic,
keys_to_remove: Vec::new(),
});
}
// Choose the index_part with the highest generation // Choose the index_part with the highest generation
let (index_part_object, index_part_generation) = match index_parts let (index_part_object, index_part_generation) = match index_parts
.iter() .iter()

View File

@@ -86,7 +86,9 @@ impl S3Target {
if new_self.prefix_in_bucket.is_empty() { if new_self.prefix_in_bucket.is_empty() {
new_self.prefix_in_bucket = format!("/{}/", new_segment); new_self.prefix_in_bucket = format!("/{}/", new_segment);
} else { } else {
let _ = new_self.prefix_in_bucket.pop(); if new_self.prefix_in_bucket.ends_with('/') {
let _ = new_self.prefix_in_bucket.pop();
}
new_self.prefix_in_bucket = new_self.prefix_in_bucket =
[&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter); [&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
} }

View File

@@ -20,6 +20,14 @@ pub struct MetadataSummary {
with_warnings: HashSet<TenantTimelineId>, with_warnings: HashSet<TenantTimelineId>,
with_garbage: HashSet<TenantTimelineId>, with_garbage: HashSet<TenantTimelineId>,
indices_by_version: HashMap<usize, usize>, indices_by_version: HashMap<usize, usize>,
indices_with_generation: usize,
indices_without_generation: usize,
/// Timelines that couldn't even parse metadata and/or object keys: extremely damaged
invalid_count: usize,
/// Timelines with just an initdb archive, left behind after deletion.
relic_count: usize,
layer_count: MinMaxHisto, layer_count: MinMaxHisto,
timeline_size_bytes: MinMaxHisto, timeline_size_bytes: MinMaxHisto,
@@ -39,6 +47,8 @@ impl MinMaxHisto {
fn new() -> Self { fn new() -> Self {
Self { Self {
histo: histogram::Histogram::builder() histo: histogram::Histogram::builder()
// Accommodate tenant sizes up to 32TiB
.maximum_value(32 * 1024 * 1024 * 1024 * 1024)
.build() .build()
.expect("Bad histogram params"), .expect("Bad histogram params"),
min: u64::MAX, min: u64::MAX,
@@ -90,6 +100,10 @@ impl MetadataSummary {
with_warnings: HashSet::new(), with_warnings: HashSet::new(),
with_garbage: HashSet::new(), with_garbage: HashSet::new(),
indices_by_version: HashMap::new(), indices_by_version: HashMap::new(),
indices_with_generation: 0,
indices_without_generation: 0,
invalid_count: 0,
relic_count: 0,
layer_count: MinMaxHisto::new(), layer_count: MinMaxHisto::new(),
timeline_size_bytes: MinMaxHisto::new(), timeline_size_bytes: MinMaxHisto::new(),
layer_size_bytes: MinMaxHisto::new(), layer_size_bytes: MinMaxHisto::new(),
@@ -111,24 +125,35 @@ impl MetadataSummary {
fn update_data(&mut self, data: &S3TimelineBlobData) { fn update_data(&mut self, data: &S3TimelineBlobData) {
self.count += 1; self.count += 1;
if let BlobDataParseResult::Parsed { match &data.blob_data {
index_part, BlobDataParseResult::Parsed {
index_part_generation: _, index_part,
s3_layers: _, index_part_generation,
} = &data.blob_data s3_layers: _,
{ } => {
*self *self
.indices_by_version .indices_by_version
.entry(index_part.get_version()) .entry(index_part.get_version())
.or_insert(0) += 1; .or_insert(0) += 1;
if let Err(e) = self.update_histograms(index_part) { // These statistics exist to track the transition to generations. By early 2024 there should be zero
// Value out of range? Warn that the results are untrustworthy // generation-less timelines in the field and this check can be removed.
tracing::warn!( if index_part_generation.is_none() {
"Error updating histograms, summary stats may be wrong: {}", self.indices_without_generation += 1;
e } else {
); self.indices_with_generation += 1;
}
if let Err(e) = self.update_histograms(index_part) {
// Value out of range? Warn that the results are untrustworthy
tracing::warn!(
"Error updating histograms, summary stats may be wrong: {}",
e
);
}
} }
BlobDataParseResult::Incorrect(_) => self.invalid_count += 1,
BlobDataParseResult::Relic => self.relic_count += 1,
} }
} }
@@ -156,7 +181,10 @@ impl MetadataSummary {
With errors: {1} With errors: {1}
With warnings: {2} With warnings: {2}
With garbage: {3} With garbage: {3}
Invalid: {9}
Relics: {10}
Index versions: {version_summary} Index versions: {version_summary}
Indices with/without generations: {7}/{8}
Timeline size bytes: {4} Timeline size bytes: {4}
Layer size bytes: {5} Layer size bytes: {5}
Timeline layer count: {6} Timeline layer count: {6}
@@ -168,6 +196,10 @@ Timeline layer count: {6}
self.timeline_size_bytes.oneline(), self.timeline_size_bytes.oneline(),
self.layer_size_bytes.oneline(), self.layer_size_bytes.oneline(),
self.layer_count.oneline(), self.layer_count.oneline(),
self.indices_with_generation,
self.indices_without_generation,
self.invalid_count,
self.relic_count
) )
} }