Compare commits

...

4 Commits

Author      SHA1        Message                                                                Date
John Spray  94e1d80a64  scrubber: make scan-metadata enumerate relics & unreadable timelines  2023-12-08 14:48:41 +00:00
John Spray  a28d91e8bc  scrubber: report on generation-ful-ness of indices                     2023-12-08 13:59:42 +00:00
John Spray  041d610fbe  scrubber: handle initdb files                                          2023-12-08 13:49:40 +00:00
John Spray  ec03c29644  scrubber: only trim prefix if it ends with /                           2023-12-08 13:49:40 +00:00
3 changed files with 70 additions and 18 deletions

View File

@@ -142,7 +142,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
                     .collect();
                 if !orphan_layers.is_empty() {
-                    result.errors.push(format!(
+                    result.warnings.push(format!(
                         "index_part.json does not contain layers from S3: {:?}",
                         orphan_layers
                             .iter()
@@ -170,6 +170,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
                     ));
                 }
             }
+            BlobDataParseResult::Relic => {}
             BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend(
                 parse_errors
                     .into_iter()
@@ -215,6 +216,8 @@ pub(crate) enum BlobDataParseResult {
         index_part_generation: Generation,
         s3_layers: HashSet<(LayerFileName, Generation)>,
     },
+    /// The remains of a deleted Timeline (i.e. an initdb archive only)
+    Relic,
     Incorrect(Vec<String>),
 }
@@ -245,6 +248,7 @@ pub(crate) async fn list_timeline_blobs(
     timeline_dir_target.delimiter = String::new();

     let mut index_parts: Vec<ObjectIdentifier> = Vec::new();
+    let mut initdb_archive: bool = false;

     let stream = stream_listing(s3_client, &timeline_dir_target);
     pin_mut!(stream);
@@ -258,6 +262,10 @@ pub(crate) async fn list_timeline_blobs(
tracing::info!("Index key {key}");
index_parts.push(obj)
}
Some("initdb.tar.zst") => {
tracing::info!("initdb archive {key}");
initdb_archive = true;
}
Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) {
Ok((new_layer, gen)) => {
tracing::info!("Parsed layer key: {} {:?}", new_layer, gen);
@@ -279,6 +287,16 @@ pub(crate) async fn list_timeline_blobs(
         }
     }

+    if index_parts.is_empty() && s3_layers.is_empty() && initdb_archive {
+        tracing::info!(
+            "Timeline is empty apart from initdb archive: expected post-deletion state."
+        );
+        return Ok(S3TimelineBlobData {
+            blob_data: BlobDataParseResult::Relic,
+            keys_to_remove: Vec::new(),
+        });
+    }
+
     // Choose the index_part with the highest generation
     let (index_part_object, index_part_generation) = match index_parts
         .iter()
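
Taken together, this file's hunks add one classification rule: a timeline prefix holding no index_part.json and no layers, only initdb.tar.zst, is reported as a Relic rather than as damage. A minimal standalone sketch of that rule (TimelineClass and classify_timeline are illustrative stand-ins, not the scrubber's real types):

#[derive(Debug, PartialEq)]
enum TimelineClass {
    /// At least one index_part.json exists: parse and check it as usual.
    HasIndex,
    /// Only initdb.tar.zst remains: the expected state after timeline deletion.
    Relic,
    /// Nothing recognizable at all.
    Empty,
}

fn classify_timeline(index_parts: usize, layers: usize, initdb_archive: bool) -> TimelineClass {
    // Mirrors the predicate above:
    // index_parts.is_empty() && s3_layers.is_empty() && initdb_archive
    if index_parts == 0 && layers == 0 && initdb_archive {
        TimelineClass::Relic
    } else if index_parts > 0 {
        TimelineClass::HasIndex
    } else {
        TimelineClass::Empty
    }
}

fn main() {
    assert_eq!(classify_timeline(0, 0, true), TimelineClass::Relic);
    assert_eq!(classify_timeline(1, 12, true), TimelineClass::HasIndex);
    assert_eq!(classify_timeline(0, 0, false), TimelineClass::Empty);
}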

View File

@@ -86,7 +86,9 @@ impl S3Target {
         if new_self.prefix_in_bucket.is_empty() {
             new_self.prefix_in_bucket = format!("/{}/", new_segment);
         } else {
-            let _ = new_self.prefix_in_bucket.pop();
+            if new_self.prefix_in_bucket.ends_with('/') {
+                let _ = new_self.prefix_in_bucket.pop();
+            }
             new_self.prefix_in_bucket =
                 [&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
         }
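
The motivation for this guard: list_timeline_blobs above sets the delimiter to an empty string, so an accumulated prefix need not end in '/', and the old unconditional pop() would chop a real character off the previous segment. A self-contained reproduction using a plain String in place of S3Target (add_segment is a hypothetical stand-in mirroring the hunk above):

fn add_segment(prefix_in_bucket: &mut String, delimiter: &str, new_segment: &str) {
    if prefix_in_bucket.is_empty() {
        *prefix_in_bucket = format!("/{}/", new_segment);
    } else {
        // The fix: only pop when the last char really is the trailing '/';
        // with an empty delimiter there is nothing to trim.
        if prefix_in_bucket.ends_with('/') {
            let _ = prefix_in_bucket.pop();
        }
        let joined = [prefix_in_bucket.as_str(), new_segment, ""].join(delimiter);
        *prefix_in_bucket = joined;
    }
}

fn main() {
    // Default "/" delimiter: a trailing slash is present, popping it is safe.
    let mut with_slash = String::from("/tenants/");
    add_segment(&mut with_slash, "/", "timelines");
    assert_eq!(with_slash, "/tenants/timelines/");

    // Empty delimiter, as list_timeline_blobs sets above: no trailing slash.
    // The old unconditional pop would have produced "/tenants/atimelines".
    let mut no_slash = String::from("/tenants/ab");
    add_segment(&mut no_slash, "", "timelines");
    assert_eq!(no_slash, "/tenants/abtimelines");
}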

View File

@@ -20,6 +20,14 @@ pub struct MetadataSummary {
     with_warnings: HashSet<TenantTimelineId>,
     with_garbage: HashSet<TenantTimelineId>,
     indices_by_version: HashMap<usize, usize>,
+    indices_with_generation: usize,
+    indices_without_generation: usize,
+
+    /// Timelines that couldn't even parse metadata and/or object keys: extremely damaged
+    invalid_count: usize,
+
+    /// Timelines with just an initdb archive, left behind after deletion.
+    relic_count: usize,
     layer_count: MinMaxHisto,
     timeline_size_bytes: MinMaxHisto,
@@ -39,6 +47,8 @@ impl MinMaxHisto {
     fn new() -> Self {
         Self {
             histo: histogram::Histogram::builder()
+                // Accommodate tenant sizes up to 32TiB
+                .maximum_value(32 * 1024 * 1024 * 1024 * 1024)
                 .build()
                 .expect("Bad histogram params"),
             min: u64::MAX,
@@ -90,6 +100,10 @@ impl MetadataSummary {
             with_warnings: HashSet::new(),
             with_garbage: HashSet::new(),
             indices_by_version: HashMap::new(),
+            indices_with_generation: 0,
+            indices_without_generation: 0,
+            invalid_count: 0,
+            relic_count: 0,
             layer_count: MinMaxHisto::new(),
             timeline_size_bytes: MinMaxHisto::new(),
             layer_size_bytes: MinMaxHisto::new(),
@@ -111,24 +125,35 @@ impl MetadataSummary {
     fn update_data(&mut self, data: &S3TimelineBlobData) {
         self.count += 1;
-        if let BlobDataParseResult::Parsed {
-            index_part,
-            index_part_generation: _,
-            s3_layers: _,
-        } = &data.blob_data
-        {
-            *self
-                .indices_by_version
-                .entry(index_part.get_version())
-                .or_insert(0) += 1;
+        match &data.blob_data {
+            BlobDataParseResult::Parsed {
+                index_part,
+                index_part_generation,
+                s3_layers: _,
+            } => {
+                *self
+                    .indices_by_version
+                    .entry(index_part.get_version())
+                    .or_insert(0) += 1;

-            if let Err(e) = self.update_histograms(index_part) {
-                // Value out of range? Warn that the results are untrustworthy
-                tracing::warn!(
-                    "Error updating histograms, summary stats may be wrong: {}",
-                    e
-                );
+                // These statistics exist to track the transition to generations. By early 2024 there should be zero
+                // generation-less timelines in the field and this check can be removed.
+                if index_part_generation.is_none() {
+                    self.indices_without_generation += 1;
+                } else {
+                    self.indices_with_generation += 1;
+                }
+
+                if let Err(e) = self.update_histograms(index_part) {
+                    // Value out of range? Warn that the results are untrustworthy
+                    tracing::warn!(
+                        "Error updating histograms, summary stats may be wrong: {}",
+                        e
+                    );
+                }
             }
+            BlobDataParseResult::Incorrect(_) => self.invalid_count += 1,
+            BlobDataParseResult::Relic => self.relic_count += 1,
         }
     }
@@ -156,7 +181,10 @@ impl MetadataSummary {
 With errors: {1}
 With warnings: {2}
 With garbage: {3}
+Invalid: {9}
+Relics: {10}
 Index versions: {version_summary}
+Indices with/without generations: {7}/{8}
 Timeline size bytes: {4}
 Layer size bytes: {5}
 Timeline layer count: {6}
@@ -168,6 +196,10 @@ Timeline layer count: {6}
             self.timeline_size_bytes.oneline(),
             self.layer_size_bytes.oneline(),
             self.layer_count.oneline(),
+            self.indices_with_generation,
+            self.indices_without_generation,
+            self.invalid_count,
+            self.relic_count
         )
     }
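
For context on the generation tally above: the scrubber counts an index as generation-less when index_part_generation.is_none(), i.e. an index_part.json written before generation suffixes existed. A rough stand-in for the pageserver's Generation type, purely for illustration (the real definition lives in the pageserver code, not in this diff):

// Hypothetical stand-in: the actual Generation type and its variants may differ.
#[derive(Debug, Clone, Copy)]
enum Generation {
    /// Legacy index_part.json with no generation suffix in its key.
    None,
    /// index_part.json-<gen> written by a generation-aware pageserver.
    Valid(u32),
}

impl Generation {
    fn is_none(&self) -> bool {
        matches!(self, Generation::None)
    }
}

fn main() {
    let mut with_gen = 0usize;
    let mut without_gen = 0usize;
    for g in [Generation::None, Generation::Valid(3), Generation::Valid(4)] {
        // Same tally as MetadataSummary::update_data above.
        if g.is_none() {
            without_gen += 1;
        } else {
            with_gen += 1;
        }
    }
    println!("with/without generations: {with_gen}/{without_gen}");
}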