Mirror of https://github.com/neondatabase/neon.git, synced 2026-03-12 12:50:37 +00:00

Compare commits: split-prox...scrubber/g (4 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 94e1d80a64 |  |
|  | a28d91e8bc |  |
|  | 041d610fbe |  |
|  | ec03c29644 |  |
```diff
@@ -142,7 +142,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
         .collect();
 
     if !orphan_layers.is_empty() {
-        result.errors.push(format!(
+        result.warnings.push(format!(
             "index_part.json does not contain layers from S3: {:?}",
             orphan_layers
                 .iter()
@@ -170,6 +170,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
                 ));
             }
         }
+        BlobDataParseResult::Relic => {}
         BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend(
             parse_errors
                 .into_iter()
@@ -215,6 +216,8 @@ pub(crate) enum BlobDataParseResult {
         index_part_generation: Generation,
         s3_layers: HashSet<(LayerFileName, Generation)>,
     },
+    /// The remains of a deleted Timeline (i.e. an initdb archive only)
+    Relic,
     Incorrect(Vec<String>),
 }
 
```
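The new `Relic` variant turns "deleted timeline, initdb archive left behind" into an explicit, non-error outcome that every `match` must handle, which is exactly what the `BlobDataParseResult::Relic => {}` arm above does. A minimal, self-contained sketch of that exhaustive handling; the type aliases and the `describe` helper are illustrative assumptions, not scrubber code:

```rust
use std::collections::HashSet;

// Illustrative stand-ins for the scrubber's real types.
type LayerFileName = String;
type Generation = u32;
#[allow(dead_code)]
struct IndexPart;

#[allow(dead_code)]
enum BlobDataParseResult {
    Parsed {
        index_part: IndexPart,
        index_part_generation: Generation,
        s3_layers: HashSet<(LayerFileName, Generation)>,
    },
    /// The remains of a deleted Timeline (i.e. an initdb archive only)
    Relic,
    Incorrect(Vec<String>),
}

// With the Relic variant, a caller must decide explicitly that a
// deleted-timeline remnant is a non-error, rather than falling into a
// catch-all error path.
fn describe(result: &BlobDataParseResult) -> String {
    match result {
        BlobDataParseResult::Parsed { s3_layers, .. } => {
            format!("healthy timeline with {} layers", s3_layers.len())
        }
        BlobDataParseResult::Relic => "deleted timeline (initdb archive only)".to_string(),
        BlobDataParseResult::Incorrect(errors) => {
            format!("damaged timeline: {} parse errors", errors.len())
        }
    }
}

fn main() {
    println!("{}", describe(&BlobDataParseResult::Relic));
}
```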
```diff
@@ -245,6 +248,7 @@ pub(crate) async fn list_timeline_blobs(
     timeline_dir_target.delimiter = String::new();
 
     let mut index_parts: Vec<ObjectIdentifier> = Vec::new();
+    let mut initdb_archive: bool = false;
 
     let stream = stream_listing(s3_client, &timeline_dir_target);
     pin_mut!(stream);
@@ -258,6 +262,10 @@ pub(crate) async fn list_timeline_blobs(
                 tracing::info!("Index key {key}");
                 index_parts.push(obj)
             }
+            Some("initdb.tar.zst") => {
+                tracing::info!("initdb archive {key}");
+                initdb_archive = true;
+            }
             Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) {
                 Ok((new_layer, gen)) => {
                     tracing::info!("Parsed layer key: {} {:?}", new_layer, gen);
@@ -279,6 +287,16 @@ pub(crate) async fn list_timeline_blobs(
         }
     }
 
+    if index_parts.is_empty() && s3_layers.is_empty() && initdb_archive {
+        tracing::info!(
+            "Timeline is empty apart from initdb archive: expected post-deletion state."
+        );
+        return Ok(S3TimelineBlobData {
+            blob_data: BlobDataParseResult::Relic,
+            keys_to_remove: Vec::new(),
+        });
+    }
+
     // Choose the index_part with the highest generation
     let (index_part_object, index_part_generation) = match index_parts
         .iter()
```
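Inside `list_timeline_blobs`, the stream loop dispatches on the final segment of each object key, and `initdb.tar.zst` now gets its own arm before layer-name parsing so a lone archive can be recognized as a relic. A rough standalone sketch of that dispatch; `object_name`, `TimelineObject`, `classify`, and the sample keys are hypothetical stand-ins for the scrubber's real helpers:

```rust
/// Hypothetical stand-in for the scrubber's key helper: the final
/// `/`-separated segment of an S3 key.
fn object_name(key: &str) -> Option<&str> {
    key.rsplit('/').next()
}

#[derive(Debug)]
enum TimelineObject {
    IndexPart,
    InitdbArchive,
    MaybeLayer(String),
}

fn classify(key: &str) -> Option<TimelineObject> {
    match object_name(key) {
        Some("index_part.json") => Some(TimelineObject::IndexPart),
        // New arm: a deletion can leave only this archive behind.
        Some("initdb.tar.zst") => Some(TimelineObject::InitdbArchive),
        // Anything else may be a layer file; the real code hands the
        // name to parse_layer_object_name() and records parse errors.
        Some(name) => Some(TimelineObject::MaybeLayer(name.to_string())),
        None => None,
    }
}

fn main() {
    for key in [
        "tenants/t1/timelines/tl1/index_part.json",
        "tenants/t1/timelines/tl1/initdb.tar.zst",
        "tenants/t1/timelines/tl1/some-layer-file",
    ] {
        println!("{key} -> {:?}", classify(key));
    }
}
```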
```diff
@@ -86,7 +86,9 @@ impl S3Target {
         if new_self.prefix_in_bucket.is_empty() {
             new_self.prefix_in_bucket = format!("/{}/", new_segment);
         } else {
-            let _ = new_self.prefix_in_bucket.pop();
+            if new_self.prefix_in_bucket.ends_with('/') {
+                let _ = new_self.prefix_in_bucket.pop();
+            }
             new_self.prefix_in_bucket =
                 [&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
         }
```
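The guard matters because `prefix_in_bucket` does not always end with the delimiter: unconditionally popping the last character would truncate a prefix like `tenants` to `tenant`. A cut-down model of the join logic, with the free function `join_segment` standing in for the method and a hard-coded `/` delimiter (both assumptions for illustration):

```rust
// Cut-down model of the prefix handling in S3Target.
fn join_segment(mut prefix_in_bucket: String, new_segment: &str) -> String {
    let delimiter = "/";
    if prefix_in_bucket.is_empty() {
        prefix_in_bucket = format!("/{}/", new_segment);
    } else {
        // Only strip a trailing delimiter if one is actually present,
        // so a prefix like "tenants" keeps its final character.
        if prefix_in_bucket.ends_with('/') {
            let _ = prefix_in_bucket.pop();
        }
        prefix_in_bucket = [prefix_in_bucket.as_str(), new_segment, ""].join(delimiter);
    }
    prefix_in_bucket
}

fn main() {
    // Trailing slash present: stripped before joining.
    assert_eq!(join_segment("tenants/".into(), "t1"), "tenants/t1/");
    // No trailing slash: the old unconditional pop would have eaten the
    // 's', producing "tenant/t1/"; now the prefix is preserved.
    assert_eq!(join_segment("tenants".into(), "t1"), "tenants/t1/");
    println!("prefix joins behave as expected");
}
```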
```diff
@@ -20,6 +20,14 @@ pub struct MetadataSummary {
     with_warnings: HashSet<TenantTimelineId>,
     with_garbage: HashSet<TenantTimelineId>,
     indices_by_version: HashMap<usize, usize>,
+    indices_with_generation: usize,
+    indices_without_generation: usize,
+
+    /// Timelines that couldn't even parse metadata and/or object keys: extremely damaged
+    invalid_count: usize,
+
+    /// Timelines with just an initdb archive, left behind after deletion.
+    relic_count: usize,
 
     layer_count: MinMaxHisto,
     timeline_size_bytes: MinMaxHisto,
@@ -39,6 +47,8 @@ impl MinMaxHisto {
     fn new() -> Self {
         Self {
             histo: histogram::Histogram::builder()
+                // Accomodate tenant sizes up to 32TiB
+                .maximum_value(32 * 1024 * 1024 * 1024 * 1024)
                 .build()
                 .expect("Bad histogram params"),
             min: u64::MAX,
```
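As a sanity check on that bound: 32 TiB is 32 × 1024⁴ = 35,184,372,088,832 bytes. A tiny verification in plain Rust, independent of the `histogram` crate:

```rust
fn main() {
    // 32 TiB expressed in bytes, matching the histogram's new maximum.
    const MAX_TENANT_SIZE: u64 = 32 * 1024 * 1024 * 1024 * 1024;
    assert_eq!(MAX_TENANT_SIZE, 35_184_372_088_832);
    assert_eq!(MAX_TENANT_SIZE, 32u64 << 40); // 1 TiB = 2^40 bytes
    println!("{MAX_TENANT_SIZE} bytes");
}
```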
```diff
@@ -90,6 +100,10 @@ impl MetadataSummary {
             with_warnings: HashSet::new(),
             with_garbage: HashSet::new(),
             indices_by_version: HashMap::new(),
+            indices_with_generation: 0,
+            indices_without_generation: 0,
+            invalid_count: 0,
+            relic_count: 0,
             layer_count: MinMaxHisto::new(),
             timeline_size_bytes: MinMaxHisto::new(),
             layer_size_bytes: MinMaxHisto::new(),
@@ -111,24 +125,35 @@ impl MetadataSummary {
 
     fn update_data(&mut self, data: &S3TimelineBlobData) {
         self.count += 1;
-        if let BlobDataParseResult::Parsed {
-            index_part,
-            index_part_generation: _,
-            s3_layers: _,
-        } = &data.blob_data
-        {
-            *self
-                .indices_by_version
-                .entry(index_part.get_version())
-                .or_insert(0) += 1;
-
-            if let Err(e) = self.update_histograms(index_part) {
-                // Value out of range? Warn that the results are untrustworthy
-                tracing::warn!(
-                    "Error updating histograms, summary stats may be wrong: {}",
-                    e
-                );
-            }
-        }
-    }
+        match &data.blob_data {
+            BlobDataParseResult::Parsed {
+                index_part,
+                index_part_generation,
+                s3_layers: _,
+            } => {
+                *self
+                    .indices_by_version
+                    .entry(index_part.get_version())
+                    .or_insert(0) += 1;
+
+                // These statistics exist to track the transition to generations. By early 2024 there should be zero
+                // generation-less timelines in the field and this check can be removed.
+                if index_part_generation.is_none() {
+                    self.indices_without_generation += 1;
+                } else {
+                    self.indices_with_generation += 1;
+                }
+
+                if let Err(e) = self.update_histograms(index_part) {
+                    // Value out of range? Warn that the results are untrustworthy
+                    tracing::warn!(
+                        "Error updating histograms, summary stats may be wrong: {}",
+                        e
+                    );
+                }
+            }
+            BlobDataParseResult::Incorrect(_) => self.invalid_count += 1,
+            BlobDataParseResult::Relic => self.relic_count += 1,
+        }
+    }
 
```
```diff
@@ -156,7 +181,10 @@ impl MetadataSummary {
 With errors: {1}
 With warnings: {2}
 With garbage: {3}
+Invalid: {9}
+Relics: {10}
 Index versions: {version_summary}
+Indices with/without generations: {7}/{8}
 Timeline size bytes: {4}
 Layer size bytes: {5}
 Timeline layer count: {6}
@@ -168,6 +196,10 @@ Timeline layer count: {6}
             self.timeline_size_bytes.oneline(),
             self.layer_size_bytes.oneline(),
             self.layer_count.oneline(),
+            self.indices_with_generation,
+            self.indices_without_generation,
+            self.invalid_count,
+            self.relic_count
         )
     }
 
```
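The template relies on Rust's zero-indexed positional `format!` arguments, which may be referenced out of order; the four new arguments appended to the list are picked up at indices 7 through 10. A small illustration of the mechanism (values are made up):

```rust
fn main() {
    // Positional format! arguments are zero-indexed and may be referenced
    // out of order, so new fields can be appended to the argument list
    // and used anywhere in the template.
    let line = format!(
        "Invalid: {2}, Relics: {3}, with/without generations: {0}/{1}",
        90, 10, 5, 1
    );
    assert_eq!(line, "Invalid: 5, Relics: 1, with/without generations: 90/10");
    println!("{line}");
}
```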