feat(storage-scrubber): check layer map validity (#8867)

When implementing bottom-most gc-compaction, we analyzed the structure
of layer maps that the current compaction algorithm could produce, and
decided to only support structures without delta layer overlaps and LSN
intersections with the exception of single key layers.

## Summary of changes

This patch adds the layer map valid check in the storage scrubber.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2024-08-31 02:12:39 +08:00
committed by GitHub
parent e58e045ebb
commit df971f995c

View File

@@ -1,6 +1,7 @@
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeSet, HashMap, HashSet};
use anyhow::Context;
use itertools::Itertools;
use pageserver::tenant::layer_map::LayerMap;
use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
use pageserver_api::shard::ShardIndex;
@@ -47,6 +48,56 @@ impl TimelineAnalysis {
}
}
/// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong).
/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,
///
/// ```plain
/// | | | |
/// | 1 | | 2 | | 3 |
/// | | | | | |
/// ```
///
/// This is not a valid layer map because the LSN range of layer 1 intersects with the LSN range of layer 2. 1 and 2 should have
/// the same LSN range.
///
/// The exception is that when layer 2 only contains a single key, it could be split over the LSN range. For example,
///
/// ```plain
/// | | | 2 | | |
/// | 1 | |-------| | 3 |
/// | | | 4 | | |
///
/// If layer 2 and 4 contain the same single key, this is also a valid layer map.
fn check_valid_layermap(metadata: &HashMap<LayerName, LayerFileMetadata>) -> Option<String> {
let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)
let mut all_delta_layers = Vec::new();
for (name, _) in metadata.iter() {
if let LayerName::Delta(layer) = name {
if layer.key_range.start.next() != layer.key_range.end {
all_delta_layers.push(layer.clone());
}
}
}
for layer in &all_delta_layers {
let lsn_range = &layer.lsn_range;
lsn_split_point.insert(lsn_range.start);
lsn_split_point.insert(lsn_range.end);
}
for layer in &all_delta_layers {
let lsn_range = layer.lsn_range.clone();
let intersects = lsn_split_point.range(lsn_range).collect_vec();
if intersects.len() > 1 {
let err = format!(
"layer violates the layer map LSN split assumption: layer {} intersects with LSN [{}]",
layer,
intersects.into_iter().map(|lsn| lsn.to_string()).join(", ")
);
return Some(err);
}
}
None
}
pub(crate) async fn branch_cleanup_and_check_errors(
remote_client: &GenericRemoteStorage,
id: &TenantShardTimelineId,
@@ -126,6 +177,12 @@ pub(crate) async fn branch_cleanup_and_check_errors(
}
}
if let Some(err) = check_valid_layermap(&index_part.layer_metadata) {
result.errors.push(format!(
"index_part.json contains invalid layer map structure: {err}"
));
}
for (layer, metadata) in index_part.layer_metadata {
if metadata.file_size == 0 {
result.errors.push(format!(