From d0cbfda15c916ee75066919940d0c3da714c5b95 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Thu, 19 Sep 2024 16:29:28 -0400 Subject: [PATCH] refactor(pageserver): check layer map valid in one place (#9051) We have 3 places where we implement layer map checks. ## Summary of changes Now we have a single check function being called in all places. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant.rs | 31 +++++++++++ pageserver/src/tenant/checks.rs | 55 +++++++++++++++++++ pageserver/src/tenant/timeline.rs | 33 ++---------- pageserver/src/tenant/timeline/compaction.rs | 21 +++----- storage_scrubber/src/checks.rs | 56 ++------------------ 5 files changed, 101 insertions(+), 95 deletions(-) create mode 100644 pageserver/src/tenant/checks.rs diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 14cb6f508d..d699d56075 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -140,6 +140,7 @@ pub mod metadata; pub mod remote_timeline_client; pub mod storage_layer; +pub mod checks; pub mod config; pub mod mgr; pub mod secondary; @@ -1573,6 +1574,9 @@ impl Tenant { image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>, end_lsn: Lsn, ) -> anyhow::Result> { + use checks::check_valid_layermap; + use itertools::Itertools; + let tline = self .create_test_timeline(new_timeline_id, initdb_lsn, pg_version, ctx) .await?; @@ -1587,6 +1591,18 @@ impl Tenant { .force_create_image_layer(lsn, images, Some(initdb_lsn), ctx) .await?; } + let layer_names = tline + .layers + .read() + .await + .layer_map() + .unwrap() + .iter_historic_layers() + .map(|layer| layer.layer_name()) + .collect_vec(); + if let Some(err) = check_valid_layermap(&layer_names) { + bail!("invalid layermap: {err}"); + } Ok(tline) } @@ -3197,6 +3213,9 @@ impl Tenant { image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>, end_lsn: Lsn, ) -> anyhow::Result> { + use checks::check_valid_layermap; + use itertools::Itertools; + let tline = self .branch_timeline_test(src_timeline, dst_id, ancestor_lsn, ctx) .await?; @@ -3217,6 +3236,18 @@ impl Tenant { .force_create_image_layer(lsn, images, Some(ancestor_lsn), ctx) .await?; } + let layer_names = tline + .layers + .read() + .await + .layer_map() + .unwrap() + .iter_historic_layers() + .map(|layer| layer.layer_name()) + .collect_vec(); + if let Some(err) = check_valid_layermap(&layer_names) { + bail!("invalid layermap: {err}"); + } Ok(tline) } diff --git a/pageserver/src/tenant/checks.rs b/pageserver/src/tenant/checks.rs new file mode 100644 index 0000000000..8eaa8a001c --- /dev/null +++ b/pageserver/src/tenant/checks.rs @@ -0,0 +1,55 @@ +use std::collections::BTreeSet; + +use itertools::Itertools; + +use super::storage_layer::LayerName; + +/// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong). +/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example, +/// +/// ```plain +/// | | | | +/// | 1 | | 2 | | 3 | +/// | | | | | | +/// ``` +/// +/// This is not a valid layer map because the LSN range of layer 1 intersects with the LSN range of layer 2. 1 and 2 should have +/// the same LSN range. +/// +/// The exception is that when layer 2 only contains a single key, it could be split over the LSN range. For example, +/// +/// ```plain +/// | | | 2 | | | +/// | 1 | |-------| | 3 | +/// | | | 4 | | | +/// +/// If layer 2 and 4 contain the same single key, this is also a valid layer map. +pub fn check_valid_layermap(metadata: &[LayerName]) -> Option { + let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?) + let mut all_delta_layers = Vec::new(); + for name in metadata { + if let LayerName::Delta(layer) = name { + if layer.key_range.start.next() != layer.key_range.end { + all_delta_layers.push(layer.clone()); + } + } + } + for layer in &all_delta_layers { + let lsn_range = &layer.lsn_range; + lsn_split_point.insert(lsn_range.start); + lsn_split_point.insert(lsn_range.end); + } + for layer in &all_delta_layers { + let lsn_range = layer.lsn_range.clone(); + let intersects = lsn_split_point.range(lsn_range).collect_vec(); + if intersects.len() > 1 { + let err = format!( + "layer violates the layer map LSN split assumption: layer {} intersects with LSN [{}]", + layer, + intersects.into_iter().map(|lsn| lsn.to_string()).join(", ") + ); + return Some(err); + } + } + None +} diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index a06cea2c66..f08f5caf95 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -5378,7 +5378,8 @@ impl Timeline { /// Force create an image layer and place it into the layer map. /// /// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`] - /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are placed into the layer map in one run. + /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are + /// placed into the layer map in one run AND be validated. #[cfg(test)] pub(super) async fn force_create_image_layer( self: &Arc, @@ -5424,7 +5425,8 @@ impl Timeline { /// Force create a delta layer and place it into the layer map. /// /// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`] - /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are placed into the layer map in one run. + /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are + /// placed into the layer map in one run AND be validated. #[cfg(test)] pub(super) async fn force_create_delta_layer( self: &Arc, @@ -5450,33 +5452,6 @@ impl Timeline { if let Some(check_start_lsn) = check_start_lsn { assert!(deltas.lsn_range.start >= check_start_lsn); } - // check if the delta layer does not violate the LSN invariant, the legacy compaction should always produce a batch of - // layers of the same start/end LSN, and so should the force inserted layer - { - /// Checks if a overlaps with b, assume a/b = [start, end). - pub fn overlaps_with(a: &Range, b: &Range) -> bool { - !(a.end <= b.start || b.end <= a.start) - } - - if deltas.key_range.start.next() != deltas.key_range.end { - let guard = self.layers.read().await; - let mut invalid_layers = - guard.layer_map()?.iter_historic_layers().filter(|layer| { - layer.is_delta() - && overlaps_with(&layer.lsn_range, &deltas.lsn_range) - && layer.lsn_range != deltas.lsn_range - // skip single-key layer files - && layer.key_range.start.next() != layer.key_range.end - }); - if let Some(layer) = invalid_layers.next() { - // If a delta layer overlaps with another delta layer AND their LSN range is not the same, panic - panic!( - "inserted layer violates delta layer LSN invariant: current_lsn_range={}..{}, conflict_lsn_range={}..{}", - deltas.lsn_range.start, deltas.lsn_range.end, layer.lsn_range.start, layer.lsn_range.end - ); - } - } - } let mut delta_layer_writer = DeltaLayerWriter::new( self.conf, self.timeline_id, diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index d1f06e3480..d1567b6b39 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -29,6 +29,7 @@ use utils::id::TimelineId; use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder}; use crate::page_cache; +use crate::tenant::checks::check_valid_layermap; use crate::tenant::remote_timeline_client::WaitCompletionError; use crate::tenant::storage_layer::merge_iterator::MergeIterator; use crate::tenant::storage_layer::split_writer::{ @@ -1788,20 +1789,12 @@ impl Timeline { stat.visit_image_layer(desc.file_size()); } } - for layer in &layer_selection { - let desc = layer.layer_desc(); - let key_range = &desc.key_range; - if desc.is_delta() && key_range.start.next() != key_range.end { - let lsn_range = desc.lsn_range.clone(); - let intersects = lsn_split_point.range(lsn_range).collect_vec(); - if intersects.len() > 1 { - bail!( - "cannot run gc-compaction because it violates the layer map LSN split assumption: layer {} intersects with LSN [{}]", - desc.key(), - intersects.into_iter().map(|lsn| lsn.to_string()).join(", ") - ); - } - } + let layer_names: Vec = layer_selection + .iter() + .map(|layer| layer.layer_desc().layer_name()) + .collect_vec(); + if let Some(err) = check_valid_layermap(&layer_names) { + bail!("cannot run gc-compaction because {}", err); } // The maximum LSN we are processing in this compaction loop let end_lsn = layer_selection diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs index 15dfb101b5..de6918b3da 100644 --- a/storage_scrubber/src/checks.rs +++ b/storage_scrubber/src/checks.rs @@ -1,7 +1,8 @@ -use std::collections::{BTreeSet, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use anyhow::Context; use itertools::Itertools; +use pageserver::tenant::checks::check_valid_layermap; use pageserver::tenant::layer_map::LayerMap; use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use pageserver_api::shard::ShardIndex; @@ -48,56 +49,6 @@ impl TimelineAnalysis { } } -/// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong). -/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example, -/// -/// ```plain -/// | | | | -/// | 1 | | 2 | | 3 | -/// | | | | | | -/// ``` -/// -/// This is not a valid layer map because the LSN range of layer 1 intersects with the LSN range of layer 2. 1 and 2 should have -/// the same LSN range. -/// -/// The exception is that when layer 2 only contains a single key, it could be split over the LSN range. For example, -/// -/// ```plain -/// | | | 2 | | | -/// | 1 | |-------| | 3 | -/// | | | 4 | | | -/// -/// If layer 2 and 4 contain the same single key, this is also a valid layer map. -fn check_valid_layermap(metadata: &HashMap) -> Option { - let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?) - let mut all_delta_layers = Vec::new(); - for (name, _) in metadata.iter() { - if let LayerName::Delta(layer) = name { - if layer.key_range.start.next() != layer.key_range.end { - all_delta_layers.push(layer.clone()); - } - } - } - for layer in &all_delta_layers { - let lsn_range = &layer.lsn_range; - lsn_split_point.insert(lsn_range.start); - lsn_split_point.insert(lsn_range.end); - } - for layer in &all_delta_layers { - let lsn_range = layer.lsn_range.clone(); - let intersects = lsn_split_point.range(lsn_range).collect_vec(); - if intersects.len() > 1 { - let err = format!( - "layer violates the layer map LSN split assumption: layer {} intersects with LSN [{}]", - layer, - intersects.into_iter().map(|lsn| lsn.to_string()).join(", ") - ); - return Some(err); - } - } - None -} - pub(crate) async fn branch_cleanup_and_check_errors( remote_client: &GenericRemoteStorage, id: &TenantShardTimelineId, @@ -177,7 +128,8 @@ pub(crate) async fn branch_cleanup_and_check_errors( } } - if let Some(err) = check_valid_layermap(&index_part.layer_metadata) { + let layer_names = index_part.layer_metadata.keys().cloned().collect_vec(); + if let Some(err) = check_valid_layermap(&layer_names) { result.errors.push(format!( "index_part.json contains invalid layer map structure: {err}" ));