From 98867418281a6da0e7f4204af1813edcc6a5fedf Mon Sep 17 00:00:00 2001 From: Bojan Serafimov Date: Wed, 11 Jan 2023 18:04:44 -0500 Subject: [PATCH] Compare against bruteforce, fix bugs --- pageserver/benches/bench_layer_map.rs | 32 ++++++++- pageserver/src/tenant/layer_map.rs | 96 ++++++++++++++++++++++++--- 2 files changed, 117 insertions(+), 11 deletions(-) diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index 218a78c047..7a9e2fa5e1 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ -150,8 +150,6 @@ fn bench_from_captest_env(c: &mut Criterion) { // Benchmark using metadata extracted from a real project that was taknig // too long processing layer map queries. fn bench_from_real_project(c: &mut Criterion) { - // TODO consider compressing this file - // Init layer map let now = Instant::now(); let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt")); @@ -168,6 +166,36 @@ fn bench_from_real_project(c: &mut Criterion) { .unwrap(); let partitioning = uniform_key_partitioning(&layer_map, latest_lsn); + // Check correctness of get_difficulty_map + // TODO put this in a dedicated test outside of this mod + { + println!("running correctness check"); + + let now = Instant::now(); + let result_bruteforce = layer_map.get_difficulty_map_bruteforce(latest_lsn, &partitioning); + assert!(result_bruteforce.len() == partitioning.parts.len()); + println!("Finished bruteforce in {:?}", now.elapsed()); + + let now = Instant::now(); + let result_fast = layer_map.get_difficulty_map(latest_lsn, &partitioning); + assert!(result_fast.len() == partitioning.parts.len()); + println!("Finished fast in {:?}", now.elapsed()); + + // Assert results are equal. Manually iterate for easier debugging. + // + // TODO The difficulty numbers are in the 50-80 range. That's pretty bad. + // We should be monitoring it in prod. + let zip = std::iter::zip( + &partitioning.parts, + std::iter::zip(result_bruteforce, result_fast), + ); + for (_part, (bruteforce, fast)) in zip { + assert_eq!(bruteforce, fast); + } + + println!("No issues found"); + } + // Define and name the benchmark function let mut group = c.benchmark_group("real_map"); group.bench_function("uniform_queries", |b| { diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 644c7aeddf..c25b40bb19 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -324,13 +324,14 @@ where for (change_key, change_val) in version.delta_coverage.range(start..end) { // If there's a relevant delta in this part, add 1 and recurse down if let Some(val) = current_val { - if val.get_lsn_range().end.0 >= lsn.start.0 { + if val.get_lsn_range().end > lsn.start { let kr = Key::from_i128(current_key)..Key::from_i128(change_key); let lr = lsn.start..val.get_lsn_range().start; - let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr)?; - - max_stacked_deltas = - std::cmp::max(max_stacked_deltas, 1 + max_stacked_deltas_underneath); + if !kr.is_empty() { + let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr)?; + max_stacked_deltas = + std::cmp::max(max_stacked_deltas, 1 + max_stacked_deltas_underneath); + } } } @@ -340,19 +341,96 @@ where // Consider the last part if let Some(val) = current_val { - if val.get_lsn_range().end.0 >= lsn.start.0 { + if val.get_lsn_range().end > lsn.start { let kr = Key::from_i128(current_key)..Key::from_i128(end); let lr = lsn.start..val.get_lsn_range().start; - let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr)?; - max_stacked_deltas = - std::cmp::max(max_stacked_deltas, 1 + max_stacked_deltas_underneath); + if !kr.is_empty() { + let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr)?; + max_stacked_deltas = + std::cmp::max(max_stacked_deltas, 1 + max_stacked_deltas_underneath); + } } } Ok(max_stacked_deltas) } + /// Count how many layers we need to visit for given key-lsn pair. + /// + /// Used as a helper for correctness checks only. Performance not critical. + pub fn get_difficulty(&self, lsn: Lsn, key: Key) -> usize { + match self.search(key, lsn) { + Some(search_result) => { + if search_result.layer.is_incremental() { + 1 + self.get_difficulty(search_result.lsn_floor, key) + } else { + 0 + } + } + None => 0, + } + } + + /// Used for correctness checking. Results are expected to be identical to + /// self.get_difficulty_map. Assumes self.search is correct. + pub fn get_difficulty_map_bruteforce( + &self, + lsn: Lsn, + partitioning: &KeyPartitioning, + ) -> Vec { + // Looking at the difficulty as a function of key, it could only increase + // when a delta layer starts or an image layer ends. Therefore it's sufficient + // to check the difficulties at: + // - the key.start for each non-empty part range + // - the key.start for each delta + // - the key.end for each image + let keys_iter: Box> = { + let mut keys: Vec = self + .iter_historic_layers() + .map(|layer| { + if layer.is_incremental() { + layer.get_key_range().start + } else { + layer.get_key_range().end + } + }) + .collect(); + keys.sort(); + Box::new(keys.into_iter()) + }; + let mut keys_iter = keys_iter.peekable(); + + // Iter the partition and keys together and query all the necessary + // keys, computing the max difficulty for each part. + partitioning + .parts + .iter() + .map(|part| { + let mut difficulty = 0; + // Partition ranges are assumed to be sorted and disjoint + // TODO assert it + for range in &part.ranges { + if !range.is_empty() { + difficulty = + std::cmp::max(difficulty, self.get_difficulty(lsn, range.start)); + } + while let Some(key) = keys_iter.peek() { + if key >= &range.end { + break; + } + let key = keys_iter.next().unwrap(); + if key < range.start { + continue; + } + difficulty = std::cmp::max(difficulty, self.get_difficulty(lsn, key)); + } + } + difficulty + }) + .collect() + } + /// For each part of a keyspace partitioning, return the maximum number of layers /// that would be needed for page reconstruction in that part at the given LSN. ///