Compare against bruteforce, fix bugs

This commit is contained in:
Bojan Serafimov
2023-01-11 18:04:44 -05:00
parent 5557fc6062
commit 9886741828
2 changed files with 117 additions and 11 deletions

View File

@@ -150,8 +150,6 @@ fn bench_from_captest_env(c: &mut Criterion) {
// Benchmark using metadata extracted from a real project that was taknig
// too long processing layer map queries.
fn bench_from_real_project(c: &mut Criterion) {
// TODO consider compressing this file
// Init layer map
let now = Instant::now();
let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt"));
@@ -168,6 +166,36 @@ fn bench_from_real_project(c: &mut Criterion) {
.unwrap();
let partitioning = uniform_key_partitioning(&layer_map, latest_lsn);
// Check correctness of get_difficulty_map
// TODO put this in a dedicated test outside of this mod
{
println!("running correctness check");
let now = Instant::now();
let result_bruteforce = layer_map.get_difficulty_map_bruteforce(latest_lsn, &partitioning);
assert!(result_bruteforce.len() == partitioning.parts.len());
println!("Finished bruteforce in {:?}", now.elapsed());
let now = Instant::now();
let result_fast = layer_map.get_difficulty_map(latest_lsn, &partitioning);
assert!(result_fast.len() == partitioning.parts.len());
println!("Finished fast in {:?}", now.elapsed());
// Assert results are equal. Manually iterate for easier debugging.
//
// TODO The difficulty numbers are in the 50-80 range. That's pretty bad.
// We should be monitoring it in prod.
let zip = std::iter::zip(
&partitioning.parts,
std::iter::zip(result_bruteforce, result_fast),
);
for (_part, (bruteforce, fast)) in zip {
assert_eq!(bruteforce, fast);
}
println!("No issues found");
}
// Define and name the benchmark function
let mut group = c.benchmark_group("real_map");
group.bench_function("uniform_queries", |b| {

View File

@@ -324,13 +324,14 @@ where
for (change_key, change_val) in version.delta_coverage.range(start..end) {
// If there's a relevant delta in this part, add 1 and recurse down
if let Some(val) = current_val {
if val.get_lsn_range().end.0 >= lsn.start.0 {
if val.get_lsn_range().end > lsn.start {
let kr = Key::from_i128(current_key)..Key::from_i128(change_key);
let lr = lsn.start..val.get_lsn_range().start;
let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr)?;
max_stacked_deltas =
std::cmp::max(max_stacked_deltas, 1 + max_stacked_deltas_underneath);
if !kr.is_empty() {
let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr)?;
max_stacked_deltas =
std::cmp::max(max_stacked_deltas, 1 + max_stacked_deltas_underneath);
}
}
}
@@ -340,19 +341,96 @@ where
// Consider the last part
if let Some(val) = current_val {
if val.get_lsn_range().end.0 >= lsn.start.0 {
if val.get_lsn_range().end > lsn.start {
let kr = Key::from_i128(current_key)..Key::from_i128(end);
let lr = lsn.start..val.get_lsn_range().start;
let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr)?;
max_stacked_deltas =
std::cmp::max(max_stacked_deltas, 1 + max_stacked_deltas_underneath);
if !kr.is_empty() {
let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr)?;
max_stacked_deltas =
std::cmp::max(max_stacked_deltas, 1 + max_stacked_deltas_underneath);
}
}
}
Ok(max_stacked_deltas)
}
/// Count how many layers we need to visit for given key-lsn pair.
///
/// Used as a helper for correctness checks only. Performance not critical.
pub fn get_difficulty(&self, lsn: Lsn, key: Key) -> usize {
match self.search(key, lsn) {
Some(search_result) => {
if search_result.layer.is_incremental() {
1 + self.get_difficulty(search_result.lsn_floor, key)
} else {
0
}
}
None => 0,
}
}
/// Used for correctness checking. Results are expected to be identical to
/// self.get_difficulty_map. Assumes self.search is correct.
pub fn get_difficulty_map_bruteforce(
&self,
lsn: Lsn,
partitioning: &KeyPartitioning,
) -> Vec<usize> {
// Looking at the difficulty as a function of key, it could only increase
// when a delta layer starts or an image layer ends. Therefore it's sufficient
// to check the difficulties at:
// - the key.start for each non-empty part range
// - the key.start for each delta
// - the key.end for each image
let keys_iter: Box<dyn Iterator<Item = Key>> = {
let mut keys: Vec<Key> = self
.iter_historic_layers()
.map(|layer| {
if layer.is_incremental() {
layer.get_key_range().start
} else {
layer.get_key_range().end
}
})
.collect();
keys.sort();
Box::new(keys.into_iter())
};
let mut keys_iter = keys_iter.peekable();
// Iter the partition and keys together and query all the necessary
// keys, computing the max difficulty for each part.
partitioning
.parts
.iter()
.map(|part| {
let mut difficulty = 0;
// Partition ranges are assumed to be sorted and disjoint
// TODO assert it
for range in &part.ranges {
if !range.is_empty() {
difficulty =
std::cmp::max(difficulty, self.get_difficulty(lsn, range.start));
}
while let Some(key) = keys_iter.peek() {
if key >= &range.end {
break;
}
let key = keys_iter.next().unwrap();
if key < range.start {
continue;
}
difficulty = std::cmp::max(difficulty, self.get_difficulty(lsn, key));
}
}
difficulty
})
.collect()
}
/// For each part of a keyspace partitioning, return the maximum number of layers
/// that would be needed for page reconstruction in that part at the given LSN.
///