mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-05 06:20:37 +00:00
Benchmark immutable bst layer map
This commit is contained in:
25
Cargo.lock
generated
25
Cargo.lock
generated
@@ -66,6 +66,15 @@ dependencies = [
|
||||
"backtrace",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "archery"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a8da9bc4c4053ee067669762bcaeea6e241841295a2b6c948312dad6ef4cc02"
|
||||
dependencies = [
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.7.2"
|
||||
@@ -2345,6 +2354,7 @@ dependencies = [
|
||||
"rand",
|
||||
"regex",
|
||||
"remote_storage",
|
||||
"rpds",
|
||||
"rstar",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
@@ -3154,6 +3164,15 @@ dependencies = [
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rpds"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "66262ea963eff99163e6b741fbc3417a52cc13074728c1047e9911789df9b000"
|
||||
dependencies = [
|
||||
"archery",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rstar"
|
||||
version = "0.9.3"
|
||||
@@ -3624,6 +3643,12 @@ version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
|
||||
[[package]]
|
||||
name = "storage_broker"
|
||||
version = "0.1.0"
|
||||
|
||||
@@ -69,6 +69,7 @@ remote_storage = { path = "../libs/remote_storage" }
|
||||
tenant_size_model = { path = "../libs/tenant_size_model" }
|
||||
utils = { path = "../libs/utils" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
rpds = "0.12.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.4"
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
use anyhow::Result;
|
||||
use num_traits::ToPrimitive;
|
||||
use pageserver::repository::{Key, Value};
|
||||
use pageserver::tenant::bst_layer_map::BSTLM;
|
||||
use pageserver::tenant::filename::{DeltaFileName, ImageFileName};
|
||||
use pageserver::tenant::layer_map::LayerMap;
|
||||
use pageserver::tenant::storage_layer::Layer;
|
||||
@@ -243,23 +245,67 @@ fn bench_from_captest_env(c: &mut Criterion) {
|
||||
// too long processing layer map queries.
|
||||
fn bench_from_real_project(c: &mut Criterion) {
|
||||
// TODO consider compressing this file
|
||||
|
||||
// Init layer map
|
||||
let now = Instant::now();
|
||||
let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt"));
|
||||
println!("Finished layer map init in {:?}", now.elapsed());
|
||||
|
||||
// Init bst layer map with the same layers
|
||||
let now = Instant::now();
|
||||
let mut bstlm = BSTLM::new();
|
||||
let mut sorted_layers: Vec<_> = layer_map.iter_historic_layers().collect();
|
||||
sorted_layers.sort_by(|a, b| a.get_lsn_range().start.cmp(&b.get_lsn_range().start));
|
||||
for layer in sorted_layers {
|
||||
if layer.is_incremental() {
|
||||
// TODO check if they're sorted
|
||||
let kr = layer.get_key_range();
|
||||
let lr = layer.get_lsn_range();
|
||||
|
||||
bstlm.insert(
|
||||
kr.start.to_i128(),
|
||||
kr.end.to_i128(),
|
||||
lr.start.0,
|
||||
format!("Layer {}", lr.start.0),
|
||||
);
|
||||
} else {
|
||||
let kr = layer.get_key_range();
|
||||
let lr = layer.get_lsn_range();
|
||||
|
||||
bstlm.insert(
|
||||
kr.start.to_i128(),
|
||||
kr.end.to_i128(),
|
||||
lr.start.0,
|
||||
format!("Layer {}", lr.start.0),
|
||||
);
|
||||
}
|
||||
}
|
||||
println!("Finished bst init in {:?}", now.elapsed());
|
||||
|
||||
// Choose uniformly distributed queries
|
||||
let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map);
|
||||
|
||||
// Test with uniform query pattern
|
||||
c.bench_function("real_map_uniform_queries", |b| {
|
||||
// Define and name the benchmark function
|
||||
let mut group = c.benchmark_group("real_map_uniform_queries");
|
||||
group.bench_function("current_code", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
layer_map.search(q.0, q.1).unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
group.bench_function("persistent_bst", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
bstlm.query(q.0.to_i128(), q.1 .0);
|
||||
}
|
||||
});
|
||||
});
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Benchmark using synthetic data. Arrange image layers on stacked diagonal lines.
|
||||
fn bench_sequential(c: &mut Criterion) {
|
||||
let mut layer_map = LayerMap::default();
|
||||
|
||||
// Init layer map. Create 100_000 layers arranged in 1000 diagonal lines.
|
||||
//
|
||||
// TODO This code is pretty slow and runs even if we're only running other
|
||||
@@ -267,39 +313,62 @@ fn bench_sequential(c: &mut Criterion) {
|
||||
// Putting it inside the `bench_function` closure is not a solution
|
||||
// because then it runs multiple times during warmup.
|
||||
let now = Instant::now();
|
||||
let mut layer_map = LayerMap::default();
|
||||
for i in 0..100_000 {
|
||||
// TODO try inserting a super-wide layer in between every 10 to reflect
|
||||
// what often happens with L1 layers that include non-rel changes.
|
||||
// Maybe do that as a separate test.
|
||||
let i32 = (i as u32) % 100;
|
||||
let zero = Key::from_hex("000000000000000000000000000000000000").unwrap();
|
||||
let layer = DummyImage {
|
||||
key_range: zero.add(10 * i32)..zero.add(10 * i32 + 1),
|
||||
lsn: Lsn(10 * i),
|
||||
lsn: Lsn(i),
|
||||
};
|
||||
layer_map.insert_historic(Arc::new(layer));
|
||||
}
|
||||
println!("Finished layer map init in {:?}", now.elapsed());
|
||||
|
||||
// Manually measure runtime without criterion because criterion
|
||||
// has a minimum sample size of 10 and I don't want to run it 10 times.
|
||||
println!("Finished init in {:?}", now.elapsed());
|
||||
// Init bst layer map with the same layers
|
||||
let now = Instant::now();
|
||||
let mut bstlm = BSTLM::new();
|
||||
for layer in layer_map.iter_historic_layers() {
|
||||
if layer.is_incremental() {
|
||||
panic!("AAA");
|
||||
} else {
|
||||
let kr = layer.get_key_range();
|
||||
let lr = layer.get_lsn_range();
|
||||
|
||||
bstlm.insert(
|
||||
kr.start.to_i128(),
|
||||
kr.end.to_i128(),
|
||||
lr.start.0,
|
||||
format!("Layer {}", lr.start.0),
|
||||
);
|
||||
}
|
||||
}
|
||||
println!("Finished bst init in {:?}", now.elapsed());
|
||||
|
||||
// Choose 100 uniformly random queries
|
||||
let rng = &mut StdRng::seed_from_u64(1);
|
||||
let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map)
|
||||
.choose_multiple(rng, 1)
|
||||
.choose_multiple(rng, 100)
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
// Define and name the benchmark function
|
||||
c.bench_function("sequential_uniform_queries", |b| {
|
||||
// Run the search queries
|
||||
let mut group = c.benchmark_group("sequential_uniform_queries");
|
||||
group.bench_function("current_code", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
layer_map.search(q.0, q.1).unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
group.bench_function("persistent_bst", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
bstlm.query(q.0.to_i128(), q.1 .0);
|
||||
}
|
||||
});
|
||||
});
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(group_1, bench_from_captest_env);
|
||||
|
||||
@@ -73,6 +73,7 @@ use utils::{
|
||||
|
||||
mod blob_io;
|
||||
pub mod block_io;
|
||||
pub mod bst_layer_map;
|
||||
mod delta_layer;
|
||||
mod disk_btree;
|
||||
pub(crate) mod ephemeral_file;
|
||||
|
||||
111
pageserver/src/tenant/bst_layer_map.rs
Normal file
111
pageserver/src/tenant/bst_layer_map.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
// TODO the `im` crate has 20x more downloads and also has a
// persistent/immutable BTree. See if it's better.
|
||||
use rpds::RedBlackTreeMap;
|
||||
|
||||
/// Layer map implemented using persistent/immutable binary search tree.
|
||||
/// This implementation is only good enough to run benchmarks,
|
||||
/// so it's missing unnecessary details. Values are String for now.
|
||||
pub struct BSTLM {
|
||||
/// Mapping key to the latest layer (if any) until the next key
|
||||
head: RedBlackTreeMap<i128, Option<String>>,
|
||||
|
||||
/// All previous states of `self.head`
|
||||
historic: BTreeMap<u64, RedBlackTreeMap<i128, Option<String>>>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for BSTLM {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let head_vec: Vec<_> = self.head.iter().collect();
|
||||
write!(f, "BSTLM: head: {:?}", head_vec)
|
||||
}
|
||||
}
|
||||
|
||||
impl BSTLM {
|
||||
pub fn new() -> Self {
|
||||
BSTLM {
|
||||
head: RedBlackTreeMap::default(),
|
||||
historic: BTreeMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert(self: &mut Self, key_begin: i128, key_end: i128, lsn: u64, value: String) {
|
||||
// TODO check for off-by-one errors
|
||||
|
||||
// It's only a persistent map, not a retroactive one
|
||||
if let Some(last_entry) = self.historic.iter().rev().next() {
|
||||
let last_lsn = last_entry.0;
|
||||
if lsn == *last_lsn {
|
||||
// TODO there are edge cases to take care of
|
||||
}
|
||||
if lsn < *last_lsn {
|
||||
todo!("smaller lsn not implemented yet")
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE The order of the following lines is important!!
|
||||
|
||||
// Preserve information after right endpoint
|
||||
let value_at_end = match self.head.range(0..key_end).last() {
|
||||
Some((_, Some(v))) => Some(v.clone()),
|
||||
Some((_, None)) => None,
|
||||
None => None,
|
||||
};
|
||||
self.head.insert_mut(key_end, value_at_end);
|
||||
|
||||
// Insert the left endpoint
|
||||
self.head.insert_mut(key_begin, Some(value.clone()));
|
||||
|
||||
// Cover the inside of the interval
|
||||
let to_remove: Vec<_> = self
|
||||
.head
|
||||
.range((key_begin + 1)..key_end)
|
||||
.map(|(k, _)| k.clone())
|
||||
.collect();
|
||||
for key in to_remove {
|
||||
self.head.remove_mut(&key);
|
||||
}
|
||||
|
||||
// Remember history. Clone is O(1)
|
||||
self.historic.insert(lsn, self.head.clone());
|
||||
}
|
||||
|
||||
pub fn query(self: &Self, key: i128, lsn: u64) -> Option<&String> {
|
||||
// TODO check for off-by-one errors
|
||||
|
||||
let version = self.historic.range(0..=lsn).rev().next()?.1;
|
||||
version.range(0..=key).rev().next()?.1.as_ref()
|
||||
}
|
||||
|
||||
// TODO Add API for delta layers with lsn range.
|
||||
// The easy solution is to only store images, and then from every
|
||||
// image point to deltas on top of it. There might be something
|
||||
// nicer but we have this solution as backup.
|
||||
}
|
||||
|
||||
/// Inserts three overlapping layers at increasing LSNs and checks that each
/// recorded version answers point queries with the layer that was on top at
/// that LSN.
#[test]
fn test_bstlm() {
    let mut bstlm = BSTLM::new();
    bstlm.insert(0, 5, 100, "Layer 1".to_string());
    bstlm.insert(3, 9, 110, "Layer 2".to_string());
    bstlm.insert(5, 6, 120, "Layer 3".to_string());

    // After Layer 1 insertion
    assert_eq!(bstlm.query(1, 105).map(String::as_str), Some("Layer 1"));
    assert_eq!(bstlm.query(4, 105).map(String::as_str), Some("Layer 1"));

    // After Layer 2 insertion
    assert_eq!(bstlm.query(4, 115).map(String::as_str), Some("Layer 2"));
    assert_eq!(bstlm.query(8, 115).map(String::as_str), Some("Layer 2"));
    assert_eq!(bstlm.query(11, 115), None);

    // After Layer 3 insertion
    assert_eq!(bstlm.query(4, 125).map(String::as_str), Some("Layer 2"));
    assert_eq!(bstlm.query(5, 125).map(String::as_str), Some("Layer 3"));

    // Layer 2 must still be visible to the right of Layer 3
    assert_eq!(bstlm.query(7, 125).map(String::as_str), Some("Layer 2"));
}
|
||||
Reference in New Issue
Block a user