Hacky first attempt to integrate

This commit is contained in:
Bojan Serafimov
2022-12-06 14:14:27 -05:00
parent 39003aa9f3
commit 6bce11e810
3 changed files with 106 additions and 49 deletions

View File

@@ -1,7 +1,7 @@
use anyhow::Result;
use num_traits::ToPrimitive;
use pageserver::repository::{Key, Value};
use pageserver::tenant::bst_layer_map::BSTLM;
use pageserver::tenant::bst_layer_map::PersistentLayerMap;
use pageserver::tenant::filename::{DeltaFileName, ImageFileName};
use pageserver::tenant::layer_map::LayerMap;
use pageserver::tenant::storage_layer::Layer;
@@ -253,7 +253,7 @@ fn bench_from_real_project(c: &mut Criterion) {
// Init bst layer map with the same layers
let now = Instant::now();
let mut bstlm = BSTLM::new();
let mut bstlm = PersistentLayerMap::new();
let mut sorted_layers: Vec<_> = layer_map.iter_historic_layers().collect();
sorted_layers.sort_by(|a, b| a.get_lsn_range().start.cmp(&b.get_lsn_range().start));
// TODO implement out of order inserts
@@ -315,21 +315,20 @@ fn bench_sequential(c: &mut Criterion) {
// Init bst layer map with the same layers
let now = Instant::now();
let mut bstlm = BSTLM::new();
for layer in layer_map.iter_historic_layers() {
if layer.is_incremental() {
panic!("AAA");
} else {
let kr = layer.get_key_range();
let lr = layer.get_lsn_range();
let mut bstlm = PersistentLayerMap::new();
let mut sorted_layers: Vec<_> = layer_map.iter_historic_layers().collect();
sorted_layers.sort_by(|a, b| a.get_lsn_range().start.cmp(&b.get_lsn_range().start));
// TODO implement out of order inserts
for layer in sorted_layers {
let kr = layer.get_key_range();
let lr = layer.get_lsn_range();
bstlm.insert(
kr.start.to_i128(),
kr.end.to_i128(),
lr.start.0,
format!("Layer {}", lr.start.0),
);
}
bstlm.insert(
kr.start.to_i128(),
kr.end.to_i128(),
lr.start.0,
format!("Layer {}", lr.start.0),
);
}
println!("Finished bst init in {:?}", now.elapsed());

View File

@@ -2,35 +2,47 @@ use std::collections::BTreeMap;
// TODO the `im` crate has 20x more downloads and also has
// persistent/immutable BTree. See if it's better.
use rpds::RedBlackTreeMap;
use rpds::RedBlackTreeMapSync;
/// Layer map implemented using persistent/immutable binary search tree.
/// This implementation is only good enough to run benchmarks,
/// so it's missing unnecessary details. Values are String for now.
pub struct BSTLM {
/// Mapping key to the latest layer (if any) until the next key
head: RedBlackTreeMap<i128, Option<String>>,
/// It supports historical queries, but no retroactive inserts. For that
/// see RetroactiveLayerMap.
///
/// Layer type is abstracted as Value to make unit testing easier.
pub struct PersistentLayerMap<Value> {
/// Mapping key to the latest layer (if any) until the next key.
/// We use the Sync version of the map because we want Self to
/// be Sync.
head: RedBlackTreeMapSync<i128, Option<Value>>,
/// All previous states of `self.head`
historic: BTreeMap<u64, RedBlackTreeMap<i128, Option<String>>>,
///
/// TODO: Sorted Vec + binary search could be slightly faster.
historic: BTreeMap<u64, RedBlackTreeMapSync<i128, Option<Value>>>,
}
impl std::fmt::Debug for BSTLM {
impl<Value: std::fmt::Debug> std::fmt::Debug for PersistentLayerMap<Value> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let head_vec: Vec<_> = self.head.iter().collect();
write!(f, "BSTLM: head: {:?}", head_vec)
write!(f, "PersistentLayerMap: head: {:?}", head_vec)
}
}
impl BSTLM {
/// `Default` for the layer map delegates to `new()`, so a default-constructed
/// map starts with an empty `head` and no `historic` versions.
///
/// The `Clone` bound mirrors the bound on the inherent impl that defines `new()`.
impl<T: Clone> Default for PersistentLayerMap<T> {
fn default() -> Self {
Self::new()
}
}
impl<Value: Clone> PersistentLayerMap<Value> {
pub fn new() -> Self {
BSTLM {
head: RedBlackTreeMap::default(),
Self {
head: RedBlackTreeMapSync::default(),
historic: BTreeMap::default(),
}
}
pub fn insert(self: &mut Self, key_begin: i128, key_end: i128, lsn: u64, value: String) {
pub fn insert(self: &mut Self, key_begin: i128, key_end: i128, lsn: u64, value: Value) {
// TODO check for off-by-one errors
// It's only a persistent map, not a retroactive one
@@ -40,7 +52,7 @@ impl BSTLM {
// TODO there are edge cases to take care of
}
if lsn < *last_lsn {
todo!("smaller lsn not implemented yet")
panic!("unexpected retroactive insert");
}
}
@@ -71,7 +83,7 @@ impl BSTLM {
self.historic.insert(lsn, self.head.clone());
}
pub fn query(self: &Self, key: i128, lsn: u64) -> Option<&String> {
pub fn query(self: &Self, key: i128, lsn: u64) -> Option<&Value> {
// TODO check for off-by-one errors
let version = self.historic.range(0..=lsn).rev().next()?.1;
@@ -84,28 +96,47 @@ impl BSTLM {
// nicer but we have this solution as backup.
}
/// Basic test for the immutable bst library, just to show usage.
#[test]
fn test_bstlm() {
let mut bstlm = BSTLM::new();
bstlm.insert(0, 5, 100, "Layer 1".to_string());
dbg!(&bstlm);
bstlm.insert(3, 9, 110, "Layer 2".to_string());
dbg!(&bstlm);
bstlm.insert(5, 6, 120, "Layer 3".to_string());
dbg!(&bstlm);
fn test_immutable_bst_dependency() {
// Start from an empty map and keep `v1` as a snapshot of that empty state.
let map = RedBlackTreeMapSync::<i32, i32>::default();
let mut v1 = map.clone();
// The non-`_mut` `insert` hands back a new map (`v2`); the assertions below
// check that the earlier snapshot `v1` does not see key 1.
let v2 = map.insert(1, 5);
// We can query current and past versions of key 1
assert_eq!(v1.get(&1), None);
assert_eq!(v2.get(&1), Some(&5));
// We can mutate old state, but it creates a branch.
// It doesn't retroactively change future versions.
// `insert_mut` updates `v1` in place; `v2` must remain unaffected.
v1.insert_mut(2, 6);
assert_eq!(v1.get(&2), Some(&6));
assert_eq!(v2.get(&2), None);
}
/// This is the most basic test that demonstrates intended usage.
#[test]
fn test_persistent_simple() {
let mut map = PersistentLayerMap::<String>::new();
map.insert(0, 5, 100, "Layer 1".to_string());
dbg!(&map);
map.insert(3, 9, 110, "Layer 2".to_string());
dbg!(&map);
map.insert(5, 6, 120, "Layer 3".to_string());
dbg!(&map);
// After Layer 1 insertion
assert_eq!(bstlm.query(1, 105), Some(&"Layer 1".to_string()));
assert_eq!(bstlm.query(4, 105), Some(&"Layer 1".to_string()));
assert_eq!(map.query(1, 105), Some(&"Layer 1".to_string()));
assert_eq!(map.query(4, 105), Some(&"Layer 1".to_string()));
// After Layer 2 insertion
assert_eq!(bstlm.query(4, 115), Some(&"Layer 2".to_string()));
assert_eq!(bstlm.query(8, 115), Some(&"Layer 2".to_string()));
assert_eq!(bstlm.query(11, 115), None);
assert_eq!(map.query(4, 115), Some(&"Layer 2".to_string()));
assert_eq!(map.query(8, 115), Some(&"Layer 2".to_string()));
assert_eq!(map.query(11, 115), None);
// After Layer 3 insertion
assert_eq!(bstlm.query(4, 125), Some(&"Layer 2".to_string()));
assert_eq!(bstlm.query(5, 125), Some(&"Layer 3".to_string()));
assert_eq!(bstlm.query(7, 125), Some(&"Layer 2".to_string()));
assert_eq!(map.query(4, 125), Some(&"Layer 2".to_string()));
assert_eq!(map.query(5, 125), Some(&"Layer 3".to_string()));
assert_eq!(map.query(7, 125), Some(&"Layer 2".to_string()));
}

View File

@@ -28,6 +28,8 @@ use std::sync::Arc;
use tracing::*;
use utils::lsn::Lsn;
use super::bst_layer_map::PersistentLayerMap;
///
/// LayerMap tracks what layers exist on a timeline.
///
@@ -55,6 +57,10 @@ pub struct LayerMap {
/// All the historic layers are kept here
historic_layers: RTree<LayerRTreeObject>,
/// HACK I'm experimenting with a new index to replace the RTree. If this
/// works out I'll clean up the struct later.
index: PersistentLayerMap<Arc<dyn Layer>>,
/// L0 layers have key range Key::MIN..Key::MAX, and locating them using R-Tree search is very inefficient.
/// So L0 layers are held in l0_delta_layers vector, in addition to the R-tree.
l0_delta_layers: Vec<Arc<dyn Layer>>,
@@ -241,6 +247,14 @@ impl LayerMap {
/// layer.
///
pub fn search(&self, key: Key, end_lsn: Lsn) -> Result<Option<SearchResult>> {
// HACK use the index to query and return early. If this works I'll
// rewrite the function.
let result = self.index.query(key.to_i128(), end_lsn.0);
return Ok(result.map(|layer| SearchResult {
layer: Arc::clone(layer),
lsn_floor: Lsn(0), // TODO what's this?
}));
// linear search
// Find the latest image layer that covers the given key
let mut latest_img: Option<Arc<dyn Layer>> = None;
@@ -345,10 +359,21 @@ impl LayerMap {
/// Insert an on-disk layer
///
pub fn insert_historic(&mut self, layer: Arc<dyn Layer>) {
// TODO the index needs to support out of order insertion for this to work
let kr = layer.get_key_range();
let lr = layer.get_lsn_range();
self.index.insert(
kr.start.to_i128(),
kr.end.to_i128(),
lr.start.0,
Arc::clone(&layer),
);
if layer.get_key_range() == (Key::MIN..Key::MAX) {
self.l0_delta_layers.push(layer.clone());
}
self.historic_layers.insert(LayerRTreeObject::new(layer));
// HACK don't update RTree, too slow
// self.historic_layers.insert(LayerRTreeObject::new(layer));
NUM_ONDISK_LAYERS.inc();
}
@@ -586,3 +611,5 @@ impl LayerMap {
Ok(())
}
}
// TODO add layer map tests