From 4e61edef7cc494ff8e06fe8e67b4faf253858949 Mon Sep 17 00:00:00 2001
From: Bojan Serafimov
Date: Thu, 1 Dec 2022 15:05:21 -0500
Subject: [PATCH] wip

---
Note: hunk and diffstat counts were updated by hand for the rewritten
segment_tree_layer_map.rs; the blob id on its `index` line was not regenerated.

 Cargo.lock                                    |   1 +
 pageserver/Cargo.toml                         |   1 +
 pageserver/benches/bench_layer_map.rs         |   5 +-
 pageserver/benches/segment_tree_layer_map.rs  |   0
 pageserver/src/tenant.rs                      |   1 +
 .../src/tenant/segment_tree_layer_map.rs      | 386 ++++++++++++++++++
 6 files changed, 393 insertions(+), 1 deletion(-)
 create mode 100644 pageserver/benches/segment_tree_layer_map.rs
 create mode 100644 pageserver/src/tenant/segment_tree_layer_map.rs

diff --git a/Cargo.lock b/Cargo.lock
index 49806d8d78..8194e107ef 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2334,6 +2334,7 @@ dependencies = [
  "num-traits",
  "once_cell",
  "pageserver_api",
+ "persistent_range_query",
  "pin-project-lite",
  "postgres",
  "postgres-protocol",
diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml
index 61c7b8ae97..6afebf6842 100644
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -59,6 +59,7 @@ tracing = "0.1.36"
 url = "2"
 walkdir = "2.3.2"
 
+persistent_range_query = { path = "../libs/persistent_range_query" }
 etcd_broker = { path = "../libs/etcd_broker" }
 metrics = { path = "../libs/metrics" }
 pageserver_api = { path = "../libs/pageserver_api" }
diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs
index a99580bc65..760626c817 100644
--- a/pageserver/benches/bench_layer_map.rs
+++ b/pageserver/benches/bench_layer_map.rs
@@ -2,6 +2,7 @@ use anyhow::Result;
 use pageserver::repository::{Key, Value};
 use pageserver::tenant::filename::{DeltaFileName, ImageFileName};
 use pageserver::tenant::layer_map::LayerMap;
+use pageserver::tenant::segment_tree_layer_map::STLM;
 use pageserver::tenant::storage_layer::Layer;
 use pageserver::tenant::storage_layer::ValueReconstructResult;
 use pageserver::tenant::storage_layer::ValueReconstructState;
@@ -259,6 +260,7 @@ fn bench_from_real_project(c: &mut Criterion) {
 // Benchmark using synthetic data. Arrange image layers on stacked diagonal lines.
 fn bench_sequential(c: &mut Criterion) {
     let mut layer_map = LayerMap::default();
+    let mut stlm = STLM::new();
 
     // Init layer map. Create 100_000 layers arranged in 1000 diagonal lines.
     //
@@ -275,9 +277,10 @@ fn bench_sequential(c: &mut Criterion) {
         let zero = Key::from_hex("000000000000000000000000000000000000").unwrap();
         let layer = DummyImage {
             key_range: zero.add(10 * i32)..zero.add(10 * i32 + 1),
-            lsn: Lsn(10 * i),
+            lsn: Lsn(i),
         };
         layer_map.insert_historic(Arc::new(layer));
+        stlm.insert(10 * i32, 10 * i32 + 1);
     }
 
     // Manually measure runtime without criterion because criterion
diff --git a/pageserver/benches/segment_tree_layer_map.rs b/pageserver/benches/segment_tree_layer_map.rs
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 981c049111..e2c014053f 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -80,6 +80,7 @@ pub mod filename;
 mod image_layer;
 mod inmemory_layer;
 pub mod layer_map;
+pub mod segment_tree_layer_map;
 
 pub mod metadata;
 mod par_fsync;
diff --git a/pageserver/src/tenant/segment_tree_layer_map.rs b/pageserver/src/tenant/segment_tree_layer_map.rs
new file mode 100644
index 0000000000..558962a9f2
--- /dev/null
+++ b/pageserver/src/tenant/segment_tree_layer_map.rs
@@ -0,0 +1,386 @@
+//! Work-in-progress layer map backed by the `persistent_range_query` crate.
+//!
+//! Each page records the last layer and the last image layer applied to it.
+//! Each key range records the highest number of delta layers added to any of
+//! its pages since the last image layer, plus a range where that count holds.
+
+use persistent_range_query::naive::IndexableKey;
+use persistent_range_query::ops::SameElementsInitializer;
+use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
+use persistent_range_query::{
+    LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
+};
+use std::cmp::Ordering;
+use std::ops::Range;
+
+/// Index of a single page in the key space tracked by the layer map.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
+struct PageIndex(u32);
+
+/// Identifier of a layer.
+type LayerId = String;
+
+impl IndexableKey for PageIndex {
+    /// Offset of `key` from the start of `all_keys`.
+    fn index(all_keys: &Range<Self>, key: &Self) -> usize {
+        (key.0 as usize) - (all_keys.start.0 as usize)
+    }
+
+    /// One-page range for the key at offset `index` from the start of `all_keys`.
+    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
+        PageIndex(all_keys.start.0 + index as u32)..PageIndex(all_keys.start.0 + index as u32 + 1)
+    }
+}
+
+impl MidpointableKey for PageIndex {
+    /// Midpoint of `range`, rounded down.
+    fn midpoint(range: &Range<Self>) -> Self {
+        PageIndex(range.start.0 + (range.end.0 - range.start.0) / 2)
+    }
+}
+
+/// Information the storage keeps for a range of pages.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct LayerMapInformation {
+    // The two `last_*` fields only make sense for a range of length 1.
+    last_layer: Option<LayerId>,
+    last_image_layer: Option<LayerId>,
+    // Works for all ranges: the highest delta layer count and a range having it.
+    max_delta_layers: (usize, Range<PageIndex>),
+}
+
+impl LayerMapInformation {
+    /// Returns `(last_layer, last_image_layer)`.
+    fn last_layers(&self) -> (&Option<LayerId>, &Option<LayerId>) {
+        (&self.last_layer, &self.last_image_layer)
+    }
+
+    /// Returns the highest delta layer count and the range recorded with it.
+    fn max_delta_layers(&self) -> &(usize, Range<PageIndex>) {
+        &self.max_delta_layers
+    }
+}
+
+/// Picks the range to report when both sides reach the same delta layer count.
+///
+/// An empty side is ignored and adjacent ranges are joined; otherwise `left` wins.
+fn merge_ranges(left: &Range<PageIndex>, right: &Range<PageIndex>) -> Range<PageIndex> {
+    if left.is_empty() {
+        right.clone()
+    } else if right.is_empty() {
+        left.clone()
+    } else if left.end == right.start {
+        left.start..right.end
+    } else {
+        left.clone()
+    }
+}
+
+impl RangeQueryResult<PageIndex> for LayerMapInformation {
+    /// Result for an empty range: no layers and a zero count over an empty range.
+    fn new_for_empty_range() -> Self {
+        LayerMapInformation {
+            last_layer: None,
+            last_image_layer: None,
+            max_delta_layers: (0, PageIndex(0)..PageIndex(0)),
+        }
+    }
+
+    /// Combines the results of two ranges into the result for both.
+    ///
+    /// The `last_*` fields take the left value and fall back to the right one.
+    /// The larger delta layer count is kept; on a tie the ranges are merged.
+    fn combine(
+        left: &Self,
+        _left_range: &Range<PageIndex>,
+        right: &Self,
+        _right_range: &Range<PageIndex>,
+    ) -> Self {
+        // Note that either range may be empty.
+        LayerMapInformation {
+            last_layer: left
+                .last_layer
+                .as_ref()
+                .or_else(|| right.last_layer.as_ref())
+                .cloned(),
+            last_image_layer: left
+                .last_image_layer
+                .as_ref()
+                .or_else(|| right.last_image_layer.as_ref())
+                .cloned(),
+            max_delta_layers: match left.max_delta_layers.0.cmp(&right.max_delta_layers.0) {
+                Ordering::Less => right.max_delta_layers.clone(),
+                Ordering::Greater => left.max_delta_layers.clone(),
+                Ordering::Equal => (
+                    left.max_delta_layers.0,
+                    merge_ranges(&left.max_delta_layers.1, &right.max_delta_layers.1),
+                ),
+            },
+        }
+    }
+
+    /// In-place form of [`Self::combine`]: stores the combined result in `left`.
+    fn add(
+        left: &mut Self,
+        left_range: &Range<PageIndex>,
+        right: &Self,
+        right_range: &Range<PageIndex>,
+    ) {
+        *left = Self::combine(left, left_range, right, right_range);
+    }
+}
+
+/// Pending addition of delta layers to a range.
+#[derive(Clone, Debug)]
+struct AddDeltaLayers {
+    last_layer: LayerId,
+    count: usize,
+}
+
+/// Modification applied to a range; an image layer, if set, is applied first.
+#[derive(Clone, Debug)]
+struct LayerMapModification {
+    add_image_layer: Option<LayerId>,
+    add_delta_layers: Option<AddDeltaLayers>,
+}
+
+impl LayerMapModification {
+    /// Modification that adds the image layer `layer`.
+    fn add_image_layer(layer: impl Into<LayerId>) -> Self {
+        LayerMapModification {
+            add_image_layer: Some(layer.into()),
+            add_delta_layers: None,
+        }
+    }
+
+    /// Modification that adds the single delta layer `layer`.
+    fn add_delta_layer(layer: impl Into<LayerId>) -> Self {
+        LayerMapModification {
+            add_image_layer: None,
+            add_delta_layers: Some(AddDeltaLayers {
+                last_layer: layer.into(),
+                count: 1,
+            }),
+        }
+    }
+}
+
+impl RangeModification<PageIndex> for LayerMapModification {
+    type Result = LayerMapInformation;
+
+    /// Modification that changes nothing.
+    fn no_op() -> Self {
+        LayerMapModification {
+            add_image_layer: None,
+            add_delta_layers: None,
+        }
+    }
+
+    /// Whether this modification changes nothing.
+    fn is_no_op(&self) -> bool {
+        self.add_image_layer.is_none() && self.add_delta_layers.is_none()
+    }
+
+    /// An image layer overwrites every field of the result it is applied to.
+    fn is_reinitialization(&self) -> bool {
+        self.add_image_layer.is_some()
+    }
+
+    /// Applies the image layer (if any) and then the delta layers (if any) to `result`.
+    fn apply(&self, result: &mut Self::Result, range: &Range<PageIndex>) {
+        if let Some(layer) = &self.add_image_layer {
+            result.last_layer = Some(layer.clone());
+            result.last_image_layer = Some(layer.clone());
+            result.max_delta_layers = (0, range.clone());
+        }
+        if let Some(AddDeltaLayers { last_layer, count }) = &self.add_delta_layers {
+            result.last_layer = Some(last_layer.clone());
+            result.max_delta_layers.0 += count;
+        }
+    }
+
+    /// Folds `later` into `earlier` so that `earlier` stands for both, in order.
+    fn compose(later: &Self, earlier: &mut Self) {
+        // A later image layer discards whatever was pending before it.
+        if later.add_image_layer.is_some() {
+            *earlier = later.clone();
+            return;
+        }
+        if let Some(AddDeltaLayers { last_layer, count }) = &later.add_delta_layers {
+            let res = earlier.add_delta_layers.get_or_insert(AddDeltaLayers {
+                last_layer: LayerId::default(),
+                count: 0,
+            });
+            res.last_layer = last_layer.clone();
+            res.count += count;
+        }
+    }
+}
+
+impl LazyRangeInitializer<LayerMapInformation, PageIndex> for SameElementsInitializer<()> {
+    /// Initial result for `range`: no layers and a zero delta layer count.
+    fn get(&self, range: &Range<PageIndex>) -> LayerMapInformation {
+        LayerMapInformation {
+            last_layer: None,
+            last_image_layer: None,
+            max_delta_layers: (0, range.clone()),
+        }
+    }
+}
+
+/// Segment-tree layer map (work in progress).
+pub struct STLM {
+    // `PersistentVecStorage` is a trait, so the field needs a concrete storage type.
+    s: PersistentSegmentTree<LayerMapModification, SameElementsInitializer<()>, PageIndex>,
+}
+
+impl STLM {
+    /// Creates an empty map over pages `0..100`.
+    pub fn new() -> Self {
+        STLM {
+            s: PersistentVecStorage::new(
+                PageIndex(0)..PageIndex(100),
+                SameElementsInitializer::new(()),
+            ),
+        }
+    }
+
+    /// Records an image layer covering pages `key_begin..key_end`.
+    ///
+    /// Keys may be of any type that converts to `u32`, such as `i32` or `u32`.
+    /// The layer id is still the hard-coded placeholder `"Img0..70"`.
+    ///
+    /// NOTE(review): the storage only spans pages `0..100`; whether `modify` accepts
+    /// keys outside that range is not visible from here and needs to be confirmed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a key cannot be converted to a `u32` page index.
+    pub fn insert<K>(&mut self, key_begin: K, key_end: K)
+    where
+        u32: TryFrom<K>,
+    {
+        let to_page = |key: K| match u32::try_from(key) {
+            Ok(index) => PageIndex(index),
+            Err(_) => panic!("layer map key is not a valid u32 page index"),
+        };
+        self.s.modify(
+            &(to_page(key_begin)..to_page(key_end)),
+            &LayerMapModification::add_image_layer("Img0..70"),
+        );
+    }
+}
+
+impl Default for STLM {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use persistent_range_query::naive::NaiveVecStorage;
+    use persistent_range_query::VecReadableVersion;
+
+    /// Runs one modify/freeze/query scenario against the storage implementation `S`.
+    fn test_layer_map<
+        S: PersistentVecStorage<LayerMapModification, SameElementsInitializer<()>, PageIndex>,
+    >() {
+        let mut s = S::new(
+            PageIndex(0)..PageIndex(100),
+            SameElementsInitializer::new(()),
+        );
+        s.modify(
+            &(PageIndex(0)..PageIndex(70)),
+            &LayerMapModification::add_image_layer("Img0..70"),
+        );
+        s.modify(
+            &(PageIndex(50)..PageIndex(100)),
+            &LayerMapModification::add_image_layer("Img50..100"),
+        );
+        s.modify(
+            &(PageIndex(10)..PageIndex(60)),
+            &LayerMapModification::add_delta_layer("Delta10..60"),
+        );
+        let s_before_last_delta = s.freeze();
+        s.modify(
+            &(PageIndex(20)..PageIndex(80)),
+            &LayerMapModification::add_delta_layer("Delta20..80"),
+        );
+
+        assert_eq!(
+            s.get(&(PageIndex(5)..PageIndex(6))).last_layers(),
+            (&Some("Img0..70".to_owned()), &Some("Img0..70".to_owned()))
+        );
+        assert_eq!(
+            s.get(&(PageIndex(15)..PageIndex(16))).last_layers(),
+            (
+                &Some("Delta10..60".to_owned()),
+                &Some("Img0..70".to_owned())
+            )
+        );
+        assert_eq!(
+            s.get(&(PageIndex(25)..PageIndex(26))).last_layers(),
+            (
+                &Some("Delta20..80".to_owned()),
+                &Some("Img0..70".to_owned())
+            )
+        );
+        assert_eq!(
+            s.get(&(PageIndex(65)..PageIndex(66))).last_layers(),
+            (
+                &Some("Delta20..80".to_owned()),
+                &Some("Img50..100".to_owned())
+            )
+        );
+        assert_eq!(
+            s.get(&(PageIndex(95)..PageIndex(96))).last_layers(),
+            (
+                &Some("Img50..100".to_owned()),
+                &Some("Img50..100".to_owned())
+            )
+        );
+
+        assert_eq!(
+            s.get(&(PageIndex(0)..PageIndex(100))).max_delta_layers(),
+            &(2, PageIndex(20)..PageIndex(60)),
+        );
+        assert_eq!(
+            *s_before_last_delta
+                .get(&(PageIndex(0)..PageIndex(100)))
+                .max_delta_layers(),
+            (1, PageIndex(10)..PageIndex(60)),
+        );
+
+        assert_eq!(
+            *s.get(&(PageIndex(10)..PageIndex(30))).max_delta_layers(),
+            (2, PageIndex(20)..PageIndex(30))
+        );
+        assert_eq!(
+            *s.get(&(PageIndex(10)..PageIndex(20))).max_delta_layers(),
+            (1, PageIndex(10)..PageIndex(20))
+        );
+
+        assert_eq!(
+            *s.get(&(PageIndex(70)..PageIndex(80))).max_delta_layers(),
+            (1, PageIndex(70)..PageIndex(80))
+        );
+        assert_eq!(
+            *s_before_last_delta
+                .get(&(PageIndex(70)..PageIndex(80)))
+                .max_delta_layers(),
+            (0, PageIndex(70)..PageIndex(80))
+        );
+    }
+
+    #[test]
+    fn test_naive() {
+        test_layer_map::<NaiveVecStorage<_, _, _>>();
+    }
+
+    #[test]
+    fn test_segment_tree() {
+        test_layer_map::<PersistentSegmentTree<_, _, _>>();
+    }
+}