mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 21:42:56 +00:00
314 lines
9.2 KiB
Rust
314 lines
9.2 KiB
Rust
//! Tenant size model tests.
|
|
|
|
use tenant_size_model::{Segment, SizeResult, StorageModel};
|
|
|
|
use std::collections::HashMap;
|
|
|
|
struct ScenarioBuilder {
|
|
segments: Vec<Segment>,
|
|
|
|
/// Mapping from the branch name to the index of a segment describing its latest state.
|
|
branches: HashMap<String, usize>,
|
|
}
|
|
|
|
impl ScenarioBuilder {
|
|
/// Creates a new storage with the given default branch name.
|
|
pub fn new(initial_branch: &str) -> ScenarioBuilder {
|
|
let init_segment = Segment {
|
|
parent: None,
|
|
lsn: 0,
|
|
size: Some(0),
|
|
needed: false, // determined later
|
|
};
|
|
|
|
ScenarioBuilder {
|
|
segments: vec![init_segment],
|
|
branches: HashMap::from([(initial_branch.into(), 0)]),
|
|
}
|
|
}
|
|
|
|
/// Advances the branch with the named operation, by the relative LSN and logical size bytes.
|
|
pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {
|
|
let lastseg_id = *self.branches.get(branch).unwrap();
|
|
let newseg_id = self.segments.len();
|
|
let lastseg = &mut self.segments[lastseg_id];
|
|
|
|
let newseg = Segment {
|
|
parent: Some(lastseg_id),
|
|
lsn: lastseg.lsn + lsn_bytes,
|
|
size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),
|
|
needed: false,
|
|
};
|
|
|
|
self.segments.push(newseg);
|
|
*self.branches.get_mut(branch).expect("read already") = newseg_id;
|
|
}
|
|
|
|
pub fn insert(&mut self, branch: &str, bytes: u64) {
|
|
self.modify_branch(branch, bytes, bytes as i64);
|
|
}
|
|
|
|
pub fn update(&mut self, branch: &str, bytes: u64) {
|
|
self.modify_branch(branch, bytes, 0i64);
|
|
}
|
|
|
|
pub fn _delete(&mut self, branch: &str, bytes: u64) {
|
|
self.modify_branch(branch, bytes, -(bytes as i64));
|
|
}
|
|
|
|
/// Panics if the parent branch cannot be found.
|
|
pub fn branch(&mut self, parent: &str, name: &str) {
|
|
// Find the right segment
|
|
let branchseg_id = *self
|
|
.branches
|
|
.get(parent)
|
|
.expect("should had found the parent by key");
|
|
let _branchseg = &mut self.segments[branchseg_id];
|
|
|
|
// Create branch name for it
|
|
self.branches.insert(name.to_string(), branchseg_id);
|
|
}
|
|
|
|
pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {
|
|
// Phase 1: Mark all the segments that need to be retained
|
|
for (_branch, &last_seg_id) in self.branches.iter() {
|
|
let last_seg = &self.segments[last_seg_id];
|
|
let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);
|
|
let mut seg_id = last_seg_id;
|
|
loop {
|
|
let seg = &mut self.segments[seg_id];
|
|
if seg.lsn <= cutoff_lsn {
|
|
break;
|
|
}
|
|
seg.needed = true;
|
|
if let Some(prev_seg_id) = seg.parent {
|
|
seg_id = prev_seg_id;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Perform the calculation
|
|
let storage_model = StorageModel {
|
|
segments: self.segments.clone(),
|
|
};
|
|
let size_result = storage_model.calculate();
|
|
(storage_model, size_result)
|
|
}
|
|
}
|
|
|
|
// Main branch only. Some updates on it.
|
|
#[test]
|
|
fn scenario_1() {
|
|
// Create main branch
|
|
let mut scenario = ScenarioBuilder::new("main");
|
|
|
|
// Bulk load 5 GB of data to it
|
|
scenario.insert("main", 5_000);
|
|
|
|
// Stream of updates
|
|
for _ in 0..5 {
|
|
scenario.update("main", 1_000);
|
|
}
|
|
|
|
// Calculate the synthetic size with retention horizon 1000
|
|
let (_model, result) = scenario.calculate(1000);
|
|
|
|
// The end of the branch is at LSN 10000. Need to retain
|
|
// a logical snapshot at LSN 9000, plus the WAL between 9000-10000.
|
|
// The logical snapshot has size 5000.
|
|
assert_eq!(result.total_size, 5000 + 1000);
|
|
}
|
|
|
|
// Main branch only. Some updates on it.
|
|
#[test]
|
|
fn scenario_2() {
|
|
// Create main branch
|
|
let mut scenario = ScenarioBuilder::new("main");
|
|
|
|
// Bulk load 5 GB of data to it
|
|
scenario.insert("main", 5_000);
|
|
|
|
// Stream of updates
|
|
for _ in 0..5 {
|
|
scenario.update("main", 1_000);
|
|
}
|
|
|
|
// Branch
|
|
scenario.branch("main", "child");
|
|
scenario.update("child", 1_000);
|
|
|
|
// More updates on parent
|
|
scenario.update("main", 1_000);
|
|
|
|
//
|
|
// The history looks like this now:
|
|
//
|
|
// 10000 11000
|
|
// *----*----*--------------* main
|
|
// |
|
|
// | 11000
|
|
// +-------------- child
|
|
//
|
|
//
|
|
// With retention horizon 1000, we need to retain logical snapshot
|
|
// at the branch point, size 5000, and the WAL from 10000-11000 on
|
|
// both branches.
|
|
let (_model, result) = scenario.calculate(1000);
|
|
|
|
assert_eq!(result.total_size, 5000 + 1000 + 1000);
|
|
}
|
|
|
|
// Like 2, but more updates on main
|
|
#[test]
|
|
fn scenario_3() {
|
|
// Create main branch
|
|
let mut scenario = ScenarioBuilder::new("main");
|
|
|
|
// Bulk load 5 GB of data to it
|
|
scenario.insert("main", 5_000);
|
|
|
|
// Stream of updates
|
|
for _ in 0..5 {
|
|
scenario.update("main", 1_000);
|
|
}
|
|
|
|
// Branch
|
|
scenario.branch("main", "child");
|
|
scenario.update("child", 1_000);
|
|
|
|
// More updates on parent
|
|
for _ in 0..5 {
|
|
scenario.update("main", 1_000);
|
|
}
|
|
|
|
//
|
|
// The history looks like this now:
|
|
//
|
|
// 10000 15000
|
|
// *----*----*------------------------------------* main
|
|
// |
|
|
// | 11000
|
|
// +-------------- child
|
|
//
|
|
//
|
|
// With retention horizon 1000, it's still cheapest to retain
|
|
// - snapshot at branch point (size 5000)
|
|
// - WAL on child between 10000-11000
|
|
// - WAL on main between 10000-15000
|
|
//
|
|
// This is in total 5000 + 1000 + 5000
|
|
//
|
|
let (_model, result) = scenario.calculate(1000);
|
|
|
|
assert_eq!(result.total_size, 5000 + 1000 + 5000);
|
|
}
|
|
|
|
// Diverged branches
|
|
#[test]
|
|
fn scenario_4() {
|
|
// Create main branch
|
|
let mut scenario = ScenarioBuilder::new("main");
|
|
|
|
// Bulk load 5 GB of data to it
|
|
scenario.insert("main", 5_000);
|
|
|
|
// Stream of updates
|
|
for _ in 0..5 {
|
|
scenario.update("main", 1_000);
|
|
}
|
|
|
|
// Branch
|
|
scenario.branch("main", "child");
|
|
scenario.update("child", 1_000);
|
|
|
|
// More updates on parent
|
|
for _ in 0..8 {
|
|
scenario.update("main", 1_000);
|
|
}
|
|
|
|
//
|
|
// The history looks like this now:
|
|
//
|
|
// 10000 18000
|
|
// *----*----*------------------------------------* main
|
|
// |
|
|
// | 11000
|
|
// +-------------- child
|
|
//
|
|
//
|
|
// With retention horizon 1000, it's now cheapest to retain
|
|
// separate snapshots on both branches:
|
|
// - snapshot on main branch at LSN 17000 (size 5000)
|
|
// - WAL on main between 17000-18000
|
|
// - snapshot on child branch at LSN 10000 (size 5000)
|
|
// - WAL on child between 10000-11000
|
|
//
|
|
// This is in total 5000 + 1000 + 5000 + 1000 = 12000
|
|
//
|
|
// (If we used the method from the previous scenario, and
|
|
// kept only snapshot at the branch point, we'd need to keep
|
|
// all the WAL between 10000-18000 on the main branch, so
|
|
// the total size would be 5000 + 1000 + 8000 = 14000. The
|
|
// calculation always picks the cheapest alternative)
|
|
|
|
let (_model, result) = scenario.calculate(1000);
|
|
|
|
assert_eq!(result.total_size, 5000 + 1000 + 5000 + 1000);
|
|
}
|
|
|
|
#[test]
|
|
fn scenario_5() {
|
|
let mut scenario = ScenarioBuilder::new("a");
|
|
scenario.insert("a", 5000);
|
|
scenario.branch("a", "b");
|
|
scenario.update("b", 4000);
|
|
scenario.update("a", 2000);
|
|
scenario.branch("a", "c");
|
|
scenario.insert("c", 4000);
|
|
scenario.insert("a", 2000);
|
|
|
|
let (_model, result) = scenario.calculate(1000);
|
|
|
|
assert_eq!(result.total_size, 17000);
|
|
}
|
|
|
|
#[test]
|
|
fn scenario_6() {
|
|
let branches = [
|
|
"7ff1edab8182025f15ae33482edb590a",
|
|
"b1719e044db05401a05a2ed588a3ad3f",
|
|
"0xb68d6691c895ad0a70809470020929ef",
|
|
];
|
|
|
|
// compared to other scenarios, this one uses bytes instead of kB
|
|
|
|
let mut scenario = ScenarioBuilder::new("");
|
|
|
|
scenario.branch("", branches[0]); // at 0
|
|
scenario.modify_branch(branches[0], 108951064, 43696128); // at 108951064
|
|
scenario.branch(branches[0], branches[1]); // at 108951064
|
|
scenario.modify_branch(branches[1], 15560408, -1851392); // at 124511472
|
|
scenario.modify_branch(branches[0], 174464360, -1531904); // at 283415424
|
|
scenario.branch(branches[0], branches[2]); // at 283415424
|
|
scenario.modify_branch(branches[2], 15906192, 8192); // at 299321616
|
|
scenario.modify_branch(branches[0], 18909976, 32768); // at 302325400
|
|
|
|
let (model, result) = scenario.calculate(100_000);
|
|
|
|
// FIXME: We previously calculated 333_792_000. But with this PR, we get
|
|
// a much lower number. At a quick look at the model output and the
|
|
// calculations here, the new result seems correct to me.
|
|
eprintln!(
|
|
" MODEL: {}",
|
|
serde_json::to_string(&model.segments).unwrap()
|
|
);
|
|
eprintln!(
|
|
"RESULT: {}",
|
|
serde_json::to_string(&result.segments).unwrap()
|
|
);
|
|
|
|
assert_eq!(result.total_size, 136_236_928);
|
|
}
|