mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-15 09:22:55 +00:00
Refactor the tenant_size_model code. Segment now contains just the
minimum amount of information needed to calculate the size. Other
information that is useful for building up the segment tree, and for
display purposes, is now kept elsewhere. The code in 'main.rs' has a new
ScenarioBuilder struct for that.
Calculating which Segments are "needed" is now the responsibility of the
caller of tenant_size_model, not part of the calculation itself. So it's
up to the caller to make all the decisions with retention periods for
each branch.
The output of the sizing calculation is now a Vec of SizeResults, rather
than a tree. It uses a tree representation internally, when doing the
calculation, but it's not exposed to the caller anymore.
Refactor the way the recursive calculation is performed.
Rewrite the code in size.rs that builds the Segment model. Get rid of
the intermediate representation with Update structs. Build the Segments
directly, with some local HashMaps and Vecs to track branch points to
help with that.
retention_period is now an input to gather_inputs(), rather than an
output.
Update pageserver http API: rename /size endpoint to /synthetic_size
with following parameters:
- /synthetic_size?inputs_only to get debug info;
- /synthetic_size?retention_period=0 to override cutoff that is used to
calculate the size;
pass header -H "Accept: text/html" to get HTML output, otherwise JSON is
returned
Update python tests and openapi spec.
---------
Co-authored-by: Anastasia Lubennikova <anastasia@neon.tech>
Co-authored-by: Joonas Koivunen <joonas@neon.tech>
314 lines
9.2 KiB
Rust
314 lines
9.2 KiB
Rust
//! Tenant size model tests.
|
|
|
|
use tenant_size_model::{Segment, SizeResult, StorageModel};
|
|
|
|
use std::collections::HashMap;
|
|
|
|
struct ScenarioBuilder {
|
|
segments: Vec<Segment>,
|
|
|
|
/// Mapping from the branch name to the index of a segment describing its latest state.
|
|
branches: HashMap<String, usize>,
|
|
}
|
|
|
|
impl ScenarioBuilder {
|
|
/// Creates a new storage with the given default branch name.
|
|
pub fn new(initial_branch: &str) -> ScenarioBuilder {
|
|
let init_segment = Segment {
|
|
parent: None,
|
|
lsn: 0,
|
|
size: Some(0),
|
|
needed: false, // determined later
|
|
};
|
|
|
|
ScenarioBuilder {
|
|
segments: vec![init_segment],
|
|
branches: HashMap::from([(initial_branch.into(), 0)]),
|
|
}
|
|
}
|
|
|
|
/// Advances the branch with the named operation, by the relative LSN and logical size bytes.
|
|
pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {
|
|
let lastseg_id = *self.branches.get(branch).unwrap();
|
|
let newseg_id = self.segments.len();
|
|
let lastseg = &mut self.segments[lastseg_id];
|
|
|
|
let newseg = Segment {
|
|
parent: Some(lastseg_id),
|
|
lsn: lastseg.lsn + lsn_bytes,
|
|
size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),
|
|
needed: false,
|
|
};
|
|
|
|
self.segments.push(newseg);
|
|
*self.branches.get_mut(branch).expect("read already") = newseg_id;
|
|
}
|
|
|
|
pub fn insert(&mut self, branch: &str, bytes: u64) {
|
|
self.modify_branch(branch, bytes, bytes as i64);
|
|
}
|
|
|
|
pub fn update(&mut self, branch: &str, bytes: u64) {
|
|
self.modify_branch(branch, bytes, 0i64);
|
|
}
|
|
|
|
pub fn _delete(&mut self, branch: &str, bytes: u64) {
|
|
self.modify_branch(branch, bytes, -(bytes as i64));
|
|
}
|
|
|
|
/// Panics if the parent branch cannot be found.
|
|
pub fn branch(&mut self, parent: &str, name: &str) {
|
|
// Find the right segment
|
|
let branchseg_id = *self
|
|
.branches
|
|
.get(parent)
|
|
.expect("should had found the parent by key");
|
|
let _branchseg = &mut self.segments[branchseg_id];
|
|
|
|
// Create branch name for it
|
|
self.branches.insert(name.to_string(), branchseg_id);
|
|
}
|
|
|
|
pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {
|
|
// Phase 1: Mark all the segments that need to be retained
|
|
for (_branch, &last_seg_id) in self.branches.iter() {
|
|
let last_seg = &self.segments[last_seg_id];
|
|
let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);
|
|
let mut seg_id = last_seg_id;
|
|
loop {
|
|
let seg = &mut self.segments[seg_id];
|
|
if seg.lsn <= cutoff_lsn {
|
|
break;
|
|
}
|
|
seg.needed = true;
|
|
if let Some(prev_seg_id) = seg.parent {
|
|
seg_id = prev_seg_id;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Perform the calculation
|
|
let storage_model = StorageModel {
|
|
segments: self.segments.clone(),
|
|
};
|
|
let size_result = storage_model.calculate();
|
|
(storage_model, size_result)
|
|
}
|
|
}
|
|
|
|
// A single branch ("main"): one bulk insert followed by a stream of updates.
#[test]
fn scenario_1() {
    const RETENTION: u64 = 1000;

    // Storage whose only branch is "main".
    let mut builder = ScenarioBuilder::new("main");

    // Bulk load: a single insert of 5_000.
    builder.insert("main", 5_000);

    // Five updates of 1_000 each; these advance the LSN to 10000 while the
    // logical size stays at 5000.
    (0..5).for_each(|_| builder.update("main", 1_000));

    // Synthetic size with a retention horizon of 1000.
    let (_, outcome) = builder.calculate(RETENTION);

    // The branch ends at LSN 10000, so we must retain a logical snapshot at
    // LSN 9000 (size 5000) plus the WAL between 9000 and 10000.
    assert_eq!(outcome.total_size, 5000 + 1000);
}
|
|
|
|
// Main branch plus one child branch; a single update on each side after the
// branch point.
#[test]
fn scenario_2() {
    const RETENTION: u64 = 1000;

    // Storage whose initial branch is "main".
    let mut builder = ScenarioBuilder::new("main");

    // Bulk load: a single insert of 5_000.
    builder.insert("main", 5_000);

    // Five updates of 1_000 each bring "main" to LSN 10000.
    (0..5).for_each(|_| builder.update("main", 1_000));

    // Fork "child" off the head of "main" and advance it once.
    builder.branch("main", "child");
    builder.update("child", 1_000);

    // One more update on the parent.
    builder.update("main", 1_000);

    //
    // The history looks like this now:
    //
    //           10000        11000
    // *----*----*--------------*    main
    //           |
    //           |            11000
    //           +--------------     child
    //
    //
    // With retention horizon 1000, we need to retain the logical snapshot at
    // the branch point (size 5000) and the WAL from 10000-11000 on both
    // branches.
    let (_, outcome) = builder.calculate(RETENTION);

    assert_eq!(outcome.total_size, 5000 + 1000 + 1000);
}
|
|
|
|
// Same shape as scenario 2, but the parent receives more updates after the
// branch point.
#[test]
fn scenario_3() {
    const RETENTION: u64 = 1000;

    // Storage whose initial branch is "main".
    let mut builder = ScenarioBuilder::new("main");

    // Bulk load: a single insert of 5_000.
    builder.insert("main", 5_000);

    // Five updates of 1_000 each bring "main" to LSN 10000.
    for _step in 0..5 {
        builder.update("main", 1_000);
    }

    // Fork "child" off the head of "main" and advance it once.
    builder.branch("main", "child");
    builder.update("child", 1_000);

    // Five more updates on the parent, up to LSN 15000.
    for _step in 0..5 {
        builder.update("main", 1_000);
    }

    //
    // The history looks like this now:
    //
    //           10000                                15000
    // *----*----*------------------------------------*    main
    //           |
    //           |            11000
    //           +--------------     child
    //
    //
    // With retention horizon 1000, it's still cheapest to retain
    // - snapshot at branch point (size 5000)
    // - WAL on child between 10000-11000
    // - WAL on main between 10000-15000
    //
    // This is in total 5000 + 1000 + 5000
    //
    let (_, outcome) = builder.calculate(RETENTION);

    assert_eq!(outcome.total_size, 5000 + 1000 + 5000);
}
|
|
|
|
// Diverged branches: the parent moves far ahead of the branch point.
#[test]
fn scenario_4() {
    const RETENTION: u64 = 1000;

    // Storage whose initial branch is "main".
    let mut builder = ScenarioBuilder::new("main");

    // Bulk load: a single insert of 5_000.
    builder.insert("main", 5_000);

    // Five updates of 1_000 each bring "main" to LSN 10000.
    (0..5).for_each(|_| builder.update("main", 1_000));

    // Fork "child" off the head of "main" and advance it once.
    builder.branch("main", "child");
    builder.update("child", 1_000);

    // Eight more updates on the parent, up to LSN 18000.
    (0..8).for_each(|_| builder.update("main", 1_000));

    //
    // The history looks like this now:
    //
    //           10000                                18000
    // *----*----*------------------------------------*    main
    //           |
    //           |            11000
    //           +--------------     child
    //
    //
    // With retention horizon 1000, it's now cheapest to retain
    // separate snapshots on both branches:
    // - snapshot on main branch at LSN 17000 (size 5000)
    // - WAL on main between 17000-18000
    // - snapshot on child branch at LSN 10000 (size 5000)
    // - WAL on child between 10000-11000
    //
    // This is in total 5000 + 1000 + 5000 + 1000 = 12000
    //
    // (If we used the method from the previous scenario, and
    // kept only snapshot at the branch point, we'd need to keep
    // all the WAL between 10000-18000 on the main branch, so
    // the total size would be 5000 + 1000 + 8000 = 14000. The
    // calculation always picks the cheapest alternative)

    let (_, outcome) = builder.calculate(RETENTION);

    assert_eq!(outcome.total_size, 5000 + 1000 + 5000 + 1000);
}
|
|
|
|
// Three branches: "b" and "c" both fork off "a", at different points.
#[test]
fn scenario_5() {
    const RETENTION: u64 = 1000;

    let mut builder = ScenarioBuilder::new("a");

    // "a" reaches LSN 5000 with size 5000; "b" forks there.
    builder.insert("a", 5000);
    builder.branch("a", "b");

    // "b" advances to LSN 9000, "a" to LSN 7000; "c" forks off "a" there.
    builder.update("b", 4000);
    builder.update("a", 2000);
    builder.branch("a", "c");

    // Both "c" and "a" grow after the second branch point.
    builder.insert("c", 4000);
    builder.insert("a", 2000);

    let (_, outcome) = builder.calculate(RETENTION);

    assert_eq!(outcome.total_size, 17000);
}
|
|
|
|
// Three named branches forked off an unnamed ("") initial branch, with the
// model and the result dumped to stderr for inspection.
#[test]
fn scenario_6() {
    let [first, second, third] = [
        "7ff1edab8182025f15ae33482edb590a",
        "b1719e044db05401a05a2ed588a3ad3f",
        "0xb68d6691c895ad0a70809470020929ef",
    ];

    // compared to other scenarios, this one uses bytes instead of kB

    let mut builder = ScenarioBuilder::new("");

    // The trailing "at N" notes give the LSN of the affected branch head
    // after each step.
    builder.branch("", first); // at 0
    builder.modify_branch(first, 108951064, 43696128); // at 108951064
    builder.branch(first, second); // at 108951064
    builder.modify_branch(second, 15560408, -1851392); // at 124511472
    builder.modify_branch(first, 174464360, -1531904); // at 283415424
    builder.branch(first, third); // at 283415424
    builder.modify_branch(third, 15906192, 8192); // at 299321616
    builder.modify_branch(first, 18909976, 32768); // at 302325400

    let (storage, outcome) = builder.calculate(100_000);

    // FIXME: We previously calculated 333_792_000. But with this PR, we get
    // a much lower number. At a quick look at the model output and the
    // calculations here, the new result seems correct to me.
    let model_json = serde_json::to_string(&storage.segments).unwrap();
    eprintln!(" MODEL: {}", model_json);
    let result_json = serde_json::to_string(&outcome.segments).unwrap();
    eprintln!("RESULT: {}", result_json);

    assert_eq!(outcome.total_size, 136_236_928);
}
|