From 86e483f87bb47e216b8b759562714ca572ebf9be Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 22 Nov 2022 13:56:26 +0200 Subject: [PATCH] Fix tenant size modeling code to include WAL at end of branch Imagine that you have a tenant with a single branch like this: ---------------==========> ^ gc horizon where: ---- is the portion of the branch that is older than retention period ==== is the portion of the branch that is newer than retention period. Before this commit, the sizing model included the logical size at the GC horizon, but not the WAL after that. In particular, that meant that on a newly created tenant with just one timeline, where the retention period covered the whole history of the timeline, i.e. gc_cutoff was 0, the calculated tenant size was always zero. We now include the WAL after the GC horizon in the size. So in the above example, the calculated tenant size would be the logical size of the database the GC horizon, plus all the WAL after it (marked with ===). This adds a new `insert_point` function to the sizing model, alongside `modify_branch`, and changes the code in size.rs to use the new function. The new function takes an absolute lsn and logical size as argument, so we no longer need to calculate the difference to the previous point. Also, the end-size is now optional, because we now need to add a point to represent the end of each branch to the model, but we don't want to or need to calculate the logical size at that point. --- libs/tenant_size_model/src/lib.rs | 45 +++++++++++++++++--- libs/tenant_size_model/src/main.rs | 4 +- pageserver/src/tenant/size.rs | 56 ++++++++++--------------- test_runner/regress/test_tenant_size.py | 6 +-- 4 files changed, 64 insertions(+), 47 deletions(-) diff --git a/libs/tenant_size_model/src/lib.rs b/libs/tenant_size_model/src/lib.rs index c7ec1e8870..86814b5f25 100644 --- a/libs/tenant_size_model/src/lib.rs +++ b/libs/tenant_size_model/src/lib.rs @@ -33,8 +33,8 @@ pub struct Segment { /// Logical size before this state start_size: u64, - /// Logical size at this state - pub end_size: u64, + /// Logical size at this state. Can be None in the last Segment of a branch. + pub end_size: Option, /// Indices to [`Storage::segments`] /// @@ -115,7 +115,7 @@ impl Storage { start_lsn: 0, end_lsn: 0, start_size: 0, - end_size: 0, + end_size: Some(0), children_after: Vec::new(), }; @@ -125,6 +125,39 @@ impl Storage { } } + /// Advances the branch with a new point, at given LSN. + pub fn insert_point( + &mut self, + branch: &Q, + op: Cow<'static, str>, + lsn: u64, + size: Option, + ) where + K: std::borrow::Borrow, + Q: std::hash::Hash + Eq, + { + let lastseg_id = *self.branches.get(branch).unwrap(); + let newseg_id = self.segments.len(); + let lastseg = &mut self.segments[lastseg_id]; + + assert!(lsn > lastseg.end_lsn); + + let newseg = Segment { + op, + parent: Some(lastseg_id), + start_lsn: lastseg.end_lsn, + end_lsn: lsn, + start_size: lastseg.end_size.unwrap(), + end_size: size, + children_after: Vec::new(), + needed: false, + }; + lastseg.children_after.push(newseg_id); + + self.segments.push(newseg); + *self.branches.get_mut(branch).expect("read already") = newseg_id; + } + /// Advances the branch with the named operation, by the relative LSN and logical size bytes. pub fn modify_branch( &mut self, @@ -145,8 +178,8 @@ impl Storage { parent: Some(lastseg_id), start_lsn: lastseg.end_lsn, end_lsn: lastseg.end_lsn + lsn_bytes, - start_size: lastseg.end_size, - end_size: (lastseg.end_size as i64 + size_bytes) as u64, + start_size: lastseg.end_size.unwrap(), + end_size: Some((lastseg.end_size.unwrap() as i64 + size_bytes) as u64), children_after: Vec::new(), needed: false, }; @@ -321,7 +354,7 @@ impl Storage { Some(SegmentSize { seg_id, method: SnapshotAfter, - this_size: seg.end_size, + this_size: seg.end_size.unwrap(), children, }) } else { diff --git a/libs/tenant_size_model/src/main.rs b/libs/tenant_size_model/src/main.rs index 47c0e8122f..f5bea399a1 100644 --- a/libs/tenant_size_model/src/main.rs +++ b/libs/tenant_size_model/src/main.rs @@ -174,7 +174,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) { let seg_id = node.seg_id; let seg = segments.get(seg_id).unwrap(); let lsn = seg.end_lsn; - let size = seg.end_size; + let size = seg.end_size.unwrap_or(0); let method = node.method; println!(" {{"); @@ -226,7 +226,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) { print!( " label=\"{} / {}\"", next.end_lsn - seg.end_lsn, - (next.end_size as i128 - seg.end_size as i128) + (next.end_size.unwrap_or(0) as i128 - seg.end_size.unwrap_or(0) as i128) ); } else { print!(" label=\"{}: {}\"", next.op, next.end_lsn - seg.end_lsn); diff --git a/pageserver/src/tenant/size.rs b/pageserver/src/tenant/size.rs index 86e685fd4c..24d9b2a10e 100644 --- a/pageserver/src/tenant/size.rs +++ b/pageserver/src/tenant/size.rs @@ -183,6 +183,19 @@ pub(super) async fn gather_inputs( } } + // all timelines also have an end point if they have made any progress + if last_record_lsn > timeline.get_ancestor_lsn() + && !interesting_lsns + .iter() + .any(|(lsn, _)| lsn == &last_record_lsn) + { + updates.push(Update { + lsn: last_record_lsn, + command: Command::EndOfBranch, + timeline_id: timeline.timeline_id, + }); + } + timeline_inputs.insert( timeline.timeline_id, TimelineInputs { @@ -270,48 +283,22 @@ impl ModelInputs { // impossible to always determine the a one main branch. let mut storage = tenant_size_model::Storage::>::new(None); - // tracking these not to require modifying the current implementation of the size model, - // which works in relative LSNs and sizes. - let mut last_state: HashMap = HashMap::new(); - for update in &self.updates { let Update { lsn, command: op, timeline_id, } = update; + let Lsn(now) = *lsn; match op { Command::Update(sz) => { - let latest = last_state.get_mut(timeline_id).ok_or_else(|| { - anyhow::anyhow!( - "ordering-mismatch: there must had been a previous state for {timeline_id}" - ) - })?; - - let lsn_bytes = { - let Lsn(now) = lsn; - let Lsn(prev) = latest.0; - debug_assert!(prev <= *now, "self.updates should had been sorted"); - now - prev - }; - - let size_diff = - i64::try_from(*sz as i128 - latest.1 as i128).with_context(|| { - format!("size difference i64 overflow for {timeline_id}") - })?; - - storage.modify_branch(&Some(*timeline_id), "".into(), lsn_bytes, size_diff); - *latest = (*lsn, *sz); + storage.insert_point(&Some(*timeline_id), "".into(), now, Some(*sz)); + } + Command::EndOfBranch => { + storage.insert_point(&Some(*timeline_id), "".into(), now, None); } Command::BranchFrom(parent) => { storage.branch(parent, Some(*timeline_id)); - - let size = parent - .as_ref() - .and_then(|id| last_state.get(id)) - .map(|x| x.1) - .unwrap_or(0); - last_state.insert(*timeline_id, (*lsn, size)); } } } @@ -320,10 +307,7 @@ impl ModelInputs { } } -/// Single size model update. -/// -/// Sizing model works with relative increments over latest branch state. -/// Updates are absolute, so additional state needs to be tracked when applying. +/// A point of interest in the tree of branches #[serde_with::serde_as] #[derive( Debug, PartialEq, PartialOrd, Eq, Ord, Clone, Copy, serde::Serialize, serde::Deserialize, @@ -342,6 +326,7 @@ struct Update { enum Command { Update(u64), BranchFrom(#[serde_as(as = "Option")] Option), + EndOfBranch, } impl std::fmt::Debug for Command { @@ -351,6 +336,7 @@ impl std::fmt::Debug for Command { match self { Self::Update(arg0) => write!(f, "Update({arg0})"), Self::BranchFrom(arg0) => write!(f, "BranchFrom({arg0:?})"), + Self::EndOfBranch => write!(f, "EndOfBranch"), } } } diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index d9aed351a5..f8197696bc 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -192,10 +192,8 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder "first-branch", main_branch_name, tenant_id ) - # unsure why this happens, the size difference is more than a page alignment size_after_first_branch = http_client.tenant_size(tenant_id) - assert size_after_first_branch > size_at_branch - assert size_after_first_branch - size_at_branch == gc_horizon + assert size_after_first_branch == size_at_branch first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id) @@ -221,7 +219,7 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder "second-branch", main_branch_name, tenant_id ) size_after_second_branch = http_client.tenant_size(tenant_id) - assert size_after_second_branch > size_after_continuing_on_main + assert size_after_second_branch == size_after_continuing_on_main second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id)