Fix tenant size modeling code to include WAL at end of branch

Imagine that you have a tenant with a single branch like this:

---------------==========>
               ^
	    gc horizon
where:

----  is the portion of the branch that is older than retention period
====  is the portion of the branch that is newer than retention period.

Before this commit, the sizing model included the logical size at the
GC horizon, but not the WAL after that. In particular, that meant that
on a newly created tenant with just one timeline, where the retention
period covered the whole history of the timeline, i.e. gc_cutoff was 0,
the calculated tenant size was always zero.

We now include the WAL after the GC horizon in the size. So in the
above example, the calculated tenant size would be the logical size
of the database the GC horizon, plus all the WAL after it (marked with
===).

This adds a new `insert_point` function to the sizing model, alongside
`modify_branch`, and changes the code in size.rs to use the new
function. The new function takes an absolute lsn and logical size as
argument, so we no longer need to calculate the difference to the
previous point. Also, the end-size is now optional, because we now
need to add a point to represent the end of each branch to the model,
but we don't want to or need to calculate the logical size at that
point.
This commit is contained in:
Heikki Linnakangas
2022-11-22 13:56:26 +02:00
committed by Heikki Linnakangas
parent f50d0ec0c9
commit 86e483f87b
4 changed files with 64 additions and 47 deletions

View File

@@ -33,8 +33,8 @@ pub struct Segment {
/// Logical size before this state
start_size: u64,
/// Logical size at this state
pub end_size: u64,
/// Logical size at this state. Can be None in the last Segment of a branch.
pub end_size: Option<u64>,
/// Indices to [`Storage::segments`]
///
@@ -115,7 +115,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
start_lsn: 0,
end_lsn: 0,
start_size: 0,
end_size: 0,
end_size: Some(0),
children_after: Vec::new(),
};
@@ -125,6 +125,39 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
}
}
/// Advances the branch with a new point, at given LSN.
pub fn insert_point<Q: ?Sized>(
&mut self,
branch: &Q,
op: Cow<'static, str>,
lsn: u64,
size: Option<u64>,
) where
K: std::borrow::Borrow<Q>,
Q: std::hash::Hash + Eq,
{
let lastseg_id = *self.branches.get(branch).unwrap();
let newseg_id = self.segments.len();
let lastseg = &mut self.segments[lastseg_id];
assert!(lsn > lastseg.end_lsn);
let newseg = Segment {
op,
parent: Some(lastseg_id),
start_lsn: lastseg.end_lsn,
end_lsn: lsn,
start_size: lastseg.end_size.unwrap(),
end_size: size,
children_after: Vec::new(),
needed: false,
};
lastseg.children_after.push(newseg_id);
self.segments.push(newseg);
*self.branches.get_mut(branch).expect("read already") = newseg_id;
}
/// Advances the branch with the named operation, by the relative LSN and logical size bytes.
pub fn modify_branch<Q: ?Sized>(
&mut self,
@@ -145,8 +178,8 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
parent: Some(lastseg_id),
start_lsn: lastseg.end_lsn,
end_lsn: lastseg.end_lsn + lsn_bytes,
start_size: lastseg.end_size,
end_size: (lastseg.end_size as i64 + size_bytes) as u64,
start_size: lastseg.end_size.unwrap(),
end_size: Some((lastseg.end_size.unwrap() as i64 + size_bytes) as u64),
children_after: Vec::new(),
needed: false,
};
@@ -321,7 +354,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
Some(SegmentSize {
seg_id,
method: SnapshotAfter,
this_size: seg.end_size,
this_size: seg.end_size.unwrap(),
children,
})
} else {

View File

@@ -174,7 +174,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
let seg_id = node.seg_id;
let seg = segments.get(seg_id).unwrap();
let lsn = seg.end_lsn;
let size = seg.end_size;
let size = seg.end_size.unwrap_or(0);
let method = node.method;
println!(" {{");
@@ -226,7 +226,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
print!(
" label=\"{} / {}\"",
next.end_lsn - seg.end_lsn,
(next.end_size as i128 - seg.end_size as i128)
(next.end_size.unwrap_or(0) as i128 - seg.end_size.unwrap_or(0) as i128)
);
} else {
print!(" label=\"{}: {}\"", next.op, next.end_lsn - seg.end_lsn);

View File

@@ -183,6 +183,19 @@ pub(super) async fn gather_inputs(
}
}
// all timelines also have an end point if they have made any progress
if last_record_lsn > timeline.get_ancestor_lsn()
&& !interesting_lsns
.iter()
.any(|(lsn, _)| lsn == &last_record_lsn)
{
updates.push(Update {
lsn: last_record_lsn,
command: Command::EndOfBranch,
timeline_id: timeline.timeline_id,
});
}
timeline_inputs.insert(
timeline.timeline_id,
TimelineInputs {
@@ -270,48 +283,22 @@ impl ModelInputs {
// impossible to always determine the a one main branch.
let mut storage = tenant_size_model::Storage::<Option<TimelineId>>::new(None);
// tracking these not to require modifying the current implementation of the size model,
// which works in relative LSNs and sizes.
let mut last_state: HashMap<TimelineId, (Lsn, u64)> = HashMap::new();
for update in &self.updates {
let Update {
lsn,
command: op,
timeline_id,
} = update;
let Lsn(now) = *lsn;
match op {
Command::Update(sz) => {
let latest = last_state.get_mut(timeline_id).ok_or_else(|| {
anyhow::anyhow!(
"ordering-mismatch: there must had been a previous state for {timeline_id}"
)
})?;
let lsn_bytes = {
let Lsn(now) = lsn;
let Lsn(prev) = latest.0;
debug_assert!(prev <= *now, "self.updates should had been sorted");
now - prev
};
let size_diff =
i64::try_from(*sz as i128 - latest.1 as i128).with_context(|| {
format!("size difference i64 overflow for {timeline_id}")
})?;
storage.modify_branch(&Some(*timeline_id), "".into(), lsn_bytes, size_diff);
*latest = (*lsn, *sz);
storage.insert_point(&Some(*timeline_id), "".into(), now, Some(*sz));
}
Command::EndOfBranch => {
storage.insert_point(&Some(*timeline_id), "".into(), now, None);
}
Command::BranchFrom(parent) => {
storage.branch(parent, Some(*timeline_id));
let size = parent
.as_ref()
.and_then(|id| last_state.get(id))
.map(|x| x.1)
.unwrap_or(0);
last_state.insert(*timeline_id, (*lsn, size));
}
}
}
@@ -320,10 +307,7 @@ impl ModelInputs {
}
}
/// Single size model update.
///
/// Sizing model works with relative increments over latest branch state.
/// Updates are absolute, so additional state needs to be tracked when applying.
/// A point of interest in the tree of branches
#[serde_with::serde_as]
#[derive(
Debug, PartialEq, PartialOrd, Eq, Ord, Clone, Copy, serde::Serialize, serde::Deserialize,
@@ -342,6 +326,7 @@ struct Update {
enum Command {
Update(u64),
BranchFrom(#[serde_as(as = "Option<serde_with::DisplayFromStr>")] Option<TimelineId>),
EndOfBranch,
}
impl std::fmt::Debug for Command {
@@ -351,6 +336,7 @@ impl std::fmt::Debug for Command {
match self {
Self::Update(arg0) => write!(f, "Update({arg0})"),
Self::BranchFrom(arg0) => write!(f, "BranchFrom({arg0:?})"),
Self::EndOfBranch => write!(f, "EndOfBranch"),
}
}
}

View File

@@ -192,10 +192,8 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
"first-branch", main_branch_name, tenant_id
)
# unsure why this happens, the size difference is more than a page alignment
size_after_first_branch = http_client.tenant_size(tenant_id)
assert size_after_first_branch > size_at_branch
assert size_after_first_branch - size_at_branch == gc_horizon
assert size_after_first_branch == size_at_branch
first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id)
@@ -221,7 +219,7 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
"second-branch", main_branch_name, tenant_id
)
size_after_second_branch = http_client.tenant_size(tenant_id)
assert size_after_second_branch > size_after_continuing_on_main
assert size_after_second_branch == size_after_continuing_on_main
second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id)