mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-18 05:30:37 +00:00
pageserver: clean up GcCutoffs names (#8379)
- `horizon` is a confusing term, it's not at all obvious that this means space-based retention limit, rather than the total GC history limit. Rename to `GcCutoffs::space`. - `pitr` is less confusing, but still an unecessary level of indirection from what we really mean: a time-based condition. The fact that we use that that time-history for Point In Time Recovery doesn't mean we have to refer to time as "pitr" everywhere. Rename to `GcCutoffs::time`.
This commit is contained in:
@@ -2912,7 +2912,7 @@ impl Tenant {
|
||||
if let Some(ancestor_id) = timeline.get_ancestor_timeline_id() {
|
||||
if let Some(ancestor_gc_cutoffs) = gc_cutoffs.get(&ancestor_id) {
|
||||
target.within_ancestor_pitr =
|
||||
timeline.get_ancestor_lsn() >= ancestor_gc_cutoffs.pitr;
|
||||
timeline.get_ancestor_lsn() >= ancestor_gc_cutoffs.time;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2928,7 +2928,7 @@ impl Tenant {
|
||||
timeline.metrics.pitr_history_size.set(
|
||||
timeline
|
||||
.get_last_record_lsn()
|
||||
.checked_sub(target.cutoffs.pitr)
|
||||
.checked_sub(target.cutoffs.time)
|
||||
.unwrap_or(Lsn(0))
|
||||
.0,
|
||||
);
|
||||
@@ -4262,7 +4262,7 @@ mod tests {
|
||||
.source()
|
||||
.unwrap()
|
||||
.to_string()
|
||||
.contains("is earlier than latest GC horizon"));
|
||||
.contains("is earlier than latest GC cutoff"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6718,8 +6718,8 @@ mod tests {
|
||||
{
|
||||
// Update GC info
|
||||
let mut guard = tline.gc_info.write().unwrap();
|
||||
guard.cutoffs.pitr = Lsn(0x30);
|
||||
guard.cutoffs.horizon = Lsn(0x30);
|
||||
guard.cutoffs.time = Lsn(0x30);
|
||||
guard.cutoffs.space = Lsn(0x30);
|
||||
}
|
||||
|
||||
let expected_result = [
|
||||
@@ -7109,8 +7109,8 @@ mod tests {
|
||||
*guard = GcInfo {
|
||||
retain_lsns: vec![],
|
||||
cutoffs: GcCutoffs {
|
||||
pitr: Lsn(0x30),
|
||||
horizon: Lsn(0x30),
|
||||
time: Lsn(0x30),
|
||||
space: Lsn(0x30),
|
||||
},
|
||||
leases: Default::default(),
|
||||
within_ancestor_pitr: false,
|
||||
|
||||
@@ -135,11 +135,9 @@ pub struct TimelineInputs {
|
||||
ancestor_lsn: Lsn,
|
||||
last_record: Lsn,
|
||||
latest_gc_cutoff: Lsn,
|
||||
horizon_cutoff: Lsn,
|
||||
pitr_cutoff: Lsn,
|
||||
|
||||
/// Cutoff point based on GC settings
|
||||
next_gc_cutoff: Lsn,
|
||||
next_pitr_cutoff: Lsn,
|
||||
|
||||
/// Cutoff point calculated from the user-supplied 'max_retention_period'
|
||||
retention_param_cutoff: Option<Lsn>,
|
||||
@@ -150,7 +148,7 @@ pub struct TimelineInputs {
|
||||
|
||||
/// Gathers the inputs for the tenant sizing model.
|
||||
///
|
||||
/// Tenant size does not consider the latest state, but only the state until next_gc_cutoff, which
|
||||
/// Tenant size does not consider the latest state, but only the state until next_pitr_cutoff, which
|
||||
/// is updated on-demand, during the start of this calculation and separate from the
|
||||
/// [`TimelineInputs::latest_gc_cutoff`].
|
||||
///
|
||||
@@ -158,11 +156,8 @@ pub struct TimelineInputs {
|
||||
///
|
||||
/// ```text
|
||||
/// 0-----|---------|----|------------| · · · · · |·> lsn
|
||||
/// initdb_lsn branchpoints* next_gc_cutoff latest
|
||||
/// initdb_lsn branchpoints* next_pitr_cutoff latest
|
||||
/// ```
|
||||
///
|
||||
/// Until gc_horizon_cutoff > `Timeline::last_record_lsn` for any of the tenant's timelines, the
|
||||
/// tenant size will be zero.
|
||||
pub(super) async fn gather_inputs(
|
||||
tenant: &Tenant,
|
||||
limit: &Arc<Semaphore>,
|
||||
@@ -172,7 +167,7 @@ pub(super) async fn gather_inputs(
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<ModelInputs, CalculateSyntheticSizeError> {
|
||||
// refresh is needed to update gc related pitr_cutoff and horizon_cutoff
|
||||
// refresh is needed to update [`timeline::GcCutoffs`]
|
||||
tenant.refresh_gc_info(cancel, ctx).await?;
|
||||
|
||||
// Collect information about all the timelines
|
||||
@@ -236,20 +231,18 @@ pub(super) async fn gather_inputs(
|
||||
// we don't consider the `Timeline::disk_consistent_lsn` at all, because we are not
|
||||
// actually removing files.
|
||||
//
|
||||
// We only consider [`GcInfo::pitr_cutoff`], and not [`GcInfo::horizon_cutoff`], because from
|
||||
// We only consider [`timeline::GcCutoffs::time`], and not [`timeline::GcCutoffs::space`], because from
|
||||
// a user's perspective they have only requested retention up to the time bound (pitr_cutoff), rather
|
||||
// than a space bound (horizon cutoff). This means that if someone drops a database and waits for their
|
||||
// than our internal space cutoff. This means that if someone drops a database and waits for their
|
||||
// PITR interval, they will see synthetic size decrease, even if we are still storing data inside
|
||||
// horizon_cutoff.
|
||||
let pitr_cutoff = gc_info.cutoffs.pitr;
|
||||
let horizon_cutoff = gc_info.cutoffs.horizon;
|
||||
let mut next_gc_cutoff = pitr_cutoff;
|
||||
// the space cutoff.
|
||||
let mut next_pitr_cutoff = gc_info.cutoffs.time;
|
||||
|
||||
// If the caller provided a shorter retention period, use that instead of the GC cutoff.
|
||||
let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period {
|
||||
let param_cutoff = Lsn(last_record_lsn.0.saturating_sub(max_retention_period));
|
||||
if next_gc_cutoff < param_cutoff {
|
||||
next_gc_cutoff = param_cutoff;
|
||||
if next_pitr_cutoff < param_cutoff {
|
||||
next_pitr_cutoff = param_cutoff;
|
||||
}
|
||||
Some(param_cutoff)
|
||||
} else {
|
||||
@@ -263,7 +256,7 @@ pub(super) async fn gather_inputs(
|
||||
.copied()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// next_gc_cutoff in parent branch are not of interest (right now at least), nor do we
|
||||
// next_pitr_cutoff in parent branch are not of interest (right now at least), nor do we
|
||||
// want to query any logical size before initdb_lsn.
|
||||
let branch_start_lsn = cmp::max(ancestor_lsn, timeline.initdb_lsn);
|
||||
|
||||
@@ -291,10 +284,10 @@ pub(super) async fn gather_inputs(
|
||||
)
|
||||
}
|
||||
|
||||
// Add a point for the GC cutoff
|
||||
let branch_start_needed = next_gc_cutoff <= branch_start_lsn;
|
||||
// Add a point for the PITR cutoff
|
||||
let branch_start_needed = next_pitr_cutoff <= branch_start_lsn;
|
||||
if !branch_start_needed {
|
||||
lsns.push((next_gc_cutoff, LsnKind::GcCutOff));
|
||||
lsns.push((next_pitr_cutoff, LsnKind::GcCutOff));
|
||||
}
|
||||
|
||||
lsns.sort_unstable();
|
||||
@@ -333,7 +326,7 @@ pub(super) async fn gather_inputs(
|
||||
parent: Some(parent),
|
||||
lsn: lsn.0,
|
||||
size: None,
|
||||
needed: lsn > next_gc_cutoff,
|
||||
needed: lsn > next_pitr_cutoff,
|
||||
},
|
||||
timeline_id: timeline.timeline_id,
|
||||
kind,
|
||||
@@ -357,8 +350,8 @@ pub(super) async fn gather_inputs(
|
||||
segment: Segment {
|
||||
parent: Some(lease_parent),
|
||||
lsn: lsn.0,
|
||||
size: None, // Filled in later, if necessary
|
||||
needed: lsn > next_gc_cutoff, // only needed if the point is within rentention.
|
||||
size: None, // Filled in later, if necessary
|
||||
needed: lsn > next_pitr_cutoff, // only needed if the point is within rentention.
|
||||
},
|
||||
timeline_id: timeline.timeline_id,
|
||||
kind: LsnKind::LeaseStart,
|
||||
@@ -398,9 +391,7 @@ pub(super) async fn gather_inputs(
|
||||
last_record: last_record_lsn,
|
||||
// this is not used above, because it might not have updated recently enough
|
||||
latest_gc_cutoff: *timeline.get_latest_gc_cutoff_lsn(),
|
||||
horizon_cutoff,
|
||||
pitr_cutoff,
|
||||
next_gc_cutoff,
|
||||
next_pitr_cutoff,
|
||||
retention_param_cutoff,
|
||||
lease_points,
|
||||
});
|
||||
@@ -742,9 +733,7 @@ fn verify_size_for_multiple_branches() {
|
||||
"ancestor_lsn": "0/18D3D98",
|
||||
"last_record": "0/2230CD0",
|
||||
"latest_gc_cutoff": "0/1698C48",
|
||||
"horizon_cutoff": "0/2210CD0",
|
||||
"pitr_cutoff": "0/2210CD0",
|
||||
"next_gc_cutoff": "0/2210CD0",
|
||||
"next_pitr_cutoff": "0/2210CD0",
|
||||
"retention_param_cutoff": null,
|
||||
"lease_points": []
|
||||
},
|
||||
@@ -753,9 +742,7 @@ fn verify_size_for_multiple_branches() {
|
||||
"ancestor_lsn": "0/176D998",
|
||||
"last_record": "0/1837770",
|
||||
"latest_gc_cutoff": "0/1698C48",
|
||||
"horizon_cutoff": "0/1817770",
|
||||
"pitr_cutoff": "0/1817770",
|
||||
"next_gc_cutoff": "0/1817770",
|
||||
"next_pitr_cutoff": "0/1817770",
|
||||
"retention_param_cutoff": null,
|
||||
"lease_points": []
|
||||
},
|
||||
@@ -764,9 +751,7 @@ fn verify_size_for_multiple_branches() {
|
||||
"ancestor_lsn": "0/0",
|
||||
"last_record": "0/18D3D98",
|
||||
"latest_gc_cutoff": "0/1698C48",
|
||||
"horizon_cutoff": "0/18B3D98",
|
||||
"pitr_cutoff": "0/18B3D98",
|
||||
"next_gc_cutoff": "0/18B3D98",
|
||||
"next_pitr_cutoff": "0/18B3D98",
|
||||
"retention_param_cutoff": null,
|
||||
"lease_points": []
|
||||
}
|
||||
@@ -820,9 +805,7 @@ fn verify_size_for_one_branch() {
|
||||
"ancestor_lsn": "0/0",
|
||||
"last_record": "47/280A5860",
|
||||
"latest_gc_cutoff": "47/240A5860",
|
||||
"horizon_cutoff": "47/240A5860",
|
||||
"pitr_cutoff": "47/240A5860",
|
||||
"next_gc_cutoff": "47/240A5860",
|
||||
"next_pitr_cutoff": "47/240A5860",
|
||||
"retention_param_cutoff": "0/0",
|
||||
"lease_points": []
|
||||
}
|
||||
|
||||
@@ -478,37 +478,32 @@ impl GcInfo {
|
||||
}
|
||||
}
|
||||
|
||||
/// The `GcInfo` component describing which Lsns need to be retained.
|
||||
/// The `GcInfo` component describing which Lsns need to be retained. Functionally, this
|
||||
/// is a single number (the oldest LSN which we must retain), but it internally distinguishes
|
||||
/// between time-based and space-based retention for observability and consumption metrics purposes.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct GcCutoffs {
|
||||
/// Keep everything newer than this point.
|
||||
///
|
||||
/// This is calculated by subtracting 'gc_horizon' setting from
|
||||
/// last-record LSN
|
||||
///
|
||||
/// FIXME: is this inclusive or exclusive?
|
||||
pub(crate) horizon: Lsn,
|
||||
/// Calculated from the [`TenantConf::gc_horizon`], this LSN indicates how much
|
||||
/// history we must keep to retain a specified number of bytes of WAL.
|
||||
pub(crate) space: Lsn,
|
||||
|
||||
/// In addition to 'retain_lsns' and 'horizon_cutoff', keep everything newer than this
|
||||
/// point.
|
||||
///
|
||||
/// This is calculated by finding a number such that a record is needed for PITR
|
||||
/// if only if its LSN is larger than 'pitr_cutoff'.
|
||||
pub(crate) pitr: Lsn,
|
||||
/// Calculated from [`TenantConf::pitr_interval`], this LSN indicates how much
|
||||
/// history we must keep to enable reading back at least the PITR interval duration.
|
||||
pub(crate) time: Lsn,
|
||||
}
|
||||
|
||||
impl Default for GcCutoffs {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
horizon: Lsn::INVALID,
|
||||
pitr: Lsn::INVALID,
|
||||
space: Lsn::INVALID,
|
||||
time: Lsn::INVALID,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl GcCutoffs {
|
||||
fn select_min(&self) -> Lsn {
|
||||
std::cmp::min(self.horizon, self.pitr)
|
||||
std::cmp::min(self.space, self.time)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -867,7 +862,7 @@ impl Timeline {
|
||||
let gc_info = self.gc_info.read().unwrap();
|
||||
let history = self
|
||||
.get_last_record_lsn()
|
||||
.checked_sub(gc_info.cutoffs.pitr)
|
||||
.checked_sub(gc_info.cutoffs.time)
|
||||
.unwrap_or(Lsn(0))
|
||||
.0;
|
||||
(history, gc_info.within_ancestor_pitr)
|
||||
@@ -1566,7 +1561,7 @@ impl Timeline {
|
||||
) -> anyhow::Result<()> {
|
||||
ensure!(
|
||||
lsn >= **latest_gc_cutoff_lsn,
|
||||
"LSN {} is earlier than latest GC horizon {} (we might've already garbage collected needed data)",
|
||||
"LSN {} is earlier than latest GC cutoff {} (we might've already garbage collected needed data)",
|
||||
lsn,
|
||||
**latest_gc_cutoff_lsn,
|
||||
);
|
||||
@@ -4944,18 +4939,18 @@ impl Timeline {
|
||||
/// garbage collection.
|
||||
///
|
||||
/// We calculate two cutoffs, one based on time and one based on WAL size. `pitr`
|
||||
/// controls the time cutoff (or ZERO to disable time-based retention), and `cutoff_horizon` controls
|
||||
/// controls the time cutoff (or ZERO to disable time-based retention), and `space_cutoff` controls
|
||||
/// the space-based retention.
|
||||
///
|
||||
/// This function doesn't simply to calculate time & space based retention: it treats time-based
|
||||
/// retention as authoritative if enabled, and falls back to space-based retention if calculating
|
||||
/// the LSN for a time point isn't possible. Therefore the GcCutoffs::horizon in the response might
|
||||
/// be different to the `cutoff_horizon` input. Callers should treat the min() of the two cutoffs
|
||||
/// be different to the `space_cutoff` input. Callers should treat the min() of the two cutoffs
|
||||
/// in the response as the GC cutoff point for the timeline.
|
||||
#[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
|
||||
pub(super) async fn find_gc_cutoffs(
|
||||
&self,
|
||||
cutoff_horizon: Lsn,
|
||||
space_cutoff: Lsn,
|
||||
pitr: Duration,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
@@ -4972,8 +4967,8 @@ impl Timeline {
|
||||
// Unit tests which specify zero PITR interval expect to avoid doing any I/O for timestamp lookup
|
||||
if pitr == Duration::ZERO {
|
||||
return Ok(GcCutoffs {
|
||||
pitr: self.get_last_record_lsn(),
|
||||
horizon: cutoff_horizon,
|
||||
time: self.get_last_record_lsn(),
|
||||
space: space_cutoff,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -4981,8 +4976,7 @@ impl Timeline {
|
||||
// Calculate a time-based limit on how much to retain:
|
||||
// - if PITR interval is set, then this is our cutoff.
|
||||
// - if PITR interval is not set, then we do a lookup
|
||||
// based on DEFAULT_PITR_INTERVAL, so that size-based retention (horizon)
|
||||
// does not result in keeping history around permanently on idle databases.
|
||||
// based on DEFAULT_PITR_INTERVAL, so that size-based retention does not result in keeping history around permanently on idle databases.
|
||||
let time_cutoff = {
|
||||
let now = SystemTime::now();
|
||||
let time_range = if pitr == Duration::ZERO {
|
||||
@@ -5023,31 +5017,31 @@ impl Timeline {
|
||||
// PITR is not set. Retain the size-based limit, or the default time retention,
|
||||
// whichever requires less data.
|
||||
GcCutoffs {
|
||||
pitr: std::cmp::max(time_cutoff, cutoff_horizon),
|
||||
horizon: std::cmp::max(time_cutoff, cutoff_horizon),
|
||||
time: self.get_last_record_lsn(),
|
||||
space: std::cmp::max(time_cutoff, space_cutoff),
|
||||
}
|
||||
}
|
||||
(Duration::ZERO, None) => {
|
||||
// PITR is not set, and time lookup failed
|
||||
GcCutoffs {
|
||||
pitr: self.get_last_record_lsn(),
|
||||
horizon: cutoff_horizon,
|
||||
time: self.get_last_record_lsn(),
|
||||
space: space_cutoff,
|
||||
}
|
||||
}
|
||||
(_, None) => {
|
||||
// PITR interval is set & we didn't look up a timestamp successfully. Conservatively assume PITR
|
||||
// cannot advance beyond what was already GC'd, and respect space-based retention
|
||||
GcCutoffs {
|
||||
pitr: *self.get_latest_gc_cutoff_lsn(),
|
||||
horizon: cutoff_horizon,
|
||||
time: *self.get_latest_gc_cutoff_lsn(),
|
||||
space: space_cutoff,
|
||||
}
|
||||
}
|
||||
(_, Some(time_cutoff)) => {
|
||||
// PITR interval is set and we looked up timestamp successfully. Ignore
|
||||
// size based retention and make time cutoff authoritative
|
||||
GcCutoffs {
|
||||
pitr: time_cutoff,
|
||||
horizon: time_cutoff,
|
||||
time: time_cutoff,
|
||||
space: time_cutoff,
|
||||
}
|
||||
}
|
||||
})
|
||||
@@ -5074,11 +5068,11 @@ impl Timeline {
|
||||
return Err(GcError::TimelineCancelled);
|
||||
}
|
||||
|
||||
let (horizon_cutoff, pitr_cutoff, retain_lsns, max_lsn_with_valid_lease) = {
|
||||
let (space_cutoff, time_cutoff, retain_lsns, max_lsn_with_valid_lease) = {
|
||||
let gc_info = self.gc_info.read().unwrap();
|
||||
|
||||
let horizon_cutoff = min(gc_info.cutoffs.horizon, self.get_disk_consistent_lsn());
|
||||
let pitr_cutoff = gc_info.cutoffs.pitr;
|
||||
let space_cutoff = min(gc_info.cutoffs.space, self.get_disk_consistent_lsn());
|
||||
let time_cutoff = gc_info.cutoffs.time;
|
||||
let retain_lsns = gc_info.retain_lsns.clone();
|
||||
|
||||
// Gets the maximum LSN that holds the valid lease.
|
||||
@@ -5088,14 +5082,14 @@ impl Timeline {
|
||||
let max_lsn_with_valid_lease = gc_info.leases.last_key_value().map(|(lsn, _)| *lsn);
|
||||
|
||||
(
|
||||
horizon_cutoff,
|
||||
pitr_cutoff,
|
||||
space_cutoff,
|
||||
time_cutoff,
|
||||
retain_lsns,
|
||||
max_lsn_with_valid_lease,
|
||||
)
|
||||
};
|
||||
|
||||
let mut new_gc_cutoff = Lsn::min(horizon_cutoff, pitr_cutoff);
|
||||
let mut new_gc_cutoff = Lsn::min(space_cutoff, time_cutoff);
|
||||
let standby_horizon = self.standby_horizon.load();
|
||||
// Hold GC for the standby, but as a safety guard do it only within some
|
||||
// reasonable lag.
|
||||
@@ -5124,8 +5118,8 @@ impl Timeline {
|
||||
|
||||
let res = self
|
||||
.gc_timeline(
|
||||
horizon_cutoff,
|
||||
pitr_cutoff,
|
||||
space_cutoff,
|
||||
time_cutoff,
|
||||
retain_lsns,
|
||||
max_lsn_with_valid_lease,
|
||||
new_gc_cutoff,
|
||||
@@ -5143,8 +5137,8 @@ impl Timeline {
|
||||
|
||||
async fn gc_timeline(
|
||||
&self,
|
||||
horizon_cutoff: Lsn,
|
||||
pitr_cutoff: Lsn,
|
||||
space_cutoff: Lsn,
|
||||
time_cutoff: Lsn,
|
||||
retain_lsns: Vec<Lsn>,
|
||||
max_lsn_with_valid_lease: Option<Lsn>,
|
||||
new_gc_cutoff: Lsn,
|
||||
@@ -5205,22 +5199,22 @@ impl Timeline {
|
||||
result.layers_total += 1;
|
||||
|
||||
// 1. Is it newer than GC horizon cutoff point?
|
||||
if l.get_lsn_range().end > horizon_cutoff {
|
||||
if l.get_lsn_range().end > space_cutoff {
|
||||
debug!(
|
||||
"keeping {} because it's newer than horizon_cutoff {}",
|
||||
"keeping {} because it's newer than space_cutoff {}",
|
||||
l.layer_name(),
|
||||
horizon_cutoff,
|
||||
space_cutoff,
|
||||
);
|
||||
result.layers_needed_by_cutoff += 1;
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
// 2. It is newer than PiTR cutoff point?
|
||||
if l.get_lsn_range().end > pitr_cutoff {
|
||||
if l.get_lsn_range().end > time_cutoff {
|
||||
debug!(
|
||||
"keeping {} because it's newer than pitr_cutoff {}",
|
||||
"keeping {} because it's newer than time_cutoff {}",
|
||||
l.layer_name(),
|
||||
pitr_cutoff,
|
||||
time_cutoff,
|
||||
);
|
||||
result.layers_needed_by_pitr += 1;
|
||||
continue 'outer;
|
||||
|
||||
@@ -195,7 +195,7 @@ impl Timeline {
|
||||
tracing::info!(
|
||||
"latest_gc_cutoff: {}, pitr cutoff {}",
|
||||
*latest_gc_cutoff,
|
||||
self.gc_info.read().unwrap().cutoffs.pitr
|
||||
self.gc_info.read().unwrap().cutoffs.time
|
||||
);
|
||||
|
||||
let layers = self.layers.read().await;
|
||||
@@ -990,7 +990,7 @@ impl Timeline {
|
||||
"enhanced legacy compaction currently does not support retain_lsns (branches)"
|
||||
)));
|
||||
}
|
||||
let gc_cutoff = Lsn::min(gc_info.cutoffs.horizon, gc_info.cutoffs.pitr);
|
||||
let gc_cutoff = gc_info.cutoffs.select_min();
|
||||
let mut selected_layers = Vec::new();
|
||||
// TODO: consider retain_lsns
|
||||
drop(gc_info);
|
||||
|
||||
Reference in New Issue
Block a user