diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 0b36eb5df7..0d6791cddd 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -449,7 +449,7 @@ async fn build_timeline_info_common( // Internally we distinguish between the planned GC cutoff (PITR point) and the "applied" GC cutoff (where we // actually trimmed data to), which can pass each other when PITR is changed. let min_readable_lsn = std::cmp::max( - timeline.get_gc_cutoff_lsn(), + timeline.get_gc_cutoff_lsn().unwrap_or_default(), *timeline.get_applied_gc_cutoff_lsn(), ); diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 35ddba355d..c15b44469a 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -4594,7 +4594,7 @@ impl TenantShard { target.cutoffs = GcCutoffs { space: space_cutoff, - time: Lsn::INVALID, + time: None, }; } } @@ -4678,7 +4678,7 @@ impl TenantShard { if let Some(ancestor_id) = timeline.get_ancestor_timeline_id() { if let Some(ancestor_gc_cutoffs) = gc_cutoffs.get(&ancestor_id) { target.within_ancestor_pitr = - timeline.get_ancestor_lsn() >= ancestor_gc_cutoffs.time; + Some(timeline.get_ancestor_lsn()) >= ancestor_gc_cutoffs.time; } } @@ -4691,13 +4691,15 @@ impl TenantShard { } else { 0 }); - timeline.metrics.pitr_history_size.set( - timeline - .get_last_record_lsn() - .checked_sub(target.cutoffs.time) - .unwrap_or(Lsn(0)) - .0, - ); + if let Some(time_cutoff) = target.cutoffs.time { + timeline.metrics.pitr_history_size.set( + timeline + .get_last_record_lsn() + .checked_sub(time_cutoff) + .unwrap_or_default() + .0, + ); + } // Apply the cutoffs we found to the Timeline's GcInfo. Why might we _not_ have cutoffs for a timeline? // - this timeline was created while we were finding cutoffs @@ -4706,8 +4708,8 @@ impl TenantShard { let original_cutoffs = target.cutoffs.clone(); // GC cutoffs should never go back target.cutoffs = GcCutoffs { - space: Lsn(cutoffs.space.0.max(original_cutoffs.space.0)), - time: Lsn(cutoffs.time.0.max(original_cutoffs.time.0)), + space: cutoffs.space.max(original_cutoffs.space), + time: cutoffs.time.max(original_cutoffs.time), } } } @@ -8952,7 +8954,7 @@ mod tests { .await; // Update GC info let mut guard = tline.gc_info.write().unwrap(); - guard.cutoffs.time = Lsn(0x30); + guard.cutoffs.time = Some(Lsn(0x30)); guard.cutoffs.space = Lsn(0x30); } @@ -9060,7 +9062,7 @@ mod tests { .await; // Update GC info let mut guard = tline.gc_info.write().unwrap(); - guard.cutoffs.time = Lsn(0x40); + guard.cutoffs.time = Some(Lsn(0x40)); guard.cutoffs.space = Lsn(0x40); } tline @@ -9478,7 +9480,7 @@ mod tests { *guard = GcInfo { retain_lsns: vec![], cutoffs: GcCutoffs { - time: Lsn(0x30), + time: Some(Lsn(0x30)), space: Lsn(0x30), }, leases: Default::default(), @@ -9562,7 +9564,7 @@ mod tests { .await; // Update GC info let mut guard = tline.gc_info.write().unwrap(); - guard.cutoffs.time = Lsn(0x40); + guard.cutoffs.time = Some(Lsn(0x40)); guard.cutoffs.space = Lsn(0x40); } tline @@ -10033,7 +10035,7 @@ mod tests { (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No), ], cutoffs: GcCutoffs { - time: Lsn(0x30), + time: Some(Lsn(0x30)), space: Lsn(0x30), }, leases: Default::default(), @@ -10096,7 +10098,7 @@ mod tests { let verify_result = || async { let gc_horizon = { let gc_info = tline.gc_info.read().unwrap(); - gc_info.cutoffs.time + gc_info.cutoffs.time.unwrap_or_default() }; for idx in 0..10 { assert_eq!( @@ -10174,7 +10176,7 @@ mod tests { .await; // Update GC info let mut guard = tline.gc_info.write().unwrap(); - guard.cutoffs.time = Lsn(0x38); + guard.cutoffs.time = Some(Lsn(0x38)); guard.cutoffs.space = Lsn(0x38); } tline @@ -10282,7 +10284,7 @@ mod tests { (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No), ], cutoffs: GcCutoffs { - time: Lsn(0x30), + time: Some(Lsn(0x30)), space: Lsn(0x30), }, leases: Default::default(), @@ -10345,7 +10347,7 @@ mod tests { let verify_result = || async { let gc_horizon = { let gc_info = tline.gc_info.read().unwrap(); - gc_info.cutoffs.time + gc_info.cutoffs.time.unwrap_or_default() }; for idx in 0..10 { assert_eq!( @@ -10531,7 +10533,7 @@ mod tests { *guard = GcInfo { retain_lsns: vec![(Lsn(0x18), branch_tline.timeline_id, MaybeOffloaded::No)], cutoffs: GcCutoffs { - time: Lsn(0x10), + time: Some(Lsn(0x10)), space: Lsn(0x10), }, leases: Default::default(), @@ -10551,7 +10553,7 @@ mod tests { *guard = GcInfo { retain_lsns: vec![(Lsn(0x40), branch_tline.timeline_id, MaybeOffloaded::No)], cutoffs: GcCutoffs { - time: Lsn(0x50), + time: Some(Lsn(0x50)), space: Lsn(0x50), }, leases: Default::default(), @@ -11272,7 +11274,7 @@ mod tests { *guard = GcInfo { retain_lsns: vec![(Lsn(0x20), tline.timeline_id, MaybeOffloaded::No)], cutoffs: GcCutoffs { - time: Lsn(0x30), + time: Some(Lsn(0x30)), space: Lsn(0x30), }, leases: Default::default(), @@ -11661,7 +11663,7 @@ mod tests { (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No), ], cutoffs: GcCutoffs { - time: Lsn(0x30), + time: Some(Lsn(0x30)), space: Lsn(0x30), }, leases: Default::default(), @@ -11724,7 +11726,7 @@ mod tests { let verify_result = || async { let gc_horizon = { let gc_info = tline.gc_info.read().unwrap(); - gc_info.cutoffs.time + gc_info.cutoffs.time.unwrap_or_default() }; for idx in 0..10 { assert_eq!( @@ -11913,7 +11915,7 @@ mod tests { (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No), ], cutoffs: GcCutoffs { - time: Lsn(0x30), + time: Some(Lsn(0x30)), space: Lsn(0x30), }, leases: Default::default(), @@ -11976,7 +11978,7 @@ mod tests { let verify_result = || async { let gc_horizon = { let gc_info = tline.gc_info.read().unwrap(); - gc_info.cutoffs.time + gc_info.cutoffs.time.unwrap_or_default() }; for idx in 0..10 { assert_eq!( @@ -12239,7 +12241,7 @@ mod tests { *guard = GcInfo { retain_lsns: vec![], cutoffs: GcCutoffs { - time: Lsn(0x30), + time: Some(Lsn(0x30)), space: Lsn(0x30), }, leases: Default::default(), diff --git a/pageserver/src/tenant/size.rs b/pageserver/src/tenant/size.rs index bf5d9bc87a..d1020cff96 100644 --- a/pageserver/src/tenant/size.rs +++ b/pageserver/src/tenant/size.rs @@ -235,7 +235,7 @@ pub(super) async fn gather_inputs( // than our internal space cutoff. This means that if someone drops a database and waits for their // PITR interval, they will see synthetic size decrease, even if we are still storing data inside // the space cutoff. - let mut next_pitr_cutoff = gc_info.cutoffs.time; + let mut next_pitr_cutoff = gc_info.cutoffs.time.unwrap_or_default(); // TODO: handle None // If the caller provided a shorter retention period, use that instead of the GC cutoff. let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period { diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index da2e56d80a..670e0865df 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -529,29 +529,24 @@ impl GcInfo { /// The `GcInfo` component describing which Lsns need to be retained. Functionally, this /// is a single number (the oldest LSN which we must retain), but it internally distinguishes /// between time-based and space-based retention for observability and consumption metrics purposes. -#[derive(Debug, Clone)] +#[derive(Clone, Debug, Default)] pub(crate) struct GcCutoffs { /// Calculated from the [`pageserver_api::models::TenantConfig::gc_horizon`], this LSN indicates how much /// history we must keep to retain a specified number of bytes of WAL. pub(crate) space: Lsn, - /// Calculated from [`pageserver_api::models::TenantConfig::pitr_interval`], this LSN indicates how much - /// history we must keep to enable reading back at least the PITR interval duration. - pub(crate) time: Lsn, -} - -impl Default for GcCutoffs { - fn default() -> Self { - Self { - space: Lsn::INVALID, - time: Lsn::INVALID, - } - } + /// Calculated from [`pageserver_api::models::TenantConfig::pitr_interval`], this LSN indicates + /// how much history we must keep to enable reading back at least the PITR interval duration. + /// + /// None indicates that the PITR cutoff has not been computed. A PITR interval of 0 will yield + /// Some(last_record_lsn). + pub(crate) time: Option, } impl GcCutoffs { fn select_min(&self) -> Lsn { - std::cmp::min(self.space, self.time) + // NB: if we haven't computed the PITR cutoff yet, we can't GC anything. + self.space.min(self.time.unwrap_or_default()) } } @@ -1088,11 +1083,14 @@ impl Timeline { /// Get the bytes written since the PITR cutoff on this branch, and /// whether this branch's ancestor_lsn is within its parent's PITR. pub(crate) fn get_pitr_history_stats(&self) -> (u64, bool) { + // TODO: for backwards compatibility, we return the full history back to 0 when the PITR + // cutoff has not yet been initialized. This should return None instead, but this is exposed + // in external HTTP APIs and callers may not handle a null value. let gc_info = self.gc_info.read().unwrap(); let history = self .get_last_record_lsn() - .checked_sub(gc_info.cutoffs.time) - .unwrap_or(Lsn(0)) + .checked_sub(gc_info.cutoffs.time.unwrap_or_default()) + .unwrap_or_default() .0; (history, gc_info.within_ancestor_pitr) } @@ -1102,9 +1100,10 @@ impl Timeline { self.applied_gc_cutoff_lsn.read() } - /// Read timeline's planned GC cutoff: this is the logical end of history that users - /// are allowed to read (based on configured PITR), even if physically we have more history. - pub(crate) fn get_gc_cutoff_lsn(&self) -> Lsn { + /// Read timeline's planned GC cutoff: this is the logical end of history that users are allowed + /// to read (based on configured PITR), even if physically we have more history. Returns None + /// if the PITR cutoff has not yet been initialized. + pub(crate) fn get_gc_cutoff_lsn(&self) -> Option { self.gc_info.read().unwrap().cutoffs.time } @@ -6235,14 +6234,12 @@ impl Timeline { pausable_failpoint!("Timeline::find_gc_cutoffs-pausable"); - if cfg!(test) { + if cfg!(test) && pitr == Duration::ZERO { // Unit tests which specify zero PITR interval expect to avoid doing any I/O for timestamp lookup - if pitr == Duration::ZERO { - return Ok(GcCutoffs { - time: self.get_last_record_lsn(), - space: space_cutoff, - }); - } + return Ok(GcCutoffs { + time: Some(self.get_last_record_lsn()), + space: space_cutoff, + }); } // Calculate a time-based limit on how much to retain: @@ -6256,14 +6253,14 @@ impl Timeline { // PITR is not set. Retain the size-based limit, or the default time retention, // whichever requires less data. GcCutoffs { - time: self.get_last_record_lsn(), + time: Some(self.get_last_record_lsn()), space: std::cmp::max(time_cutoff, space_cutoff), } } (Duration::ZERO, None) => { // PITR is not set, and time lookup failed GcCutoffs { - time: self.get_last_record_lsn(), + time: Some(self.get_last_record_lsn()), space: space_cutoff, } } @@ -6271,7 +6268,7 @@ impl Timeline { // PITR interval is set & we didn't look up a timestamp successfully. Conservatively assume PITR // cannot advance beyond what was already GC'd, and respect space-based retention GcCutoffs { - time: *self.get_applied_gc_cutoff_lsn(), + time: Some(*self.get_applied_gc_cutoff_lsn()), space: space_cutoff, } } @@ -6279,7 +6276,7 @@ impl Timeline { // PITR interval is set and we looked up timestamp successfully. Ignore // size based retention and make time cutoff authoritative GcCutoffs { - time: time_cutoff, + time: Some(time_cutoff), space: time_cutoff, } } @@ -6332,7 +6329,7 @@ impl Timeline { ) }; - let mut new_gc_cutoff = Lsn::min(space_cutoff, time_cutoff); + let mut new_gc_cutoff = space_cutoff.min(time_cutoff.unwrap_or_default()); let standby_horizon = self.standby_horizon.load(); // Hold GC for the standby, but as a safety guard do it only within some // reasonable lag. @@ -6381,7 +6378,7 @@ impl Timeline { async fn gc_timeline( &self, space_cutoff: Lsn, - time_cutoff: Lsn, + time_cutoff: Option, // None if uninitialized retain_lsns: Vec, max_lsn_with_valid_lease: Option, new_gc_cutoff: Lsn, @@ -6400,6 +6397,12 @@ impl Timeline { return Ok(result); } + let Some(time_cutoff) = time_cutoff else { + // The GC cutoff should have been computed by now, but let's be defensive. + info!("Nothing to GC: time_cutoff not yet computed"); + return Ok(result); + }; + // We need to ensure that no one tries to read page versions or create // branches at a point before latest_gc_cutoff_lsn. See branch_timeline() // for details. This will block until the old value is no longer in use. diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 37c1a8f60c..0e4b14c3e4 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1526,7 +1526,7 @@ impl Timeline { info!( "starting shard ancestor compaction, rewriting {} layers and dropping {} layers, \ checked {layers_checked}/{layers_total} layers \ - (latest_gc_cutoff={} pitr_cutoff={})", + (latest_gc_cutoff={} pitr_cutoff={:?})", layers_to_rewrite.len(), drop_layers.len(), *latest_gc_cutoff,