mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
fix(pageserver): ensure GC computes time cutoff using the same start time (#10193)
## Problem close https://github.com/neondatabase/neon/issues/10192 ## Summary of changes * `find_gc_time_cutoff` takes `now` parameter so that all branches compute the cutoff based on the same start time, avoiding races. * gc-compaction uses a single `get_gc_compaction_watermark` function to get the safe LSN to compact. --------- Signed-off-by: Alex Chi Z <chi@neon.tech> Co-authored-by: Arpad Müller <arpad-m@users.noreply.github.com>
This commit is contained in:
@@ -4488,13 +4488,17 @@ impl Tenant {
|
||||
let mut gc_cutoffs: HashMap<TimelineId, GcCutoffs> =
|
||||
HashMap::with_capacity(timelines.len());
|
||||
|
||||
// Ensures all timelines use the same start time when computing the time cutoff.
|
||||
let now_ts_for_pitr_calc = SystemTime::now();
|
||||
for timeline in timelines.iter() {
|
||||
let cutoff = timeline
|
||||
.get_last_record_lsn()
|
||||
.checked_sub(horizon)
|
||||
.unwrap_or(Lsn(0));
|
||||
|
||||
let cutoffs = timeline.find_gc_cutoffs(cutoff, pitr, cancel, ctx).await?;
|
||||
let cutoffs = timeline
|
||||
.find_gc_cutoffs(now_ts_for_pitr_calc, cutoff, pitr, cancel, ctx)
|
||||
.await?;
|
||||
let old = gc_cutoffs.insert(timeline.timeline_id, cutoffs);
|
||||
assert!(old.is_none());
|
||||
}
|
||||
|
||||
@@ -4859,6 +4859,7 @@ impl Timeline {
|
||||
|
||||
async fn find_gc_time_cutoff(
|
||||
&self,
|
||||
now: SystemTime,
|
||||
pitr: Duration,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
@@ -4866,7 +4867,6 @@ impl Timeline {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
if self.shard_identity.is_shard_zero() {
|
||||
// Shard Zero has SLRU data and can calculate the PITR time -> LSN mapping itself
|
||||
let now = SystemTime::now();
|
||||
let time_range = if pitr == Duration::ZERO {
|
||||
humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect("constant is invalid")
|
||||
} else {
|
||||
@@ -4952,6 +4952,7 @@ impl Timeline {
|
||||
#[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
|
||||
pub(super) async fn find_gc_cutoffs(
|
||||
&self,
|
||||
now: SystemTime,
|
||||
space_cutoff: Lsn,
|
||||
pitr: Duration,
|
||||
cancel: &CancellationToken,
|
||||
@@ -4979,7 +4980,7 @@ impl Timeline {
|
||||
// - if PITR interval is set, then this is our cutoff.
|
||||
// - if PITR interval is not set, then we do a lookup
|
||||
// based on DEFAULT_PITR_INTERVAL, so that size-based retention does not result in keeping history around permanently on idle databases.
|
||||
let time_cutoff = self.find_gc_time_cutoff(pitr, cancel, ctx).await?;
|
||||
let time_cutoff = self.find_gc_time_cutoff(now, pitr, cancel, ctx).await?;
|
||||
|
||||
Ok(match (pitr, time_cutoff) {
|
||||
(Duration::ZERO, Some(time_cutoff)) => {
|
||||
|
||||
@@ -1799,6 +1799,24 @@ impl Timeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get a watermark for gc-compaction, that is the lowest LSN that we can use as the `gc_horizon` for
|
||||
/// the compaction algorithm. It is min(space_cutoff, time_cutoff, latest_gc_cutoff, standby_horizon).
|
||||
/// Leases and retain_lsns are considered in the gc-compaction job itself so we don't need to account for them
|
||||
/// here.
|
||||
pub(crate) fn get_gc_compaction_watermark(self: &Arc<Self>) -> Lsn {
|
||||
let gc_cutoff_lsn = {
|
||||
let gc_info = self.gc_info.read().unwrap();
|
||||
gc_info.min_cutoff()
|
||||
};
|
||||
|
||||
// TODO: standby horizon should use leases so we don't really need to consider it here.
|
||||
// let watermark = watermark.min(self.standby_horizon.load());
|
||||
|
||||
// TODO: ensure the child branches will not use anything below the watermark, or consider
|
||||
// them when computing the watermark.
|
||||
gc_cutoff_lsn.min(*self.get_latest_gc_cutoff_lsn())
|
||||
}
|
||||
|
||||
/// Split a gc-compaction job into multiple compaction jobs. The split is based on the key range and the estimated size of the compaction job.
|
||||
/// The function returns a list of compaction jobs that can be executed separately. If the upper bound of the compact LSN
|
||||
/// range is not specified, we will use the latest gc_cutoff as the upper bound, so that all jobs in the jobset acts
|
||||
@@ -1811,7 +1829,7 @@ impl Timeline {
|
||||
let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX {
|
||||
job.compact_lsn_range.end
|
||||
} else {
|
||||
*self.get_latest_gc_cutoff_lsn() // use the real gc cutoff
|
||||
self.get_gc_compaction_watermark()
|
||||
};
|
||||
|
||||
// Split compaction job to about 4GB each
|
||||
@@ -2006,7 +2024,7 @@ impl Timeline {
|
||||
// Therefore, it can only clean up data that cannot be cleaned up with legacy gc, instead of
|
||||
// cleaning everything that theoritically it could. In the future, it should use `self.gc_info`
|
||||
// to get the truth data.
|
||||
let real_gc_cutoff = *self.get_latest_gc_cutoff_lsn();
|
||||
let real_gc_cutoff = self.get_gc_compaction_watermark();
|
||||
// The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for
|
||||
// each of the retain_lsn. Therefore, if the user-provided `compact_lsn_range.end` is larger than the real gc cutoff, we will use
|
||||
// the real cutoff.
|
||||
|
||||
Reference in New Issue
Block a user