mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-10 23:12:54 +00:00
refactor(update_gc_info): split GcInfo to compose out of GcCutoffs (#7584)
Split `GcInfo` and replace `Timeline::update_gc_info` with a method that simply finds gc cutoffs `Timeline::find_gc_cutoffs` to be combined as `Timeline::gc_info` at the caller. This change will be followed up with a change that finds the GC cutoff values before taking the `Tenant::gc_cs` lock. Cc: #7560
This commit is contained in:
@@ -51,8 +51,8 @@ pub(crate) enum StorageTimeOperation {
|
||||
#[strum(serialize = "gc")]
|
||||
Gc,
|
||||
|
||||
#[strum(serialize = "update gc info")]
|
||||
UpdateGcInfo,
|
||||
#[strum(serialize = "find gc cutoffs")]
|
||||
FindGcCutoffs,
|
||||
|
||||
#[strum(serialize = "create tenant")]
|
||||
CreateTenant,
|
||||
@@ -1989,7 +1989,7 @@ pub(crate) struct TimelineMetrics {
|
||||
pub imitate_logical_size_histo: StorageTimeMetrics,
|
||||
pub load_layer_map_histo: StorageTimeMetrics,
|
||||
pub garbage_collect_histo: StorageTimeMetrics,
|
||||
pub update_gc_info_histo: StorageTimeMetrics,
|
||||
pub find_gc_cutoffs_histo: StorageTimeMetrics,
|
||||
pub last_record_gauge: IntGauge,
|
||||
resident_physical_size_gauge: UIntGauge,
|
||||
/// copy of LayeredTimeline.current_logical_size
|
||||
@@ -2050,8 +2050,8 @@ impl TimelineMetrics {
|
||||
&shard_id,
|
||||
&timeline_id,
|
||||
);
|
||||
let update_gc_info_histo = StorageTimeMetrics::new(
|
||||
StorageTimeOperation::UpdateGcInfo,
|
||||
let find_gc_cutoffs_histo = StorageTimeMetrics::new(
|
||||
StorageTimeOperation::FindGcCutoffs,
|
||||
&tenant_id,
|
||||
&shard_id,
|
||||
&timeline_id,
|
||||
@@ -2098,7 +2098,7 @@ impl TimelineMetrics {
|
||||
logical_size_histo,
|
||||
imitate_logical_size_histo,
|
||||
garbage_collect_histo,
|
||||
update_gc_info_histo,
|
||||
find_gc_cutoffs_histo,
|
||||
load_layer_map_histo,
|
||||
last_record_gauge,
|
||||
resident_physical_size_gauge,
|
||||
|
||||
@@ -62,6 +62,7 @@ use self::timeline::uninit::TimelineCreateGuard;
|
||||
use self::timeline::uninit::TimelineExclusionError;
|
||||
use self::timeline::uninit::UninitializedTimeline;
|
||||
use self::timeline::EvictionTaskTenantState;
|
||||
use self::timeline::GcInfo;
|
||||
use self::timeline::TimelineResources;
|
||||
use self::timeline::WaitLsnError;
|
||||
use crate::config::PageServerConf;
|
||||
@@ -86,7 +87,6 @@ use crate::tenant::remote_timeline_client::INITDB_PATH;
|
||||
use crate::tenant::storage_layer::DeltaLayer;
|
||||
use crate::tenant::storage_layer::ImageLayer;
|
||||
use crate::InitializationOrder;
|
||||
use std::cmp::min;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::BTreeSet;
|
||||
use std::collections::HashMap;
|
||||
@@ -2886,9 +2886,12 @@ impl Tenant {
|
||||
))
|
||||
.map(|&x| x.1)
|
||||
.collect();
|
||||
timeline
|
||||
.update_gc_info(branchpoints, cutoff, pitr, cancel, ctx)
|
||||
.await?;
|
||||
let cutoffs = timeline.find_gc_cutoffs(cutoff, pitr, cancel, ctx).await?;
|
||||
|
||||
*timeline.gc_info.write().unwrap() = GcInfo {
|
||||
retain_lsns: branchpoints,
|
||||
cutoffs,
|
||||
};
|
||||
|
||||
gc_timelines.push(timeline);
|
||||
}
|
||||
@@ -2977,7 +2980,7 @@ impl Tenant {
|
||||
// and then the planned GC cutoff
|
||||
{
|
||||
let gc_info = src_timeline.gc_info.read().unwrap();
|
||||
let cutoff = min(gc_info.pitr_cutoff, gc_info.horizon_cutoff);
|
||||
let cutoff = gc_info.min_cutoff();
|
||||
if start_lsn < cutoff {
|
||||
return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!(
|
||||
"invalid branch start lsn: less than planned GC cutoff {cutoff}"
|
||||
@@ -4513,18 +4516,20 @@ mod tests {
|
||||
}
|
||||
|
||||
async fn bulk_insert_compact_gc(
|
||||
timeline: Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
timeline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
lsn: Lsn,
|
||||
repeat: usize,
|
||||
key_count: usize,
|
||||
) -> anyhow::Result<()> {
|
||||
let compact = true;
|
||||
bulk_insert_maybe_compact_gc(timeline, ctx, lsn, repeat, key_count, compact).await
|
||||
bulk_insert_maybe_compact_gc(tenant, timeline, ctx, lsn, repeat, key_count, compact).await
|
||||
}
|
||||
|
||||
async fn bulk_insert_maybe_compact_gc(
|
||||
timeline: Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
timeline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
mut lsn: Lsn,
|
||||
repeat: usize,
|
||||
@@ -4537,6 +4542,8 @@ mod tests {
|
||||
// Enforce that key range is monotonously increasing
|
||||
let mut keyspace = KeySpaceAccum::new();
|
||||
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
for _ in 0..repeat {
|
||||
for _ in 0..key_count {
|
||||
test_key.field6 = blknum;
|
||||
@@ -4558,24 +4565,19 @@ mod tests {
|
||||
blknum += 1;
|
||||
}
|
||||
|
||||
let cutoff = timeline.get_last_record_lsn();
|
||||
|
||||
timeline
|
||||
.update_gc_info(
|
||||
Vec::new(),
|
||||
cutoff,
|
||||
Duration::ZERO,
|
||||
&CancellationToken::new(),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
timeline.freeze_and_flush().await?;
|
||||
if compact {
|
||||
timeline
|
||||
.compact(&CancellationToken::new(), EnumSet::empty(), ctx)
|
||||
.await?;
|
||||
// this requires timeline to be &Arc<Timeline>
|
||||
timeline.compact(&cancel, EnumSet::empty(), ctx).await?;
|
||||
}
|
||||
timeline.gc().await?;
|
||||
|
||||
// this doesn't really need to use the timeline_id target, but it is closer to what it
|
||||
// originally was.
|
||||
let res = tenant
|
||||
.gc_iteration(Some(timeline.timeline_id), 0, Duration::ZERO, &cancel, ctx)
|
||||
.await?;
|
||||
|
||||
assert_eq!(res.layers_removed, 0, "this never removes anything");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -4594,7 +4596,7 @@ mod tests {
|
||||
.await?;
|
||||
|
||||
let lsn = Lsn(0x10);
|
||||
bulk_insert_compact_gc(tline.clone(), &ctx, lsn, 50, 10000).await?;
|
||||
bulk_insert_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -4625,7 +4627,7 @@ mod tests {
|
||||
.await?;
|
||||
|
||||
let lsn = Lsn(0x10);
|
||||
bulk_insert_compact_gc(tline.clone(), &ctx, lsn, 50, 10000).await?;
|
||||
bulk_insert_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000).await?;
|
||||
|
||||
let guard = tline.layers.read().await;
|
||||
guard.layer_map().dump(true, &ctx).await?;
|
||||
@@ -5079,6 +5081,7 @@ mod tests {
|
||||
.await?;
|
||||
|
||||
const NUM_KEYS: usize = 1000;
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let mut test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
|
||||
|
||||
@@ -5138,18 +5141,10 @@ mod tests {
|
||||
}
|
||||
|
||||
// Perform a cycle of flush, and GC
|
||||
let cutoff = tline.get_last_record_lsn();
|
||||
tline
|
||||
.update_gc_info(
|
||||
Vec::new(),
|
||||
cutoff,
|
||||
Duration::ZERO,
|
||||
&CancellationToken::new(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
tline.freeze_and_flush().await?;
|
||||
tline.gc().await?;
|
||||
tenant
|
||||
.gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -5170,6 +5165,8 @@ mod tests {
|
||||
|
||||
let mut keyspace = KeySpaceAccum::new();
|
||||
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
// Track when each page was last modified. Used to assert that
|
||||
// a read sees the latest page version.
|
||||
let mut updated = [Lsn(0); NUM_KEYS];
|
||||
@@ -5233,21 +5230,11 @@ mod tests {
|
||||
}
|
||||
|
||||
// Perform a cycle of flush, compact, and GC
|
||||
let cutoff = tline.get_last_record_lsn();
|
||||
tline
|
||||
.update_gc_info(
|
||||
Vec::new(),
|
||||
cutoff,
|
||||
Duration::ZERO,
|
||||
&CancellationToken::new(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
tline.freeze_and_flush().await?;
|
||||
tline
|
||||
.compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
|
||||
tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
|
||||
tenant
|
||||
.gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
|
||||
.await?;
|
||||
tline.gc().await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -5452,7 +5439,7 @@ mod tests {
|
||||
|
||||
let lsn = Lsn(0x10);
|
||||
let compact = false;
|
||||
bulk_insert_maybe_compact_gc(tline.clone(), &ctx, lsn, 50, 10000, compact).await?;
|
||||
bulk_insert_maybe_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000, compact).await?;
|
||||
|
||||
let test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
|
||||
let read_lsn = Lsn(u64::MAX - 1);
|
||||
|
||||
@@ -192,7 +192,9 @@ pub(super) async fn gather_inputs(
|
||||
// than a space bound (horizon cutoff). This means that if someone drops a database and waits for their
|
||||
// PITR interval, they will see synthetic size decrease, even if we are still storing data inside
|
||||
// horizon_cutoff.
|
||||
let mut next_gc_cutoff = gc_info.pitr_cutoff;
|
||||
let pitr_cutoff = gc_info.cutoffs.pitr;
|
||||
let horizon_cutoff = gc_info.cutoffs.horizon;
|
||||
let mut next_gc_cutoff = pitr_cutoff;
|
||||
|
||||
// If the caller provided a shorter retention period, use that instead of the GC cutoff.
|
||||
let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period {
|
||||
@@ -297,8 +299,8 @@ pub(super) async fn gather_inputs(
|
||||
last_record: last_record_lsn,
|
||||
// this is not used above, because it might not have updated recently enough
|
||||
latest_gc_cutoff: *timeline.get_latest_gc_cutoff_lsn(),
|
||||
horizon_cutoff: gc_info.horizon_cutoff,
|
||||
pitr_cutoff: gc_info.pitr_cutoff,
|
||||
horizon_cutoff,
|
||||
pitr_cutoff,
|
||||
next_gc_cutoff,
|
||||
retention_param_cutoff,
|
||||
});
|
||||
|
||||
@@ -325,7 +325,7 @@ pub struct Timeline {
|
||||
|
||||
// List of child timelines and their branch points. This is needed to avoid
|
||||
// garbage collecting data that is still needed by the child timelines.
|
||||
pub gc_info: std::sync::RwLock<GcInfo>,
|
||||
pub(crate) gc_info: std::sync::RwLock<GcInfo>,
|
||||
|
||||
// It may change across major versions so for simplicity
|
||||
// keep it after running initdb for a timeline.
|
||||
@@ -409,33 +409,59 @@ pub struct WalReceiverInfo {
|
||||
pub last_received_msg_ts: u128,
|
||||
}
|
||||
|
||||
///
|
||||
/// Information about how much history needs to be retained, needed by
|
||||
/// Garbage Collection.
|
||||
///
|
||||
pub struct GcInfo {
|
||||
#[derive(Default)]
|
||||
pub(crate) struct GcInfo {
|
||||
/// Specific LSNs that are needed.
|
||||
///
|
||||
/// Currently, this includes all points where child branches have
|
||||
/// been forked off from. In the future, could also include
|
||||
/// explicit user-defined snapshot points.
|
||||
pub retain_lsns: Vec<Lsn>,
|
||||
pub(crate) retain_lsns: Vec<Lsn>,
|
||||
|
||||
/// In addition to 'retain_lsns', keep everything newer than this
|
||||
/// point.
|
||||
/// The cutoff coordinates, which are combined by selecting the minimum.
|
||||
pub(crate) cutoffs: GcCutoffs,
|
||||
}
|
||||
|
||||
impl GcInfo {
|
||||
pub(crate) fn min_cutoff(&self) -> Lsn {
|
||||
self.cutoffs.select_min()
|
||||
}
|
||||
}
|
||||
|
||||
/// The `GcInfo` component describing which Lsns need to be retained.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct GcCutoffs {
|
||||
/// Keep everything newer than this point.
|
||||
///
|
||||
/// This is calculated by subtracting 'gc_horizon' setting from
|
||||
/// last-record LSN
|
||||
///
|
||||
/// FIXME: is this inclusive or exclusive?
|
||||
pub horizon_cutoff: Lsn,
|
||||
pub(crate) horizon: Lsn,
|
||||
|
||||
/// In addition to 'retain_lsns' and 'horizon_cutoff', keep everything newer than this
|
||||
/// point.
|
||||
///
|
||||
/// This is calculated by finding a number such that a record is needed for PITR
|
||||
/// if only if its LSN is larger than 'pitr_cutoff'.
|
||||
pub pitr_cutoff: Lsn,
|
||||
pub(crate) pitr: Lsn,
|
||||
}
|
||||
|
||||
impl Default for GcCutoffs {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
horizon: Lsn::INVALID,
|
||||
pitr: Lsn::INVALID,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl GcCutoffs {
|
||||
fn select_min(&self) -> Lsn {
|
||||
std::cmp::min(self.horizon, self.pitr)
|
||||
}
|
||||
}
|
||||
|
||||
/// An error happened in a get() operation.
|
||||
@@ -1155,7 +1181,7 @@ impl Timeline {
|
||||
" - keyspace={:?} lsn={}"),
|
||||
seq_err, keyspace, lsn) },
|
||||
(Ok(_), Err(GetVectoredError::GetReadyAncestorError(GetReadyAncestorError::AncestorLsnTimeout(_)))) => {
|
||||
// Sequential get runs after vectored get, so it is possible for the later
|
||||
// Sequential get runs after vectored get, so it is possible for the later
|
||||
// to time out while waiting for its ancestor's Lsn to become ready and for the
|
||||
// former to succeed (it essentially has a doubled wait time).
|
||||
},
|
||||
@@ -2097,11 +2123,7 @@ impl Timeline {
|
||||
|
||||
write_lock: tokio::sync::Mutex::new(None),
|
||||
|
||||
gc_info: std::sync::RwLock::new(GcInfo {
|
||||
retain_lsns: Vec::new(),
|
||||
horizon_cutoff: Lsn(0),
|
||||
pitr_cutoff: Lsn(0),
|
||||
}),
|
||||
gc_info: std::sync::RwLock::new(GcInfo::default()),
|
||||
|
||||
latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()),
|
||||
initdb_lsn: metadata.initdb_lsn(),
|
||||
@@ -4383,7 +4405,7 @@ impl Timeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Update information about which layer files need to be retained on
|
||||
/// Find the Lsns above which layer files need to be retained on
|
||||
/// garbage collection. This is separate from actually performing the GC,
|
||||
/// and is updated more frequently, so that compaction can remove obsolete
|
||||
/// page versions more aggressively.
|
||||
@@ -4391,17 +4413,6 @@ impl Timeline {
|
||||
/// TODO: that's wishful thinking, compaction doesn't actually do that
|
||||
/// currently.
|
||||
///
|
||||
/// The caller specifies how much history is needed with the 3 arguments:
|
||||
///
|
||||
/// retain_lsns: keep a version of each page at these LSNs
|
||||
/// cutoff_horizon: also keep everything newer than this LSN
|
||||
/// pitr: the time duration required to keep data for PITR
|
||||
///
|
||||
/// The 'retain_lsns' list is currently used to prevent removing files that
|
||||
/// are needed by child timelines. In the future, the user might be able to
|
||||
/// name additional points in time to retain. The caller is responsible for
|
||||
/// collecting that information.
|
||||
///
|
||||
/// The 'cutoff_horizon' point is used to retain recent versions that might still be
|
||||
/// needed by read-only nodes. (As of this writing, the caller just passes
|
||||
/// the latest LSN subtracted by a constant, and doesn't do anything smart
|
||||
@@ -4409,26 +4420,17 @@ impl Timeline {
|
||||
///
|
||||
/// The 'pitr' duration is used to calculate a 'pitr_cutoff', which can be used to determine
|
||||
/// whether a record is needed for PITR.
|
||||
///
|
||||
/// NOTE: This function holds a short-lived lock to protect the 'gc_info'
|
||||
/// field, so that the three values passed as argument are stored
|
||||
/// atomically. But the caller is responsible for ensuring that no new
|
||||
/// branches are created that would need to be included in 'retain_lsns',
|
||||
/// for example. The caller should hold `Tenant::gc_cs` lock to ensure
|
||||
/// that.
|
||||
///
|
||||
#[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
|
||||
pub(super) async fn update_gc_info(
|
||||
pub(super) async fn find_gc_cutoffs(
|
||||
&self,
|
||||
retain_lsns: Vec<Lsn>,
|
||||
cutoff_horizon: Lsn,
|
||||
pitr: Duration,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
) -> anyhow::Result<GcCutoffs> {
|
||||
let _timer = self
|
||||
.metrics
|
||||
.update_gc_info_histo
|
||||
.find_gc_cutoffs_histo
|
||||
.start_timer()
|
||||
.record_on_drop();
|
||||
|
||||
@@ -4481,14 +4483,10 @@ impl Timeline {
|
||||
self.get_last_record_lsn()
|
||||
};
|
||||
|
||||
// Grab the lock and update the values
|
||||
*self.gc_info.write().unwrap() = GcInfo {
|
||||
retain_lsns,
|
||||
horizon_cutoff: cutoff_horizon,
|
||||
pitr_cutoff,
|
||||
};
|
||||
|
||||
Ok(())
|
||||
Ok(GcCutoffs {
|
||||
horizon: cutoff_horizon,
|
||||
pitr: pitr_cutoff,
|
||||
})
|
||||
}
|
||||
|
||||
/// Garbage collect layer files on a timeline that are no longer needed.
|
||||
@@ -4517,8 +4515,8 @@ impl Timeline {
|
||||
let (horizon_cutoff, pitr_cutoff, retain_lsns) = {
|
||||
let gc_info = self.gc_info.read().unwrap();
|
||||
|
||||
let horizon_cutoff = min(gc_info.horizon_cutoff, self.get_disk_consistent_lsn());
|
||||
let pitr_cutoff = gc_info.pitr_cutoff;
|
||||
let horizon_cutoff = min(gc_info.cutoffs.horizon, self.get_disk_consistent_lsn());
|
||||
let pitr_cutoff = gc_info.cutoffs.pitr;
|
||||
let retain_lsns = gc_info.retain_lsns.clone();
|
||||
(horizon_cutoff, pitr_cutoff, retain_lsns)
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user