Also consider offloaded timelines for obtaining retain_lsn (#9308)

Also consider offloaded timelines for obtaining `retain_lsn`. This is
required for correctness for all timelines that have not been flattened
yet: otherwise we GC data that might still be required for reading.

This somewhat counteracts the original purpose of timeline offloading of
not having to iterate over offloaded timelines, but sadly it's required.
In the future, we can improve the way the offloaded timelines are
stored.

We also make the `retain_lsn` optional so that in the future, when we
implement flattening, we can make it None. This also applies to full
timeline objects by the way, where it would probably make most sense to
add a bool flag whether the timeline is successfully flattened, and if
it is, one can exclude it from `retain_lsn` as well.

Also, track whether a timeline was offloaded or not in `retain_lsn` so
that the `retain_lsn` can be excluded from visibility and size
calculation.

Part of #8088
This commit is contained in:
Arpad Müller
2024-10-14 17:54:03 +02:00
committed by GitHub
parent f4f7ea247c
commit f54e3e9147
4 changed files with 71 additions and 23 deletions

View File

@@ -493,6 +493,8 @@ pub struct OffloadedTimeline {
pub tenant_shard_id: TenantShardId,
pub timeline_id: TimelineId,
pub ancestor_timeline_id: Option<TimelineId>,
/// Whether to retain the branch lsn at the ancestor or not
pub ancestor_retain_lsn: Option<Lsn>,
// TODO: once we persist offloaded state, make this lazily constructed
pub remote_client: Arc<RemoteTimelineClient>,
@@ -504,10 +506,14 @@ pub struct OffloadedTimeline {
impl OffloadedTimeline {
fn from_timeline(timeline: &Timeline) -> Self {
let ancestor_retain_lsn = timeline
.get_ancestor_timeline_id()
.map(|_timeline_id| timeline.get_ancestor_lsn());
Self {
tenant_shard_id: timeline.tenant_shard_id,
timeline_id: timeline.timeline_id,
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
ancestor_retain_lsn,
remote_client: timeline.remote_client.clone(),
delete_progress: timeline.delete_progress.clone(),
@@ -515,6 +521,12 @@ impl OffloadedTimeline {
}
}
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum MaybeOffloaded {
Yes,
No,
}
#[derive(Clone)]
pub enum TimelineOrOffloaded {
Timeline(Arc<Timeline>),
@@ -2253,12 +2265,13 @@ impl Tenant {
if activating {
let timelines_accessor = self.timelines.lock().unwrap();
let timelines_offloaded_accessor = self.timelines_offloaded.lock().unwrap();
let timelines_to_activate = timelines_accessor
.values()
.filter(|timeline| !(timeline.is_broken() || timeline.is_stopping()));
// Before activation, populate each Timeline's GcInfo with information about its children
self.initialize_gc_info(&timelines_accessor);
self.initialize_gc_info(&timelines_accessor, &timelines_offloaded_accessor);
// Spawn gc and compaction loops. The loops will shut themselves
// down when they notice that the tenant is inactive.
@@ -3298,6 +3311,7 @@ impl Tenant {
fn initialize_gc_info(
&self,
timelines: &std::sync::MutexGuard<HashMap<TimelineId, Arc<Timeline>>>,
timelines_offloaded: &std::sync::MutexGuard<HashMap<TimelineId, Arc<OffloadedTimeline>>>,
) {
// This function must be called before activation: after activation timeline create/delete operations
// might happen, and this function is not safe to run concurrently with those.
@@ -3305,20 +3319,37 @@ impl Tenant {
// Scan all timelines. For each timeline, remember the timeline ID and
// the branch point where it was created.
let mut all_branchpoints: BTreeMap<TimelineId, Vec<(Lsn, TimelineId)>> = BTreeMap::new();
let mut all_branchpoints: BTreeMap<TimelineId, Vec<(Lsn, TimelineId, MaybeOffloaded)>> =
BTreeMap::new();
timelines.iter().for_each(|(timeline_id, timeline_entry)| {
if let Some(ancestor_timeline_id) = &timeline_entry.get_ancestor_timeline_id() {
let ancestor_children = all_branchpoints.entry(*ancestor_timeline_id).or_default();
ancestor_children.push((timeline_entry.get_ancestor_lsn(), *timeline_id));
ancestor_children.push((
timeline_entry.get_ancestor_lsn(),
*timeline_id,
MaybeOffloaded::No,
));
}
});
timelines_offloaded
.iter()
.for_each(|(timeline_id, timeline_entry)| {
let Some(ancestor_timeline_id) = &timeline_entry.ancestor_timeline_id else {
return;
};
let Some(retain_lsn) = timeline_entry.ancestor_retain_lsn else {
return;
};
let ancestor_children = all_branchpoints.entry(*ancestor_timeline_id).or_default();
ancestor_children.push((retain_lsn, *timeline_id, MaybeOffloaded::Yes));
});
// The number of bytes we always keep, irrespective of PITR: this is a constant across timelines
let horizon = self.get_gc_horizon();
// Populate each timeline's GcInfo with information about its child branches
for timeline in timelines.values() {
let mut branchpoints: Vec<(Lsn, TimelineId)> = all_branchpoints
let mut branchpoints: Vec<(Lsn, TimelineId, MaybeOffloaded)> = all_branchpoints
.remove(&timeline.timeline_id)
.unwrap_or_default();
@@ -4878,7 +4909,10 @@ mod tests {
{
let branchpoints = &tline.gc_info.read().unwrap().retain_lsns;
assert_eq!(branchpoints.len(), 1);
assert_eq!(branchpoints[0], (Lsn(0x40), NEW_TIMELINE_ID));
assert_eq!(
branchpoints[0],
(Lsn(0x40), NEW_TIMELINE_ID, MaybeOffloaded::No)
);
}
// You can read the key from the child branch even though the parent is
@@ -8261,8 +8295,8 @@ mod tests {
let mut guard = tline.gc_info.write().unwrap();
*guard = GcInfo {
retain_lsns: vec![
(Lsn(0x10), tline.timeline_id),
(Lsn(0x20), tline.timeline_id),
(Lsn(0x10), tline.timeline_id, MaybeOffloaded::No),
(Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
],
cutoffs: GcCutoffs {
time: Lsn(0x30),
@@ -8489,8 +8523,8 @@ mod tests {
let mut guard = tline.gc_info.write().unwrap();
*guard = GcInfo {
retain_lsns: vec![
(Lsn(0x10), tline.timeline_id),
(Lsn(0x20), tline.timeline_id),
(Lsn(0x10), tline.timeline_id, MaybeOffloaded::No),
(Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
],
cutoffs: GcCutoffs {
time: Lsn(0x30),
@@ -8723,7 +8757,7 @@ mod tests {
// Update GC info
let mut guard = parent_tline.gc_info.write().unwrap();
*guard = GcInfo {
retain_lsns: vec![(Lsn(0x18), branch_tline.timeline_id)],
retain_lsns: vec![(Lsn(0x18), branch_tline.timeline_id, MaybeOffloaded::No)],
cutoffs: GcCutoffs {
time: Lsn(0x10),
space: Lsn(0x10),
@@ -8737,7 +8771,7 @@ mod tests {
// Update GC info
let mut guard = branch_tline.gc_info.write().unwrap();
*guard = GcInfo {
retain_lsns: vec![(Lsn(0x40), branch_tline.timeline_id)],
retain_lsns: vec![(Lsn(0x40), branch_tline.timeline_id, MaybeOffloaded::No)],
cutoffs: GcCutoffs {
time: Lsn(0x50),
space: Lsn(0x50),

View File

@@ -12,7 +12,7 @@ use crate::context::RequestContext;
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
use super::{GcError, LogicalSizeCalculationCause, Tenant};
use crate::tenant::Timeline;
use crate::tenant::{MaybeOffloaded, Timeline};
use utils::id::TimelineId;
use utils::lsn::Lsn;
@@ -264,10 +264,12 @@ pub(super) async fn gather_inputs(
let mut lsns: Vec<(Lsn, LsnKind)> = gc_info
.retain_lsns
.iter()
.filter(|(lsn, _child_id)| lsn > &ancestor_lsn)
.filter(|(lsn, _child_id, is_offloaded)| {
lsn > &ancestor_lsn && *is_offloaded == MaybeOffloaded::No
})
.copied()
// this assumes there are no other retain_lsns than the branchpoints
.map(|(lsn, _child_id)| (lsn, LsnKind::BranchPoint))
.map(|(lsn, _child_id, _is_offloaded)| (lsn, LsnKind::BranchPoint))
.collect::<Vec<_>>();
lsns.extend(lease_points.iter().map(|&lsn| (lsn, LsnKind::LeasePoint)));

View File

@@ -139,8 +139,10 @@ use self::logical_size::LogicalSize;
use self::walreceiver::{WalReceiver, WalReceiverConf};
use super::{
config::TenantConf, storage_layer::inmemory_layer, storage_layer::LayerVisibilityHint,
config::TenantConf,
storage_layer::{inmemory_layer, LayerVisibilityHint},
upload_queue::NotInitialized,
MaybeOffloaded,
};
use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe};
@@ -450,7 +452,7 @@ pub(crate) struct GcInfo {
/// Currently, this includes all points where child branches have
/// been forked off from. In the future, could also include
/// explicit user-defined snapshot points.
pub(crate) retain_lsns: Vec<(Lsn, TimelineId)>,
pub(crate) retain_lsns: Vec<(Lsn, TimelineId, MaybeOffloaded)>,
/// The cutoff coordinates, which are combined by selecting the minimum.
pub(crate) cutoffs: GcCutoffs,
@@ -467,8 +469,13 @@ impl GcInfo {
self.cutoffs.select_min()
}
pub(super) fn insert_child(&mut self, child_id: TimelineId, child_lsn: Lsn) {
self.retain_lsns.push((child_lsn, child_id));
pub(super) fn insert_child(
&mut self,
child_id: TimelineId,
child_lsn: Lsn,
is_offloaded: MaybeOffloaded,
) {
self.retain_lsns.push((child_lsn, child_id, is_offloaded));
self.retain_lsns.sort_by_key(|i| i.0);
}
@@ -2164,7 +2171,9 @@ impl Timeline {
if let Some(ancestor) = &ancestor {
let mut ancestor_gc_info = ancestor.gc_info.write().unwrap();
ancestor_gc_info.insert_child(timeline_id, metadata.ancestor_lsn());
// If we construct an explicit timeline object, it's obviously not offloaded
let is_offloaded = MaybeOffloaded::No;
ancestor_gc_info.insert_child(timeline_id, metadata.ancestor_lsn(), is_offloaded);
}
Arc::new_cyclic(|myself| {
@@ -4875,7 +4884,7 @@ impl Timeline {
let retain_lsns = gc_info
.retain_lsns
.iter()
.map(|(lsn, _child_id)| *lsn)
.map(|(lsn, _child_id, _is_offloaded)| *lsn)
.collect();
// Gets the maximum LSN that holds the valid lease.

View File

@@ -42,7 +42,7 @@ use crate::tenant::storage_layer::{
use crate::tenant::timeline::ImageLayerCreationOutcome;
use crate::tenant::timeline::{drop_rlock, DeltaLayerWriter, ImageLayerWriter};
use crate::tenant::timeline::{Layer, ResidentLayer};
use crate::tenant::DeltaLayer;
use crate::tenant::{DeltaLayer, MaybeOffloaded};
use crate::virtual_file::{MaybeFatalIo, VirtualFile};
use pageserver_api::config::tenant_conf_defaults::{
DEFAULT_CHECKPOINT_DISTANCE, DEFAULT_COMPACTION_THRESHOLD,
@@ -639,7 +639,10 @@ impl Timeline {
let children = self.gc_info.read().unwrap().retain_lsns.clone();
let mut readable_points = Vec::with_capacity(children.len() + 1);
for (child_lsn, _child_timeline_id) in &children {
for (child_lsn, _child_timeline_id, is_offloaded) in &children {
if *is_offloaded == MaybeOffloaded::Yes {
continue;
}
readable_points.push(*child_lsn);
}
readable_points.push(head_lsn);
@@ -1741,7 +1744,7 @@ impl Timeline {
let gc_info = self.gc_info.read().unwrap();
let mut retain_lsns_below_horizon = Vec::new();
let gc_cutoff = gc_info.cutoffs.select_min();
for (lsn, _timeline_id) in &gc_info.retain_lsns {
for (lsn, _timeline_id, _is_offloaded) in &gc_info.retain_lsns {
if lsn < &gc_cutoff {
retain_lsns_below_horizon.push(*lsn);
}