From b874f1dc94db545409109bc6d4cbec02aa35a2da Mon Sep 17 00:00:00 2001 From: John Spray Date: Sat, 6 Jul 2024 16:04:21 +0100 Subject: [PATCH] pageserver: maintain GcInfo incrementally (2/2) --- pageserver/src/tenant.rs | 5 +++++ pageserver/src/tenant/timeline.rs | 9 +++++++++ pageserver/src/tenant/timeline/delete.rs | 17 +++++++++++++---- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index fd9839577c..7723bdb947 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -631,6 +631,11 @@ impl Tenant { timeline.maybe_spawn_flush_loop(); } } + + if let Some(ancestor) = timeline.get_ancestor_timeline() { + let mut ancestor_gc_info = ancestor.gc_info.write().unwrap(); + ancestor_gc_info.insert_child(timeline.timeline_id, timeline.get_ancestor_lsn()); + } }; // Sanity check: a timeline should have some content. diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 1578a00ad2..257a564f3b 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -474,6 +474,15 @@ impl GcInfo { pub(crate) fn min_cutoff(&self) -> Lsn { self.cutoffs.select_min() } + + pub(super) fn insert_child(&mut self, child_id: TimelineId, child_lsn: Lsn) { + self.retain_lsns.push((child_lsn, child_id)); + self.retain_lsns.sort_by_key(|i| i.0); + } + + pub(super) fn remove_child(&mut self, child_id: TimelineId) { + self.retain_lsns.retain(|i| i.1 != child_id); + } } /// The `GcInfo` component describing which Lsns need to be retained. diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index b0088f4ea2..ebb7d29736 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -148,14 +148,14 @@ async fn cleanup_remaining_timeline_fs_traces( /// For more context see comments in [`DeleteTimelineFlow::prepare`] async fn remove_timeline_from_tenant( tenant: &Tenant, - timeline_id: TimelineId, + timeline: &Timeline, _: &DeletionGuard, // using it as a witness ) -> anyhow::Result<()> { // Remove the timeline from the map. let mut timelines = tenant.timelines.lock().unwrap(); let children_exist = timelines .iter() - .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline_id)); + .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id)); // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`. // We already deleted the layer files, so it's probably best to panic. // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart) @@ -163,8 +163,14 @@ async fn remove_timeline_from_tenant( panic!("Timeline grew children while we removed layer files"); } + // Unlink from parent + if let Some(ancestor) = timeline.get_ancestor_timeline() { + let mut ancestor_gc_info = ancestor.gc_info.write().unwrap(); + ancestor_gc_info.remove_child(timeline.timeline_id); + } + timelines - .remove(&timeline_id) + .remove(&timeline.timeline_id) .expect("timeline that we were deleting was concurrently removed from 'timelines' map"); drop(timelines); @@ -293,6 +299,9 @@ impl DeleteTimelineFlow { { let mut locked = tenant.timelines.lock().unwrap(); locked.insert(timeline_id, Arc::clone(&timeline)); + + // Note that we do not insert this into the parent branch's GcInfo: the parent is not obliged to retain + // any data for child timelines being deleted. } guard.mark_in_progress()?; @@ -413,7 +422,7 @@ impl DeleteTimelineFlow { pausable_failpoint!("in_progress_delete"); - remove_timeline_from_tenant(tenant, timeline.timeline_id, &guard).await?; + remove_timeline_from_tenant(tenant, timeline, &guard).await?; *guard = Self::Finished;