gc refactoring

- rename the 'compact' argument of GC to 'checkpoint_before_gc'.
- refactor gc_iteration_internal()
anastasia
2021-09-10 14:09:05 +03:00
committed by lubennikovaav
parent d7cff8fbaf
commit e554f9514f
2 changed files with 57 additions and 51 deletions
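
The refactoring named in the commit message splits gc_iteration_internal() into three phases: enumerate every timeline from the timelines directory, collect the (ancestor, LSN) branch points, then GC each timeline, optionally forcing a checkpoint first. Below is a minimal, self-contained sketch of that flow under simplifying assumptions: TimelineId/Lsn are bare integers, timelines are passed in as tuples, and the checkpoint and gc_timeline steps are replaced by println! stand-ins; none of this is the real pageserver code.

```rust
use std::collections::BTreeSet;
use std::ops::Bound::Included;

// Simplified stand-ins for the real ZTimelineId and Lsn types.
type TimelineId = u32;
type Lsn = u64;

/// Each timeline is modeled as (id, optional (ancestor id, branch point LSN), last_record_lsn).
fn gc_iteration_sketch(
    timelines: &[(TimelineId, Option<(TimelineId, Lsn)>, Lsn)],
    target: Option<TimelineId>,
    horizon: u64,
    checkpoint_before_gc: bool,
) {
    // Phase 1: enumerate all timelines. The real code scans the timelines
    // directory rather than the in-memory hash table, so inactive timelines
    // are included as well.
    let timelineids: Vec<TimelineId> = timelines.iter().map(|t| t.0).collect();

    // Phase 2: collect branch points. If a target timeline is given, only the
    // branch points of its children matter.
    let mut all_branchpoints: BTreeSet<(TimelineId, Lsn)> = BTreeSet::new();
    for (_, ancestor, _) in timelines {
        if let Some((ancestor_id, ancestor_lsn)) = ancestor {
            if target.map_or(true, |t| t == *ancestor_id) {
                all_branchpoints.insert((*ancestor_id, *ancestor_lsn));
            }
        }
    }

    // Phase 3: GC each timeline (or only the target), keeping everything newer
    // than the cutoff and everything pinned by a branch point.
    for &timelineid in &timelineids {
        if matches!(target, Some(t) if t != timelineid) {
            continue;
        }
        let branchpoints: Vec<Lsn> = all_branchpoints
            .range((
                Included((timelineid, Lsn::MIN)),
                Included((timelineid, Lsn::MAX)),
            ))
            .map(|&(_, lsn)| lsn)
            .collect();
        if checkpoint_before_gc {
            // The real code calls timeline.checkpoint() here so that freshly
            // flushed layers can also be garbage collected.
            println!("timeline {} checkpoint_before_gc done", timelineid);
        }
        // Look the timeline up again, mirroring get_timeline_locked() in the real code.
        let last_lsn = timelines.iter().find(|t| t.0 == timelineid).unwrap().2;
        if let Some(cutoff) = last_lsn.checked_sub(horizon) {
            println!("gc timeline {}: cutoff {}, retain {:?}", timelineid, cutoff, branchpoints);
        }
    }
}

fn main() {
    // Timeline 2 branches off timeline 1 at LSN 100.
    let timelines: [(TimelineId, Option<(TimelineId, Lsn)>, Lsn); 2] =
        [(1, None, 500), (2, Some((1, 100)), 300)];
    gc_iteration_sketch(&timelines, None, 150, true);
}
```

With target = None the sketch prints a cutoff for both timelines; with target = Some(1) it would skip timeline 2 but still retain LSN 100 on timeline 1 for the branch point.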


@@ -193,12 +193,12 @@ impl Repository for LayeredRepository {
&self,
target_timelineid: Option<ZTimelineId>,
horizon: u64,
compact: bool,
checkpoint_before_gc: bool,
) -> Result<GcResult> {
STORAGE_TIME
.with_label_values(&["gc"])
.observe_closure_duration(|| {
self.gc_iteration_internal(target_timelineid, horizon, compact)
self.gc_iteration_internal(target_timelineid, horizon, checkpoint_before_gc)
})
}
}
@@ -403,14 +403,14 @@ impl LayeredRepository {
// don't cover any branch point LSNs.
//
// TODO:
// - if a relation has been modified on a child branch, then we
// - if a relation has a non-incremental persistent layer on a child branch, then we
// don't need to keep that in the parent anymore. But currently
// we do.
fn gc_iteration_internal(
&self,
target_timelineid: Option<ZTimelineId>,
horizon: u64,
compact: bool,
checkpoint_before_gc: bool,
) -> Result<GcResult> {
let mut totals: GcResult = Default::default();
let now = Instant::now();
@@ -422,63 +422,72 @@ impl LayeredRepository {
// Scan all timelines. For each timeline, remember the timeline ID and
// the branch point where it was created.
//
let mut timelineids: Vec<ZTimelineId> = Vec::new();
// We scan the directory, not the in-memory hash table, because the hash
// table only contains entries for timelines that have been accessed. We
// need to take all timelines into account, not only the active ones.
let mut timelineids: Vec<ZTimelineId> = Vec::new();
let mut all_branchpoints: BTreeSet<(ZTimelineId, Lsn)> = BTreeSet::new();
let timelines_path = self.conf.timelines_path(&self.tenantid);
for direntry in fs::read_dir(timelines_path)? {
let direntry = direntry?;
if let Some(fname) = direntry.file_name().to_str() {
if let Ok(timelineid) = fname.parse::<ZTimelineId>() {
timelineids.push(timelineid);
// Read the metadata of this timeline to get its parent timeline.
//
// We read the ancestor information directly from the file, instead
// of calling get_timeline(). We don't want to load the timeline
// into memory just for GC.
//
// FIXME: we open the timeline in the loop below with
// get_timeline_locked() anyway, so maybe we should just do it
// here, too.
let metadata = Self::load_metadata(self.conf, timelineid, self.tenantid)?;
if let Some(ancestor_timeline) = metadata.ancestor_timeline {
all_branchpoints.insert((ancestor_timeline, metadata.ancestor_lsn));
}
}
}
}
// Ok, we now know all the branch points. Iterate through them.
// Now collect info about branchpoints
let mut all_branchpoints: BTreeSet<(ZTimelineId, Lsn)> = BTreeSet::new();
for timelineid in &timelineids {
let timeline = self.get_timeline_locked(*timelineid, &mut *timelines)?;
if let Some(ancestor_timeline) = &timeline.ancestor_timeline {
// If target_timeline is specified, we only need to know the branchpoints of its children
if let Some(timelineid) = target_timelineid {
if ancestor_timeline.timelineid == timelineid {
all_branchpoints
.insert((ancestor_timeline.timelineid, timeline.ancestor_lsn));
}
}
// Collect branchpoints for all timelines
else {
all_branchpoints.insert((ancestor_timeline.timelineid, timeline.ancestor_lsn));
}
}
}
// Ok, we now know all the branch points.
// Perform GC for each timeline.
for timelineid in timelineids {
// If a target timeline was specified, leave the other timelines alone.
// This is a bit inefficient, because we still collect the information for
// all the timelines above.
if let Some(x) = target_timelineid {
if x != timelineid {
// We have already loaded all timelines above
// so this operation is just a quick map lookup.
let timeline = self.get_timeline_locked(timelineid, &mut *timelines)?;
// If target_timeline is specified, only GC it
if let Some(target_timelineid) = target_timelineid {
if timelineid != target_timelineid {
continue;
}
}
let branchpoints: Vec<Lsn> = all_branchpoints
.range((
Included((timelineid, Lsn(0))),
Included((timelineid, Lsn(u64::MAX))),
))
.map(|&x| x.1)
.collect();
// cutoff is
if let Some(cutoff) = timeline.get_last_record_lsn().checked_sub(horizon) {
let branchpoints: Vec<Lsn> = all_branchpoints
.range((
Included((timelineid, Lsn(0))),
Included((timelineid, Lsn(u64::MAX))),
))
.map(|&x| x.1)
.collect();
let timeline = self.get_timeline_locked(timelineid, &mut *timelines)?;
let last_lsn = timeline.get_last_record_lsn();
if let Some(cutoff) = last_lsn.checked_sub(horizon) {
// If GC was explicitly requested by the admin, force flush all in-memory
// layers to disk first, so that they too can be garbage collected. That's
// If requested, force flush all in-memory layers to disk first,
// so that they too can be garbage collected. That's
// used in tests, so we want as deterministic results as possible.
if compact {
if checkpoint_before_gc {
timeline.checkpoint()?;
info!("timeline {} checkpoint_before_gc done", timelineid);
}
let result = timeline.gc_timeline(branchpoints, cutoff)?;
@@ -1440,6 +1449,7 @@ impl LayeredTimeline {
"running GC on timeline {}, cutoff {}",
self.timelineid, cutoff
);
info!("retain_lsns: {:?}", retain_lsns);
let mut layers_to_remove: Vec<Arc<dyn Layer>> = Vec::new();
@@ -1476,10 +1486,10 @@ impl LayeredTimeline {
continue 'outer;
}
// Is it needed by a child branch?
// 2. Is it needed by any child branch?
for retain_lsn in &retain_lsns {
// FIXME: are the bounds inclusive or exclusive?
if l.get_start_lsn() <= *retain_lsn && *retain_lsn <= l.get_end_lsn() {
// start_lsn is inclusive and end_lsn is exclusive
if l.get_start_lsn() <= *retain_lsn && *retain_lsn < l.get_end_lsn() {
info!(
"keeping {} {}-{} because it's needed by branch point {}",
seg,

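The retention check tightened in the last hunk treats a layer's LSN range as half-open: start_lsn is inclusive, end_lsn is exclusive, so a branch point sitting exactly at end_lsn is served by the next layer rather than this one. A tiny self-contained illustration (Lsn modeled as a bare u64, function name hypothetical):

```rust
type Lsn = u64;

// Does a layer covering [start_lsn, end_lsn) need to be kept for a branch point?
fn needed_by_branchpoint(start_lsn: Lsn, end_lsn: Lsn, retain_lsn: Lsn) -> bool {
    // start_lsn is inclusive and end_lsn is exclusive
    start_lsn <= retain_lsn && retain_lsn < end_lsn
}

fn main() {
    assert!(needed_by_branchpoint(100, 200, 150));  // branch point inside the range
    assert!(needed_by_branchpoint(100, 200, 100));  // exactly at start_lsn: kept
    assert!(!needed_by_branchpoint(100, 200, 200)); // exactly at end_lsn: next layer covers it
    println!("retention check ok");
}
```
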

@@ -28,18 +28,14 @@ pub trait Repository: Send + Sync {
///
/// 'timelineid' specifies the timeline to GC, or None for all.
/// `horizon` specifies the delta from the last LSN within which all object versions are preserved (the PITR interval).
/// `compact` parameter is used to force compaction of storage.
/// some storage implementations are based on an lsm tree and require periodic merge (compaction).
/// usually the storage implementation itself determines when compaction should be performed,
/// but for gc tests it may be useful to force compaction just after completion of a gc iteration
/// to make sure that all detected garbage is removed.
/// so right now `compact` is set to true when gc is explicitly requested through the page server api,
/// and is set to false in the gc threads, which repeat gc iterations in an infinite loop.
/// `checkpoint_before_gc` parameter is used to force a checkpoint of storage before GC
/// to make tests more deterministic.
/// TODO: Do we still need it, or can we call checkpoint explicitly in tests where needed?
fn gc_iteration(
&self,
timelineid: Option<ZTimelineId>,
horizon: u64,
compact: bool,
checkpoint_before_gc: bool,
) -> Result<GcResult>;
}
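
For illustration, here is a minimal, self-contained sketch of the call shape described by the doc comment above. The trait, the DummyRepo impl, the made-up layers_removed field, and the plain GcResult return (no Result wrapper) are simplified stand-ins for the real pageserver types; the point is only to show how a test might pass checkpoint_before_gc = true for deterministic results.

```rust
type ZTimelineId = u32;

#[derive(Debug, Default)]
struct GcResult {
    layers_removed: u64,
}

trait Repository {
    // Simplified: the real trait returns Result<GcResult>.
    fn gc_iteration(
        &self,
        timelineid: Option<ZTimelineId>,
        horizon: u64,
        checkpoint_before_gc: bool,
    ) -> GcResult;
}

struct DummyRepo;

impl Repository for DummyRepo {
    fn gc_iteration(
        &self,
        timelineid: Option<ZTimelineId>,
        _horizon: u64,
        checkpoint_before_gc: bool,
    ) -> GcResult {
        if checkpoint_before_gc {
            // The real implementation flushes in-memory layers to disk first,
            // so that they too can be garbage collected.
            println!("checkpoint before gc on {:?}", timelineid);
        }
        GcResult::default()
    }
}

fn main() {
    let repo = DummyRepo;
    // A test wanting deterministic results: GC a single timeline with a zero
    // horizon and force a checkpoint first.
    let result = repo.gc_iteration(Some(1), 0, true);
    println!("{:?}", result);
}
```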