From 2aceb6a3095bf0ee6cf7ef3ecc1bb182864abccb Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 23 May 2022 20:58:27 +0300 Subject: [PATCH] Fix garbage collection to not remove image layers that are still needed. The logic would incorrectly remove an image layer, if a new image layer existed, even though the older image layer was still needed by some delta layers after it. See example given in the comment this adds. Without this fix, I was getting a lot of "could not find data for key 010000000000000000000000000000000000" errors from GC, with the new test case being added in PR #1735. Fixes #707 --- pageserver/src/layered_repository.rs | 24 ++++++++++++------- .../src/layered_repository/layer_map.rs | 13 ++++------ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index fc4ab942f6..a83907430e 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -18,7 +18,7 @@ use itertools::Itertools; use lazy_static::lazy_static; use tracing::*; -use std::cmp::{max, Ordering}; +use std::cmp::{max, min, Ordering}; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::collections::{BTreeSet, HashSet}; @@ -2165,7 +2165,7 @@ impl LayeredTimeline { let gc_info = self.gc_info.read().unwrap(); let retain_lsns = &gc_info.retain_lsns; - let cutoff = gc_info.cutoff; + let cutoff = min(gc_info.cutoff, disk_consistent_lsn); let pitr = gc_info.pitr; // Calculate pitr cutoff point. @@ -2294,12 +2294,20 @@ impl LayeredTimeline { // is 102, then it might not have been fully flushed to disk // before crash. // - // FIXME: This logic is wrong. See https://github.com/zenithdb/zenith/issues/707 - if !layers.newer_image_layer_exists( - &l.get_key_range(), - l.get_lsn_range().end, - disk_consistent_lsn + 1, - )? { + // For example, imagine that the following layers exist: + // + // 1000 - image (A) + // 1000-2000 - delta (B) + // 2000 - image (C) + // 2000-3000 - delta (D) + // 3000 - image (E) + // + // If GC horizon is at 2500, we can remove layers A and B, but + // we cannot remove C, even though it's older than 2500, because + // the delta layer 2000-3000 depends on it. + if !layers + .image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))? + { debug!( "keeping {} because it is the latest layer", l.filename().display() diff --git a/pageserver/src/layered_repository/layer_map.rs b/pageserver/src/layered_repository/layer_map.rs index 7491294c03..f7f51bf21f 100644 --- a/pageserver/src/layered_repository/layer_map.rs +++ b/pageserver/src/layered_repository/layer_map.rs @@ -201,18 +201,14 @@ impl LayerMap { NUM_ONDISK_LAYERS.dec(); } - /// Is there a newer image layer for given key-range? + /// Is there a newer image layer for given key- and LSN-range? /// /// This is used for garbage collection, to determine if an old layer can /// be deleted. - /// We ignore layers newer than disk_consistent_lsn because they will be removed at restart - /// We also only look at historic layers - //#[allow(dead_code)] - pub fn newer_image_layer_exists( + pub fn image_layer_exists( &self, key_range: &Range, - lsn: Lsn, - disk_consistent_lsn: Lsn, + lsn_range: &Range, ) -> Result { let mut range_remain = key_range.clone(); @@ -225,8 +221,7 @@ impl LayerMap { let img_lsn = l.get_lsn_range().start; if !l.is_incremental() && l.get_key_range().contains(&range_remain.start) - && img_lsn > lsn - && img_lsn < disk_consistent_lsn + && lsn_range.contains(&img_lsn) { made_progress = true; let img_key_end = l.get_key_range().end;