mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-05 04:22:56 +00:00
Fix garbage collection to not remove image layers that are still needed.
The logic would incorrectly remove an image layer, if a new image layer existed, even though the older image layer was still needed by some delta layers after it. See example given in the comment this adds. Without this fix, I was getting a lot of "could not find data for key 010000000000000000000000000000000000" errors from GC, with the new test case being added in PR #1735. Fixes #707
This commit is contained in:
@@ -18,7 +18,7 @@ use itertools::Itertools;
|
||||
use lazy_static::lazy_static;
|
||||
use tracing::*;
|
||||
|
||||
use std::cmp::{max, Ordering};
|
||||
use std::cmp::{max, min, Ordering};
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
@@ -2165,7 +2165,7 @@ impl LayeredTimeline {
|
||||
|
||||
let gc_info = self.gc_info.read().unwrap();
|
||||
let retain_lsns = &gc_info.retain_lsns;
|
||||
let cutoff = gc_info.cutoff;
|
||||
let cutoff = min(gc_info.cutoff, disk_consistent_lsn);
|
||||
let pitr = gc_info.pitr;
|
||||
|
||||
// Calculate pitr cutoff point.
|
||||
@@ -2294,12 +2294,20 @@ impl LayeredTimeline {
|
||||
// is 102, then it might not have been fully flushed to disk
|
||||
// before crash.
|
||||
//
|
||||
// FIXME: This logic is wrong. See https://github.com/zenithdb/zenith/issues/707
|
||||
if !layers.newer_image_layer_exists(
|
||||
&l.get_key_range(),
|
||||
l.get_lsn_range().end,
|
||||
disk_consistent_lsn + 1,
|
||||
)? {
|
||||
// For example, imagine that the following layers exist:
|
||||
//
|
||||
// 1000 - image (A)
|
||||
// 1000-2000 - delta (B)
|
||||
// 2000 - image (C)
|
||||
// 2000-3000 - delta (D)
|
||||
// 3000 - image (E)
|
||||
//
|
||||
// If GC horizon is at 2500, we can remove layers A and B, but
|
||||
// we cannot remove C, even though it's older than 2500, because
|
||||
// the delta layer 2000-3000 depends on it.
|
||||
if !layers
|
||||
.image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))?
|
||||
{
|
||||
debug!(
|
||||
"keeping {} because it is the latest layer",
|
||||
l.filename().display()
|
||||
|
||||
@@ -201,18 +201,14 @@ impl LayerMap {
|
||||
NUM_ONDISK_LAYERS.dec();
|
||||
}
|
||||
|
||||
/// Is there a newer image layer for given key-range?
|
||||
/// Is there a newer image layer for given key- and LSN-range?
|
||||
///
|
||||
/// This is used for garbage collection, to determine if an old layer can
|
||||
/// be deleted.
|
||||
/// We ignore layers newer than disk_consistent_lsn because they will be removed at restart
|
||||
/// We also only look at historic layers
|
||||
//#[allow(dead_code)]
|
||||
pub fn newer_image_layer_exists(
|
||||
pub fn image_layer_exists(
|
||||
&self,
|
||||
key_range: &Range<Key>,
|
||||
lsn: Lsn,
|
||||
disk_consistent_lsn: Lsn,
|
||||
lsn_range: &Range<Lsn>,
|
||||
) -> Result<bool> {
|
||||
let mut range_remain = key_range.clone();
|
||||
|
||||
@@ -225,8 +221,7 @@ impl LayerMap {
|
||||
let img_lsn = l.get_lsn_range().start;
|
||||
if !l.is_incremental()
|
||||
&& l.get_key_range().contains(&range_remain.start)
|
||||
&& img_lsn > lsn
|
||||
&& img_lsn < disk_consistent_lsn
|
||||
&& lsn_range.contains(&img_lsn)
|
||||
{
|
||||
made_progress = true;
|
||||
let img_key_end = l.get_key_range().end;
|
||||
|
||||
Reference in New Issue
Block a user