Fix gc_internal to handle dropped layers.

Some dropped layers serve as tombstones for earlier layers and thus cannot be garbage collected.
Add new fields to GcResult for layers that are preserved as tombstones
This commit is contained in:
anastasia
2021-09-10 17:04:16 +03:00
committed by lubennikovaav
parent c934e724a8
commit a4fc6da57b
5 changed files with 129 additions and 12 deletions

View File

@@ -472,7 +472,6 @@ impl LayeredRepository {
}
}
// 'cutoff' is the GC horizon: last_record_lsn minus 'horizon'. Layers wholly older than it are GC candidates.
if let Some(cutoff) = timeline.get_last_record_lsn().checked_sub(horizon) {
let branchpoints: Vec<Lsn> = all_branchpoints
.range((
@@ -1453,12 +1452,14 @@ impl LayeredTimeline {
let mut layers_to_remove: Vec<Arc<dyn Layer>> = Vec::new();
// Scan all layer files in the directory. For each file, if a newer file
// exists, we can remove the old one.
// Scan all on-disk layers in the timeline.
//
// Garbage collect the layer if all conditions are satisfied:
// 1. it is older than cutoff LSN;
// 2. it doesn't need to be retained for 'retain_lsns';
// 3. newer on-disk layer exists (only for non-dropped segments);
// 4. this layer doesn't serve as a tombstone for some older layer;
//
// Determine for each file if it needs to be retained
// FIXME: also scan open in-memory layers. Normally we cannot remove the
// latest layer of any seg, but if it was dropped it's possible to remove it.
let mut layers = self.layers.lock().unwrap();
'outer: for l in layers.iter_historic_layers() {
let seg = l.get_seg_tag();
@@ -1469,7 +1470,7 @@ impl LayeredTimeline {
result.ondisk_nonrelfiles_total += 1;
}
// Is it newer than cutoff point?
// 1. Is it newer than cutoff point?
if l.get_end_lsn() > cutoff {
info!(
"keeping {} {}-{} because it's newer than cutoff {}",
@@ -1486,7 +1487,7 @@ impl LayeredTimeline {
continue 'outer;
}
// 2. Is it needed by any child branch?
// 2. Is it needed by a child branch?
for retain_lsn in &retain_lsns {
// start_lsn is inclusive and end_lsn is exclusive
if l.get_start_lsn() <= *retain_lsn && *retain_lsn < l.get_end_lsn() {
@@ -1506,9 +1507,15 @@ impl LayeredTimeline {
}
}
// Unless the relation was dropped, is there a later image file for this relation?
// 3. Is there a later on-disk layer for this relation?
if !l.is_dropped() && !layers.newer_image_layer_exists(l.get_seg_tag(), l.get_end_lsn())
{
info!(
"keeping {} {}-{} because it is the latest layer",
seg,
l.get_start_lsn(),
l.get_end_lsn()
);
if seg.rel.is_relation() {
result.ondisk_relfiles_not_updated += 1;
} else {
@@ -1517,6 +1524,77 @@ impl LayeredTimeline {
continue 'outer;
}
// 4. Does this layer serve as a tombstone for some older layer?
if l.is_dropped() {
let prior_lsn = l.get_start_lsn().checked_sub(1u64).unwrap();
// Check if this layer serves as a tombstone for this timeline
// We have to do this separately from timeline check below,
// because LayerMap of this timeline is already locked.
let mut is_tombstone = layers.layer_exists_at_lsn(l.get_seg_tag(), prior_lsn);
if is_tombstone {
info!(
"earlier layer exists at {} in {}",
prior_lsn, self.timelineid
);
}
// Now check ancestor timelines, if any
else if let Some(ancestor) = &self.ancestor_timeline {
let prior_lsn = ancestor.get_last_record_lsn();
if seg.rel.is_blocky() {
info!(
"check blocky relish size {} at {} in {} for layer {}-{}",
seg,
prior_lsn,
ancestor.timelineid,
l.get_start_lsn(),
l.get_end_lsn()
);
match ancestor.get_relish_size(seg.rel, prior_lsn).unwrap() {
Some(size) => {
let last_live_seg = SegmentTag::from_blknum(seg.rel, size - 1);
info!(
"blocky rel size is {} last_live_seg.segno {} seg.segno {}",
size, last_live_seg.segno, seg.segno
);
if last_live_seg.segno >= seg.segno {
is_tombstone = true;
}
}
_ => {
info!("blocky rel doesn't exist");
}
}
} else {
info!(
"check non-blocky relish existence {} at {} in {} for layer {}-{}",
seg,
prior_lsn,
ancestor.timelineid,
l.get_start_lsn(),
l.get_end_lsn()
);
is_tombstone = ancestor.get_rel_exists(seg.rel, prior_lsn).unwrap_or(false);
}
}
if is_tombstone {
info!(
"keeping {} {}-{} because this layer serves as a tombstone for an older layer",
seg,
l.get_start_lsn(),
l.get_end_lsn()
);
if seg.rel.is_relation() {
result.ondisk_relfiles_needed_as_tombstone += 1;
} else {
result.ondisk_nonrelfiles_needed_as_tombstone += 1;
}
continue 'outer;
}
}
// We didn't find any reason to keep this file, so remove it.
info!(
"garbage collecting {} {}-{} {}",

View File

@@ -174,6 +174,20 @@ impl LayerMap {
}
}
/// Is there any layer for the given segment that is alive at the LSN?
///
/// This is a public wrapper for a SegEntry function, used during
/// garbage collection to determine whether some alive layer exists
/// at the LSN. If so, we must not delete a newer dropped layer,
/// to avoid incorrectly making the older layer visible again.
pub fn layer_exists_at_lsn(&self, seg: SegmentTag, lsn: Lsn) -> bool {
    // `map_or` collapses both failure modes to `false`: the segment is
    // unknown to this LayerMap, or the SegEntry cannot answer for this LSN.
    self.segs
        .get(&seg)
        .map_or(false, |segentry| segentry.exists_at_lsn(lsn).unwrap_or(false))
}
/// Return the oldest in-memory layer, along with its generation number.
pub fn peek_oldest_open(&self) -> Option<(Arc<InMemoryLayer>, u64)> {
self.open_layers

View File

@@ -654,12 +654,14 @@ impl postgres_backend::Handler for PageServerHandler {
RowDescriptor::int8_col(b"layer_relfiles_needed_by_cutoff"),
RowDescriptor::int8_col(b"layer_relfiles_needed_by_branches"),
RowDescriptor::int8_col(b"layer_relfiles_not_updated"),
RowDescriptor::int8_col(b"layer_relfiles_needed_as_tombstone"),
RowDescriptor::int8_col(b"layer_relfiles_removed"),
RowDescriptor::int8_col(b"layer_relfiles_dropped"),
RowDescriptor::int8_col(b"layer_nonrelfiles_total"),
RowDescriptor::int8_col(b"layer_nonrelfiles_needed_by_cutoff"),
RowDescriptor::int8_col(b"layer_nonrelfiles_needed_by_branches"),
RowDescriptor::int8_col(b"layer_nonrelfiles_not_updated"),
RowDescriptor::int8_col(b"layer_nonrelfiles_needed_as_tombstone"),
RowDescriptor::int8_col(b"layer_nonrelfiles_removed"),
RowDescriptor::int8_col(b"layer_nonrelfiles_dropped"),
RowDescriptor::int8_col(b"elapsed"),
@@ -679,6 +681,12 @@ impl postgres_backend::Handler for PageServerHandler {
.as_bytes(),
),
Some(result.ondisk_relfiles_not_updated.to_string().as_bytes()),
Some(
result
.ondisk_relfiles_needed_as_tombstone
.to_string()
.as_bytes(),
),
Some(result.ondisk_relfiles_removed.to_string().as_bytes()),
Some(result.ondisk_relfiles_dropped.to_string().as_bytes()),
Some(result.ondisk_nonrelfiles_total.to_string().as_bytes()),
@@ -695,6 +703,12 @@ impl postgres_backend::Handler for PageServerHandler {
.as_bytes(),
),
Some(result.ondisk_nonrelfiles_not_updated.to_string().as_bytes()),
Some(
result
.ondisk_nonrelfiles_needed_as_tombstone
.to_string()
.as_bytes(),
),
Some(result.ondisk_nonrelfiles_removed.to_string().as_bytes()),
Some(result.ondisk_nonrelfiles_dropped.to_string().as_bytes()),
Some(result.elapsed.as_millis().to_string().as_bytes()),

View File

@@ -48,6 +48,7 @@ pub struct GcResult {
pub ondisk_relfiles_needed_by_cutoff: u64,
pub ondisk_relfiles_needed_by_branches: u64,
pub ondisk_relfiles_not_updated: u64,
pub ondisk_relfiles_needed_as_tombstone: u64,
pub ondisk_relfiles_removed: u64, // # of layer files removed because they have been made obsolete by newer ondisk files.
pub ondisk_relfiles_dropped: u64, // # of layer files removed because the relation was dropped
@@ -55,6 +56,7 @@ pub struct GcResult {
pub ondisk_nonrelfiles_needed_by_cutoff: u64,
pub ondisk_nonrelfiles_needed_by_branches: u64,
pub ondisk_nonrelfiles_not_updated: u64,
pub ondisk_nonrelfiles_needed_as_tombstone: u64,
pub ondisk_nonrelfiles_removed: u64, // # of layer files removed because they have been made obsolete by newer ondisk files.
pub ondisk_nonrelfiles_dropped: u64, // # of layer files removed because the relation was dropped
@@ -67,6 +69,7 @@ impl AddAssign for GcResult {
self.ondisk_relfiles_needed_by_cutoff += other.ondisk_relfiles_needed_by_cutoff;
self.ondisk_relfiles_needed_by_branches += other.ondisk_relfiles_needed_by_branches;
self.ondisk_relfiles_not_updated += other.ondisk_relfiles_not_updated;
self.ondisk_relfiles_needed_as_tombstone += other.ondisk_relfiles_needed_as_tombstone;
self.ondisk_relfiles_removed += other.ondisk_relfiles_removed;
self.ondisk_relfiles_dropped += other.ondisk_relfiles_dropped;
@@ -74,6 +77,7 @@ impl AddAssign for GcResult {
self.ondisk_nonrelfiles_needed_by_cutoff += other.ondisk_nonrelfiles_needed_by_cutoff;
self.ondisk_nonrelfiles_needed_by_branches += other.ondisk_nonrelfiles_needed_by_branches;
self.ondisk_nonrelfiles_not_updated += other.ondisk_nonrelfiles_not_updated;
self.ondisk_nonrelfiles_needed_as_tombstone += other.ondisk_nonrelfiles_needed_as_tombstone;
self.ondisk_nonrelfiles_removed += other.ondisk_nonrelfiles_removed;
self.ondisk_nonrelfiles_dropped += other.ondisk_nonrelfiles_dropped;

View File

@@ -6,8 +6,8 @@ pytest_plugins = ("fixtures.zenith_fixtures")
def print_gc_result(row):
print("GC duration {elapsed} ms".format_map(row));
print(" REL total: {layer_relfiles_total}, needed_by_cutoff {layer_relfiles_needed_by_cutoff}, needed_by_branches: {layer_relfiles_needed_by_branches}, not_updated: {layer_relfiles_not_updated}, removed: {layer_relfiles_removed}, dropped: {layer_relfiles_dropped}".format_map(row))
print(" NONREL total: {layer_nonrelfiles_total}, needed_by_cutoff {layer_nonrelfiles_needed_by_cutoff}, needed_by_branches: {layer_nonrelfiles_needed_by_branches}, not_updated: {layer_nonrelfiles_not_updated}, removed: {layer_nonrelfiles_removed}, dropped: {layer_nonrelfiles_dropped}".format_map(row))
print(" REL total: {layer_relfiles_total}, needed_by_cutoff {layer_relfiles_needed_by_cutoff}, needed_by_branches: {layer_relfiles_needed_by_branches}, not_updated: {layer_relfiles_not_updated}, needed_as_tombstone {layer_relfiles_needed_as_tombstone}, removed: {layer_relfiles_removed}, dropped: {layer_relfiles_dropped}".format_map(row))
print(" NONREL total: {layer_nonrelfiles_total}, needed_by_cutoff {layer_nonrelfiles_needed_by_cutoff}, needed_by_branches: {layer_nonrelfiles_needed_by_branches}, not_updated: {layer_nonrelfiles_not_updated}, needed_as_tombstone {layer_nonrelfiles_needed_as_tombstone}, removed: {layer_nonrelfiles_removed}, dropped: {layer_nonrelfiles_dropped}".format_map(row))
#
@@ -113,12 +113,19 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
row = pscur.fetchone()
print_gc_result(row);
# We still cannot remove the latest layers
# because they serve as tombstones for earlier layers.
assert row['layer_relfiles_dropped'] == 0
# Each relation fork is counted separately, hence 3.
assert row['layer_relfiles_dropped'] == 3
assert row['layer_relfiles_needed_as_tombstone'] == 3
# The catalog updates also create new layer files of the catalogs, which
# are counted as 'removed'
assert row['layer_relfiles_removed'] > 0
# TODO Change the test to check actual CG of dropped layers.
# Each relation fork is counted separately, hence 3.
#assert row['layer_relfiles_dropped'] == 3
# TODO: perhaps we should count catalog and user relations separately,
# to make this kind of testing more robust