fix(pageserver): skip gc-compaction for metadata key ranges (#12618)

## Problem

part of https://github.com/neondatabase/neon/issues/11318 ; it is not
entirely safe to run gc-compaction over the metadata key range due to
tombstones and implications of image layers (missing key in image layer
== key not exist). The auto gc-compaction trigger already skips metadata
key ranges (see `schedule_auto_compaction` call in
`trigger_auto_compaction`). In this patch we enforce it directly in
gc_compact_inner so that compactions triggered via HTTP API will also be
subject to this restriction.

## Summary of changes

Ensure gc-compaction only runs on rel key ranges.

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2025-07-16 17:52:18 -04:00
committed by GitHub
parent fb796229bf
commit f2828bbe19
4 changed files with 123 additions and 27 deletions

View File

@@ -2357,6 +2357,7 @@ async fn timeline_compact_handler(
flags,
sub_compaction,
sub_compaction_max_job_size_mb,
gc_compaction_do_metadata_compaction: false,
};
let scheduled = compact_request

View File

@@ -9216,7 +9216,11 @@ mod tests {
let cancel = CancellationToken::new();
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
@@ -9299,7 +9303,11 @@ mod tests {
guard.cutoffs.space = Lsn(0x40);
}
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
@@ -9836,7 +9844,11 @@ mod tests {
let cancel = CancellationToken::new();
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
@@ -9871,7 +9883,11 @@ mod tests {
guard.cutoffs.space = Lsn(0x40);
}
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
@@ -10446,7 +10462,7 @@ mod tests {
&cancel,
CompactOptions {
flags: dryrun_flags,
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -10457,14 +10473,22 @@ mod tests {
verify_result().await;
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
// compact again
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
@@ -10483,14 +10507,22 @@ mod tests {
guard.cutoffs.space = Lsn(0x38);
}
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result
// not increasing the GC horizon and compact again
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
@@ -10695,7 +10727,7 @@ mod tests {
&cancel,
CompactOptions {
flags: dryrun_flags,
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -10706,14 +10738,22 @@ mod tests {
verify_result().await;
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
// compact again
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
@@ -10913,7 +10953,11 @@ mod tests {
let cancel = CancellationToken::new();
branch_tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
@@ -10926,7 +10970,7 @@ mod tests {
&cancel,
CompactOptions {
compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x40))),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -11594,7 +11638,7 @@ mod tests {
CompactOptions {
flags: EnumSet::new(),
compact_key_range: Some((get_key(0)..get_key(2)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -11641,7 +11685,7 @@ mod tests {
CompactOptions {
flags: EnumSet::new(),
compact_key_range: Some((get_key(2)..get_key(4)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -11693,7 +11737,7 @@ mod tests {
CompactOptions {
flags: EnumSet::new(),
compact_key_range: Some((get_key(4)..get_key(9)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -11744,7 +11788,7 @@ mod tests {
CompactOptions {
flags: EnumSet::new(),
compact_key_range: Some((get_key(9)..get_key(10)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -11800,7 +11844,7 @@ mod tests {
CompactOptions {
flags: EnumSet::new(),
compact_key_range: Some((get_key(0)..get_key(10)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -12071,7 +12115,7 @@ mod tests {
&cancel,
CompactOptions {
compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x28))),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -12106,7 +12150,11 @@ mod tests {
// compact again
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
@@ -12325,7 +12373,7 @@ mod tests {
CompactOptions {
compact_key_range: Some((get_key(0)..get_key(2)).into()),
compact_lsn_range: Some((Lsn(0x20)..Lsn(0x28)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -12371,7 +12419,7 @@ mod tests {
CompactOptions {
compact_key_range: Some((get_key(3)..get_key(8)).into()),
compact_lsn_range: Some((Lsn(0x28)..Lsn(0x40)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -12419,7 +12467,7 @@ mod tests {
CompactOptions {
compact_key_range: Some((get_key(0)..get_key(5)).into()),
compact_lsn_range: Some((Lsn(0x20)..Lsn(0x50)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -12454,7 +12502,11 @@ mod tests {
// final full compaction
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
@@ -12564,7 +12616,7 @@ mod tests {
CompactOptions {
compact_key_range: None,
compact_lsn_range: None,
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)

View File

@@ -939,6 +939,20 @@ pub(crate) struct CompactOptions {
/// Set job size for the GC compaction.
/// This option is only used by GC compaction.
pub sub_compaction_max_job_size_mb: Option<u64>,
/// Only for GC compaction.
/// If set, the compaction will compact the metadata layers. Should be only set to true in unit tests
/// because metadata compaction is not fully supported yet.
pub gc_compaction_do_metadata_compaction: bool,
}
impl CompactOptions {
#[cfg(test)]
pub fn default_for_gc_compaction_unit_tests() -> Self {
Self {
gc_compaction_do_metadata_compaction: true,
..Default::default()
}
}
}
impl std::fmt::Debug for Timeline {
@@ -2185,6 +2199,7 @@ impl Timeline {
compact_lsn_range: None,
sub_compaction: false,
sub_compaction_max_job_size_mb: None,
gc_compaction_do_metadata_compaction: false,
},
ctx,
)

View File

@@ -396,6 +396,7 @@ impl GcCompactionQueue {
}),
compact_lsn_range: None,
sub_compaction_max_job_size_mb: None,
gc_compaction_do_metadata_compaction: false,
},
permit,
);
@@ -512,6 +513,7 @@ impl GcCompactionQueue {
compact_key_range: Some(job.compact_key_range.into()),
compact_lsn_range: Some(job.compact_lsn_range.into()),
sub_compaction_max_job_size_mb: None,
gc_compaction_do_metadata_compaction: false,
};
pending_tasks.push(GcCompactionQueueItem::SubCompactionJob {
options,
@@ -785,6 +787,8 @@ pub(crate) struct GcCompactJob {
/// as specified here. The true range being compacted is `min_lsn/max_lsn` in [`GcCompactionJobDescription`].
/// min_lsn will always <= the lower bound specified here, and max_lsn will always >= the upper bound specified here.
pub compact_lsn_range: Range<Lsn>,
/// See [`CompactOptions::gc_compaction_do_metadata_compaction`].
pub do_metadata_compaction: bool,
}
impl GcCompactJob {
@@ -799,6 +803,7 @@ impl GcCompactJob {
.compact_lsn_range
.map(|x| x.into())
.unwrap_or(Lsn::INVALID..Lsn::MAX),
do_metadata_compaction: options.gc_compaction_do_metadata_compaction,
}
}
}
@@ -3174,6 +3179,7 @@ impl Timeline {
dry_run: job.dry_run,
compact_key_range: start..end,
compact_lsn_range: job.compact_lsn_range.start..compact_below_lsn,
do_metadata_compaction: false,
});
current_start = Some(end);
}
@@ -3236,7 +3242,7 @@ impl Timeline {
async fn compact_with_gc_inner(
self: &Arc<Self>,
cancel: &CancellationToken,
job: GcCompactJob,
mut job: GcCompactJob,
ctx: &RequestContext,
yield_for_l0: bool,
) -> Result<CompactionOutcome, CompactionError> {
@@ -3244,6 +3250,28 @@ impl Timeline {
// with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc.
// Note that we already acquired the compaction lock when the outer `compact` function gets called.
// If the job is not configured to compact the metadata key range, shrink the key range
// to exclude the metadata key range. The check is done by checking if the end of the key range
// is larger than the start of the metadata key range. Note that metadata keys cover the entire
// second half of the keyspace, so it's enough to only check the end of the key range.
if !job.do_metadata_compaction
&& job.compact_key_range.end > Key::metadata_key_range().start
{
tracing::info!(
"compaction for metadata key range is not supported yet, overriding compact_key_range from {} to {}",
job.compact_key_range.end,
Key::metadata_key_range().start
);
// Shrink the key range to exclude the metadata key range.
job.compact_key_range.end = Key::metadata_key_range().start;
// Skip the job if the key range completely lies within the metadata key range.
if job.compact_key_range.start >= job.compact_key_range.end {
tracing::info!("compact_key_range is empty, skipping compaction");
return Ok(CompactionOutcome::Done);
}
}
let timer = Instant::now();
let begin_timer = timer;