fix(pageserver): skip gc-compaction for metadata key ranges (#12618)

## Problem

part of https://github.com/neondatabase/neon/issues/11318 ; it is not
entirely safe to run gc-compaction over the metadata key range due to
tombstones and implications of image layers (missing key in image layer
== key not exist). The auto gc-compaction trigger already skips metadata
key ranges (see `schedule_auto_compaction` call in
`trigger_auto_compaction`). In this patch we enforce it directly in
gc_compact_inner so that compactions triggered via HTTP API will also be
subject to this restriction.

## Summary of changes

Ensure gc-compaction only runs on rel key ranges.

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2025-07-16 17:52:18 -04:00
committed by GitHub
parent fb796229bf
commit f2828bbe19
4 changed files with 123 additions and 27 deletions

View File

@@ -2357,6 +2357,7 @@ async fn timeline_compact_handler(
flags, flags,
sub_compaction, sub_compaction,
sub_compaction_max_job_size_mb, sub_compaction_max_job_size_mb,
gc_compaction_do_metadata_compaction: false,
}; };
let scheduled = compact_request let scheduled = compact_request

View File

@@ -9216,7 +9216,11 @@ mod tests {
let cancel = CancellationToken::new(); let cancel = CancellationToken::new();
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
@@ -9299,7 +9303,11 @@ mod tests {
guard.cutoffs.space = Lsn(0x40); guard.cutoffs.space = Lsn(0x40);
} }
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
@@ -9836,7 +9844,11 @@ mod tests {
let cancel = CancellationToken::new(); let cancel = CancellationToken::new();
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
@@ -9871,7 +9883,11 @@ mod tests {
guard.cutoffs.space = Lsn(0x40); guard.cutoffs.space = Lsn(0x40);
} }
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
@@ -10446,7 +10462,7 @@ mod tests {
&cancel, &cancel,
CompactOptions { CompactOptions {
flags: dryrun_flags, flags: dryrun_flags,
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -10457,14 +10473,22 @@ mod tests {
verify_result().await; verify_result().await;
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
verify_result().await; verify_result().await;
// compact again // compact again
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
verify_result().await; verify_result().await;
@@ -10483,14 +10507,22 @@ mod tests {
guard.cutoffs.space = Lsn(0x38); guard.cutoffs.space = Lsn(0x38);
} }
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result
// not increasing the GC horizon and compact again // not increasing the GC horizon and compact again
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
verify_result().await; verify_result().await;
@@ -10695,7 +10727,7 @@ mod tests {
&cancel, &cancel,
CompactOptions { CompactOptions {
flags: dryrun_flags, flags: dryrun_flags,
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -10706,14 +10738,22 @@ mod tests {
verify_result().await; verify_result().await;
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
verify_result().await; verify_result().await;
// compact again // compact again
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
verify_result().await; verify_result().await;
@@ -10913,7 +10953,11 @@ mod tests {
let cancel = CancellationToken::new(); let cancel = CancellationToken::new();
branch_tline branch_tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
@@ -10926,7 +10970,7 @@ mod tests {
&cancel, &cancel,
CompactOptions { CompactOptions {
compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x40))), compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x40))),
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -11594,7 +11638,7 @@ mod tests {
CompactOptions { CompactOptions {
flags: EnumSet::new(), flags: EnumSet::new(),
compact_key_range: Some((get_key(0)..get_key(2)).into()), compact_key_range: Some((get_key(0)..get_key(2)).into()),
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -11641,7 +11685,7 @@ mod tests {
CompactOptions { CompactOptions {
flags: EnumSet::new(), flags: EnumSet::new(),
compact_key_range: Some((get_key(2)..get_key(4)).into()), compact_key_range: Some((get_key(2)..get_key(4)).into()),
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -11693,7 +11737,7 @@ mod tests {
CompactOptions { CompactOptions {
flags: EnumSet::new(), flags: EnumSet::new(),
compact_key_range: Some((get_key(4)..get_key(9)).into()), compact_key_range: Some((get_key(4)..get_key(9)).into()),
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -11744,7 +11788,7 @@ mod tests {
CompactOptions { CompactOptions {
flags: EnumSet::new(), flags: EnumSet::new(),
compact_key_range: Some((get_key(9)..get_key(10)).into()), compact_key_range: Some((get_key(9)..get_key(10)).into()),
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -11800,7 +11844,7 @@ mod tests {
CompactOptions { CompactOptions {
flags: EnumSet::new(), flags: EnumSet::new(),
compact_key_range: Some((get_key(0)..get_key(10)).into()), compact_key_range: Some((get_key(0)..get_key(10)).into()),
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -12071,7 +12115,7 @@ mod tests {
&cancel, &cancel,
CompactOptions { CompactOptions {
compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x28))), compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x28))),
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -12106,7 +12150,11 @@ mod tests {
// compact again // compact again
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
verify_result().await; verify_result().await;
@@ -12325,7 +12373,7 @@ mod tests {
CompactOptions { CompactOptions {
compact_key_range: Some((get_key(0)..get_key(2)).into()), compact_key_range: Some((get_key(0)..get_key(2)).into()),
compact_lsn_range: Some((Lsn(0x20)..Lsn(0x28)).into()), compact_lsn_range: Some((Lsn(0x20)..Lsn(0x28)).into()),
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -12371,7 +12419,7 @@ mod tests {
CompactOptions { CompactOptions {
compact_key_range: Some((get_key(3)..get_key(8)).into()), compact_key_range: Some((get_key(3)..get_key(8)).into()),
compact_lsn_range: Some((Lsn(0x28)..Lsn(0x40)).into()), compact_lsn_range: Some((Lsn(0x28)..Lsn(0x40)).into()),
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -12419,7 +12467,7 @@ mod tests {
CompactOptions { CompactOptions {
compact_key_range: Some((get_key(0)..get_key(5)).into()), compact_key_range: Some((get_key(0)..get_key(5)).into()),
compact_lsn_range: Some((Lsn(0x20)..Lsn(0x50)).into()), compact_lsn_range: Some((Lsn(0x20)..Lsn(0x50)).into()),
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )
@@ -12454,7 +12502,11 @@ mod tests {
// final full compaction // final full compaction
tline tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx) .compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await .await
.unwrap(); .unwrap();
verify_result().await; verify_result().await;
@@ -12564,7 +12616,7 @@ mod tests {
CompactOptions { CompactOptions {
compact_key_range: None, compact_key_range: None,
compact_lsn_range: None, compact_lsn_range: None,
..Default::default() ..CompactOptions::default_for_gc_compaction_unit_tests()
}, },
&ctx, &ctx,
) )

View File

@@ -939,6 +939,20 @@ pub(crate) struct CompactOptions {
/// Set job size for the GC compaction. /// Set job size for the GC compaction.
/// This option is only used by GC compaction. /// This option is only used by GC compaction.
pub sub_compaction_max_job_size_mb: Option<u64>, pub sub_compaction_max_job_size_mb: Option<u64>,
/// Only for GC compaction.
/// If set, the compaction will compact the metadata layers. Should be only set to true in unit tests
/// because metadata compaction is not fully supported yet.
pub gc_compaction_do_metadata_compaction: bool,
}
impl CompactOptions {
#[cfg(test)]
pub fn default_for_gc_compaction_unit_tests() -> Self {
Self {
gc_compaction_do_metadata_compaction: true,
..Default::default()
}
}
} }
impl std::fmt::Debug for Timeline { impl std::fmt::Debug for Timeline {
@@ -2185,6 +2199,7 @@ impl Timeline {
compact_lsn_range: None, compact_lsn_range: None,
sub_compaction: false, sub_compaction: false,
sub_compaction_max_job_size_mb: None, sub_compaction_max_job_size_mb: None,
gc_compaction_do_metadata_compaction: false,
}, },
ctx, ctx,
) )

View File

@@ -396,6 +396,7 @@ impl GcCompactionQueue {
}), }),
compact_lsn_range: None, compact_lsn_range: None,
sub_compaction_max_job_size_mb: None, sub_compaction_max_job_size_mb: None,
gc_compaction_do_metadata_compaction: false,
}, },
permit, permit,
); );
@@ -512,6 +513,7 @@ impl GcCompactionQueue {
compact_key_range: Some(job.compact_key_range.into()), compact_key_range: Some(job.compact_key_range.into()),
compact_lsn_range: Some(job.compact_lsn_range.into()), compact_lsn_range: Some(job.compact_lsn_range.into()),
sub_compaction_max_job_size_mb: None, sub_compaction_max_job_size_mb: None,
gc_compaction_do_metadata_compaction: false,
}; };
pending_tasks.push(GcCompactionQueueItem::SubCompactionJob { pending_tasks.push(GcCompactionQueueItem::SubCompactionJob {
options, options,
@@ -785,6 +787,8 @@ pub(crate) struct GcCompactJob {
/// as specified here. The true range being compacted is `min_lsn/max_lsn` in [`GcCompactionJobDescription`]. /// as specified here. The true range being compacted is `min_lsn/max_lsn` in [`GcCompactionJobDescription`].
/// min_lsn will always <= the lower bound specified here, and max_lsn will always >= the upper bound specified here. /// min_lsn will always <= the lower bound specified here, and max_lsn will always >= the upper bound specified here.
pub compact_lsn_range: Range<Lsn>, pub compact_lsn_range: Range<Lsn>,
/// See [`CompactOptions::gc_compaction_do_metadata_compaction`].
pub do_metadata_compaction: bool,
} }
impl GcCompactJob { impl GcCompactJob {
@@ -799,6 +803,7 @@ impl GcCompactJob {
.compact_lsn_range .compact_lsn_range
.map(|x| x.into()) .map(|x| x.into())
.unwrap_or(Lsn::INVALID..Lsn::MAX), .unwrap_or(Lsn::INVALID..Lsn::MAX),
do_metadata_compaction: options.gc_compaction_do_metadata_compaction,
} }
} }
} }
@@ -3174,6 +3179,7 @@ impl Timeline {
dry_run: job.dry_run, dry_run: job.dry_run,
compact_key_range: start..end, compact_key_range: start..end,
compact_lsn_range: job.compact_lsn_range.start..compact_below_lsn, compact_lsn_range: job.compact_lsn_range.start..compact_below_lsn,
do_metadata_compaction: false,
}); });
current_start = Some(end); current_start = Some(end);
} }
@@ -3236,7 +3242,7 @@ impl Timeline {
async fn compact_with_gc_inner( async fn compact_with_gc_inner(
self: &Arc<Self>, self: &Arc<Self>,
cancel: &CancellationToken, cancel: &CancellationToken,
job: GcCompactJob, mut job: GcCompactJob,
ctx: &RequestContext, ctx: &RequestContext,
yield_for_l0: bool, yield_for_l0: bool,
) -> Result<CompactionOutcome, CompactionError> { ) -> Result<CompactionOutcome, CompactionError> {
@@ -3244,6 +3250,28 @@ impl Timeline {
// with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc. // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc.
// Note that we already acquired the compaction lock when the outer `compact` function gets called. // Note that we already acquired the compaction lock when the outer `compact` function gets called.
// If the job is not configured to compact the metadata key range, shrink the key range
// to exclude the metadata key range. The check is done by checking if the end of the key range
// is larger than the start of the metadata key range. Note that metadata keys cover the entire
// second half of the keyspace, so it's enough to only check the end of the key range.
if !job.do_metadata_compaction
&& job.compact_key_range.end > Key::metadata_key_range().start
{
tracing::info!(
"compaction for metadata key range is not supported yet, overriding compact_key_range from {} to {}",
job.compact_key_range.end,
Key::metadata_key_range().start
);
// Shrink the key range to exclude the metadata key range.
job.compact_key_range.end = Key::metadata_key_range().start;
// Skip the job if the key range completely lies within the metadata key range.
if job.compact_key_range.start >= job.compact_key_range.end {
tracing::info!("compact_key_range is empty, skipping compaction");
return Ok(CompactionOutcome::Done);
}
}
let timer = Instant::now(); let timer = Instant::now();
let begin_timer = timer; let begin_timer = timer;