pageserver: properly yield for L0 compaction (#10769)

## Problem

When image compaction yields for L0 compaction, it may not immediately
schedule L0 compaction, because it just goes on to compact the next
pending timeline.

Touches #10694.
Requires #10744.

## Summary of changes

Extend `CompactionOutcome` with `YieldForL0` and `Skipped` variants, and
immediately schedule an L0 compaction pass in the `YieldForL0` case.
This commit is contained in:
Erik Grinaker
2025-02-12 00:43:58 +01:00
committed by GitHub
parent 6c83ac3fd2
commit 71c30e52fa
4 changed files with 44 additions and 28 deletions

View File

@@ -2901,7 +2901,8 @@ impl Tenant {
}
/// Performs one compaction iteration. Called periodically from the compaction loop. Returns
/// whether another compaction iteration is needed (if we yield), or
/// whether another compaction is needed, if we still have pending work or if we yield for
/// immediate L0 compaction.
///
/// Compaction can also be explicitly requested for a timeline via the HTTP API.
async fn compaction_iteration(
@@ -2911,7 +2912,7 @@ impl Tenant {
) -> Result<CompactionOutcome, CompactionError> {
// Don't compact inactive tenants.
if !self.is_active() {
return Ok(CompactionOutcome::Done);
return Ok(CompactionOutcome::Skipped);
}
// Don't compact tenants that can't upload layers. We don't check `may_delete_layers_hint`,
@@ -2919,13 +2920,13 @@ impl Tenant {
let location = self.tenant_conf.load().location;
if !location.may_upload_layers_hint() {
info!("skipping compaction in location state {location:?}");
return Ok(CompactionOutcome::Done);
return Ok(CompactionOutcome::Skipped);
}
// Don't compact if the circuit breaker is tripped.
if self.compaction_circuit_breaker.lock().unwrap().is_broken() {
info!("skipping compaction due to previous failures");
return Ok(CompactionOutcome::Done);
return Ok(CompactionOutcome::Skipped);
}
// Collect all timelines to compact, along with offload instructions and L0 counts.
@@ -2988,10 +2989,15 @@ impl Tenant {
.instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id))
.await
.inspect_err(|err| self.maybe_trip_compaction_breaker(err))?;
has_pending_l0 |= outcome == CompactionOutcome::Pending;
match outcome {
CompactionOutcome::Done => {}
CompactionOutcome::Skipped => {}
CompactionOutcome::Pending => has_pending_l0 = true,
CompactionOutcome::YieldForL0 => has_pending_l0 = true,
}
}
if has_pending_l0 {
return Ok(CompactionOutcome::Pending); // do another pass
return Ok(CompactionOutcome::YieldForL0); // do another pass
}
}
@@ -3046,7 +3052,14 @@ impl Tenant {
})?;
}
has_pending |= outcome == CompactionOutcome::Pending;
match outcome {
CompactionOutcome::Done => {}
CompactionOutcome::Skipped => {}
CompactionOutcome::Pending => has_pending = true,
// This mostly makes sense when the L0-only pass above is enabled, since there's
// otherwise no guarantee that we'll start with the timeline that has high L0.
CompactionOutcome::YieldForL0 => return Ok(CompactionOutcome::YieldForL0),
}
}
// Success! Untrip the breaker if necessary.

View File

@@ -268,13 +268,12 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
match output {
Ok(outcome) => {
error_run = 0;
// If there's more compaction work pending, reschedule immediately. This isn't
// necessarily L0 compaction, but that's fine for now.
//
// TODO: differentiate between L0 compaction and other compaction. The former needs
// to be responsive, the latter doesn't.
if outcome == CompactionOutcome::Pending {
tenant.l0_compaction_trigger.notify_one();
// If there's more compaction work, L0 or not, schedule an immediate run.
match outcome {
CompactionOutcome::Done => {}
CompactionOutcome::Skipped => {}
CompactionOutcome::YieldForL0 => tenant.l0_compaction_trigger.notify_one(),
CompactionOutcome::Pending => tenant.l0_compaction_trigger.notify_one(),
}
}

View File

@@ -1815,8 +1815,8 @@ impl Timeline {
// compaction task goes over it's period (20s) which is quite often in production.
let (_guard, _permit) = tokio::select! {
tuple = prepare => { tuple },
_ = self.cancel.cancelled() => return Ok(CompactionOutcome::Done),
_ = cancel.cancelled() => return Ok(CompactionOutcome::Done),
_ = self.cancel.cancelled() => return Ok(CompactionOutcome::Skipped),
_ = cancel.cancelled() => return Ok(CompactionOutcome::Skipped),
};
let last_record_lsn = self.get_last_record_lsn();
@@ -1824,7 +1824,7 @@ impl Timeline {
// Last record Lsn could be zero in case the timeline was just created
if !last_record_lsn.is_valid() {
warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}");
return Ok(CompactionOutcome::Done);
return Ok(CompactionOutcome::Skipped);
}
let result = match self.get_compaction_algorithm_settings().kind {

View File

@@ -609,8 +609,11 @@ pub enum CompactionOutcome {
/// Still has pending layers to be compacted after this round. Ideally, the scheduler
/// should immediately schedule another compaction.
Pending,
// TODO: add a skipped variant for cases where we didn't attempt compaction. These currently
// return Done, which can lead the caller to believe there is no compaction debt.
/// A timeline needs L0 compaction. Yield and schedule an immediate L0 compaction pass (only
/// guaranteed when `compaction_l0_first` is enabled).
YieldForL0,
/// Compaction was skipped, because the timeline is ineligible for compaction.
Skipped,
}
impl Timeline {
@@ -703,10 +706,11 @@ impl Timeline {
.unwrap_or(self.get_disk_consistent_lsn());
l0_min_lsn.max(self.get_ancestor_lsn())
};
// 1. L0 Compact
let l0_compaction_outcome = {
let l0_outcome = {
let timer = self.metrics.compact_time_histo.start_timer();
let l0_compaction_outcome = self
let l0_outcome = self
.compact_level0(
target_file_size,
options.flags.contains(CompactFlags::ForceL0Compaction),
@@ -714,17 +718,17 @@ impl Timeline {
)
.await?;
timer.stop_and_record();
l0_compaction_outcome
l0_outcome
};
if options.flags.contains(CompactFlags::OnlyL0Compaction) {
return Ok(l0_compaction_outcome);
return Ok(l0_outcome);
}
if l0_compaction_outcome == CompactionOutcome::Pending {
// Yield if we have pending L0 compaction. The scheduler will do another pass.
info!("skipping image layer generation and shard ancestor compaction due to L0 compaction did not include all layers.");
return Ok(CompactionOutcome::Pending);
// Yield if we have pending L0 compaction. The scheduler will do another pass.
if l0_outcome == CompactionOutcome::Pending || l0_outcome == CompactionOutcome::YieldForL0 {
info!("image/ancestor compaction yielding for L0 compaction");
return Ok(CompactionOutcome::YieldForL0);
}
if l0_l1_boundary_lsn < self.partitioning.read().1 {
@@ -788,7 +792,7 @@ impl Timeline {
if let LastImageLayerCreationStatus::Incomplete { .. } = outcome {
// Yield and do not do any other kind of compaction.
info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
return Ok(CompactionOutcome::Pending);
return Ok(CompactionOutcome::YieldForL0);
}
}
Err(err) => {