Mirror of https://github.com/neondatabase/neon.git, synced 2026-01-08 14:02:55 +00:00
pageserver: properly yield for L0 compaction (#10769)
## Problem

When image compaction yields for L0 compaction, it may not immediately schedule L0 compaction, because it just goes on to compact the next pending timeline.

Touches #10694.
Requires #10744.

## Summary of changes

Extend `CompactionOutcome` with `YieldForL0` and `Skipped` variants, and immediately schedule an L0 compaction pass in the `YieldForL0` case.
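For orientation, the scheduler's dispatch on the extended outcome can be condensed as below. This is a minimal sketch, not the literal code from the diff that follows; `dispatch` and `schedule_immediate` are hypothetical stand-ins for the compaction loop's match and for `tenant.l0_compaction_trigger.notify_one()`:

```rust
/// The four outcomes a compaction pass can report (mirrors the enum
/// extended in this PR; doc comments abbreviated).
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum CompactionOutcome {
    Done,       // no further work needed
    Skipped,    // compaction not attempted (e.g. inactive tenant, tripped breaker)
    Pending,    // more compaction work remains; run another pass soon
    YieldForL0, // a timeline has L0 debt; run an L0-only pass immediately
}

/// Sketch of the scheduler's reaction to an outcome. `schedule_immediate`
/// stands in for notifying the L0 compaction trigger.
fn dispatch(outcome: CompactionOutcome, schedule_immediate: impl FnOnce()) {
    match outcome {
        // Nothing to do: wait for the next periodic compaction run.
        CompactionOutcome::Done | CompactionOutcome::Skipped => {}
        // More work pending, or a timeline yielded for L0: reschedule now.
        CompactionOutcome::Pending | CompactionOutcome::YieldForL0 => schedule_immediate(),
    }
}
```

Both `Pending` and `YieldForL0` cause an immediate rerun; the distinction lets `compaction_iteration` return early and prioritize an L0-only pass when a timeline yields.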
@@ -2901,7 +2901,8 @@ impl Tenant {
     }

     /// Performs one compaction iteration. Called periodically from the compaction loop. Returns
-    /// whether another compaction iteration is needed (if we yield), or
+    /// whether another compaction is needed, if we still have pending work or if we yield for
+    /// immediate L0 compaction.
     ///
     /// Compaction can also be explicitly requested for a timeline via the HTTP API.
     async fn compaction_iteration(
@@ -2911,7 +2912,7 @@ impl Tenant {
     ) -> Result<CompactionOutcome, CompactionError> {
         // Don't compact inactive tenants.
         if !self.is_active() {
-            return Ok(CompactionOutcome::Done);
+            return Ok(CompactionOutcome::Skipped);
         }

         // Don't compact tenants that can't upload layers. We don't check `may_delete_layers_hint`,
@@ -2919,13 +2920,13 @@ impl Tenant {
         let location = self.tenant_conf.load().location;
         if !location.may_upload_layers_hint() {
             info!("skipping compaction in location state {location:?}");
-            return Ok(CompactionOutcome::Done);
+            return Ok(CompactionOutcome::Skipped);
         }

         // Don't compact if the circuit breaker is tripped.
         if self.compaction_circuit_breaker.lock().unwrap().is_broken() {
             info!("skipping compaction due to previous failures");
-            return Ok(CompactionOutcome::Done);
+            return Ok(CompactionOutcome::Skipped);
         }

         // Collect all timelines to compact, along with offload instructions and L0 counts.
@@ -2988,10 +2989,15 @@ impl Tenant {
                     .instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id))
                     .await
                     .inspect_err(|err| self.maybe_trip_compaction_breaker(err))?;
-                has_pending_l0 |= outcome == CompactionOutcome::Pending;
+                match outcome {
+                    CompactionOutcome::Done => {}
+                    CompactionOutcome::Skipped => {}
+                    CompactionOutcome::Pending => has_pending_l0 = true,
+                    CompactionOutcome::YieldForL0 => has_pending_l0 = true,
+                }
             }

             if has_pending_l0 {
-                return Ok(CompactionOutcome::Pending); // do another pass
+                return Ok(CompactionOutcome::YieldForL0); // do another pass
             }
         }
@@ -3046,7 +3052,14 @@ impl Tenant {
                 })?;
             }

-            has_pending |= outcome == CompactionOutcome::Pending;
+            match outcome {
+                CompactionOutcome::Done => {}
+                CompactionOutcome::Skipped => {}
+                CompactionOutcome::Pending => has_pending = true,
+                // This mostly makes sense when the L0-only pass above is enabled, since there's
+                // otherwise no guarantee that we'll start with the timeline that has high L0.
+                CompactionOutcome::YieldForL0 => return Ok(CompactionOutcome::YieldForL0),
+            }
         }

         // Success! Untrip the breaker if necessary.
@@ -268,13 +268,12 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
         match output {
             Ok(outcome) => {
                 error_run = 0;
-                // If there's more compaction work pending, reschedule immediately. This isn't
-                // necessarily L0 compaction, but that's fine for now.
-                //
-                // TODO: differentiate between L0 compaction and other compaction. The former needs
-                // to be responsive, the latter doesn't.
-                if outcome == CompactionOutcome::Pending {
-                    tenant.l0_compaction_trigger.notify_one();
-                }
+                // If there's more compaction work, L0 or not, schedule an immediate run.
+                match outcome {
+                    CompactionOutcome::Done => {}
+                    CompactionOutcome::Skipped => {}
+                    CompactionOutcome::YieldForL0 => tenant.l0_compaction_trigger.notify_one(),
+                    CompactionOutcome::Pending => tenant.l0_compaction_trigger.notify_one(),
+                }
             }
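Side note: the `notify_one` call suggests `l0_compaction_trigger` is a `tokio::sync::Notify` (its declaration is not part of this diff). Under that assumption, the wakeup pattern between this loop and the L0-only pass would look roughly like:

```rust
use std::sync::Arc;
use tokio::sync::Notify;

// Sketch of the trigger pattern, assuming `l0_compaction_trigger` is a
// `tokio::sync::Notify` shared with the L0 compaction loop.
async fn wait_for_l0_trigger(trigger: Arc<Notify>) {
    // `notify_one()` stores a permit if no task is currently waiting, so a
    // trigger fired just before we get here still wakes us immediately.
    trigger.notified().await;
    // ... run an L0-only compaction pass ...
}
```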
@@ -1815,8 +1815,8 @@ impl Timeline {
         // compaction task goes over it's period (20s) which is quite often in production.
         let (_guard, _permit) = tokio::select! {
             tuple = prepare => { tuple },
-            _ = self.cancel.cancelled() => return Ok(CompactionOutcome::Done),
-            _ = cancel.cancelled() => return Ok(CompactionOutcome::Done),
+            _ = self.cancel.cancelled() => return Ok(CompactionOutcome::Skipped),
+            _ = cancel.cancelled() => return Ok(CompactionOutcome::Skipped),
         };

         let last_record_lsn = self.get_last_record_lsn();
@@ -1824,7 +1824,7 @@ impl Timeline {
         // Last record Lsn could be zero in case the timeline was just created
         if !last_record_lsn.is_valid() {
             warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}");
-            return Ok(CompactionOutcome::Done);
+            return Ok(CompactionOutcome::Skipped);
         }

         let result = match self.get_compaction_algorithm_settings().kind {
@@ -609,8 +609,11 @@ pub enum CompactionOutcome {
     /// Still has pending layers to be compacted after this round. Ideally, the scheduler
     /// should immediately schedule another compaction.
     Pending,
-    // TODO: add a skipped variant for cases where we didn't attempt compaction. These currently
-    // return Done, which can lead the caller to believe there is no compaction debt.
+    /// A timeline needs L0 compaction. Yield and schedule an immediate L0 compaction pass (only
+    /// guaranteed when `compaction_l0_first` is enabled).
+    YieldForL0,
+    /// Compaction was skipped, because the timeline is ineligible for compaction.
+    Skipped,
 }

 impl Timeline {
@@ -703,10 +706,11 @@ impl Timeline {
                 .unwrap_or(self.get_disk_consistent_lsn());
             l0_min_lsn.max(self.get_ancestor_lsn())
         };

         // 1. L0 Compact
-        let l0_compaction_outcome = {
+        let l0_outcome = {
             let timer = self.metrics.compact_time_histo.start_timer();
-            let l0_compaction_outcome = self
+            let l0_outcome = self
                 .compact_level0(
                     target_file_size,
+                    options.flags.contains(CompactFlags::ForceL0Compaction),
@@ -714,17 +718,17 @@ impl Timeline {
                 )
                 .await?;
             timer.stop_and_record();
-            l0_compaction_outcome
+            l0_outcome
         };

         if options.flags.contains(CompactFlags::OnlyL0Compaction) {
-            return Ok(l0_compaction_outcome);
+            return Ok(l0_outcome);
         }

-        if l0_compaction_outcome == CompactionOutcome::Pending {
-            // Yield if we have pending L0 compaction. The scheduler will do another pass.
-            info!("skipping image layer generation and shard ancestor compaction due to L0 compaction did not include all layers.");
-            return Ok(CompactionOutcome::Pending);
+        // Yield if we have pending L0 compaction. The scheduler will do another pass.
+        if l0_outcome == CompactionOutcome::Pending || l0_outcome == CompactionOutcome::YieldForL0 {
+            info!("image/ancestor compaction yielding for L0 compaction");
+            return Ok(CompactionOutcome::YieldForL0);
         }

         if l0_l1_boundary_lsn < self.partitioning.read().1 {
@@ -788,7 +792,7 @@ impl Timeline {
                 if let LastImageLayerCreationStatus::Incomplete { .. } = outcome {
                     // Yield and do not do any other kind of compaction.
                     info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
-                    return Ok(CompactionOutcome::Pending);
+                    return Ok(CompactionOutcome::YieldForL0);
                 }
             }
             Err(err) => {