From 80596feeaa5fc50b2639b7e9a15d5196bad31d1b Mon Sep 17 00:00:00 2001
From: Erik Grinaker
Date: Tue, 1 Apr 2025 13:43:58 +0200
Subject: [PATCH] pageserver: invert `CompactFlags::NoYield` as `YieldForL0` (#11382)

## Problem

`CompactFlags::NoYield` was a bit inconvenient, since every caller except the
background compaction loop generally had to set it (e.g. HTTP API calls, tests,
etc.). It was also inconsistent with `CompactionOutcome::YieldForL0`.

## Summary of changes

Invert `CompactFlags::NoYield` as `CompactFlags::YieldForL0`. There should be
no behavioral changes. An illustrative before/after sketch of typical call
sites is appended after the diff.
---
 pageserver/src/http/routes.rs                |  2 -
 pageserver/src/tenant.rs                     | 61 ++++++--------
 pageserver/src/tenant/timeline.rs            | 26 +++++----
 pageserver/src/tenant/timeline/compaction.rs | 56 ++++++++----------
 4 files changed, 59 insertions(+), 86 deletions(-)

diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 5a13fb1387..2bedf9e11a 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -2256,7 +2256,6 @@ async fn timeline_compact_handler(
     let state = get_state(&request);

     let mut flags = EnumSet::empty();
-    flags |= CompactFlags::NoYield; // run compaction to completion
     if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
         flags |= CompactFlags::ForceL0Compaction;
     }
@@ -2417,7 +2416,6 @@ async fn timeline_checkpoint_handler(
     let state = get_state(&request);

     let mut flags = EnumSet::empty();
-    flags |= CompactFlags::NoYield; // run compaction to completion
     if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
         flags |= CompactFlags::ForceL0Compaction;
     }
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index e7d8ed75ed..3ed4103792 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -3080,6 +3080,7 @@ impl Tenant {
         let mut has_pending_l0 = false;
         for timeline in compact_l0 {
             let ctx = &ctx.with_scope_timeline(&timeline);
+            // NB: don't set CompactFlags::YieldForL0, since this is an L0-only compaction pass.
             let outcome = timeline
                 .compact(cancel, CompactFlags::OnlyL0Compaction.into(), ctx)
                 .instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id))
                 .await
@@ -3097,14 +3098,9 @@ impl Tenant {
             }
         }

-        // Pass 2: image compaction and timeline offloading. If any timelines have accumulated
-        // more L0 layers, they may also be compacted here.
-        //
-        // NB: image compaction may yield if there is pending L0 compaction.
-        //
-        // TODO: it will only yield if there is pending L0 compaction on the same timeline. If a
-        // different timeline needs compaction, it won't. It should check `l0_compaction_trigger`.
-        // We leave this for a later PR.
+        // Pass 2: image compaction and timeline offloading. If any timelines have accumulated more
+        // L0 layers, they may also be compacted here. Image compaction will yield if there is
+        // pending L0 compaction on any tenant timeline.
         //
         // TODO: consider ordering timelines by some priority, e.g. time since last full compaction,
         // amount of L1 delta debt or garbage, offload-eligible timelines first, etc.
@@ -3115,8 +3111,14 @@ impl Tenant {
             }

             let ctx = &ctx.with_scope_timeline(&timeline);
+            // Yield for L0 if the separate L0 pass is enabled (otherwise there's no point).
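+            // Yielding hands control back to the compaction scheduler, which runs another
+            // L0-only pass across the tenant's timelines before image compaction resumes here.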
+            let mut flags = EnumSet::default();
+            if self.get_compaction_l0_first() {
+                flags |= CompactFlags::YieldForL0;
+            }
+
             let mut outcome = timeline
-                .compact(cancel, EnumSet::default(), ctx)
+                .compact(cancel, flags, ctx)
                 .instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id))
                 .await
                 .inspect_err(|err| self.maybe_trip_compaction_breaker(err))?;
@@ -6516,11 +6518,7 @@ mod tests {
         tline.freeze_and_flush().await?;

         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         let mut writer = tline.writer().await;
@@ -6537,11 +6535,7 @@ mod tests {
         tline.freeze_and_flush().await?;

         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         let mut writer = tline.writer().await;
@@ -6558,11 +6552,7 @@ mod tests {
         tline.freeze_and_flush().await?;

         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         let mut writer = tline.writer().await;
@@ -6579,11 +6569,7 @@ mod tests {
         tline.freeze_and_flush().await?;

         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         assert_eq!(
@@ -6666,9 +6652,7 @@ mod tests {
         timeline.freeze_and_flush().await?;
         if compact {
             // this requires timeline to be &Arc<Timeline>
-            timeline
-                .compact(&cancel, CompactFlags::NoYield.into(), ctx)
-                .await?;
+            timeline.compact(&cancel, EnumSet::default(), ctx).await?;
         }

         // this doesn't really need to use the timeline_id target, but it is closer to what it
@@ -6995,7 +6979,6 @@ mod tests {
         child_timeline.freeze_and_flush().await?;
         let mut flags = EnumSet::new();
         flags.insert(CompactFlags::ForceRepartition);
-        flags.insert(CompactFlags::NoYield);
         child_timeline
             .compact(&CancellationToken::new(), flags, &ctx)
             .await?;
@@ -7374,9 +7357,7 @@ mod tests {

         // Perform a cycle of flush, compact, and GC
         tline.freeze_and_flush().await?;
-        tline
-            .compact(&cancel, CompactFlags::NoYield.into(), &ctx)
-            .await?;
+        tline.compact(&cancel, EnumSet::default(), &ctx).await?;
         tenant
             .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
             .await?;
@@ -7705,7 +7686,6 @@ mod tests {
             let mut flags = EnumSet::new();
             flags.insert(CompactFlags::ForceImageLayerCreation);
             flags.insert(CompactFlags::ForceRepartition);
-            flags.insert(CompactFlags::NoYield);
             flags
         } else {
             EnumSet::empty()
@@ -7756,9 +7736,7 @@ mod tests {
         let before_num_l0_delta_files =
             tline.layers.read().await.layer_map()?.level0_deltas().len();

-        tline
-            .compact(&cancel, CompactFlags::NoYield.into(), &ctx)
-            .await?;
+        tline.compact(&cancel, EnumSet::default(), &ctx).await?;

         let after_num_l0_delta_files =
             tline.layers.read().await.layer_map()?.level0_deltas().len();
@@ -7923,7 +7901,6 @@ mod tests {
                 let mut flags = EnumSet::new();
                 flags.insert(CompactFlags::ForceImageLayerCreation);
                 flags.insert(CompactFlags::ForceRepartition);
-                flags.insert(CompactFlags::NoYield);
                 flags
             },
             &ctx,
@@ -8386,7 +8363,6 @@ mod tests {
                 let mut flags = EnumSet::new();
                 flags.insert(CompactFlags::ForceImageLayerCreation);
                 flags.insert(CompactFlags::ForceRepartition);
-                flags.insert(CompactFlags::NoYield);
                 flags
             },
             &ctx,
@@ -8454,7 +8430,6 @@ mod tests {
                 let mut flags = EnumSet::new();
                 flags.insert(CompactFlags::ForceImageLayerCreation);
                 flags.insert(CompactFlags::ForceRepartition);
-                flags.insert(CompactFlags::NoYield);
                 flags
             },
             &ctx,
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 75f9225302..7c9c9a45d4 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -870,9 +870,14 @@ pub(crate) enum CompactFlags {
     OnlyL0Compaction,
     EnhancedGcBottomMostCompaction,
     DryRun,
-    /// Disables compaction yielding e.g. due to high L0 count. This is set e.g. when requesting
-    /// compaction via HTTP API.
-    NoYield,
+    /// Makes image compaction yield if there's pending L0 compaction. This should always be used in
+    /// the background compaction task, since we want to aggressively compact down L0 to bound
+    /// read amplification.
+    ///
+    /// It only makes sense to use this when `compaction_l0_first` is enabled (such that we yield to
+    /// an L0 compaction pass), and without `OnlyL0Compaction` (L0 compaction shouldn't yield for L0
+    /// compaction).
+    YieldForL0,
 }

 #[serde_with::serde_as]
@@ -1891,18 +1896,19 @@ impl Timeline {
         // out by other background tasks (including image compaction). We request this via
         // `BackgroundLoopKind::L0Compaction`.
         //
-        // If this is a regular compaction pass, and L0-only compaction is enabled in the config,
-        // then we should yield for immediate L0 compaction if necessary while we're waiting for the
-        // background task semaphore. There's no point yielding otherwise, since we'd just end up
-        // right back here.
+        // Yield for pending L0 compaction while waiting for the semaphore.
         let is_l0_only = options.flags.contains(CompactFlags::OnlyL0Compaction);
         let semaphore_kind = match is_l0_only && self.get_compaction_l0_semaphore() {
             true => BackgroundLoopKind::L0Compaction,
             false => BackgroundLoopKind::Compaction,
         };
-        let yield_for_l0 = !is_l0_only
-            && self.get_compaction_l0_first()
-            && !options.flags.contains(CompactFlags::NoYield);
+        let yield_for_l0 = options.flags.contains(CompactFlags::YieldForL0);
+        if yield_for_l0 {
+            // If this is an L0 pass, it doesn't make sense to yield for L0.
+            debug_assert!(!is_l0_only, "YieldForL0 during L0 pass");
+            // If `compaction_l0_first` is disabled, there's no point yielding.
+            debug_assert!(self.get_compaction_l0_first(), "YieldForL0 without L0 pass");
+        }

         let acquire = async move {
             let guard = self.compaction_lock.lock().await;
diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index 711501caa9..2276ed428b 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -394,8 +394,8 @@ impl GcCompactionQueue {
         if job.dry_run {
             flags |= CompactFlags::DryRun;
         }
-        if options.flags.contains(CompactFlags::NoYield) {
-            flags |= CompactFlags::NoYield;
+        if options.flags.contains(CompactFlags::YieldForL0) {
+            flags |= CompactFlags::YieldForL0;
         }
         let options = CompactOptions {
             flags,
@@ -983,7 +983,7 @@ impl Timeline {
         // Yield if we have pending L0 compaction. The scheduler will do another pass.
         if (l0_outcome == CompactionOutcome::Pending
             || l0_outcome == CompactionOutcome::YieldForL0)
-            && !options.flags.contains(CompactFlags::NoYield)
+            && options.flags.contains(CompactFlags::YieldForL0)
         {
             info!("image/ancestor compaction yielding for L0 compaction");
             return Ok(CompactionOutcome::YieldForL0);
         }
@@ -1028,7 +1028,7 @@ impl Timeline {
                         .load()
                         .as_ref()
                         .clone(),
-                    !options.flags.contains(CompactFlags::NoYield),
+                    options.flags.contains(CompactFlags::YieldForL0),
                 )
                 .await
                 .inspect_err(|err| {
@@ -2635,7 +2635,7 @@ impl Timeline {
     ) -> Result<CompactionOutcome, CompactionError> {
         let sub_compaction = options.sub_compaction;
         let job = GcCompactJob::from_compact_options(options.clone());
-        let no_yield = options.flags.contains(CompactFlags::NoYield);
+        let yield_for_l0 = options.flags.contains(CompactFlags::YieldForL0);
         if sub_compaction {
             info!(
                 "running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"
@@ -2650,7 +2650,7 @@ impl Timeline {
                     idx + 1,
                     jobs_len
                 );
-                self.compact_with_gc_inner(cancel, job, ctx, no_yield)
+                self.compact_with_gc_inner(cancel, job, ctx, yield_for_l0)
                     .await?;
             }
             if jobs_len == 0 {
@@ -2658,7 +2658,8 @@ impl Timeline {
             }
             return Ok(CompactionOutcome::Done);
         }
-        self.compact_with_gc_inner(cancel, job, ctx, no_yield).await
+        self.compact_with_gc_inner(cancel, job, ctx, yield_for_l0)
+            .await
     }

     async fn compact_with_gc_inner(
@@ -2666,7 +2667,7 @@ impl Timeline {
         &self,
         cancel: &CancellationToken,
         job: GcCompactJob,
         ctx: &RequestContext,
-        no_yield: bool,
+        yield_for_l0: bool,
     ) -> Result<CompactionOutcome, CompactionError> {
         // Block other compaction/GC tasks from running for now. GC-compaction could run along
         // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc.
@@ -2936,18 +2937,15 @@ impl Timeline {
                     if cancel.is_cancelled() {
                         return Err(CompactionError::ShuttingDown);
                     }
-                    if !no_yield {
-                        let should_yield = self
+                    let should_yield = yield_for_l0
+                        && self
                             .l0_compaction_trigger
                             .notified()
                             .now_or_never()
                             .is_some();
-                        if should_yield {
-                            tracing::info!(
-                                "preempt gc-compaction when downloading layers: too many L0 layers"
-                            );
-                            return Ok(CompactionOutcome::YieldForL0);
-                        }
+                    if should_yield {
+                        tracing::info!("preempt gc-compaction when downloading layers: too many L0 layers");
+                        return Ok(CompactionOutcome::YieldForL0);
                     }
                     let resident_layer = layer
                         .download_and_keep_resident(ctx)
@@ -3081,21 +3079,17 @@ impl Timeline {
                 return Err(CompactionError::ShuttingDown);
             }

-            if !no_yield {
-                keys_processed += 1;
-                if keys_processed % 1000 == 0 {
-                    let should_yield = self
-                        .l0_compaction_trigger
-                        .notified()
-                        .now_or_never()
-                        .is_some();
-                    if should_yield {
-                        tracing::info!(
-                            "preempt gc-compaction in the main loop: too many L0 layers"
-                        );
-                        return Ok(CompactionOutcome::YieldForL0);
-                    }
-                }
+            keys_processed += 1;
+            let should_yield = yield_for_l0
+                && keys_processed % 1000 == 0
+                && self
+                    .l0_compaction_trigger
+                    .notified()
+                    .now_or_never()
+                    .is_some();
+            if should_yield {
+                tracing::info!("preempt gc-compaction in the main loop: too many L0 layers");
+                return Ok(CompactionOutcome::YieldForL0);
             }
             if self.shard_identity.is_key_disposable(&key) {
                 // If this shard does not need to store this key, simply skip it.
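
A condensed before/after sketch of a typical call site, for illustration only:
`timeline`, `tenant`, `cancel`, and `ctx` are hypothetical bindings standing in
for the contexts visible in the hunks above, and error handling is elided.

```rust
use enumset::EnumSet; // `flags` is an EnumSet<CompactFlags>; pageserver types assumed in scope

// Before this patch: yielding was the default, so every caller that wanted
// compaction to run to completion (HTTP API handlers, tests) had to opt out.
let mut flags = EnumSet::empty();
flags |= CompactFlags::NoYield; // run compaction to completion
timeline.compact(&cancel, flags, &ctx).await?;

// After this patch: running to completion is the default. Only the background
// compaction loop opts in to yielding, and only when the separate L0 pass is
// enabled (otherwise there is nothing to yield to).
let mut flags = EnumSet::default();
if tenant.get_compaction_l0_first() {
    flags |= CompactFlags::YieldForL0;
}
timeline.compact(&cancel, flags, &ctx).await?;
```

With the inversion, forgetting a flag fails safe: a caller passing empty flags
gets compaction to completion rather than an unexpected yield.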