From 80596feeaa5fc50b2639b7e9a15d5196bad31d1b Mon Sep 17 00:00:00 2001
From: Erik Grinaker
Date: Tue, 1 Apr 2025 13:43:58 +0200
Subject: [PATCH] pageserver: invert `CompactFlags::NoYield` as `YieldForL0` (#11382)

## Problem

`CompactFlags::NoYield` was a bit inconvenient, since every caller except the
background compaction loop generally had to set it (e.g. HTTP API calls, tests,
etc.). It was also inconsistent with `CompactionOutcome::YieldForL0`.

## Summary of changes

Invert `CompactFlags::NoYield` as `CompactFlags::YieldForL0`. There should be
no behavioral changes. An illustrative before/after sketch of typical call
sites is appended after the diff.
---
 pageserver/src/http/routes.rs                |  2 -
 pageserver/src/tenant.rs                     | 61 ++++++--------
 pageserver/src/tenant/timeline.rs            | 26 +++++----
 pageserver/src/tenant/timeline/compaction.rs | 56 ++++++++----------
 4 files changed, 59 insertions(+), 86 deletions(-)

diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 5a13fb1387..2bedf9e11a 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -2256,7 +2256,6 @@ async fn timeline_compact_handler(
     let state = get_state(&request);

     let mut flags = EnumSet::empty();
-    flags |= CompactFlags::NoYield; // run compaction to completion
     if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
         flags |= CompactFlags::ForceL0Compaction;
     }
@@ -2417,7 +2416,6 @@ async fn timeline_checkpoint_handler(
     let state = get_state(&request);

     let mut flags = EnumSet::empty();
-    flags |= CompactFlags::NoYield; // run compaction to completion
     if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
         flags |= CompactFlags::ForceL0Compaction;
     }
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index e7d8ed75ed..3ed4103792 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -3080,6 +3080,7 @@ impl Tenant {
         let mut has_pending_l0 = false;
         for timeline in compact_l0 {
             let ctx = &ctx.with_scope_timeline(&timeline);
+            // NB: don't set CompactFlags::YieldForL0, since this is an L0-only compaction pass.
             let outcome = timeline
                 .compact(cancel, CompactFlags::OnlyL0Compaction.into(), ctx)
                 .instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id))
                 .await
@@ -3097,14 +3098,9 @@ impl Tenant {
             }
         }

-        // Pass 2: image compaction and timeline offloading. If any timelines have accumulated
-        // more L0 layers, they may also be compacted here.
-        //
-        // NB: image compaction may yield if there is pending L0 compaction.
-        //
-        // TODO: it will only yield if there is pending L0 compaction on the same timeline. If a
-        // different timeline needs compaction, it won't. It should check `l0_compaction_trigger`.
-        // We leave this for a later PR.
+        // Pass 2: image compaction and timeline offloading. If any timelines have accumulated more
+        // L0 layers, they may also be compacted here. Image compaction will yield if there is
+        // pending L0 compaction on any tenant timeline.
         //
         // TODO: consider ordering timelines by some priority, e.g. time since last full compaction,
         // amount of L1 delta debt or garbage, offload-eligible timelines first, etc.
@@ -3115,8 +3111,14 @@ impl Tenant {
             }

             let ctx = &ctx.with_scope_timeline(&timeline);
+            // Yield for L0 if the separate L0 pass is enabled (otherwise there's no point).
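+            // Yielding hands control back to the compaction scheduler, which runs another
+            // L0-only pass across the tenant's timelines before image compaction resumes here.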
+            let mut flags = EnumSet::default();
+            if self.get_compaction_l0_first() {
+                flags |= CompactFlags::YieldForL0;
+            }
+
             let mut outcome = timeline
-                .compact(cancel, EnumSet::default(), ctx)
+                .compact(cancel, flags, ctx)
                 .instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id))
                 .await
                 .inspect_err(|err| self.maybe_trip_compaction_breaker(err))?;
@@ -6516,11 +6518,7 @@ mod tests {
         tline.freeze_and_flush().await?;

         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         let mut writer = tline.writer().await;
@@ -6537,11 +6535,7 @@ mod tests {
         tline.freeze_and_flush().await?;

         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         let mut writer = tline.writer().await;
@@ -6558,11 +6552,7 @@ mod tests {
         tline.freeze_and_flush().await?;

         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         let mut writer = tline.writer().await;
@@ -6579,11 +6569,7 @@ mod tests {
         tline.freeze_and_flush().await?;

         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         assert_eq!(
@@ -6666,9 +6652,7 @@ mod tests {
         timeline.freeze_and_flush().await?;
         if compact {
             // this requires timeline to be &Arc<Timeline>
-            timeline
-                .compact(&cancel, CompactFlags::NoYield.into(), ctx)
-                .await?;
+            timeline.compact(&cancel, EnumSet::default(), ctx).await?;
         }

         // this doesn't really need to use the timeline_id target, but it is closer to what it
@@ -6995,7 +6979,6 @@ mod tests {
         child_timeline.freeze_and_flush().await?;
         let mut flags = EnumSet::new();
         flags.insert(CompactFlags::ForceRepartition);
-        flags.insert(CompactFlags::NoYield);
         child_timeline
             .compact(&CancellationToken::new(), flags, &ctx)
             .await?;
@@ -7374,9 +7357,7 @@ mod tests {

         // Perform a cycle of flush, compact, and GC
         tline.freeze_and_flush().await?;
-        tline
-            .compact(&cancel, CompactFlags::NoYield.into(), &ctx)
-            .await?;
+        tline.compact(&cancel, EnumSet::default(), &ctx).await?;
         tenant
             .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
             .await?;
@@ -7705,7 +7686,6 @@ mod tests {
             let mut flags = EnumSet::new();
             flags.insert(CompactFlags::ForceImageLayerCreation);
             flags.insert(CompactFlags::ForceRepartition);
-            flags.insert(CompactFlags::NoYield);
             flags
         } else {
             EnumSet::empty()
@@ -7756,9 +7736,7 @@ mod tests {
         let before_num_l0_delta_files =
             tline.layers.read().await.layer_map()?.level0_deltas().len();

-        tline
-            .compact(&cancel, CompactFlags::NoYield.into(), &ctx)
-            .await?;
+        tline.compact(&cancel, EnumSet::default(), &ctx).await?;

         let after_num_l0_delta_files =
             tline.layers.read().await.layer_map()?.level0_deltas().len();
@@ -7923,7 +7901,6 @@ mod tests {
                 let mut flags = EnumSet::new();
                 flags.insert(CompactFlags::ForceImageLayerCreation);
                 flags.insert(CompactFlags::ForceRepartition);
-                flags.insert(CompactFlags::NoYield);
                 flags
             },
             &ctx,
@@ -8386,7 +8363,6 @@ mod tests {
                 let mut flags = EnumSet::new();
                 flags.insert(CompactFlags::ForceImageLayerCreation);
                 flags.insert(CompactFlags::ForceRepartition);
-                flags.insert(CompactFlags::NoYield);
                 flags
             },
             &ctx,
@@ -8454,7 +8430,6 @@ mod tests {
                 let mut flags = EnumSet::new();
                 flags.insert(CompactFlags::ForceImageLayerCreation);
                 flags.insert(CompactFlags::ForceRepartition);
-                flags.insert(CompactFlags::NoYield);
                 flags
             },
             &ctx,
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 75f9225302..7c9c9a45d4 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -870,9 +870,14 @@ pub(crate) enum CompactFlags {
     OnlyL0Compaction,
     EnhancedGcBottomMostCompaction,
     DryRun,
-    /// Disables compaction yielding e.g. due to high L0 count. This is set e.g. when requesting
-    /// compaction via HTTP API.
-    NoYield,
+    /// Makes image compaction yield if there's pending L0 compaction. This should always be used in
+    /// the background compaction task, since we want to aggressively compact down L0 to bound
+    /// read amplification.
+    ///
+    /// It only makes sense to use this when `compaction_l0_first` is enabled (such that we yield to
+    /// an L0 compaction pass), and without `OnlyL0Compaction` (L0 compaction shouldn't yield for L0
+    /// compaction).
+    YieldForL0,
 }

 #[serde_with::serde_as]
@@ -1891,18 +1896,19 @@ impl Timeline {
         // out by other background tasks (including image compaction). We request this via
         // `BackgroundLoopKind::L0Compaction`.
         //
-        // If this is a regular compaction pass, and L0-only compaction is enabled in the config,
-        // then we should yield for immediate L0 compaction if necessary while we're waiting for the
-        // background task semaphore. There's no point yielding otherwise, since we'd just end up
-        // right back here.
+        // Yield for pending L0 compaction while waiting for the semaphore.
         let is_l0_only = options.flags.contains(CompactFlags::OnlyL0Compaction);
         let semaphore_kind = match is_l0_only && self.get_compaction_l0_semaphore() {
             true => BackgroundLoopKind::L0Compaction,
             false => BackgroundLoopKind::Compaction,
         };
-        let yield_for_l0 = !is_l0_only
-            && self.get_compaction_l0_first()
-            && !options.flags.contains(CompactFlags::NoYield);
+        let yield_for_l0 = options.flags.contains(CompactFlags::YieldForL0);
+        if yield_for_l0 {
+            // If this is an L0 pass, it doesn't make sense to yield for L0.
+            debug_assert!(!is_l0_only, "YieldForL0 during L0 pass");
+            // If `compaction_l0_first` is disabled, there's no point yielding.
+            debug_assert!(self.get_compaction_l0_first(), "YieldForL0 without L0 pass");
+        }

         let acquire = async move {
             let guard = self.compaction_lock.lock().await;
diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index 711501caa9..2276ed428b 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -394,8 +394,8 @@ impl GcCompactionQueue {
         if job.dry_run {
             flags |= CompactFlags::DryRun;
         }
-        if options.flags.contains(CompactFlags::NoYield) {
-            flags |= CompactFlags::NoYield;
+        if options.flags.contains(CompactFlags::YieldForL0) {
+            flags |= CompactFlags::YieldForL0;
         }
         let options = CompactOptions {
             flags,
@@ -983,7 +983,7 @@ impl Timeline {
         // Yield if we have pending L0 compaction. The scheduler will do another pass.
         if (l0_outcome == CompactionOutcome::Pending
             || l0_outcome == CompactionOutcome::YieldForL0)
-            && !options.flags.contains(CompactFlags::NoYield)
+            && options.flags.contains(CompactFlags::YieldForL0)
         {
             info!("image/ancestor compaction yielding for L0 compaction");
             return Ok(CompactionOutcome::YieldForL0);
         }
@@ -1028,7 +1028,7 @@ impl Timeline {
                         .load()
                         .as_ref()
                         .clone(),
-                    !options.flags.contains(CompactFlags::NoYield),
+                    options.flags.contains(CompactFlags::YieldForL0),
                 )
                 .await
                 .inspect_err(|err| {
@@ -2635,7 +2635,7 @@ impl Timeline {
     ) -> Result<CompactionOutcome, CompactionError> {
         let sub_compaction = options.sub_compaction;
         let job = GcCompactJob::from_compact_options(options.clone());
-        let no_yield = options.flags.contains(CompactFlags::NoYield);
+        let yield_for_l0 = options.flags.contains(CompactFlags::YieldForL0);
         if sub_compaction {
             info!(
                 "running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"
@@ -2650,7 +2650,7 @@ impl Timeline {
                     idx + 1,
                     jobs_len
                 );
-                self.compact_with_gc_inner(cancel, job, ctx, no_yield)
+                self.compact_with_gc_inner(cancel, job, ctx, yield_for_l0)
                     .await?;
             }
             if jobs_len == 0 {
@@ -2658,7 +2658,8 @@ impl Timeline {
             }
             return Ok(CompactionOutcome::Done);
         }
-        self.compact_with_gc_inner(cancel, job, ctx, no_yield).await
+        self.compact_with_gc_inner(cancel, job, ctx, yield_for_l0)
+            .await
     }

     async fn compact_with_gc_inner(
@@ -2666,7 +2667,7 @@ impl Timeline {
         &self,
         cancel: &CancellationToken,
         job: GcCompactJob,
         ctx: &RequestContext,
-        no_yield: bool,
+        yield_for_l0: bool,
     ) -> Result<CompactionOutcome, CompactionError> {
         // Block other compaction/GC tasks from running for now. GC-compaction could run along
         // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc.
@@ -2936,18 +2937,15 @@ impl Timeline {
                     if cancel.is_cancelled() {
                         return Err(CompactionError::ShuttingDown);
                     }
-                    if !no_yield {
-                        let should_yield = self
+                    let should_yield = yield_for_l0
+                        && self
                             .l0_compaction_trigger
                             .notified()
                             .now_or_never()
                             .is_some();
-                        if should_yield {
-                            tracing::info!(
-                                "preempt gc-compaction when downloading layers: too many L0 layers"
-                            );
-                            return Ok(CompactionOutcome::YieldForL0);
-                        }
+                    if should_yield {
+                        tracing::info!("preempt gc-compaction when downloading layers: too many L0 layers");
+                        return Ok(CompactionOutcome::YieldForL0);
                     }
                     let resident_layer = layer
                         .download_and_keep_resident(ctx)
@@ -3081,21 +3079,17 @@ impl Timeline {
                 return Err(CompactionError::ShuttingDown);
             }

-            if !no_yield {
-                keys_processed += 1;
-                if keys_processed % 1000 == 0 {
-                    let should_yield = self
-                        .l0_compaction_trigger
-                        .notified()
-                        .now_or_never()
-                        .is_some();
-                    if should_yield {
-                        tracing::info!(
-                            "preempt gc-compaction in the main loop: too many L0 layers"
-                        );
-                        return Ok(CompactionOutcome::YieldForL0);
-                    }
-                }
+            keys_processed += 1;
+            let should_yield = yield_for_l0
+                && keys_processed % 1000 == 0
+                && self
+                    .l0_compaction_trigger
+                    .notified()
+                    .now_or_never()
+                    .is_some();
+            if should_yield {
+                tracing::info!("preempt gc-compaction in the main loop: too many L0 layers");
+                return Ok(CompactionOutcome::YieldForL0);
             }
             if self.shard_identity.is_key_disposable(&key) {
                 // If this shard does not need to store this key, simply skip it.
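
A condensed before/after sketch of a typical call site, for illustration only:
`timeline`, `tenant`, `cancel`, and `ctx` are hypothetical bindings standing in
for the contexts visible in the hunks above, and error handling is elided.

```rust
use enumset::EnumSet; // `flags` is an EnumSet<CompactFlags>; pageserver types assumed in scope

// Before this patch: yielding was the default, so every caller that wanted
// compaction to run to completion (HTTP API handlers, tests) had to opt out.
let mut flags = EnumSet::empty();
flags |= CompactFlags::NoYield; // run compaction to completion
timeline.compact(&cancel, flags, &ctx).await?;

// After this patch: running to completion is the default. Only the background
// compaction loop opts in to yielding, and only when the separate L0 pass is
// enabled (otherwise there is nothing to yield to).
let mut flags = EnumSet::default();
if tenant.get_compaction_l0_first() {
    flags |= CompactFlags::YieldForL0;
}
timeline.compact(&cancel, flags, &ctx).await?;
```

With the inversion, forgetting a flag fails safe: a caller passing empty flags
gets compaction to completion rather than an unexpected yield.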