Mirror of https://github.com/neondatabase/neon.git, synced 2026-01-07 05:22:56 +00:00
pageserver: invert CompactFlags::NoYield as YieldForL0 (#11382)
## Problem

`CompactFlags::NoYield` was a bit inconvenient, since every caller except for the background compaction loop should generally set it (e.g. HTTP API calls, tests, etc.). It was also inconsistent with `CompactionOutcome::YieldForL0`.

## Summary of changes

Invert `CompactFlags::NoYield` as `CompactFlags::YieldForL0`. There should be no behavioral changes.
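Roughly, the intended call-site pattern after the inversion looks like the sketch below. It is illustrative only: the stripped-down `CompactFlags` enum and the `compaction_flags` helper are hypothetical stand-ins (the real enum lives in the pageserver and has more variants, and requires the `enumset` crate), but it shows who now opts in to yielding.

```rust
use enumset::{EnumSet, EnumSetType};

// Hypothetical, stripped-down stand-in for the pageserver's flag set.
#[derive(EnumSetType, Debug)]
enum CompactFlags {
    ForceL0Compaction,
    OnlyL0Compaction,
    YieldForL0,
}

// Before this change: every caller except the background loop had to set CompactFlags::NoYield.
// After: only the background compaction loop sets YieldForL0, and only when the separate L0
// pass (`compaction_l0_first`) is enabled, since yielding has no effect otherwise.
fn compaction_flags(background_loop: bool, compaction_l0_first: bool) -> EnumSet<CompactFlags> {
    let mut flags = EnumSet::empty();
    if background_loop && compaction_l0_first {
        flags |= CompactFlags::YieldForL0;
    }
    flags
}

fn main() {
    // HTTP API handlers and tests: run compaction to completion by default.
    assert!(compaction_flags(false, true).is_empty());
    // Background compaction loop with compaction_l0_first enabled: yield for L0.
    assert!(compaction_flags(true, true).contains(CompactFlags::YieldForL0));
}
```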
@@ -2256,7 +2256,6 @@ async fn timeline_compact_handler(
     let state = get_state(&request);

     let mut flags = EnumSet::empty();
-    flags |= CompactFlags::NoYield; // run compaction to completion

     if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
         flags |= CompactFlags::ForceL0Compaction;
@@ -2417,7 +2416,6 @@ async fn timeline_checkpoint_handler(
     let state = get_state(&request);

     let mut flags = EnumSet::empty();
-    flags |= CompactFlags::NoYield; // run compaction to completion
     if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
         flags |= CompactFlags::ForceL0Compaction;
     }
@@ -3080,6 +3080,7 @@ impl Tenant {
         let mut has_pending_l0 = false;
         for timeline in compact_l0 {
             let ctx = &ctx.with_scope_timeline(&timeline);
+            // NB: don't set CompactFlags::YieldForL0, since this is an L0-only compaction pass.
             let outcome = timeline
                 .compact(cancel, CompactFlags::OnlyL0Compaction.into(), ctx)
                 .instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id))
@@ -3097,14 +3098,9 @@ impl Tenant {
             }
         }

-        // Pass 2: image compaction and timeline offloading. If any timelines have accumulated
-        // more L0 layers, they may also be compacted here.
-        //
-        // NB: image compaction may yield if there is pending L0 compaction.
-        //
-        // TODO: it will only yield if there is pending L0 compaction on the same timeline. If a
-        // different timeline needs compaction, it won't. It should check `l0_compaction_trigger`.
-        // We leave this for a later PR.
+        // Pass 2: image compaction and timeline offloading. If any timelines have accumulated more
+        // L0 layers, they may also be compacted here. Image compaction will yield if there is
+        // pending L0 compaction on any tenant timeline.
         //
         // TODO: consider ordering timelines by some priority, e.g. time since last full compaction,
         // amount of L1 delta debt or garbage, offload-eligible timelines first, etc.
@@ -3115,8 +3111,14 @@ impl Tenant {
             }
             let ctx = &ctx.with_scope_timeline(&timeline);

+            // Yield for L0 if the separate L0 pass is enabled (otherwise there's no point).
+            let mut flags = EnumSet::default();
+            if self.get_compaction_l0_first() {
+                flags |= CompactFlags::YieldForL0;
+            }
+
             let mut outcome = timeline
-                .compact(cancel, EnumSet::default(), ctx)
+                .compact(cancel, flags, ctx)
                 .instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id))
                 .await
                 .inspect_err(|err| self.maybe_trip_compaction_breaker(err))?;
@@ -6516,11 +6518,7 @@ mod tests {

         tline.freeze_and_flush().await?;
         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         let mut writer = tline.writer().await;
@@ -6537,11 +6535,7 @@ mod tests {

         tline.freeze_and_flush().await?;
         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         let mut writer = tline.writer().await;
@@ -6558,11 +6552,7 @@ mod tests {

         tline.freeze_and_flush().await?;
         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         let mut writer = tline.writer().await;
@@ -6579,11 +6569,7 @@ mod tests {

         tline.freeze_and_flush().await?;
         tline
-            .compact(
-                &CancellationToken::new(),
-                CompactFlags::NoYield.into(),
-                &ctx,
-            )
+            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)
             .await?;

         assert_eq!(
@@ -6666,9 +6652,7 @@ mod tests {
         timeline.freeze_and_flush().await?;
         if compact {
             // this requires timeline to be &Arc<Timeline>
-            timeline
-                .compact(&cancel, CompactFlags::NoYield.into(), ctx)
-                .await?;
+            timeline.compact(&cancel, EnumSet::default(), ctx).await?;
         }

         // this doesn't really need to use the timeline_id target, but it is closer to what it
@@ -6995,7 +6979,6 @@ mod tests {
         child_timeline.freeze_and_flush().await?;
         let mut flags = EnumSet::new();
         flags.insert(CompactFlags::ForceRepartition);
-        flags.insert(CompactFlags::NoYield);
         child_timeline
             .compact(&CancellationToken::new(), flags, &ctx)
             .await?;
@@ -7374,9 +7357,7 @@ mod tests {

         // Perform a cycle of flush, compact, and GC
         tline.freeze_and_flush().await?;
-        tline
-            .compact(&cancel, CompactFlags::NoYield.into(), &ctx)
-            .await?;
+        tline.compact(&cancel, EnumSet::default(), &ctx).await?;
         tenant
             .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
             .await?;
@@ -7705,7 +7686,6 @@ mod tests {
             let mut flags = EnumSet::new();
             flags.insert(CompactFlags::ForceImageLayerCreation);
             flags.insert(CompactFlags::ForceRepartition);
-            flags.insert(CompactFlags::NoYield);
             flags
         } else {
             EnumSet::empty()
@@ -7756,9 +7736,7 @@ mod tests {
         let before_num_l0_delta_files =
             tline.layers.read().await.layer_map()?.level0_deltas().len();

-        tline
-            .compact(&cancel, CompactFlags::NoYield.into(), &ctx)
-            .await?;
+        tline.compact(&cancel, EnumSet::default(), &ctx).await?;

         let after_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len();

@@ -7923,7 +7901,6 @@ mod tests {
                 let mut flags = EnumSet::new();
                 flags.insert(CompactFlags::ForceImageLayerCreation);
                 flags.insert(CompactFlags::ForceRepartition);
-                flags.insert(CompactFlags::NoYield);
                 flags
             },
             &ctx,
@@ -8386,7 +8363,6 @@ mod tests {
                 let mut flags = EnumSet::new();
                 flags.insert(CompactFlags::ForceImageLayerCreation);
                 flags.insert(CompactFlags::ForceRepartition);
-                flags.insert(CompactFlags::NoYield);
                 flags
             },
             &ctx,
@@ -8454,7 +8430,6 @@ mod tests {
                 let mut flags = EnumSet::new();
                 flags.insert(CompactFlags::ForceImageLayerCreation);
                 flags.insert(CompactFlags::ForceRepartition);
-                flags.insert(CompactFlags::NoYield);
                 flags
             },
             &ctx,
@@ -870,9 +870,14 @@ pub(crate) enum CompactFlags {
     OnlyL0Compaction,
     EnhancedGcBottomMostCompaction,
     DryRun,
-    /// Disables compaction yielding e.g. due to high L0 count. This is set e.g. when requesting
-    /// compaction via HTTP API.
-    NoYield,
+    /// Makes image compaction yield if there's pending L0 compaction. This should always be used in
+    /// the background compaction task, since we want to aggressively compact down L0 to bound
+    /// read amplification.
+    ///
+    /// It only makes sense to use this when `compaction_l0_first` is enabled (such that we yield to
+    /// an L0 compaction pass), and without `OnlyL0Compaction` (L0 compaction shouldn't yield for L0
+    /// compaction).
+    YieldForL0,
 }

 #[serde_with::serde_as]
@@ -1891,18 +1896,19 @@ impl Timeline {
         // out by other background tasks (including image compaction). We request this via
         // `BackgroundLoopKind::L0Compaction`.
         //
-        // If this is a regular compaction pass, and L0-only compaction is enabled in the config,
-        // then we should yield for immediate L0 compaction if necessary while we're waiting for the
-        // background task semaphore. There's no point yielding otherwise, since we'd just end up
-        // right back here.
+        // Yield for pending L0 compaction while waiting for the semaphore.
         let is_l0_only = options.flags.contains(CompactFlags::OnlyL0Compaction);
         let semaphore_kind = match is_l0_only && self.get_compaction_l0_semaphore() {
             true => BackgroundLoopKind::L0Compaction,
             false => BackgroundLoopKind::Compaction,
         };
-        let yield_for_l0 = !is_l0_only
-            && self.get_compaction_l0_first()
-            && !options.flags.contains(CompactFlags::NoYield);
+        let yield_for_l0 = options.flags.contains(CompactFlags::YieldForL0);
+        if yield_for_l0 {
+            // If this is an L0 pass, it doesn't make sense to yield for L0.
+            debug_assert!(!is_l0_only, "YieldForL0 during L0 pass");
+            // If `compaction_l0_first` is disabled, there's no point yielding.
+            debug_assert!(self.get_compaction_l0_first(), "YieldForL0 without L0 pass");
+        }

         let acquire = async move {
             let guard = self.compaction_lock.lock().await;
@@ -394,8 +394,8 @@ impl GcCompactionQueue {
         if job.dry_run {
             flags |= CompactFlags::DryRun;
         }
-        if options.flags.contains(CompactFlags::NoYield) {
-            flags |= CompactFlags::NoYield;
+        if options.flags.contains(CompactFlags::YieldForL0) {
+            flags |= CompactFlags::YieldForL0;
         }
         let options = CompactOptions {
             flags,
@@ -983,7 +983,7 @@ impl Timeline {

         // Yield if we have pending L0 compaction. The scheduler will do another pass.
         if (l0_outcome == CompactionOutcome::Pending || l0_outcome == CompactionOutcome::YieldForL0)
-            && !options.flags.contains(CompactFlags::NoYield)
+            && options.flags.contains(CompactFlags::YieldForL0)
         {
             info!("image/ancestor compaction yielding for L0 compaction");
             return Ok(CompactionOutcome::YieldForL0);
@@ -1028,7 +1028,7 @@ impl Timeline {
                     .load()
                     .as_ref()
                     .clone(),
-                !options.flags.contains(CompactFlags::NoYield),
+                options.flags.contains(CompactFlags::YieldForL0),
             )
             .await
             .inspect_err(|err| {
@@ -2635,7 +2635,7 @@ impl Timeline {
     ) -> Result<CompactionOutcome, CompactionError> {
         let sub_compaction = options.sub_compaction;
         let job = GcCompactJob::from_compact_options(options.clone());
-        let no_yield = options.flags.contains(CompactFlags::NoYield);
+        let yield_for_l0 = options.flags.contains(CompactFlags::YieldForL0);
         if sub_compaction {
             info!(
                 "running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"
@@ -2650,7 +2650,7 @@ impl Timeline {
                     idx + 1,
                     jobs_len
                 );
-                self.compact_with_gc_inner(cancel, job, ctx, no_yield)
+                self.compact_with_gc_inner(cancel, job, ctx, yield_for_l0)
                     .await?;
             }
             if jobs_len == 0 {
@@ -2658,7 +2658,8 @@ impl Timeline {
             }
             return Ok(CompactionOutcome::Done);
         }
-        self.compact_with_gc_inner(cancel, job, ctx, no_yield).await
+        self.compact_with_gc_inner(cancel, job, ctx, yield_for_l0)
+            .await
     }

     async fn compact_with_gc_inner(
@@ -2666,7 +2667,7 @@ impl Timeline {
         cancel: &CancellationToken,
         job: GcCompactJob,
         ctx: &RequestContext,
-        no_yield: bool,
+        yield_for_l0: bool,
     ) -> Result<CompactionOutcome, CompactionError> {
         // Block other compaction/GC tasks from running for now. GC-compaction could run along
         // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc.
@@ -2936,18 +2937,15 @@ impl Timeline {
             if cancel.is_cancelled() {
                 return Err(CompactionError::ShuttingDown);
             }
-            if !no_yield {
-                let should_yield = self
+            let should_yield = yield_for_l0
+                && self
                     .l0_compaction_trigger
                     .notified()
                     .now_or_never()
                     .is_some();
-                if should_yield {
-                    tracing::info!(
-                        "preempt gc-compaction when downloading layers: too many L0 layers"
-                    );
-                    return Ok(CompactionOutcome::YieldForL0);
-                }
-            }
+            if should_yield {
+                tracing::info!("preempt gc-compaction when downloading layers: too many L0 layers");
+                return Ok(CompactionOutcome::YieldForL0);
+            }
             let resident_layer = layer
                 .download_and_keep_resident(ctx)
@@ -3081,21 +3079,17 @@ impl Timeline {
                 return Err(CompactionError::ShuttingDown);
             }

-            if !no_yield {
-                keys_processed += 1;
-                if keys_processed % 1000 == 0 {
-                    let should_yield = self
-                        .l0_compaction_trigger
-                        .notified()
-                        .now_or_never()
-                        .is_some();
-                    if should_yield {
-                        tracing::info!(
-                            "preempt gc-compaction in the main loop: too many L0 layers"
-                        );
-                        return Ok(CompactionOutcome::YieldForL0);
-                    }
-                }
-            }
+            keys_processed += 1;
+            let should_yield = yield_for_l0
+                && keys_processed % 1000 == 0
+                && self
+                    .l0_compaction_trigger
+                    .notified()
+                    .now_or_never()
+                    .is_some();
+            if should_yield {
+                tracing::info!("preempt gc-compaction in the main loop: too many L0 layers");
+                return Ok(CompactionOutcome::YieldForL0);
+            }
             if self.shard_identity.is_key_disposable(&key) {
                 // If this shard does not need to store this key, simply skip it.
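The gc-compaction preemption above hinges on a non-blocking poll of the tenant's `l0_compaction_trigger` notification: `notified().now_or_never()` resolves only if a wakeup is already pending, so the hot loop never blocks. Below is a rough standalone sketch of that pattern using `tokio::sync::Notify` and `futures::FutureExt::now_or_never`; the names and loop structure are illustrative, not the pageserver's exact code.

```rust
use futures::FutureExt; // provides now_or_never()
use tokio::sync::Notify;

fn main() {
    // Stand-in for the tenant's shared trigger; the L0 scheduler would call
    // notify_one() when L0 debt crosses the compaction threshold.
    let l0_compaction_trigger = Notify::new();
    l0_compaction_trigger.notify_one(); // pretend L0 pressure has built up

    let mut keys_processed = 0u64;
    for _key in 0..10_000u64 {
        keys_processed += 1;
        // Non-blocking check every 1000 keys: now_or_never() returns Some(())
        // only if a notification permit is already pending.
        let should_yield = keys_processed % 1000 == 0
            && l0_compaction_trigger.notified().now_or_never().is_some();
        if should_yield {
            println!("yielding for L0 compaction after {keys_processed} keys");
            break;
        }
        // ... process one key ...
    }
}
```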