From 60fc1e8cc8a906e8b37ee795fe5fca666703fbec Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Wed, 14 Aug 2024 16:48:15 +0300 Subject: [PATCH] chore: even more responsive compaction cancellation (#8725) Some benchmarks and tests might still fail because of #8655 (tracked in #8708) because we are not fast enough to shut down ([one evidence]). This is partially explained by the current validation mode of the streaming k-merge, but otherwise by the fact that this is where we spend a lot of time in compaction. Outside of L0 => L1 compaction, the image layer generation is already guarded by vectored reads doing cancellation checks. 32768 is a wild guess based on looking at how many keys we put in each layer in a bench (1-2 million), but I assume it will be a good enough divisor. Doing checks more often will start showing up as contention which we cannot currently measure. Doing checks less often might be reasonable. [one evidence]: https://neon-github-public-dev.s3.amazonaws.com/reports/main/10384136483/index.html#suites/9681106e61a1222669b9d22ab136d07b/96e6d53af234924/ Earlier PR: #8706. --- pageserver/src/tenant/timeline.rs | 7 ++++++- pageserver/src/tenant/timeline/compaction.rs | 13 +++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index d437724673..b4d908b130 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -4540,7 +4540,12 @@ impl Timeline { new_images: &[ResidentLayer], layers_to_remove: &[Layer], ) -> Result<(), CompactionError> { - let mut guard = self.layers.write().await; + let mut guard = tokio::select! 
{ + guard = self.layers.write() => guard, + _ = self.cancel.cancelled() => { + return Err(CompactionError::ShuttingDown); + } + }; let mut duplicated_layers = HashSet::new(); diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index e24459e7b9..7370ec1386 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1048,11 +1048,22 @@ impl Timeline { let mut dup_end_lsn: Lsn = Lsn::INVALID; // end LSN of layer containing values of the single key let mut next_hole = 0; // index of next hole in holes vector + let mut keys = 0; + while let Some((key, lsn, value)) = all_values_iter .next(ctx) .await .map_err(CompactionError::Other)? { + keys += 1; + + if keys % 32_768 == 0 && self.cancel.is_cancelled() { + // avoid hitting the cancellation token on every key. in benches, we end up + // shuffling an order of million keys per layer, this means we'll check it + // around tens of times per layer. + return Err(CompactionError::ShuttingDown); + } + let same_key = prev_key.map_or(false, |prev_key| prev_key == key); // We need to check key boundaries once we reach next key or end of layer with the same key if !same_key || lsn == dup_end_lsn { @@ -1157,6 +1168,8 @@ impl Timeline { .await .map_err(CompactionError::Other)?, ); + + keys = 0; } writer