From 92d95b08cfba6973bc735538fe5778f40b0dd45c Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Wed, 22 Jan 2025 19:15:46 -0500
Subject: [PATCH] fix(pageserver): extend split job key range to the end
 (#10484)

## Problem

Not really a bug fix, but hopefully this helps reproduce
https://github.com/neondatabase/neon/issues/10482 more often.

If the layer map does not contain a layer that ends exactly at the end
of the compaction job's key range, the current split algorithm produces
a last job that ends at the maximum layer key. This patch extends that
last job all the way to the compaction job's end key.

For example, the user requests a compaction of 0000..FFFF. However, we
only have a layer 0000..3000 in the layer map, so the split job will
have a range of 0000..3000 instead of 0000..FFFF.

This is not a correctness issue but it would be better to fix it so that
we can get consistent job splits.

## Summary of changes

Compaction job splitting now always covers the full specified key range.

Signed-off-by: Alex Chi Z <chi@neon.tech>
---
 pageserver/src/tenant/timeline/compaction.rs | 6 ++++++
 test_runner/regress/test_compaction.py       | 9 +++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index 57fc415d06..4d5dc2d8a9 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -2212,6 +2212,12 @@ impl Timeline {
                 } else {
                     end
                 };
+                let end = if ranges_num == idx + 1 {
+                    // extend the compaction range to the end of the key range if it's the last partition
+                    end.max(job.compact_key_range.end)
+                } else {
+                    end
+                };
                 info!(
                     "splitting compaction job: {}..{}, estimated_size={}",
                     start, end, total_size
diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py
index d0a2349ccf..fde26e1533 100644
--- a/test_runner/regress/test_compaction.py
+++ b/test_runner/regress/test_compaction.py
@@ -150,8 +150,7 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder, with_b
     child_workloads: list[Workload] = []
 
     for i in range(1, churn_rounds + 1):
-        if i % 10 == 0:
-            log.info(f"Running churn round {i}/{churn_rounds} ...")
+        log.info(f"Running churn round {i}/{churn_rounds} ...")
         if i % 10 == 5 and with_branches == "with_branches":
             branch_name = f"child-{i}"
             branch_timeline_id = env.create_branch(branch_name)
@@ -172,8 +171,10 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder, with_b
                     "sub_compaction_max_job_size_mb": 16,
                 },
             )
-
-        workload.churn_rows(row_count, env.pageserver.id)
+        # do not wait for upload so that we can see if gc_compaction works well with data being ingested
+        workload.churn_rows(row_count, env.pageserver.id, upload=False)
+        time.sleep(1)
+        workload.validate(env.pageserver.id)
 
     def compaction_finished():
         queue_depth = len(ps_http.timeline_compact_info(tenant_id, timeline_id))