From 92d95b08cfba6973bc735538fe5778f40b0dd45c Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Wed, 22 Jan 2025 19:15:46 -0500 Subject: [PATCH] fix(pageserver): extend split job key range to the end (#10484) ## Problem Not really a bug fix, but hopefully this makes https://github.com/neondatabase/neon/issues/10482 reproduce more often. If the layer map does not contain layers that end exactly at the end of the compaction job's key range, the current split algorithm will produce a last job that ends at the maximum layer key. This patch extends it all the way to the compaction job's end key. For example, the user requests a compaction of 0000..FFFF. However, we only have a layer 0000..3000 in the layer map, so the split job will have a range of 0000..3000 instead of 0000..FFFF. This is not a correctness issue, but it would be better to fix it so that we get consistent job splits. ## Summary of changes The compaction job split will now always cover the full specified key range. 
Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 6 ++++++ test_runner/regress/test_compaction.py | 9 +++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 57fc415d06..4d5dc2d8a9 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -2212,6 +2212,12 @@ impl Timeline { } else { end }; + let end = if ranges_num == idx + 1 { + // extend the compaction range to the end of the key range if it's the last partition + end.max(job.compact_key_range.end) + } else { + end + }; info!( "splitting compaction job: {}..{}, estimated_size={}", start, end, total_size diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index d0a2349ccf..fde26e1533 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -150,8 +150,7 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder, with_b child_workloads: list[Workload] = [] for i in range(1, churn_rounds + 1): - if i % 10 == 0: - log.info(f"Running churn round {i}/{churn_rounds} ...") + log.info(f"Running churn round {i}/{churn_rounds} ...") if i % 10 == 5 and with_branches == "with_branches": branch_name = f"child-{i}" branch_timeline_id = env.create_branch(branch_name) @@ -172,8 +171,10 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder, with_b "sub_compaction_max_job_size_mb": 16, }, ) - - workload.churn_rows(row_count, env.pageserver.id) + # do not wait for upload so that we can see if gc_compaction works well with data being ingested + workload.churn_rows(row_count, env.pageserver.id, upload=False) + time.sleep(1) + workload.validate(env.pageserver.id) def compaction_finished(): queue_depth = len(ps_http.timeline_compact_info(tenant_id, timeline_id))