feat(pageserver): drop disposable keys during gc-compaction (#9765)

close https://github.com/neondatabase/neon/issues/9552, close
https://github.com/neondatabase/neon/issues/8920, part of
https://github.com/neondatabase/neon/issues/9114

## Summary of changes

* Drop keys that do not belong to this shard during gc-compaction, to avoid
constructing history that might have been truncated during shard
compaction (a minimal sketch of the idea follows this list).
* Run gc-compaction at the end of the shard compaction test.
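
As a minimal sketch of why a per-key check is needed (the `ShardIdentity` below is a hypothetical stand-in with a simplified hash, not the pageserver's actual type): a key's shard is determined by hashing, so adjacent keys can belong to different shards, and disposability must be checked key by key inside the compaction loop rather than by skipping whole key ranges or layer files.

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Hypothetical stand-in for the pageserver's `ShardIdentity`;
/// the real implementation uses more involved, stripe-aware key hashing.
struct ShardIdentity {
    shard_number: u64,
    shard_count: u64,
}

impl ShardIdentity {
    /// A key is disposable for this shard if it hashes to a different shard.
    fn is_key_disposable(&self, key: &u64) -> bool {
        let mut hasher = DefaultHasher::new();
        key.hash(&mut hasher);
        hasher.finish() % self.shard_count != self.shard_number
    }
}

fn main() {
    let shard = ShardIdentity { shard_number: 0, shard_count: 4 };

    // gc-compaction iterates key-value pairs in key order. Because adjacent
    // keys can hash to different shards, there is no contiguous "foreign" key
    // range to skip wholesale; each key is checked individually, as in the
    // diff below.
    let kept: Vec<u64> = (0u64..16).filter(|k| !shard.is_key_disposable(k)).collect();
    println!("keys kept by shard 0: {kept:?}");
}
```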

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
Author:  Alex Chi Z. (committed by GitHub)
Date:    2024-11-18 14:27:52 -05:00
Parent:  5f0e9c9a94
Commit:  e5c89f3da3

2 changed files with 30 additions and 2 deletions


@@ -2021,6 +2021,14 @@ impl Timeline {
             if cancel.is_cancelled() {
                 return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error
             }
+            if self.shard_identity.is_key_disposable(&key) {
+                // If this shard does not need to store this key, simply skip it.
+                //
+                // This is not handled in the filter iterator because a key's shard is
+                // determined by hash, so we would not get any performance benefit from
+                // skipping whole layer files the way we can for key ranges.
+                continue;
+            }
             if !job_desc.compaction_key_range.contains(&key) {
                 if !desc.is_delta {
                     continue;


@@ -122,10 +122,19 @@ LARGE_STRIPES = 32768
 @pytest.mark.parametrize(
-    "shard_count,stripe_size", [(None, None), (4, TINY_STRIPES), (4, LARGE_STRIPES)]
+    "shard_count,stripe_size,gc_compaction",
+    [
+        (None, None, False),
+        (4, TINY_STRIPES, False),
+        (4, LARGE_STRIPES, False),
+        (4, LARGE_STRIPES, True),
+    ],
 )
 def test_sharding_compaction(
-    neon_env_builder: NeonEnvBuilder, stripe_size: int, shard_count: Optional[int]
+    neon_env_builder: NeonEnvBuilder,
+    stripe_size: int,
+    shard_count: Optional[int],
+    gc_compaction: bool,
 ):
     """
     Use small stripes, small layers, and small compaction thresholds to exercise how compaction
@@ -217,6 +226,17 @@ def test_sharding_compaction(
     # Assert that everything is still readable
     workload.validate()
 
+    if gc_compaction:
+        # Trigger gc-compaction to get more coverage for it, piggybacking on
+        # the existing workload.
+        for shard in env.storage_controller.locate(tenant_id):
+            pageserver = env.get_pageserver(shard["node_id"])
+            tenant_shard_id = shard["shard_id"]
+            pageserver.http_client().timeline_compact(
+                tenant_shard_id,
+                timeline_id,
+                enhanced_gc_bottom_most_compaction=True,
+            )
+
 class CompactionAlgorithm(str, enum.Enum):
     LEGACY = "legacy"