tests: add test_compaction_l0_memory (#8403)

This test reproduces the case of a writer creating a deep stack of L0
layers. It uses realistic layer sizes and writes several gigabytes of
data, so it runs as a performance test, although it validates memory
footprint rather than performance per se.

It acts as a regression test for two recent fixes:
- https://github.com/neondatabase/neon/pull/8401
- https://github.com/neondatabase/neon/pull/8391

In future, it will demonstrate the larger improvement from using a k-merge
iterator for L0 compaction (#8184), sketched below.
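For background, a k-merge iterator streams key/value pairs from k sorted
inputs while holding only one head element per input in memory, instead of
materializing the whole mapping. A conceptual sketch only, not the
pageserver's implementation:

    import heapq
    from typing import Iterable, Iterator, Tuple

    def k_merge(
        layers: Iterable[Iterable[Tuple[bytes, bytes]]],
    ) -> Iterator[Tuple[bytes, bytes]]:
        # heapq.merge lazily yields pairs in key order, keeping O(k) memory:
        # one head element per input layer at any time.
        return heapq.merge(*layers, key=lambda kv: kv[0])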

This test can be extended to enforce limits on the memory consumption of
other housekeeping steps: restart the pageserver, run the step in question,
and take the same "how much did RSS increase" measurement, as sketched below.
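The core of that measurement is small enough to reuse. A minimal sketch,
assuming only the "libmetrics_maxrss_kb" metric and the pageserver HTTP
client already used in this test (the helper name measure_rss_growth is
hypothetical):

    def measure_rss_growth(pageserver_http, step) -> float:
        """Run `step` and return the growth of the pageserver's RSS high-water mark, in bytes."""
        def rss_hwm() -> float:
            v = pageserver_http.get_metric_value("libmetrics_maxrss_kb")
            assert v is not None and v > 0
            return v * 1024  # the metric is reported in KiB

        before = rss_hwm()
        step()
        return rss_hwm() - before

Note that maxrss is a high-water mark, so the delta is only meaningful when
the step under test pushes RSS above the previous peak (the test below
asserts this with `after > before`).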
commit 975f8ac658 (parent 839a5724a4)
Author: John Spray
Date:   2024-07-17 18:35:27 +01:00 (committed by GitHub)

2 changed files with 99 additions and 0 deletions

test_runner/fixtures/pageserver/http.py

@@ -117,6 +117,9 @@ class LayerMapInfo:
     def image_layers(self) -> List[HistoricLayerInfo]:
         return [x for x in self.historic_layers if x.kind == "Image"]
 
+    def delta_l0_layers(self) -> List[HistoricLayerInfo]:
+        return [x for x in self.historic_layers if x.kind == "Delta" and x.l0]
+
     def historic_by_name(self) -> Set[str]:
         return set(x.layer_file_name for x in self.historic_layers)
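For context, this is how the test below consumes the new accessor (a usage
sketch, with tenant_id, timeline_id, and the fixture HTTP client assumed in
scope):

    info = pageserver_http.layer_map_info(tenant_id, timeline_id)
    l0_count = len(info.delta_l0_layers())  # depth of the pending L0 stack
    l0_bytes = sum(x.layer_file_size for x in info.delta_l0_layers())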

test_runner/performance/test_compaction.py

@@ -2,6 +2,7 @@ from contextlib import closing
 
 import pytest
 from fixtures.compare_fixtures import NeonCompare
 from fixtures.log_helper import log
+from fixtures.neon_fixtures import wait_for_last_flush_lsn
@@ -56,3 +57,98 @@ def test_compaction(neon_compare: NeonCompare):
     pageserver_http.timeline_compact(tenant_id, timeline_id)
 
     neon_compare.report_size()
+
+
+def test_compaction_l0_memory(neon_compare: NeonCompare):
+    """
+    Generate a large stack of L0s pending compaction into L1s, and
+    measure the pageserver's peak RSS while doing so.
+    """
+    env = neon_compare.env
+    pageserver_http = env.pageserver.http_client()
+
+    tenant_id, timeline_id = env.neon_cli.create_tenant(
+        conf={
+            # Initially disable compaction so that we will build up a stack of L0s
+            "compaction_period": "0s",
+            "gc_period": "0s",
+        }
+    )
+    neon_compare.tenant = tenant_id
+    neon_compare.timeline = timeline_id
+
+    endpoint = env.endpoints.create_start(
+        "main", tenant_id=tenant_id, config_lines=["shared_buffers=512MB"]
+    )
+
+    # Read the tenant's effective config and assert on checkpoint_distance and
+    # compaction_threshold: we do want to test with the defaults (the same values
+    # as in the field), but this test's workload size makes assumptions about them.
+    #
+    # If these assertions fail, it probably means we changed the defaults.
+    tenant_conf = pageserver_http.tenant_config(tenant_id)
+    assert tenant_conf.effective_config["checkpoint_distance"] == 256 * 1024 * 1024
+    assert tenant_conf.effective_config["compaction_threshold"] == 10
+
+    # Aim to write about 20 L0s, so that we will hit the limit on how many
+    # to compact at once
+    with closing(endpoint.connect()) as conn:
+        with conn.cursor() as cur:
+            for i in range(200):
+                cur.execute(f"create table tbl{i} (i int, j int);")
+                cur.execute(f"insert into tbl{i} values (generate_series(1, 1000), 0);")
+                for j in range(100):
+                    cur.execute(f"update tbl{i} set j = {j};")
+
+    wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
+    endpoint.stop()
+
+    # Check we have generated the L0 stack we expected
+    layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
+    initial_l0s = len(layers.delta_l0_layers())
+    initial_l0s_size = sum(x.layer_file_size for x in layers.delta_l0_layers())
+    log.info(f"l0s before compaction {initial_l0s} ({initial_l0s_size})")
+
+    def rss_hwm():
+        v = pageserver_http.get_metric_value("libmetrics_maxrss_kb")
+        assert v is not None
+        assert v > 0
+        return v * 1024
+
+    before = rss_hwm()
+    pageserver_http.timeline_compact(tenant_id, timeline_id)
+    after = rss_hwm()
+
+    log.info(f"RSS across compaction: {before} -> {after} (grew {after - before})")
+
+    layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
+    final_l0s_size = sum(x.layer_file_size for x in layers.delta_l0_layers())
+    log.info(f"l0s after compaction {len(layers.delta_l0_layers())} ({final_l0s_size})")
+
+    assert after > before  # If we didn't use some memory, the test is probably buggy
+    compaction_mapped_rss = after - before
+
+    # During L0 compaction, we require as much memory as the physical size of what
+    # we compacted, and then some, because the key->value mapping in L0 compaction
+    # is exhaustive, non-streaming, and does not de-duplicate repeated references
+    # to the same key.
+    #
+    # To be fixed in https://github.com/neondatabase/neon/issues/8184, after which
+    # this memory estimate can be revised far downwards to something that doesn't
+    # scale linearly with the layer sizes.
+    MEMORY_ESTIMATE = (initial_l0s_size - final_l0s_size) * 1.25
+
+    # If compaction is using more memory than this, it may indicate a regression
+    assert compaction_mapped_rss < MEMORY_ESTIMATE
+
+    # If compaction is using less than half the expected memory, then:
+    # - maybe we made a big efficiency improvement, in which case update the test
+    # - maybe something is functionally wrong with the test and it's not driving
+    #   the system as expected
+    assert compaction_mapped_rss > MEMORY_ESTIMATE / 2
+
+    # We should have compacted some but not all of the L0s, based on the limit on
+    # how much L0 to compact in one go
+    assert len(layers.delta_l0_layers()) > 0
+    assert len(layers.delta_l0_layers()) < initial_l0s
+
+    # The pageserver should have logged when it hit the compaction size limit
+    env.pageserver.assert_log_contains(".*hit max delta layer size limit.*")
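To make the memory bound concrete, a worked example with illustrative numbers
(the actual sizes come from the layer map at runtime): if compaction consumes
12 of the ~20 L0s, each around the 256 MiB checkpoint_distance, then:

    compacted_bytes = 12 * 256 * 1024 * 1024    # ~3 GiB of L0s removed
    memory_estimate = compacted_bytes * 1.25    # ~3.75 GiB upper bound
    lower_bound = memory_estimate / 2           # ~1.9 GiB lower bound
    # The assertions above then require:
    #   lower_bound < compaction_mapped_rss < memory_estimate
    # i.e. RSS growth scales linearly with the bytes compacted, until the
    # k-merge iterator work (#8184) removes that linear dependence.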