tests: add test_compaction_l0_memory (#8403)

This test reproduces the case of a writer creating a deep stack of L0
layers. It uses realistic layer sizes and writes several gigabytes of
data, so it runs as a performance test, although it validates memory
footprint rather than performance per se.

It acts as a regression test for two recent fixes:
- https://github.com/neondatabase/neon/pull/8401
- https://github.com/neondatabase/neon/pull/8391

In future, it will demonstrate the larger improvement from using a k-merge
iterator for L0 compaction (#8184), sketched below.
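For background, a k-merge iterator streams key/value pairs from k sorted
inputs while holding only one head element per input in memory, instead of
materializing the whole mapping. A conceptual sketch only, not the
pageserver's implementation:

    import heapq
    from typing import Iterable, Iterator, Tuple

    def k_merge(
        layers: Iterable[Iterable[Tuple[bytes, bytes]]],
    ) -> Iterator[Tuple[bytes, bytes]]:
        # heapq.merge lazily yields pairs in key order, keeping O(k) memory:
        # one head element per input layer at any time.
        return heapq.merge(*layers, key=lambda kv: kv[0])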

This test can be extended to enforce limits on the memory consumption of
other housekeeping steps: restart the pageserver, run the step in question,
and take the same "how much did RSS increase" measurement, as sketched below.
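The core of that measurement is small enough to reuse. A minimal sketch,
assuming only the "libmetrics_maxrss_kb" metric and the pageserver HTTP
client already used in this test (the helper name measure_rss_growth is
hypothetical):

    def measure_rss_growth(pageserver_http, step) -> float:
        """Run `step` and return the growth of the pageserver's RSS high-water mark, in bytes."""
        def rss_hwm() -> float:
            v = pageserver_http.get_metric_value("libmetrics_maxrss_kb")
            assert v is not None and v > 0
            return v * 1024  # the metric is reported in KiB

        before = rss_hwm()
        step()
        return rss_hwm() - before

Note that maxrss is a high-water mark, so the delta is only meaningful when
the step under test pushes RSS above the previous peak (the test below
asserts this with `after > before`).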
commit 975f8ac658 (parent 839a5724a4)
Author: John Spray
Date:   2024-07-17 18:35:27 +01:00 (committed by GitHub)

2 changed files with 99 additions and 0 deletions

test_runner/fixtures/pageserver/http.py

@@ -117,6 +117,9 @@ class LayerMapInfo:
     def image_layers(self) -> List[HistoricLayerInfo]:
         return [x for x in self.historic_layers if x.kind == "Image"]
 
+    def delta_l0_layers(self) -> List[HistoricLayerInfo]:
+        return [x for x in self.historic_layers if x.kind == "Delta" and x.l0]
+
     def historic_by_name(self) -> Set[str]:
         return set(x.layer_file_name for x in self.historic_layers)
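For context, this is how the test below consumes the new accessor (a usage
sketch, with tenant_id, timeline_id, and the fixture HTTP client assumed in
scope):

    info = pageserver_http.layer_map_info(tenant_id, timeline_id)
    l0_count = len(info.delta_l0_layers())  # depth of the pending L0 stack
    l0_bytes = sum(x.layer_file_size for x in info.delta_l0_layers())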

test_runner/performance/test_compaction.py

@@ -2,6 +2,7 @@ from contextlib import closing
 
 import pytest
 from fixtures.compare_fixtures import NeonCompare
 from fixtures.log_helper import log
+from fixtures.neon_fixtures import wait_for_last_flush_lsn
@@ -56,3 +57,98 @@ def test_compaction(neon_compare: NeonCompare):
     pageserver_http.timeline_compact(tenant_id, timeline_id)
 
     neon_compare.report_size()
+
+
+def test_compaction_l0_memory(neon_compare: NeonCompare):
+    """
+    Generate a large stack of L0s pending compaction into L1s, and
+    measure the pageserver's peak RSS while doing so.
+    """
+    env = neon_compare.env
+    pageserver_http = env.pageserver.http_client()
+
+    tenant_id, timeline_id = env.neon_cli.create_tenant(
+        conf={
+            # Initially disable compaction so that we will build up a stack of L0s
+            "compaction_period": "0s",
+            "gc_period": "0s",
+        }
+    )
+    neon_compare.tenant = tenant_id
+    neon_compare.timeline = timeline_id
+
+    endpoint = env.endpoints.create_start(
+        "main", tenant_id=tenant_id, config_lines=["shared_buffers=512MB"]
+    )
+
+    # Read the tenant's effective config and assert on checkpoint_distance and
+    # compaction_threshold: we do want to test with the defaults (the same values
+    # as in the field), but this test's workload size makes assumptions about them.
+    #
+    # If these assertions fail, it probably means we changed the defaults.
+    tenant_conf = pageserver_http.tenant_config(tenant_id)
+    assert tenant_conf.effective_config["checkpoint_distance"] == 256 * 1024 * 1024
+    assert tenant_conf.effective_config["compaction_threshold"] == 10
+
+    # Aim to write about 20 L0s, so that we will hit the limit on how many
+    # to compact at once
+    with closing(endpoint.connect()) as conn:
+        with conn.cursor() as cur:
+            for i in range(200):
+                cur.execute(f"create table tbl{i} (i int, j int);")
+                cur.execute(f"insert into tbl{i} values (generate_series(1, 1000), 0);")
+                for j in range(100):
+                    cur.execute(f"update tbl{i} set j = {j};")
+
+    wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
+    endpoint.stop()
+
+    # Check we have generated the L0 stack we expected
+    layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
+    initial_l0s = len(layers.delta_l0_layers())
+    initial_l0s_size = sum(x.layer_file_size for x in layers.delta_l0_layers())
+    log.info(f"l0s before compaction {initial_l0s} ({initial_l0s_size})")
+
+    def rss_hwm():
+        v = pageserver_http.get_metric_value("libmetrics_maxrss_kb")
+        assert v is not None
+        assert v > 0
+        return v * 1024
+
+    before = rss_hwm()
+    pageserver_http.timeline_compact(tenant_id, timeline_id)
+    after = rss_hwm()
+
+    log.info(f"RSS across compaction: {before} -> {after} (grew {after - before})")
+
+    layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
+    final_l0s_size = sum(x.layer_file_size for x in layers.delta_l0_layers())
+    log.info(f"l0s after compaction {len(layers.delta_l0_layers())} ({final_l0s_size})")
+
+    assert after > before  # If we didn't use some memory, the test is probably buggy
+    compaction_mapped_rss = after - before
+
+    # During L0 compaction, we require as much memory as the physical size of what
+    # we compacted, and then some, because the key->value mapping in L0 compaction
+    # is exhaustive, non-streaming, and does not de-duplicate repeated references
+    # to the same key.
+    #
+    # To be fixed in https://github.com/neondatabase/neon/issues/8184, after which
+    # this memory estimate can be revised far downwards to something that doesn't
+    # scale linearly with the layer sizes.
+    MEMORY_ESTIMATE = (initial_l0s_size - final_l0s_size) * 1.25
+
+    # If compaction is using more memory than this, it may indicate a regression
+    assert compaction_mapped_rss < MEMORY_ESTIMATE
+
+    # If compaction is using less than half the expected memory, then:
+    # - maybe we made a big efficiency improvement, in which case update the test
+    # - maybe something is functionally wrong with the test and it's not driving
+    #   the system as expected
+    assert compaction_mapped_rss > MEMORY_ESTIMATE / 2
+
+    # We should have compacted some but not all of the L0s, based on the limit on
+    # how much L0 to compact in one go
+    assert len(layers.delta_l0_layers()) > 0
+    assert len(layers.delta_l0_layers()) < initial_l0s
+
+    # The pageserver should have logged when it hit the compaction size limit
+    env.pageserver.assert_log_contains(".*hit max delta layer size limit.*")
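To make the memory bound concrete, a worked example with illustrative numbers
(the actual sizes come from the layer map at runtime): if compaction consumes
12 of the ~20 L0s, each around the 256 MiB checkpoint_distance, then:

    compacted_bytes = 12 * 256 * 1024 * 1024    # ~3 GiB of L0s removed
    memory_estimate = compacted_bytes * 1.25    # ~3.75 GiB upper bound
    lower_bound = memory_estimate / 2           # ~1.9 GiB lower bound
    # The assertions above then require:
    #   lower_bound < compaction_mapped_rss < memory_estimate
    # i.e. RSS growth scales linearly with the bytes compacted, until the
    # k-merge iterator work (#8184) removes that linear dependence.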