neon/test_runner/performance/test_compaction.py
John Spray 975f8ac658 tests: add test_compaction_l0_memory (#8403)
This test reproduces the case of a writer creating a deep stack of L0
layers. It uses realistic layer sizes and writes several gigabytes of
data, therefore runs as a performance test although it is validating
memory footprint rather than performance per se.

It acts as a regression test for two recent fixes:
- https://github.com/neondatabase/neon/pull/8401
- https://github.com/neondatabase/neon/pull/8391

In future it will demonstrate the larger improvement of using a k-merge
iterator for L0 compaction (#8184).

This test can be extended to enforce limits on the memory consumption of
other housekeeping steps: restart the pageserver, run the step under test,
and take the same "how much did RSS increase" measurement (sketched below).
2024-07-17 17:35:27 +00:00
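
A minimal sketch of that extension (measure_step_rss and run_housekeeping_step
are hypothetical names for illustration; the metric, restart, and HTTP client
calls are the ones this test already uses):

    def measure_step_rss(env, pageserver_http, run_housekeeping_step):
        # Restart the pageserver so the process's RSS high-water mark is reset
        env.pageserver.stop()
        env.pageserver.start()

        def rss_hwm():
            v = pageserver_http.get_metric_value("libmetrics_maxrss_kb")
            assert v is not None
            return v * 1024  # metric is in KiB; convert to bytes

        before = rss_hwm()
        run_housekeeping_step()  # hypothetical callable: the step under test
        return rss_hwm() - before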

from contextlib import closing

import pytest
from fixtures.compare_fixtures import NeonCompare
from fixtures.log_helper import log
from fixtures.neon_fixtures import wait_for_last_flush_lsn


#
# Test compaction and image layer creation performance.
#
# This creates a few tables and runs some simple INSERTs and UPDATEs on them to generate
# some delta layers. Then it runs manual compaction, measuring how long it takes.
#
@pytest.mark.timeout(1000)
def test_compaction(neon_compare: NeonCompare):
    env = neon_compare.env
    pageserver_http = env.pageserver.http_client()

    tenant_id, timeline_id = env.neon_cli.create_tenant(
        conf={
            # Disable background GC and compaction, we'll run compaction manually.
            "gc_period": "0s",
            "compaction_period": "0s",
            # Make checkpoint distance somewhat smaller than default, to create
            # more delta layers quicker, to trigger compaction.
            "checkpoint_distance": "25000000",  # 25 MB
            # Force image layer creation when we run compaction.
            "image_creation_threshold": "1",
        }
    )
    neon_compare.tenant = tenant_id
    neon_compare.timeline = timeline_id

    # Create some tables, and run a bunch of INSERTs and UPDATEs on them,
    # to generate WAL and layers
    endpoint = env.endpoints.create_start(
        "main", tenant_id=tenant_id, config_lines=["shared_buffers=512MB"]
    )
    with closing(endpoint.connect()) as conn:
        with conn.cursor() as cur:
            for i in range(100):
                cur.execute(f"create table tbl{i} (i int, j int);")
                cur.execute(f"insert into tbl{i} values (generate_series(1, 1000), 0);")
                for j in range(100):
                    cur.execute(f"update tbl{i} set j = {j};")

    wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)

    # First compaction generates L1 layers
    with neon_compare.zenbenchmark.record_duration("compaction"):
        pageserver_http.timeline_compact(tenant_id, timeline_id)

    # And second compaction triggers image layer creation
    with neon_compare.zenbenchmark.record_duration("image_creation"):
        pageserver_http.timeline_compact(tenant_id, timeline_id)

    neon_compare.report_size()


def test_compaction_l0_memory(neon_compare: NeonCompare):
    """
    Generate a large stack of L0s pending compaction into L1s, and
    measure the pageserver's peak RSS while doing so
    """
    env = neon_compare.env
    pageserver_http = env.pageserver.http_client()

    tenant_id, timeline_id = env.neon_cli.create_tenant(
        conf={
            # Initially disable compaction so that we will build up a stack of L0s
            "compaction_period": "0s",
            "gc_period": "0s",
        }
    )
    neon_compare.tenant = tenant_id
    neon_compare.timeline = timeline_id

    endpoint = env.endpoints.create_start(
        "main", tenant_id=tenant_id, config_lines=["shared_buffers=512MB"]
    )

    # Read the tenant's effective config and assert on checkpoint_distance and
    # compaction_threshold: we deliberately test with the defaults (to match what
    # runs in the field), but this test's workload size makes assumptions about them.
    #
    # If these assertions fail, it probably means we changed the default.
    tenant_conf = pageserver_http.tenant_config(tenant_id)
    assert tenant_conf.effective_config["checkpoint_distance"] == 256 * 1024 * 1024
    assert tenant_conf.effective_config["compaction_threshold"] == 10

    # Aim to write about 20 L0s, so that we will hit the limit on how many
    # to compact at once
    with closing(endpoint.connect()) as conn:
        with conn.cursor() as cur:
            for i in range(200):
                cur.execute(f"create table tbl{i} (i int, j int);")
                cur.execute(f"insert into tbl{i} values (generate_series(1, 1000), 0);")
                for j in range(100):
                    cur.execute(f"update tbl{i} set j = {j};")

    wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
    endpoint.stop()

    # Check we have generated the L0 stack we expected
    layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
    initial_l0s = len(layers.delta_l0_layers())
    initial_l0s_size = sum(x.layer_file_size for x in layers.delta_l0_layers())
    log.info(f"l0s before compaction {initial_l0s} ({initial_l0s_size})")

    def rss_hwm():
        # Peak RSS (high-water mark) of the pageserver process, in bytes
        v = pageserver_http.get_metric_value("libmetrics_maxrss_kb")
        assert v is not None
        assert v > 0
        return v * 1024
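
    # Because maxrss is a high-water mark rather than an instantaneous reading,
    # the delta below only captures compaction's memory use if compaction pushes
    # RSS above the pre-compaction peak; the `after > before` assertion guards that.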
    before = rss_hwm()
    pageserver_http.timeline_compact(tenant_id, timeline_id)
    after = rss_hwm()

    log.info(f"RSS across compaction: {before} -> {after} (grew {after - before})")

    layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
    final_l0s_size = sum(x.layer_file_size for x in layers.delta_l0_layers())
    log.info(f"l0s after compaction {len(layers.delta_l0_layers())} ({final_l0s_size})")

    assert after > before  # If we didn't use some memory the test is probably buggy
    compaction_mapped_rss = after - before

    # During L0 compaction, we require as much memory as the physical size of what we compacted, and then some,
    # because the key->value mapping in L0 compaction is exhaustive, non-streaming, and does not de-duplicate
    # repeated references to the same key.
    #
    # To be fixed in https://github.com/neondatabase/neon/issues/8184, after which
    # this memory estimate can be revised far downwards to something that doesn't scale
    # linearly with the layer sizes.
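    # For example, if 2 GiB of L0s were compacted away, the allowance below works
    # out to 2.5 GiB of RSS growth (hypothetical numbers, illustrating the 1.25 factor).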
    MEMORY_ESTIMATE = (initial_l0s_size - final_l0s_size) * 1.25

    # If we find that compaction is using more memory, this may indicate a regression
    assert compaction_mapped_rss < MEMORY_ESTIMATE

    # If we find that compaction is using less than half the expected memory, then:
    # - maybe we made a big efficiency improvement, in which case update the test
    # - maybe something is functionally wrong with the test and it's not driving the system as expected
    assert compaction_mapped_rss > MEMORY_ESTIMATE / 2

    # We should have compacted some but not all of the l0s, based on the limit on how much
    # l0 to compact in one go
    assert len(layers.delta_l0_layers()) > 0
    assert len(layers.delta_l0_layers()) < initial_l0s

    # The pageserver should have logged when it hit the compaction size limit
    env.pageserver.assert_log_contains(".*hit max delta layer size limit.*")