fix(pageserver): ensure gc-compaction gets preempted by L0 (#11512)

## Problem

Part of #9114 

## Summary of changes

Gc-compaction flag was not correctly set, causing it not getting
preempted by L0.

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2025-04-09 16:57:50 -04:00
committed by GitHub
parent 63ee8e2181
commit 405a17bf0b
3 changed files with 62 additions and 1 deletions

View File

@@ -38,12 +38,34 @@ PREEMPT_COMPACTION_TENANT_CONF = {
"compaction_target_size": 1024**2,
"image_creation_threshold": 1,
"image_creation_preempt_threshold": 1,
# compact more frequently
# Compact more frequently
"compaction_threshold": 3,
"compaction_upper_limit": 6,
"lsn_lease_length": "0s",
}
PREEMPT_GC_COMPACTION_TENANT_CONF = {
"gc_period": "5s",
"compaction_period": "5s",
# Small checkpoint distance to create many layers
"checkpoint_distance": 1024**2,
# Compact small layers
"compaction_target_size": 1024**2,
"image_creation_threshold": 10000, # Do not create image layers at all
"image_creation_preempt_threshold": 10000,
# Compact more frequently
"compaction_threshold": 3,
"compaction_upper_limit": 6,
"lsn_lease_length": "0s",
# Enable gc-compaction
"gc_compaction_enabled": "true",
"gc_compaction_initial_threshold_kb": 1024, # At a small threshold
"gc_compaction_ratio_percent": 1,
# No PiTR interval and small GC horizon
"pitr_interval": "0s",
"gc_horizon": f"{1024**2}",
}
@skip_in_debug_build("only run with release build")
@pytest.mark.parametrize(
@@ -165,6 +187,41 @@ def test_pageserver_compaction_preempt(
env.pageserver.assert_log_contains("resuming image layer creation")
@skip_in_debug_build("only run with release build")
def test_pageserver_gc_compaction_preempt(
neon_env_builder: NeonEnvBuilder,
):
# Ideally we should be able to do unit tests for this, but we need real Postgres
# WALs in order to do unit testing...
conf = PREEMPT_GC_COMPACTION_TENANT_CONF.copy()
env = neon_env_builder.init_start(initial_tenant_conf=conf)
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
row_count = 200000
churn_rounds = 10
ps_http = env.pageserver.http_client()
workload = Workload(env, tenant_id, timeline_id)
workload.init(env.pageserver.id)
log.info("Writing initial data ...")
workload.write_rows(row_count, env.pageserver.id)
for i in range(1, churn_rounds + 1):
log.info(f"Running churn round {i}/{churn_rounds} ...")
workload.churn_rows(row_count, env.pageserver.id, upload=False)
workload.validate(env.pageserver.id)
ps_http.timeline_compact(tenant_id, timeline_id, wait_until_uploaded=True)
log.info("Validating at workload end ...")
workload.validate(env.pageserver.id)
# ensure gc_compaction gets preempted and then resumed
env.pageserver.assert_log_contains("preempt gc-compaction")
@skip_in_debug_build("only run with release build")
@pytest.mark.timeout(900) # This test is slow with sanitizers enabled, especially on ARM
@pytest.mark.parametrize(