test_runner: fix pagebench tenant configs (#11420)

## Problem

Pagebench creates a bunch of tenants by first creating a template tenant and copying its remote storage, then attaching the copies to the Pageserver. These tenants had custom configurations to disable GC and compaction. However, these configs were only picked up by the Pageserver on attach, and not registered with the storage controller. This caused the storage controller to replace the tenant configs with the default tenant config, re-enabling GC and compaction, which interferes with benchmark performance.

Resolves #11381.

## Summary of changes

Register the copied tenants with the storage controller, instead of directly attaching them to the Pageserver.
2026-01-05 20:42:54 +00:00 · 2025-04-02 22:11:39 +02:00
parent 03ae57236f
commit 17193d6a33
2 changed files with 11 additions and 20 deletions
--- a/test_runner/fixtures/pageserver/many_tenants.py
+++ b/test_runner/fixtures/pageserver/many_tenants.py
@@ -43,7 +43,7 @@ def single_timeline(
        f"template tenant is template_tenant={template_tenant} template_timeline={template_timeline}"
    )

-    log.info("detach template tenant form pageserver")
+    log.info("detach template tenant from pageserver")
    env.pageserver.tenant_detach(template_tenant)

    log.info(f"duplicating template tenant {ncopies} times in remote storage")
@@ -65,11 +65,13 @@ def single_timeline(
    assert ps_http.tenant_list() == []

    def attach(tenant):
-        env.pageserver.tenant_attach(
-            tenant,
-            config=template_config.copy(),
-            generation=100,
-            override_storage_controller_generation=True,
+        # NB: create the new tenant in the storage controller with the correct tenant config. This
+        # will pick up the existing tenant data from remote storage. If we just attach it to the
+        # Pageserver, the storage controller will reset the tenant config to the default.
+        env.create_tenant(
+            tenant_id=tenant,
+            timeline_id=template_timeline,
+            conf=template_config,
        )

    with concurrent.futures.ThreadPoolExecutor(max_workers=22) as executor:
--- a/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py
+++ b/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py
@@ -15,9 +15,7 @@ from fixtures.neon_fixtures import (
 )
 from fixtures.utils import get_scale_for_db, humantime_to_ms, skip_on_ci

-from performance.pageserver.util import (
-    setup_pageserver_with_tenants,
-)
+from performance.pageserver.util import setup_pageserver_with_tenants

 if TYPE_CHECKING:
    from typing import Any
@@ -126,14 +124,11 @@ def setup_and_run_pagebench_benchmark(
    for param, (value, kwargs) in params.items():
        record(param, metric_value=value, report=MetricReport.TEST_PARAM, **kwargs)

-    def setup_wrapper(env: NeonEnv):
-        return setup_tenant_template(env, pg_bin, pgbench_scale)
-
    env = setup_pageserver_with_tenants(
        neon_env_builder,
        f"max_throughput_latest_lsn-{n_tenants}-{pgbench_scale}",
        n_tenants,
-        setup_wrapper,
+        lambda env: setup_tenant_template(env, pg_bin, pgbench_scale),
        # https://github.com/neondatabase/neon/issues/8070
        timeout_in_seconds=60,
    )
@@ -160,14 +155,8 @@ def setup_tenant_template(env: NeonEnv, pg_bin: PgBin, scale: int):
        "gc_period": "0s",  # disable periodic gc
        "checkpoint_timeout": "10 years",
        "compaction_period": "0s",  # disable periodic compaction
-        "compaction_threshold": 10,
-        "compaction_target_size": 134217728,
-        "checkpoint_distance": 268435456,
-        "image_creation_threshold": 3,
    }
-    template_tenant, template_timeline = env.create_tenant(set_default=True)
-    env.pageserver.tenant_detach(template_tenant)
-    env.pageserver.tenant_attach(template_tenant, config)
+    template_tenant, template_timeline = env.create_tenant(set_default=True, conf=config)
    ps_http = env.pageserver.http_client()
    with env.endpoints.create_start("main", tenant_id=template_tenant) as ep:
        pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", "-I", "dtGvp", ep.connstr()])