test_runner: fix pagebench tenant configs (#11420)

## Problem

Pagebench creates a bunch of tenants by first creating a template tenant
and copying its remote storage, then attaching the copies to the
Pageserver.

These tenants had custom configurations to disable GC and compaction.
However, these configs were only picked up by the Pageserver on attach,
and not registered with the storage controller. This caused the storage
controller to replace the tenant configs with the default tenant config,
re-enabling GC and compaction, which interferes with benchmark
performance.

Resolves #11381.

## Summary of changes

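Register the copied tenants with the storage controller, passing the
template tenant config, instead of directly attaching them to the
Pageserver. This keeps the storage controller from resetting their
config to the default.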
This commit is contained in:
Erik Grinaker
2025-04-02 22:11:39 +02:00
committed by GitHub
parent 03ae57236f
commit 17193d6a33
2 changed files with 11 additions and 20 deletions

View File

@@ -43,7 +43,7 @@ def single_timeline(
f"template tenant is template_tenant={template_tenant} template_timeline={template_timeline}"
)
log.info("detach template tenant form pageserver")
log.info("detach template tenant from pageserver")
env.pageserver.tenant_detach(template_tenant)
log.info(f"duplicating template tenant {ncopies} times in remote storage")
@@ -65,11 +65,13 @@ def single_timeline(
assert ps_http.tenant_list() == []
def attach(tenant):
env.pageserver.tenant_attach(
tenant,
config=template_config.copy(),
generation=100,
override_storage_controller_generation=True,
# NB: create the new tenant in the storage controller with the correct tenant config. This
# will pick up the existing tenant data from remote storage. If we just attach it to the
# Pageserver, the storage controller will reset the tenant config to the default.
env.create_tenant(
tenant_id=tenant,
timeline_id=template_timeline,
conf=template_config,
)
with concurrent.futures.ThreadPoolExecutor(max_workers=22) as executor:

View File

@@ -15,9 +15,7 @@ from fixtures.neon_fixtures import (
)
from fixtures.utils import get_scale_for_db, humantime_to_ms, skip_on_ci
from performance.pageserver.util import (
setup_pageserver_with_tenants,
)
from performance.pageserver.util import setup_pageserver_with_tenants
if TYPE_CHECKING:
from typing import Any
@@ -126,14 +124,11 @@ def setup_and_run_pagebench_benchmark(
for param, (value, kwargs) in params.items():
record(param, metric_value=value, report=MetricReport.TEST_PARAM, **kwargs)
def setup_wrapper(env: NeonEnv):
return setup_tenant_template(env, pg_bin, pgbench_scale)
env = setup_pageserver_with_tenants(
neon_env_builder,
f"max_throughput_latest_lsn-{n_tenants}-{pgbench_scale}",
n_tenants,
setup_wrapper,
lambda env: setup_tenant_template(env, pg_bin, pgbench_scale),
# https://github.com/neondatabase/neon/issues/8070
timeout_in_seconds=60,
)
@@ -160,14 +155,8 @@ def setup_tenant_template(env: NeonEnv, pg_bin: PgBin, scale: int):
"gc_period": "0s", # disable periodic gc
"checkpoint_timeout": "10 years",
"compaction_period": "0s", # disable periodic compaction
"compaction_threshold": 10,
"compaction_target_size": 134217728,
"checkpoint_distance": 268435456,
"image_creation_threshold": 3,
}
template_tenant, template_timeline = env.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.tenant_attach(template_tenant, config)
template_tenant, template_timeline = env.create_tenant(set_default=True, conf=config)
ps_http = env.pageserver.http_client()
with env.endpoints.create_start("main", tenant_id=template_tenant) as ep:
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", "-I", "dtGvp", ep.connstr()])