From 5f4fe8f72ac2c0279d7fc85c01d5cd4c38e4d1af Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 9 Jan 2024 09:45:37 +0000 Subject: [PATCH] fight various timeouts at high tenant count --- test_runner/fixtures/pageserver/many_tenants.py | 4 ++-- test_runner/performance/test_pageserver_pagebench.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test_runner/fixtures/pageserver/many_tenants.py b/test_runner/fixtures/pageserver/many_tenants.py index a0c14f0b60..60d0d17c46 100644 --- a/test_runner/fixtures/pageserver/many_tenants.py +++ b/test_runner/fixtures/pageserver/many_tenants.py @@ -90,7 +90,7 @@ def single_timeline( work_queue.do(22, tenants, attach_broken) - env.pageserver.stop() # clears the failpoint as a side-effect + env.pageserver.stop(immediate=True) # clears the failpoint as a side-effect; immediate to avoid hitting neon_local's timeout tenant_timelines = list(map(lambda tenant: (tenant, template_timeline), tenants)) log.info(f"python-side on-demand download the layer files into local tenant dir") fixtures.pageserver.remote_storage.copy_all_remote_layer_files_to_local_tenant_dir( @@ -100,7 +100,7 @@ def single_timeline( log.info(f"wait for tenants to become active") for tenant in tenants: - wait_until_tenant_active(ps_http, tenant) + wait_until_tenant_active(ps_http, tenant, iterations=ncopies, period=1) # ensure all layers are resident for predictiable performance for tenant in tenants: diff --git a/test_runner/performance/test_pageserver_pagebench.py b/test_runner/performance/test_pageserver_pagebench.py index e56e7e31bc..8a0d36bc90 100644 --- a/test_runner/performance/test_pageserver_pagebench.py +++ b/test_runner/performance/test_pageserver_pagebench.py @@ -92,4 +92,6 @@ def test_getpage_throughput( log.info(f"Results:\n{json.dumps(results, sort_keys=True, indent=2)}") + env.pageserver.stop(immediate=True) # with 20k tenants, we hit neon_local's shutdown timeout of 10 seconds + zenbenchmark.record_pagebench_results("get-page-latest-lsn", results, duration)