diff --git a/test_runner/performance/test_pageserver_startup_many_tenants.py b/test_runner/performance/test_pageserver_startup_many_tenants.py index 1ee48571f9..03fd5aee75 100644 --- a/test_runner/performance/test_pageserver_startup_many_tenants.py +++ b/test_runner/performance/test_pageserver_startup_many_tenants.py @@ -1,8 +1,12 @@ +import asyncio from pathlib import Path +import queue import shutil import subprocess -from typing import List +import threading +from typing import List, Optional from fixtures.neon_fixtures import ( + NeonEnv, NeonEnvBuilder, PgBin, last_flush_lsn_upload, @@ -13,6 +17,42 @@ from fixtures.log_helper import log from fixtures.pageserver.utils import wait_until_tenant_active +def duplicate_tenant( + env: NeonEnv, remote_storage: LocalFsStorage, template_tenant: TenantId, new_tenant: TenantId +): + assert isinstance(remote_storage, LocalFsStorage) + src_timelines_dir: Path = remote_storage.tenant_path(template_tenant) / "timelines" + assert src_timelines_dir.is_dir(), f"{src_timelines_dir} is not a directory" + + dst_timelines_dir: Path = remote_storage.tenant_path(new_tenant) / "timelines" + dst_timelines_dir.parent.mkdir(parents=False, exist_ok=False) + dst_timelines_dir.mkdir(parents=False, exist_ok=False) + + for tl in src_timelines_dir.iterdir(): + src_tl_dir = src_timelines_dir / tl.name + assert src_tl_dir.is_dir(), f"{src_tl_dir} is not a directory" + dst_tl_dir = dst_timelines_dir / tl.name + dst_tl_dir.mkdir(parents=False, exist_ok=False) + for file in tl.iterdir(): + shutil.copy2(file, dst_tl_dir) + if "__" in file.name: + cmd: List[str] = [ + str( + env.neon_binpath / "pagectl" + ), # TODO: abstract this like the other binaries + "layer", + "rewrite-summary", + str(dst_tl_dir / file.name), + "--new-tenant-id", + str(new_tenant), + ] + subprocess.run(cmd, check=True) + else: + # index_part etc need no patching + pass + return None + + def test_pageserver_startup_many_tenants(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): 
""" Usage @@ -28,6 +68,10 @@ def test_pageserver_startup_many_tenants(neon_env_builder: NeonEnvBuilder, pg_bi remote_storage = env.pageserver_remote_storage assert isinstance(remote_storage, LocalFsStorage) + # cleanup initial tenant + env.pageserver.tenant_detach(env.initial_tenant) + + # create our template tenant tenant_config_mgmt_api = { "gc_period": "0s", @@ -43,66 +87,43 @@ def test_pageserver_startup_many_tenants(neon_env_builder: NeonEnvBuilder, pg_bi ps_http = env.pageserver.http_client() template_tenant, template_timeline = env.neon_cli.create_tenant(conf=tenant_config_cli) - template_tenant_gen = int(ps_http.tenant_status(template_tenant)["generation"]) ep = env.endpoints.create_start("main", tenant_id=template_tenant) ep.safe_psql("create table foo(b text)") for i in range(0, 8): ep.safe_psql("insert into foo(b) values ('some text')") last_flush_lsn_upload(env, ep, template_tenant, template_timeline) ep.stop_and_destroy() - ps_http.tenant_detach(template_tenant) - - # tear down processes that could mess with us - env.pageserver.stop() - for sk in env.safekeepers: - sk.stop() + env.pageserver.tenant_detach(template_tenant) # duplicate the tenant in remote storage - src_timelines_dir: Path = remote_storage.tenant_path(template_tenant) / "timelines" - assert src_timelines_dir.is_dir(), f"{src_timelines_dir} is not a directory" - tenants = [template_tenant] - for i in range(0, 20_000): - new_tenant = TenantId.generate() - tenants.append(new_tenant) - log.info("Duplicating tenant #%s: %s", i, new_tenant) + def worker(queue: queue.Queue[Optional[TenantId]]): + while True: + tenant_id = queue.get() + if tenant_id is None: + return + assert isinstance(remote_storage, LocalFsStorage) + duplicate_tenant(env, remote_storage, template_tenant, tenant_id) - dst_timelines_dir: Path = remote_storage.tenant_path(new_tenant) / "timelines" - dst_timelines_dir.parent.mkdir(parents=False, exist_ok=False) - dst_timelines_dir.mkdir(parents=False, exist_ok=False) + 
new_tenants: List[TenantId] = [TenantId.generate() for _ in range(0, 20_000)] + duplications: queue.Queue[Optional[TenantId]] = queue.Queue() + for t in new_tenants: + duplications.put(t) + workers = [] + for _ in range(0, 8): + w = threading.Thread(target=worker, args=[duplications]) + workers.append(w) + w.start() + for _ in workers: duplications.put(None) + for w in workers: + w.join() - for tl in src_timelines_dir.iterdir(): - src_tl_dir = src_timelines_dir / tl.name - assert src_tl_dir.is_dir(), f"{src_tl_dir} is not a directory" - dst_tl_dir = dst_timelines_dir / tl.name - dst_tl_dir.mkdir(parents=False, exist_ok=False) - for file in tl.iterdir(): - shutil.copy2(file, dst_tl_dir) - if "__" in file.name: - cmd: List[str] = [ - str( - env.neon_binpath / "pagectl" - ), # TODO: abstract this like the other binaries - "layer", - "rewrite-summary", - str(dst_tl_dir / file.name), - "--new-tenant-id", - str(new_tenant), - ] - subprocess.run(cmd, check=True) - else: - # index_part etc need no patching - pass - - env.pageserver.start() assert ps_http.tenant_list() == [] - for tenant in tenants: - ps_http.tenant_attach( - tenant, config=tenant_config_mgmt_api, generation=template_tenant_gen + 1 - ) - for tenant in tenants: + for tenant in new_tenants: + env.pageserver.tenant_attach(tenant, config=tenant_config_mgmt_api) + for tenant in new_tenants: + wait_until_tenant_active(ps_http, tenant) # ensure all layers are resident for predictiable performance # TODO: ensure all kinds of eviction are disabled (per-tenant, disk-usage-based) - for tenant in tenants: + for tenant in new_tenants: + ps_http.download_all_layers(tenant, template_timeline)