many_tenants script now works

This commit is contained in:
Christian Schwarz
2023-11-27 16:11:05 +00:00
parent 306880081d
commit efe0d93bf5

View File

@@ -1,8 +1,12 @@
import asyncio
from pathlib import Path
import queue
import shutil
import subprocess
from typing import List
import threading
from typing import List, Optional
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
PgBin,
last_flush_lsn_upload,
@@ -13,6 +17,42 @@ from fixtures.log_helper import log
from fixtures.pageserver.utils import wait_until_tenant_active
def duplicate_tenant(
env: NeonEnv, remote_storage: LocalFsStorage, template_tenant: TenantId, new_tenant: TenantId
):
src_timelines_dir: Path = remote_storage.tenant_path(template_tenant) / "timelines"
assert src_timelines_dir.is_dir(), f"{src_timelines_dir} is not a directory"
assert isinstance(remote_storage, LocalFsStorage)
dst_timelines_dir: Path = remote_storage.tenant_path(new_tenant) / "timelines"
dst_timelines_dir.parent.mkdir(parents=False, exist_ok=False)
dst_timelines_dir.mkdir(parents=False, exist_ok=False)
for tl in src_timelines_dir.iterdir():
src_tl_dir = src_timelines_dir / tl.name
assert src_tl_dir.is_dir(), f"{src_tl_dir} is not a directory"
dst_tl_dir = dst_timelines_dir / tl.name
dst_tl_dir.mkdir(parents=False, exist_ok=False)
for file in tl.iterdir():
shutil.copy2(file, dst_tl_dir)
if "__" in file.name:
cmd: List[str] = [
str(
env.neon_binpath / "pagectl"
), # TODO: abstract this like the other binaries
"layer",
"rewrite-summary",
str(dst_tl_dir / file.name),
"--new-tenant-id",
str(new_tenant),
]
subprocess.run(cmd, check=True)
else:
# index_part etc need no patching
pass
return None
def test_pageserver_startup_many_tenants(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
"""
Usage
@@ -28,6 +68,10 @@ def test_pageserver_startup_many_tenants(neon_env_builder: NeonEnvBuilder, pg_bi
remote_storage = env.pageserver_remote_storage
assert isinstance(remote_storage, LocalFsStorage)
# cleanup initial tenant
env.pageserver.tenant_detach(env.initial_tenant)
# create our template tenant
tenant_config_mgmt_api = {
"gc_period": "0s",
@@ -43,66 +87,43 @@ def test_pageserver_startup_many_tenants(neon_env_builder: NeonEnvBuilder, pg_bi
ps_http = env.pageserver.http_client()
template_tenant, template_timeline = env.neon_cli.create_tenant(conf=tenant_config_cli)
template_tenant_gen = int(ps_http.tenant_status(template_tenant)["generation"])
ep = env.endpoints.create_start("main", tenant_id=template_tenant)
ep.safe_psql("create table foo(b text)")
for i in range(0, 8):
ep.safe_psql("insert into foo(b) values ('some text')")
last_flush_lsn_upload(env, ep, template_tenant, template_timeline)
ep.stop_and_destroy()
ps_http.tenant_detach(template_tenant)
# tear down processes that could mess with us
env.pageserver.stop()
for sk in env.safekeepers:
sk.stop()
env.pageserver.tenant_detach(template_tenant)
# duplicate the tenant in remote storage
src_timelines_dir: Path = remote_storage.tenant_path(template_tenant) / "timelines"
assert src_timelines_dir.is_dir(), f"{src_timelines_dir} is not a directory"
tenants = [template_tenant]
for i in range(0, 20_000):
new_tenant = TenantId.generate()
tenants.append(new_tenant)
log.info("Duplicating tenant #%s: %s", i, new_tenant)
def worker(queue: queue.Queue[Optional[TenantId]]):
while True:
tenant_id = queue.get()
if tenant_id is None:
return
assert isinstance(remote_storage, LocalFsStorage)
duplicate_tenant(env, remote_storage, template_tenant, tenant_id)
dst_timelines_dir: Path = remote_storage.tenant_path(new_tenant) / "timelines"
dst_timelines_dir.parent.mkdir(parents=False, exist_ok=False)
dst_timelines_dir.mkdir(parents=False, exist_ok=False)
new_tenants: List[TenantId] = [TenantId.generate() for _ in range(0, 20_000)]
duplications: queue.Queue[Optional[TenantId]] = queue.Queue()
for t in new_tenants:
duplications.put(t)
workers = []
for _ in range(0, 8):
w = threading.Thread(target=worker, args=[duplications])
workers.append(w)
w.start()
duplications.put(None)
for w in workers:
w.join()
for tl in src_timelines_dir.iterdir():
src_tl_dir = src_timelines_dir / tl.name
assert src_tl_dir.is_dir(), f"{src_tl_dir} is not a directory"
dst_tl_dir = dst_timelines_dir / tl.name
dst_tl_dir.mkdir(parents=False, exist_ok=False)
for file in tl.iterdir():
shutil.copy2(file, dst_tl_dir)
if "__" in file.name:
cmd: List[str] = [
str(
env.neon_binpath / "pagectl"
), # TODO: abstract this like the other binaries
"layer",
"rewrite-summary",
str(dst_tl_dir / file.name),
"--new-tenant-id",
str(new_tenant),
]
subprocess.run(cmd, check=True)
else:
# index_part etc need no patching
pass
env.pageserver.start()
assert ps_http.tenant_list() == []
for tenant in tenants:
ps_http.tenant_attach(
tenant, config=tenant_config_mgmt_api, generation=template_tenant_gen + 1
)
for tenant in tenants:
for tenant in new_tenants:
env.pageserver.tenant_attach(tenant, config=tenant_config_mgmt_api)
for tenant in new_tenants:
wait_until_tenant_active(ps_http, tenant)
# ensure all layers are resident for predictiable performance
# TODO: ensure all kinds of eviction are disabled (per-tenant, disk-usage-based)
for tenant in tenants:
for tenant in new_tenants:
ps_http.download_all_layers(tenant, template_timeline)