From db1ebd865359d085230db30ee2750a0b43ef7b1f Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Thu, 14 Dec 2023 17:49:00 +0000 Subject: [PATCH] test_pageserver: add snapshotting_env fixture --- test_runner/performance/test_pageserver.py | 183 ++++++++++++++------- 1 file changed, 120 insertions(+), 63 deletions(-) diff --git a/test_runner/performance/test_pageserver.py b/test_runner/performance/test_pageserver.py index 34a55b15fd..e45b4d1eb1 100644 --- a/test_runner/performance/test_pageserver.py +++ b/test_runner/performance/test_pageserver.py @@ -1,96 +1,142 @@ import json +import os import shutil import subprocess +import time from pathlib import Path -from typing import List +from typing import List, Tuple +import pytest from fixtures.benchmark_fixture import NeonBenchmarker from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnvBuilder, PgBin, last_flush_lsn_upload +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, PgBin, last_flush_lsn_upload from fixtures.pageserver.utils import wait_until_tenant_active from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind -from fixtures.types import TenantId +from fixtures.types import TenantId, TimelineId -def test_getpage_throughput( - neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker, pg_bin: PgBin -): +@pytest.fixture(scope="function") +def snapshotting_env( + neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_output_dir: Path +) -> Tuple[NeonEnv, TimelineId, List[TenantId]]: + """ + The fixture prepares environment or restores it from a snapshot. + + The logic is the following: + - if the snapshot directory exists, the snapshot is restored from it + - if there is no snapshot, the environment is initialized from scratch and stored in a snapshot + - if the fixture is executed on CI (it has CI=true in the environment), the snapshot is not saved + """ + + snapshot_dir = test_output_dir.parent / f"snapshot-{test_output_dir.name}" + save_snapshot = os.getenv("CI", "false") != "true" + neon_env_builder.enable_generations = True neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) - env = neon_env_builder.init_start() - - remote_storage = env.pageserver_remote_storage - assert isinstance(remote_storage, LocalFsStorage) - - ps_http = env.pageserver.http_client() - - # clean up the useless default tenant - ps_http.tenant_delete(env.initial_tenant) # create our template tenant tenant_config_mgmt_api = { "gc_period": "0s", - "checkpoint_timeout": "3650 day", + "checkpoint_timeout": "10 years", "compaction_period": "20 s", "compaction_threshold": 10, "compaction_target_size": 134217728, "checkpoint_distance": 268435456, "image_creation_threshold": 3, } - tenant_config_cli = {k: str(v) for k, v in tenant_config_mgmt_api.items()} - template_tenant, template_timeline = env.neon_cli.create_tenant(conf=tenant_config_cli) - template_tenant_gen = int(ps_http.tenant_status(template_tenant)["generation"]) - with env.endpoints.create_start("main", tenant_id=template_tenant) as ep: - pg_bin.run_capture(["pgbench", "-i", "-s50", ep.connstr()]) - last_flush_lsn_upload(env, ep, template_tenant, template_timeline) - ps_http.tenant_detach(template_tenant) + if snapshot_dir.exists(): + env = neon_env_builder.from_repo_dir(snapshot_dir) + ps_http = env.pageserver.http_client() + tenants = list({TenantId(t.name) for t in (snapshot_dir.glob("pageserver_*/tenants/*"))}) + with (snapshot_dir / "attachments.json").open("r") as f: + content = json.load(f) + template_tenant_gen = list(content["tenants"].values())[0]["generation"] - # stop PS just for good measure - env.pageserver.stop() + template_timeline = env.initial_timeline - # duplicate the tenant in remote storage - src_timelines_dir: Path = remote_storage.tenant_path(template_tenant) / "timelines" - assert src_timelines_dir.is_dir(), f"{src_timelines_dir} is not a directory" - tenants = [template_tenant] - for i in range(0, 200): - new_tenant = TenantId.generate() - tenants.append(new_tenant) - log.info("Duplicating tenant #%s: %s", i, new_tenant) + env.broker.try_start() - dst_timelines_dir: Path = remote_storage.tenant_path(new_tenant) / "timelines" - dst_timelines_dir.parent.mkdir(parents=False, exist_ok=False) - dst_timelines_dir.mkdir(parents=False, exist_ok=False) + assert env.attachment_service is not None + env.attachment_service.start() - for tl in src_timelines_dir.iterdir(): - src_tl_dir = src_timelines_dir / tl.name - assert src_tl_dir.is_dir(), f"{src_tl_dir} is not a directory" - dst_tl_dir = dst_timelines_dir / tl.name - dst_tl_dir.mkdir(parents=False, exist_ok=False) - for file in tl.iterdir(): - shutil.copy2(file, dst_tl_dir) - if "__" in file.name: - cmd: List[str] = [ - str( - env.neon_binpath / "pagectl" - ), # TODO: abstract this like the other binaries - "layer", - "rewrite-summary", - str(dst_tl_dir / file.name), - "--new-tenant-id", - str(new_tenant), - ] - subprocess.run(cmd, check=True) - else: - # index_part etc need no patching - pass + # Wait for the attachment service to start + time.sleep(5) - env.pageserver.start() - assert ps_http.tenant_list() == [] - for tenant in tenants: - ps_http.tenant_attach( - tenant, config=tenant_config_mgmt_api, generation=template_tenant_gen + 1 + for tenant in tenants: + env.attachment_service.attach_hook(tenant, 1) + + env.pageserver.start() + else: + env = neon_env_builder.init_start() + + remote_storage = env.pageserver_remote_storage + assert isinstance(remote_storage, LocalFsStorage) + + ps_http = env.pageserver.http_client() + # clean up the useless default tenant + ps_http.tenant_delete(env.initial_tenant) + + tenant_config_cli = {k: str(v) for k, v in tenant_config_mgmt_api.items()} + + template_tenant, template_timeline = env.neon_cli.create_tenant( + conf=tenant_config_cli, set_default=True ) + template_tenant_gen = int(ps_http.tenant_status(template_tenant)["generation"]) + with env.endpoints.create_start("main", tenant_id=template_tenant) as ep: + pg_bin.run_capture(["pgbench", "-i", "-s50", ep.connstr()]) + last_flush_lsn_upload(env, ep, template_tenant, template_timeline) + ps_http.tenant_detach(template_tenant) + + # stop PS just for good measure + env.pageserver.stop() + + # duplicate the tenant in remote storage + src_timelines_dir: Path = remote_storage.tenant_path(template_tenant) / "timelines" + assert src_timelines_dir.is_dir(), f"{src_timelines_dir} is not a directory" + tenants = [template_tenant] + for i in range(0, 200): + new_tenant = TenantId.generate() + tenants.append(new_tenant) + log.info("Duplicating tenant #%s: %s", i, new_tenant) + + dst_timelines_dir: Path = remote_storage.tenant_path(new_tenant) / "timelines" + dst_timelines_dir.parent.mkdir(parents=False, exist_ok=False) + dst_timelines_dir.mkdir(parents=False, exist_ok=False) + + for tl in src_timelines_dir.iterdir(): + src_tl_dir = src_timelines_dir / tl.name + assert src_tl_dir.is_dir(), f"{src_tl_dir} is not a directory" + dst_tl_dir = dst_timelines_dir / tl.name + dst_tl_dir.mkdir(parents=False, exist_ok=False) + for file in tl.iterdir(): + shutil.copy2(file, dst_tl_dir) + if "__" in file.name: + cmd: List[str] = [ + str( + env.neon_binpath / "pagectl" + ), # TODO: abstract this like the other binaries + "layer", + "rewrite-summary", + str(dst_tl_dir / file.name), + "--new-tenant-id", + str(new_tenant), + ] + subprocess.run(cmd, check=True) + else: + # index_part etc need no patching + pass + + env.pageserver.start() + assert ps_http.tenant_list() == [] + for tenant in tenants: + ps_http.tenant_attach( + tenant, config=tenant_config_mgmt_api, generation=template_tenant_gen + 1 + ) + + if save_snapshot and not snapshot_dir.exists(): + shutil.copytree(env.repo_dir, snapshot_dir) + for tenant in tenants: wait_until_tenant_active(ps_http, tenant) @@ -99,6 +145,17 @@ def test_getpage_throughput( for tenant in tenants: ps_http.download_all_layers(tenant, template_timeline) + return env, template_timeline, tenants + + +def test_getpage_throughput( + snapshotting_env: Tuple[NeonEnv, TimelineId, List[TenantId]], + zenbenchmark: NeonBenchmarker, + pg_bin: PgBin, +): + env, template_timeline, tenants = snapshotting_env + ps_http = env.pageserver.http_client() + # run the benchmark with one client per timeline, each doing 10k requests to random keys. cmd = [ str(env.neon_binpath / "pagebench"),