From 12e6f443dae80f316832cf81d83b4f71eb17bbc9 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Sun, 18 Dec 2022 00:02:04 +0000 Subject: [PATCH] test_perf_pgbench: switch to server-side data generation (#3058) To offload the network and reduce its impact, I suggest switching to server-side data generation for the pgbench initialize workflow. --- test_runner/fixtures/benchmark_fixture.py | 63 ++++++++++++-------- test_runner/performance/test_perf_pgbench.py | 6 +- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py index 27fb0a60b2..b1489b7ab1 100644 --- a/test_runner/fixtures/benchmark_fixture.py +++ b/test_runner/fixtures/benchmark_fixture.py @@ -11,7 +11,7 @@ from datetime import datetime from pathlib import Path # Type-related stuff -from typing import Callable, ClassVar, Iterator, Optional +from typing import Callable, ClassVar, Dict, Iterator, Optional import pytest from _pytest.config import Config @@ -135,23 +135,26 @@ class PgBenchRunResult: @dataclasses.dataclass class PgBenchInitResult: - REGEX: ClassVar[re.Pattern] = re.compile( # type: ignore[type-arg] - r"done in (\d+\.\d+) s " - r"\(" - r"(?:drop tables (\d+\.\d+) s)?(?:, )?" - r"(?:create tables (\d+\.\d+) s)?(?:, )?" - r"(?:client-side generate (\d+\.\d+) s)?(?:, )?" - r"(?:vacuum (\d+\.\d+) s)?(?:, )?" - r"(?:primary keys (\d+\.\d+) s)?(?:, )?" - r"\)\." - ) + # Taken from https://github.com/postgres/postgres/blob/REL_15_1/src/bin/pgbench/pgbench.c#L5144-L5171 + EXTRACTORS: ClassVar[Dict[str, re.Pattern]] = { # type: ignore[type-arg] + "drop_tables": re.compile(r"drop tables (\d+\.\d+) s"), + "create_tables": re.compile(r"create tables (\d+\.\d+) s"), + "client_side_generate": re.compile(r"client-side generate (\d+\.\d+) s"), + "server_side_generate": re.compile(r"server-side generate (\d+\.\d+) s"), + "vacuum": re.compile(r"vacuum (\d+\.\d+) s"), + "primary_keys": re.compile(r"primary keys (\d+\.\d+) s"), + "foreign_keys": re.compile(r"foreign keys (\d+\.\d+) s"), + "total": re.compile(r"done in (\d+\.\d+) s"), # Total time printed by pgbench + } - total: float + total: Optional[float] drop_tables: Optional[float] create_tables: Optional[float] client_side_generate: Optional[float] + server_side_generate: Optional[float] vacuum: Optional[float] primary_keys: Optional[float] + foreign_keys: Optional[float] duration: float start_timestamp: int end_timestamp: int @@ -164,25 +167,35 @@ class PgBenchInitResult: start_timestamp: int, end_timestamp: int, ): - # Parses pgbench initialize output for default initialization steps (dtgvp) + # Parses pgbench initialize output # Example: done in 5.66 s (drop tables 0.05 s, create tables 0.31 s, client-side generate 2.01 s, vacuum 0.53 s, primary keys 0.38 s). 
last_line = stderr.splitlines()[-1] - if (m := cls.REGEX.match(last_line)) is not None: - total, drop_tables, create_tables, client_side_generate, vacuum, primary_keys = [ - float(v) for v in m.groups() if v is not None - ] - else: + timings: Dict[str, Optional[float]] = {} + last_line_items = re.split(r"\(|\)|,", last_line) + for item in last_line_items: + for key, regex in cls.EXTRACTORS.items(): + if (m := regex.match(item.strip())) is not None: + if key in timings: + raise RuntimeError( + f"can't store pgbench results for repeated action `{key}`" + ) + + timings[key] = float(m.group(1)) + + if not timings or "total" not in timings: raise RuntimeError(f"can't parse pgbench initialize results from `{last_line}`") return cls( - total=total, - drop_tables=drop_tables, - create_tables=create_tables, - client_side_generate=client_side_generate, - vacuum=vacuum, - primary_keys=primary_keys, + total=timings["total"], + drop_tables=timings.get("drop_tables", 0.0), + create_tables=timings.get("create_tables", 0.0), + client_side_generate=timings.get("client_side_generate", 0.0), + server_side_generate=timings.get("server_side_generate", 0.0), + vacuum=timings.get("vacuum", 0.0), + primary_keys=timings.get("primary_keys", 0.0), + foreign_keys=timings.get("foreign_keys", 0.0), duration=duration, start_timestamp=start_timestamp, end_timestamp=end_timestamp, @@ -326,8 +339,10 @@ class NeonBenchmarker: "drop_tables", "create_tables", "client_side_generate", + "server_side_generate", "vacuum", "primary_keys", + "foreign_keys", ] for metric in metrics: if (value := getattr(result, metric)) is not None: diff --git a/test_runner/performance/test_perf_pgbench.py b/test_runner/performance/test_perf_pgbench.py index 015cc40a72..50e5366c1e 100644 --- a/test_runner/performance/test_perf_pgbench.py +++ b/test_runner/performance/test_perf_pgbench.py @@ -15,7 +15,7 @@ from fixtures.utils import get_scale_for_db @enum.unique class PgBenchLoadType(enum.Enum): INIT = "init" - SIMPLE_UPDATE = "simple_update" + SIMPLE_UPDATE = "simple-update" SELECT_ONLY = "select-only" @@ -94,7 +94,9 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: P if workload_type == PgBenchLoadType.INIT: # Run initialize - init_pgbench(env, ["pgbench", f"-s{scale}", "-i", connstr], password=password) + init_pgbench( + env, ["pgbench", f"-s{scale}", "-i", "-I", "dtGvp", connstr], password=password + ) if workload_type == PgBenchLoadType.SIMPLE_UPDATE: # Run simple-update workload
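
Note (not part of the patch): below is a minimal, self-contained sketch of how the EXTRACTORS-based parsing added above behaves on a typical `pgbench -i -I dtGvp` summary line. The uppercase "G" in the init-steps string selects server-side data generation, where pgbench fills pgbench_accounts with an INSERT ... SELECT executed on the server, so the generated rows never travel over the client connection; that is the network offload the commit message refers to. The `parse_init_timings` helper name and the sample timings are illustrative only and are not code from this patch.

    import re
    from typing import Dict

    # Same patterns as PgBenchInitResult.EXTRACTORS above (subset shown).
    EXTRACTORS: Dict[str, re.Pattern] = {
        "drop_tables": re.compile(r"drop tables (\d+\.\d+) s"),
        "create_tables": re.compile(r"create tables (\d+\.\d+) s"),
        "server_side_generate": re.compile(r"server-side generate (\d+\.\d+) s"),
        "vacuum": re.compile(r"vacuum (\d+\.\d+) s"),
        "primary_keys": re.compile(r"primary keys (\d+\.\d+) s"),
        "total": re.compile(r"done in (\d+\.\d+) s"),
    }

    def parse_init_timings(last_line: str) -> Dict[str, float]:
        # Split the summary line on parentheses and commas, then try every
        # extractor against each fragment, mirroring the classmethod above.
        timings: Dict[str, float] = {}
        for item in re.split(r"\(|\)|,", last_line):
            for key, regex in EXTRACTORS.items():
                if (m := regex.match(item.strip())) is not None:
                    timings[key] = float(m.group(1))
        return timings

    line = (
        "done in 4.22 s (drop tables 0.04 s, create tables 0.30 s, "
        "server-side generate 3.02 s, vacuum 0.48 s, primary keys 0.38 s)."
    )
    print(parse_init_timings(line))
    # {'total': 4.22, 'drop_tables': 0.04, 'create_tables': 0.3,
    #  'server_side_generate': 3.02, 'vacuum': 0.48, 'primary_keys': 0.38}

Because each step is matched independently, the same loop handles both the old default "dtgvp" output (client-side generate) and the new "dtGvp" output (server-side generate) without a single brittle regular expression over the whole line.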