import calendar
import dataclasses
import enum
import json
import os
import re
import timeit
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path

# Type-related stuff
from typing import Callable, ClassVar, Dict, Iterator, Optional

import allure
import pytest
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest
from _pytest.terminal import TerminalReporter

from fixtures.common_types import TenantId, TimelineId
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonPageserver

"""
|
|
This file contains fixtures for micro-benchmarks.
|
|
|
|
To use, declare the `zenbenchmark` fixture in the test function. Run the
|
|
bencmark, and then record the result by calling `zenbenchmark.record`. For example:
|
|
|
|
>>> import timeit
|
|
>>> from fixtures.neon_fixtures import NeonEnv
|
|
>>> def test_mybench(neon_simple_env: NeonEnv, zenbenchmark):
|
|
... # Initialize the test
|
|
... ...
|
|
... # Run the test, timing how long it takes
|
|
... with zenbenchmark.record_duration('test_query'):
|
|
... cur.execute('SELECT test_query(...)')
|
|
... # Record another measurement
|
|
... zenbenchmark.record('speed_of_light', 300000, 'km/s')
|
|
|
|
There's no need to import this file to use it. It should be declared as a plugin
|
|
inside `conftest.py`, and that makes it available to all tests.
|
|
|
|
You can measure multiple things in one test, and record each one with a separate
|
|
call to `zenbenchmark`. For example, you could time the bulk loading that happens
|
|
in the test initialization, or measure disk usage after the test query.
|
|
|
|
"""
|
|
|
|
|
|
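
# Illustrative sketch (not part of this module's behaviour): how a `conftest.py`
# might declare this file as a plugin so that `zenbenchmark` becomes available to
# all tests. The module path below is an assumption about the test-suite layout.
#
#     # conftest.py
#     pytest_plugins = (
#         "fixtures.benchmark_fixture",
#     )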


@dataclasses.dataclass
class PgBenchRunResult:
    number_of_clients: int
    number_of_threads: int
    number_of_transactions_actually_processed: int
    latency_average: float
    latency_stddev: Optional[float]
    tps: float
    run_duration: float
    run_start_timestamp: int
    run_end_timestamp: int
    scale: int

    # TODO progress

    @classmethod
    def parse_from_stdout(
        cls,
        stdout: str,
        run_duration: float,
        run_start_timestamp: int,
        run_end_timestamp: int,
    ):
        stdout_lines = stdout.splitlines()

        latency_stddev = None

        # we know significant parts of these values from test input
        # but to be precise take them from output
        for line in stdout_lines:
            # scaling factor: 5
            if line.startswith("scaling factor:"):
                scale = int(line.split()[-1])
            # number of clients: 1
            if line.startswith("number of clients: "):
                number_of_clients = int(line.split()[-1])
            # number of threads: 1
            if line.startswith("number of threads: "):
                number_of_threads = int(line.split()[-1])
            # number of transactions actually processed: 1000/1000
            # OR
            # number of transactions actually processed: 1000
            if line.startswith("number of transactions actually processed"):
                if "/" in line:
                    number_of_transactions_actually_processed = int(line.split("/")[1])
                else:
                    number_of_transactions_actually_processed = int(line.split()[-1])
            # latency average = 19.894 ms
            if line.startswith("latency average"):
                latency_average = float(line.split()[-2])
            # latency stddev = 3.387 ms
            # (only printed with some options)
            if line.startswith("latency stddev"):
                latency_stddev = float(line.split()[-2])

            # Get the TPS without initial connection time. The format
            # of the tps lines changed in pgbench v14, but we accept
            # either format:
            #
            # pgbench v13 and below:
            # tps = 50.219689 (including connections establishing)
            # tps = 50.264435 (excluding connections establishing)
            #
            # pgbench v14:
            # initial connection time = 3.858 ms
            # tps = 309.281539 (without initial connection time)
            if line.startswith("tps = ") and (
                "(excluding connections establishing)" in line
                or "(without initial connection time)" in line
            ):
                tps = float(line.split()[2])

        return cls(
            number_of_clients=number_of_clients,
            number_of_threads=number_of_threads,
            number_of_transactions_actually_processed=number_of_transactions_actually_processed,
            latency_average=latency_average,
            latency_stddev=latency_stddev,
            tps=tps,
            run_duration=run_duration,
            run_start_timestamp=run_start_timestamp,
            run_end_timestamp=run_end_timestamp,
            scale=scale,
        )
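
# Illustrative sketch only: how PgBenchRunResult.parse_from_stdout() is meant to be
# used. The pgbench output lines and timestamps below are made-up sample values, not
# output captured from a real run.
#
#     sample_stdout = "\n".join([
#         "scaling factor: 5",
#         "number of clients: 1",
#         "number of threads: 1",
#         "number of transactions actually processed: 1000/1000",
#         "latency average = 19.894 ms",
#         "tps = 50.264435 (excluding connections establishing)",
#     ])
#     run_result = PgBenchRunResult.parse_from_stdout(
#         sample_stdout,
#         run_duration=19.9,
#         run_start_timestamp=1700000000,
#         run_end_timestamp=1700000020,
#     )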


@dataclasses.dataclass
class PgBenchInitResult:
    # Taken from https://github.com/postgres/postgres/blob/REL_15_1/src/bin/pgbench/pgbench.c#L5144-L5171
    EXTRACTORS: ClassVar[Dict[str, re.Pattern]] = {  # type: ignore[type-arg]
        "drop_tables": re.compile(r"drop tables (\d+\.\d+) s"),
        "create_tables": re.compile(r"create tables (\d+\.\d+) s"),
        "client_side_generate": re.compile(r"client-side generate (\d+\.\d+) s"),
        "server_side_generate": re.compile(r"server-side generate (\d+\.\d+) s"),
        "vacuum": re.compile(r"vacuum (\d+\.\d+) s"),
        "primary_keys": re.compile(r"primary keys (\d+\.\d+) s"),
        "foreign_keys": re.compile(r"foreign keys (\d+\.\d+) s"),
        "total": re.compile(r"done in (\d+\.\d+) s"),  # Total time printed by pgbench
    }

    total: Optional[float]
    drop_tables: Optional[float]
    create_tables: Optional[float]
    client_side_generate: Optional[float]
    server_side_generate: Optional[float]
    vacuum: Optional[float]
    primary_keys: Optional[float]
    foreign_keys: Optional[float]
    duration: float
    start_timestamp: int
    end_timestamp: int

    @classmethod
    def parse_from_stderr(
        cls,
        stderr: str,
        duration: float,
        start_timestamp: int,
        end_timestamp: int,
    ):
        # Parses pgbench initialize output
        # Example: done in 5.66 s (drop tables 0.05 s, create tables 0.31 s, client-side generate 2.01 s, vacuum 0.53 s, primary keys 0.38 s).

        last_line = stderr.splitlines()[-1]

        timings: Dict[str, Optional[float]] = {}
        last_line_items = re.split(r"\(|\)|,", last_line)
        for item in last_line_items:
            for key, regex in cls.EXTRACTORS.items():
                if (m := regex.match(item.strip())) is not None:
                    if key in timings:
                        raise RuntimeError(
                            f"can't store pgbench results for repeated action `{key}`"
                        )

                    timings[key] = float(m.group(1))

        if not timings or "total" not in timings:
            raise RuntimeError(f"can't parse pgbench initialize results from `{last_line}`")

        return cls(
            total=timings["total"],
            drop_tables=timings.get("drop_tables", 0.0),
            create_tables=timings.get("create_tables", 0.0),
            client_side_generate=timings.get("client_side_generate", 0.0),
            server_side_generate=timings.get("server_side_generate", 0.0),
            vacuum=timings.get("vacuum", 0.0),
            primary_keys=timings.get("primary_keys", 0.0),
            foreign_keys=timings.get("foreign_keys", 0.0),
            duration=duration,
            start_timestamp=start_timestamp,
            end_timestamp=end_timestamp,
        )
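
# Illustrative sketch only: parsing the `pgbench -i` summary line quoted in the
# comment above. The stderr text and timestamps are sample values, not real output.
#
#     sample_stderr = (
#         "done in 5.66 s (drop tables 0.05 s, create tables 0.31 s, "
#         "client-side generate 2.01 s, vacuum 0.53 s, primary keys 0.38 s)."
#     )
#     init_result = PgBenchInitResult.parse_from_stderr(
#         sample_stderr,
#         duration=5.7,
#         start_timestamp=1700000000,
#         end_timestamp=1700000006,
#     )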


@enum.unique
class MetricReport(str, enum.Enum):  # str is a hack to make it json serializable
    # this means that this is a constant test parameter
    # like number of transactions, or number of clients
    TEST_PARAM = "test_param"
    # reporter can use it to mark test runs with higher values as improvements
    HIGHER_IS_BETTER = "higher_is_better"
    # the same but for lower values
    LOWER_IS_BETTER = "lower_is_better"


class NeonBenchmarker:
    """
    An object for recording benchmark results. This is created for each test
    function by the `zenbenchmark` fixture.
    """

    def __init__(self, property_recorder: Callable[[str, object], None]):
        # `property_recorder` is the `record_property` pytest fixture provided by the junitxml module:
        # https://docs.pytest.org/en/6.2.x/reference.html#pytest.junitxml.record_property
        self.property_recorder = property_recorder

    def record(
        self,
        metric_name: str,
        metric_value: float,
        unit: str,
        report: MetricReport,
    ):
        """
        Record a benchmark result.
        """
        # Namespace the value to distinguish it from other recorded properties.
        name = f"neon_benchmarker_{metric_name}"
        self.property_recorder(
            name,
            {
                "name": metric_name,
                "value": metric_value,
                "unit": unit,
                "report": report,
            },
        )

    @contextmanager
    def record_duration(self, metric_name: str) -> Iterator[None]:
        """
        Record a duration. Usage:

        with zenbenchmark.record_duration('foobar_runtime'):
            foobar()  # measure this
        """
        start = timeit.default_timer()
        yield
        end = timeit.default_timer()

        self.record(
            metric_name=metric_name,
            metric_value=end - start,
            unit="s",
            report=MetricReport.LOWER_IS_BETTER,
        )

    def record_pg_bench_result(self, prefix: str, pg_bench_result: PgBenchRunResult):
        self.record(
            f"{prefix}.number_of_clients",
            pg_bench_result.number_of_clients,
            "",
            MetricReport.TEST_PARAM,
        )
        self.record(
            f"{prefix}.number_of_threads",
            pg_bench_result.number_of_threads,
            "",
            MetricReport.TEST_PARAM,
        )
        self.record(
            f"{prefix}.number_of_transactions_actually_processed",
            pg_bench_result.number_of_transactions_actually_processed,
            "",
            # that's because this is predefined by test matrix and doesn't change across runs
            report=MetricReport.TEST_PARAM,
        )
        self.record(
            f"{prefix}.latency_average",
            pg_bench_result.latency_average,
            unit="ms",
            report=MetricReport.LOWER_IS_BETTER,
        )
        if pg_bench_result.latency_stddev is not None:
            self.record(
                f"{prefix}.latency_stddev",
                pg_bench_result.latency_stddev,
                unit="ms",
                report=MetricReport.LOWER_IS_BETTER,
            )
        self.record(f"{prefix}.tps", pg_bench_result.tps, "", report=MetricReport.HIGHER_IS_BETTER)
        self.record(
            f"{prefix}.run_duration",
            pg_bench_result.run_duration,
            unit="s",
            report=MetricReport.LOWER_IS_BETTER,
        )
        self.record(
            f"{prefix}.run_start_timestamp",
            pg_bench_result.run_start_timestamp,
            "",
            MetricReport.TEST_PARAM,
        )
        self.record(
            f"{prefix}.run_end_timestamp",
            pg_bench_result.run_end_timestamp,
            "",
            MetricReport.TEST_PARAM,
        )
        self.record(
            f"{prefix}.scale",
            pg_bench_result.scale,
            "",
            MetricReport.TEST_PARAM,
        )

    def record_pg_bench_init_result(self, prefix: str, result: PgBenchInitResult):
        test_params = [
            "start_timestamp",
            "end_timestamp",
        ]
        for test_param in test_params:
            self.record(
                f"{prefix}.{test_param}", getattr(result, test_param), "", MetricReport.TEST_PARAM
            )

        metrics = [
            "duration",
            "drop_tables",
            "create_tables",
            "client_side_generate",
            "server_side_generate",
            "vacuum",
            "primary_keys",
            "foreign_keys",
        ]
        for metric in metrics:
            if (value := getattr(result, metric)) is not None:
                self.record(
                    f"{prefix}.{metric}", value, unit="s", report=MetricReport.LOWER_IS_BETTER
                )

    def get_io_writes(self, pageserver: NeonPageserver) -> int:
        """
        Fetch the "cumulative # of bytes written" metric from the pageserver
        """
        return self.get_int_counter_value(
            pageserver, "libmetrics_disk_io_bytes_total", {"io_operation": "write"}
        )

    def get_peak_mem(self, pageserver: NeonPageserver) -> int:
        """
        Fetch the "maxrss" metric from the pageserver
        """
        return self.get_int_counter_value(pageserver, "libmetrics_maxrss_kb")

    def get_int_counter_value(
        self,
        pageserver: NeonPageserver,
        metric_name: str,
        label_filters: Optional[Dict[str, str]] = None,
    ) -> int:
        """Fetch the value of the given int counter from pageserver metrics."""
        all_metrics = pageserver.http_client().get_metrics()
        sample = all_metrics.query_one(metric_name, label_filters)
        return int(round(sample.value))

    def get_timeline_size(
        self, repo_dir: Path, tenant_id: TenantId, timeline_id: TimelineId
    ) -> int:
        """
        Calculate the on-disk size of a timeline
        """
        path = f"{repo_dir}/tenants/{tenant_id}/timelines/{timeline_id}"

        totalbytes = 0
        for root, _dirs, files in os.walk(path):
            for name in files:
                totalbytes += os.path.getsize(os.path.join(root, name))

        return totalbytes

    @contextmanager
    def record_pageserver_writes(
        self, pageserver: NeonPageserver, metric_name: str
    ) -> Iterator[None]:
        """
        Record bytes written by the pageserver during a test.
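
        Usage (illustrative sketch; here `pageserver` stands for whichever
        NeonPageserver instance the test drives):

        with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
            ...  # run the workload whose write volume should be measured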
        """
        before = self.get_io_writes(pageserver)
        yield
        after = self.get_io_writes(pageserver)

        self.record(
            metric_name,
            round((after - before) / (1024 * 1024)),
            "MB",
            report=MetricReport.LOWER_IS_BETTER,
        )


@pytest.fixture(scope="function")
def zenbenchmark(
    request: FixtureRequest,
    record_property: Callable[[str, object], None],
) -> Iterator[NeonBenchmarker]:
    """
    A function-scoped fixture that provides a NeonBenchmarker for recording
    measurements. When the test finishes, the recorded results are attached
    to the Allure report.
    """
    benchmarker = NeonBenchmarker(record_property)
    yield benchmarker

    results = {}
    for _, recorded_property in request.node.user_properties:
        name = recorded_property["name"]
        value = str(recorded_property["value"])
        if (unit := recorded_property["unit"].strip()) != "":
            value += f" {unit}"
        results[name] = value

    content = json.dumps(results, indent=2)
    allure.attach(
        content,
        "benchmarks.json",
        allure.attachment_type.JSON,
    )
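
# For reference (illustrative, derived from NeonBenchmarker.record() above): each entry
# in `request.node.user_properties` is a ("neon_benchmarker_<metric>", dict) pair, e.g.
#
#     ("neon_benchmarker_tps",
#      {"name": "tps", "value": 309.28, "unit": "", "report": MetricReport.HIGHER_IS_BETTER})
#
# so the attached benchmarks.json would contain an entry like {"tps": "309.28"}.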


def pytest_addoption(parser: Parser):
    parser.addoption(
        "--out-dir",
        dest="out_dir",
        help="Directory to output performance test results to.",
    )


def get_out_path(target_dir: Path, revision: str) -> Path:
    """
    Get the output file path.

    When running in CI, the commit revision is part of the file name; a UTC
    timestamp is prepended as a counter to avoid duplicates
    (e.g. `1700000000_abc123.json`).
    """
    # use UTC timestamp as a counter marker to avoid weird behaviour
    # when for example files are deleted
    ts = calendar.timegm(datetime.utcnow().utctimetuple())
    path = target_dir / f"{ts}_{revision}.json"
    assert not path.exists()
    return path


# Hook to print the results at the end
@pytest.hookimpl(hookwrapper=True)
def pytest_terminal_summary(
    terminalreporter: TerminalReporter, exitstatus: int, config: Config
) -> Iterator[None]:
    yield
    revision = os.getenv("GITHUB_SHA", "local")
    platform = os.getenv("PLATFORM", "local")

    is_header_printed = False

    result = []
    for test_report in terminalreporter.stats.get("passed", []):
        result_entry = []

        for _, recorded_property in test_report.user_properties:
            if not is_header_printed:
                terminalreporter.section("Benchmark results", "-")
                is_header_printed = True

            terminalreporter.write(f"{test_report.head_line}.{recorded_property['name']}: ")
            unit = recorded_property["unit"]
            value = recorded_property["value"]
            if unit == "MB":
                terminalreporter.write(f"{value:,.0f}", green=True)
            elif unit in ("s", "ms") and isinstance(value, float):
                terminalreporter.write(f"{value:,.3f}", green=True)
            elif isinstance(value, float):
                terminalreporter.write(f"{value:,.4f}", green=True)
            else:
                terminalreporter.write(str(value), green=True)
            terminalreporter.line(f" {unit}")

            result_entry.append(recorded_property)

        result.append(
            {
                "suit": test_report.nodeid,
                "total_duration": test_report.duration,
                "data": result_entry,
            }
        )

    out_dir = config.getoption("out_dir")
    if out_dir is None:
        return

    if not result:
        log.warning("no results to store (no passed test suites)")
        return

    get_out_path(Path(out_dir), revision=revision).write_text(
        json.dumps({"revision": revision, "platform": platform, "result": result}, indent=4)
    )