import calendar
import dataclasses
import enum
import json
import os
import re
import subprocess
import timeit
import uuid
import warnings
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path

# Type-related stuff
from typing import Iterator, Optional

import pytest
from _pytest.config import Config
from _pytest.terminal import TerminalReporter
"""
This file contains fixtures for micro-benchmarks.

To use, declare the 'zenbenchmark' fixture in the test function. Run the
benchmark, and then record the result by calling zenbenchmark.record. For example:

import timeit
from fixtures.neon_fixtures import NeonEnv

def test_mybench(neon_simple_env: NeonEnv, zenbenchmark):

    # Initialize the test
    ...

    # Run the test, timing how long it takes
    with zenbenchmark.record_duration('test_query'):
        cur.execute('SELECT test_query(...)')

    # Record another measurement
    zenbenchmark.record('speed_of_light', 300000, 'km/s', MetricReport.TEST_PARAM)

There's no need to import this file to use it. It should be declared as a plugin
inside conftest.py, which makes it available to all tests.

You can measure multiple things in one test, and record each one with a separate
call to zenbenchmark. For example, you could time the bulk loading that happens
in the test initialization, or measure disk usage after the test query.
"""


@dataclasses.dataclass
class PgBenchRunResult:
    number_of_clients: int
    number_of_threads: int
    number_of_transactions_actually_processed: int
    latency_average: float
    latency_stddev: Optional[float]
    tps: float
    run_duration: float
    run_start_timestamp: int
    run_end_timestamp: int
    # TODO progress

    @classmethod
    def parse_from_stdout(
        cls,
        stdout: str,
        run_duration: float,
        run_start_timestamp: int,
        run_end_timestamp: int,
    ):
        stdout_lines = stdout.splitlines()

        latency_stddev = None

        # We know significant parts of these values from the test input,
        # but to be precise, take them from the pgbench output.
        for line in stdout_lines:
            # scaling factor: 5
            if line.startswith("scaling factor:"):
                scale = int(line.split()[-1])  # parsed for reference, not recorded
            # number of clients: 1
            if line.startswith("number of clients: "):
                number_of_clients = int(line.split()[-1])
            # number of threads: 1
            if line.startswith("number of threads: "):
                number_of_threads = int(line.split()[-1])
            # number of transactions actually processed: 1000/1000
            # OR
            # number of transactions actually processed: 1000
            if line.startswith("number of transactions actually processed"):
                if "/" in line:
                    number_of_transactions_actually_processed = int(line.split("/")[1])
                else:
                    number_of_transactions_actually_processed = int(line.split()[-1])
            # latency average = 19.894 ms
            if line.startswith("latency average"):
                latency_average = float(line.split()[-2])
            # latency stddev = 3.387 ms
            # (only printed with some options)
            if line.startswith("latency stddev"):
                latency_stddev = float(line.split()[-2])

            # Get the TPS without initial connection time. The format
            # of the tps lines changed in pgbench v14, but we accept
            # either format:
            #
            # pgbench v13 and below:
            # tps = 50.219689 (including connections establishing)
            # tps = 50.264435 (excluding connections establishing)
            #
            # pgbench v14:
            # initial connection time = 3.858 ms
            # tps = 309.281539 (without initial connection time)
            if line.startswith("tps = ") and (
                    "(excluding connections establishing)" in line
                    or "(without initial connection time)" in line):
                tps = float(line.split()[2])

        return cls(
            number_of_clients=number_of_clients,
            number_of_threads=number_of_threads,
            number_of_transactions_actually_processed=number_of_transactions_actually_processed,
            latency_average=latency_average,
            latency_stddev=latency_stddev,
            tps=tps,
            run_duration=run_duration,
            run_start_timestamp=run_start_timestamp,
            run_end_timestamp=run_end_timestamp,
        )
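
# Illustrative sketch (not used by the plugin): how PgBenchRunResult.parse_from_stdout
# consumes raw pgbench output. The sample text, duration and timestamps below are made
# up, but they follow the pgbench v13 output format handled above.
def _example_parse_pgbench_output() -> PgBenchRunResult:
    sample_stdout = (
        "scaling factor: 5\n"
        "number of clients: 10\n"
        "number of threads: 2\n"
        "number of transactions actually processed: 1000/1000\n"
        "latency average = 19.894 ms\n"
        "tps = 50.264435 (excluding connections establishing)\n"
    )
    return PgBenchRunResult.parse_from_stdout(
        stdout=sample_stdout,
        run_duration=20.0,
        run_start_timestamp=1_640_995_200,
        run_end_timestamp=1_640_995_220,
    )
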
@enum.unique
class MetricReport(str, enum.Enum):  # str is a hack to make it JSON-serializable
    # The value is a constant test parameter,
    # like the number of transactions or the number of clients.
    TEST_PARAM = 'test_param'
    # The reporter can use this to mark test runs with higher values as improvements.
    HIGHER_IS_BETTER = 'higher_is_better'
    # The same, but for lower values.
    LOWER_IS_BETTER = 'lower_is_better'
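
# Example (illustrative): choosing a MetricReport for a recorded value. Constant test
# parameters (scale, client count, timestamps) use TEST_PARAM; measured quantities use
# HIGHER_IS_BETTER or LOWER_IS_BETTER so reporting tooling knows which direction is an
# improvement. With a NeonBenchmarker instance `zenbenchmark` (see fixture below):
#
#   zenbenchmark.record("scale", 5, "", report=MetricReport.TEST_PARAM)
#   zenbenchmark.record("copy_duration", 12.3, "s", report=MetricReport.LOWER_IS_BETTER)
#   zenbenchmark.record("tps", 309.3, "", report=MetricReport.HIGHER_IS_BETTER)
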
class NeonBenchmarker:
    """
    An object for recording benchmark results. This is created for each test
    function by the zenbenchmark fixture.
    """
    def __init__(self, property_recorder):
        # property_recorder is the pytest 'record_property' fixture provided by the junitxml module:
        # https://docs.pytest.org/en/6.2.x/reference.html#pytest.junitxml.record_property
        self.property_recorder = property_recorder

    def record(
        self,
        metric_name: str,
        metric_value: float,
        unit: str,
        report: MetricReport,
    ):
        """
        Record a benchmark result.
        """
        # Prefix the name, just to namespace the recorded property
        name = f"neon_benchmarker_{metric_name}"
        self.property_recorder(
            name,
            {
                "name": metric_name,
                "value": metric_value,
                "unit": unit,
                "report": report,
            },
        )

    @contextmanager
    def record_duration(self, metric_name: str):
        """
        Record a duration. Usage:

        with zenbenchmark.record_duration('foobar_runtime'):
            foobar()   # measure this
        """
        start = timeit.default_timer()
        yield
        end = timeit.default_timer()

        self.record(
            metric_name=metric_name,
            metric_value=end - start,
            unit="s",
            report=MetricReport.LOWER_IS_BETTER,
        )

    def record_pg_bench_result(self, prefix: str, pg_bench_result: PgBenchRunResult):
        self.record(f"{prefix}.number_of_clients",
                    pg_bench_result.number_of_clients,
                    '',
                    MetricReport.TEST_PARAM)
        self.record(f"{prefix}.number_of_threads",
                    pg_bench_result.number_of_threads,
                    '',
                    MetricReport.TEST_PARAM)
        self.record(
            f"{prefix}.number_of_transactions_actually_processed",
            pg_bench_result.number_of_transactions_actually_processed,
            '',
            # TEST_PARAM because this value is predefined by the test matrix and doesn't change across runs
            report=MetricReport.TEST_PARAM,
        )
        self.record(f"{prefix}.latency_average",
                    pg_bench_result.latency_average,
                    unit="ms",
                    report=MetricReport.LOWER_IS_BETTER)
        if pg_bench_result.latency_stddev is not None:
            self.record(f"{prefix}.latency_stddev",
                        pg_bench_result.latency_stddev,
                        unit="ms",
                        report=MetricReport.LOWER_IS_BETTER)
        self.record(f"{prefix}.tps", pg_bench_result.tps, '', report=MetricReport.HIGHER_IS_BETTER)
        self.record(f"{prefix}.run_duration",
                    pg_bench_result.run_duration,
                    unit="s",
                    report=MetricReport.LOWER_IS_BETTER)
        self.record(f"{prefix}.run_start_timestamp",
                    pg_bench_result.run_start_timestamp,
                    '',
                    MetricReport.TEST_PARAM)
        self.record(f"{prefix}.run_end_timestamp",
                    pg_bench_result.run_end_timestamp,
                    '',
                    MetricReport.TEST_PARAM)

    def get_io_writes(self, pageserver) -> int:
        """
        Fetch the "cumulative # of bytes written" metric from the pageserver
        """
        metric_name = r'libmetrics_disk_io_bytes_total{io_operation="write"}'
        return self.get_int_counter_value(pageserver, metric_name)

    def get_peak_mem(self, pageserver) -> int:
        """
        Fetch the "maxrss" metric from the pageserver
        """
        metric_name = r'libmetrics_maxrss_kb'
        return self.get_int_counter_value(pageserver, metric_name)

    def get_int_counter_value(self, pageserver, metric_name) -> int:
        """Fetch the value of given int counter from pageserver metrics."""
        # TODO: If we start to collect more of the prometheus metrics in the
        # performance test suite like this, we should refactor this to load and
        # parse all the metrics into a more convenient structure in one go.
        #
        # The metric should be an integer, as it's a number of bytes. But in general
        # all prometheus metrics are floats. So to be pedantic, read it as a float
        # and round to integer.
        all_metrics = pageserver.http_client().get_metrics()
        matches = re.search(fr'^{metric_name} (\S+)$', all_metrics, re.MULTILINE)
        assert matches
        return int(round(float(matches.group(1))))

    def get_timeline_size(self, repo_dir: Path, tenantid: uuid.UUID, timelineid: str):
        """
        Calculate the on-disk size of a timeline
        """
        path = "{}/tenants/{}/timelines/{}".format(repo_dir, tenantid.hex, timelineid)

        totalbytes = 0
        for root, dirs, files in os.walk(path):
            for name in files:
                totalbytes += os.path.getsize(os.path.join(root, name))

        return totalbytes

    @contextmanager
    def record_pageserver_writes(self, pageserver, metric_name):
        """
        Record bytes written by the pageserver during a test.
        """
        before = self.get_io_writes(pageserver)
        yield
        after = self.get_io_writes(pageserver)

        self.record(metric_name,
                    round((after - before) / (1024 * 1024)),
                    "MB",
                    report=MetricReport.LOWER_IS_BETTER)
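
# Illustrative sketch (hypothetical, not part of this module): a benchmark test that runs
# pgbench against an already-running Postgres and records the parsed results through the
# fixture defined below. The connection string and pgbench flags are assumptions; the
# modules used (subprocess, timeit, calendar, datetime) are already imported at the top.
#
#   def test_pgbench_sketch(zenbenchmark):
#       connstr = "postgresql://localhost:5432/postgres"  # hypothetical
#       start_ts = calendar.timegm(datetime.utcnow().utctimetuple())
#       t0 = timeit.default_timer()
#       out = subprocess.run(["pgbench", "-c", "10", "-T", "30", connstr],
#                            check=True, capture_output=True, text=True).stdout
#       duration = timeit.default_timer() - t0
#       end_ts = calendar.timegm(datetime.utcnow().utctimetuple())
#       res = PgBenchRunResult.parse_from_stdout(out, duration, start_ts, end_ts)
#       zenbenchmark.record_pg_bench_result("pgbench", res)
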
@pytest.fixture(scope="function")
def zenbenchmark(record_property) -> Iterator[NeonBenchmarker]:
    """
    A pytest fixture for micro-benchmarks.

    It provides functions for recording measurements, which are printed out
    at the end of the test run.
    """
    benchmarker = NeonBenchmarker(record_property)
    yield benchmarker


def pytest_addoption(parser):
    parser.addoption(
        "--out-dir",
        dest="out_dir",
        help="Directory to output performance tests results to.",
    )


def get_out_path(target_dir: Path, revision: str) -> Path:
    """
    Get the output file path when running in the CI.

    The file name combines a counter with the commit revision to avoid duplicates.
    """
    # Use a UTC timestamp as the counter to avoid weird behaviour when,
    # for example, files are deleted.
    ts = calendar.timegm(datetime.utcnow().utctimetuple())
    path = target_dir / f"{ts}_{revision}.json"
    assert not path.exists()
    return path


# Hook to print the results at the end
@pytest.hookimpl(hookwrapper=True)
def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, config: Config):
    yield
    revision = os.getenv("GITHUB_SHA", "local")
    platform = os.getenv("PLATFORM", "local")

    terminalreporter.section("Benchmark results", "-")

    result = []
    for test_report in terminalreporter.stats.get("passed", []):
        result_entry = []

        for _, recorded_property in test_report.user_properties:
            terminalreporter.write("{}.{}: ".format(test_report.head_line,
                                                    recorded_property["name"]))

            unit = recorded_property["unit"]
            value = recorded_property["value"]
            if unit == "MB":
                terminalreporter.write("{0:,.0f}".format(value), green=True)
            elif unit in ("s", "ms") and isinstance(value, float):
                terminalreporter.write("{0:,.3f}".format(value), green=True)
            elif isinstance(value, float):
                terminalreporter.write("{0:,.4f}".format(value), green=True)
            else:
                terminalreporter.write(str(value), green=True)
            terminalreporter.line(" {}".format(unit))

            result_entry.append(recorded_property)

        result.append({
            "suit": test_report.nodeid,
            "total_duration": test_report.duration,
            "data": result_entry,
        })

    out_dir = config.getoption("out_dir")
    if out_dir is None:
        warnings.warn("no out dir provided to store performance test results")
        return

    if not result:
        warnings.warn("no results to store (no passed test suites)")
        return

    get_out_path(Path(out_dir), revision=revision).write_text(
        json.dumps({
            "revision": revision,
            "platform": platform,
            "result": result,
        }, indent=4))
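
# For reference, the JSON file written to --out-dir has roughly the following shape
# (values are illustrative; "suit" is the key name used above, and each "data" entry is
# one of the dicts recorded by NeonBenchmarker.record):
#
# {
#     "revision": "abc123",
#     "platform": "local",
#     "result": [
#         {
#             "suit": "performance/test_perf.py::test_mybench",
#             "total_duration": 12.34,
#             "data": [
#                 {"name": "tps", "value": 309.28, "unit": "", "report": "higher_is_better"}
#             ]
#         }
#     ]
# }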