Include # of bytes written in pgbench benchmark result

Now that the page server collects this metric (since commit 212920e47e),
let's include it in the performance test results.

The new metric looks like this:

    performance/test_perf_pgbench.py .         [100%]
    --------------- Benchmark results ----------------
    test_pgbench.init: 6.784 s
    test_pgbench.pageserver_writes: 466 MB    <---- THIS IS NEW
    test_pgbench.5000_xacts: 8.196 s
    test_pgbench.size: 163 MB

    =============== 1 passed in 21.00s ===============
2026-01-10 15:02:56 +00:00 · 2021-09-03 09:00:26 +03:00
parent 2c10224c9a
commit 21cf4a3e11
3 changed files with 42 additions and 6 deletions
--- a/test_runner/fixtures/benchmark_fixture.py
+++ b/test_runner/fixtures/benchmark_fixture.py
@@ -1,6 +1,7 @@
 from pprint import pprint

 import os
+import re
 import timeit
 import pathlib
 import uuid
@@ -120,6 +121,35 @@ class ZenithBenchmarker:

        self.results.record(self.request.node.name, metric_name, end - start, 's')

+    def get_io_writes(self, pageserver) -> int:
+        """
+        Fetch the "cumulative # of bytes written" metric from the pageserver.
+
+        Reads the pageserver's metrics text via its HTTP client and returns the
+        value of the pageserver_disk_io_bytes{io_operation="write"} sample,
+        rounded to the nearest integer.
+
+        Raises RuntimeError if that sample is not present in the metrics text.
+        """
+        # Fetch all the exposed prometheus metrics from page server
+        all_metrics = pageserver.http_client().get_metrics()
+        # Use a regular expression to extract the one we're interested in
+        #
+        # TODO: If we start to collect more of the prometheus metrics in the
+        # performance test suite like this, we should refactor this to load and
+        # parse all the metrics into a more convenient structure in one go.
+        #
+        # The braces are escaped so that they are unambiguously literal characters
+        # rather than a (malformed) repetition quantifier.
+        matches = re.search(r'pageserver_disk_io_bytes\{io_operation="write"\} (\S+)', all_metrics)
+        if matches is None:
+            # Fail with a message that names the missing metric, instead of an
+            # AttributeError on None from matches.group().
+            raise RuntimeError(
+                'metric pageserver_disk_io_bytes{io_operation="write"} not found in pageserver metrics')
+        # The metric should be an integer, as it's a number of bytes. But in general
+        # all prometheus metrics are floats. So to be pedantic, read it as a float
+        # and round to integer.
+        return int(round(float(matches.group(1))))
+
+    @contextmanager
+    def record_pageserver_writes(self, pageserver, metric_name):
+        """
+        Context manager that records how much the pageserver wrote while the
+        body of the 'with' statement was running.
+
+        The pageserver's cumulative write counter is sampled before and after
+        the body. The difference is converted from bytes to units of
+        1024 * 1024 bytes, rounded to a whole number, and recorded under
+        'metric_name' with the unit 'MB' for the current test.
+
+        If the body raises, the exception propagates and nothing is recorded.
+        """
+        bytes_at_start = self.get_io_writes(pageserver)
+        yield
+        bytes_written = self.get_io_writes(pageserver) - bytes_at_start
+
+        megabytes = round(bytes_written / (1024 * 1024))
+        self.results.record(self.request.node.name, metric_name, megabytes, 'MB')
+
@pytest.fixture(scope='function')
 def zenbenchmark(zenbenchmark_global, request) -> Iterator[ZenithBenchmarker]:
    """
--- a/test_runner/fixtures/zenith_fixtures.py
+++ b/test_runner/fixtures/zenith_fixtures.py
@@ -226,6 +226,11 @@ class ZenithPageserverHttpClient(requests.Session):
        res.raise_for_status()
        return res.json()

+    def get_metrics(self) -> str:
+        """
+        Fetch the /metrics endpoint on localhost at this client's port and
+        return the response body as text.
+
+        An HTTP error status is turned into an exception by raise_for_status().
+        """
+        response = self.get(f"http://localhost:{self.port}/metrics")
+        response.raise_for_status()
+        return response.text
+

@dataclass
 class AuthKeys:
--- a/test_runner/performance/test_perf_pgbench.py
+++ b/test_runner/performance/test_perf_pgbench.py
@@ -46,13 +46,14 @@ def test_pgbench(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin

    connstr = pg.connstr()

-    # Initialize pgbench database
-    with zenbenchmark.record_duration('init'):
-        pg_bin.run_capture(['pgbench', '-s5', '-i', connstr])
+    # Initialize pgbench database, recording the time and I/O it takes
+    with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
+        with zenbenchmark.record_duration('init'):
+            pg_bin.run_capture(['pgbench', '-s5', '-i', connstr])

-        # Flush the layers from memory to disk. The time to do that is included in the
-        # reported init time.
-        pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
+            # Flush the layers from memory to disk. This is included in the reported
+            # time and I/O
+            pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")

    # Run pgbench for 5000 transactions
    with zenbenchmark.record_duration('5000_xacts'):