diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py
index 43e6646d0c..86ca78d000 100644
--- a/test_runner/fixtures/benchmark_fixture.py
+++ b/test_runner/fixtures/benchmark_fixture.py
@@ -1,6 +1,7 @@
 from pprint import pprint
 import os
+import re
 import timeit
 import pathlib
 import uuid
@@ -120,6 +121,35 @@ class ZenithBenchmarker:

         self.results.record(self.request.node.name, metric_name, end - start, 's')

+    def get_io_writes(self, pageserver) -> int:
+        """
+        Fetch the "cumulative # of bytes written" metric from the pageserver
+        """
+        # Fetch all the exposed prometheus metrics from page server
+        all_metrics = pageserver.http_client().get_metrics()
+        # Use a regular expression to extract the one we're interested in
+        #
+        # TODO: If we start to collect more of the prometheus metrics in the
+        # performance test suite like this, we should refactor this to load and
+        # parse all the metrics into a more convenient structure in one go.
+        #
+        # The metric should be an integer, as it's a number of bytes. But in general
+        # all prometheus metrics are floats. So to be pedantic, read it as a float
+        # and round to integer.
+        matches = re.search(r'pageserver_disk_io_bytes{io_operation="write"} (\S+)', all_metrics)
+        return int(round(float(matches.group(1))))
+
+    @contextmanager
+    def record_pageserver_writes(self, pageserver, metric_name):
+        """
+        Record bytes written by the pageserver during a test.
+        """
+        before = self.get_io_writes(pageserver)
+        yield
+        after = self.get_io_writes(pageserver)
+
+        self.results.record(self.request.node.name, metric_name, round((after - before) / (1024 * 1024)), 'MB')
+
 @pytest.fixture(scope='function')
 def zenbenchmark(zenbenchmark_global, request) -> Iterator[ZenithBenchmarker]:
     """
diff --git a/test_runner/fixtures/zenith_fixtures.py b/test_runner/fixtures/zenith_fixtures.py
index bef4acbd4a..573649b520 100644
--- a/test_runner/fixtures/zenith_fixtures.py
+++ b/test_runner/fixtures/zenith_fixtures.py
@@ -226,6 +226,11 @@ class ZenithPageserverHttpClient(requests.Session):
         res.raise_for_status()
         return res.json()

+    def get_metrics(self) -> str:
+        res = self.get(f"http://localhost:{self.port}/metrics")
+        res.raise_for_status()
+        return res.text
+
 @dataclass
 class AuthKeys:
diff --git a/test_runner/performance/test_perf_pgbench.py b/test_runner/performance/test_perf_pgbench.py
index 91485df1dc..7e0f19bec8 100644
--- a/test_runner/performance/test_perf_pgbench.py
+++ b/test_runner/performance/test_perf_pgbench.py
@@ -46,13 +46,14 @@ def test_pgbench(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin

     connstr = pg.connstr()

-    # Initialize pgbench database
-    with zenbenchmark.record_duration('init'):
-        pg_bin.run_capture(['pgbench', '-s5', '-i', connstr])
+    # Initialize pgbench database, recording the time and I/O it takes
+    with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
+        with zenbenchmark.record_duration('init'):
+            pg_bin.run_capture(['pgbench', '-s5', '-i', connstr])

-    # Flush the layers from memory to disk. The time to do that is included in the
-    # reported init time.
-    pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
+        # Flush the layers from memory to disk. This is included in the reported
+        # time and I/O.
+        pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")

     # Run pgbench for 5000 transactions
     with zenbenchmark.record_duration('5000_xacts'):
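
Note on the TODO in get_io_writes(): one possible shape for the suggested refactoring is a minimal sketch along these lines, assuming the prometheus_client package is available in the test environment. parse_metrics is a hypothetical helper, not part of this diff; get_metrics() is the accessor added to ZenithPageserverHttpClient above.

    # Hypothetical sketch: parse the whole /metrics payload once instead of
    # regexp-matching the raw text for each individual metric.
    from prometheus_client.parser import text_string_to_metric_families

    def parse_metrics(text: str) -> dict:
        """Flatten the prometheus text exposition into {(name, labels): value}."""
        parsed = {}
        for family in text_string_to_metric_families(text):
            for sample in family.samples:
                # Freeze the label dict so the (name, labels) pair is hashable
                parsed[(sample.name, frozenset(sample.labels.items()))] = sample.value
        return parsed

    # get_io_writes() would then reduce to a dictionary lookup:
    metrics = parse_metrics(pageserver.http_client().get_metrics())
    writes = int(metrics[("pageserver_disk_io_bytes",
                          frozenset({"io_operation": "write"}.items()))])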