Add read/write throughput performance tests (#1883)

Part of #1467 

This PR adds several performance tests that compare the [PG statistics](https://www.postgresql.org/docs/current/monitoring-stats.html) collected while running pgbench workloads against Neon and against vanilla PG, in order to measure the read/write throughput of the database.
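The approach, as a minimal standalone sketch (the connection string, database name, and helper names here are illustrative assumptions, not code from this PR): snapshot the relevant cumulative counters, run the workload, snapshot again, and report the deltas.

```python
import psycopg2

# Counters whose growth approximates read/write throughput (from pg_stat_database).
COLUMNS = ["tup_returned", "tup_fetched", "tup_inserted", "tup_updated", "tup_deleted"]
QUERY = f"SELECT {','.join(COLUMNS)} FROM pg_stat_database WHERE datname = 'postgres'"

def snapshot(conn):
    # Read the current cumulative counter values for the 'postgres' database.
    with conn.cursor() as cur:
        cur.execute(QUERY)
        return dict(zip(COLUMNS, cur.fetchone()))

conn = psycopg2.connect("dbname=postgres")  # assumed local instance
before = snapshot(conn)
# ... run a benchmark (e.g. pgbench) against the same database here ...
after = snapshot(conn)
for col in COLUMNS:
    print(f"pg_stat_database.{col}: {after[col] - before[col]}")
```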
Thang Pham
2022-06-06 12:32:10 -04:00
committed by GitHub
parent fecad1ca34
commit 6cfebc096f
5 changed files with 190 additions and 12 deletions

conftest.py

@@ -1,6 +1,5 @@
-pytest_plugins = (
-    "fixtures.neon_fixtures",
-    "fixtures.benchmark_fixture",
-    "fixtures.compare_fixtures",
-    "fixtures.slow",
-)
+pytest_plugins = ("fixtures.neon_fixtures",
+                  "fixtures.benchmark_fixture",
+                  "fixtures.compare_fixtures",
+                  "fixtures.slow",
+                  "fixtures.pg_stats")

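Listing `fixtures.pg_stats` in `pytest_plugins` makes every fixture defined in that module available to tests without an explicit import. A minimal illustration of the mechanism (file contents below are hypothetical):

```python
# conftest.py
pytest_plugins = ("fixtures.pg_stats", )

# test_something.py -- no import of fixtures.pg_stats is needed:
def test_uses_plugin_fixture(pg_stats_rw):
    assert pg_stats_rw[0].table == "pg_stat_database"
```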
fixtures/compare_fixtures.py

@@ -1,12 +1,13 @@
import pytest
from contextlib import contextmanager
from abc import ABC, abstractmethod
from fixtures.pg_stats import PgStatTable
from fixtures.neon_fixtures import PgBin, PgProtocol, VanillaPostgres, RemotePostgres, NeonEnv
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker

# Type-related stuff
from typing import Iterator
from typing import Dict, List


class PgCompare(ABC):
@@ -51,6 +52,31 @@ class PgCompare(ABC):
    def record_duration(self, out_name):
        pass

    @contextmanager
    def record_pg_stats(self, pg_stats: List[PgStatTable]):
        init_data = self._retrieve_pg_stats(pg_stats)
        yield
        data = self._retrieve_pg_stats(pg_stats)

        for k in set(init_data) & set(data):
            self.zenbenchmark.record(k, data[k] - init_data[k], '', MetricReport.HIGHER_IS_BETTER)

    def _retrieve_pg_stats(self, pg_stats: List[PgStatTable]) -> Dict[str, int]:
        results: Dict[str, int] = {}

        with self.pg.connect().cursor() as cur:
            for pg_stat in pg_stats:
                cur.execute(pg_stat.query)
                row = cur.fetchone()
                assert len(row) == len(pg_stat.columns)

                for col, val in zip(pg_stat.columns, row):
                    results[f"{pg_stat.table}.{col}"] = int(val)

        return results


class NeonCompare(PgCompare):
    """PgCompare interface for the neon stack."""

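`record_pg_stats` is the usual before/after context-manager pattern: snapshot the counters, let the caller run a workload, then record the growth. Stripped of the database specifics, the idea looks like this (a toy sketch, not project code):

```python
from contextlib import contextmanager

@contextmanager
def record_delta(read, report):
    before = read()
    yield                      # caller's workload runs here
    after = read()
    for key in set(before) & set(after):
        report(key, after[key] - before[key])

counters = {"tup_inserted": 0}
with record_delta(lambda: dict(counters), lambda k, d: print(k, d)):
    counters["tup_inserted"] += 1000   # stand-in for a pgbench run
# prints: tup_inserted 1000
```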
fixtures/pg_stats.py (new file)

@@ -0,0 +1,52 @@
from typing import List

import pytest


class PgStatTable:
    table: str
    columns: List[str]
    additional_query: str

    def __init__(self, table: str, columns: List[str], filter_query: str = ""):
        self.table = table
        self.columns = columns
        self.additional_query = filter_query

    @property
    def query(self) -> str:
        return f"SELECT {','.join(self.columns)} FROM {self.table} {self.additional_query}"


@pytest.fixture(scope='function')
def pg_stats_rw() -> List[PgStatTable]:
    return [
        PgStatTable("pg_stat_database",
                    ["tup_returned", "tup_fetched", "tup_inserted", "tup_updated", "tup_deleted"],
                    "WHERE datname='postgres'"),
    ]


@pytest.fixture(scope='function')
def pg_stats_ro() -> List[PgStatTable]:
    return [
        PgStatTable("pg_stat_database", ["tup_returned", "tup_fetched"],
                    "WHERE datname='postgres'"),
    ]


@pytest.fixture(scope='function')
def pg_stats_wo() -> List[PgStatTable]:
    return [
        PgStatTable("pg_stat_database", ["tup_inserted", "tup_updated", "tup_deleted"],
                    "WHERE datname='postgres'"),
    ]


@pytest.fixture(scope='function')
def pg_stats_wal() -> List[PgStatTable]:
    return [
        PgStatTable("pg_stat_wal",
                    ["wal_records", "wal_fpi", "wal_bytes", "wal_buffers_full", "wal_write"],
                    "")
    ]
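For instance, the `pg_stats_ro` fixture above produces the following SQL (constructed here by hand to show the output of the `query` property):

```python
from fixtures.pg_stats import PgStatTable

stat = PgStatTable("pg_stat_database",
                   ["tup_returned", "tup_fetched"],
                   "WHERE datname='postgres'")
print(stat.query)
# SELECT tup_returned,tup_fetched FROM pg_stat_database WHERE datname='postgres'
```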

New test file (performance/)

@@ -0,0 +1,101 @@
import os
from typing import List

import pytest
from fixtures.compare_fixtures import PgCompare
from fixtures.pg_stats import PgStatTable

from performance.test_perf_pgbench import get_durations_matrix, get_scales_matrix


def get_seeds_matrix(default: int = 100):
    seeds = os.getenv("TEST_PG_BENCH_SEEDS_MATRIX", default=str(default))
    return list(map(int, seeds.split(",")))


@pytest.mark.parametrize("seed", get_seeds_matrix())
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix(5))
def test_compare_pg_stats_rw_with_pgbench_default(neon_with_baseline: PgCompare,
                                                  seed: int,
                                                  scale: int,
                                                  duration: int,
                                                  pg_stats_rw: List[PgStatTable]):
    env = neon_with_baseline
    # initialize pgbench
    env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()])
    env.flush()

    with env.record_pg_stats(pg_stats_rw):
        env.pg_bin.run_capture(
            ['pgbench', f'-T{duration}', f'--random-seed={seed}', '-Mprepared', env.pg.connstr()])
        env.flush()


@pytest.mark.parametrize("seed", get_seeds_matrix())
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix(5))
def test_compare_pg_stats_wo_with_pgbench_simple_update(neon_with_baseline: PgCompare,
                                                        seed: int,
                                                        scale: int,
                                                        duration: int,
                                                        pg_stats_wo: List[PgStatTable]):
    env = neon_with_baseline
    # initialize pgbench
    env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()])
    env.flush()

    with env.record_pg_stats(pg_stats_wo):
        env.pg_bin.run_capture([
            'pgbench',
            '-N',
            f'-T{duration}',
            f'--random-seed={seed}',
            '-Mprepared',
            env.pg.connstr()
        ])
        env.flush()


@pytest.mark.parametrize("seed", get_seeds_matrix())
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix(5))
def test_compare_pg_stats_ro_with_pgbench_select_only(neon_with_baseline: PgCompare,
                                                      seed: int,
                                                      scale: int,
                                                      duration: int,
                                                      pg_stats_ro: List[PgStatTable]):
    env = neon_with_baseline
    # initialize pgbench
    env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()])
    env.flush()

    with env.record_pg_stats(pg_stats_ro):
        env.pg_bin.run_capture([
            'pgbench',
            '-S',
            f'-T{duration}',
            f'--random-seed={seed}',
            '-Mprepared',
            env.pg.connstr()
        ])
        env.flush()


@pytest.mark.parametrize("seed", get_seeds_matrix())
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix(5))
def test_compare_pg_stats_wal_with_pgbench_default(neon_with_baseline: PgCompare,
                                                   seed: int,
                                                   scale: int,
                                                   duration: int,
                                                   pg_stats_wal: List[PgStatTable]):
    env = neon_with_baseline
    # initialize pgbench
    env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()])
    env.flush()

    with env.record_pg_stats(pg_stats_wal):
        env.pg_bin.run_capture(
            ['pgbench', f'-T{duration}', f'--random-seed={seed}', '-Mprepared', env.pg.connstr()])
        env.flush()

performance/test_perf_pgbench.py

@@ -79,7 +79,7 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int):
     # Run simple-update workload
     run_pgbench(env,
                 "simple-update",
-                ['pgbench', '-n', '-c4', f'-T{duration}', '-P2', '-Mprepared', env.pg.connstr()])
+                ['pgbench', '-N', '-c4', f'-T{duration}', '-P2', '-Mprepared', env.pg.connstr()])

     # Run SELECT workload
     run_pgbench(env,
@@ -89,13 +89,13 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int):
     env.report_size()


-def get_durations_matrix():
-    durations = os.getenv("TEST_PG_BENCH_DURATIONS_MATRIX", default="45")
+def get_durations_matrix(default: int = 45):
+    durations = os.getenv("TEST_PG_BENCH_DURATIONS_MATRIX", default=str(default))
     return list(map(int, durations.split(",")))


-def get_scales_matrix():
-    scales = os.getenv("TEST_PG_BENCH_SCALES_MATRIX", default="10")
+def get_scales_matrix(default: int = 10):
+    scales = os.getenv("TEST_PG_BENCH_SCALES_MATRIX", default=str(default))
     return list(map(int, scales.split(",")))
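Since the matrix helpers just split a comma-separated environment variable, a single run can sweep several values; for example (values below are hypothetical):

```python
import os
from performance.test_perf_pgbench import get_scales_matrix

os.environ["TEST_PG_BENCH_SCALES_MATRIX"] = "10,50,100"  # hypothetical sweep
assert get_scales_matrix() == [10, 50, 100]  # one pytest parametrization per scale

del os.environ["TEST_PG_BENCH_SCALES_MATRIX"]
assert get_scales_matrix() == [10]   # built-in default
assert get_scales_matrix(5) == [5]   # caller-supplied default, as in the new tests
```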