[test_runner/performance] scale: Increase data volumes in PostgreSQL regression tests by 10x

Co-Authored-By: alexeymasterov@neon.tech <alexeymasterov@neon.tech>

Author: Devin AI
Date: 2025-05-19 10:06:26 +00:00
parent cdb6479c8a
commit 759962c5cc

9 changed files with 20 additions and 20 deletions

View File

@@ -70,9 +70,9 @@ def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
     endpoint_child = env.endpoints.create_start("child")
     with neon_compare.record_duration("root_run_duration"):
-        endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
+        endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,10000000)") # 10x increase from 1000000
     with neon_compare.record_duration("child_run_duration"):
-        endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
+        endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,10000000)") # 10x increase from 1000000
 def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
@@ -83,7 +83,7 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
     endpoint_root.safe_psql_many(
         [
             "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
-            "INSERT INTO foo SELECT FROM generate_series(1,1000000)",
+            "INSERT INTO foo SELECT FROM generate_series(1,10000000)", # 10x increase from 1000000
         ]
     )
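As an aside on the pattern these hunks scale up: `INSERT INTO foo SELECT FROM generate_series(1, N)` uses a zero-column SELECT, so PostgreSQL fills every generated row with the table's column defaults. A minimal standalone sketch of the same idea (not part of this commit; the connection handling and the `seed_and_measure` name are illustrative), using psycopg2 as the surrounding tests do:

    import psycopg2

    def seed_and_measure(connstr: str, rows: int = 10_000_000) -> int:
        """Insert `rows` default-valued rows into foo and return its on-disk size in bytes."""
        with psycopg2.connect(connstr) as conn, conn.cursor() as cur:
            cur.execute(
                "CREATE TABLE IF NOT EXISTS foo (key serial PRIMARY KEY, "
                "t text DEFAULT 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')"
            )
            # A SELECT with no target list inserts one all-defaults row per generated value.
            cur.execute(f"INSERT INTO foo SELECT FROM generate_series(1, {rows})")
            cur.execute("SELECT pg_table_size('foo')")
            return cur.fetchone()[0]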

View File

@@ -59,7 +59,7 @@ def test_copy(neon_with_baseline: PgCompare):
     # Since there's no data in the table previously, this extends it.
     with env.record_pageserver_writes("copy_extend_pageserver_writes"):
         with env.record_duration("copy_extend"):
-            cur.copy_from(copy_test_data(1000000), "copytest")
+            cur.copy_from(copy_test_data(10000000), "copytest") # 10x increase from 1000000
             env.flush()
     # Delete most rows, and VACUUM to make the space available for reuse.
@@ -79,7 +79,7 @@ def test_copy(neon_with_baseline: PgCompare):
     # This will also clear all the VM bits.
     with env.record_pageserver_writes("copy_reuse_pageserver_writes"):
         with env.record_duration("copy_reuse"):
-            cur.copy_from(copy_test_data(1000000), "copytest")
+            cur.copy_from(copy_test_data(10000000), "copytest") # 10x increase from 1000000
             env.flush()
     env.report_peak_memory_use()
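The `copy_test_data` helper called above is not shown in this hunk. psycopg2's `cursor.copy_from()` reads from a file-like object, so a sketch consistent with how it is invoked here (the column layout of `copytest` is an assumption) might look like:

    from io import StringIO

    def copy_test_data(num_rows: int) -> StringIO:
        """Build an in-memory, tab-separated payload for cursor.copy_from()."""
        buf = StringIO()
        for i in range(num_rows):
            # Assumed two-column layout: an id plus some filler text for row width.
            buf.write(f"{i}\tsome filler text to give each row a bit of width\n")
        buf.seek(0)
        return buf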

View File

@@ -101,8 +101,8 @@ def test_cumulative_statistics_persistence(
     try:
         connstr = project["connection_uris"][0]["connection_uri"]
         env = connection_parameters_to_env(project["connection_uris"][0]["connection_parameters"])
-        # seed about 1 GiB of data into pgbench_accounts
-        pg_bin.run_capture(["pgbench", "-i", "-s68"], env=env)
+        # seed about 10 GiB of data into pgbench_accounts
+        pg_bin.run_capture(["pgbench", "-i", "-s680"], env=env) # 10x increase from 68
         # assert rows in pgbench_accounts is 6800000 rows
         conn = psycopg2.connect(connstr)
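For the pgbench numbers: `pgbench -i` creates 100,000 rows in pgbench_accounts per scale-factor unit, so `-s680` seeds 68,000,000 rows, roughly 10 GiB on disk. A hedged sanity-check sketch (not part of the commit; the function name and default are illustrative):

    import psycopg2

    def assert_pgbench_account_rows(connstr: str, scale: int = 680) -> None:
        expected = scale * 100_000  # pgbench_accounts grows by 100,000 rows per scale unit
        with psycopg2.connect(connstr) as conn, conn.cursor() as cur:
            cur.execute("SELECT count(*) FROM pgbench_accounts")
            assert cur.fetchone()[0] == expected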

View File

@@ -13,7 +13,7 @@ if TYPE_CHECKING:
     from fixtures.neon_fixtures import PgProtocol
-def start_write_workload(pg: PgProtocol, scale: int = 10):
+def start_write_workload(pg: PgProtocol, scale: int = 100): # 10x increase from 10
     with pg.connect().cursor() as cur:
         cur.execute(f"create table big as select generate_series(1,{scale * 100_000})")
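With the new default, `start_write_workload` creates `scale * 100_000 = 10,000,000` rows in table `big`; the old volume stays reachable by passing the scale explicitly. A usage sketch, assuming `pg` is whatever PgProtocol fixture the caller already holds:

    start_write_workload(pg)            # new default scale=100 -> 10,000,000 rows
    start_write_workload(pg, scale=10)  # previous default      ->  1,000,000 rows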

View File

@@ -17,13 +17,13 @@ async def repeat_bytes(buf, repetitions: int):
 async def copy_test_data_to_table(endpoint: Endpoint, worker_id: int, table_name: str):
     buf = BytesIO()
-    for i in range(1000):
+    for i in range(10000): # 10x increase from 1000
         buf.write(
             f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()
         )
     buf.seek(0)
-    copy_input = repeat_bytes(buf.read(), 5000)
+    copy_input = repeat_bytes(buf.read(), 50000) # 10x increase from 5000
     pg_conn = await endpoint.connect_async()

View File

@@ -16,13 +16,13 @@ async def repeat_bytes(buf, repetitions: int):
 async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: str):
     buf = BytesIO()
-    for i in range(1000):
+    for i in range(10000): # 10x increase from 1000
         buf.write(
             f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()
         )
     buf.seek(0)
-    copy_input = repeat_bytes(buf.read(), 5000)
+    copy_input = repeat_bytes(buf.read(), 50000) # 10x increase from 5000
     pg_conn = await pg.connect_async()
     await pg_conn.copy_to_table(table_name, source=copy_input)
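`repeat_bytes` is only named in the hunk headers of these two near-identical helpers. Each formatted line is roughly 60 bytes, so the 10,000-line buffer is about 600 KB, and repeating it 50,000 times streams on the order of 30 GB through COPY per worker. A minimal async-generator sketch consistent with how the helper is consumed here (asyncpg's `copy_to_table` accepts an asynchronous iterable of bytes as `source`), not necessarily the file's actual implementation:

    async def repeat_bytes(buf: bytes, repetitions: int):
        """Yield the same chunk `repetitions` times so COPY can stream the payload
        without materializing all of it in memory."""
        for _ in range(repetitions):
            yield buf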

View File

@@ -181,7 +181,7 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: P
     env.report_size()
-def get_durations_matrix(default: int = 45) -> list[int]:
+def get_durations_matrix(default: int = 450) -> list[int]: # 10x increase from 45
     durations = os.getenv("TEST_PG_BENCH_DURATIONS_MATRIX", default=str(default))
     rv = []
     for d in durations.split(","):
@@ -197,7 +197,7 @@ def get_durations_matrix(default: int = 45) -> list[int]:
     return rv
-def get_scales_matrix(default: int = 10) -> list[int]:
+def get_scales_matrix(default: int = 100) -> list[int]: # 10x increase from 10
     scales = os.getenv("TEST_PG_BENCH_SCALES_MATRIX", default=str(default))
     rv = []
     for s in scales.split(","):
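Both defaults can still be overridden through the environment variables read above, so the previous values remain reachable without editing the test. A usage sketch, assuming the loops simply append each comma-separated entry as an int:

    import os

    # Must be set before the test module reads the variables, e.g. exported in the shell
    # that launches pytest; shown here as Python for illustration.
    os.environ["TEST_PG_BENCH_SCALES_MATRIX"] = "10,100"      # run the old and new scales
    os.environ["TEST_PG_BENCH_DURATIONS_MATRIX"] = "45,450"   # run the old and new durations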

View File

@@ -18,13 +18,13 @@ if TYPE_CHECKING:
 @pytest.mark.parametrize(
     "rows,iters,workers",
     [
-        # The test table is large enough (3-4 MB) that it doesn't fit in the compute node
+        # The test table is large enough (30-40 MB) that it doesn't fit in the compute node
         # cache, so the seqscans go to the page server. But small enough that it fits
         # into memory in the page server.
-        pytest.param(100000, 100, 0),
+        pytest.param(1000000, 100, 0), # 10x increase from 100000
         # Also test with a larger table, with and without parallelism
-        pytest.param(10000000, 1, 0),
-        pytest.param(10000000, 1, 4),
+        pytest.param(100000000, 1, 0), # 10x increase from 10000000
+        pytest.param(100000000, 1, 4), # 10x increase from 10000000
     ],
 )
 @pytest.mark.parametrize(
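Rough arithmetic behind the updated size comment (an estimate, not a measurement): one million narrow heap rows at roughly 30-40 bytes each, once the ~24-byte tuple header is counted, lands in the 30-40 MB range, which is what keeps the table out of the compute cache while still fitting in pageserver memory.

    rows, bytes_per_row = 1_000_000, 36                       # assumed average, incl. tuple header overhead
    print(f"~{rows * bytes_per_row / 1024 / 1024:.0f} MiB")   # ~34 MiB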

View File

@@ -69,9 +69,9 @@ def start_heavy_write_workload(env: PgCompare, n_tables: int, scale: int, num_it
     ## Single table workload:
     At each step, insert new `new_rows_each_update` rows.
-    The variable `new_rows_each_update` is equal to `scale * 100_000`.
+    The variable `new_rows_each_update` is equal to `scale * 1_000_000`.
     The number of steps is determined by `num_iters` variable."""
-    new_rows_each_update = scale * 100_000
+    new_rows_each_update = scale * 1_000_000 # 10x increase from 100_000
     def start_single_table_workload(table_id: int):
         for _ in range(num_iters):
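The loop body is cut off by the hunk. Per the docstring, each iteration now appends `new_rows_each_update = scale * 1_000_000` rows instead of `scale * 100_000`. A hypothetical shape of one iteration, only to illustrate the volume (`env.pg` and the per-table name are assumptions, not the file's actual code):

    with env.pg.connect().cursor() as cur:
        # Append one batch of new rows to this worker's table.
        cur.execute(
            f"INSERT INTO t{table_id} SELECT generate_series(1, {new_rows_each_update})"
        )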