From 759962c5cc85354161aa86b8d6b3eeeaf9ecdfec Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 19 May 2025 10:06:26 +0000
Subject: [PATCH] [test_runner/performance] scale: Increase data volumes in
 PostgreSQL regression tests by 10x

Co-Authored-By: alexeymasterov@neon.tech
---
 test_runner/performance/test_branching.py                 | 6 +++---
 test_runner/performance/test_copy.py                      | 4 ++--
 .../performance/test_cumulative_statistics_persistence.py | 6 +++---
 test_runner/performance/test_latency.py                   | 2 +-
 test_runner/performance/test_parallel_copy.py             | 2 +-
 test_runner/performance/test_parallel_copy_to.py          | 2 +-
 test_runner/performance/test_perf_pgbench.py              | 4 ++--
 test_runner/performance/test_seqscans.py                  | 8 ++++----
 test_runner/performance/test_wal_backpressure.py          | 4 ++--
 9 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/test_runner/performance/test_branching.py b/test_runner/performance/test_branching.py
index 1b29dab288..bada3e80b5 100644
--- a/test_runner/performance/test_branching.py
+++ b/test_runner/performance/test_branching.py
@@ -70,9 +70,9 @@ def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
     endpoint_child = env.endpoints.create_start("child")
 
     with neon_compare.record_duration("root_run_duration"):
-        endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
+        endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,10000000)")  # 10x increase from 1000000
 
     with neon_compare.record_duration("child_run_duration"):
-        endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
+        endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,10000000)")  # 10x increase from 1000000
 
 def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
@@ -83,7 +83,7 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
     endpoint_root.safe_psql_many(
         [
             "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
-            "INSERT INTO foo SELECT FROM generate_series(1,1000000)",
+            "INSERT INTO foo SELECT FROM generate_series(1,10000000)",  # 10x increase from 1000000
         ]
     )
 
diff --git a/test_runner/performance/test_copy.py b/test_runner/performance/test_copy.py
index 8535e6843d..3eaeb7d113 100644
--- a/test_runner/performance/test_copy.py
+++ b/test_runner/performance/test_copy.py
@@ -59,7 +59,7 @@ def test_copy(neon_with_baseline: PgCompare):
     # Since there's no data in the table previously, this extends it.
     with env.record_pageserver_writes("copy_extend_pageserver_writes"):
         with env.record_duration("copy_extend"):
-            cur.copy_from(copy_test_data(1000000), "copytest")
+            cur.copy_from(copy_test_data(10000000), "copytest")  # 10x increase from 1000000
             env.flush()
 
     # Delete most rows, and VACUUM to make the space available for reuse.
@@ -79,7 +79,7 @@ def test_copy(neon_with_baseline: PgCompare):
     # This will also clear all the VM bits.
     with env.record_pageserver_writes("copy_reuse_pageserver_writes"):
         with env.record_duration("copy_reuse"):
-            cur.copy_from(copy_test_data(1000000), "copytest")
+            cur.copy_from(copy_test_data(10000000), "copytest")  # 10x increase from 1000000
             env.flush()
 
     env.report_peak_memory_use()
diff --git a/test_runner/performance/test_cumulative_statistics_persistence.py b/test_runner/performance/test_cumulative_statistics_persistence.py
index 5e9e55cb0f..8d661c92da 100644
--- a/test_runner/performance/test_cumulative_statistics_persistence.py
+++ b/test_runner/performance/test_cumulative_statistics_persistence.py
@@ -101,8 +101,8 @@ def test_cumulative_statistics_persistence(
     try:
         connstr = project["connection_uris"][0]["connection_uri"]
        env = connection_parameters_to_env(project["connection_uris"][0]["connection_parameters"])
-        # seed about 1 GiB of data into pgbench_accounts
-        pg_bin.run_capture(["pgbench", "-i", "-s68"], env=env)
+        # seed about 10 GiB of data into pgbench_accounts
+        pg_bin.run_capture(["pgbench", "-i", "-s680"], env=env)  # 10x increase from 68
 
-        # assert rows in pgbench_accounts is 6800000 rows
+        # assert rows in pgbench_accounts is 68000000 rows
         conn = psycopg2.connect(connstr)
diff --git a/test_runner/performance/test_latency.py b/test_runner/performance/test_latency.py
index 0431f0bf42..3045897f10 100644
--- a/test_runner/performance/test_latency.py
+++ b/test_runner/performance/test_latency.py
@@ -13,7 +13,7 @@ if TYPE_CHECKING:
     from fixtures.neon_fixtures import PgProtocol
 
 
-def start_write_workload(pg: PgProtocol, scale: int = 10):
+def start_write_workload(pg: PgProtocol, scale: int = 100):  # 10x increase from 10
     with pg.connect().cursor() as cur:
         cur.execute(f"create table big as select generate_series(1,{scale * 100_000})")
 
diff --git a/test_runner/performance/test_parallel_copy.py b/test_runner/performance/test_parallel_copy.py
index f7f20bd33e..d0bac8bf87 100644
--- a/test_runner/performance/test_parallel_copy.py
+++ b/test_runner/performance/test_parallel_copy.py
@@ -17,13 +17,13 @@ async def repeat_bytes(buf, repetitions: int):
 
 
 async def copy_test_data_to_table(endpoint: Endpoint, worker_id: int, table_name: str):
     buf = BytesIO()
     for i in range(1000):
         buf.write(
             f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()
         )
     buf.seek(0)
 
-    copy_input = repeat_bytes(buf.read(), 5000)
+    copy_input = repeat_bytes(buf.read(), 50000)  # 10x increase from 5000
 
     pg_conn = await endpoint.connect_async()
diff --git a/test_runner/performance/test_parallel_copy_to.py b/test_runner/performance/test_parallel_copy_to.py
index 0427ecaf0a..5da88015e6 100644
--- a/test_runner/performance/test_parallel_copy_to.py
+++ b/test_runner/performance/test_parallel_copy_to.py
@@ -16,13 +16,13 @@ async def repeat_bytes(buf, repetitions: int):
 
 async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: str):
     buf = BytesIO()
     for i in range(1000):
         buf.write(
             f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()
         )
     buf.seek(0)
 
-    copy_input = repeat_bytes(buf.read(), 5000)
+    copy_input = repeat_bytes(buf.read(), 50000)  # 10x increase from 5000
 
     pg_conn = await pg.connect_async()
     await pg_conn.copy_to_table(table_name, source=copy_input)
diff --git a/test_runner/performance/test_perf_pgbench.py b/test_runner/performance/test_perf_pgbench.py
index 57889ceadf..07d4f7dbdf 100644
--- a/test_runner/performance/test_perf_pgbench.py
+++ b/test_runner/performance/test_perf_pgbench.py
@@ -181,7 +181,7 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: P
     env.report_size()
 
 
-def get_durations_matrix(default: int = 45) -> list[int]:
+def get_durations_matrix(default: int = 450) -> list[int]:  # 10x increase from 45
     durations = os.getenv("TEST_PG_BENCH_DURATIONS_MATRIX", default=str(default))
     rv = []
     for d in durations.split(","):
@@ -197,7 +197,7 @@ def get_durations_matrix(default: int = 45) -> list[int]:
     return rv
 
 
-def get_scales_matrix(default: int = 10) -> list[int]:
+def get_scales_matrix(default: int = 100) -> list[int]:  # 10x increase from 10
     scales = os.getenv("TEST_PG_BENCH_SCALES_MATRIX", default=str(default))
     rv = []
     for s in scales.split(","):
diff --git a/test_runner/performance/test_seqscans.py b/test_runner/performance/test_seqscans.py
index 37854df1fa..6bb051109c 100644
--- a/test_runner/performance/test_seqscans.py
+++ b/test_runner/performance/test_seqscans.py
@@ -18,13 +18,13 @@ if TYPE_CHECKING:
 @pytest.mark.parametrize(
     "rows,iters,workers",
     [
-        # The test table is large enough (3-4 MB) that it doesn't fit in the compute node
+        # The test table is large enough (30-40 MB) that it doesn't fit in the compute node
         # cache, so the seqscans go to the page server. But small enough that it fits
         # into memory in the page server.
-        pytest.param(100000, 100, 0),
+        pytest.param(1000000, 100, 0),  # 10x increase from 100000
         # Also test with a larger table, with and without parallelism
-        pytest.param(10000000, 1, 0),
-        pytest.param(10000000, 1, 4),
+        pytest.param(100000000, 1, 0),  # 10x increase from 10000000
+        pytest.param(100000000, 1, 4),  # 10x increase from 10000000
     ],
 )
 @pytest.mark.parametrize(
diff --git a/test_runner/performance/test_wal_backpressure.py b/test_runner/performance/test_wal_backpressure.py
index 4824fa1ba8..a0bd5e4407 100644
--- a/test_runner/performance/test_wal_backpressure.py
+++ b/test_runner/performance/test_wal_backpressure.py
@@ -69,9 +69,9 @@ def start_heavy_write_workload(env: PgCompare, n_tables: int, scale: int, num_it
 
     ## Single table workload: At each step, insert new `new_rows_each_update` rows.
 
-    The variable `new_rows_each_update` is equal to `scale * 100_000`.
+    The variable `new_rows_each_update` is equal to `scale * 1_000_000`.
     The number of steps is determined by `num_iters` variable."""
-    new_rows_each_update = scale * 100_000
+    new_rows_each_update = scale * 1_000_000  # 10x increase from 100_000
 
     def start_single_table_workload(table_id: int):
         for _ in range(num_iters):
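
The arithmetic behind the new values can be sanity-checked offline. The snippet below is an editorial sketch, not part of the patch: the 100,000-rows-per-scale-unit constant is pgbench's initialization size (consistent with the seeding comment above, where -s68 yields 6,800,000 rows, about 1 GiB), the COPY figures mirror copy_test_data_to_table, and the env-var parsing is a simplified stand-in for get_scales_matrix/get_durations_matrix in test_perf_pgbench.py.

import os

# pgbench initializes pgbench_accounts with 100,000 rows per scale unit (-s).
PGBENCH_ROWS_PER_SCALE = 100_000

# Seeding step: -s68 -> -s680 is a clean 10x (6,800,000 -> 68,000,000 rows).
assert 680 * PGBENCH_ROWS_PER_SCALE == 10 * 68 * PGBENCH_ROWS_PER_SCALE == 68_000_000

# Per-worker COPY volume is buffer_lines * repetitions. Bumping only the
# repetition count (5000 -> 50000) keeps the increase at 10x; multiplying both
# factors by 10 would compound to 100x.
assert 1_000 * 50_000 == 10 * 1_000 * 5_000

# The pgbench matrices stay overridable per run, so local smoke tests can be
# kept cheap, e.g. TEST_PG_BENCH_SCALES_MATRIX=10 TEST_PG_BENCH_DURATIONS_MATRIX=45.
scales = [int(s) for s in os.getenv("TEST_PG_BENCH_SCALES_MATRIX", "100").split(",")]
print(scales)  # [100] unless overridden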