From 759962c5cc85354161aa86b8d6b3eeeaf9ecdfec Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 19 May 2025 10:06:26 +0000
Subject: [PATCH] [test_runner/performance] scale: Increase data volumes in
 PostgreSQL regression tests by 10x

Co-Authored-By: alexeymasterov@neon.tech
---
 test_runner/performance/test_branching.py                 | 6 +++---
 test_runner/performance/test_copy.py                      | 4 ++--
 .../performance/test_cumulative_statistics_persistence.py | 6 +++---
 test_runner/performance/test_latency.py                   | 2 +-
 test_runner/performance/test_parallel_copy.py             | 2 +-
 test_runner/performance/test_parallel_copy_to.py          | 2 +-
 test_runner/performance/test_perf_pgbench.py              | 4 ++--
 test_runner/performance/test_seqscans.py                  | 8 ++++----
 test_runner/performance/test_wal_backpressure.py          | 4 ++--
 9 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/test_runner/performance/test_branching.py b/test_runner/performance/test_branching.py
index 1b29dab288..bada3e80b5 100644
--- a/test_runner/performance/test_branching.py
+++ b/test_runner/performance/test_branching.py
@@ -70,9 +70,9 @@ def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
     endpoint_child = env.endpoints.create_start("child")
 
     with neon_compare.record_duration("root_run_duration"):
-        endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
+        endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,10000000)")  # 10x increase from 1000000
 
     with neon_compare.record_duration("child_run_duration"):
-        endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
+        endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,10000000)")  # 10x increase from 1000000
 
 def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
@@ -83,7 +83,7 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
     endpoint_root.safe_psql_many(
         [
             "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
-            "INSERT INTO foo SELECT FROM generate_series(1,1000000)",
+            "INSERT INTO foo SELECT FROM generate_series(1,10000000)",  # 10x increase from 1000000
         ]
     )
 
diff --git a/test_runner/performance/test_copy.py b/test_runner/performance/test_copy.py
index 8535e6843d..3eaeb7d113 100644
--- a/test_runner/performance/test_copy.py
+++ b/test_runner/performance/test_copy.py
@@ -59,7 +59,7 @@ def test_copy(neon_with_baseline: PgCompare):
     # Since there's no data in the table previously, this extends it.
     with env.record_pageserver_writes("copy_extend_pageserver_writes"):
         with env.record_duration("copy_extend"):
-            cur.copy_from(copy_test_data(1000000), "copytest")
+            cur.copy_from(copy_test_data(10000000), "copytest")  # 10x increase from 1000000
             env.flush()
 
     # Delete most rows, and VACUUM to make the space available for reuse.
@@ -79,7 +79,7 @@ def test_copy(neon_with_baseline: PgCompare):
     # This will also clear all the VM bits.
     with env.record_pageserver_writes("copy_reuse_pageserver_writes"):
         with env.record_duration("copy_reuse"):
-            cur.copy_from(copy_test_data(1000000), "copytest")
+            cur.copy_from(copy_test_data(10000000), "copytest")  # 10x increase from 1000000
             env.flush()
 
     env.report_peak_memory_use()
diff --git a/test_runner/performance/test_cumulative_statistics_persistence.py b/test_runner/performance/test_cumulative_statistics_persistence.py
index 5e9e55cb0f..8d661c92da 100644
--- a/test_runner/performance/test_cumulative_statistics_persistence.py
+++ b/test_runner/performance/test_cumulative_statistics_persistence.py
@@ -101,8 +101,8 @@ def test_cumulative_statistics_persistence(
     try:
         connstr = project["connection_uris"][0]["connection_uri"]
        env = connection_parameters_to_env(project["connection_uris"][0]["connection_parameters"])
-        # seed about 1 GiB of data into pgbench_accounts
-        pg_bin.run_capture(["pgbench", "-i", "-s68"], env=env)
+        # seed about 10 GiB of data into pgbench_accounts
+        pg_bin.run_capture(["pgbench", "-i", "-s680"], env=env)  # 10x increase from 68
 
-        # assert rows in pgbench_accounts is 6800000 rows
+        # assert rows in pgbench_accounts is 68000000 rows
         conn = psycopg2.connect(connstr)
diff --git a/test_runner/performance/test_latency.py b/test_runner/performance/test_latency.py
index 0431f0bf42..3045897f10 100644
--- a/test_runner/performance/test_latency.py
+++ b/test_runner/performance/test_latency.py
@@ -13,7 +13,7 @@ if TYPE_CHECKING:
     from fixtures.neon_fixtures import PgProtocol
 
 
-def start_write_workload(pg: PgProtocol, scale: int = 10):
+def start_write_workload(pg: PgProtocol, scale: int = 100):  # 10x increase from 10
     with pg.connect().cursor() as cur:
         cur.execute(f"create table big as select generate_series(1,{scale * 100_000})")
 
diff --git a/test_runner/performance/test_parallel_copy.py b/test_runner/performance/test_parallel_copy.py
index f7f20bd33e..d0bac8bf87 100644
--- a/test_runner/performance/test_parallel_copy.py
+++ b/test_runner/performance/test_parallel_copy.py
@@ -17,13 +17,13 @@ async def repeat_bytes(buf, repetitions: int):
 
 
 async def copy_test_data_to_table(endpoint: Endpoint, worker_id: int, table_name: str):
     buf = BytesIO()
     for i in range(1000):
         buf.write(
             f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()
         )
     buf.seek(0)
 
-    copy_input = repeat_bytes(buf.read(), 5000)
+    copy_input = repeat_bytes(buf.read(), 50000)  # 10x increase from 5000
 
     pg_conn = await endpoint.connect_async()
diff --git a/test_runner/performance/test_parallel_copy_to.py b/test_runner/performance/test_parallel_copy_to.py
index 0427ecaf0a..5da88015e6 100644
--- a/test_runner/performance/test_parallel_copy_to.py
+++ b/test_runner/performance/test_parallel_copy_to.py
@@ -16,13 +16,13 @@ async def repeat_bytes(buf, repetitions: int):
 
 async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: str):
     buf = BytesIO()
     for i in range(1000):
         buf.write(
             f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()
         )
     buf.seek(0)
 
-    copy_input = repeat_bytes(buf.read(), 5000)
+    copy_input = repeat_bytes(buf.read(), 50000)  # 10x increase from 5000
 
     pg_conn = await pg.connect_async()
     await pg_conn.copy_to_table(table_name, source=copy_input)
diff --git a/test_runner/performance/test_perf_pgbench.py b/test_runner/performance/test_perf_pgbench.py
index 57889ceadf..07d4f7dbdf 100644
--- a/test_runner/performance/test_perf_pgbench.py
+++ b/test_runner/performance/test_perf_pgbench.py
@@ -181,7 +181,7 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: P
     env.report_size()
 
 
-def get_durations_matrix(default: int = 45) -> list[int]:
+def get_durations_matrix(default: int = 450) -> list[int]:  # 10x increase from 45
     durations = os.getenv("TEST_PG_BENCH_DURATIONS_MATRIX", default=str(default))
     rv = []
     for d in durations.split(","):
@@ -197,7 +197,7 @@ def get_durations_matrix(default: int = 45) -> list[int]:
     return rv
 
 
-def get_scales_matrix(default: int = 10) -> list[int]:
+def get_scales_matrix(default: int = 100) -> list[int]:  # 10x increase from 10
     scales = os.getenv("TEST_PG_BENCH_SCALES_MATRIX", default=str(default))
     rv = []
     for s in scales.split(","):
diff --git a/test_runner/performance/test_seqscans.py b/test_runner/performance/test_seqscans.py
index 37854df1fa..6bb051109c 100644
--- a/test_runner/performance/test_seqscans.py
+++ b/test_runner/performance/test_seqscans.py
@@ -18,13 +18,13 @@ if TYPE_CHECKING:
 @pytest.mark.parametrize(
     "rows,iters,workers",
     [
-        # The test table is large enough (3-4 MB) that it doesn't fit in the compute node
+        # The test table is large enough (30-40 MB) that it doesn't fit in the compute node
         # cache, so the seqscans go to the page server. But small enough that it fits
         # into memory in the page server.
-        pytest.param(100000, 100, 0),
+        pytest.param(1000000, 100, 0),  # 10x increase from 100000
         # Also test with a larger table, with and without parallelism
-        pytest.param(10000000, 1, 0),
-        pytest.param(10000000, 1, 4),
+        pytest.param(100000000, 1, 0),  # 10x increase from 10000000
+        pytest.param(100000000, 1, 4),  # 10x increase from 10000000
     ],
 )
 @pytest.mark.parametrize(
diff --git a/test_runner/performance/test_wal_backpressure.py b/test_runner/performance/test_wal_backpressure.py
index 4824fa1ba8..a0bd5e4407 100644
--- a/test_runner/performance/test_wal_backpressure.py
+++ b/test_runner/performance/test_wal_backpressure.py
@@ -69,9 +69,9 @@ def start_heavy_write_workload(env: PgCompare, n_tables: int, scale: int, num_it
 
     ## Single table workload: At each step, insert new `new_rows_each_update` rows.
 
-    The variable `new_rows_each_update` is equal to `scale * 100_000`.
+    The variable `new_rows_each_update` is equal to `scale * 1_000_000`.
     The number of steps is determined by `num_iters` variable."""
-    new_rows_each_update = scale * 100_000
+    new_rows_each_update = scale * 1_000_000  # 10x increase from 100_000
 
     def start_single_table_workload(table_id: int):
         for _ in range(num_iters):
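
The arithmetic behind the new values can be sanity-checked offline. The snippet below is an editorial sketch, not part of the patch: the 100,000-rows-per-scale-unit constant is pgbench's initialization size (consistent with the seeding comment above, where -s68 yields 6,800,000 rows, about 1 GiB), the COPY figures mirror copy_test_data_to_table, and the env-var parsing is a simplified stand-in for get_scales_matrix/get_durations_matrix in test_perf_pgbench.py.

import os

# pgbench initializes pgbench_accounts with 100,000 rows per scale unit (-s).
PGBENCH_ROWS_PER_SCALE = 100_000

# Seeding step: -s68 -> -s680 is a clean 10x (6,800,000 -> 68,000,000 rows).
assert 680 * PGBENCH_ROWS_PER_SCALE == 10 * 68 * PGBENCH_ROWS_PER_SCALE == 68_000_000

# Per-worker COPY volume is buffer_lines * repetitions. Bumping only the
# repetition count (5000 -> 50000) keeps the increase at 10x; multiplying both
# factors by 10 would compound to 100x.
assert 1_000 * 50_000 == 10 * 1_000 * 5_000

# The pgbench matrices stay overridable per run, so local smoke tests can be
# kept cheap, e.g. TEST_PG_BENCH_SCALES_MATRIX=10 TEST_PG_BENCH_DURATIONS_MATRIX=45.
scales = [int(s) for s in os.getenv("TEST_PG_BENCH_SCALES_MATRIX", "100").split(",")]
print(scales)  # [100] unless overridden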