[test_runner/performance] scale: Increase data volumes in PostgreSQL regression tests by 10x

Co-Authored-By: alexeymasterov@neon.tech <alexeymasterov@neon.tech>

Author: Devin AI
Date: 2025-05-19 10:06:26 +00:00
parent cdb6479c8a
commit 759962c5cc

9 changed files with 20 additions and 20 deletions

View File

@@ -70,9 +70,9 @@ def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
     endpoint_child = env.endpoints.create_start("child")
     with neon_compare.record_duration("root_run_duration"):
-        endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
+        endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,10000000)") # 10x increase from 1000000
     with neon_compare.record_duration("child_run_duration"):
-        endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
+        endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,10000000)") # 10x increase from 1000000
 def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
@@ -83,7 +83,7 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
     endpoint_root.safe_psql_many(
         [
             "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
-            "INSERT INTO foo SELECT FROM generate_series(1,1000000)",
+            "INSERT INTO foo SELECT FROM generate_series(1,10000000)", # 10x increase from 1000000
         ]
     )
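As an aside on the pattern these hunks scale up: `INSERT INTO foo SELECT FROM generate_series(1, N)` uses a zero-column SELECT, so PostgreSQL fills every generated row with the table's column defaults. A minimal standalone sketch of the same idea (not part of this commit; the connection handling and the `seed_and_measure` name are illustrative), using psycopg2 as the surrounding tests do:

    import psycopg2

    def seed_and_measure(connstr: str, rows: int = 10_000_000) -> int:
        """Insert `rows` default-valued rows into foo and return its on-disk size in bytes."""
        with psycopg2.connect(connstr) as conn, conn.cursor() as cur:
            cur.execute(
                "CREATE TABLE IF NOT EXISTS foo (key serial PRIMARY KEY, "
                "t text DEFAULT 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')"
            )
            # A SELECT with no target list inserts one all-defaults row per generated value.
            cur.execute(f"INSERT INTO foo SELECT FROM generate_series(1, {rows})")
            cur.execute("SELECT pg_table_size('foo')")
            return cur.fetchone()[0]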

View File

@@ -59,7 +59,7 @@ def test_copy(neon_with_baseline: PgCompare):
     # Since there's no data in the table previously, this extends it.
     with env.record_pageserver_writes("copy_extend_pageserver_writes"):
         with env.record_duration("copy_extend"):
-            cur.copy_from(copy_test_data(1000000), "copytest")
+            cur.copy_from(copy_test_data(10000000), "copytest") # 10x increase from 1000000
             env.flush()
     # Delete most rows, and VACUUM to make the space available for reuse.
@@ -79,7 +79,7 @@ def test_copy(neon_with_baseline: PgCompare):
     # This will also clear all the VM bits.
     with env.record_pageserver_writes("copy_reuse_pageserver_writes"):
         with env.record_duration("copy_reuse"):
-            cur.copy_from(copy_test_data(1000000), "copytest")
+            cur.copy_from(copy_test_data(10000000), "copytest") # 10x increase from 1000000
             env.flush()
     env.report_peak_memory_use()
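The `copy_test_data` helper called above is not shown in this hunk. psycopg2's `cursor.copy_from()` reads from a file-like object, so a sketch consistent with how it is invoked here (the column layout of `copytest` is an assumption) might look like:

    from io import StringIO

    def copy_test_data(num_rows: int) -> StringIO:
        """Build an in-memory, tab-separated payload for cursor.copy_from()."""
        buf = StringIO()
        for i in range(num_rows):
            # Assumed two-column layout: an id plus some filler text for row width.
            buf.write(f"{i}\tsome filler text to give each row a bit of width\n")
        buf.seek(0)
        return buf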

View File

@@ -101,8 +101,8 @@ def test_cumulative_statistics_persistence(
     try:
         connstr = project["connection_uris"][0]["connection_uri"]
         env = connection_parameters_to_env(project["connection_uris"][0]["connection_parameters"])
-        # seed about 1 GiB of data into pgbench_accounts
-        pg_bin.run_capture(["pgbench", "-i", "-s68"], env=env)
+        # seed about 10 GiB of data into pgbench_accounts
+        pg_bin.run_capture(["pgbench", "-i", "-s680"], env=env) # 10x increase from 68
         # assert rows in pgbench_accounts is 6800000 rows
         conn = psycopg2.connect(connstr)
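For the pgbench numbers: `pgbench -i` creates 100,000 rows in pgbench_accounts per scale-factor unit, so `-s680` seeds 68,000,000 rows, roughly 10 GiB on disk. A hedged sanity-check sketch (not part of the commit; the function name and default are illustrative):

    import psycopg2

    def assert_pgbench_account_rows(connstr: str, scale: int = 680) -> None:
        expected = scale * 100_000  # pgbench_accounts grows by 100,000 rows per scale unit
        with psycopg2.connect(connstr) as conn, conn.cursor() as cur:
            cur.execute("SELECT count(*) FROM pgbench_accounts")
            assert cur.fetchone()[0] == expected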

View File

@@ -13,7 +13,7 @@ if TYPE_CHECKING:
     from fixtures.neon_fixtures import PgProtocol
-def start_write_workload(pg: PgProtocol, scale: int = 10):
+def start_write_workload(pg: PgProtocol, scale: int = 100): # 10x increase from 10
     with pg.connect().cursor() as cur:
         cur.execute(f"create table big as select generate_series(1,{scale * 100_000})")
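With the new default, `start_write_workload` creates `scale * 100_000 = 10,000,000` rows in table `big`; the old volume stays reachable by passing the scale explicitly. A usage sketch, assuming `pg` is whatever PgProtocol fixture the caller already holds:

    start_write_workload(pg)            # new default scale=100 -> 10,000,000 rows
    start_write_workload(pg, scale=10)  # previous default      ->  1,000,000 rows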

View File

@@ -17,13 +17,13 @@ async def repeat_bytes(buf, repetitions: int):
 async def copy_test_data_to_table(endpoint: Endpoint, worker_id: int, table_name: str):
     buf = BytesIO()
-    for i in range(1000):
+    for i in range(10000): # 10x increase from 1000
         buf.write(
             f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()
         )
     buf.seek(0)
-    copy_input = repeat_bytes(buf.read(), 5000)
+    copy_input = repeat_bytes(buf.read(), 50000) # 10x increase from 5000
     pg_conn = await endpoint.connect_async()

View File

@@ -16,13 +16,13 @@ async def repeat_bytes(buf, repetitions: int):
 async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: str):
     buf = BytesIO()
-    for i in range(1000):
+    for i in range(10000): # 10x increase from 1000
         buf.write(
             f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()
         )
     buf.seek(0)
-    copy_input = repeat_bytes(buf.read(), 5000)
+    copy_input = repeat_bytes(buf.read(), 50000) # 10x increase from 5000
     pg_conn = await pg.connect_async()
     await pg_conn.copy_to_table(table_name, source=copy_input)
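`repeat_bytes` is only named in the hunk headers of these two near-identical helpers. Each formatted line is roughly 60 bytes, so the 10,000-line buffer is about 600 KB, and repeating it 50,000 times streams on the order of 30 GB through COPY per worker. A minimal async-generator sketch consistent with how the helper is consumed here (asyncpg's `copy_to_table` accepts an asynchronous iterable of bytes as `source`), not necessarily the file's actual implementation:

    async def repeat_bytes(buf: bytes, repetitions: int):
        """Yield the same chunk `repetitions` times so COPY can stream the payload
        without materializing all of it in memory."""
        for _ in range(repetitions):
            yield buf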

View File

@@ -181,7 +181,7 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: P
     env.report_size()
-def get_durations_matrix(default: int = 45) -> list[int]:
+def get_durations_matrix(default: int = 450) -> list[int]: # 10x increase from 45
     durations = os.getenv("TEST_PG_BENCH_DURATIONS_MATRIX", default=str(default))
     rv = []
     for d in durations.split(","):
@@ -197,7 +197,7 @@ def get_durations_matrix(default: int = 45) -> list[int]:
     return rv
-def get_scales_matrix(default: int = 10) -> list[int]:
+def get_scales_matrix(default: int = 100) -> list[int]: # 10x increase from 10
     scales = os.getenv("TEST_PG_BENCH_SCALES_MATRIX", default=str(default))
     rv = []
     for s in scales.split(","):
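Both defaults can still be overridden through the environment variables read above, so the previous values remain reachable without editing the test. A usage sketch, assuming the loops simply append each comma-separated entry as an int:

    import os

    # Must be set before the test module reads the variables, e.g. exported in the shell
    # that launches pytest; shown here as Python for illustration.
    os.environ["TEST_PG_BENCH_SCALES_MATRIX"] = "10,100"      # run the old and new scales
    os.environ["TEST_PG_BENCH_DURATIONS_MATRIX"] = "45,450"   # run the old and new durations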

View File

@@ -18,13 +18,13 @@ if TYPE_CHECKING:
 @pytest.mark.parametrize(
     "rows,iters,workers",
     [
-        # The test table is large enough (3-4 MB) that it doesn't fit in the compute node
+        # The test table is large enough (30-40 MB) that it doesn't fit in the compute node
         # cache, so the seqscans go to the page server. But small enough that it fits
         # into memory in the page server.
-        pytest.param(100000, 100, 0),
+        pytest.param(1000000, 100, 0), # 10x increase from 100000
         # Also test with a larger table, with and without parallelism
-        pytest.param(10000000, 1, 0),
-        pytest.param(10000000, 1, 4),
+        pytest.param(100000000, 1, 0), # 10x increase from 10000000
+        pytest.param(100000000, 1, 4), # 10x increase from 10000000
     ],
 )
 @pytest.mark.parametrize(
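Rough arithmetic behind the updated size comment (an estimate, not a measurement): one million narrow heap rows at roughly 30-40 bytes each, once the ~24-byte tuple header is counted, lands in the 30-40 MB range, which is what keeps the table out of the compute cache while still fitting in pageserver memory.

    rows, bytes_per_row = 1_000_000, 36                       # assumed average, incl. tuple header overhead
    print(f"~{rows * bytes_per_row / 1024 / 1024:.0f} MiB")   # ~34 MiB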

View File

@@ -69,9 +69,9 @@ def start_heavy_write_workload(env: PgCompare, n_tables: int, scale: int, num_it
     ## Single table workload:
     At each step, insert new `new_rows_each_update` rows.
-    The variable `new_rows_each_update` is equal to `scale * 100_000`.
+    The variable `new_rows_each_update` is equal to `scale * 1_000_000`.
     The number of steps is determined by `num_iters` variable."""
-    new_rows_each_update = scale * 100_000
+    new_rows_each_update = scale * 1_000_000 # 10x increase from 100_000
     def start_single_table_workload(table_id: int):
         for _ in range(num_iters):
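The loop body is cut off by the hunk. Per the docstring, each iteration now appends `new_rows_each_update = scale * 1_000_000` rows instead of `scale * 100_000`. A hypothetical shape of one iteration, only to illustrate the volume (`env.pg` and the per-table name are assumptions, not the file's actual code):

    with env.pg.connect().cursor() as cur:
        # Append one batch of new rows to this worker's table.
        cur.execute(
            f"INSERT INTO t{table_id} SELECT generate_series(1, {new_rows_each_update})"
        )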