From f15291b6067fe03cb7a8198a59223639190ae1f3 Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 2 Jan 2024 14:27:14 +0000 Subject: [PATCH] tests: update restart+regress tests to use sharding --- .../regress/test_pageserver_restart.py | 24 +++++++-- test_runner/regress/test_pg_regress.py | 53 +++++++++++++------ 2 files changed, 57 insertions(+), 20 deletions(-) diff --git a/test_runner/regress/test_pageserver_restart.py b/test_runner/regress/test_pageserver_restart.py index c4499196b5..753898f747 100644 --- a/test_runner/regress/test_pageserver_restart.py +++ b/test_runner/regress/test_pageserver_restart.py @@ -1,4 +1,6 @@ +import random from contextlib import closing +from typing import Optional import pytest from fixtures.log_helper import log @@ -141,18 +143,24 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder): # Test that repeatedly kills and restarts the page server, while the # safekeeper and compute node keep running. @pytest.mark.timeout(540) -def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder, build_type: str): +@pytest.mark.parametrize("shard_count", [None, 4]) +def test_pageserver_chaos( + neon_env_builder: NeonEnvBuilder, build_type: str, shard_count: Optional[int] +): if build_type == "debug": pytest.skip("times out in debug builds") neon_env_builder.enable_pageserver_remote_storage(s3_storage()) neon_env_builder.enable_scrub_on_exit() + if shard_count is not None: + neon_env_builder.num_pageservers = shard_count - env = neon_env_builder.init_start() + env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count) # these can happen, if we shutdown at a good time. to be fixed as part of #5172. message = ".*duplicated L1 layer layer=.*" - env.pageserver.allowed_errors.append(message) + for ps in env.pageservers: + ps.allowed_errors.append(message) # Use a tiny checkpoint distance, to create a lot of layers quickly. # That allows us to stress the compaction and layer flushing logic more. @@ -192,13 +200,19 @@ def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder, build_type: str): log.info(f"shared_buffers is {row[0]}, table size {row[1]}") assert int(row[0]) < int(row[1]) + # We run "random" kills using a fixed seed, to improve reproducibility if a test + # failure is related to a particular order of operations. + seed = 0xDEADBEEF + rng = random.Random(seed) + # Update the whole table, then immediately kill and restart the pageserver for i in range(1, 15): endpoint.safe_psql("UPDATE foo set updates = updates + 1") # This kills the pageserver immediately, to simulate a crash - env.pageserver.stop(immediate=True) - env.pageserver.start() + to_kill = rng.choice(env.pageservers) + to_kill.stop(immediate=True) + to_kill.start() # Check that all the updates are visible num_updates = endpoint.safe_psql("SELECT sum(updates) FROM foo")[0][0] diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index f26d04e2f3..e4219ec7a6 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -2,25 +2,40 @@ # This file runs pg_regress-based tests. # from pathlib import Path +from typing import Optional -from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content +import pytest +from fixtures.neon_fixtures import ( + NeonEnvBuilder, + check_restored_datadir_content, +) +from fixtures.remote_storage import s3_storage # Run the main PostgreSQL regression tests, in src/test/regress. # +@pytest.mark.parametrize("shard_count", [None, 4]) def test_pg_regress( - neon_simple_env: NeonEnv, + neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_bin, capsys, base_dir: Path, pg_distrib_dir: Path, + shard_count: Optional[int], ): - env = neon_simple_env + """ + :param shard_count: if None, create an unsharded tenant. Otherwise create a tenant with this + many shards. + """ + if shard_count is not None: + neon_env_builder.num_pageservers = shard_count + neon_env_builder.enable_pageserver_remote_storage(s3_storage()) + neon_env_builder.enable_scrub_on_exit() + env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count) - env.neon_cli.create_branch("test_pg_regress", "empty") # Connect to postgres and create a database called "regression". - endpoint = env.endpoints.create_start("test_pg_regress") + endpoint = env.endpoints.create_start("main") endpoint.safe_psql("CREATE DATABASE regression") # Create some local directories for pg_regress to run in. @@ -61,22 +76,25 @@ def test_pg_regress( # Run the PostgreSQL "isolation" tests, in src/test/isolation. # +@pytest.mark.parametrize("shard_count", [None, 4]) def test_isolation( - neon_simple_env: NeonEnv, + neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_bin, capsys, base_dir: Path, pg_distrib_dir: Path, + shard_count: Optional[int], ): - env = neon_simple_env + if shard_count is not None: + neon_env_builder.num_pageservers = shard_count + neon_env_builder.enable_pageserver_remote_storage(s3_storage()) + neon_env_builder.enable_scrub_on_exit() + env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count) - env.neon_cli.create_branch("test_isolation", "empty") # Connect to postgres and create a database called "regression". # isolation tests use prepared transactions, so enable them - endpoint = env.endpoints.create_start( - "test_isolation", config_lines=["max_prepared_transactions=100"] - ) + endpoint = env.endpoints.create_start("main", config_lines=["max_prepared_transactions=100"]) endpoint.safe_psql("CREATE DATABASE isolation_regression") # Create some local directories for pg_isolation_regress to run in. @@ -114,19 +132,24 @@ def test_isolation( # Run extra Neon-specific pg_regress-based tests. The tests and their # schedule file are in the sql_regress/ directory. +@pytest.mark.parametrize("shard_count", [None, 4]) def test_sql_regress( - neon_simple_env: NeonEnv, + neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_bin, capsys, base_dir: Path, pg_distrib_dir: Path, + shard_count: Optional[int], ): - env = neon_simple_env + if shard_count is not None: + neon_env_builder.num_pageservers = shard_count + neon_env_builder.enable_pageserver_remote_storage(s3_storage()) + neon_env_builder.enable_scrub_on_exit() + env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count) - env.neon_cli.create_branch("test_sql_regress", "empty") # Connect to postgres and create a database called "regression". - endpoint = env.endpoints.create_start("test_sql_regress") + endpoint = env.endpoints.create_start("main") endpoint.safe_psql("CREATE DATABASE regression") # Create some local directories for pg_regress to run in.