From d6281cbe65db6959e83c6d8abb44c0a3184e8b97 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 16 Oct 2024 15:27:46 +0100 Subject: [PATCH] tests: stabilize test_timelines_parallel_endpoints (#9413) ## Problem This test would get failures like `command failed: Found no timeline id for branch name 'branch_8'`. It's because neon_local is being invoked concurrently for branch creation, which is unsafe (they'll step on each other's JSON writes). Example failure: https://neon-github-public-dev.s3.amazonaws.com/reports/pr-9410/11363051979/index.html#testresult/5ddc56c640f5422b/retries ## Summary of changes - Don't do branch creation concurrently with endpoint creation via neon_local --- test_runner/regress/test_tenants.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 4a16535941..03cb79fc1d 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -19,6 +19,7 @@ from fixtures.metrics import ( parse_metrics, ) from fixtures.neon_fixtures import ( + Endpoint, NeonEnv, NeonEnvBuilder, wait_for_last_flush_lsn, @@ -490,8 +491,8 @@ def test_timelines_parallel_endpoints(neon_simple_env: NeonEnv): n_threads = 16 barrier = threading.Barrier(n_threads) - def test_timeline(branch_name: str, timeline_id: TimelineId): - endpoint = env.endpoints.create_start(branch_name) + def test_timeline(branch_name: str, timeline_id: TimelineId, endpoint: Endpoint): + endpoint.start() endpoint.stop() # Use a barrier to make sure we restart endpoints at the same time barrier.wait() @@ -502,8 +503,12 @@ def test_timelines_parallel_endpoints(neon_simple_env: NeonEnv): for i in range(0, n_threads): branch_name = f"branch_{i}" timeline_id = env.create_branch(branch_name) - w = threading.Thread(target=test_timeline, args=[branch_name, timeline_id]) + endpoint = env.endpoints.create(branch_name) + w = threading.Thread(target=test_timeline, args=[branch_name,
timeline_id, endpoint]) workers.append(w) + + # Only start the restarts once we're done creating all timelines & endpoints + for w in workers: w.start() for w in workers: