From f071800979fba434ea0708f22e454c513efe47b2 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 3 Feb 2025 09:02:21 +0000 Subject: [PATCH] tests: stabilize shard locations earlier in test_scrubber_tenant_snapshot (#10606) ## Problem This test would sometimes emit unexpected logs from the storage controller's requests to do migrations when those requests overlapped with the test's restarts of pageservers. Those migrations happen some time after a shard split, as the controller moves load around. Example: https://neon-github-public-dev.s3.amazonaws.com/reports/pr-10602/13067323736/index.html#testresult/f66f1329557a1fc5/retries ## Summary of changes - Do a reconcile_until_idle after shard split, so that the rest of the test doesn't run concurrently with migrations --- test_runner/regress/test_storage_scrubber.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index 7e92cc01cd..0f4e5688a9 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -71,6 +71,10 @@ def test_scrubber_tenant_snapshot(neon_env_builder: NeonEnvBuilder, shard_count: else: tenant_shard_ids = [TenantShardId(tenant_id, 0, 0)] + # Let shards finish rescheduling to other pageservers: this makes the rest of the test more stable + # as it won't overlap with migrations + env.storage_controller.reconcile_until_idle(max_interval=0.1, timeout_secs=120) + output_path = neon_env_builder.test_output_dir / "snapshot" os.makedirs(output_path)