From d515727e942f97f1e8ffa27bf86ef47e4506a272 Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 1 Oct 2024 10:15:18 +0100 Subject: [PATCH] tests: make test_multi_attach more stable (#9202) ## Problem `test_multi_attach` is sometimes failing with `invalid compute status for configuration request: Configuration`. This is likely a result of the test attempting to reconfigure the compute at the same time as the storage controller is doing so. This test was originally written before the storage controller existed, and is not expecting anything else to be reconfiguring computes at the same time. ## Summary of changes - Configure the tenant into scheduling policy `Stop` in the storage controller at the start of the test, so that it won't try to do anything to the tenant while the test is running. --- test_runner/regress/test_pageserver_generations.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index 519994f774..96521b5684 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -549,6 +549,14 @@ def test_multi_attach( tenant_id = env.initial_tenant timeline_id = env.initial_timeline + # Instruct the storage controller to not interfere with our low level configuration + # of the pageserver's attachment states. Otherwise when it sees nodes go offline+return, + # it would send its own requests that would conflict with the test's. + env.storage_controller.tenant_policy_update(tenant_id, {"scheduling": "Stop"}) + env.storage_controller.allowed_errors.extend( + [".*Scheduling is disabled by policy Stop.*", ".*Skipping reconcile for policy Stop.*"] + ) + # Initially, the tenant will be attached to the first pageserver (first is default in our test harness) wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[0], tenant_id, "Active")) _detail = http_clients[0].timeline_detail(tenant_id, timeline_id)