From 82af640484660a88e120b67e3f9bd2f3361c8704 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Thu, 20 Jun 2024 10:33:44 +0100 Subject: [PATCH] test: bring back consistency checks --- test_runner/performance/test_storage_controller_scale.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test_runner/performance/test_storage_controller_scale.py b/test_runner/performance/test_storage_controller_scale.py index 65e1b5cfd4..f65410bb98 100644 --- a/test_runner/performance/test_storage_controller_scale.py +++ b/test_runner/performance/test_storage_controller_scale.py @@ -138,6 +138,9 @@ def test_storage_controller_many_tenants( # of shards are hitting the delayed path. env.storage_controller.allowed_errors.append(".*Many shards are waiting to reconcile") + # TODO: explain + env.storage_controller.allowed_errors.append(".*Scheduling error when draining pageserver.*") + for ps in env.pageservers: # This can happen because when we do a loop over all pageservers and mark them offline/active, # reconcilers might get cancelled, and the next reconcile can follow a not-so-elegant path of @@ -297,8 +300,8 @@ def test_storage_controller_many_tenants( ps.restart() poll_node_status(env, ps.id, "Active", max_attempts=24, backoff=1) - # env.storage_controller.reconcile_until_idle() - # env.storage_controller.consistency_check() + env.storage_controller.reconcile_until_idle() + env.storage_controller.consistency_check() retryable_node_operation( lambda ps_id: env.storage_controller.node_fill(ps_id), ps.id, max_attempts=3, backoff=2 @@ -312,7 +315,7 @@ def test_storage_controller_many_tenants( # Consistency check is safe here: restarting pageservers should not have caused any Reconcilers to spawn, # as they were not offline long enough to trigger any scheduling changes. - # env.storage_controller.consistency_check() + env.storage_controller.consistency_check() check_memory() # Stop the storage controller before tearing down fixtures, because it otherwise might log