From ac8f44c70e8a591b090b10c7ab2145b50574923f Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov <34828390+DimasKovas@users.noreply.github.com> Date: Fri, 25 Jul 2025 22:09:34 +0400 Subject: [PATCH] tests: stop ps immediately in test_ps_unavailable_after_delete (#12728) ## Problem test_ps_unavailable_after_delete is flaky. All test failures I've looked at are because of ERROR log messages in the pageserver, which happen because the storage controller tries to run reconciliations during the graceful shutdown of the pageserver. I wasn't able to reproduce it locally, but I think stopping PS immediately instead of gracefully should help. If not, we might just silence those errors. - Closes: https://databricks.atlassian.net/browse/LKB-745 --- test_runner/regress/test_storage_controller.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index a6f05fe8ad..e11be1df8c 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -3309,6 +3309,7 @@ def test_ps_unavailable_after_delete( ps.allowed_errors.append(".*request was dropped before completing.*") env.storage_controller.node_delete(ps.id, force=True) wait_until(lambda: assert_nodes_count(2)) + env.storage_controller.reconcile_until_idle() elif deletion_api == DeletionAPIKind.OLD: env.storage_controller.node_delete_old(ps.id) assert_nodes_count(2)