From 3a4ebfb95dd19a499b574c812f6c8cd4adebb172 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Mon, 25 Mar 2024 09:38:12 +0000 Subject: [PATCH] test: fix `test_pageserver_recovery` flakiness (#7207) ## Problem We recently introduced log file validation for the storage controller. The heartbeater will WARN when it fails for a node, hence the test fails. Closes https://github.com/neondatabase/neon/issues/7159 ## Summary of changes * Warn only once for each set of heartbeat retries * Allow-list heartbeat warnings --- control_plane/attachment_service/src/heartbeater.rs | 2 +- test_runner/regress/test_recovery.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/control_plane/attachment_service/src/heartbeater.rs b/control_plane/attachment_service/src/heartbeater.rs index e15de28920..7669680eb6 100644 --- a/control_plane/attachment_service/src/heartbeater.rs +++ b/control_plane/attachment_service/src/heartbeater.rs @@ -139,7 +139,7 @@ impl HeartbeaterTask { .with_client_retries( |client| async move { client.get_utilization().await }, &jwt_token, - 2, + 3, 3, Duration::from_secs(1), &cancel, diff --git a/test_runner/regress/test_recovery.py b/test_runner/regress/test_recovery.py index 6aac1e1d84..ab5c8be256 100644 --- a/test_runner/regress/test_recovery.py +++ b/test_runner/regress/test_recovery.py @@ -15,6 +15,13 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.pageserver.is_testing_enabled_or_skip() + # We expect the pageserver to exit, which will cause storage controller + # requests to fail and warn. + env.storage_controller.allowed_errors.append(".*management API still failed.*") + env.storage_controller.allowed_errors.append( + ".*Reconcile error.*error sending request for url.*" + ) + # Create a branch for us env.neon_cli.create_branch("test_pageserver_recovery", "main")