From 04938d9d559d6e5968ce7e3b71a5a86ac8f87f57 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 15 Nov 2024 13:22:05 +0000 Subject: [PATCH] tests: tolerate pageserver 500s in test_timeline_archival_chaos (#9769) ## Problem Test exposes cases where pageserver gives 500 responses, causing failures like https://neon-github-public-dev.s3.amazonaws.com/reports/pr-9766/11844529470/index.html#suites/d1acc79950edeb0563fc86236c620898/3546be2ffed99ba6 ## Summary of changes - Tolerate such messages, and link an issue for cleaning up the pageserver not to return such 500s. --- test_runner/regress/test_timeline_archive.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index c447535e10..83631405ab 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -406,7 +406,13 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): tenant_shard_id = TenantShardId(tenant_id, 0, 0) # Unavailable pageservers during timeline CRUD operations can be logged as errors on the storage controller - env.storage_controller.allowed_errors.append(".*error sending request.*") + env.storage_controller.allowed_errors.extend( + [ + ".*error sending request.*", + # FIXME: the pageserver should not return 500s on cancellation (https://github.com/neondatabase/neon/issues/9768) + ".*InternalServerError\\(Error deleting timeline .* on .* on .*: pageserver API: error: Cancelled", + ] + ) for ps in env.pageservers: # We will do unclean restarts, which results in these messages when cleaning up files @@ -415,10 +421,10 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): ".*removing local file.*because it has unexpected length.*", ".*__temp.*", # FIXME: there are still anyhow::Error paths in timeline creation/deletion which - # generate 500 results when called during shutdown + # generate 500 results 
when called during shutdown (https://github.com/neondatabase/neon/issues/9768) ".*InternalServerError.*", # FIXME: there are still anyhow::Error paths in timeline deletion that generate - # log lines at error severity + # log lines at error severity (https://github.com/neondatabase/neon/issues/9768) ".*delete_timeline.*Error", ] )