From 0efff1db26d14000278e409586f580493e31289d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 24 Jun 2025 00:26:38 +0200 Subject: [PATCH] Allow cancellation errors in tests that allow timeline deletion errors (#12315) After PR https://github.com/neondatabase/neon/pull/11712 was merged, some tests became flaky, with errors showing up about the timeline having been cancelled instead of having been deleted. This outcome is inherently racy with the "has been deleted" error. In some instances, https://github.com/neondatabase/neon/pull/11712 has already added the error about the timeline having been cancelled to the allowed errors. This PR adds it to the remaining instances of https://github.com/neondatabase/neon/pull/11712, fixing the flakiness. --- test_runner/regress/test_storage_controller.py | 4 +++- test_runner/regress/test_timeline_detach_ancestor.py | 8 ++++++-- test_runner/regress/test_timeline_gc_blocking.py | 4 +++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index be7f0c8a3e..70772766d7 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -3642,7 +3642,9 @@ def test_timeline_delete_mid_live_migration(neon_env_builder: NeonEnvBuilder, mi env.start() for ps in env.pageservers: - ps.allowed_errors.append(".*Timeline.* has been deleted.*") + ps.allowed_errors.extend( + [".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"] + ) tenant_id = TenantId.generate() timeline_id = TimelineId.generate() diff --git a/test_runner/regress/test_timeline_detach_ancestor.py b/test_runner/regress/test_timeline_detach_ancestor.py index c58f78aeb1..b5cc431afe 100644 --- a/test_runner/regress/test_timeline_detach_ancestor.py +++ b/test_runner/regress/test_timeline_detach_ancestor.py @@ -1099,7 +1099,9 @@ def 
test_timeline_detach_ancestor_interrupted_by_deletion( for ps in env.pageservers: ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS) - ps.allowed_errors.append(".*Timeline.* has been deleted.*") + ps.allowed_errors.extend( + [".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"] + ) pageservers = dict((int(p.id), p) for p in env.pageservers) @@ -1221,7 +1223,9 @@ def test_sharded_tad_interleaved_after_partial_success(neon_env_builder: NeonEnv for ps in env.pageservers: ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS) - ps.allowed_errors.append(".*Timeline.* has been deleted.*") + ps.allowed_errors.extend( + [".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"] + ) pageservers = dict((int(p.id), p) for p in env.pageservers) diff --git a/test_runner/regress/test_timeline_gc_blocking.py b/test_runner/regress/test_timeline_gc_blocking.py index 8ef64a0742..daba8019b6 100644 --- a/test_runner/regress/test_timeline_gc_blocking.py +++ b/test_runner/regress/test_timeline_gc_blocking.py @@ -25,7 +25,9 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool initial_tenant_shard_count=2 if sharded else None, ) for ps in env.pageservers: - ps.allowed_errors.append(".*Timeline.* has been deleted.*") + ps.allowed_errors.extend( + [".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"] + ) if sharded: http = env.storage_controller.pageserver_api()