From c864166b32f53be2d2ecb7d480a5c17ad74c841a Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Fri, 26 Jul 2024 08:16:31 +0000 Subject: [PATCH] test: make sure gc gets unblocked by late deletion --- .../regress/test_timeline_detach_ancestor.py | 35 ++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/test_runner/regress/test_timeline_detach_ancestor.py b/test_runner/regress/test_timeline_detach_ancestor.py index 437816775b..87773c23db 100644 --- a/test_runner/regress/test_timeline_detach_ancestor.py +++ b/test_runner/regress/test_timeline_detach_ancestor.py @@ -1224,7 +1224,15 @@ def test_retried_detach_ancestor_after_failed_reparenting(neon_env_builder: Neon def test_timeline_is_deleted_before_timeline_detach_ancestor_completes( neon_env_builder: NeonEnvBuilder, ): - env = neon_env_builder.init_start() + """ + Make sure that a timeline deleted after restart will unpause gc blocking. + """ + env = neon_env_builder.init_start( + initial_tenant_conf={ + "gc_period": "1s", + "lsn_lease_length": "0s", + } + ) env.pageserver.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS) @@ -1239,8 +1247,11 @@ def test_timeline_is_deleted_before_timeline_detach_ancestor_completes( def detach_and_get_stuck(): return http.detach_ancestor(env.initial_tenant, detached) - def pausepoint_hit(): - env.pageserver.assert_log_contains(f"at failpoint {failpoint}") + def request_processing_noted_in_log(): + _, offset = env.pageserver.assert_log_contains( + ".*INFO request\\{method=PUT path=/v1/tenant/[0-9a-f]{32}/timeline/[0-9a-f]{32}/detach_ancestor .*\\}: Handling request", + ) + return offset def delete_detached(): return http.timeline_delete(env.initial_tenant, detached) @@ -1249,7 +1260,18 @@ def test_timeline_is_deleted_before_timeline_detach_ancestor_completes( with ThreadPoolExecutor(max_workers=1) as pool: detach = pool.submit(detach_and_get_stuck) - wait_until(10, 1.0, pausepoint_hit) + offset = wait_until(10, 1.0, request_processing_noted_in_log) + + # 
make this a named fn for clearer failure output in the test log + def pausepoint_hit_with_gc_paused() -> LogCursor: + env.pageserver.assert_log_contains(f"at failpoint {failpoint}") + _, at = env.pageserver.assert_log_contains( + ".* gc_loop.*: Skipping GC while there is an ongoing detach_ancestor attempt", + offset, + ) + return at + + offset = wait_until(10, 1.0, pausepoint_hit_with_gc_paused) delete_detached() @@ -1268,6 +1290,11 @@ def test_timeline_is_deleted_before_timeline_detach_ancestor_completes( finally: http.configure_failpoints((failpoint, "off")) + # make sure gc has been unblocked + time.sleep(2) + + env.pageserver.assert_log_contains(".* gc_loop.*: 1 timelines need GC", offset) + # TODO: # - branch near existing L1 boundary, image layers?