From 3f401a328f62fcdf58424de6a2b7324068426485 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 18 Nov 2024 11:33:27 +0000 Subject: [PATCH] tests: mitigate bug to stabilize test_storage_controller_many_tenants (#9771) ## Problem Due to #9471 , the scale test occasionally gets 404s while trying to modify the config of a timeline that belongs to a tenant being migrated. We rarely see this narrow race in the field, but the test is quite good at reproducing it. ## Summary of changes - Ignore 404 errors in this test. --- .../performance/test_storage_controller_scale.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/test_runner/performance/test_storage_controller_scale.py b/test_runner/performance/test_storage_controller_scale.py index d2eba751f8..dc051483f8 100644 --- a/test_runner/performance/test_storage_controller_scale.py +++ b/test_runner/performance/test_storage_controller_scale.py @@ -16,7 +16,7 @@ from fixtures.neon_fixtures import ( PageserverAvailability, PageserverSchedulingPolicy, ) -from fixtures.pageserver.http import PageserverHttpClient +from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient from fixtures.pg_version import PgVersion @@ -273,7 +273,17 @@ def test_storage_controller_many_tenants( archival_state = rng.choice( [TimelineArchivalState.ARCHIVED, TimelineArchivalState.UNARCHIVED] ) - virtual_ps_http.timeline_archival_config(tenant_id, timeline_id, archival_state) + try: + virtual_ps_http.timeline_archival_config(tenant_id, timeline_id, archival_state) + except PageserverApiException as e: + if e.status_code == 404: + # FIXME: there is an edge case where timeline ops can encounter a 404 during + # a very short time window between generating a new generation number and + # attaching this tenant to its new pageserver. + # See https://github.com/neondatabase/neon/issues/9471 + pass + else: + raise # Generate a mixture of operations and dispatch them all concurrently futs = []