From fcedd102262577d1beabc60785b923cbfdad7410 Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 11 Feb 2025 12:37:09 +0000 Subject: [PATCH] tests: temporarily permit a log error (#10752) ## Problem These tests can encounter a bug in the pageserver read path (#9185) which occurs under the very specific circumstances that the tests create, but is very unlikely to happen in the field. We will fix the bug, but in the meantime let's un-flake the tests. Related: https://github.com/neondatabase/neon/issues/10720 ## Summary of changes - Permit "could not find data for key" errors in tests affected by #9185 --- test_runner/regress/test_sharding.py | 8 ++++++++ test_runner/regress/test_storage_scrubber.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py index 86a6b7428b..6f8070e2ba 100644 --- a/test_runner/regress/test_sharding.py +++ b/test_runner/regress/test_sharding.py @@ -1810,3 +1810,11 @@ def test_sharding_gc( shard_gc_cutoff_lsn = Lsn(shard_index["metadata_bytes"]["latest_gc_cutoff_lsn"]) log.info(f"Shard {shard_number} cutoff LSN: {shard_gc_cutoff_lsn}") assert shard_gc_cutoff_lsn == shard_0_gc_cutoff_lsn + + for ps in env.pageservers: + # This is not okay, but it's not a scrubber bug: it's a pageserver issue that is exposed by + # the specific pattern of aggressive checkpointing+image layer generation + GC that this test does. + # TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed + ps.allowed_errors.append( + ".*could not find data for key 020000000000000000000000000000000000.*" + ) diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index 0f4e5688a9..46038ccbbb 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -312,6 +312,14 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_ drop_local_state(env, tenant_id) workload.validate() + for ps in env.pageservers: + # This is not okay, but it's not a scrubber bug: it's a pageserver issue that is exposed by + # the specific pattern of aggressive checkpointing+image layer generation + GC that this test does. + # TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed + ps.allowed_errors.append( + ".*could not find data for key 020000000000000000000000000000000000.*" + ) + def test_scrubber_physical_gc_timeline_deletion(neon_env_builder: NeonEnvBuilder): """