From e640bc7dba325c4a7d7f0ff366118eac07f28bd1 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 8 Dec 2023 17:32:16 +0000 Subject: [PATCH] tests: allow-lists for occasional failures (#6074) test_creating_tenant_conf_after... - Test detaches a tenant and then re-attaches immediately: this causes a race between pending remote LSN update and the generation bump in the attachment. test_gc_cutoff: - Test rapidly restarts a pageserver before one generation has had the chance to process deletions from the previous generation --- test_runner/regress/test_gc_cutoff.py | 5 +++++ test_runner/regress/test_tenant_conf.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/test_runner/regress/test_gc_cutoff.py b/test_runner/regress/test_gc_cutoff.py index be3355f5cc..284a8c3563 100644 --- a/test_runner/regress/test_gc_cutoff.py +++ b/test_runner/regress/test_gc_cutoff.py @@ -35,6 +35,11 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit")) + # Because this test does a rapid series of restarts of the same node, it's possible that + # we are restarted again before we can clean up deletion lists from the previous generation, + # resulting in a subsequent startup logging a warning. 
+ env.pageserver.allowed_errors.append(".*Dropping stale deletions for tenant.*") + for _ in range(5): with pytest.raises(subprocess.SubprocessError): pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr]) diff --git a/test_runner/regress/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py index f4565c2ee2..2ed22cabc4 100644 --- a/test_runner/regress/test_tenant_conf.py +++ b/test_runner/regress/test_tenant_conf.py @@ -314,6 +314,10 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder): assert not config_path.exists(), "detach did not remove config file" + # The re-attach's increment of the generation number may invalidate deletion queue + # updates in flight from the previous attachment. + env.pageserver.allowed_errors.append(".*Dropped remote consistent LSN updates.*") + env.pageserver.tenant_attach(tenant_id) wait_until( number_of_iterations=5,