From 46d30bf0545baa419bfc04e10d50c46ddd4062ba Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 14 Nov 2022 16:03:51 +0200 Subject: [PATCH] Check for errors in pageserver log after each test. If there are any unexpected ERRORs or WARNs in pageserver.log after test finishes, fail the test. This requires whitelisting the errors that *are* expected in each test, and there are also a few common errors that are printed by most tests, which are whitelisted in the fixture itself. With this, we don't need the special abort() call in testing mode, when compaction or GC fails. Those failures will print ERRORs to the logs, which will be picked up by this new mechanism. A bunch of errors are currently whitelisted that we probably shouldn't be emitting in the first place, but fixing those is out of scope for this commit, so I just left FIXME comments on them. --- pageserver/src/tenant_tasks.rs | 4 -- test_runner/fixtures/neon_fixtures.py | 59 +++++++++++++++++++ test_runner/regress/test_branch_and_gc.py | 7 +++ test_runner/regress/test_branch_behind.py | 3 + test_runner/regress/test_broken_timeline.py | 18 +++++- test_runner/regress/test_compatibility.py | 6 ++ test_runner/regress/test_gc_cutoff.py | 5 ++ test_runner/regress/test_import.py | 25 ++++++++ .../regress/test_pageserver_restart.py | 4 ++ test_runner/regress/test_read_validation.py | 2 + test_runner/regress/test_readonly_node.py | 2 + test_runner/regress/test_recovery.py | 4 ++ test_runner/regress/test_remote_storage.py | 11 ++++ test_runner/regress/test_tenant_detach.py | 8 +++ test_runner/regress/test_tenant_relocation.py | 5 ++ test_runner/regress/test_tenants.py | 14 +++++ .../test_tenants_with_remote_storage.py | 36 +++++++++++ test_runner/regress/test_timeline_delete.py | 5 ++ test_runner/regress/test_wal_acceptor.py | 19 ++++++ .../test_walredo_not_left_behind_on_detach.py | 2 + 20 files changed, 233 insertions(+), 6 deletions(-) diff --git a/pageserver/src/tenant_tasks.rs 
b/pageserver/src/tenant_tasks.rs index a24bdd5812..5a9c5aa3a5 100644 --- a/pageserver/src/tenant_tasks.rs +++ b/pageserver/src/tenant_tasks.rs @@ -72,8 +72,6 @@ async fn compaction_loop(tenant_id: TenantId) { if let Err(e) = tenant.compaction_iteration() { sleep_duration = wait_duration; error!("Compaction failed, retrying in {:?}: {e:#}", sleep_duration); - #[cfg(feature = "testing")] - std::process::abort(); } // Sleep @@ -123,8 +121,6 @@ async fn gc_loop(tenant_id: TenantId) { { sleep_duration = wait_duration; error!("Gc failed, retrying in {:?}: {e:#}", sleep_duration); - #[cfg(feature = "testing")] - std::process::abort(); } } diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index f68c6a25db..051c140836 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -784,6 +784,8 @@ class NeonEnvBuilder: self.cleanup_remote_storage() + self.env.pageserver.assert_no_errors() + class NeonEnv: """ @@ -1723,6 +1725,43 @@ class NeonPageserver(PgProtocol): self.config_override = config_override self.version = env.get_pageserver_version() + # After a test finishes, we will scrape the log to see if there are any + # unexpected error messages. If your test expects an error, add it to + # 'allowed_errors' in the test with something like: + # + # env.pageserver.allowed_errors.append(".*could not open garage door.*") + # + # The entries in the list are regular expressions. 
+ self.allowed_errors = [ + # All tests print these, when starting up or shutting down + ".*wal receiver task finished with an error: walreceiver connection handling failure.*", + ".*Shutdown task error: walreceiver connection handling failure.*", + ".*Etcd client error: grpc request error: status: Unavailable.*", + ".*query handler for .* failed: Connection reset by peer.*", + ".*serving compute connection task.*exited with error: Broken pipe.*", + ".*Connection aborted: error communicating with the server: Broken pipe.*", + ".*Connection aborted: error communicating with the server: Transport endpoint is not connected.*", + ".*Connection aborted: error communicating with the server: Connection reset by peer.*", + ".*kill_and_wait_impl.*: wait successful.*", + ".*end streaming to Some.*", + # safekeeper connection can fail with this, in the window between timeline creation + # and streaming start + ".*Failed to process query for timeline .*: state uninitialized, no data to read.*", + # Tests related to authentication and authorization print these + ".*Error processing HTTP request: Forbidden", + # intentional failpoints + ".*failpoint ", + # FIXME: there is a race condition between GC and detach, see + # https://github.com/neondatabase/neon/issues/2442 + ".*could not remove ephemeral file.*No such file or directory.*", + # FIXME: These need investigation + ".*gc_loop.*Failed to get a tenant .* Tenant .* not found in the local state.*", + ".*compaction_loop.*Failed to get a tenant .* Tenant .* not found in the local state.*", + ".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*", + ".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*", + ".*Removing intermediate uninit mark file.*", + ] + def start(self, overrides: Tuple[str, ...] = ()) -> "NeonPageserver": """ Start the page server. 
@@ -1771,6 +1810,26 @@ class NeonPageserver(PgProtocol): is_testing_enabled_or_skip=self.is_testing_enabled_or_skip, ) + def assert_no_errors(self): + logfile = open(os.path.join(self.env.repo_dir, "pageserver.log"), "r") + + error_or_warn = re.compile("ERROR|WARN") + errors = [] + while True: + line = logfile.readline() + if not line: + break + + if error_or_warn.search(line): + # It's an ERROR or WARN. Is it in the allow-list? + for a in self.allowed_errors: + if re.match(a, line): + break + else: + errors.append(line) + + assert not errors + def append_pageserver_param_overrides( params_to_update: List[str], diff --git a/test_runner/regress/test_branch_and_gc.py b/test_runner/regress/test_branch_and_gc.py index 12debe50eb..fad4b4c79e 100644 --- a/test_runner/regress/test_branch_and_gc.py +++ b/test_runner/regress/test_branch_and_gc.py @@ -116,6 +116,13 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): env = neon_simple_env pageserver_http_client = env.pageserver.http_client() + env.pageserver.allowed_errors.extend( + [ + ".*invalid branch start lsn: less than latest GC cutoff.*", + ".*invalid branch start lsn: less than planned GC cutoff.*", + ] + ) + # Disable background GC but set the `pitr_interval` to be small, so GC can delete something tenant, _ = env.neon_cli.create_tenant( conf={ diff --git a/test_runner/regress/test_branch_behind.py b/test_runner/regress/test_branch_behind.py index 0e2a8b346b..a841e3ced2 100644 --- a/test_runner/regress/test_branch_behind.py +++ b/test_runner/regress/test_branch_behind.py @@ -13,6 +13,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" env = neon_env_builder.init_start() + env.pageserver.allowed_errors.append(".*invalid branch start lsn.*") + env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*") + # Branch at the point where only 100 rows were inserted 
env.neon_cli.create_branch("test_branch_behind") pgmain = env.postgres.create_start("test_branch_behind") diff --git a/test_runner/regress/test_broken_timeline.py b/test_runner/regress/test_broken_timeline.py index b747af4d09..cf7f4b8289 100644 --- a/test_runner/regress/test_broken_timeline.py +++ b/test_runner/regress/test_broken_timeline.py @@ -11,10 +11,17 @@ from fixtures.types import TenantId, TimelineId # Test restarting page server, while safekeeper and compute node keep # running. def test_broken_timeline(neon_env_builder: NeonEnvBuilder): - # One safekeeper is enough for this test. - neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() + env.pageserver.allowed_errors.extend( + [ + ".*No timelines to attach received.*", + ".*Failed to process timeline dir contents.*", + ".*Failed to load delta layer.*", + ".*Timeline .* was not found.*", + ] + ) + tenant_timelines: List[Tuple[TenantId, TimelineId, Postgres]] = [] for n in range(4): @@ -111,6 +118,13 @@ def test_timeline_init_break_before_checkpoint(neon_simple_env: NeonEnv): env = neon_simple_env pageserver_http = env.pageserver.http_client() + env.pageserver.allowed_errors.extend( + [ + ".*Failed to process timeline dir contents.*Timeline has no ancestor and no layer files.*", + ".*Timeline got dropped without initializing, cleaning its files.*", + ] + ) + tenant_id, _ = env.neon_cli.create_tenant() timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines" diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 306aa84040..9ad8cd393f 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -50,6 +50,12 @@ def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_o env = neon_env_builder.init_start() pg = env.postgres.create_start("main") + + # FIXME: Is this expected? 
+ env.pageserver.allowed_errors.append( + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + ) + pg_bin.run(["pgbench", "--initialize", "--scale=10", pg.connstr()]) pg_bin.run(["pgbench", "--time=60", "--progress=2", pg.connstr()]) pg_bin.run(["pg_dumpall", f"--dbname={pg.connstr()}", f"--file={test_output_dir / 'dump.sql'}"]) diff --git a/test_runner/regress/test_gc_cutoff.py b/test_runner/regress/test_gc_cutoff.py index 22b77d2cf1..7fe77a7e85 100644 --- a/test_runner/regress/test_gc_cutoff.py +++ b/test_runner/regress/test_gc_cutoff.py @@ -9,6 +9,11 @@ from fixtures.neon_fixtures import NeonEnvBuilder, PgBin # test anyway, so it doesn't need any special attention here. def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): env = neon_env_builder.init_start() + + # These warnings are expected, when the pageserver is restarted abruptly + env.pageserver.allowed_errors.append(".*found future image layer.*") + env.pageserver.allowed_errors.append(".*found future delta layer.*") + pageserver_http = env.pageserver.http_client() # Use aggressive GC and checkpoint settings, so that we also exercise GC during the test diff --git a/test_runner/regress/test_import.py b/test_runner/regress/test_import.py index ced5e18406..fbc893f312 100644 --- a/test_runner/regress/test_import.py +++ b/test_runner/regress/test_import.py @@ -76,6 +76,26 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build env = neon_env_builder.init_start() env.pageserver.http_client().tenant_create(tenant) + env.pageserver.allowed_errors.extend( + [ + ".*error importing base backup .*", + ".*Timeline got dropped without initializing, cleaning its files.*", + ".*Removing intermediate uninit mark file.*", + ".*InternalServerError.*timeline not found.*", + ".*InternalServerError.*Tenant .* not found.*", + ".*InternalServerError.*Timeline .* not found.*", + ".*InternalServerError.*Cannot delete timeline which has child 
timelines.*", + ] + ) + + # FIXME: we should clean up pageserver to not print this + env.pageserver.allowed_errors.append(".*exited with error: unexpected message type: CopyData.*") + + # FIXME: Is this expected? + env.pageserver.allowed_errors.append( + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + ) + def import_tar(base, wal): env.neon_cli.raw_cli( [ @@ -122,6 +142,11 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu neon_env_builder.enable_local_fs_remote_storage() env = neon_env_builder.init_start() + # FIXME: Is this expected? + env.pageserver.allowed_errors.append( + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + ) + timeline = env.neon_cli.create_branch("test_import_from_pageserver_small") pg = env.postgres.create_start("test_import_from_pageserver_small") diff --git a/test_runner/regress/test_pageserver_restart.py b/test_runner/regress/test_pageserver_restart.py index eac5e6e61d..ad06634ae9 100644 --- a/test_runner/regress/test_pageserver_restart.py +++ b/test_runner/regress/test_pageserver_restart.py @@ -67,6 +67,10 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder): def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() + # These warnings are expected, when the pageserver is restarted abruptly + env.pageserver.allowed_errors.append(".*found future image layer.*") + env.pageserver.allowed_errors.append(".*found future delta layer.*") + # Use a tiny checkpoint distance, to create a lot of layers quickly. # That allows us to stress the compaction and layer flushing logic more. 
tenant, _ = env.neon_cli.create_tenant( diff --git a/test_runner/regress/test_read_validation.py b/test_runner/regress/test_read_validation.py index beaae0351b..1e49c3b69f 100644 --- a/test_runner/regress/test_read_validation.py +++ b/test_runner/regress/test_read_validation.py @@ -143,6 +143,8 @@ def test_read_validation_neg(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_read_validation_neg", "empty") + env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*") + pg = env.postgres.create_start("test_read_validation_neg") log.info("postgres is running on 'test_read_validation_neg' branch") diff --git a/test_runner/regress/test_readonly_node.py b/test_runner/regress/test_readonly_node.py index dfa57aec25..62c3ead0a7 100644 --- a/test_runner/regress/test_readonly_node.py +++ b/test_runner/regress/test_readonly_node.py @@ -17,6 +17,8 @@ def test_readonly_node(neon_simple_env: NeonEnv): pgmain = env.postgres.create_start("test_readonly_node") log.info("postgres is running on 'test_readonly_node' branch") + env.pageserver.allowed_errors.append(".*basebackup .* failed: invalid basebackup lsn.*") + main_pg_conn = pgmain.connect() main_cur = main_pg_conn.cursor() diff --git a/test_runner/regress/test_recovery.py b/test_runner/regress/test_recovery.py index e70b1351ba..1e93958e98 100644 --- a/test_runner/regress/test_recovery.py +++ b/test_runner/regress/test_recovery.py @@ -17,6 +17,10 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): neon_env_builder.start() + # These warnings are expected, when the pageserver is restarted abruptly + env.pageserver.allowed_errors.append(".*found future delta layer.*") + env.pageserver.allowed_errors.append(".*found future image layer.*") + # Create a branch for us env.neon_cli.create_branch("test_pageserver_recovery", "main") diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 4fb5a5406d..ecca496c7c 100644 --- 
a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -56,6 +56,17 @@ def test_remote_storage_backup_and_restore( ##### First start, insert secret data and upload it to the remote storage env = neon_env_builder.init_start() + + # FIXME: Is this expected? + env.pageserver.allowed_errors.append( + ".*marking .* as locally complete, while it doesnt exist in remote index.*" + ) + env.pageserver.allowed_errors.append(".*No timelines to attach received.*") + + env.pageserver.allowed_errors.append(".*Tenant download is already in progress.*") + env.pageserver.allowed_errors.append(".*Failed to get local tenant state.*") + env.pageserver.allowed_errors.append(".*No metadata file found in the timeline directory.*") + pageserver_http = env.pageserver.http_client() pg = env.postgres.create_start("main") diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index dc4cd2e37e..f66bacc4f7 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -20,6 +20,8 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() + env.pageserver.allowed_errors.append(".*NotFound\\(Tenant .* not found in the local state") + # first check for non existing tenant tenant_id = TenantId.generate() with pytest.raises( @@ -28,6 +30,9 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): ): pageserver_http.tenant_detach(tenant_id) + # the error will be printed to the log too + env.pageserver.allowed_errors.append(".*Tenant not found for id.*") + # create new nenant tenant_id, timeline_id = env.neon_cli.create_tenant() @@ -50,6 +55,9 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): bogus_timeline_id = TimelineId.generate() pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0) + # the error will be printed to the log too + 
env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*") + # try to concurrently run gc and detach gc_thread = Thread(target=lambda: do_gc_target(pageserver_http, tenant_id, timeline_id)) gc_thread.start() diff --git a/test_runner/regress/test_tenant_relocation.py b/test_runner/regress/test_tenant_relocation.py index aec45307f7..c4b3b28f34 100644 --- a/test_runner/regress/test_tenant_relocation.py +++ b/test_runner/regress/test_tenant_relocation.py @@ -259,6 +259,11 @@ def test_tenant_relocation( env = neon_env_builder.init_start() + # FIXME: Is this expected? + env.pageserver.allowed_errors.append( + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + ) + # create folder for remote storage mock remote_storage_mock_path = env.repo_dir / "local_fs_remote_storage" diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 4ffea60950..6d153b42bc 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -25,6 +25,13 @@ def test_tenant_creation_fails(neon_simple_env: NeonEnv): ) initial_tenant_dirs = [d for d in tenants_dir.iterdir()] + neon_simple_env.pageserver.allowed_errors.extend( + [ + ".*Failed to create directory structure for tenant .*, cleaning tmp data.*", + ".*Failed to fsync removed temporary tenant directory .*", + ] + ) + pageserver_http = neon_simple_env.pageserver.http_client() pageserver_http.configure_failpoints(("tenant-creation-before-tmp-rename", "return")) with pytest.raises(Exception, match="tenant-creation-before-tmp-rename"): @@ -206,6 +213,13 @@ def test_pageserver_with_empty_tenants( ) env = neon_env_builder.init_start() + + env.pageserver.allowed_errors.append( + ".*marking .* as locally complete, while it doesnt exist in remote index.*" + ) + env.pageserver.allowed_errors.append(".*Tenant .* has no timelines directory.*") + env.pageserver.allowed_errors.append(".*No timelines to attach received.*") + client = 
env.pageserver.http_client() tenant_without_timelines_dir = env.initial_tenant diff --git a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py index 9a4cbe135b..8fd28cf53e 100644 --- a/test_runner/regress/test_tenants_with_remote_storage.py +++ b/test_runner/regress/test_tenants_with_remote_storage.py @@ -66,6 +66,11 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem env = neon_env_builder.init_start() + # FIXME: Is this expected? + env.pageserver.allowed_errors.append( + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + ) + tenants_pgs: List[Tuple[TenantId, Postgres]] = [] for _ in range(1, 5): @@ -117,6 +122,13 @@ def test_tenants_attached_after_download( ##### First start, insert secret data and upload it to the remote storage env = neon_env_builder.init_start() + + # FIXME: Are these expected? + env.pageserver.allowed_errors.append(".*No timelines to attach received.*") + env.pageserver.allowed_errors.append( + ".*marking .* as locally complete, while it doesnt exist in remote index.*" + ) + pageserver_http = env.pageserver.http_client() pg = env.postgres.create_start("main") @@ -209,6 +221,16 @@ def test_tenant_upgrades_index_json_from_v0( # launch pageserver, populate the default tenants timeline, wait for it to be uploaded, # then go ahead and modify the "remote" version as if it was downgraded, needing upgrade env = neon_env_builder.init_start() + + # FIXME: Are these expected? 
+ env.pageserver.allowed_errors.append( + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + ) + env.pageserver.allowed_errors.append(".*No timelines to attach received.*") + env.pageserver.allowed_errors.append( + ".*Failed to get local tenant state: Tenant .* not found in the local state.*" + ) + pageserver_http = env.pageserver.http_client() pg = env.postgres.create_start("main") @@ -315,6 +337,20 @@ def test_tenant_redownloads_truncated_file_on_startup( ) env = neon_env_builder.init_start() + + env.pageserver.allowed_errors.append( + ".*Redownloading locally existing .* due to size mismatch.*" + ) + env.pageserver.allowed_errors.append( + ".*Downloaded layer exists already but layer file metadata mismatches.*" + ) + + # FIXME: Are these expected? + env.pageserver.allowed_errors.append( + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + ) + env.pageserver.allowed_errors.append(".*No timelines to attach received.*") + pageserver_http = env.pageserver.http_client() pg = env.postgres.create_start("main") diff --git a/test_runner/regress/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py index 450f7f2381..d8f9ef2f89 100644 --- a/test_runner/regress/test_timeline_delete.py +++ b/test_runner/regress/test_timeline_delete.py @@ -7,6 +7,11 @@ from fixtures.utils import wait_until def test_timeline_delete(neon_simple_env: NeonEnv): env = neon_simple_env + env.pageserver.allowed_errors.append(".*Timeline .* was not found.*") + env.pageserver.allowed_errors.append(".*timeline not found.*") + env.pageserver.allowed_errors.append(".*Cannot delete timeline which has child timelines.*") + env.pageserver.allowed_errors.append(".*Tenant .* not found in the local state.*") + ps_http = env.pageserver.http_client() # first try to delete non existing timeline diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 
2b43f46fd3..3945376e5e 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -263,6 +263,12 @@ def test_broker(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_broker", "main") + + # FIXME: Is this expected? + env.pageserver.allowed_errors.append( + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + ) + pg = env.postgres.create_start("test_broker") pg.safe_psql("CREATE TABLE t(key int primary key, value text)") @@ -306,6 +312,11 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.auth_enabled = auth_enabled env = neon_env_builder.init_start() + # FIXME: Is this expected? + env.pageserver.allowed_errors.append( + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + ) + env.neon_cli.create_branch("test_safekeepers_wal_removal") pg = env.postgres.create_start("test_safekeepers_wal_removal") @@ -1081,6 +1092,14 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.auth_enabled = auth_enabled env = neon_env_builder.init_start() + # FIXME: are these expected? + env.pageserver.allowed_errors.extend( + [ + ".*Failed to process query for timeline .*: Timeline .* was not found in global map.*", + ".*end streaming to Some.*", + ] + ) + # Create two tenants: one will be deleted, other should be preserved. 
tenant_id = env.initial_tenant timeline_id_1 = env.neon_cli.create_branch("br1") # Active, delete explicitly diff --git a/test_runner/regress/test_walredo_not_left_behind_on_detach.py b/test_runner/regress/test_walredo_not_left_behind_on_detach.py index c79aea35da..aaaa8893a5 100644 --- a/test_runner/regress/test_walredo_not_left_behind_on_detach.py +++ b/test_runner/regress/test_walredo_not_left_behind_on_detach.py @@ -22,6 +22,8 @@ def assert_child_processes(pageserver_pid, wal_redo_present=False, defunct_prese # as a zombie process. def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() + # We intentionally test for a non-existent tenant. + env.pageserver.allowed_errors.append(".*Tenant not found.*") pageserver_http = env.pageserver.http_client() pagserver_pid = int((env.repo_dir / "pageserver.pid").read_text())