From 5820faaa876c35b0504b1f5136d851437651a765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 12 Dec 2023 18:00:37 +0100 Subject: [PATCH] Use extend instead of groups of append calls in tests (#6109) Repeated calls to `.append` don't line up as nicely as they might get formatted in different ways. Also, it is more characters and the lines might be longer. Saw this while working on #5912. --- test_runner/regress/test_auth.py | 11 ++-- test_runner/regress/test_branch_behind.py | 5 +- test_runner/regress/test_branching.py | 20 +++--- test_runner/regress/test_import.py | 13 ++-- .../test_pageserver_metric_collection.py | 28 ++++---- test_runner/regress/test_remote_storage.py | 25 +++---- test_runner/regress/test_tenant_delete.py | 14 ++-- test_runner/regress/test_tenant_detach.py | 12 ++-- test_runner/regress/test_tenant_relocation.py | 17 ++--- .../test_tenants_with_remote_storage.py | 23 ++++--- .../regress/test_threshold_based_eviction.py | 13 ++-- test_runner/regress/test_timeline_delete.py | 66 ++++++++++--------- 12 files changed, 133 insertions(+), 114 deletions(-) diff --git a/test_runner/regress/test_auth.py b/test_runner/regress/test_auth.py index 7487106c44..bd87ff3efd 100644 --- a/test_runner/regress/test_auth.py +++ b/test_runner/regress/test_auth.py @@ -92,8 +92,9 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder): def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder): neon_env_builder.auth_enabled = True env = neon_env_builder.init_start() - env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*") - env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*") + env.pageserver.allowed_errors.extend( + [".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"] + ) pageserver_token_old = env.auth_keys.generate_pageserver_token() pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old) @@ -145,9 +146,9 @@ def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder): def test_pageserver_key_reload(neon_env_builder: NeonEnvBuilder): neon_env_builder.auth_enabled = True env = neon_env_builder.init_start() - env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*") - env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*") - + env.pageserver.allowed_errors.extend( + [".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"] + ) pageserver_token_old = env.auth_keys.generate_pageserver_token() pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old) diff --git a/test_runner/regress/test_branch_behind.py b/test_runner/regress/test_branch_behind.py index a19b2862f8..9879254897 100644 --- a/test_runner/regress/test_branch_behind.py +++ b/test_runner/regress/test_branch_behind.py @@ -14,8 +14,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" env = neon_env_builder.init_start() - env.pageserver.allowed_errors.append(".*invalid branch start lsn.*") - env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*") + env.pageserver.allowed_errors.extend( + [".*invalid branch start lsn.*", ".*invalid start lsn .* for ancestor timeline.*"] + ) # Branch at the point where only 100 rows were inserted branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind") diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py index a908dd713a..82ca985d01 100644 --- a/test_runner/regress/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -148,11 +148,11 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE env = neon_env_builder.init_configs() env.start() - env.pageserver.allowed_errors.append( - ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*" - ) - env.pageserver.allowed_errors.append( - ".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading" + env.pageserver.allowed_errors.extend( + [ + ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*", + ".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading", + ] ) ps_http = env.pageserver.http_client() @@ -247,11 +247,11 @@ def test_competing_branchings_from_loading_race_to_ok_or_err(neon_env_builder: N env = neon_env_builder.init_configs() env.start() - env.pageserver.allowed_errors.append( - ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*" - ) - env.pageserver.allowed_errors.append( - ".*Error processing HTTP request: InternalServerError\\(Timeline .*/.* already exists in pageserver's memory" + env.pageserver.allowed_errors.extend( + [ + ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*", + ".*Error processing HTTP request: InternalServerError\\(Timeline .*/.* already exists in pageserver's memory", + ] ) ps_http = env.pageserver.http_client() diff --git a/test_runner/regress/test_import.py b/test_runner/regress/test_import.py index 920e8d0b72..faedf5d944 100644 --- a/test_runner/regress/test_import.py +++ b/test_runner/regress/test_import.py @@ -99,12 +99,13 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build ] ) - # FIXME: we should clean up pageserver to not print this - env.pageserver.allowed_errors.append(".*exited with error: unexpected message type: CopyData.*") - - # FIXME: Is this expected? - env.pageserver.allowed_errors.append( - ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + env.pageserver.allowed_errors.extend( + [ + # FIXME: we should clean up pageserver to not print this + ".*exited with error: unexpected message type: CopyData.*", + # FIXME: Is this expected? + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*", + ] ) def import_tar(base, wal): diff --git a/test_runner/regress/test_pageserver_metric_collection.py b/test_runner/regress/test_pageserver_metric_collection.py index b76dbbee03..042961baa5 100644 --- a/test_runner/regress/test_pageserver_metric_collection.py +++ b/test_runner/regress/test_pageserver_metric_collection.py @@ -64,13 +64,13 @@ def test_metric_collection( # spin up neon, after http server is ready env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"}) # httpserver is shut down before pageserver during passing run - env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*") - # we have a fast rate of calculation, these can happen at shutdown - env.pageserver.allowed_errors.append( - ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*" - ) - env.pageserver.allowed_errors.append( - ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes" + env.pageserver.allowed_errors.extend( + [ + ".*metrics endpoint refused the sent metrics*", + # we have a fast rate of calculation, these can happen at shutdown + ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*", + ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes", + ] ) tenant_id = env.initial_tenant @@ -212,13 +212,13 @@ def test_metric_collection_cleans_up_tempfile( pageserver_http = env.pageserver.http_client() # httpserver is shut down before pageserver during passing run - env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*") - # we have a fast rate of calculation, these can happen at shutdown - env.pageserver.allowed_errors.append( - ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*" - ) - env.pageserver.allowed_errors.append( - ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes" + env.pageserver.allowed_errors.extend( + [ + ".*metrics endpoint refused the sent metrics*", + # we have a fast rate of calculation, these can happen at shutdown + ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*", + ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes", + ] ) tenant_id = env.initial_tenant diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 0a5046e219..3004d69f50 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -73,19 +73,20 @@ def test_remote_storage_backup_and_restore( ##### First start, insert data and upload it to the remote storage env = neon_env_builder.init_start() - # FIXME: Is this expected? - env.pageserver.allowed_errors.append( - ".*marking .* as locally complete, while it doesnt exist in remote index.*" + env.pageserver.allowed_errors.extend( + [ + # FIXME: Is this expected? + ".*marking .* as locally complete, while it doesnt exist in remote index.*", + ".*No timelines to attach received.*", + ".*Failed to get local tenant state.*", + # FIXME retry downloads without throwing errors + ".*failed to load remote timeline.*", + # we have a bunch of pytest.raises for these below + ".*tenant .*? already exists, state:.*", + ".*tenant directory already exists.*", + ".*simulated failure of remote operation.*", + ] ) - env.pageserver.allowed_errors.append(".*No timelines to attach received.*") - - env.pageserver.allowed_errors.append(".*Failed to get local tenant state.*") - # FIXME retry downloads without throwing errors - env.pageserver.allowed_errors.append(".*failed to load remote timeline.*") - # we have a bunch of pytest.raises for these below - env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*") - env.pageserver.allowed_errors.append(".*tenant directory already exists.*") - env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*") pageserver_http = env.pageserver.http_client() endpoint = env.endpoints.create_start("main") diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py index 48f5682371..fece876459 100644 --- a/test_runner/regress/test_tenant_delete.py +++ b/test_runner/regress/test_tenant_delete.py @@ -395,13 +395,13 @@ def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonE env.start() pageserver_http = env.pageserver.http_client() - # happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero - env.pageserver.allowed_errors.append( - ".*Timeline got dropped without initializing, cleaning its files" - ) - # the response hit_pausable_failpoint_and_later_fail - env.pageserver.allowed_errors.append( - f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn" + env.pageserver.allowed_errors.extend( + [ + # happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero + ".*Timeline got dropped without initializing, cleaning its files", + # the response hit_pausable_failpoint_and_later_fail + f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn", + ] ) env.pageserver.tenant_create(env.initial_tenant) diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index 5b63bd6161..0dcbb23ad4 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -307,10 +307,14 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): bogus_timeline_id = TimelineId.generate() pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0) - # the error will be printed to the log too - env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*") - # Timelines get stopped during detach, ignore the gc calls that error, witnessing that - env.pageserver.allowed_errors.append(".*InternalServerError\\(timeline is Stopping.*") + env.pageserver.allowed_errors.extend( + [ + # the error will be printed to the log too + ".*gc target timeline does not exist.*", + # Timelines get stopped during detach, ignore the gc calls that error, witnessing that + ".*InternalServerError\\(timeline is Stopping.*", + ] + ) # Detach while running manual GC. # It should wait for manual GC to finish because it runs in a task associated with the tenant. diff --git a/test_runner/regress/test_tenant_relocation.py b/test_runner/regress/test_tenant_relocation.py index feacdcc802..dcd7232b1b 100644 --- a/test_runner/regress/test_tenant_relocation.py +++ b/test_runner/regress/test_tenant_relocation.py @@ -216,16 +216,17 @@ def test_tenant_relocation( tenant_id = TenantId("74ee8b079a0e437eb0afea7d26a07209") - # FIXME: Is this expected? - env.pageservers[0].allowed_errors.append( - ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + env.pageservers[0].allowed_errors.extend( + [ + # FIXME: Is this expected? + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*", + # Needed for detach polling on the original pageserver + f".*NotFound: tenant {tenant_id}.*", + # We will dual-attach in this test, so stale generations are expected + ".*Dropped remote consistent LSN updates.*", + ] ) - # Needed for detach polling on the original pageserver - env.pageservers[0].allowed_errors.append(f".*NotFound: tenant {tenant_id}.*") - # We will dual-attach in this test, so stale generations are expected - env.pageservers[0].allowed_errors.append(".*Dropped remote consistent LSN updates.*") - assert isinstance(env.pageserver_remote_storage, LocalFsStorage) # we use two branches to check that they are both relocated diff --git a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py index b7b4e2be0b..07fb6dc5ca 100644 --- a/test_runner/regress/test_tenants_with_remote_storage.py +++ b/test_runner/regress/test_tenants_with_remote_storage.py @@ -117,10 +117,12 @@ def test_tenants_attached_after_download(neon_env_builder: NeonEnvBuilder): ##### First start, insert secret data and upload it to the remote storage env = neon_env_builder.init_start() - # FIXME: Are these expected? - env.pageserver.allowed_errors.append(".*No timelines to attach received.*") - env.pageserver.allowed_errors.append( - ".*marking .* as locally complete, while it doesnt exist in remote index.*" + env.pageserver.allowed_errors.extend( + [ + # FIXME: Are these expected? + ".*No timelines to attach received.*", + ".*marking .* as locally complete, while it doesnt exist in remote index.*", + ] ) pageserver_http = env.pageserver.http_client() @@ -218,13 +220,14 @@ def test_tenant_redownloads_truncated_file_on_startup( assert isinstance(env.pageserver_remote_storage, LocalFsStorage) - env.pageserver.allowed_errors.append(".*removing local file .* because .*") - - # FIXME: Are these expected? - env.pageserver.allowed_errors.append( - ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" + env.pageserver.allowed_errors.extend( + [ + ".*removing local file .* because .*", + # FIXME: Are these expected? + ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*", + ".*No timelines to attach received.*", + ] ) - env.pageserver.allowed_errors.append(".*No timelines to attach received.*") pageserver_http = env.pageserver.http_client() endpoint = env.endpoints.create_start("main") diff --git a/test_runner/regress/test_threshold_based_eviction.py b/test_runner/regress/test_threshold_based_eviction.py index 27d5cce5f2..5f72cfd747 100644 --- a/test_runner/regress/test_threshold_based_eviction.py +++ b/test_runner/regress/test_threshold_based_eviction.py @@ -36,12 +36,13 @@ def test_threshold_based_eviction( ".*metrics_collection:.* upload consumption_metrics (still failed|failed, will retry).*" ) env = neon_env_builder.init_start() - env.pageserver.allowed_errors.append(metrics_refused_log_line) - - # these can happen whenever we run consumption metrics collection - env.pageserver.allowed_errors.append(r".*failed to calculate logical size at \S+: cancelled") - env.pageserver.allowed_errors.append( - r".*failed to calculate synthetic size for tenant \S+: failed to calculate some logical_sizes" + env.pageserver.allowed_errors.extend( + [ + metrics_refused_log_line, + # these can happen whenever we run consumption metrics collection + r".*failed to calculate logical size at \S+: cancelled", + r".*failed to calculate synthetic size for tenant \S+: failed to calculate some logical_sizes", + ] ) tenant_id, timeline_id = env.initial_tenant, env.initial_timeline diff --git a/test_runner/regress/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py index 17113a6bc5..c6d578a7a2 100644 --- a/test_runner/regress/test_timeline_delete.py +++ b/test_runner/regress/test_timeline_delete.py @@ -39,10 +39,14 @@ from urllib3.util.retry import Retry def test_timeline_delete(neon_simple_env: NeonEnv): env = neon_simple_env - env.pageserver.allowed_errors.append(".*Timeline .* was not found.*") - env.pageserver.allowed_errors.append(".*timeline not found.*") - env.pageserver.allowed_errors.append(".*Cannot delete timeline which has child timelines.*") - env.pageserver.allowed_errors.append(".*Precondition failed: Requested tenant is missing.*") + env.pageserver.allowed_errors.extend( + [ + ".*Timeline .* was not found.*", + ".*timeline not found.*", + ".*Cannot delete timeline which has child timelines.*", + ".*Precondition failed: Requested tenant is missing.*", + ] + ) ps_http = env.pageserver.http_client() @@ -198,22 +202,22 @@ def test_delete_timeline_exercise_crash_safety_failpoints( ), ) - env.pageserver.allowed_errors.append(f".*{timeline_id}.*failpoint: {failpoint}") - # It appears when we stopped flush loop during deletion and then pageserver is stopped - env.pageserver.allowed_errors.append( - ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited", + env.pageserver.allowed_errors.extend( + [ + f".*{timeline_id}.*failpoint: {failpoint}", + # It appears when we stopped flush loop during deletion and then pageserver is stopped + ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited", + # This happens when we fail before scheduling background operation. + # Timeline is left in stopping state and retry tries to stop it again. + ".*Ignoring new state, equal to the existing one: Stopping", + # This happens when we retry delete requests for broken timelines + ".*Ignoring state update Stopping for broken timeline", + # This happens when timeline remains are cleaned up during loading + ".*Timeline dir entry become invalid.*", + # In one of the branches we poll for tenant to become active. Polls can generate this log message: + f".*Tenant {env.initial_tenant} is not active*", + ] ) - # This happens when we fail before scheduling background operation. - # Timeline is left in stopping state and retry tries to stop it again. - env.pageserver.allowed_errors.append( - ".*Ignoring new state, equal to the existing one: Stopping" - ) - # This happens when we retry delete requests for broken timelines - env.pageserver.allowed_errors.append(".*Ignoring state update Stopping for broken timeline") - # This happens when timeline remains are cleaned up during loading - env.pageserver.allowed_errors.append(".*Timeline dir entry become invalid.*") - # In one of the branches we poll for tenant to become active. Polls can generate this log message: - env.pageserver.allowed_errors.append(f".*Tenant {env.initial_tenant} is not active*") ps_http.configure_failpoints((failpoint, "return")) @@ -398,13 +402,13 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild env = neon_env_builder.init_start() - env.pageserver.allowed_errors.append(".*failpoint: timeline-delete-before-rm") - env.pageserver.allowed_errors.append( - ".*Ignoring new state, equal to the existing one: Stopping" - ) - # this happens, because the stuck timeline is visible to shutdown - env.pageserver.allowed_errors.append( - ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited", + env.pageserver.allowed_errors.extend( + [ + ".*failpoint: timeline-delete-before-rm", + ".*Ignoring new state, equal to the existing one: Stopping", + # this happens, because the stuck timeline is visible to shutdown + ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited", + ] ) ps_http = env.pageserver.http_client() @@ -551,10 +555,12 @@ def test_concurrent_timeline_delete_stuck_on( with pytest.raises(PageserverApiException, match=error_msg_re) as second_call_err: ps_http.timeline_delete(env.initial_tenant, child_timeline_id) assert second_call_err.value.status_code == 409 - env.pageserver.allowed_errors.append(f".*{child_timeline_id}.*{error_msg_re}.*") - # the second call will try to transition the timeline into Stopping state as well - env.pageserver.allowed_errors.append( - f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping" + env.pageserver.allowed_errors.extend( + [ + f".*{child_timeline_id}.*{error_msg_re}.*", + # the second call will try to transition the timeline into Stopping state as well + f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping", + ] ) log.info("second call failed as expected")