diff --git a/test_runner/performance/test_storage_controller_scale.py b/test_runner/performance/test_storage_controller_scale.py
index cb013ae8c3..a4c8c8ac42 100644
--- a/test_runner/performance/test_storage_controller_scale.py
+++ b/test_runner/performance/test_storage_controller_scale.py
@@ -48,7 +48,16 @@ def test_storage_controller_many_tenants(
 
     # We will intentionally stress reconciler concurrrency, which triggers a warning when lots
     # of shards are hitting the delayed path.
-    env.storage_controller.allowed_errors.append(".*Many shards are waiting to reconcile")
+    env.storage_controller.allowed_errors.extend(
+        [
+            # We will intentionally stress reconciler concurrency, which triggers a warning when lots
+            # of shards are hitting the delayed path.
+            ".*Many shards are waiting to reconcile",
+            # We will create many timelines concurrently, so they might get slow enough to trip the warning
+            # that timeline creation is holding a lock too long.
+            ".*Shared lock by TimelineCreate.*was held.*",
+        ]
+    )
 
     for ps in env.pageservers:
         # This can happen because when we do a loop over all pageservers and mark them offline/active,
diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py
index a3316f2f45..d3fba32a19 100644
--- a/test_runner/regress/test_tenant_delete.py
+++ b/test_runner/regress/test_tenant_delete.py
@@ -31,8 +31,12 @@ def error_tolerant_delete(ps_http, tenant_id):
         if e.status_code == 500:
             # This test uses failure injection, which can produce 500s as the pageserver expects
             # the object store to always be available, and the ListObjects during deletion is generally
-            # an infallible operation
-            assert "simulated failure of remote operation" in e.message
+            # an infallible operation. This can show up as a clear simulated error, or as a general
+            # error during delete_objects()
+            assert (
+                "simulated failure of remote operation" in e.message
+                or "failed to delete" in e.message
+            )
         else:
             raise
     else: