From 66337097de074de3a2e2e19bf0b1c304a21b273c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 16 Jul 2024 12:19:28 +0200 Subject: [PATCH] Avoid the storage controller in test_tenant_creation_fails (#8392) As described in #8385, the likely source for flakiness in test_tenant_creation_fails is the following sequence of events: 1. test instructs the storage controller to create the tenant 2. storage controller adds the tenant and persists it to the database. issues a creation request 3. the pageserver restarts with the failpoint disabled 4. storage controller's background reconciliation still wants to create the tenant 5. pageserver gets new request to create the tenant from background reconciliation This commit just avoids the storage controller entirely. It has its own set of issues, as the re-attach request will obviously not include the tenant, but it's still useful to test for non-existence of the tenant. The generation is also not optional any more during tenant attachment. If you omit it, the pageserver yields an error. We change the signature of `tenant_attach` to reflect that. Alternative to #8385 Fixes #8266 --- test_runner/fixtures/neon_fixtures.py | 2 +- test_runner/fixtures/pageserver/http.py | 2 +- test_runner/regress/test_tenants.py | 13 +++---------- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index fe4a334458..625e9096f5 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -2786,8 +2786,8 @@ class NeonPageserver(PgProtocol, LogUtils): ) return client.tenant_attach( tenant_id, + generation, config, - generation=generation, ) def tenant_detach(self, tenant_id: TenantId): diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index d66b94948a..f1e3d1a309 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -238,8 +238,8 @@ class PageserverHttpClient(requests.Session, MetricsGetter): def tenant_attach( self, tenant_id: Union[TenantId, TenantShardId], + generation: int, config: None | Dict[str, Any] = None, - generation: Optional[int] = None, ): config = config or {} diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 04b3fdd80f..0ebf714de0 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -45,17 +45,10 @@ def test_tenant_creation_fails(neon_simple_env: NeonEnv): # Failure to write a config to local disk makes the pageserver assume that local disk is bad and abort the process pageserver_http.configure_failpoints(("tenant-config-before-write", "return")) - # Storage controller will see a torn TCP connection when the crash point is reached, and follow an unclean 500 error path - neon_simple_env.storage_controller.allowed_errors.extend( - [ - ".*Reconcile not done yet while creating tenant.*", - ".*Reconcile error: receive body: error sending request.*", - ".*Error processing HTTP request: InternalServerError.*", - ] - ) + tenant_id = TenantId.generate() - with pytest.raises(Exception, match="error sending request"): - _ = neon_simple_env.neon_cli.create_tenant() + with pytest.raises(requests.exceptions.ConnectionError, match="Connection aborted"): + neon_simple_env.pageserver.http_client().tenant_attach(tenant_id=tenant_id, generation=1) # Any files left behind on disk during failed creation do not prevent # a retry from succeeding. Restart pageserver with no failpoints.