From e3778381a8eaac0b43238c1fc5ade20d459c20aa Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 13 Dec 2023 14:14:38 +0000 Subject: [PATCH] tests: make test_bulk_insert recreate tenant in same generation (#6113) ## Problem The test deletes the tenant and recreates it with the same ID. The recreation bumps the generation number. This could lead to stale generation warnings in the logs. ## Summary of changes Handle this more gracefully by re-creating in the same generation that the tenant was previously attached in. We could also update the tenant delete path to have the attachment service drop tenant state on delete, but I like having it there: it makes debugging easier, and the only time it's a problem is when a test is re-using a tenant ID after deletion. ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? If so, did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. 
## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --- test_runner/fixtures/neon_fixtures.py | 7 ++++--- test_runner/performance/test_bulk_insert.py | 13 ++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 4b23650960..7dfdd9274d 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1870,11 +1870,12 @@ class NeonPageserver(PgProtocol): tenant_id: TenantId, conf: Optional[Dict[str, Any]] = None, auth_token: Optional[str] = None, + generation: Optional[int] = None, ) -> TenantId: + if generation is None: + generation = self.maybe_get_generation(tenant_id) client = self.http_client(auth_token=auth_token) - return client.tenant_create( - tenant_id, conf, generation=self.maybe_get_generation(tenant_id) - ) + return client.tenant_create(tenant_id, conf, generation=generation) def tenant_load(self, tenant_id: TenantId): client = self.http_client() diff --git a/test_runner/performance/test_bulk_insert.py b/test_runner/performance/test_bulk_insert.py index a146e011cc..a2a1fa11e5 100644 --- a/test_runner/performance/test_bulk_insert.py +++ b/test_runner/performance/test_bulk_insert.py @@ -55,9 +55,20 @@ def measure_recovery_time(env: NeonCompare): # Delete the Tenant in the pageserver: this will drop local and remote layers, such that # when we "create" the Tenant again, we will replay the WAL from the beginning. + # + # This is a "weird" thing to do, and can confuse the attachment service as we're re-using + # the same tenant ID for a tenant that is logically different from the pageserver's point + # of view, but the same as far as the safekeeper/WAL is concerned. To work around that, + # we will explicitly create the tenant in the same generation that it was previously + # attached in. 
+ assert env.env.attachment_service is not None + attach_status = env.env.attachment_service.inspect(tenant_id=env.tenant) + assert attach_status is not None + (attach_gen, _) = attach_status + client.tenant_delete(env.tenant) wait_tenant_status_404(client, env.tenant, iterations=60, interval=0.5) - env.env.pageserver.tenant_create(tenant_id=env.tenant) + env.env.pageserver.tenant_create(tenant_id=env.tenant, generation=attach_gen) # Measure recovery time with env.record_duration("wal_recovery"):