From e3778381a8eaac0b43238c1fc5ade20d459c20aa Mon Sep 17 00:00:00 2001
From: John Spray <john@neon.tech>
Date: Wed, 13 Dec 2023 14:14:38 +0000
Subject: [PATCH] tests: make test_bulk_insert recreate tenant in same
 generation (#6113)

## Problem

Test deletes tenant and recreates with the same ID. The recreation bumps
generation number. This could lead to stale generation warnings in the
logs.

## Summary of changes

Handle this more gracefully by re-creating in the same generation that
the tenant was previously attached in.

We could also update the tenant delete path to have the attachment
service to drop tenant state on delete, but I like having it there: it
makes debug easier, and the only time it's a problem is when a test is
re-using a tenant ID after deletion.

## Checklist before requesting a review

- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? if so did you add the relevant
metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with
/release-notes label and add several sentences in this section.

## Checklist before merging

- [ ] Do not forget to reformat commit message to not include the above
checklist
---
 test_runner/fixtures/neon_fixtures.py       |  7 ++++---
 test_runner/performance/test_bulk_insert.py | 13 ++++++++++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 4b23650960..7dfdd9274d 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1870,11 +1870,12 @@ class NeonPageserver(PgProtocol):
         tenant_id: TenantId,
         conf: Optional[Dict[str, Any]] = None,
         auth_token: Optional[str] = None,
+        generation: Optional[int] = None,
     ) -> TenantId:
+        if generation is None:
+            generation = self.maybe_get_generation(tenant_id)
         client = self.http_client(auth_token=auth_token)
-        return client.tenant_create(
-            tenant_id, conf, generation=self.maybe_get_generation(tenant_id)
-        )
+        return client.tenant_create(tenant_id, conf, generation=generation)
 
     def tenant_load(self, tenant_id: TenantId):
         client = self.http_client()
diff --git a/test_runner/performance/test_bulk_insert.py b/test_runner/performance/test_bulk_insert.py
index a146e011cc..a2a1fa11e5 100644
--- a/test_runner/performance/test_bulk_insert.py
+++ b/test_runner/performance/test_bulk_insert.py
@@ -55,9 +55,20 @@ def measure_recovery_time(env: NeonCompare):
 
     # Delete the Tenant in the pageserver: this will drop local and remote layers, such that
     # when we "create" the Tenant again, we will replay the WAL from the beginning.
+    #
+    # This is a "weird" thing to do, and can confuse the attachment service as we're re-using
+    # the same tenant ID for a tenant that is logically different from the pageserver's point
+    # of view, but the same as far as the safekeeper/WAL is concerned.  To work around that,
+    # we will explicitly create the tenant in the same generation that it was previously
+    # attached in.
+    assert env.env.attachment_service is not None
+    attach_status = env.env.attachment_service.inspect(tenant_id=env.tenant)
+    assert attach_status is not None
+    (attach_gen, _) = attach_status
+
     client.tenant_delete(env.tenant)
     wait_tenant_status_404(client, env.tenant, iterations=60, interval=0.5)
-    env.env.pageserver.tenant_create(tenant_id=env.tenant)
+    env.env.pageserver.tenant_create(tenant_id=env.tenant, generation=attach_gen)
 
     # Measure recovery time
     with env.record_duration("wal_recovery"):