test: fix tenant duplication utility generation numbers (#8096)

## Problem
We have this set of test utilities which duplicate a tenant by copying
everything that's in remote storage and then attaching a tenant to the
pageserver and storage controller. When the "copied tenants" are created
on the storage controller, they start off from generation number 0. This
means that they can't see anything past that generation.

This issue has existed ever since generation numbers were
introduced, but we've largely been lucky
that the generation stayed stable during the template tenant creation.

## Summary of Changes
Extend the storage controller debug attach hook to accept a generation
override. Use that in the tenant duplication logic to set the generation
number to something greater than the naturally reached generation. This
allows the tenants to see all layer files.
This commit is contained in:
Vlad Lazar
2024-06-19 11:55:59 +01:00
committed by GitHub
parent 5778d714f0
commit e7d62a257d
4 changed files with 20 additions and 3 deletions

View File

@@ -2159,12 +2159,19 @@ class NeonStorageController(MetricsGetter, LogUtils):
return time.time() - t1
def attach_hook_issue(
self, tenant_shard_id: Union[TenantId, TenantShardId], pageserver_id: int
self,
tenant_shard_id: Union[TenantId, TenantShardId],
pageserver_id: int,
generation_override: Optional[int] = None,
) -> int:
body = {"tenant_shard_id": str(tenant_shard_id), "node_id": pageserver_id}
if generation_override is not None:
body["generation_override"] = generation_override
response = self.request(
"POST",
f"{self.env.storage_controller_api}/debug/v1/attach-hook",
json={"tenant_shard_id": str(tenant_shard_id), "node_id": pageserver_id},
json=body,
headers=self.headers(TokenScope.ADMIN),
)
gen = response.json()["gen"]
@@ -2635,6 +2642,7 @@ class NeonPageserver(PgProtocol, LogUtils):
config: None | Dict[str, Any] = None,
config_null: bool = False,
generation: Optional[int] = None,
override_storage_controller_generation: bool = False,
):
"""
Tenant attachment passes through here to acquire a generation number before proceeding
@@ -2643,6 +2651,10 @@ class NeonPageserver(PgProtocol, LogUtils):
client = self.http_client()
if generation is None:
generation = self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
elif override_storage_controller_generation:
generation = self.env.storage_controller.attach_hook_issue(
tenant_id, self.id, generation
)
return client.tenant_attach(
tenant_id,
config,

View File

@@ -66,6 +66,8 @@ def single_timeline(
env.pageserver.tenant_attach(
tenant,
config=template_config.copy(),
generation=100,
override_storage_controller_generation=True,
)
time.sleep(0.1)
wait_until_tenant_state(ps_http, tenant, "Broken", 10)