Files
neon/test_runner/fixtures/pageserver/many_tenants.py
Erik Grinaker 3c8565a194 test_runner: propagate config via attach_hook for test fix (#11529)
## Problem

The `pagebench` benchmarks set up an initial dataset by creating a
template tenant, copying the remote storage to a bunch of new tenants,
and attaching them to Pageservers.

In #11420, we found that
`test_pageserver_characterize_throughput_with_n_tenants` had degraded
performance because it set a custom tenant config in Pageservers that
was then replaced with the default tenant config by the storage
controller.

The initial fix was to register the tenants directly in the storage
controller, but this created the tenants with generation 1. This broke
`test_basebackup_with_high_slru_count`, where the template tenant was at
generation 2, leading to all layer files at generation 2 being ignored.

Resolves #11485.
Touches #11381.

## Summary of changes

This patch addresses both test issues by modifying `attach_hook` to also
take a custom tenant config. This allows attaching tenants to
Pageservers from pre-existing remote storage, specifying both the
generation and tenant config when registering them in the storage
controller.
2025-04-11 11:31:12 +00:00

82 lines
2.8 KiB
Python

from __future__ import annotations
import concurrent.futures
from typing import TYPE_CHECKING
import fixtures.pageserver.remote_storage
from fixtures.log_helper import log
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
if TYPE_CHECKING:
from collections.abc import Callable
from typing import Any
from fixtures.common_types import TenantId, TimelineId
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
)
def single_timeline(
    neon_env_builder: NeonEnvBuilder,
    setup_template: Callable[[NeonEnv], tuple[TenantId, TimelineId, dict[str, Any]]],
    ncopies: int,
) -> NeonEnv:
    """
    Create `ncopies` duplicates of a template tenant that has a single timeline.

    The environment is started with local-filesystem remote storage, the default
    initial tenant is deleted, and `setup_template` is invoked to create the
    template tenant. The template is then detached, duplicated `ncopies` times in
    remote storage, the layer files are copied into each duplicate's local tenant
    directory, and every duplicate is attached to the pageserver using the
    template's tenant config.

    Args:
        neon_env_builder: builder used to configure and start the environment.
        setup_template: callback that receives the started environment and returns
            `(tenant_id, timeline_id, tenant_config)` for the template tenant.
        ncopies: number of duplicates of the template tenant to create.

    Returns:
        The environment, with its pageserver stopped; callers are expected to
        start the pageserver themselves.

    Raises:
        Any exception raised while attaching one of the duplicated tenants is
        propagated to the caller.
    """
    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)

    env = neon_env_builder.init_start()

    remote_storage = env.pageserver_remote_storage
    assert isinstance(remote_storage, LocalFsStorage)

    ps_http = env.pageserver.http_client()
    # clean up the useless default tenant
    ps_http.tenant_delete(env.initial_tenant)

    log.info("invoking callback to create template tenant")
    template_tenant, template_timeline, template_config = setup_template(env)
    log.info(
        f"template tenant is template_tenant={template_tenant} template_timeline={template_timeline}"
    )

    log.info("detach template tenant from pageserver")
    env.pageserver.tenant_detach(template_tenant)

    log.info(f"duplicating template tenant {ncopies} times in remote storage")
    tenants = fixtures.pageserver.remote_storage.duplicate_tenant(env, template_tenant, ncopies)

    # In theory we could just attach all the tenants, force on-demand downloads via mgmt API, and be done.
    # However, on-demand downloads are quite slow ATM.
    # => do the on-demand downloads in Python.
    log.info("python-side on-demand download the layer files into local tenant dir")
    tenant_timelines = [(tenant, template_timeline) for tenant in tenants]
    fixtures.pageserver.remote_storage.copy_all_remote_layer_files_to_local_tenant_dir(
        env, tenant_timelines
    )

    log.info("attach duplicated tenants to pageserver")
    # Only the duplicates should end up attached: the default tenant was deleted
    # and the template tenant was detached above.
    assert ps_http.tenant_list() == []

    def attach(tenant):
        # Every duplicate gets the template's tenant config.
        # NOTE(review): generation=100 is presumably just "comfortably higher than
        # any generation the template tenant reached", so that none of the copied
        # layer files are ignored for belonging to a newer generation -- confirm.
        env.pageserver.tenant_attach(
            tenant,
            config=template_config,
            generation=100,
            override_storage_controller_generation=True,
        )

    with concurrent.futures.ThreadPoolExecutor(max_workers=22) as executor:
        # Executor.map() only re-raises a worker's exception when its result is
        # retrieved, so drain the iterator; otherwise a failed attach would be
        # silently dropped and the benchmark would run against fewer tenants.
        list(executor.map(attach, tenants))

    # Benchmarks will start the pageserver explicitly themselves
    env.pageserver.stop()

    return env