mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-14 17:02:56 +00:00
## Problem see https://github.com/neondatabase/neon/issues/8070 ## Summary of changes the neon_local subcommands to - start neon - start pageserver - start safekeeper - start storage controller get a new option -t=xx or --start-timeout=xx which allows to specify a longer timeout in seconds we wait for the process start. This is useful in test cases where the pageserver has to read a lot of layer data, like in pagebench test cases. In addition we exploit the new timeout option in the python test infrastructure (python fixtures) and modify the flaky testcase to increase the timeout from 10 seconds to 1 minute. Example from the test execution ```bash RUST_BACKTRACE=1 NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS=1 DEFAULT_PG_VERSION=15 BUILD_TYPE=release ./scripts/pytest test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py ... 2024-06-19 09:29:34.590 INFO [neon_fixtures.py:1513] Running command "/instance_store/neon/target/release/neon_local storage_controller start --start-timeout=60s" 2024-06-19 09:29:36.365 INFO [broker.py:34] starting storage_broker to listen incoming connections at "127.0.0.1:15001" 2024-06-19 09:29:36.365 INFO [neon_fixtures.py:1513] Running command "/instance_store/neon/target/release/neon_local pageserver start --id=1 --start-timeout=60s" 2024-06-19 09:29:36.366 INFO [neon_fixtures.py:1513] Running command "/instance_store/neon/target/release/neon_local safekeeper start 1 --start-timeout=60s" ```
57 lines
1.8 KiB
Python
57 lines
1.8 KiB
Python
"""
|
|
Utilities used by all code in this sub-directory
|
|
"""
|
|
|
|
from typing import Any, Callable, Dict, Optional, Tuple
|
|
|
|
import fixtures.pageserver.many_tenants as many_tenants
|
|
from fixtures.common_types import TenantId, TimelineId
|
|
from fixtures.log_helper import log
|
|
from fixtures.neon_fixtures import (
|
|
NeonEnv,
|
|
NeonEnvBuilder,
|
|
)
|
|
from fixtures.pageserver.utils import wait_until_all_tenants_state
|
|
|
|
|
|
def ensure_pageserver_ready_for_benchmarking(env: NeonEnv, n_tenants: int):
|
|
"""
|
|
Helper function.
|
|
"""
|
|
ps_http = env.pageserver.http_client()
|
|
|
|
log.info("wait for all tenants to become active")
|
|
wait_until_all_tenants_state(
|
|
ps_http, "Active", iterations=n_tenants, period=1, http_error_ok=False
|
|
)
|
|
|
|
# ensure all layers are resident for predictiable performance
|
|
tenants = [info["id"] for info in ps_http.tenant_list()]
|
|
for tenant in tenants:
|
|
for timeline in ps_http.tenant_status(tenant)["timelines"]:
|
|
info = ps_http.layer_map_info(tenant, timeline)
|
|
for layer in info.historic_layers:
|
|
assert not layer.remote
|
|
|
|
log.info("ready")
|
|
|
|
|
|
def setup_pageserver_with_tenants(
|
|
neon_env_builder: NeonEnvBuilder,
|
|
name: str,
|
|
n_tenants: int,
|
|
setup: Callable[[NeonEnv], Tuple[TenantId, TimelineId, Dict[str, Any]]],
|
|
timeout_in_seconds: Optional[int] = None,
|
|
) -> NeonEnv:
|
|
"""
|
|
Utility function to set up a pageserver with a given number of identical tenants.
|
|
"""
|
|
|
|
def doit(neon_env_builder: NeonEnvBuilder) -> NeonEnv:
|
|
return many_tenants.single_timeline(neon_env_builder, setup, n_tenants)
|
|
|
|
env = neon_env_builder.build_and_use_snapshot(name, doit)
|
|
env.start(timeout_in_seconds=timeout_in_seconds)
|
|
ensure_pageserver_ready_for_benchmarking(env, n_tenants)
|
|
return env
|