diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index b4761f103b..23f30804b4 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -455,7 +455,7 @@ class NeonEnvBuilder: test_overlay_dir: Optional[Path] = None, pageserver_remote_storage: Optional[RemoteStorage] = None, # toml that will be decomposed into `--config-override` flags during `pageserver --init` - pageserver_config_override: Optional[str] = None, + pageserver_config_override: Optional[str | Callable[[Dict[str, Any]], None]] = None, num_safekeepers: int = 1, num_pageservers: int = 1, # Use non-standard SK ids to check for various parsing bugs @@ -1127,10 +1127,14 @@ class NeonEnv: ) if config.pageserver_config_override is not None: - for o in config.pageserver_config_override.split(";"): - override = toml.loads(o) - for key, value in override.items(): - ps_cfg[key] = value + if callable(config.pageserver_config_override): + config.pageserver_config_override(ps_cfg) + else: + assert isinstance(config.pageserver_config_override, str) + for o in config.pageserver_config_override.split(";"): + override = toml.loads(o) + for key, value in override.items(): + ps_cfg[key] = value # Create a corresponding NeonPageserver object self.pageservers.append( diff --git a/test_runner/regress/test_branch_behind.py b/test_runner/regress/test_branch_behind.py index ac2fc79be4..0a5336f5a2 100644 --- a/test_runner/regress/test_branch_behind.py +++ b/test_runner/regress/test_branch_behind.py @@ -11,8 +11,7 @@ from fixtures.utils import print_gc_result, query_scalar # def test_branch_behind(neon_env_builder: NeonEnvBuilder): # Disable pitr, because here we want to test branch creation after GC - neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" - env = neon_env_builder.init_start() + env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"}) error_regexes = [ ".*invalid branch start lsn.*", diff --git a/test_runner/regress/test_gc_aggressive.py b/test_runner/regress/test_gc_aggressive.py index e5067bba8b..44133f2350 100644 --- a/test_runner/regress/test_gc_aggressive.py +++ b/test_runner/regress/test_gc_aggressive.py @@ -67,8 +67,7 @@ async def update_and_gc(env: NeonEnv, endpoint: Endpoint, timeline: TimelineId): # def test_gc_aggressive(neon_env_builder: NeonEnvBuilder): # Disable pitr, because here we want to test branch creation after GC - neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" - env = neon_env_builder.init_start() + env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"}) timeline = env.neon_cli.create_branch("test_gc_aggressive", "main") endpoint = env.endpoints.create_start("test_gc_aggressive") @@ -94,13 +93,11 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder): # def test_gc_index_upload(neon_env_builder: NeonEnvBuilder): - # Disable time-based pitr, we will use LSN-based thresholds in the manual GC calls - neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" num_index_uploads = 0 neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) - - env = neon_env_builder.init_start() + # Disable time-based pitr, we will use LSN-based thresholds in the manual GC calls + env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"}) tenant_id = env.initial_tenant timeline_id = env.neon_cli.create_branch("test_gc_index_upload", "main") endpoint = env.endpoints.create_start("test_gc_index_upload") diff --git a/test_runner/regress/test_old_request_lsn.py b/test_runner/regress/test_old_request_lsn.py index 43b0bb56f0..f1dd3fb67d 100644 --- a/test_runner/regress/test_old_request_lsn.py +++ b/test_runner/regress/test_old_request_lsn.py @@ -16,8 +16,7 @@ from fixtures.utils import print_gc_result, query_scalar # def test_old_request_lsn(neon_env_builder: NeonEnvBuilder): # Disable pitr, because here we want to test branch creation after GC - neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" - env = neon_env_builder.init_start() + env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"}) env.neon_cli.create_branch("test_old_request_lsn", "main") endpoint = env.endpoints.create_start("test_old_request_lsn") diff --git a/test_runner/regress/test_pageserver_api.py b/test_runner/regress/test_pageserver_api.py index 80a1d72f4a..abbea59113 100644 --- a/test_runner/regress/test_pageserver_api.py +++ b/test_runner/regress/test_pageserver_api.py @@ -42,6 +42,8 @@ def test_pageserver_init_node_id(neon_simple_env: NeonEnv, neon_binpath: Path): "listen_http_addr", "pg_auth_type", "http_auth_type", + # TODO: only needed for NEON_PAGESERVER_PANIC_ON_UNSPECIFIED_COMPACTION_ALGORITHM in https://github.com/neondatabase/neon/pull/7748 + # "tenant_config", ] required_config_overrides = [ f"--config-override={toml.dumps({k: ps_config[k]})}" for k in required_config_keys diff --git a/test_runner/regress/test_pitr_gc.py b/test_runner/regress/test_pitr_gc.py index 6434f431a4..7e676b5515 100644 --- a/test_runner/regress/test_pitr_gc.py +++ b/test_runner/regress/test_pitr_gc.py @@ -10,11 +10,9 @@ from fixtures.utils import print_gc_result, query_scalar # def test_pitr_gc(neon_env_builder: NeonEnvBuilder): # Set pitr interval such that we need to keep the data - neon_env_builder.pageserver_config_override = ( - "tenant_config={pitr_interval = '1 day', gc_horizon = 0}" + env = neon_env_builder.init_start( + initial_tenant_conf={"pitr_interval": "1 day", "gc_horizon": "0"} ) - - env = neon_env_builder.init_start() endpoint_main = env.endpoints.create_start("main") main_pg_conn = endpoint_main.connect() diff --git a/test_runner/regress/test_recovery.py b/test_runner/regress/test_recovery.py index ab5c8be256..e21f9bb6f6 100644 --- a/test_runner/regress/test_recovery.py +++ b/test_runner/regress/test_recovery.py @@ -10,9 +10,11 @@ from fixtures.neon_fixtures import NeonEnvBuilder # def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): # Override default checkpointer settings to run it more often - neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}" - - env = neon_env_builder.init_start() + env = neon_env_builder.init_start( + initial_tenant_conf={ + "checkpoint_distance": "1048576", + } + ) env.pageserver.is_testing_enabled_or_skip() # We expect the pageserver to exit, which will cause storage storage controller diff --git a/test_runner/regress/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py index a345464208..2cbb036c0d 100644 --- a/test_runner/regress/test_tenant_conf.py +++ b/test_runner/regress/test_tenant_conf.py @@ -1,5 +1,6 @@ import json from contextlib import closing +from typing import Any, Dict import psycopg2.extras from fixtures.common_types import Lsn @@ -14,16 +15,22 @@ from fixtures.utils import wait_until def test_tenant_config(neon_env_builder: NeonEnvBuilder): """Test per tenant configuration""" - # set some non-default global config - neon_env_builder.pageserver_config_override = """ -page_cache_size=444; -wait_lsn_timeout='111 s'; -[tenant_config] -checkpoint_distance = 10000 -compaction_target_size = 1048576 -evictions_low_residence_duration_metric_threshold = "2 days" -eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold = "23 hours" } -""" + + def set_some_nondefault_global_config(ps_cfg: Dict[str, Any]): + ps_cfg["page_cache_size"] = 444 + ps_cfg["wait_lsn_timeout"] = "111 s" + + tenant_config = ps_cfg.setdefault("tenant_config", {}) + tenant_config["checkpoint_distance"] = 10000 + tenant_config["compaction_target_size"] = 1048576 + tenant_config["evictions_low_residence_duration_metric_threshold"] = "2 days" + tenant_config["eviction_policy"] = { + "kind": "LayerAccessThreshold", + "period": "20s", + "threshold": "23 hours", + } + + neon_env_builder.pageserver_config_override = set_some_nondefault_global_config env = neon_env_builder.init_start() # we configure eviction but no remote storage, there might be error lines diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 7894f6933d..d3a228dbeb 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -502,9 +502,14 @@ def test_get_tenant_size_with_multiple_branches( gc_horizon = 128 * 1024 - neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}" - - env = neon_env_builder.init_start() + env = neon_env_builder.init_start( + initial_tenant_conf={ + "compaction_period": "0s", + "gc_period": "0s", + "pitr_interval": "0sec", + "gc_horizon": gc_horizon, + } + ) # FIXME: we have a race condition between GC and delete timeline. GC might fail with this # error. Similar to https://github.com/neondatabase/neon/issues/2671 diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py index 18063bf104..a6d06df3b6 100644 --- a/test_runner/regress/test_timeline_size.py +++ b/test_runner/regress/test_timeline_size.py @@ -415,11 +415,12 @@ def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder # Disable background compaction as we don't want it to happen after `get_physical_size` request # and before checking the expected size on disk, which makes the assertion failed - neon_env_builder.pageserver_config_override = ( - "tenant_config={checkpoint_distance=100000, compaction_period='10m'}" + env = neon_env_builder.init_start( + initial_tenant_conf={ + "checkpoint_distance": "100000", + "compaction_period": "10m", + } ) - - env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_compaction") @@ -462,9 +463,14 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder): # Disable background compaction and GC as we don't want it to happen after `get_physical_size` request # and before checking the expected size on disk, which makes the assertion failed - neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='0s', gc_period='0s', pitr_interval='1s'}" - - env = neon_env_builder.init_start() + env = neon_env_builder.init_start( + initial_tenant_conf={ + "checkpoint_distance": "100000", + "compaction_period": "0s", + "gc_period": "0s", + "pitr_interval": "1s", + } + ) pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_gc") diff --git a/test_runner/regress/test_wal_receiver.py b/test_runner/regress/test_wal_receiver.py index d9265dcbcd..6582b34218 100644 --- a/test_runner/regress/test_wal_receiver.py +++ b/test_runner/regress/test_wal_receiver.py @@ -1,4 +1,5 @@ import time +from typing import Any, Dict from fixtures.common_types import Lsn, TenantId from fixtures.log_helper import log @@ -42,10 +43,14 @@ def test_pageserver_lsn_wait_error_start(neon_env_builder: NeonEnvBuilder): # Kills one of the safekeepers and ensures that only the active ones are printed in the state. def test_pageserver_lsn_wait_error_safekeeper_stop(neon_env_builder: NeonEnvBuilder): # Trigger WAL wait timeout faster - neon_env_builder.pageserver_config_override = """ - wait_lsn_timeout = "1s" - tenant_config={walreceiver_connect_timeout = "2s", lagging_wal_timeout = "2s"} - """ + def customize_pageserver_toml(ps_cfg: Dict[str, Any]): + ps_cfg["wait_lsn_timeout"] = "1s" + tenant_config = ps_cfg.setdefault("tenant_config", {}) + tenant_config["walreceiver_connect_timeout"] = "2s" + tenant_config["lagging_wal_timeout"] = "2s" + + neon_env_builder.pageserver_config_override = customize_pageserver_toml + # Have notable SK ids to ensure we check logs for their presence, not some other random numbers neon_env_builder.safekeepers_id_start = 12345 neon_env_builder.num_safekeepers = 3