test_suite: remove usage of --pageserver-config-override

Rewrite the pageserver.toml instead.
This commit is contained in:
Christian Schwarz
2024-05-04 11:02:55 +00:00
parent 58055c7a96
commit ad185dd594
5 changed files with 83 additions and 42 deletions

View File

@@ -1746,10 +1746,9 @@ class NeonCli(AbstractNeonCli):
def pageserver_start(
self,
id: int,
overrides: Tuple[str, ...] = (),
extra_env_vars: Optional[Dict[str, str]] = None,
) -> "subprocess.CompletedProcess[str]":
start_args = ["pageserver", "start", f"--id={id}", *overrides]
start_args = ["pageserver", "start", f"--id={id}"]
storage = self.env.pageserver_remote_storage
append_pageserver_param_overrides(
params_to_update=start_args,
@@ -2412,9 +2411,47 @@ class NeonPageserver(PgProtocol, LogUtils):
return self.workdir / "tenants"
return self.workdir / "tenants" / str(tenant_shard_id)
@property
def config_toml_path(self) -> Path:
return self.workdir / "pageserver.toml"
def edit_config_toml(self, edit_fn: Callable[[Dict[str, Any]], bool]):
"""
Edit the pageserver's config toml file in place.
The `edit_fn` is to manipulate the dict, and if it returns True, the file will be written.
If it returns False, no changes are made to the file system.
"""
path = self.config_toml_path
with open(path, "r") as f:
config = toml.load(f)
save = edit_fn(config)
if save:
with open(path, "w") as f:
toml.dump(config, f)
def patch_config_toml_nonrecursive(self, patch: Dict[str, Any]) -> Dict[str, Any]:
"""
Non-recursively merge the given `patch` dict into the existing config toml, using `dict.update()`.
Returns the replaced values.
If there was no previous value, the key is mapped to None.
This allows to restore the original value by calling this method with the returned dict.
"""
replacements = {}
def doit(config: Dict[str, Any]) -> bool:
while len(patch) > 0:
key, new = patch.popitem()
old = config.get(key, None)
config[key] = new
replacements[key] = old
return True
self.edit_config_toml(doit)
return replacements
def start(
self,
overrides: Tuple[str, ...] = (),
extra_env_vars: Optional[Dict[str, str]] = None,
) -> "NeonPageserver":
"""
@@ -2424,9 +2461,7 @@ class NeonPageserver(PgProtocol, LogUtils):
"""
assert self.running is False
self.env.neon_cli.pageserver_start(
self.id, overrides=overrides, extra_env_vars=extra_env_vars
)
self.env.neon_cli.pageserver_start(self.id, extra_env_vars=extra_env_vars)
self.running = True
return self

View File

@@ -140,10 +140,12 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int, shape:
# start without gc so we can time compaction with less noise; use shorter
# period for compaction so it starts earlier
def patch_default_tenant_config(config):
config["compaction_period"] = "3s"
config["gc_period"] = "0s"
return True
env.pageserver.edit_config_toml(patch_default_tenant_config)
env.pageserver.start(
overrides=(
"--pageserver-config-override=tenant_config={ compaction_period = '3s', gc_period = '0s' }",
),
# this does print more than we want, but the number should be comparable between runs
extra_env_vars={
"RUST_LOG": f"[compaction_loop{{tenant_id={env.initial_tenant}}}]=debug,info"

View File

@@ -45,17 +45,15 @@ def test_min_resident_size_override_handling(
ps_http.set_tenant_config(tenant_id, {})
assert_config(tenant_id, None, default_tenant_conf_value)
env.pageserver.stop()
if config_level_override is not None:
env.pageserver.start(
overrides=(
"--pageserver-config-override=tenant_config={ min_resident_size_override = "
+ str(config_level_override)
+ " }",
)
)
else:
env.pageserver.start()
def set_min_resident_size(config):
config["tenant_config"] = {"min_resident_size": config_level_override}
return True
env.pageserver.edit_config_toml(set_min_resident_size)
env.pageserver.stop()
env.pageserver.start()
tenant_id, _ = env.neon_cli.create_tenant()
assert_overrides(tenant_id, config_level_override)
@@ -164,34 +162,32 @@ class EvictionEnv:
usage eviction task is unknown; it might need to run one more iteration
before assertions can be made.
"""
disk_usage_config = {
"period": period,
"max_usage_pct": max_usage_pct,
"min_avail_bytes": min_avail_bytes,
"mock_statvfs": mock_behavior,
"eviction_order": eviction_order.config(),
}
enc = toml.TomlEncoder()
# these can sometimes happen during startup before any tenants have been
# loaded, so nothing can be evicted, we just wait for next iteration which
# is able to evict.
pageserver.allowed_errors.append(".*WARN.* disk usage still high.*")
pageserver.start(
overrides=(
"--pageserver-config-override=disk_usage_based_eviction="
+ enc.dump_inline_table(disk_usage_config).replace("\n", " "),
pageserver.patch_config_toml_nonrecursive(
{
"disk_usage_based_eviction": {
"period": period,
"max_usage_pct": max_usage_pct,
"min_avail_bytes": min_avail_bytes,
"mock_statvfs": mock_behavior,
"eviction_order": eviction_order.config(),
},
# Disk usage based eviction runs as a background task.
# But pageserver startup delays launch of background tasks for some time, to prioritize initial logical size calculations during startup.
# But, initial logical size calculation may not be triggered if safekeepers don't publish new broker messages.
# But, we only have a 10-second-timeout in this test.
# So, disable the delay for this test.
"--pageserver-config-override=background_task_maximum_delay='0s'",
),
"background_task_maximum_delay": "0s",
}
)
pageserver.start()
# we now do initial logical size calculation on startup, which on debug builds can fight with disk usage based eviction
for tenant_id, timeline_id in self.timelines:
tenant_ps = self.neon_env.get_tenant_pageserver(tenant_id)

View File

@@ -220,7 +220,12 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
# We will start a pageserver with no control_plane_api set, so it won't be able to self-register
env.storage_controller.node_register(env.pageserver)
env.pageserver.start(overrides=('--pageserver-config-override=control_plane_api=""',))
replaced_config = env.pageserver.patch_config_toml_nonrecursive(
{
"control_plane_api": "",
}
)
env.pageserver.start()
env.storage_controller.node_configure(env.pageserver.id, {"availability": "Active"})
env.neon_cli.create_tenant(
@@ -251,8 +256,8 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
assert parse_generation_suffix(key) is None
env.pageserver.stop()
# Starting without the override that disabled control_plane_api
env.pageserver.patch_config_toml_nonrecursive(replaced_config)
env.pageserver.start()
generate_uploads_and_deletions(env, pageserver=env.pageserver, init=False)
@@ -525,9 +530,10 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
# incident, but it might be unavoidable: if so, we want to be able to start up
# and serve clients.
env.pageserver.stop() # Non-immediate: implicitly checking that shutdown doesn't hang waiting for CP
env.pageserver.start(
overrides=("--pageserver-config-override=control_plane_emergency_mode=true",),
)
replaced = env.pageserver.patch_config_toml_nonrecursive({
"control_plane_emergency_mode": True,
})
env.pageserver.start()
# The pageserver should provide service to clients
generate_uploads_and_deletions(env, init=False, pageserver=env.pageserver)
@@ -549,6 +555,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
# The pageserver should work fine when subsequently restarted in non-emergency mode
env.pageserver.stop() # Non-immediate: implicitly checking that shutdown doesn't hang waiting for CP
env.pageserver.patch_config_toml_nonrecursive(replaced)
env.pageserver.start()
generate_uploads_and_deletions(env, init=False, pageserver=env.pageserver)

View File

@@ -290,9 +290,10 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up
# This is the pageserver where we'll initially create the tenant. Run it in emergency
# mode so that it doesn't talk to storage controller, and do not register it.
env.pageservers[0].allowed_errors.append(".*Emergency mode!.*")
env.pageservers[0].start(
overrides=("--pageserver-config-override=control_plane_emergency_mode=true",),
)
env.pageservers[0].patch_config_toml_nonrecursive({
"control_plane_emergency_mode": True,
})
env.pageservers[0].start()
origin_ps = env.pageservers[0]
# These are the pageservers managed by the sharding service, where the tenant