mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-25 07:09:57 +00:00
# Refs - refs https://github.com/neondatabase/neon/issues/8915 - discussion thread: https://neondb.slack.com/archives/C033RQ5SPDH/p1742406381132599 - stacked atop https://github.com/neondatabase/neon/pull/11298 - corresponding internal docs update that illustrates how this PR removes friction: https://github.com/neondatabase/docs/pull/404 # Problem Rejecting `pageserver.toml`s with unknown fields adds friction, especially when using `pageserver.toml` fields as feature flags that need to be decommissioned. See the added paragraphs on `pageserver_api::models::ConfigToml` for details on what kind of friction it causes. Also read the corresponding internal docs update linked above to see a more imperative guide for using `pageserver.toml` flags as feature flags. # Solution ## Ignoring unknown fields Ignoring is the serde default behavior. So, just remove `serde(deny_unknown_fields)` from all structs in `pageserver_api::config::ConfigToml` `pageserver_api::config::TenantConfigToml`. I went through all the child fields and verified they don't use `deny_unknown_fields` either, including those shared with `pageserver_api::models`. ## Warning about unknown fields We still want to warn about unknown fields to - be informed about typos in the config template - be reminded about feature-flag style configs that have been cleaned up in code but not yet in config templates We tried `serde_ignore` (cf draft #11319) but it doesn't work with `serde(flatten)`. The solution we arrived at is to compare the on-disk TOML with the TOML that we produce if we serialize the `ConfigToml` again. Any key specified in the on-disk TOML but not present in the serialized TOML is flagged as an ignored key. The mechanism to do it is a tiny recursive decent visitor on the `toml_edit::DocumentMut`. # Future Work Invalid config _values_ in known fields will continue to fail pageserver startup. See - https://github.com/neondatabase/cloud/issues/24349 for current worst case impact to deployments & ideas to improve.
57 lines
2.2 KiB
Python
57 lines
2.2 KiB
Python
import re
|
|
|
|
import pytest
|
|
from fixtures.neon_fixtures import NeonEnv
|
|
from fixtures.utils import run_only_on_default_postgres
|
|
|
|
|
|
@pytest.mark.parametrize("what", ["default", "top_level", "nested"])
|
|
@run_only_on_default_postgres(reason="does not use postgres")
|
|
def test_unknown_config_items_handling(neon_simple_env: NeonEnv, what: str):
|
|
"""
|
|
Ensure we log unknown config fields and expose a metric for alerting.
|
|
There are more unit tests in the Rust code for other TOML items.
|
|
"""
|
|
env = neon_simple_env
|
|
|
|
def edit_fn(config) -> str | None:
|
|
if what == "default":
|
|
return None
|
|
elif what == "top_level":
|
|
config["unknown_top_level_config_item"] = 23
|
|
return r"unknown_top_level_config_item"
|
|
elif what == "nested":
|
|
config["remote_storage"]["unknown_config_item"] = 23
|
|
return r"remote_storage.unknown_config_item"
|
|
else:
|
|
raise ValueError(f"Unknown what: {what}")
|
|
|
|
def get_metric():
|
|
metrics = env.pageserver.http_client().get_metrics()
|
|
samples = metrics.query_all("pageserver_config_ignored_items")
|
|
by_item = {sample.labels["item"]: sample.value for sample in samples}
|
|
assert by_item[""] == 0, "must always contain the empty item with value 0"
|
|
del by_item[""]
|
|
return by_item
|
|
|
|
expected_ignored_item = env.pageserver.edit_config_toml(edit_fn)
|
|
|
|
if expected_ignored_item is not None:
|
|
expected_ignored_item_log_line_re = r".*ignoring unknown configuration item.*" + re.escape(
|
|
expected_ignored_item
|
|
)
|
|
env.pageserver.allowed_errors.append(expected_ignored_item_log_line_re)
|
|
|
|
if expected_ignored_item is not None:
|
|
assert not env.pageserver.log_contains(expected_ignored_item_log_line_re)
|
|
assert get_metric() == {}
|
|
|
|
# in any way, unknown config items should not fail pageserver to start
|
|
# TODO: extend this test with the config validator mode once we introduce it
|
|
# https://github.com/neondatabase/cloud/issues/24349
|
|
env.pageserver.restart()
|
|
|
|
if expected_ignored_item is not None:
|
|
assert env.pageserver.log_contains(expected_ignored_item_log_line_re)
|
|
assert get_metric() == {expected_ignored_item: 1}
|