refactor(pageserver): rely on serde derive for toml deserialization (#7656)

This PR simplifies the pageserver configuration parsing as follows:

* introduce the `pageserver_api::config::ConfigToml` type
* implement `Default` for `ConfigToml`
* use serde derive to do the brain-dead leg-work of processing the toml
document
  * use `serde(default)` to fill in default values
* in `pageserver` crate:
* use `toml_edit` to deserialize the pageserver.toml string into a
`ConfigToml`
  * `PageServerConfig::parse_and_validate` then
    * consumes the `ConfigToml`
    * destructures it exhaustively into its constituent fields
    * constructs the `PageServerConfig`

The rules are:

* in `ConfigToml`, use `deny_unknown_fields` everywhere
* static default values go in `pageserver_api`
* if there cannot be a static default value (e.g. which default IO
engine to use, because it depends on the runtime), make the field in
`ConfigToml` an `Option`
* if runtime-augmentation of a value is needed, do that in
`parse_and_validate`
* a good example is `virtual_file_io_engine` or `l0_flush`, both of
which need to execute code to determine the effective value in
`PageServerConf`

The benefits:

* massive amount of brain-dead repetitive code can be deleted
* "unused variable" compile-time errors when removing a config value,
due to the exhaustive destructuring in `parse_and_validate`
* compile-time errors guide you when adding a new config field

Drawbacks:

* serde derive is sometimes a bit too magical
* `deny_unknown_fields` is easy to miss

Future Work / Benefits:
* make `neon_local` use `pageserver_api` to construct `ConfigToml` and
write it to `pageserver.toml`
* This provides more type safety / coompile-time errors than the current
approach.

### Refs

Fixes #3682 

### Future Work

* `remote_storage` deser doesn't reject unknown fields
https://github.com/neondatabase/neon/issues/8915
* clean up `libs/pageserver_api/src/config.rs` further
  * break up into multiple files, at least for tenant config
* move `models` as appropriate / refine distinction between config and
API models / be explicit about when it's the same
  * use `pub(crate)` visibility on `mod defaults` to detect stale values
This commit is contained in:
Christian Schwarz
2024-09-05 14:59:49 +02:00
committed by GitHub
parent 6dfbf49128
commit 850421ec06
31 changed files with 1001 additions and 1656 deletions

View File

@@ -24,7 +24,20 @@ from functools import cached_property, partial
from itertools import chain, product
from pathlib import Path
from types import TracebackType
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Type, Union, cast
from typing import (
Any,
Callable,
Dict,
Iterable,
Iterator,
List,
Optional,
Tuple,
Type,
TypeVar,
Union,
cast,
)
from urllib.parse import quote, urlparse
import asyncpg
@@ -90,6 +103,8 @@ from fixtures.utils import AuxFileStore as AuxFileStore # reexport
from .neon_api import NeonAPI, NeonApiEndpoint
T = TypeVar("T")
"""
This file contains pytest fixtures. A fixture is a test resource that can be
summoned by placing its name in the test's arguments.
@@ -2986,16 +3001,17 @@ class NeonPageserver(PgProtocol, LogUtils):
def config_toml_path(self) -> Path:
return self.workdir / "pageserver.toml"
def edit_config_toml(self, edit_fn: Callable[[Dict[str, Any]], None]):
def edit_config_toml(self, edit_fn: Callable[[Dict[str, Any]], T]) -> T:
"""
Edit the pageserver's config toml file in place.
"""
path = self.config_toml_path
with open(path, "r") as f:
config = toml.load(f)
edit_fn(config)
res = edit_fn(config)
with open(path, "w") as f:
toml.dump(config, f)
return res
def patch_config_toml_nonrecursive(self, patch: Dict[str, Any]) -> Dict[str, Any]:
"""

View File

@@ -142,11 +142,10 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
# We will start a pageserver with no control_plane_api set, so it won't be able to self-register
env.storage_controller.node_register(env.pageserver)
replaced_config = env.pageserver.patch_config_toml_nonrecursive(
{
"control_plane_api": "",
}
)
def remove_control_plane_api_field(config):
return config.pop("control_plane_api")
control_plane_api = env.pageserver.edit_config_toml(remove_control_plane_api_field)
env.pageserver.start()
env.storage_controller.node_configure(env.pageserver.id, {"availability": "Active"})
@@ -179,7 +178,11 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
env.pageserver.stop()
# Starting without the override that disabled control_plane_api
env.pageserver.patch_config_toml_nonrecursive(replaced_config)
env.pageserver.patch_config_toml_nonrecursive(
{
"control_plane_api": control_plane_api,
}
)
env.pageserver.start()
generate_uploads_and_deletions(env, pageserver=env.pageserver, init=False)

View File

@@ -733,7 +733,7 @@ def test_ondemand_activation(neon_env_builder: NeonEnvBuilder):
# We will run with the limit set to 1, so that once we have one tenant stuck
# in a pausable failpoint, the rest are prevented from proceeding through warmup.
neon_env_builder.pageserver_config_override = "concurrent_tenant_warmup = '1'"
neon_env_builder.pageserver_config_override = "concurrent_tenant_warmup = 1"
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
@@ -984,7 +984,7 @@ def test_timeline_logical_size_task_priority(neon_env_builder: NeonEnvBuilder):
def test_eager_attach_does_not_queue_up(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "concurrent_tenant_warmup = '1'"
neon_env_builder.pageserver_config_override = "concurrent_tenant_warmup = 1"
env = neon_env_builder.init_start()
@@ -1062,7 +1062,7 @@ def test_eager_attach_does_not_queue_up(neon_env_builder: NeonEnvBuilder):
@pytest.mark.parametrize("activation_method", ["endpoint", "branch", "delete"])
def test_lazy_attach_activation(neon_env_builder: NeonEnvBuilder, activation_method: str):
# env.initial_tenant will take up this permit when attaching with lazy because of a failpoint activated after restart
neon_env_builder.pageserver_config_override = "concurrent_tenant_warmup = '1'"
neon_env_builder.pageserver_config_override = "concurrent_tenant_warmup = 1"
env = neon_env_builder.init_start()