neon_local: use pageserver.toml as source of truth for struct PageServerConf (#7642)

Before this PR, `neon_local` would store a copy of a subset of the
initial `pageserver.toml` in its `.neon/config`, e.g, `listen_pg_addr`.
That copy is represented as `struct PageServerConf`.

This copy was used to inform e.g., `neon_local endpoint` and other
commands that depend on Pageserver about which port to connect to.

The problem with that scheme is that the duplicated information in
`.neon/config` can get stale if `pageserver.toml` is changed.

This PR fixes that by eliminating populating `struct PageServerConf`
from the `pageserver.toml`s.

The `[[pageservers]]` TOML table in the `.neon/config` is obsolete.
As of this PR, `neon_local` will fail to start and print an error
informing about this change.

Code-level changes:

- Remove the `--pg-version` flag, it was only used for some checks
during `neon_local init`
- Remove the warn-but-continue behavior for when auth key creation fails
but auth keys are not required. It's just complexity that is unjustified
for a tool like `neon_local`.
- Introduce a type-system-level distinction between the runtime state
and the two (!) toml formats that are almost the same but not quite.
  - runtime state: `struct PageServerConf`, now without `serde` derives
  - toml format 1: the state in `.neon/config` => `struct OnDiskState`
- toml format 2: the `neon_local init --config TMPFILE` that, unlike
`struct OnDiskState`, allows specifying `pageservers`
- Remove `[[pageservers]]` from the `struct OnDiskState` and load the
data from the individual `pageserver.toml`s instead.
This commit is contained in:
Christian Schwarz
2024-05-08 16:32:21 +02:00
committed by GitHub
parent a4a4d78993
commit 8728d5a5fd
5 changed files with 412 additions and 377 deletions

View File

@@ -14,7 +14,7 @@ import textwrap
import threading
import time
import uuid
from contextlib import ExitStack, closing, contextmanager
from contextlib import closing, contextmanager
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
@@ -1054,14 +1054,14 @@ class NeonEnv:
self.pageserver_virtual_file_io_engine = config.pageserver_virtual_file_io_engine
self.pageserver_aux_file_policy = config.pageserver_aux_file_policy
# Create a config file corresponding to the options
# Create the neon_local's `NeonLocalInitConf`
cfg: Dict[str, Any] = {
"default_tenant_id": str(self.initial_tenant),
"broker": {
"listen_addr": self.broker.listen_addr(),
},
"pageservers": [],
"safekeepers": [],
"pageservers": [],
}
if self.control_plane_api is not None:
@@ -1100,6 +1100,17 @@ class NeonEnv:
if config.pageserver_validate_vectored_get is not None:
ps_cfg["validate_vectored_get"] = config.pageserver_validate_vectored_get
if self.pageserver_remote_storage is not None:
ps_cfg["remote_storage"] = remote_storage_to_toml_dict(
self.pageserver_remote_storage
)
if config.pageserver_config_override is not None:
for o in config.pageserver_config_override.split(";"):
override = toml.loads(o)
for key, value in override.items():
ps_cfg[key] = value
# Create a corresponding NeonPageserver object
self.pageservers.append(
NeonPageserver(
@@ -1136,7 +1147,6 @@ class NeonEnv:
self.neon_cli.init(
cfg,
force=config.config_init_force,
pageserver_config_override=config.pageserver_config_override,
)
def start(self):
@@ -1722,46 +1732,22 @@ class NeonCli(AbstractNeonCli):
def init(
self,
config: Dict[str, Any],
init_config: Dict[str, Any],
force: Optional[str] = None,
pageserver_config_override: Optional[str] = None,
) -> "subprocess.CompletedProcess[str]":
remote_storage = self.env.pageserver_remote_storage
ps_config = {}
if remote_storage is not None:
ps_config["remote_storage"] = remote_storage_to_toml_dict(remote_storage)
if pageserver_config_override is not None:
for o in pageserver_config_override.split(";"):
override = toml.loads(o)
for key, value in override.items():
ps_config[key] = value
with ExitStack() as stack:
ps_config_file = stack.enter_context(tempfile.NamedTemporaryFile(mode="w+"))
ps_config_file.write(toml.dumps(ps_config))
ps_config_file.flush()
neon_local_config = stack.enter_context(tempfile.NamedTemporaryFile(mode="w+"))
neon_local_config.write(toml.dumps(config))
neon_local_config.flush()
with tempfile.NamedTemporaryFile(mode="w+") as init_config_tmpfile:
init_config_tmpfile.write(toml.dumps(init_config))
init_config_tmpfile.flush()
cmd = [
"init",
f"--config={neon_local_config.name}",
"--pg-version",
self.env.pg_version,
f"--pageserver-config={ps_config_file.name}",
f"--config={init_config_tmpfile.name}",
]
if force is not None:
cmd.extend(["--force", force])
s3_env_vars = None
if isinstance(remote_storage, S3Storage):
s3_env_vars = remote_storage.access_env_vars()
res = self.raw_cli(cmd, extra_env_vars=s3_env_vars)
res = self.raw_cli(cmd)
res.check_returncode()
return res

View File

@@ -2,6 +2,7 @@ import subprocess
from pathlib import Path
from typing import Optional
import toml
from fixtures.neon_fixtures import (
DEFAULT_BRANCH_NAME,
NeonEnv,
@@ -12,9 +13,11 @@ from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import wait_until
def test_pageserver_init_node_id(
neon_simple_env: NeonEnv, neon_binpath: Path, pg_distrib_dir: Path
):
def test_pageserver_init_node_id(neon_simple_env: NeonEnv, neon_binpath: Path):
"""
NB: The neon_local doesn't use `--init` mode anymore, but our production
deployment still does => https://github.com/neondatabase/aws/pull/1322
"""
workdir = neon_simple_env.pageserver.workdir
pageserver_config = workdir / "pageserver.toml"
pageserver_bin = neon_binpath / "pageserver"
@@ -28,18 +31,36 @@ def test_pageserver_init_node_id(
stderr=subprocess.PIPE,
)
# remove initial config and stop existing pageserver
pageserver_config.unlink()
neon_simple_env.pageserver.stop()
bad_init = run_pageserver(["--init", "-c", f'pg_distrib_dir="{pg_distrib_dir}"'])
with open(neon_simple_env.pageserver.config_toml_path, "r") as f:
ps_config = toml.load(f)
required_config_keys = [
"pg_distrib_dir",
"listen_pg_addr",
"listen_http_addr",
"pg_auth_type",
"http_auth_type",
]
required_config_overrides = [
f"--config-override={toml.dumps({k: ps_config[k]})}" for k in required_config_keys
]
pageserver_config.unlink()
bad_init = run_pageserver(["--init", *required_config_overrides])
assert (
bad_init.returncode == 1
), "pageserver should not be able to init new config without the node id"
assert 'missing config value "id"' in bad_init.stderr
assert not pageserver_config.exists(), "config file should not be created after init error"
good_init_cmd = ["--init", "-c", "id = 12345", "-c", f'pg_distrib_dir="{pg_distrib_dir}"']
good_init_cmd = [
"--init",
f"--config-override=id={ps_config['id']}",
*required_config_overrides,
]
completed_init = run_pageserver(good_init_cmd)
assert (
completed_init.returncode == 0