mirror of
https://github.com/neondatabase/neon.git
synced 2026-04-25 18:30:38 +00:00
Compare commits
2 Commits
release-co
...
problame/s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e275df8640 | ||
|
|
13fa4b48c3 |
16
Cargo.lock
generated
16
Cargo.lock
generated
@@ -4270,6 +4270,7 @@ dependencies = [
|
||||
"scopeguard",
|
||||
"send-future",
|
||||
"serde",
|
||||
"serde_ignored",
|
||||
"serde_json",
|
||||
"serde_path_to_error",
|
||||
"serde_with",
|
||||
@@ -4692,7 +4693,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.6"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#394851e467755562b4173ff68f9eb0e7f737be13"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"byteorder",
|
||||
@@ -4726,7 +4727,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.6"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#394851e467755562b4173ff68f9eb0e7f737be13"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"chrono",
|
||||
@@ -6287,6 +6288,15 @@ dependencies = [
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_ignored"
|
||||
version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "566da67d80e92e009728b3731ff0e5360cb181432b8ca73ea30bb1d170700d76"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.125"
|
||||
@@ -7170,7 +7180,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.10"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#394851e467755562b4173ff68f9eb0e7f737be13"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
|
||||
@@ -164,6 +164,7 @@ sd-notify = "0.4.1"
|
||||
send-future = "0.1.0"
|
||||
sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_ignored = "0.1.11"
|
||||
serde_json = "1"
|
||||
serde_path_to_error = "0.1"
|
||||
serde_with = { version = "2.0", features = [ "base64" ] }
|
||||
|
||||
@@ -51,9 +51,54 @@ pub struct NodeMetadata {
|
||||
/// If there cannot be a static default value because we need to make runtime
|
||||
/// checks to determine the default, make it an `Option` (which defaults to None).
|
||||
/// The runtime check should be done in the consuming crate, i.e., `pageserver`.
|
||||
///
|
||||
/// Unknown fields are silently ignored during deserialization.
|
||||
/// The alternative, which we used in the past, was to set `deny_unknown_fields`,
|
||||
/// which fails deserialization, and hence pageserver startup, if there is an unknown field.
|
||||
/// The reason we don't do that anymore is that it complicates
|
||||
/// usage of config fields for feature flagging, which we commonly do for
|
||||
/// region-by-region rollouts.
|
||||
/// The complications mainly arise because the `pageserver.toml` contents on a
|
||||
/// prod server have a separate lifecycle from the pageserver binary.
|
||||
/// For instance, `pageserver.toml` contents today are defined in the internal
|
||||
/// infra repo, and thus introducing a new config field to pageserver and
|
||||
/// rolling it out to prod servers are separate commits in separate repos
|
||||
/// that can't be made or rolled back atomically.
|
||||
/// Rollbacks in particular pose a risk with deny_unknown_fields because
|
||||
/// the old pageserver binary may reject a new config field, resulting in
|
||||
/// an outage unless the person doing the pageserver rollback remembers
|
||||
/// to also revert the commit that added the config field in to the
|
||||
/// `pageserver.toml` templates in the internal infra repo.
|
||||
/// (A pre-deploy config check would eliminate this risk during rollbacks,
|
||||
/// cf [here](https://github.com/neondatabase/cloud/issues/24349).)
|
||||
/// In addition to this compatibility problem during emergency rollbacks,
|
||||
/// deny_unknown_fields adds further complications when decomissioning a feature
|
||||
/// flag: with deny_unknown_fields, we can't remove a flag from the [`ConfigToml`]
|
||||
/// until all prod servers' `pageserver.toml` files have been updated to a version
|
||||
/// that doesn't specify the flag. Otherwise new software would fail to start up.
|
||||
/// This adds the requirement for an intermediate step where the new config field
|
||||
/// is accepted but ignored, prolonging the decomissioning process by an entire
|
||||
/// release cycle.
|
||||
/// By contrast with unknown fields silently ignored, decomissioning a feature
|
||||
/// flag is a one-step process: we can skip the intermediate step and straight
|
||||
/// remove the field from the [`ConfigToml`]. We leave the field in the
|
||||
/// `pageserver.toml` files on prod servers until we reach certainty that we
|
||||
/// will not roll back to old software whose behavior was dependent on config.
|
||||
/// Then we can remove the field from the templates in the internal infra repo.
|
||||
/// This process is [documented internally](
|
||||
/// https://docs.neon.build/storage/pageserver_configuration.html).
|
||||
///
|
||||
/// Note that above relaxed compatbility for the config format does NOT APPLY
|
||||
/// TO THE STORAGE FORMAT. As general guidance, when introducing storage format
|
||||
/// changes, ensure that the potential rollback target version will be compatible
|
||||
/// with the new format. This must hold regardless of what flags are set in in the `pageserver.toml`:
|
||||
/// any format version that exists in an environment must be compatible with the software that runs there.
|
||||
/// Use a pageserver.toml flag only to gate whether software _writes_ the new format.
|
||||
/// For more compatibility considerations, refer to [internal docs](
|
||||
/// https://docs.neon.build/storage/compat.html?highlight=compat#format-versions--compatibility)
|
||||
#[serde_as]
|
||||
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
|
||||
#[serde(default, deny_unknown_fields)]
|
||||
#[serde(default)]
|
||||
pub struct ConfigToml {
|
||||
// types mapped 1:1 into the runtime PageServerConfig type
|
||||
pub listen_pg_addr: String,
|
||||
@@ -134,7 +179,6 @@ pub struct ConfigToml {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct DiskUsageEvictionTaskConfig {
|
||||
pub max_usage_pct: utils::serde_percent::Percent,
|
||||
pub min_avail_bytes: u64,
|
||||
@@ -149,13 +193,11 @@ pub struct DiskUsageEvictionTaskConfig {
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(tag = "mode", rename_all = "kebab-case")]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub enum PageServicePipeliningConfig {
|
||||
Serial,
|
||||
Pipelined(PageServicePipeliningConfigPipelined),
|
||||
}
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct PageServicePipeliningConfigPipelined {
|
||||
/// Causes runtime errors if larger than max get_vectored batch size.
|
||||
pub max_batch_size: NonZeroUsize,
|
||||
@@ -171,7 +213,6 @@ pub enum PageServiceProtocolPipelinedExecutionStrategy {
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(tag = "mode", rename_all = "kebab-case")]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub enum GetVectoredConcurrentIo {
|
||||
/// The read path is fully sequential: layers are visited
|
||||
/// one after the other and IOs are issued and waited upon
|
||||
@@ -242,7 +283,7 @@ pub struct MaxVectoredReadBytes(pub NonZeroUsize);
|
||||
|
||||
/// Tenant-level configuration values, used for various purposes.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(deny_unknown_fields, default)]
|
||||
#[serde(default)]
|
||||
pub struct TenantConfigToml {
|
||||
// Flush out an inmemory layer, if it's holding WAL older than this
|
||||
// This puts a backstop on how much WAL needs to be re-digested if the
|
||||
|
||||
@@ -1105,7 +1105,7 @@ pub struct CompactionAlgorithmSettings {
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
|
||||
#[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
|
||||
#[serde(tag = "mode", rename_all = "kebab-case")]
|
||||
pub enum L0FlushConfig {
|
||||
#[serde(rename_all = "snake_case")]
|
||||
Direct { max_concurrency: NonZeroUsize },
|
||||
|
||||
@@ -52,6 +52,7 @@ rustls.workspace = true
|
||||
scopeguard.workspace = true
|
||||
send-future.workspace = true
|
||||
serde.workspace = true
|
||||
serde_ignored.workspace = true
|
||||
serde_json = { workspace = true, features = ["raw_value"] }
|
||||
serde_path_to_error.workspace = true
|
||||
serde_with.workspace = true
|
||||
|
||||
@@ -96,7 +96,7 @@ fn main() -> anyhow::Result<()> {
|
||||
env::set_current_dir(&workdir)
|
||||
.with_context(|| format!("Failed to set application's current dir to '{workdir}'"))?;
|
||||
|
||||
let conf = initialize_config(&identity_file_path, &cfg_file_path, &workdir)?;
|
||||
let (conf, ignored) = initialize_config(&identity_file_path, &cfg_file_path, &workdir)?;
|
||||
|
||||
// Initialize logging.
|
||||
//
|
||||
@@ -127,7 +127,17 @@ fn main() -> anyhow::Result<()> {
|
||||
&[("node_id", &conf.id.to_string())],
|
||||
);
|
||||
|
||||
// after setting up logging, log the effective IO engine choice and read path implementations
|
||||
// Warn about ignored config items; see pageserver_api::config::ConfigToml
|
||||
// doc comment for rationale why we prefer this over serde(deny_unknown_fields).
|
||||
{
|
||||
let IgnoredConfigItems { paths } = ignored;
|
||||
for path in paths {
|
||||
warn!(?path, "ignoring unknown configuration item");
|
||||
}
|
||||
}
|
||||
|
||||
// Log configuration items for feature-flag-like config
|
||||
// (maybe we should automate this with a visitor?).
|
||||
info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
|
||||
info!(?conf.virtual_file_io_mode, "starting with virtual_file IO mode");
|
||||
info!(?conf.wal_receiver_protocol, "starting with WAL receiver protocol");
|
||||
@@ -196,11 +206,15 @@ fn main() -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct IgnoredConfigItems {
|
||||
paths: Vec<String>,
|
||||
}
|
||||
|
||||
fn initialize_config(
|
||||
identity_file_path: &Utf8Path,
|
||||
cfg_file_path: &Utf8Path,
|
||||
workdir: &Utf8Path,
|
||||
) -> anyhow::Result<&'static PageServerConf> {
|
||||
) -> anyhow::Result<(&'static PageServerConf, IgnoredConfigItems)> {
|
||||
// The deployment orchestrator writes out an indentity file containing the node id
|
||||
// for all pageservers. This file is the source of truth for the node id. In order
|
||||
// to allow for rolling back pageserver releases, the node id is also included in
|
||||
@@ -229,15 +243,23 @@ fn initialize_config(
|
||||
|
||||
let config_file_contents =
|
||||
std::fs::read_to_string(cfg_file_path).context("read config file from filesystem")?;
|
||||
let config_toml = serde_path_to_error::deserialize(
|
||||
toml_edit::de::Deserializer::from_str(&config_file_contents)
|
||||
.context("build toml deserializer")?,
|
||||
)
|
||||
.context("deserialize config toml")?;
|
||||
let deserializer = toml_edit::de::Deserializer::from_str(&config_file_contents)
|
||||
.context("build toml deserializer")?;
|
||||
let mut path_to_error_track = serde_path_to_error::Track::new();
|
||||
let deserializer =
|
||||
serde_path_to_error::Deserializer::new(deserializer, &mut path_to_error_track);
|
||||
let mut ignored = Vec::new();
|
||||
let mut push_to_ignored = |path: serde_ignored::Path<'_>| {
|
||||
ignored.push(path.to_string());
|
||||
};
|
||||
let deserializer = serde_ignored::Deserializer::new(deserializer, &mut push_to_ignored);
|
||||
let config_toml: pageserver_api::config::ConfigToml =
|
||||
serde::Deserialize::deserialize(deserializer).context("deserialize config toml")?;
|
||||
let conf = PageServerConf::parse_and_validate(identity.id, config_toml, workdir)
|
||||
.context("runtime-validation of config toml")?;
|
||||
|
||||
Ok(Box::leak(Box::new(conf)))
|
||||
let conf = Box::leak(Box::new(conf));
|
||||
let ignored = IgnoredConfigItems { paths: ignored };
|
||||
Ok((conf, ignored))
|
||||
}
|
||||
|
||||
struct WaitForPhaseResult<F: std::future::Future + Unpin> {
|
||||
|
||||
@@ -526,7 +526,6 @@ impl PageServerConf {
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize, serde::Serialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct PageserverIdentity {
|
||||
pub id: NodeId,
|
||||
}
|
||||
@@ -598,82 +597,4 @@ mod tests {
|
||||
PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
|
||||
.expect("parse_and_validate");
|
||||
}
|
||||
|
||||
/// If there's a typo in the pageserver config, we'd rather catch that typo
|
||||
/// and fail pageserver startup than silently ignoring the typo, leaving whoever
|
||||
/// made it in the believe that their config change is effective.
|
||||
///
|
||||
/// The default in serde is to allow unknown fields, so, we rely
|
||||
/// on developer+review discipline to add `deny_unknown_fields` when adding
|
||||
/// new structs to the config, and these tests here as a regression test.
|
||||
///
|
||||
/// The alternative to all of this would be to allow unknown fields in the config.
|
||||
/// To catch them, we could have a config check tool or mgmt API endpoint that
|
||||
/// compares the effective config with the TOML on disk and makes sure that
|
||||
/// the on-disk TOML is a strict subset of the effective config.
|
||||
mod unknown_fields_handling {
|
||||
macro_rules! test {
|
||||
($short_name:ident, $input:expr) => {
|
||||
#[test]
|
||||
fn $short_name() {
|
||||
let input = $input;
|
||||
let err = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(&input)
|
||||
.expect_err("some_invalid_field is an invalid field");
|
||||
dbg!(&err);
|
||||
assert!(err.to_string().contains("some_invalid_field"));
|
||||
}
|
||||
};
|
||||
}
|
||||
use indoc::indoc;
|
||||
|
||||
test!(
|
||||
toplevel,
|
||||
indoc! {r#"
|
||||
some_invalid_field = 23
|
||||
"#}
|
||||
);
|
||||
|
||||
test!(
|
||||
toplevel_nested,
|
||||
indoc! {r#"
|
||||
[some_invalid_field]
|
||||
foo = 23
|
||||
"#}
|
||||
);
|
||||
|
||||
test!(
|
||||
disk_usage_based_eviction,
|
||||
indoc! {r#"
|
||||
[disk_usage_based_eviction]
|
||||
some_invalid_field = 23
|
||||
"#}
|
||||
);
|
||||
|
||||
test!(
|
||||
tenant_config,
|
||||
indoc! {r#"
|
||||
[tenant_config]
|
||||
some_invalid_field = 23
|
||||
"#}
|
||||
);
|
||||
|
||||
test!(
|
||||
l0_flush,
|
||||
indoc! {r#"
|
||||
[l0_flush]
|
||||
mode = "direct"
|
||||
some_invalid_field = 23
|
||||
"#}
|
||||
);
|
||||
|
||||
// TODO: fix this => https://github.com/neondatabase/neon/issues/8915
|
||||
// test!(
|
||||
// remote_storage_config,
|
||||
// indoc! {r#"
|
||||
// [remote_storage_config]
|
||||
// local_path = "/nonexistent"
|
||||
// some_invalid_field = 23
|
||||
// "#}
|
||||
// );
|
||||
}
|
||||
}
|
||||
|
||||
35
test_runner/regress/test_pageserver_config.py
Normal file
35
test_runner/regress/test_pageserver_config.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import pytest
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
|
||||
|
||||
@pytest.mark.parametrize("what", ["default", "top_level", "nested"])
|
||||
def test_unknown_config_items_handling(neon_simple_env: NeonEnv, what: str):
|
||||
env = neon_simple_env
|
||||
|
||||
def edit_fn(config) -> str | None:
|
||||
if what == "default":
|
||||
return None
|
||||
elif what == "top_level":
|
||||
config["unknown_top_level_config_item"] = 23
|
||||
return r".*unknown_top_level_config_item.*"
|
||||
elif what == "nested":
|
||||
config["remote_storage"]["unknown_config_item"] = 23
|
||||
return r".*remote_storage\.unknown_config_item.*"
|
||||
else:
|
||||
raise ValueError(f"Unknown what: {what}")
|
||||
|
||||
expect_warn_re = env.pageserver.edit_config_toml(edit_fn)
|
||||
|
||||
if expect_warn_re is not None:
|
||||
env.pageserver.allowed_errors.append(expect_warn_re)
|
||||
|
||||
if expect_warn_re is not None:
|
||||
assert not env.pageserver.log_contains(expect_warn_re)
|
||||
|
||||
# in any way, unknown config items should not fail pageserver to start
|
||||
# TODO: extend this test with the config validator mode once we introduce it
|
||||
# https://github.com/neondatabase/cloud/issues/24349
|
||||
env.pageserver.restart()
|
||||
|
||||
if expect_warn_re is not None:
|
||||
assert env.pageserver.log_contains(expect_warn_re)
|
||||
Reference in New Issue
Block a user