From c43e664ff577d4568722e4e7a2b2c6267b609607 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Tue, 3 Sep 2024 15:11:30 +0100 Subject: [PATCH] storcon: provide an az id in metadata.json from neon local (#8897) ## Problem Neon local set-up does not inject an az id in `metadata.json`. See real change in https://github.com/neondatabase/neon/pull/8852. ## Summary of changes We piggyback on the existing `availability_zone` pageserver configuration in order to avoid making neon local even more complex. --- control_plane/src/pageserver.rs | 23 ++++++++++++++++++- test_runner/fixtures/neon_fixtures.py | 12 +++++----- .../fixtures/pageserver/allowed_errors.py | 3 --- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 399b1c2653..31777eb7a5 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -181,6 +181,23 @@ impl PageServerNode { ); io::stdout().flush()?; + // If the config file we got as a CLI argument includes the `availability_zone` + // config, then use that to populate the `metadata.json` file for the pageserver. + // In production the deployment orchestrator does this for us. 
+ let az_id = conf + .other + .get("availability_zone") + .map(|toml| { + let az_str = toml.to_string(); + // Trim the (") chars from the toml representation + if az_str.starts_with('"') && az_str.ends_with('"') { + az_str[1..az_str.len() - 1].to_string() + } else { + az_str + } + }) + .unwrap_or("local".to_string()); + let config = self .pageserver_init_make_toml(conf) .context("make pageserver toml")?; @@ -216,6 +233,7 @@ impl PageServerNode { let (_http_host, http_port) = parse_host_port(&self.conf.listen_http_addr).expect("Unable to parse listen_http_addr"); let http_port = http_port.unwrap_or(9898); + // Intentionally hand-craft JSON: this acts as an implicit format compat test // in case the pageserver-side structure is edited, and reflects the real life // situation: the metadata is written by some other script. @@ -226,7 +244,10 @@ impl PageServerNode { postgres_port: self.pg_connection_config.port(), http_host: "localhost".to_string(), http_port, - other: HashMap::new(), + other: HashMap::from([( + "availability_zone_id".to_string(), + serde_json::json!(az_id), + )]), }) .unwrap(), ) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 800ae03d13..0cbab71cc3 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1164,6 +1164,8 @@ class NeonEnv: "listen_http_addr": f"localhost:{pageserver_port.http}", "pg_auth_type": pg_auth_type, "http_auth_type": http_auth_type, + # Default which can be overridden with `NeonEnvBuilder.pageserver_config_override` + "availability_zone": "us-east-2a", } if self.pageserver_virtual_file_io_engine is not None: ps_cfg["virtual_file_io_engine"] = self.pageserver_virtual_file_io_engine @@ -1192,11 +1194,7 @@ class NeonEnv: # Create a corresponding NeonPageserver object self.pageservers.append( - NeonPageserver( - self, - ps_id, - port=pageserver_port, - ) + NeonPageserver(self, ps_id, port=pageserver_port, az_id=ps_cfg["availability_zone"]) ) 
cfg["pageservers"].append(ps_cfg) @@ -2400,6 +2398,7 @@ class NeonStorageController(MetricsGetter, LogUtils): "listen_http_port": node.service_port.http, "listen_pg_addr": "localhost", "listen_pg_port": node.service_port.pg, + "availability_zone_id": node.az_id, } log.info(f"node_register({body})") self.request( @@ -2923,10 +2922,11 @@ class NeonPageserver(PgProtocol, LogUtils): TEMP_FILE_SUFFIX = "___temp" - def __init__(self, env: NeonEnv, id: int, port: PageserverPort): + def __init__(self, env: NeonEnv, id: int, port: PageserverPort, az_id: str): super().__init__(host="localhost", port=port.pg, user="cloud_admin") self.env = env self.id = id + self.az_id = az_id self.running = False self.service_port = port self.version = env.get_binary_version("pageserver") diff --git a/test_runner/fixtures/pageserver/allowed_errors.py b/test_runner/fixtures/pageserver/allowed_errors.py index 70f2676245..f8d9a51c91 100755 --- a/test_runner/fixtures/pageserver/allowed_errors.py +++ b/test_runner/fixtures/pageserver/allowed_errors.py @@ -109,9 +109,6 @@ DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS = [ # controller's attempts to notify the endpoint). ".*reconciler.*neon_local notification hook failed.*", ".*reconciler.*neon_local error.*", - # Neon local does not provide pageserver with an AZ - # TODO: remove this once neon local does so - ".*registering without specific availability zone id.*", ]