diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index 1a5ac1e2fe..8909e27c94 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -51,21 +51,21 @@ pub enum InitialPidFile<'t> { } /// Start a background child process using the parameters given. -pub fn start_process< - F, - S: AsRef, - EI: IntoIterator, // Not generic AsRef, otherwise empty `envs` prevents type inference ->( +pub fn start_process( process_name: &str, datadir: &Path, command: &Path, - args: &[S], + args: AI, envs: EI, initial_pid_file: InitialPidFile, process_status_check: F, ) -> anyhow::Result where F: Fn() -> anyhow::Result, + AI: IntoIterator, + A: AsRef, + // Not generic AsRef, otherwise empty `envs` prevents type inference + EI: IntoIterator, { let log_path = datadir.join(format!("{process_name}.log")); let process_log_file = fs::OpenOptions::new() diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index 6f059d535e..f0c3b983f0 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -341,7 +341,7 @@ fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> { .get_many::("pageserver-config-override") .into_iter() .flatten() - .map(|s| s.as_str()) + .map(String::as_str) .collect() } diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 51e540e39c..3575e75db9 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -1,9 +1,10 @@ +use std::borrow::Cow; use std::collections::HashMap; use std::fs::File; use std::io::{BufReader, Write}; use std::num::NonZeroU64; -use std::path::{Path, PathBuf}; -use std::process::Child; +use std::path::PathBuf; +use std::process::{Child, Command}; use std::{io, result}; use anyhow::{bail, ensure, Context}; @@ -129,6 +130,8 @@ impl PageServerNode { overrides } + /// Initializes a pageserver node by creating its config with the overrides provided, + /// and creating an initial tenant and timeline afterwards. pub fn initialize( &self, create_tenant: Option, @@ -136,11 +139,28 @@ impl PageServerNode { config_overrides: &[&str], pg_version: u32, ) -> anyhow::Result { + // First, run `pageserver --init` and wait for it to write a config into FS and exit. + self.pageserver_init(config_overrides).with_context(|| { + format!( + "Failed to run init for pageserver node {}", + self.env.pageserver.id, + ) + })?; + + // Then, briefly start it fully to run HTTP commands on it, + // to create initial tenant and timeline. + // We disable the remote storage, since we stop pageserver right after the timeline creation, + // hence most of the uploads will either aborted or not started: no point to start them at all. + let disabled_remote_storage_override = "remote_storage={}"; let mut pageserver_process = self - .start_node(config_overrides, &self.env.base_data_dir, true) + .start_node( + &[disabled_remote_storage_override], + // Previous overrides will be taken from the config created before, don't overwrite them. + false, + ) .with_context(|| { format!( - "Failed to start a process for pageserver {}", + "Failed to start a process for pageserver node {}", self.env.pageserver.id, ) })?; @@ -201,55 +221,73 @@ impl PageServerNode { } pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result { - self.start_node(config_overrides, &self.repo_path(), false) + self.start_node(config_overrides, false) } - fn start_node( - &self, - config_overrides: &[&str], - datadir: &Path, - update_config: bool, - ) -> anyhow::Result { - let mut overrides = self.neon_local_overrides(); - overrides.extend(config_overrides.iter().map(|&c| c.to_owned())); - - print!( - "Starting pageserver at '{}' in '{}'", + fn pageserver_init(&self, config_overrides: &[&str]) -> anyhow::Result<()> { + let datadir = self.repo_path(); + let node_id = self.env.pageserver.id; + println!( + "Initializing pageserver node {} at '{}' in {:?}", + node_id, self.pg_connection_config.raw_address(), - datadir.display() + datadir ); io::stdout().flush()?; - let mut args = vec![ - "-D", - datadir.to_str().with_context(|| { - format!("Datadir path {datadir:?} cannot be represented as a unicode string") - })?, - ]; + let datadir_path_str = datadir.to_str().with_context(|| { + format!("Cannot start pageserver node {node_id} in path that has no string representation: {datadir:?}") + })?; + let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str); + args.push(Cow::Borrowed("--init")); + let init_output = Command::new(&self.env.pageserver_bin()) + .args(args.iter().map(Cow::as_ref)) + .envs(self.pageserver_env_variables()?) + .output() + .with_context(|| format!("Failed to run pageserver init for node {node_id}"))?; + + anyhow::ensure!( + init_output.status.success(), + "Pageserver init for node {} did not finish successfully, stdout: {}, stderr: {}", + node_id, + String::from_utf8_lossy(&init_output.stdout), + String::from_utf8_lossy(&init_output.stderr), + ); + + Ok(()) + } + + fn start_node(&self, config_overrides: &[&str], update_config: bool) -> anyhow::Result { + let mut overrides = self.neon_local_overrides(); + overrides.extend(config_overrides.iter().map(|&c| c.to_owned())); + + let datadir = self.repo_path(); + print!( + "Starting pageserver node {} at '{}' in {:?}", + self.env.pageserver.id, + self.pg_connection_config.raw_address(), + datadir + ); + io::stdout().flush()?; + + let datadir_path_str = datadir.to_str().with_context(|| { + format!( + "Cannot start pageserver node {} in path that has no string representation: {:?}", + self.env.pageserver.id, datadir, + ) + })?; + let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str); if update_config { - args.push("--update-config"); + args.push(Cow::Borrowed("--update-config")); } - for config_override in &overrides { - args.extend(["-c", config_override]); - } - - let envs = if self.env.pageserver.auth_type != AuthType::Trust { - // Generate a token to connect from the pageserver to a safekeeper - let token = self - .env - .generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?; - vec![("ZENITH_AUTH_TOKEN".to_owned(), token)] - } else { - vec![] - }; background_process::start_process( "pageserver", - datadir, + &datadir, &self.env.pageserver_bin(), - &args, - envs, + args.iter().map(Cow::as_ref), + self.pageserver_env_variables()?, background_process::InitialPidFile::Expect(&self.pid_file()), || match self.check_status() { Ok(()) => Ok(true), @@ -259,6 +297,35 @@ impl PageServerNode { ) } + fn pageserver_basic_args<'a>( + &self, + config_overrides: &'a [&'a str], + datadir_path_str: &'a str, + ) -> Vec> { + let mut args = vec![Cow::Borrowed("-D"), Cow::Borrowed(datadir_path_str)]; + + let mut overrides = self.neon_local_overrides(); + overrides.extend(config_overrides.iter().map(|&c| c.to_owned())); + for config_override in overrides { + args.push(Cow::Borrowed("-c")); + args.push(Cow::Owned(config_override)); + } + + args + } + + fn pageserver_env_variables(&self) -> anyhow::Result> { + Ok(if self.env.pageserver.auth_type != AuthType::Trust { + // Generate a token to connect from the pageserver to a safekeeper + let token = self + .env + .generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?; + vec![("ZENITH_AUTH_TOKEN".to_owned(), token)] + } else { + Vec::new() + }) + } + /// /// Stop the server. /// diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index 3bbffd6941..28858fcbab 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -272,7 +272,7 @@ impl Debug for S3Config { } impl RemoteStorageConfig { - pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result { + pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result> { let local_path = toml.get("local_path"); let bucket_name = toml.get("bucket_name"); let bucket_region = toml.get("bucket_region"); @@ -296,7 +296,8 @@ impl RemoteStorageConfig { .context("Failed to parse 'concurrency_limit' as a positive integer")?; let storage = match (local_path, bucket_name, bucket_region) { - (None, None, None) => bail!("no 'local_path' nor 'bucket_name' option"), + // no 'local_path' nor 'bucket_name' options are provided, consider this remote storage disabled + (None, None, None) => return Ok(None), (_, Some(_), None) => { bail!("'bucket_region' option is mandatory if 'bucket_name' is given ") } @@ -322,11 +323,11 @@ impl RemoteStorageConfig { (Some(_), Some(_), _) => bail!("local_path and bucket_name are mutually exclusive"), }; - Ok(RemoteStorageConfig { + Ok(Some(RemoteStorageConfig { max_concurrent_syncs, max_sync_errors, storage, - }) + })) } } diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index c07907a1c9..48e9f32276 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -524,7 +524,7 @@ impl PageServerConf { )), "auth_type" => builder.auth_type(parse_toml_from_str(key, item)?), "remote_storage" => { - builder.remote_storage_config(Some(RemoteStorageConfig::from_toml(item)?)) + builder.remote_storage_config(RemoteStorageConfig::from_toml(item)?) } "tenant_config" => { t_conf = Self::parse_toml_tenant_conf(item)?; diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 92cd5db203..cab5053b5b 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -318,12 +318,15 @@ fn set_id(workdir: &Path, given_id: Option) -> Result { } // Parse RemoteStorage from TOML table. -fn parse_remote_storage(storage_conf: &str) -> Result { +fn parse_remote_storage(storage_conf: &str) -> anyhow::Result { // funny toml doesn't consider plain inline table as valid document, so wrap in a key to parse - let storage_conf_toml = format!("remote_storage = {}", storage_conf); + let storage_conf_toml = format!("remote_storage = {storage_conf}"); let parsed_toml = storage_conf_toml.parse::()?; // parse let (_, storage_conf_parsed_toml) = parsed_toml.iter().next().unwrap(); // and strip key off again - RemoteStorageConfig::from_toml(storage_conf_parsed_toml) + RemoteStorageConfig::from_toml(storage_conf_parsed_toml).and_then(|parsed_config| { + // XXX: Don't print the original toml here, there might be some sensitive data + parsed_config.context("Incorrectly parsed remote storage toml as no remote storage config") + }) } #[test]