diff --git a/README.md b/README.md index e9c30668e0..01de24cdf6 100644 --- a/README.md +++ b/README.md @@ -125,24 +125,23 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r # Create repository in .neon with proper paths to binaries and data # Later that would be responsibility of a package install script > ./target/debug/neon_local init -Starting pageserver at '127.0.0.1:64000' in '.neon' - -Pageserver started -Successfully initialized timeline 7dd0907914ac399ff3be45fb252bfdb7 -Stopping pageserver gracefully...done! +Starting pageserver at '127.0.0.1:64000' in '.neon'. +pageserver started, pid: 2545906 +Successfully initialized timeline de200bd42b49cc1814412c7e592dd6e9 +Stopped pageserver 1 process with pid 2545906 # start pageserver and safekeeper > ./target/debug/neon_local start -Starting etcd broker using /usr/bin/etcd -Starting pageserver at '127.0.0.1:64000' in '.neon' - -Pageserver started -Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1' -Safekeeper started +Starting etcd broker using "/usr/bin/etcd" +etcd started, pid: 2545996 +Starting pageserver at '127.0.0.1:64000' in '.neon'. +pageserver started, pid: 2546005 +Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'. +safekeeper 1 started, pid: 2546041 # start postgres compute node > ./target/debug/neon_local pg start main -Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ... +Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ... Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432 Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres' diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index 2f8098b7c9..c558c09854 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -26,8 +26,18 @@ use nix::unistd::Pid; use utils::lock_file; -const RETRIES: u32 = 15; -const RETRY_TIMEOUT_MILLIS: u64 = 500; +// These constants control the loop used to poll for process start / stop. +// +// The loop waits for at most 10 seconds, polling every 100 ms. +// Once a second, it prints a dot ("."), to give the user an indication that +// it's waiting. If the process hasn't started/stopped after 5 seconds, +// it prints a notice that it's taking long, but keeps waiting. +// +const RETRY_UNTIL_SECS: u64 = 10; +const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS; +const RETRY_INTERVAL_MILLIS: u64 = 100; +const DOT_EVERY_RETRIES: u64 = 10; +const NOTICE_AFTER_RETRIES: u64 = 50; /// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates /// it itself. @@ -107,16 +117,16 @@ where return Ok(spawned_process); } Ok(false) => { - if retries < 5 { + if retries == NOTICE_AFTER_RETRIES { + // The process is taking a long time to start up. Keep waiting, but + // print a message + print!("\n{process_name} has not started yet, continuing to wait"); + } + if retries % DOT_EVERY_RETRIES == 0 { print!("."); io::stdout().flush().unwrap(); - } else { - if retries == 5 { - println!() // put a line break after dots for second message - } - println!("{process_name} has not started yet, retrying ({retries})..."); } - thread::sleep(Duration::from_millis(RETRY_TIMEOUT_MILLIS)); + thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS)); } Err(e) => { println!("{process_name} failed to start: {e:#}"); @@ -127,7 +137,8 @@ where } } } - anyhow::bail!("{process_name} could not start in {RETRIES} attempts"); + println!(); + anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds"); } /// Stops the process, using the pid file given. Returns Ok also if the process is already not running. @@ -158,7 +169,7 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any } // Wait until process is gone - for _ in 0..RETRIES { + for retries in 0..RETRIES { match process_has_stopped(pid) { Ok(true) => { println!("\n{process_name} stopped"); @@ -170,9 +181,16 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any return Ok(()); } Ok(false) => { - print!("."); - io::stdout().flush().unwrap(); - thread::sleep(Duration::from_secs(1)) + if retries == NOTICE_AFTER_RETRIES { + // The process is taking a long time to start up. Keep waiting, but + // print a message + print!("\n{process_name} has not stopped yet, continuing to wait"); + } + if retries % DOT_EVERY_RETRIES == 0 { + print!("."); + io::stdout().flush().unwrap(); + } + thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS)); } Err(e) => { println!("{process_name} with pid {pid} failed to stop: {e:#}"); @@ -180,8 +198,8 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any } } } - - anyhow::bail!("{process_name} with pid {pid} failed to stop in {RETRIES} attempts"); + println!(); + anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds"); } fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command { diff --git a/control_plane/src/etcd.rs b/control_plane/src/etcd.rs index 4c15914e24..60aa5da780 100644 --- a/control_plane/src/etcd.rs +++ b/control_plane/src/etcd.rs @@ -6,7 +6,7 @@ use crate::{background_process, local_env}; pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> { let etcd_broker = &env.etcd_broker; - println!( + print!( "Starting etcd broker using {:?}", etcd_broker.etcd_binary_path ); diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 18d6aee68d..aec6f5bc2c 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -237,7 +237,7 @@ impl PageServerNode { datadir: &Path, update_config: bool, ) -> anyhow::Result { - println!( + print!( "Starting pageserver at '{}' in '{}'", self.pg_connection_config.raw_address(), datadir.display()