feat: start_database auto retry

This commit is contained in:
WenyXu
2024-04-19 02:40:15 +00:00
parent c7400a4182
commit f9afc5dbbf
2 changed files with 32 additions and 12 deletions

View File

@@ -28,8 +28,8 @@ pub(crate) type Pid = u32;
/// The state of a process.
#[derive(Debug, Clone)]
pub(crate) struct Process {
exit_status: Option<ExitStatus>,
exited: bool,
pub(crate) exit_status: Option<ExitStatus>,
pub(crate) exited: bool,
}
/// ProcessManager provides the ability to spawn/wait/kill a child process.
@@ -48,6 +48,10 @@ impl ProcessManager {
}
}
/// Returns a snapshot of the tracked state for `pid`.
///
/// Locks `self.processes`, looks up `pid`, and returns a clone of the entry,
/// or `None` when no entry exists for that pid. Because the result is a clone,
/// it does not reflect later changes made to the stored entry.
///
/// # Panics
///
/// Panics if `lock()` returns an error (presumably a poisoned mutex — confirm
/// against the declared type of `processes`).
pub(crate) fn get(&self, pid: Pid) -> Option<Process> {
self.processes.lock().unwrap().get(&pid).cloned()
}
fn wait<F>(&self, mut child: Child, f: F)
where
F: FnOnce(Pid, OnChildExitResult) + Send + 'static,

View File

@@ -57,7 +57,7 @@ async fn main() {
let secs = rng.gen_range(100..300);
moved_state.killed.store(false, Ordering::Relaxed);
tokio::time::sleep(Duration::from_millis(secs)).await;
warn!("After {secs}s, Killing pid: {pid}");
warn!("After {secs}ms, Killing pid: {pid}");
moved_state.killed.store(true, Ordering::Relaxed);
ProcessManager::kill(pid, Signal::SIGKILL).expect("Failed to kill");
}
@@ -152,15 +152,31 @@ async fn start_database() -> Result<Pid> {
let health_url = "http://127.0.0.1:4000/health";
let process_manager = ProcessManager::new();
let pid = start_process(&process_manager, binary_path, test_dir, template_filename)
.await
.unwrap();
tokio::time::timeout(Duration::from_secs(100), health_check(health_url))
.await
.expect("Failed to start GreptimeDB process");
info!("GreptimeDB started, pid: {pid}");
Ok(pid)
for _ in 0..3 {
let pid = start_process(&process_manager, binary_path, test_dir, template_filename)
.await
.unwrap();
match tokio::time::timeout(Duration::from_secs(10), health_check(health_url)).await {
Ok(_) => {
info!("GreptimeDB started, pid: {pid}");
return Ok(pid);
}
Err(_) => {
ensure!(
process_manager.get(pid).unwrap().exited,
error::UnexpectedSnafu {
err_msg: format!("Failed to start database: pid: {pid}")
}
);
// retry later
warn!("Wait for staring timeout, retry later...");
}
};
}
error::UnexpectedSnafu {
err_msg: "Failed to start datanode",
}
.fail()
}
async fn start_process(