mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 21:42:56 +00:00
control_plane: add error handling to reading pid files
print file errors to stderr; propagate the io::Error to the caller. This error isn't handled very gracefully in WalAcceptorNode::drop(), but there aren't any good options there since drop can't fail.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::fs;
|
||||
use std::io;
|
||||
use std::process::Command;
|
||||
use std::net::TcpStream;
|
||||
use std::thread;
|
||||
@@ -162,10 +163,10 @@ impl PageServerNode {
|
||||
|
||||
pub fn stop(&self) -> Result<()> {
|
||||
let pidfile = self.env.pageserver_pidfile();
|
||||
let pid = fs::read_to_string(pidfile).unwrap();
|
||||
let pid = read_pidfile(&pidfile)?;
|
||||
|
||||
let status = Command::new("kill")
|
||||
.arg(pid.clone())
|
||||
.arg(&pid)
|
||||
.env_clear()
|
||||
.status()
|
||||
.expect("failed to execute kill");
|
||||
@@ -260,22 +261,24 @@ impl WalAcceptorNode {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stop(&self) {
|
||||
pub fn stop(&self) -> std::result::Result<(), io::Error> {
|
||||
println!("Stopping wal acceptor on {}", self.listen);
|
||||
let pidfile = self.data_dir.join("wal_acceptor.pid");
|
||||
if let Ok(pid) = fs::read_to_string(pidfile) {
|
||||
let _status = Command::new("kill")
|
||||
.arg(pid)
|
||||
.env_clear()
|
||||
.status()
|
||||
.expect("failed to execute kill");
|
||||
}
|
||||
let pid = read_pidfile(&pidfile)?;
|
||||
// Ignores any failures when running this command
|
||||
let _status = Command::new("kill")
|
||||
.arg(pid)
|
||||
.env_clear()
|
||||
.status()
|
||||
.expect("failed to execute kill");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for WalAcceptorNode {
|
||||
fn drop(&mut self) {
|
||||
self.stop();
|
||||
self.stop().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -340,3 +343,14 @@ pub fn regress_check(pg: &PostgresNode) {
|
||||
.status()
|
||||
.expect("pg_regress failed");
|
||||
}
|
||||
|
||||
/// Read a PID file
|
||||
///
|
||||
/// This should contain an unsigned integer, but we return it as a String
|
||||
/// because our callers only want to pass it back into a subcommand.
|
||||
fn read_pidfile(pidfile: &Path) -> std::result::Result<String, io::Error> {
|
||||
fs::read_to_string(pidfile).map_err(|err| {
|
||||
eprintln!("failed to read pidfile {:?}: {:?}", pidfile, err);
|
||||
err
|
||||
})
|
||||
}
|
||||
|
||||
@@ -78,7 +78,7 @@ fn test_acceptors_restarts() {
|
||||
} else {
|
||||
let node: usize = rng.gen_range(0..REDUNDANCY);
|
||||
failed_node = Some(node);
|
||||
storage_cplane.wal_acceptors[node].stop();
|
||||
storage_cplane.wal_acceptors[node].stop().unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -127,7 +127,7 @@ fn test_acceptors_unavalability() {
|
||||
psql.execute("INSERT INTO t values (1, 'payload')", &[])
|
||||
.unwrap();
|
||||
|
||||
storage_cplane.wal_acceptors[0].stop();
|
||||
storage_cplane.wal_acceptors[0].stop().unwrap();
|
||||
let cp = Arc::new(storage_cplane);
|
||||
start_acceptor(&cp, 0);
|
||||
let now = SystemTime::now();
|
||||
@@ -137,7 +137,7 @@ fn test_acceptors_unavalability() {
|
||||
psql.execute("INSERT INTO t values (3, 'payload')", &[])
|
||||
.unwrap();
|
||||
|
||||
cp.wal_acceptors[1].stop();
|
||||
cp.wal_acceptors[1].stop().unwrap();
|
||||
start_acceptor(&cp, 1);
|
||||
psql.execute("INSERT INTO t values (4, 'payload')", &[])
|
||||
.unwrap();
|
||||
@@ -164,7 +164,7 @@ fn simulate_failures(cplane: Arc<TestStorageControlPlane>) {
|
||||
let mask: u32 = rng.gen_range(0..(1 << n_acceptors));
|
||||
for i in 0..n_acceptors {
|
||||
if (mask & (1 << i)) != 0 {
|
||||
cplane.wal_acceptors[i].stop();
|
||||
cplane.wal_acceptors[i].stop().unwrap();
|
||||
}
|
||||
}
|
||||
thread::sleep(failure_period);
|
||||
|
||||
Reference in New Issue
Block a user