control_plane: add error handling to reading pid files

print file errors to stderr; propagate the io::Error to the caller.
This error isn't handled very gracefully in WalAcceptorNode::drop(),
but there aren't any good options there since drop can't fail.
This commit is contained in:
Eric Seppanen
2021-04-13 14:26:34 -07:00
parent 3c4ebc4030
commit d1d6c968d5
2 changed files with 29 additions and 15 deletions

View File

@@ -1,6 +1,7 @@
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::fs;
use std::io;
use std::process::Command;
use std::net::TcpStream;
use std::thread;
@@ -162,10 +163,10 @@ impl PageServerNode {
pub fn stop(&self) -> Result<()> {
let pidfile = self.env.pageserver_pidfile();
let pid = fs::read_to_string(pidfile).unwrap();
let pid = read_pidfile(&pidfile)?;
let status = Command::new("kill")
.arg(pid.clone())
.arg(&pid)
.env_clear()
.status()
.expect("failed to execute kill");
@@ -260,22 +261,24 @@ impl WalAcceptorNode {
}
}
pub fn stop(&self) {
pub fn stop(&self) -> std::result::Result<(), io::Error> {
println!("Stopping wal acceptor on {}", self.listen);
let pidfile = self.data_dir.join("wal_acceptor.pid");
if let Ok(pid) = fs::read_to_string(pidfile) {
let _status = Command::new("kill")
.arg(pid)
.env_clear()
.status()
.expect("failed to execute kill");
}
let pid = read_pidfile(&pidfile)?;
// Ignores any failures when running this command
let _status = Command::new("kill")
.arg(pid)
.env_clear()
.status()
.expect("failed to execute kill");
Ok(())
}
}
impl Drop for WalAcceptorNode {
fn drop(&mut self) {
self.stop();
self.stop().unwrap();
}
}
@@ -340,3 +343,14 @@ pub fn regress_check(pg: &PostgresNode) {
.status()
.expect("pg_regress failed");
}
/// Read a PID file
///
/// This should contain an unsigned integer, but we return it as a String
/// because our callers only want to pass it back into a subcommand.
fn read_pidfile(pidfile: &Path) -> std::result::Result<String, io::Error> {
fs::read_to_string(pidfile).map_err(|err| {
eprintln!("failed to read pidfile {:?}: {:?}", pidfile, err);
err
})
}

View File

@@ -78,7 +78,7 @@ fn test_acceptors_restarts() {
} else {
let node: usize = rng.gen_range(0..REDUNDANCY);
failed_node = Some(node);
storage_cplane.wal_acceptors[node].stop();
storage_cplane.wal_acceptors[node].stop().unwrap();
}
}
}
@@ -127,7 +127,7 @@ fn test_acceptors_unavalability() {
psql.execute("INSERT INTO t values (1, 'payload')", &[])
.unwrap();
storage_cplane.wal_acceptors[0].stop();
storage_cplane.wal_acceptors[0].stop().unwrap();
let cp = Arc::new(storage_cplane);
start_acceptor(&cp, 0);
let now = SystemTime::now();
@@ -137,7 +137,7 @@ fn test_acceptors_unavalability() {
psql.execute("INSERT INTO t values (3, 'payload')", &[])
.unwrap();
cp.wal_acceptors[1].stop();
cp.wal_acceptors[1].stop().unwrap();
start_acceptor(&cp, 1);
psql.execute("INSERT INTO t values (4, 'payload')", &[])
.unwrap();
@@ -164,7 +164,7 @@ fn simulate_failures(cplane: Arc<TestStorageControlPlane>) {
let mask: u32 = rng.gen_range(0..(1 << n_acceptors));
for i in 0..n_acceptors {
if (mask & (1 << i)) != 0 {
cplane.wal_acceptors[i].stop();
cplane.wal_acceptors[i].stop().unwrap();
}
}
thread::sleep(failure_period);