use nix kill instead of spawning a process

Since we are now calling the syscall directly, read_pidfile can now
parse an integer.

We also verify the pid is >= 1, because calling kill on 0 or negative
values goes straight to crazytown.
This commit is contained in:
Eric Seppanen
2021-05-03 20:43:45 -07:00
parent 4e2e5bb4e6
commit aac913f9dc
3 changed files with 51 additions and 49 deletions

14
Cargo.lock generated
View File

@@ -404,6 +404,7 @@ dependencies = [
"fs_extra",
"hex",
"lazy_static",
"nix",
"pageserver",
"postgres",
"postgres_ffi",
@@ -411,6 +412,7 @@ dependencies = [
"regex",
"serde",
"tar",
"thiserror",
"tokio-postgres",
"toml",
"walkeeper",
@@ -1163,6 +1165,18 @@ dependencies = [
"tempfile",
]
[[package]]
name = "nix"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa9b4819da1bc61c0ea48b63b7bc8604064dd43013e7cc325df098d49cd7c18a"
dependencies = [
"bitflags",
"cc",
"cfg-if 1.0.0",
"libc",
]
[[package]]
name = "nom"
version = "5.1.2"

View File

@@ -11,7 +11,6 @@ rand = "0.8.3"
tar = "0.4.33"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
serde = { version = "1.0", features = ["derive"] }
toml = "0.5"
lazy_static = "1.4"
@@ -20,6 +19,8 @@ anyhow = "1.0"
hex = "0.4.3"
bytes = "1.0.1"
fs_extra = "1.2.0"
nix = "0.20"
thiserror = "1"
pageserver = { path = "../pageserver" }
walkeeper = { path = "../walkeeper" }

View File

@@ -1,8 +1,9 @@
use anyhow::Result;
use anyhow::{anyhow, bail, Context, Result};
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
use std::convert::TryInto;
use std::fs;
use std::io;
use std::net::SocketAddr;
use std::net::TcpStream;
use std::net::{SocketAddr, TcpStream};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::str::FromStr;
@@ -171,7 +172,7 @@ impl PageServerNode {
.env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap());
if !cmd.status()?.success() {
anyhow::bail!(
bail!(
"Pageserver failed to start. See '{}' for details.",
self.repo_path().join("pageserver.log").display()
);
@@ -192,20 +193,13 @@ impl PageServerNode {
}
pub fn stop(&self) -> Result<()> {
let pidfile = self.pid_file();
let pid = read_pidfile(&pidfile)?;
let status = Command::new("kill")
.arg(&pid)
.env_clear()
.status()
.expect("failed to execute kill");
if !status.success() {
anyhow::bail!("Failed to kill pageserver with pid {}", pid);
let pid = read_pidfile(&self.pid_file())?;
let pid = Pid::from_raw(pid);
if kill(pid, Signal::SIGTERM).is_err() {
bail!("Failed to kill pageserver with pid {}", pid);
}
// await for pageserver stop
// wait for pageserver stop
for _ in 0..5 {
let stream = TcpStream::connect(self.address());
if let Err(_e) = stream {
@@ -215,12 +209,7 @@ impl PageServerNode {
thread::sleep(Duration::from_secs(1));
}
// ok, we failed to stop pageserver, let's panic
if !status.success() {
anyhow::bail!("Failed to stop pageserver with pid {}", pid);
} else {
Ok(())
}
bail!("Failed to stop pageserver with pid {}", pid);
}
pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
@@ -304,24 +293,22 @@ impl WalAcceptorNode {
}
}
pub fn stop(&self) -> std::result::Result<(), io::Error> {
pub fn stop(&self) -> Result<()> {
println!("Stopping wal acceptor on {}", self.listen);
let pidfile = self.data_dir.join("wal_acceptor.pid");
let pid = read_pidfile(&pidfile)?;
// Ignores any failures when running this command
let _status = Command::new("kill")
.arg(pid)
.env_clear()
.status()
.expect("failed to execute kill");
let pid = Pid::from_raw(pid);
if kill(pid, Signal::SIGTERM).is_err() {
bail!("Failed to kill wal_acceptor with pid {}", pid);
}
Ok(())
}
}
impl Drop for WalAcceptorNode {
fn drop(&mut self) {
self.stop().unwrap();
// Ignore errors.
let _ = self.stop();
}
}
@@ -333,15 +320,10 @@ pub struct WalProposerNode {
impl WalProposerNode {
pub fn stop(&self) {
let status = Command::new("kill")
.arg(self.pid.to_string())
.env_clear()
.status()
.expect("failed to execute kill");
if !status.success() {
panic!("kill start failed");
}
// std::process::Child::id() returns u32, we need i32.
let pid: i32 = self.pid.try_into().unwrap();
let pid = Pid::from_raw(pid);
kill(pid, Signal::SIGTERM).expect("failed to execute kill");
}
}
@@ -353,11 +335,16 @@ impl Drop for WalProposerNode {
/// Read a PID file
///
/// This should contain an unsigned integer, but we return it as a String
/// because our callers only want to pass it back into a subcommand.
fn read_pidfile(pidfile: &Path) -> std::result::Result<String, io::Error> {
fs::read_to_string(pidfile).map_err(|err| {
eprintln!("failed to read pidfile {:?}: {:?}", pidfile, err);
err
})
/// We expect a file that contains a single integer.
/// We return an i32 for compatibility with libc and nix.
fn read_pidfile(pidfile: &Path) -> Result<i32> {
let pid_str = fs::read_to_string(pidfile)
.with_context(|| format!("failed to read pidfile {:?}", pidfile))?;
let pid: i32 = pid_str
.parse()
.map_err(|_| anyhow!("failed to parse pidfile {:?}", pidfile))?;
if pid < 1 {
bail!("pidfile {:?} contained bad value '{}'", pidfile, pid);
}
Ok(pid)
}