From bd4dae8f4a5d4ea3fe2279dbd30a30a0943ffbab Mon Sep 17 00:00:00 2001
From: Arseny Sher
Date: Mon, 1 Jan 2024 14:38:08 +0300
Subject: [PATCH] compute_ctl: kill postgres and sync-safekeepers on exit.

Otherwise they are left orphaned when compute_ctl is terminated with a
signal. This was invisible most of the time because normally neon_local
or k8s kills postgres directly, and compute_ctl then finishes
gracefully. However, in some tests compute_ctl gets stuck waiting for
sync-safekeepers, which intentionally never finishes because the
safekeepers are offline, and we want to stop compute_ctl without
leaving orphans behind.

This is a rather rough approach which doesn't wait for the children to
terminate. A better way would be to convert compute_ctl to async, which
would make waiting easy (see the sketches after the diff).
---
 Cargo.lock                           |  2 ++
 compute_tools/Cargo.toml             |  2 ++
 compute_tools/src/bin/compute_ctl.rs | 32 +++++++++++++++++++++++++++-
 compute_tools/src/compute.rs         |  8 +++++++
 control_plane/src/endpoint.rs        | 18 ++++++++++++----
 5 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index abd87dc0da..8e0ad7c8ee 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1161,6 +1161,7 @@ dependencies = [
  "flate2",
  "futures",
  "hyper",
+ "nix 0.26.2",
  "notify",
  "num_cpus",
  "opentelemetry",
@@ -1171,6 +1172,7 @@ dependencies = [
  "rust-ini",
  "serde",
  "serde_json",
+ "signal-hook",
  "tar",
  "tokio",
  "tokio-postgres",
diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml
index 142fa08495..759a117ee9 100644
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -13,6 +13,7 @@ clap.workspace = true
 flate2.workspace = true
 futures.workspace = true
 hyper = { workspace = true, features = ["full"] }
+nix.workspace = true
 notify.workspace = true
 num_cpus.workspace = true
 opentelemetry.workspace = true
@@ -20,6 +21,7 @@ postgres.workspace = true
 regex.workspace = true
 serde.workspace = true
 serde_json.workspace = true
+signal-hook.workspace = true
 tar.workspace = true
 reqwest = { workspace = true, features = ["json"] }
 tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs
index 436db59088..eb1d746f04 100644
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -40,18 +40,22 @@ use std::collections::HashMap;
 use std::fs::File;
 use std::path::Path;
 use std::process::exit;
+use std::sync::atomic::Ordering;
 use std::sync::{mpsc, Arc, Condvar, Mutex, RwLock};
 use std::{thread, time::Duration};
 
 use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Arg;
+use nix::sys::signal::{kill, Signal};
+use signal_hook::consts::{SIGQUIT, SIGTERM};
+use signal_hook::{consts::SIGINT, iterator::Signals};
 use tracing::{error, info};
 use url::Url;
 
 use compute_api::responses::ComputeStatus;
 
-use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec};
+use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec, PG_PID, SYNC_SAFEKEEPERS_PID};
 use compute_tools::configurator::launch_configurator;
 use compute_tools::extension_server::get_pg_version;
 use compute_tools::http::api::launch_http_server;
@@ -67,6 +71,13 @@ const BUILD_TAG_DEFAULT: &str = "latest";
 fn main() -> Result<()> {
     init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
 
+    let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
+    thread::spawn(move || {
+        for sig in signals.forever() {
+            handle_exit_signal(sig);
+        }
+    });
+
     let build_tag = option_env!("BUILD_TAG")
         .unwrap_or(BUILD_TAG_DEFAULT)
         .to_string();
@@ -346,6 +357,7 @@ fn main() -> Result<()> {
         let ecode = pg
             .wait()
             .expect("failed to start waiting on Postgres process");
+        PG_PID.store(0, Ordering::SeqCst);
         info!("Postgres exited with code {}, shutting down", ecode);
         exit_code = ecode.code()
     }
@@ -519,6 +531,24 @@ fn cli() -> clap::Command {
         )
 }
 
+/// When compute_ctl is killed, also send a termination signal to postgres and
+/// sync-safekeepers to prevent leaking them. TODO: it would be better to
+/// convert compute_ctl to async, which would make waiting for them easy.
+fn handle_exit_signal(sig: i32) {
+    info!("received {sig} termination signal");
+    let ss_pid = SYNC_SAFEKEEPERS_PID.load(Ordering::SeqCst);
+    if ss_pid != 0 {
+        let ss_pid = nix::unistd::Pid::from_raw(ss_pid as i32);
+        kill(ss_pid, Signal::SIGTERM).ok();
+    }
+    let pg_pid = PG_PID.load(Ordering::SeqCst);
+    if pg_pid != 0 {
+        let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
+        kill(pg_pid, Signal::SIGTERM).ok();
+    }
+    exit(1);
+}
+
 #[test]
 fn verify_cli() {
     cli().debug_assert()
diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs
index cd7be0520e..13701b7378 100644
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -6,6 +6,8 @@ use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 use std::process::{Command, Stdio};
 use std::str::FromStr;
+use std::sync::atomic::AtomicU32;
+use std::sync::atomic::Ordering;
 use std::sync::{Condvar, Mutex, RwLock};
 use std::thread;
 use std::time::Instant;
@@ -34,6 +36,9 @@ use crate::spec::*;
 use crate::sync_sk::{check_if_synced, ping_safekeeper};
 use crate::{config, extension_server};
 
+pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0);
+pub static PG_PID: AtomicU32 = AtomicU32::new(0);
+
 /// Compute node info shared across several `compute_ctl` threads.
 pub struct ComputeNode {
     // Url type maintains proper escaping
@@ -501,6 +506,7 @@ impl ComputeNode {
             .stdout(Stdio::piped())
             .spawn()
             .expect("postgres --sync-safekeepers failed to start");
+        SYNC_SAFEKEEPERS_PID.store(sync_handle.id(), Ordering::SeqCst);
 
         // `postgres --sync-safekeepers` will print all log output to stderr and
         // final LSN to stdout. So we pipe only stdout, while stderr will be automatically
@@ -508,6 +514,7 @@ impl ComputeNode {
         let sync_output = sync_handle
             .wait_with_output()
             .expect("postgres --sync-safekeepers failed");
+        SYNC_SAFEKEEPERS_PID.store(0, Ordering::SeqCst);
 
         if !sync_output.status.success() {
             anyhow::bail!(
@@ -662,6 +669,7 @@ impl ComputeNode {
             })
             .spawn()
             .expect("cannot start postgres process");
+        PG_PID.store(pg.id(), Ordering::SeqCst);
 
         wait_for_postgres(&mut pg, pgdata_path)?;
 
diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs
index 55b66742ca..3d5dfd6311 100644
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -46,6 +46,8 @@ use std::time::Duration;
 
 use anyhow::{anyhow, bail, Context, Result};
 use compute_api::spec::RemoteExtSpec;
+use nix::sys::signal::kill;
+use nix::sys::signal::Signal;
 use serde::{Deserialize, Serialize};
 use utils::id::{NodeId, TenantId, TimelineId};
 
@@ -439,11 +441,14 @@ impl Endpoint {
         Ok(())
     }
 
-    fn wait_for_compute_ctl_to_exit(&self) -> Result<()> {
+    fn wait_for_compute_ctl_to_exit(&self, send_sigterm: bool) -> Result<()> {
         // TODO use background_process::stop_process instead
         let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
         let pid: u32 = std::fs::read_to_string(pidfile_path)?.parse()?;
         let pid = nix::unistd::Pid::from_raw(pid as i32);
+        if send_sigterm {
+            kill(pid, Signal::SIGTERM).ok();
+        }
         crate::background_process::wait_until_stopped("compute_ctl", pid)?;
         Ok(())
     }
@@ -733,10 +738,15 @@ impl Endpoint {
             &None,
         )?;
 
-        // Also wait for the compute_ctl process to die. It might have some cleanup
-        // work to do after postgres stops, like syncing safekeepers, etc.
+        // Also wait for the compute_ctl process to die. It might have some
+        // cleanup work to do after postgres stops, like syncing safekeepers,
+        // etc.
         //
-        self.wait_for_compute_ctl_to_exit()?;
+        // If destroying, send it SIGTERM before waiting. Sometimes we do *not*
+        // want this cleanup: tests intentionally stop compute when a majority
+        // of safekeepers is down, so sync-safekeepers would hang otherwise.
+        // This could be a separate flag though.
+        self.wait_for_compute_ctl_to_exit(destroy)?;
        if destroy {
            println!(
                "Destroying postgres data directory '{}'",
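
For reference, the mechanism the patch adds boils down to the following
self-contained sketch, assuming the nix, signal-hook, and anyhow crates.
CHILD_PID and the `sleep` child are stand-ins for the patch's
PG_PID/SYNC_SAFEKEEPERS_PID and the real postgres and sync-safekeepers
processes:

use std::process::{exit, Command};
use std::sync::atomic::{AtomicU32, Ordering};
use std::thread;

use nix::sys::signal::{kill, Signal};
use signal_hook::consts::{SIGINT, SIGQUIT, SIGTERM};
use signal_hook::iterator::Signals;

// 0 doubles as the "no child running" sentinel, as in the patch.
static CHILD_PID: AtomicU32 = AtomicU32::new(0);

fn main() -> anyhow::Result<()> {
    // A dedicated thread forwards termination signals to the current
    // child, then exits without waiting for the child to actually die.
    let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
    thread::spawn(move || {
        for _sig in signals.forever() {
            let pid = CHILD_PID.load(Ordering::SeqCst);
            if pid != 0 {
                // Best-effort: the child may have exited already.
                kill(nix::unistd::Pid::from_raw(pid as i32), Signal::SIGTERM).ok();
            }
            exit(1);
        }
    });

    // Stand-in for spawning postgres or sync-safekeepers.
    let mut child = Command::new("sleep").arg("3600").spawn()?;
    CHILD_PID.store(child.id(), Ordering::SeqCst);
    let status = child.wait()?;
    CHILD_PID.store(0, Ordering::SeqCst);
    println!("child exited with {status}");
    Ok(())
}

Note the small window between spawn() and the store() during which a
signal would still leave an orphan; together with the lack of waiting,
this is part of why the commit message calls the approach rough.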
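The async conversion the TODO refers to could look roughly like this: a
minimal sketch assuming tokio with the "process" and "signal" features
plus anyhow (the run_postgres wrapper is hypothetical, not part of this
patch):

use tokio::process::Command;
use tokio::signal::unix::{signal, SignalKind};

async fn run_postgres() -> anyhow::Result<()> {
    let mut pg = Command::new("postgres").spawn()?;
    let mut term = signal(SignalKind::terminate())?;

    tokio::select! {
        // Normal path: postgres exits on its own.
        status = pg.wait() => {
            println!("postgres exited with {:?}", status?);
        }
        // Termination path: tokio's kill() both terminates the child
        // and reaps it, so nothing is left orphaned or unreaped. A
        // graceful variant could send SIGTERM via nix first and then
        // await pg.wait().
        _ = term.recv() => {
            pg.kill().await?;
        }
    }
    Ok(())
}

With this shape, waiting for the children on exit falls out of the
select! for free, which is exactly what the signal-thread approach above
cannot easily do.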