diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index a787da7069..59dadbb1d3 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -22,10 +22,10 @@ use byteorder::{ByteOrder, LittleEndian}; use bytes::{BufMut, Bytes, BytesMut}; use nix::poll::*; use serde::Serialize; -use std::fs; use std::fs::OpenOptions; use std::io::prelude::*; use std::io::{Error, ErrorKind}; +use std::ops::{Deref, DerefMut}; use std::os::unix::io::AsRawFd; use std::os::unix::prelude::CommandExt; use std::path::PathBuf; @@ -34,6 +34,7 @@ use std::process::{Child, ChildStderr, ChildStdin, ChildStdout, Command}; use std::sync::Mutex; use std::time::Duration; use std::time::Instant; +use std::{fs, io}; use tracing::*; use utils::crashsafe::path_with_suffix_extension; use utils::{bin_ser::BeSer, id::TenantId, lsn::Lsn, nonblock::set_nonblock}; @@ -44,6 +45,7 @@ use crate::metrics::{ }; use crate::pgdatadir_mapping::{key_to_rel_block, key_to_slru_block}; use crate::repository::Key; +use crate::task_mgr::BACKGROUND_RUNTIME; use crate::walrecord::NeonWalRecord; use crate::{config::PageServerConf, TEMP_FILE_SUFFIX}; use pageserver_api::reltag::{RelTag, SlruKind}; @@ -580,11 +582,10 @@ impl CloseFileDescriptors for C { /// struct PostgresRedoProcess { tenant_id: TenantId, - child: Child, + child: NoLeakChild, stdin: ChildStdin, stdout: ChildStdout, stderr: ChildStderr, - called_kill: bool, } impl PostgresRedoProcess { @@ -656,7 +657,7 @@ impl PostgresRedoProcess { } // Start postgres itself - let mut child = Command::new(pg_bin_dir_path.join("postgres")) + let child = Command::new(pg_bin_dir_path.join("postgres")) .arg("--wal-redo") .stdin(Stdio::piped()) .stderr(Stdio::piped()) @@ -675,7 +676,7 @@ impl PostgresRedoProcess { // as close-on-exec by default, but that's not enough, since we use // libraries that directly call libc open without setting that flag. .close_fds() - .spawn() + .spawn_no_leak_child() .map_err(|e| { Error::new( e.kind(), @@ -683,11 +684,10 @@ impl PostgresRedoProcess { ) })?; - info!( - pid = child.id(), - "launched WAL redo postgres process on {}", - datadir.display() - ); + let mut child = scopeguard::guard(child, |child| { + error!("killing wal-redo-postgres process due to a problem during launch"); + child.kill_and_wait(); + }); let stdin = child.stdin.take().unwrap(); let stdout = child.stdout.take().unwrap(); @@ -706,48 +706,21 @@ impl PostgresRedoProcess { set_nonblock_or_log_err!(stdout)?; set_nonblock_or_log_err!(stderr)?; + // all fallible operations post-spawn are complete, so get rid of the guard + let child = scopeguard::ScopeGuard::into_inner(child); + Ok(PostgresRedoProcess { tenant_id, child, stdin, stdout, stderr, - called_kill: false, }) } #[instrument(skip_all, fields(tenant_id=%self.tenant_id, pid=%self.child.id()))] - fn kill(mut self) { - info!("killing wal-redo-postgres process"); - self.called_kill = true; - - let res = self.child.kill(); - if let Err(e) = res { - // This branch is very unlikely because: - // - We (= pageserver) spawned this process successfully, so, we're allowed to kill it. - // - This is the only place that calls .kill() - // - We consume `self`, so, .kill() can't be called twice. - // - If the process exited by itself or was killed by someone else, - // .kill() will still succeed because we haven't wait()'ed yet. - // - // So, if we arrive here, we have really no idea what happened, - // whether the PID stored in self.child is still valid, etc. - // If this function were fallible, we'd return an error, but - // since it isn't, all we can do is log an error and proceed - // with the wait(). - error!(error = %e, "failed to SIGKILL wal-redo-postgres; subsequent wait() might fail or wait for wrong process"); - } - - match self.child.wait() { - Ok(exit_status) => { - // log at error level since .kill() is something we only do on errors ATM - error!(exit_status = %exit_status, "wal-redo-postgres wait successful"); - } - Err(e) => { - error!(error = %e, "wal-redo-postgres wait error; might leak the child process; it will show as zombie (defunct)"); - } - } - drop(self); + fn kill(self) { + self.child.kill_and_wait(); } // @@ -880,11 +853,96 @@ impl PostgresRedoProcess { } } -impl Drop for PostgresRedoProcess { - fn drop(&mut self) { - if !self.called_kill { - error!(tenant_id=%self.tenant_id, pid = %self.child.id(), "dropping PostgresRedoProcess that wasn't killed, likely causing defunct postgres process"); +/// Wrapper type around `std::process::Child` which guarantees that the child +/// will be killed and waited-for by this process before being dropped. +struct NoLeakChild { + child: Option, +} + +impl Deref for NoLeakChild { + type Target = Child; + + fn deref(&self) -> &Self::Target { + self.child.as_ref().expect("must not use from drop") + } +} + +impl DerefMut for NoLeakChild { + fn deref_mut(&mut self) -> &mut Self::Target { + self.child.as_mut().expect("must not use from drop") + } +} + +impl NoLeakChild { + fn spawn(command: &mut Command) -> io::Result { + let child = command.spawn()?; + Ok(NoLeakChild { child: Some(child) }) + } + + fn kill_and_wait(mut self) { + let child = match self.child.take() { + Some(child) => child, + None => return, + }; + Self::kill_and_wait_impl(child); + } + + #[instrument(skip_all, fields(pid=child.id()))] + fn kill_and_wait_impl(mut child: Child) { + let res = child.kill(); + if let Err(e) = res { + // This branch is very unlikely because: + // - We (= pageserver) spawned this process successfully, so, we're allowed to kill it. + // - This is the only place that calls .kill() + // - We consume `self`, so, .kill() can't be called twice. + // - If the process exited by itself or was killed by someone else, + // .kill() will still succeed because we haven't wait()'ed yet. + // + // So, if we arrive here, we have really no idea what happened, + // whether the PID stored in self.child is still valid, etc. + // If this function were fallible, we'd return an error, but + // since it isn't, all we can do is log an error and proceed + // with the wait(). + error!(error = %e, "failed to SIGKILL; subsequent wait() might fail or wait for wrong process"); } + + match child.wait() { + Ok(exit_status) => { + // log at error level since .kill() is something we only do on errors ATM + error!(exit_status = %exit_status, "wait successful"); + } + Err(e) => { + error!(error = %e, "wait error; might leak the child process; it will show as zombie (defunct)"); + } + } + } +} + +impl Drop for NoLeakChild { + fn drop(&mut self) { + let child = match self.child.take() { + Some(child) => child, + None => return, + }; + // Offload the kill+wait of the child process into the background. + // If someone stops the runtime, we'll leak the child process. + // We can ignore that case because we only stop the runtime on pageserver exit. + BACKGROUND_RUNTIME.spawn(async move { + tokio::task::spawn_blocking(move || { + Self::kill_and_wait_impl(child); + }) + .await + }); + } +} + +trait NoLeakChildCommandExt { + fn spawn_no_leak_child(&mut self) -> io::Result; +} + +impl NoLeakChildCommandExt for Command { + fn spawn_no_leak_child(&mut self) -> io::Result { + NoLeakChild::spawn(self) } } diff --git a/poetry.lock b/poetry.lock index 551b267a87..bc1b57bc64 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1077,6 +1077,17 @@ python-versions = ">=3.6" [package.extras] twisted = ["twisted"] +[[package]] +name = "psutil" +version = "5.9.4" +description = "Cross-platform lib for process and system monitoring in Python." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.extras] +test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] + [[package]] name = "psycopg2-binary" version = "2.9.3" @@ -1228,8 +1239,8 @@ python-versions = ">=3.6" [package.dependencies] pytest = [ - {version = ">=6.2.4", markers = "python_version >= \"3.10\""}, {version = ">=5.0", markers = "python_version < \"3.10\""}, + {version = ">=6.2.4", markers = "python_version >= \"3.10\""}, ] [[package]] @@ -1436,6 +1447,14 @@ category = "dev" optional = false python-versions = ">=3.7" +[[package]] +name = "types-psutil" +version = "5.9.5.4" +description = "Typing stubs for psutil" +category = "main" +optional = false +python-versions = "*" + [[package]] name = "types-psycopg2" version = "2.9.18" @@ -1555,7 +1574,7 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>= [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "ebe16714bd4db1f34f005c9b72392f165618b020a2d0948cae20e0e8894c5517" +content-hash = "c95c184fccaf40815405ad616ec1c55869c7f87b72777cc3a9cbaff41de98977" [metadata.files] aiopg = [ @@ -1966,9 +1985,26 @@ prometheus-client = [ {file = "prometheus_client-0.14.1-py3-none-any.whl", hash = "sha256:522fded625282822a89e2773452f42df14b5a8e84a86433e3f8a189c1d54dc01"}, {file = "prometheus_client-0.14.1.tar.gz", hash = "sha256:5459c427624961076277fdc6dc50540e2bacb98eebde99886e59ec55ed92093a"}, ] +psutil = [ + {file = "psutil-5.9.4-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c1ca331af862803a42677c120aff8a814a804e09832f166f226bfd22b56feee8"}, + {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:68908971daf802203f3d37e78d3f8831b6d1014864d7a85937941bb35f09aefe"}, + {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:3ff89f9b835100a825b14c2808a106b6fdcc4b15483141482a12c725e7f78549"}, + {file = "psutil-5.9.4-cp27-cp27m-win32.whl", hash = "sha256:852dd5d9f8a47169fe62fd4a971aa07859476c2ba22c2254d4a1baa4e10b95ad"}, + {file = "psutil-5.9.4-cp27-cp27m-win_amd64.whl", hash = "sha256:9120cd39dca5c5e1c54b59a41d205023d436799b1c8c4d3ff71af18535728e94"}, + {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6b92c532979bafc2df23ddc785ed116fced1f492ad90a6830cf24f4d1ea27d24"}, + {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:efeae04f9516907be44904cc7ce08defb6b665128992a56957abc9b61dca94b7"}, + {file = "psutil-5.9.4-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:54d5b184728298f2ca8567bf83c422b706200bcbbfafdc06718264f9393cfeb7"}, + {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16653106f3b59386ffe10e0bad3bb6299e169d5327d3f187614b1cb8f24cf2e1"}, + {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54c0d3d8e0078b7666984e11b12b88af2db11d11249a8ac8920dd5ef68a66e08"}, + {file = "psutil-5.9.4-cp36-abi3-win32.whl", hash = "sha256:149555f59a69b33f056ba1c4eb22bb7bf24332ce631c44a319cec09f876aaeff"}, + {file = "psutil-5.9.4-cp36-abi3-win_amd64.whl", hash = "sha256:fd8522436a6ada7b4aad6638662966de0d61d241cb821239b2ae7013d41a43d4"}, + {file = "psutil-5.9.4-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6001c809253a29599bc0dfd5179d9f8a5779f9dffea1da0f13c53ee568115e1e"}, + {file = "psutil-5.9.4.tar.gz", hash = "sha256:3d7f9739eb435d4b1338944abe23f49584bde5395f27487d2ee25ad9a8774a62"}, +] psycopg2-binary = [ {file = "psycopg2-binary-2.9.3.tar.gz", hash = "sha256:761df5313dc15da1502b21453642d7599d26be88bff659382f8f9747c7ebea4e"}, {file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:539b28661b71da7c0e428692438efbcd048ca21ea81af618d845e06ebfd29478"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f2534ab7dc7e776a263b463a16e189eb30e85ec9bbe1bff9e78dae802608932"}, {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e82d38390a03da28c7985b394ec3f56873174e2c88130e6966cb1c946508e65"}, {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57804fc02ca3ce0dbfbef35c4b3a4a774da66d66ea20f4bda601294ad2ea6092"}, {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:083a55275f09a62b8ca4902dd11f4b33075b743cf0d360419e2051a8a5d5ff76"}, @@ -2002,6 +2038,7 @@ psycopg2-binary = [ {file = "psycopg2_binary-2.9.3-cp37-cp37m-win32.whl", hash = "sha256:adf20d9a67e0b6393eac162eb81fb10bc9130a80540f4df7e7355c2dd4af9fba"}, {file = "psycopg2_binary-2.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2f9ffd643bc7349eeb664eba8864d9e01f057880f510e4681ba40a6532f93c71"}, {file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:def68d7c21984b0f8218e8a15d514f714d96904265164f75f8d3a70f9c295667"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e6aa71ae45f952a2205377773e76f4e3f27951df38e69a4c95440c779e013560"}, {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dffc08ca91c9ac09008870c9eb77b00a46b3378719584059c034b8945e26b272"}, {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:280b0bb5cbfe8039205c7981cceb006156a675362a00fe29b16fbc264e242834"}, {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:af9813db73395fb1fc211bac696faea4ca9ef53f32dc0cfa27e4e7cf766dcf24"}, @@ -2013,6 +2050,7 @@ psycopg2-binary = [ {file = "psycopg2_binary-2.9.3-cp38-cp38-win32.whl", hash = "sha256:6472a178e291b59e7f16ab49ec8b4f3bdada0a879c68d3817ff0963e722a82ce"}, {file = "psycopg2_binary-2.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:35168209c9d51b145e459e05c31a9eaeffa9a6b0fd61689b48e07464ffd1a83e"}, {file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:47133f3f872faf28c1e87d4357220e809dfd3fa7c64295a4a148bcd1e6e34ec9"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b3a24a1982ae56461cc24f6680604fffa2c1b818e9dc55680da038792e004d18"}, {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91920527dea30175cc02a1099f331aa8c1ba39bf8b7762b7b56cbf54bc5cce42"}, {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:887dd9aac71765ac0d0bac1d0d4b4f2c99d5f5c1382d8b770404f0f3d0ce8a39"}, {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:1f14c8b0942714eb3c74e1e71700cbbcb415acbc311c730370e70c578a44a25c"}, @@ -2029,18 +2067,7 @@ py = [ {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, ] pyasn1 = [ - {file = "pyasn1-0.4.8-py2.4.egg", hash = "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"}, - {file = "pyasn1-0.4.8-py2.5.egg", hash = "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf"}, - {file = "pyasn1-0.4.8-py2.6.egg", hash = "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00"}, - {file = "pyasn1-0.4.8-py2.7.egg", hash = "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8"}, {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, - {file = "pyasn1-0.4.8-py3.1.egg", hash = "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86"}, - {file = "pyasn1-0.4.8-py3.2.egg", hash = "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7"}, - {file = "pyasn1-0.4.8-py3.3.egg", hash = "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576"}, - {file = "pyasn1-0.4.8-py3.4.egg", hash = "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12"}, - {file = "pyasn1-0.4.8-py3.5.egg", hash = "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2"}, - {file = "pyasn1-0.4.8-py3.6.egg", hash = "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359"}, - {file = "pyasn1-0.4.8-py3.7.egg", hash = "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776"}, {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, ] pycodestyle = [ @@ -2146,6 +2173,13 @@ pyyaml = [ {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, + {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, + {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, @@ -2213,6 +2247,10 @@ tomli = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +types-psutil = [ + {file = "types-psutil-5.9.5.4.tar.gz", hash = "sha256:aa09102b80c65a3b4573216614372398dab78972d650488eaff1ff05482cc18f"}, + {file = "types_psutil-5.9.5.4-py3-none-any.whl", hash = "sha256:28e59764630187e462d43788efa16d59d5e77b510115f9e25901b2d4007fca62"}, +] types-psycopg2 = [ {file = "types-psycopg2-2.9.18.tar.gz", hash = "sha256:9b0e9e1f097b15cd9fa8aad2596a9e3082fd72f8d9cfe52b190cfa709105b6c0"}, {file = "types_psycopg2-2.9.18-py3-none-any.whl", hash = "sha256:14c779dcab18c31453fa1cad3cf4b1601d33540a344adead3c47a6b8091cd2fa"}, diff --git a/pyproject.toml b/pyproject.toml index c2e2e2393b..b13acece18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,8 @@ pytest-order = "^1.0.1" allure-pytest = "^2.10.0" pytest-asyncio = "^0.19.0" toml = "^0.10.2" +psutil = "^5.9.4" +types-psutil = "^5.9.5.4" [tool.poetry.dev-dependencies] flake8 = "^5.0.4" diff --git a/test_runner/regress/test_walredo_not_left_behind_on_detach.py b/test_runner/regress/test_walredo_not_left_behind_on_detach.py new file mode 100644 index 0000000000..c79aea35da --- /dev/null +++ b/test_runner/regress/test_walredo_not_left_behind_on_detach.py @@ -0,0 +1,104 @@ +import time + +import psutil +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder, PageserverApiException +from fixtures.types import TenantId + + +def assert_child_processes(pageserver_pid, wal_redo_present=False, defunct_present=False): + children = psutil.Process(pageserver_pid).children() + for child in children: + if not wal_redo_present: + assert "--wal-redo" not in child.cmdline() + if not defunct_present: + assert child.status() != psutil.STATUS_ZOMBIE + + +# Check that the pageserver doesn't leave behind WAL redo processes +# when a tenant is detached. We had an issue previously where we failed +# to wait and consume the exit code of the WAL redo process, leaving it behind +# as a zombie process. +def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder): + env = neon_env_builder.init_start() + pageserver_http = env.pageserver.http_client() + + pagserver_pid = int((env.repo_dir / "pageserver.pid").read_text()) + + assert_child_processes(pagserver_pid, wal_redo_present=False, defunct_present=False) + + # first check for non existing tenant + tenant_id = TenantId.generate() + with pytest.raises( + expected_exception=PageserverApiException, + match=f"Tenant not found for id {tenant_id}", + ): + pageserver_http.tenant_detach(tenant_id) + + # create new nenant + tenant_id, _ = env.neon_cli.create_tenant() + + # assert tenant exists on disk + assert (env.repo_dir / "tenants" / str(tenant_id)).exists() + + pg = env.postgres.create_start("main", tenant_id=tenant_id) + + pg_conn = pg.connect() + cur = pg_conn.cursor() + + # Create table, and insert some rows. Make it big enough that it doesn't fit in + # shared_buffers, otherwise the SELECT after restart will just return answer + # from shared_buffers without hitting the page server, which defeats the point + # of this test. + cur.execute("CREATE TABLE foo (t text)") + cur.execute( + """ + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100000) g + """ + ) + + # Verify that the table is larger than shared_buffers + cur.execute( + """ + select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_ize + from pg_settings where name = 'shared_buffers' + """ + ) + row = cur.fetchone() + assert row is not None + log.info(f"shared_buffers is {row[0]}, table size {row[1]}") + assert int(row[0]) < int(row[1]) + + cur.execute("SELECT count(*) FROM foo") + assert cur.fetchone() == (100000,) + + # After filling the table and doing the SELECT, it is guaranteed that we did some WAL redo. + # So, assert that the WAL redo process is present. + # XXX this is quite brittle as the lifecycle of the WAL redo process is an implementation detail + assert_child_processes(pagserver_pid, wal_redo_present=True, defunct_present=False) + + last_error = None + for i in range(3): + try: + pageserver_http.tenant_detach(tenant_id) + except Exception as e: + last_error = e + log.error(f"try {i} error detaching tenant: {e}") + continue + else: + break + # else is called if the loop finished without reaching "break" + else: + pytest.fail(f"could not detach tenant: {last_error}") + + # check that nothing is left on disk for deleted tenant + assert not (env.repo_dir / "tenants" / str(tenant_id)).exists() + + # Pageserver schedules kill+wait of the WAL redo process to the background runtime, + # asynchronously to tenant detach. Cut it some slack to complete kill+wait before + # checking. + time.sleep(1.0) + assert_child_processes(pagserver_pid, wal_redo_present=False, defunct_present=False)