From d7d3f451f0fe6a037c8c4add276da8dcb2ad11f8 Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Tue, 21 Feb 2023 10:03:55 +0200 Subject: [PATCH] Use tracing panic hook in all binaries (#3634) Enables tracing panic hook in addition to pageserver introduced in #3475: - proxy - safekeeper - storage_broker For proxy, a drop guard which resets the original std panic hook was added on the first commit. Other binaries don't need it so they never reset anything by `disarm`ing the drop guard. The aim of the change is to make sure all panics a) have span information b) are logged similar to other messages, not interleaved with other messages as happens right now. Interleaving happens right now because std prints panics to stderr, and other logging happens in stdout. If this was handled gracefully by some utility, the log message splitter would treat panics as belonging to the previous message because it expects a message to start with a timestamp. Cc: #3468 --- libs/utils/src/logging.rs | 112 +++++++++++++++++++++++ pageserver/src/bin/pageserver.rs | 50 +--------- proxy/src/main.rs | 1 + safekeeper/src/bin/safekeeper.rs | 5 + storage_broker/src/bin/storage_broker.rs | 10 +- 5 files changed, 128 insertions(+), 50 deletions(-) diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index 02684d3d16..f770622a60 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -45,3 +45,115 @@ pub fn init(log_format: LogFormat) -> anyhow::Result<()> { Ok(()) } + +/// Disable the default rust panic hook by using `set_hook`. +/// +/// For neon binaries, the assumption is that tracing is configured before with [`init`], after +/// that sentry is configured (if needed). sentry will install it's own on top of this, always +/// processing the panic before we log it. +/// +/// When the return value is dropped, the hook is reverted to std default hook (prints to stderr). +/// If the assumptions about the initialization order are not held, use +/// [`TracingPanicHookGuard::disarm`] but keep in mind, if tracing is stopped, then panics will be +/// lost. +#[must_use] +pub fn replace_panic_hook_with_tracing_panic_hook() -> TracingPanicHookGuard { + std::panic::set_hook(Box::new(tracing_panic_hook)); + TracingPanicHookGuard::new() +} + +/// Drop guard which restores the std panic hook on drop. +/// +/// Tracing should not be used when it's not configured, but we cannot really latch on to any +/// imaginary lifetime of tracing. +pub struct TracingPanicHookGuard { + act: bool, +} + +impl TracingPanicHookGuard { + fn new() -> Self { + TracingPanicHookGuard { act: true } + } + + /// Make this hook guard not do anything when dropped. + pub fn forget(&mut self) { + self.act = false; + } +} + +impl Drop for TracingPanicHookGuard { + fn drop(&mut self) { + if self.act { + let _ = std::panic::take_hook(); + } + } +} + +/// Named symbol for our panic hook, which logs the panic. +fn tracing_panic_hook(info: &std::panic::PanicInfo) { + // following rust 1.66.1 std implementation: + // https://github.com/rust-lang/rust/blob/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs#L235-L288 + let location = info.location(); + + let msg = match info.payload().downcast_ref::<&'static str>() { + Some(s) => *s, + None => match info.payload().downcast_ref::() { + Some(s) => &s[..], + None => "Box", + }, + }; + + let thread = std::thread::current(); + let thread = thread.name().unwrap_or(""); + let backtrace = std::backtrace::Backtrace::capture(); + + let _entered = if let Some(location) = location { + tracing::error_span!("panic", %thread, location = %PrettyLocation(location)) + } else { + // very unlikely to hit here, but the guarantees of std could change + tracing::error_span!("panic", %thread) + } + .entered(); + + if backtrace.status() == std::backtrace::BacktraceStatus::Captured { + // this has an annoying extra '\n' in the end which anyhow doesn't do, but we cannot really + // get rid of it as we cannot get in between of std::fmt::Formatter<'_>; we could format to + // string, maybe even to a TLS one but tracing already does that. + tracing::error!("{msg}\n\nStack backtrace:\n{backtrace}"); + } else { + tracing::error!("{msg}"); + } + + // ensure that we log something on the panic if this hook is left after tracing has been + // unconfigured. worst case when teardown is racing the panic is to log the panic twice. + tracing::dispatcher::get_default(|d| { + if let Some(_none) = d.downcast_ref::() { + let location = location.map(PrettyLocation); + log_panic_to_stderr(thread, msg, location, &backtrace); + } + }); +} + +#[cold] +fn log_panic_to_stderr( + thread: &str, + msg: &str, + location: Option>, + backtrace: &std::backtrace::Backtrace, +) { + eprintln!("panic while tracing is unconfigured: thread '{thread}' panicked at '{msg}', {location:?}\nStack backtrace:\n{backtrace}"); +} + +struct PrettyLocation<'a, 'b>(&'a std::panic::Location<'b>); + +impl std::fmt::Display for PrettyLocation<'_, '_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}:{}", self.0.file(), self.0.line(), self.0.column()) + } +} + +impl std::fmt::Debug for PrettyLocation<'_, '_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + ::fmt(self, f) + } +} diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index c499fd8d74..01a2c85d74 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -91,9 +91,9 @@ fn main() -> anyhow::Result<()> { // Initialize logging, which must be initialized before the custom panic hook is installed. logging::init(conf.log_format)?; - // disable the default rust panic hook by using `set_hook`. sentry will install it's own on top - // of this, always processing the panic before we log it. - std::panic::set_hook(Box::new(tracing_panic_hook)); + // mind the order required here: 1. logging, 2. panic_hook, 3. sentry. + // disarming this hook on pageserver, because we never tear down tracing. + logging::replace_panic_hook_with_tracing_panic_hook().forget(); // initialize sentry if SENTRY_DSN is provided let _sentry_guard = init_sentry( @@ -499,50 +499,6 @@ fn cli() -> Command { ) } -/// Named symbol for our panic hook, which logs the panic. -fn tracing_panic_hook(info: &std::panic::PanicInfo) { - // following rust 1.66.1 std implementation: - // https://github.com/rust-lang/rust/blob/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs#L235-L288 - let location = info.location(); - - let msg = match info.payload().downcast_ref::<&'static str>() { - Some(s) => *s, - None => match info.payload().downcast_ref::() { - Some(s) => &s[..], - None => "Box", - }, - }; - - let thread = std::thread::current(); - let thread = thread.name().unwrap_or(""); - let backtrace = std::backtrace::Backtrace::capture(); - - struct PrettyLocation<'a, 'b>(&'a std::panic::Location<'b>); - - impl std::fmt::Display for PrettyLocation<'_, '_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}:{}:{}", self.0.file(), self.0.line(), self.0.column()) - } - } - - let _entered = if let Some(location) = location { - tracing::error_span!("panic", %thread, location = %PrettyLocation(location)) - } else { - // very unlikely to hit here, but the guarantees of std could change - tracing::error_span!("panic", %thread) - } - .entered(); - - if backtrace.status() == std::backtrace::BacktraceStatus::Captured { - // this has an annoying extra '\n' in the end which anyhow doesn't do, but we cannot really - // get rid of it as we cannot get in between of std::fmt::Formatter<'_>; we could format to - // string, maybe even to a TLS one but tracing already does that. - tracing::error!("{msg}\n\nStack backtrace:\n{backtrace}"); - } else { - tracing::error!("{msg}"); - } -} - #[test] fn verify_cli() { cli().debug_assert(); diff --git a/proxy/src/main.rs b/proxy/src/main.rs index c319cb9cfc..85478da3bc 100644 --- a/proxy/src/main.rs +++ b/proxy/src/main.rs @@ -43,6 +43,7 @@ async fn flatten_err( #[tokio::main] async fn main() -> anyhow::Result<()> { let _logging_guard = logging::init().await?; + let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); info!("Version: {GIT_VERSION}"); diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 1a068412c8..683050e9cd 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -126,7 +126,12 @@ fn main() -> anyhow::Result<()> { return Ok(()); } + // important to keep the order of: + // 1. init logging + // 2. tracing panic hook + // 3. sentry logging::init(LogFormat::from_config(&args.log_format)?)?; + logging::replace_panic_hook_with_tracing_panic_hook().forget(); info!("version: {GIT_VERSION}"); let args_workdir = &args.datadir; diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index c73206b7dc..1a0d261184 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -424,12 +424,16 @@ async fn http1_handler( #[tokio::main] async fn main() -> Result<(), Box> { - // initialize sentry if SENTRY_DSN is provided - let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); - let args = Args::parse(); + // important to keep the order of: + // 1. init logging + // 2. tracing panic hook + // 3. sentry logging::init(LogFormat::from_config(&args.log_format)?)?; + logging::replace_panic_hook_with_tracing_panic_hook().forget(); + // initialize sentry if SENTRY_DSN is provided + let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); info!("version: {GIT_VERSION}"); ::metrics::set_build_info_metric(GIT_VERSION);