diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index 02684d3d16..f770622a60 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -45,3 +45,115 @@ pub fn init(log_format: LogFormat) -> anyhow::Result<()> { Ok(()) } + +/// Disable the default rust panic hook by using `set_hook`. +/// +/// For neon binaries, the assumption is that tracing is configured before with [`init`], after +/// that sentry is configured (if needed). sentry will install it's own on top of this, always +/// processing the panic before we log it. +/// +/// When the return value is dropped, the hook is reverted to std default hook (prints to stderr). +/// If the assumptions about the initialization order are not held, use +/// [`TracingPanicHookGuard::disarm`] but keep in mind, if tracing is stopped, then panics will be +/// lost. +#[must_use] +pub fn replace_panic_hook_with_tracing_panic_hook() -> TracingPanicHookGuard { + std::panic::set_hook(Box::new(tracing_panic_hook)); + TracingPanicHookGuard::new() +} + +/// Drop guard which restores the std panic hook on drop. +/// +/// Tracing should not be used when it's not configured, but we cannot really latch on to any +/// imaginary lifetime of tracing. +pub struct TracingPanicHookGuard { + act: bool, +} + +impl TracingPanicHookGuard { + fn new() -> Self { + TracingPanicHookGuard { act: true } + } + + /// Make this hook guard not do anything when dropped. + pub fn forget(&mut self) { + self.act = false; + } +} + +impl Drop for TracingPanicHookGuard { + fn drop(&mut self) { + if self.act { + let _ = std::panic::take_hook(); + } + } +} + +/// Named symbol for our panic hook, which logs the panic. +fn tracing_panic_hook(info: &std::panic::PanicInfo) { + // following rust 1.66.1 std implementation: + // https://github.com/rust-lang/rust/blob/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs#L235-L288 + let location = info.location(); + + let msg = match info.payload().downcast_ref::<&'static str>() { + Some(s) => *s, + None => match info.payload().downcast_ref::() { + Some(s) => &s[..], + None => "Box", + }, + }; + + let thread = std::thread::current(); + let thread = thread.name().unwrap_or(""); + let backtrace = std::backtrace::Backtrace::capture(); + + let _entered = if let Some(location) = location { + tracing::error_span!("panic", %thread, location = %PrettyLocation(location)) + } else { + // very unlikely to hit here, but the guarantees of std could change + tracing::error_span!("panic", %thread) + } + .entered(); + + if backtrace.status() == std::backtrace::BacktraceStatus::Captured { + // this has an annoying extra '\n' in the end which anyhow doesn't do, but we cannot really + // get rid of it as we cannot get in between of std::fmt::Formatter<'_>; we could format to + // string, maybe even to a TLS one but tracing already does that. + tracing::error!("{msg}\n\nStack backtrace:\n{backtrace}"); + } else { + tracing::error!("{msg}"); + } + + // ensure that we log something on the panic if this hook is left after tracing has been + // unconfigured. worst case when teardown is racing the panic is to log the panic twice. + tracing::dispatcher::get_default(|d| { + if let Some(_none) = d.downcast_ref::() { + let location = location.map(PrettyLocation); + log_panic_to_stderr(thread, msg, location, &backtrace); + } + }); +} + +#[cold] +fn log_panic_to_stderr( + thread: &str, + msg: &str, + location: Option>, + backtrace: &std::backtrace::Backtrace, +) { + eprintln!("panic while tracing is unconfigured: thread '{thread}' panicked at '{msg}', {location:?}\nStack backtrace:\n{backtrace}"); +} + +struct PrettyLocation<'a, 'b>(&'a std::panic::Location<'b>); + +impl std::fmt::Display for PrettyLocation<'_, '_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}:{}", self.0.file(), self.0.line(), self.0.column()) + } +} + +impl std::fmt::Debug for PrettyLocation<'_, '_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + ::fmt(self, f) + } +} diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index c499fd8d74..01a2c85d74 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -91,9 +91,9 @@ fn main() -> anyhow::Result<()> { // Initialize logging, which must be initialized before the custom panic hook is installed. logging::init(conf.log_format)?; - // disable the default rust panic hook by using `set_hook`. sentry will install it's own on top - // of this, always processing the panic before we log it. - std::panic::set_hook(Box::new(tracing_panic_hook)); + // mind the order required here: 1. logging, 2. panic_hook, 3. sentry. + // disarming this hook on pageserver, because we never tear down tracing. + logging::replace_panic_hook_with_tracing_panic_hook().forget(); // initialize sentry if SENTRY_DSN is provided let _sentry_guard = init_sentry( @@ -499,50 +499,6 @@ fn cli() -> Command { ) } -/// Named symbol for our panic hook, which logs the panic. -fn tracing_panic_hook(info: &std::panic::PanicInfo) { - // following rust 1.66.1 std implementation: - // https://github.com/rust-lang/rust/blob/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs#L235-L288 - let location = info.location(); - - let msg = match info.payload().downcast_ref::<&'static str>() { - Some(s) => *s, - None => match info.payload().downcast_ref::() { - Some(s) => &s[..], - None => "Box", - }, - }; - - let thread = std::thread::current(); - let thread = thread.name().unwrap_or(""); - let backtrace = std::backtrace::Backtrace::capture(); - - struct PrettyLocation<'a, 'b>(&'a std::panic::Location<'b>); - - impl std::fmt::Display for PrettyLocation<'_, '_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}:{}:{}", self.0.file(), self.0.line(), self.0.column()) - } - } - - let _entered = if let Some(location) = location { - tracing::error_span!("panic", %thread, location = %PrettyLocation(location)) - } else { - // very unlikely to hit here, but the guarantees of std could change - tracing::error_span!("panic", %thread) - } - .entered(); - - if backtrace.status() == std::backtrace::BacktraceStatus::Captured { - // this has an annoying extra '\n' in the end which anyhow doesn't do, but we cannot really - // get rid of it as we cannot get in between of std::fmt::Formatter<'_>; we could format to - // string, maybe even to a TLS one but tracing already does that. - tracing::error!("{msg}\n\nStack backtrace:\n{backtrace}"); - } else { - tracing::error!("{msg}"); - } -} - #[test] fn verify_cli() { cli().debug_assert(); diff --git a/proxy/src/main.rs b/proxy/src/main.rs index c319cb9cfc..85478da3bc 100644 --- a/proxy/src/main.rs +++ b/proxy/src/main.rs @@ -43,6 +43,7 @@ async fn flatten_err( #[tokio::main] async fn main() -> anyhow::Result<()> { let _logging_guard = logging::init().await?; + let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); info!("Version: {GIT_VERSION}"); diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 1a068412c8..683050e9cd 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -126,7 +126,12 @@ fn main() -> anyhow::Result<()> { return Ok(()); } + // important to keep the order of: + // 1. init logging + // 2. tracing panic hook + // 3. sentry logging::init(LogFormat::from_config(&args.log_format)?)?; + logging::replace_panic_hook_with_tracing_panic_hook().forget(); info!("version: {GIT_VERSION}"); let args_workdir = &args.datadir; diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index c73206b7dc..1a0d261184 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -424,12 +424,16 @@ async fn http1_handler( #[tokio::main] async fn main() -> Result<(), Box> { - // initialize sentry if SENTRY_DSN is provided - let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); - let args = Args::parse(); + // important to keep the order of: + // 1. init logging + // 2. tracing panic hook + // 3. sentry logging::init(LogFormat::from_config(&args.log_format)?)?; + logging::replace_panic_hook_with_tracing_panic_hook().forget(); + // initialize sentry if SENTRY_DSN is provided + let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); info!("version: {GIT_VERSION}"); ::metrics::set_build_info_metric(GIT_VERSION);