From 8bc8d9d91878276364cde02058ce48c0e5589d83 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 6 Dec 2023 17:48:50 +0000 Subject: [PATCH] utils::logging: implement tracing_chrome & tracing_flame support --- Cargo.lock | 41 ++++++++ control_plane/attachment_service/src/main.rs | 2 +- control_plane/src/background_process.rs | 14 ++- libs/desim/tests/reliable_copy_test.rs | 2 +- libs/remote_storage/tests/common/mod.rs | 2 +- libs/utils/Cargo.toml | 2 + libs/utils/src/env_config.rs | 48 +++++++++ libs/utils/src/lib.rs | 1 + libs/utils/src/logging.rs | 104 +++++++++++++++++-- pageserver/pagebench/src/main.rs | 2 +- pageserver/src/bin/pageserver.rs | 2 +- pageserver/src/tenant.rs | 2 +- safekeeper/src/bin/safekeeper.rs | 2 +- storage_broker/src/bin/storage_broker.rs | 2 +- 14 files changed, 207 insertions(+), 19 deletions(-) create mode 100644 libs/utils/src/env_config.rs diff --git a/Cargo.lock b/Cargo.lock index ecc69f7048..a6ce0a516d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3125,6 +3125,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num" version = "0.4.1" @@ -3461,6 +3471,12 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "p256" version = "0.11.1" @@ -6290,6 +6306,17 @@ dependencies = [ "syn 2.0.52", ] +[[package]] +name = "tracing-chrome" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "496b3cd5447f7ff527bbbf19b071ad542a000adf297d4127078b4dfdb931f41a" +dependencies = [ + "serde_json", + "tracing-core", + "tracing-subscriber", +] + [[package]] name = "tracing-core" version = "0.1.31" @@ -6310,6 +6337,17 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "tracing-flame" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bae117ee14789185e129aaee5d93750abe67fdc5a9a62650452bfe4e122a3a9" +dependencies = [ + "lazy_static", + "tracing", + "tracing-subscriber", +] + [[package]] name = "tracing-log" version = "0.1.3" @@ -6352,6 +6390,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" dependencies = [ "matchers", + "nu-ansi-term", "once_cell", "regex", "serde", @@ -6597,7 +6636,9 @@ dependencies = [ "tokio-tar", "tokio-util", "tracing", + "tracing-chrome", "tracing-error", + "tracing-flame", "tracing-subscriber", "url", "uuid", diff --git a/control_plane/attachment_service/src/main.rs b/control_plane/attachment_service/src/main.rs index bd8d7f5c59..c45070daca 100644 --- a/control_plane/attachment_service/src/main.rs +++ b/control_plane/attachment_service/src/main.rs @@ -171,7 +171,7 @@ fn main() -> anyhow::Result<()> { async fn async_main() -> anyhow::Result<()> { let launch_ts = Box::leak(Box::new(LaunchTimestamp::generate())); - logging::init( + let _guard = logging::init( LogFormat::Plain, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout, diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index 2fced7d778..fbd739ac9e 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -86,7 +86,10 @@ where .stdout(process_log_file) .stderr(same_file_for_stderr) .args(args); - let filled_cmd = fill_remote_storage_secrets_vars(fill_rust_env_vars(background_command)); + + let filled_cmd = fill_env_vars_prefixed_neon(fill_remote_storage_secrets_vars( + fill_rust_env_vars(background_command), + )); filled_cmd.envs(envs); let pid_file_to_check = match &initial_pid_file { @@ -268,6 +271,15 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command { cmd } +fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command { + for (var, val) in std::env::vars() { + if var.starts_with("NEON_") { + cmd = cmd.env(var, val); + } + } + cmd +} + /// Add a `pre_exec` to the cmd that, inbetween fork() and exec(), /// 1. Claims a pidfile with a fcntl lock on it and /// 2. Sets up the pidfile's file descriptor so that it (and the lock) diff --git a/libs/desim/tests/reliable_copy_test.rs b/libs/desim/tests/reliable_copy_test.rs index cf7bff8f5a..4f6d425be9 100644 --- a/libs/desim/tests/reliable_copy_test.rs +++ b/libs/desim/tests/reliable_copy_test.rs @@ -154,7 +154,7 @@ mod reliable_copy_test { /// Run test simulations. #[test] fn sim_example_reliable_copy() { - utils::logging::init( + let _guard = utils::logging::init( utils::logging::LogFormat::Test, utils::logging::TracingErrorLayerEnablement::Disabled, utils::logging::Output::Stdout, diff --git a/libs/remote_storage/tests/common/mod.rs b/libs/remote_storage/tests/common/mod.rs index da9dc08d8d..812e1f27d0 100644 --- a/libs/remote_storage/tests/common/mod.rs +++ b/libs/remote_storage/tests/common/mod.rs @@ -205,7 +205,7 @@ pub(crate) async fn upload_remote_data( pub(crate) fn ensure_logging_ready() { LOGGING_DONE.get_or_init(|| { - utils::logging::init( + let _ = utils::logging::init( utils::logging::LogFormat::Test, utils::logging::TracingErrorLayerEnablement::Disabled, utils::logging::Output::Stdout, diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index c2d9d9d396..1819900a33 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -60,6 +60,8 @@ const_format.workspace = true # to use tokio channels as streams, this is faster to compile than async_stream # why is it only here? no other crate should use it, streams are rarely needed. tokio-stream = { version = "0.1.14" } +tracing-chrome = "0.7.1" +tracing-flame = "0.2.0" serde_path_to_error.workspace = true diff --git a/libs/utils/src/env_config.rs b/libs/utils/src/env_config.rs new file mode 100644 index 0000000000..823a49796c --- /dev/null +++ b/libs/utils/src/env_config.rs @@ -0,0 +1,48 @@ +use std::{fmt::Display, str::FromStr}; + +pub fn var(varname: &str, default: D) -> V +where + V: FromStr, + E: Display, + D: FnOnce() -> V, +{ + match std::env::var(varname) { + Ok(s) => s + .parse() + .map_err(|e| format!("failed to parse env var {varname}: {e:#}")) + .unwrap(), + Err(std::env::VarError::NotPresent) => default(), + Err(std::env::VarError::NotUnicode(_)) => { + panic!("env var {varname} is not unicode") + } + } +} + +pub struct Bool(bool); + +impl Bool { + pub const fn new_const() -> Self { + Bool(V) + } +} + +impl FromStr for Bool { + type Err = String; + + fn from_str(s: &str) -> Result { + if let Ok(b) = s.parse() { + return Ok(Bool(b)); + } + Ok(Bool(match s { + "0" => false, + "1" => true, + _ => return Err(format!("not a bool, accepting 0|1|{}|{}", false, true)), + })) + } +} + +impl Into for Bool { + fn into(self) -> bool { + self.0 + } +} diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index 04ce0626c8..bd04eeea05 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -27,6 +27,7 @@ pub mod auth; pub mod id; mod hex; +pub mod env_config; pub use hex::Hex; // http endpoint utils diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index f7b73dc984..bdc4b58ad3 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -1,10 +1,16 @@ -use std::str::FromStr; +use std::{ + io::BufWriter, + str::FromStr, + sync::{Arc, Mutex}, +}; use anyhow::Context; use metrics::{IntCounter, IntCounterVec}; use once_cell::sync::Lazy; use strum_macros::{EnumString, EnumVariantNames}; +use super::env_config; + #[derive(EnumString, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy)] #[strum(serialize_all = "snake_case")] pub enum LogFormat { @@ -98,11 +104,26 @@ pub enum Output { Stderr, } +/// Keep alive and drop it before the program terminates. +#[must_use] +pub struct FlushGuard(Arc>); + +struct FlushGuardInner { + _tracing_chrome_layer: Option, + _tracing_flame_layer: Option>>, +} + +impl From for FlushGuard { + fn from(value: FlushGuardInner) -> Self { + Self(Arc::new(Mutex::new(value))) + } +} + pub fn init( log_format: LogFormat, tracing_error_layer_enablement: TracingErrorLayerEnablement, output: Output, -) -> anyhow::Result<()> { +) -> anyhow::Result { // We fall back to printing all spans at info-level or above if // the RUST_LOG environment variable is not set. let rust_log_env_filter = || { @@ -113,8 +134,28 @@ pub fn init( // NB: the order of the with() calls does not matter. // See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering use tracing_subscriber::prelude::*; - let r = tracing_subscriber::registry(); - let r = r.with({ + + // https://users.rust-lang.org/t/how-can-i-init-tracing-registry-dynamically-with-multiple-outputs/94307/6 + #[derive(Default)] + struct LayerStack { + layers: + Option + Sync + Send>>, + } + impl LayerStack { + fn add_layer(&mut self, new_layer: L) + where + L: tracing_subscriber::Layer + Send + Sync, + { + let new = match self.layers.take() { + Some(layers) => Some(layers.and_then(new_layer).boxed()), + None => Some(new_layer.boxed()), + }; + self.layers = new; + } + } + let mut layers = LayerStack::default(); + + layers.add_layer({ let log_layer = tracing_subscriber::fmt::layer() .with_target(false) .with_ansi(false) @@ -131,17 +172,60 @@ pub fn init( }; log_layer.with_filter(rust_log_env_filter()) }); - let r = r.with( + + layers + .add_layer( TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()), ); + + let tracing_chrome_layer_flush_guard = if env_config::var( + "NEON_UTILS_LOGGING_ENABLE_TRACING_CHROME", + env_config::Bool::new_const::, + ) + .into() + { + let (layer, guard) = tracing_chrome::ChromeLayerBuilder::new() + .trace_style(tracing_chrome::TraceStyle::Async) + .build(); + layers.add_layer(layer.with_filter(rust_log_env_filter())); + Some(guard) + } else { + None + }; + + let tracing_flame_flush_guard = if env_config::var( + "NEON_UTILS_LOGGING_ENABLE_TRACING_FLAME", + env_config::Bool::new_const::, + ) + .into() + { + let (layer, guard) = tracing_flame::FlameLayer::with_file("./tracing.folded").unwrap(); + let layer = layer + .with_empty_samples(false) + .with_module_path(false) + .with_file_and_line(false) + .with_threads_collapsed(true); + layers.add_layer(layer.with_filter(rust_log_env_filter())); + Some(guard) + } else { + None + }; + match tracing_error_layer_enablement { - TracingErrorLayerEnablement::EnableWithRustLogFilter => r - .with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())) - .init(), - TracingErrorLayerEnablement::Disabled => r.init(), + TracingErrorLayerEnablement::EnableWithRustLogFilter => layers + .add_layer(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())), + TracingErrorLayerEnablement::Disabled => (), } - Ok(()) + let r = tracing_subscriber::registry(); + r.with(layers.layers.expect("we add at least one layer")) + .init(); + + Ok(FlushGuardInner { + _tracing_chrome_layer: tracing_chrome_layer_flush_guard, + _tracing_flame_layer: tracing_flame_flush_guard, + } + .into()) } /// Disable the default rust panic hook by using `set_hook`. diff --git a/pageserver/pagebench/src/main.rs b/pageserver/pagebench/src/main.rs index 743102d853..b385695cd1 100644 --- a/pageserver/pagebench/src/main.rs +++ b/pageserver/pagebench/src/main.rs @@ -30,7 +30,7 @@ enum Args { } fn main() { - logging::init( + let _guard = logging::init( logging::LogFormat::Plain, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stderr, diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index c80230d4d7..cb0bfc6235 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -104,7 +104,7 @@ fn main() -> anyhow::Result<()> { } else { TracingErrorLayerEnablement::Disabled }; - logging::init( + let _guard = logging::init( conf.log_format, tracing_error_layer_enablement, logging::Output::Stdout, diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 3be469c63e..216b7415bb 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3671,7 +3671,7 @@ pub(crate) mod harness { pub deletion_queue: MockDeletionQueue, } - static LOG_HANDLE: OnceCell<()> = OnceCell::new(); + static LOG_HANDLE: OnceCell = OnceCell::new(); pub(crate) fn setup_logging() { LOG_HANDLE.get_or_init(|| { diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 3c4c81e499..00b28c0839 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -219,7 +219,7 @@ async fn main() -> anyhow::Result<()> { // 1. init logging // 2. tracing panic hook // 3. sentry - logging::init( + let _guard = logging::init( LogFormat::from_config(&args.log_format)?, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout, diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index 4e5f8ed724..3723584843 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -632,7 +632,7 @@ async fn main() -> Result<(), Box> { // 1. init logging // 2. tracing panic hook // 3. sentry - logging::init( + let _guard = logging::init( LogFormat::from_config(&args.log_format)?, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout,