From 6910eaea78a1e8f5d96680b5bf6c5cbd2fceef65 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 6 Dec 2023 17:48:50 +0000 Subject: [PATCH] utils::logging: implement tracing_chrome & tracing_flame support --- Cargo.lock | 42 +++++++++++ control_plane/src/background_process.rs | 5 +- libs/desim/tests/reliable_copy_test.rs | 2 +- libs/remote_storage/tests/common/mod.rs | 2 +- libs/utils/Cargo.toml | 2 + libs/utils/src/lib.rs | 1 + libs/utils/src/logging.rs | 94 +++++++++++++++++++++--- pageserver/ctl/src/main.rs | 2 +- pageserver/pagebench/src/main.rs | 2 +- pageserver/src/bin/pageserver.rs | 2 +- pageserver/src/tenant.rs | 2 +- safekeeper/src/bin/safekeeper.rs | 2 +- storage_broker/src/bin/storage_broker.rs | 2 +- storage_controller/src/main.rs | 2 +- workspace_hack/Cargo.toml | 1 + 15 files changed, 143 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6faf4b72f0..7fa2937935 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3184,6 +3184,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num" version = "0.4.1" @@ -3520,6 +3530,12 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "p256" version = "0.11.1" @@ -6445,6 +6461,17 @@ dependencies = [ "syn 2.0.52", ] +[[package]] +name = "tracing-chrome" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "496b3cd5447f7ff527bbbf19b071ad542a000adf297d4127078b4dfdb931f41a" +dependencies = [ + "serde_json", + "tracing-core", + "tracing-subscriber", +] + [[package]] name = "tracing-core" version = "0.1.31" @@ -6465,6 +6492,17 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "tracing-flame" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bae117ee14789185e129aaee5d93750abe67fdc5a9a62650452bfe4e122a3a9" +dependencies = [ + "lazy_static", + "tracing", + "tracing-subscriber", +] + [[package]] name = "tracing-log" version = "0.1.3" @@ -6507,6 +6545,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" dependencies = [ "matchers", + "nu-ansi-term", "once_cell", "regex", "serde", @@ -6772,7 +6811,9 @@ dependencies = [ "tokio-tar", "tokio-util", "tracing", + "tracing-chrome", "tracing-error", + "tracing-flame", "tracing-subscriber", "url", "uuid", @@ -7374,6 +7415,7 @@ dependencies = [ "tower", "tracing", "tracing-core", + "tracing-subscriber", "url", "uuid", "zeroize", diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index 94666f2870..272f36aab1 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -274,7 +274,10 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command { fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command { for (var, val) in std::env::vars() { if var.starts_with("NEON_PAGESERVER_") { - cmd = cmd.env(var, val); + cmd = cmd.env(&var, &val); + } + if var.starts_with("NEON_UTILS_") { + cmd = cmd.env(&var, &val) } } cmd diff --git a/libs/desim/tests/reliable_copy_test.rs b/libs/desim/tests/reliable_copy_test.rs index cf7bff8f5a..4f6d425be9 100644 --- a/libs/desim/tests/reliable_copy_test.rs +++ b/libs/desim/tests/reliable_copy_test.rs @@ -154,7 +154,7 @@ mod reliable_copy_test { /// Run test simulations. #[test] fn sim_example_reliable_copy() { - utils::logging::init( + let _guard = utils::logging::init( utils::logging::LogFormat::Test, utils::logging::TracingErrorLayerEnablement::Disabled, utils::logging::Output::Stdout, diff --git a/libs/remote_storage/tests/common/mod.rs b/libs/remote_storage/tests/common/mod.rs index da9dc08d8d..812e1f27d0 100644 --- a/libs/remote_storage/tests/common/mod.rs +++ b/libs/remote_storage/tests/common/mod.rs @@ -205,7 +205,7 @@ pub(crate) async fn upload_remote_data( pub(crate) fn ensure_logging_ready() { LOGGING_DONE.get_or_init(|| { - utils::logging::init( + let _ = utils::logging::init( utils::logging::LogFormat::Test, utils::logging::TracingErrorLayerEnablement::Disabled, utils::logging::Output::Stdout, diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index a6a081c5c1..20804a5bc9 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -61,6 +61,8 @@ const_format.workspace = true # to use tokio channels as streams, this is faster to compile than async_stream # why is it only here? no other crate should use it, streams are rarely needed. tokio-stream = { version = "0.1.14" } +tracing-chrome = "0.7.1" +tracing-flame = "0.2.0" serde_path_to_error.workspace = true diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index 2953f0aad4..e6b6004b53 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -27,6 +27,7 @@ pub mod auth; pub mod id; mod hex; + pub use hex::Hex; // http endpoint utils diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index f7b73dc984..afe2ac0bd5 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -1,4 +1,8 @@ -use std::str::FromStr; +use std::{ + io::BufWriter, + str::FromStr, + sync::{Arc, Mutex}, +}; use anyhow::Context; use metrics::{IntCounter, IntCounterVec}; @@ -98,11 +102,27 @@ pub enum Output { Stderr, } +/// Keep alive and drop it before the program terminates. +#[allow(dead_code)] // We need to store the `Arc<>` for drop semantics. +#[must_use] +pub struct FlushGuard(Arc>); + +struct FlushGuardInner { + _tracing_chrome_layer: Option, + _tracing_flame_layer: Option>>, +} + +impl From for FlushGuard { + fn from(value: FlushGuardInner) -> Self { + Self(Arc::new(Mutex::new(value))) + } +} + pub fn init( log_format: LogFormat, tracing_error_layer_enablement: TracingErrorLayerEnablement, output: Output, -) -> anyhow::Result<()> { +) -> anyhow::Result { // We fall back to printing all spans at info-level or above if // the RUST_LOG environment variable is not set. let rust_log_env_filter = || { @@ -113,8 +133,28 @@ pub fn init( // NB: the order of the with() calls does not matter. // See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering use tracing_subscriber::prelude::*; - let r = tracing_subscriber::registry(); - let r = r.with({ + + // https://users.rust-lang.org/t/how-can-i-init-tracing-registry-dynamically-with-multiple-outputs/94307/6 + #[derive(Default)] + struct LayerStack { + layers: + Option + Sync + Send>>, + } + impl LayerStack { + fn add_layer(&mut self, new_layer: L) + where + L: tracing_subscriber::Layer + Send + Sync, + { + let new = match self.layers.take() { + Some(layers) => Some(layers.and_then(new_layer).boxed()), + None => Some(new_layer.boxed()), + }; + self.layers = new; + } + } + let mut layers = LayerStack::default(); + + layers.add_layer({ let log_layer = tracing_subscriber::fmt::layer() .with_target(false) .with_ansi(false) @@ -131,17 +171,51 @@ pub fn init( }; log_layer.with_filter(rust_log_env_filter()) }); - let r = r.with( + + layers.add_layer( TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()), ); + + let tracing_chrome_layer_flush_guard = + if crate::env::var("NEON_UTILS_LOGGING_ENABLE_TRACING_CHROME").unwrap_or(false) { + let (layer, guard) = tracing_chrome::ChromeLayerBuilder::new() + .trace_style(tracing_chrome::TraceStyle::Async) + .build(); + layers.add_layer(layer.with_filter(rust_log_env_filter())); + Some(guard) + } else { + None + }; + + let tracing_flame_flush_guard = + if crate::env::var("NEON_UTILS_LOGGING_ENABLE_TRACING_FLAME").unwrap_or(false) { + let (layer, guard) = tracing_flame::FlameLayer::with_file("./tracing.folded").unwrap(); + let layer = layer + .with_empty_samples(false) + .with_module_path(false) + .with_file_and_line(false) + .with_threads_collapsed(true); + layers.add_layer(layer.with_filter(rust_log_env_filter())); + Some(guard) + } else { + None + }; + match tracing_error_layer_enablement { - TracingErrorLayerEnablement::EnableWithRustLogFilter => r - .with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())) - .init(), - TracingErrorLayerEnablement::Disabled => r.init(), + TracingErrorLayerEnablement::EnableWithRustLogFilter => layers + .add_layer(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())), + TracingErrorLayerEnablement::Disabled => (), } - Ok(()) + let r = tracing_subscriber::registry(); + r.with(layers.layers.expect("we add at least one layer")) + .init(); + + Ok(FlushGuardInner { + _tracing_chrome_layer: tracing_chrome_layer_flush_guard, + _tracing_flame_layer: tracing_flame_flush_guard, + } + .into()) } /// Disable the default rust panic hook by using `set_hook`. diff --git a/pageserver/ctl/src/main.rs b/pageserver/ctl/src/main.rs index 1fb75584fc..b65b0822ca 100644 --- a/pageserver/ctl/src/main.rs +++ b/pageserver/ctl/src/main.rs @@ -112,7 +112,7 @@ struct AnalyzeLayerMapCmd { #[tokio::main] async fn main() -> anyhow::Result<()> { - logging::init( + let _guard = logging::init( LogFormat::Plain, TracingErrorLayerEnablement::EnableWithRustLogFilter, logging::Output::Stdout, diff --git a/pageserver/pagebench/src/main.rs b/pageserver/pagebench/src/main.rs index 743102d853..b385695cd1 100644 --- a/pageserver/pagebench/src/main.rs +++ b/pageserver/pagebench/src/main.rs @@ -30,7 +30,7 @@ enum Args { } fn main() { - logging::init( + let _guard = logging::init( logging::LogFormat::Plain, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stderr, diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 41835f9843..bd89d8c29c 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -105,7 +105,7 @@ fn main() -> anyhow::Result<()> { } else { TracingErrorLayerEnablement::Disabled }; - logging::init( + let _guard = logging::init( conf.log_format, tracing_error_layer_enablement, logging::Output::Stdout, diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 35ea037a55..5f5f91e275 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3675,7 +3675,7 @@ pub(crate) mod harness { pub deletion_queue: MockDeletionQueue, } - static LOG_HANDLE: OnceCell<()> = OnceCell::new(); + static LOG_HANDLE: OnceCell = OnceCell::new(); pub(crate) fn setup_logging() { LOG_HANDLE.get_or_init(|| { diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index e53ccaeb3d..4504740350 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -226,7 +226,7 @@ async fn main() -> anyhow::Result<()> { // 1. init logging // 2. tracing panic hook // 3. sentry - logging::init( + let _guard = logging::init( LogFormat::from_config(&args.log_format)?, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout, diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index 4e5f8ed724..3723584843 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -632,7 +632,7 @@ async fn main() -> Result<(), Box> { // 1. init logging // 2. tracing panic hook // 3. sentry - logging::init( + let _guard = logging::init( LogFormat::from_config(&args.log_format)?, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout, diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 6466b9f7a3..6bb5dee831 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -175,7 +175,7 @@ fn main() -> anyhow::Result<()> { async fn async_main() -> anyhow::Result<()> { let launch_ts = Box::leak(Box::new(LaunchTimestamp::generate())); - logging::init( + let _guard = logging::init( LogFormat::Plain, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout, diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index d6e2cc2996..9d5a0bfe4e 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -75,6 +75,7 @@ tonic = { version = "0.9", features = ["tls-roots"] } tower = { version = "0.4", default-features = false, features = ["balance", "buffer", "limit", "log", "timeout", "util"] } tracing = { version = "0.1", features = ["log"] } tracing-core = { version = "0.1" } +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } url = { version = "2", features = ["serde"] } uuid = { version = "1", features = ["serde", "v4", "v7"] } zeroize = { version = "1", features = ["derive"] }