From 803b765c76bd7cbe1f5d3392a6ad9b31697ec572 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 6 Dec 2023 17:48:50 +0000 Subject: [PATCH] utils::logging: implement tracing_chrome & tracing_flame support --- Cargo.lock | 42 +++++++++++ compute_tools/src/bin/fast_import.rs | 2 +- libs/desim/tests/reliable_copy_test.rs | 2 +- libs/remote_storage/tests/common/mod.rs | 2 +- libs/utils/Cargo.toml | 2 + libs/utils/src/lib.rs | 1 + libs/utils/src/logging.rs | 94 +++++++++++++++++++++--- pageserver/compaction/tests/tests.rs | 2 +- pageserver/ctl/src/main.rs | 2 +- pageserver/pagebench/src/main.rs | 2 +- pageserver/src/bin/pageserver.rs | 2 +- pageserver/src/tenant.rs | 2 +- safekeeper/src/bin/safekeeper.rs | 2 +- storage_broker/src/bin/storage_broker.rs | 2 +- storage_controller/src/main.rs | 8 +- workspace_hack/Cargo.toml | 1 + 16 files changed, 147 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9e0e343996..4493847c27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3513,6 +3513,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num" version = "0.4.1" @@ -3808,6 +3818,12 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "p256" version = "0.11.1" @@ -6967,6 +6983,17 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "tracing-chrome" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "496b3cd5447f7ff527bbbf19b071ad542a000adf297d4127078b4dfdb931f41a" +dependencies = [ + "serde_json", + "tracing-core", + "tracing-subscriber", +] + [[package]] name = "tracing-core" version = "0.1.33" @@ -6987,6 +7014,17 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "tracing-flame" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bae117ee14789185e129aaee5d93750abe67fdc5a9a62650452bfe4e122a3a9" +dependencies = [ + "lazy_static", + "tracing", + "tracing-subscriber", +] + [[package]] name = "tracing-log" version = "0.2.0" @@ -7033,6 +7071,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "matchers", + "nu-ansi-term", "once_cell", "regex", "serde", @@ -7283,7 +7322,9 @@ dependencies = [ "tokio-util", "toml_edit", "tracing", + "tracing-chrome", "tracing-error", + "tracing-flame", "tracing-subscriber", "url", "uuid", @@ -7870,6 +7911,7 @@ dependencies = [ "tower", "tracing", "tracing-core", + "tracing-subscriber", "url", "zerocopy", "zeroize", diff --git a/compute_tools/src/bin/fast_import.rs b/compute_tools/src/bin/fast_import.rs index 793ec4cf10..820935050c 100644 --- a/compute_tools/src/bin/fast_import.rs +++ b/compute_tools/src/bin/fast_import.rs @@ -69,7 +69,7 @@ enum EncryptionSecret { #[tokio::main] pub(crate) async fn main() -> anyhow::Result<()> { - utils::logging::init( + let _guard = utils::logging::init( utils::logging::LogFormat::Plain, utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter, utils::logging::Output::Stdout, diff --git a/libs/desim/tests/reliable_copy_test.rs b/libs/desim/tests/reliable_copy_test.rs index cf7bff8f5a..4f6d425be9 100644 --- a/libs/desim/tests/reliable_copy_test.rs +++ b/libs/desim/tests/reliable_copy_test.rs @@ -154,7 +154,7 @@ mod reliable_copy_test { /// Run test simulations. #[test] fn sim_example_reliable_copy() { - utils::logging::init( + let _guard = utils::logging::init( utils::logging::LogFormat::Test, utils::logging::TracingErrorLayerEnablement::Disabled, utils::logging::Output::Stdout, diff --git a/libs/remote_storage/tests/common/mod.rs b/libs/remote_storage/tests/common/mod.rs index daab05d91a..e58ad94283 100644 --- a/libs/remote_storage/tests/common/mod.rs +++ b/libs/remote_storage/tests/common/mod.rs @@ -205,7 +205,7 @@ pub(crate) async fn upload_remote_data( pub(crate) fn ensure_logging_ready() { LOGGING_DONE.get_or_init(|| { - utils::logging::init( + let _ = utils::logging::init( utils::logging::LogFormat::Test, utils::logging::TracingErrorLayerEnablement::Disabled, utils::logging::Output::Stdout, diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 02bf77760a..61413f611a 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -66,6 +66,8 @@ const_format.workspace = true # to use tokio channels as streams, this is faster to compile than async_stream # why is it only here? no other crate should use it, streams are rarely needed. tokio-stream = { version = "0.1.14" } +tracing-chrome = "0.7.1" +tracing-flame = "0.2.0" serde_path_to_error.workspace = true diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index 2c56dd750f..94daf11ca0 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -31,6 +31,7 @@ pub mod id; pub mod shard; mod hex; + pub use hex::Hex; // http endpoint utils diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index e205d60d74..4e4c06c52a 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -1,4 +1,8 @@ -use std::str::FromStr; +use std::{ + io::BufWriter, + str::FromStr, + sync::{Arc, Mutex}, +}; use anyhow::Context; use metrics::{IntCounter, IntCounterVec}; @@ -98,11 +102,27 @@ pub enum Output { Stderr, } +/// Keep alive and drop it before the program terminates. +#[allow(dead_code)] // We need to store the `Arc<>` for drop semantics. +#[must_use] +pub struct FlushGuard(Arc>); + +struct FlushGuardInner { + _tracing_chrome_layer: Option, + _tracing_flame_layer: Option>>, +} + +impl From for FlushGuard { + fn from(value: FlushGuardInner) -> Self { + Self(Arc::new(Mutex::new(value))) + } +} + pub fn init( log_format: LogFormat, tracing_error_layer_enablement: TracingErrorLayerEnablement, output: Output, -) -> anyhow::Result<()> { +) -> anyhow::Result { // We fall back to printing all spans at info-level or above if // the RUST_LOG environment variable is not set. let rust_log_env_filter = || { @@ -113,8 +133,28 @@ pub fn init( // NB: the order of the with() calls does not matter. // See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering use tracing_subscriber::prelude::*; - let r = tracing_subscriber::registry(); - let r = r.with({ + + // https://users.rust-lang.org/t/how-can-i-init-tracing-registry-dynamically-with-multiple-outputs/94307/6 + #[derive(Default)] + struct LayerStack { + layers: + Option + Sync + Send>>, + } + impl LayerStack { + fn add_layer(&mut self, new_layer: L) + where + L: tracing_subscriber::Layer + Send + Sync, + { + let new = match self.layers.take() { + Some(layers) => Some(layers.and_then(new_layer).boxed()), + None => Some(new_layer.boxed()), + }; + self.layers = new; + } + } + let mut layers = LayerStack::default(); + + layers.add_layer({ let log_layer = tracing_subscriber::fmt::layer() .with_target(false) .with_ansi(false) @@ -131,17 +171,51 @@ pub fn init( }; log_layer.with_filter(rust_log_env_filter()) }); - let r = r.with( + + layers.add_layer( TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()), ); + + let tracing_chrome_layer_flush_guard = + if crate::env::var("NEON_UTILS_LOGGING_ENABLE_TRACING_CHROME").unwrap_or(false) { + let (layer, guard) = tracing_chrome::ChromeLayerBuilder::new() + .trace_style(tracing_chrome::TraceStyle::Async) + .build(); + layers.add_layer(layer.with_filter(rust_log_env_filter())); + Some(guard) + } else { + None + }; + + let tracing_flame_flush_guard = + if crate::env::var("NEON_UTILS_LOGGING_ENABLE_TRACING_FLAME").unwrap_or(false) { + let (layer, guard) = tracing_flame::FlameLayer::with_file("./tracing.folded").unwrap(); + let layer = layer + .with_empty_samples(false) + .with_module_path(false) + .with_file_and_line(false) + .with_threads_collapsed(true); + layers.add_layer(layer.with_filter(rust_log_env_filter())); + Some(guard) + } else { + None + }; + match tracing_error_layer_enablement { - TracingErrorLayerEnablement::EnableWithRustLogFilter => r - .with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())) - .init(), - TracingErrorLayerEnablement::Disabled => r.init(), + TracingErrorLayerEnablement::EnableWithRustLogFilter => layers + .add_layer(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())), + TracingErrorLayerEnablement::Disabled => (), } - Ok(()) + let r = tracing_subscriber::registry(); + r.with(layers.layers.expect("we add at least one layer")) + .init(); + + Ok(FlushGuardInner { + _tracing_chrome_layer: tracing_chrome_layer_flush_guard, + _tracing_flame_layer: tracing_flame_flush_guard, + } + .into()) } /// Disable the default rust panic hook by using `set_hook`. diff --git a/pageserver/compaction/tests/tests.rs b/pageserver/compaction/tests/tests.rs index bd8b54a286..a74a158e05 100644 --- a/pageserver/compaction/tests/tests.rs +++ b/pageserver/compaction/tests/tests.rs @@ -3,7 +3,7 @@ use pageserver_compaction::interface::CompactionLayer; use pageserver_compaction::simulator::MockTimeline; use utils::logging; -static LOG_HANDLE: OnceCell<()> = OnceCell::new(); +static LOG_HANDLE: OnceCell = OnceCell::new(); pub(crate) fn setup_logging() { LOG_HANDLE.get_or_init(|| { diff --git a/pageserver/ctl/src/main.rs b/pageserver/ctl/src/main.rs index a0aac89dc8..28c1b9bdd7 100644 --- a/pageserver/ctl/src/main.rs +++ b/pageserver/ctl/src/main.rs @@ -115,7 +115,7 @@ struct AnalyzeLayerMapCmd { #[tokio::main] async fn main() -> anyhow::Result<()> { - logging::init( + let _guard = logging::init( LogFormat::Plain, TracingErrorLayerEnablement::EnableWithRustLogFilter, logging::Output::Stdout, diff --git a/pageserver/pagebench/src/main.rs b/pageserver/pagebench/src/main.rs index 5527557450..4d7d787f3c 100644 --- a/pageserver/pagebench/src/main.rs +++ b/pageserver/pagebench/src/main.rs @@ -32,7 +32,7 @@ enum Args { } fn main() { - logging::init( + let _guard = logging::init( logging::LogFormat::Plain, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stderr, diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index b92ff4ebf9..602cfd29dc 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -114,7 +114,7 @@ fn main() -> anyhow::Result<()> { } else { TracingErrorLayerEnablement::Disabled }; - logging::init( + let _guard = logging::init( conf.log_format, tracing_error_layer_enablement, logging::Output::Stdout, diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index e3dab2fc1d..ef50323c62 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -5562,7 +5562,7 @@ pub(crate) mod harness { pub deletion_queue: MockDeletionQueue, } - static LOG_HANDLE: OnceCell<()> = OnceCell::new(); + static LOG_HANDLE: OnceCell = OnceCell::new(); pub(crate) fn setup_logging() { LOG_HANDLE.get_or_init(|| { diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index e0ba38d638..cd66cd25c9 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -256,7 +256,7 @@ async fn main() -> anyhow::Result<()> { // 1. init logging // 2. tracing panic hook // 3. sentry - logging::init( + let _guard = logging::init( LogFormat::from_config(&args.log_format)?, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout, diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index 1fbb651656..e7c2483606 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -636,7 +636,7 @@ async fn main() -> Result<(), Box> { // 1. init logging // 2. tracing panic hook // 3. sentry - logging::init( + let _guard = logging::init( LogFormat::from_config(&args.log_format)?, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout, diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 801409d612..50cc91f1be 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -188,7 +188,7 @@ impl Secrets { } fn main() -> anyhow::Result<()> { - logging::init( + let _guard = logging::init( LogFormat::Plain, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout, @@ -221,6 +221,12 @@ fn main() -> anyhow::Result<()> { async fn async_main() -> anyhow::Result<()> { let launch_ts = Box::leak(Box::new(LaunchTimestamp::generate())); + let _guard = logging::init( + LogFormat::Plain, + logging::TracingErrorLayerEnablement::Disabled, + logging::Output::Stdout, + )?; + preinitialize_metrics(); let args = Cli::parse(); diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 33bdc25785..f7acb52e3c 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -94,6 +94,7 @@ tonic = { version = "0.12", features = ["tls-roots"] } tower = { version = "0.4", default-features = false, features = ["balance", "buffer", "limit", "log", "util"] } tracing = { version = "0.1", features = ["log"] } tracing-core = { version = "0.1" } +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } url = { version = "2", features = ["serde"] } zerocopy = { version = "0.7", features = ["derive", "simd"] } zeroize = { version = "1", features = ["derive", "serde"] }