diff --git a/Cargo.lock b/Cargo.lock index a9b098a00c..cd6eaae4a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3034,6 +3034,7 @@ dependencies = [ "tokio-util", "toml_edit", "tracing", + "tracing-chrome", "tracing-subscriber", "url", "utils", @@ -5270,6 +5271,17 @@ dependencies = [ "syn 2.0.28", ] +[[package]] +name = "tracing-chrome" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "496b3cd5447f7ff527bbbf19b071ad542a000adf297d4127078b4dfdb931f41a" +dependencies = [ + "serde_json", + "tracing-core", + "tracing-subscriber", +] + [[package]] name = "tracing-core" version = "0.1.31" @@ -5557,6 +5569,7 @@ dependencies = [ "tokio-stream", "tokio-util", "tracing", + "tracing-chrome", "tracing-error", "tracing-subscriber", "url", diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index 26fc08fc8f..6c18330be3 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -86,7 +86,10 @@ where .stdout(process_log_file) .stderr(same_file_for_stderr) .args(args); - let filled_cmd = fill_remote_storage_secrets_vars(fill_rust_env_vars(background_command)); + + let filled_cmd = fill_env_vars_prefixed_neon(fill_remote_storage_secrets_vars( + fill_rust_env_vars(background_command), + )); filled_cmd.envs(envs); let pid_file_to_check = match initial_pid_file { @@ -253,6 +256,15 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command { cmd } +fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command { + for (var, val) in std::env::vars() { + if var.starts_with("NEON_") { + cmd = cmd.env(var, val); + } + } + cmd +} + /// Add a `pre_exec` to the cmd that, inbetween fork() and exec(), /// 1. Claims a pidfile with a fcntl lock on it and /// 2. Sets up the pidfile's file descriptor so that it (and the lock) diff --git a/control_plane/src/bin/attachment_service.rs b/control_plane/src/bin/attachment_service.rs index 16577e27d6..80e86474f4 100644 --- a/control_plane/src/bin/attachment_service.rs +++ b/control_plane/src/bin/attachment_service.rs @@ -283,7 +283,7 @@ fn make_router(persistent_state: PersistentState) -> RouterBuilder anyhow::Result<()> { - logging::init( + let _guard = logging::init( LogFormat::Plain, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout, diff --git a/libs/remote_storage/tests/test_real_azure.rs b/libs/remote_storage/tests/test_real_azure.rs index b631079bc5..b48936cabc 100644 --- a/libs/remote_storage/tests/test_real_azure.rs +++ b/libs/remote_storage/tests/test_real_azure.rs @@ -278,7 +278,7 @@ async fn azure_upload_download_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Res fn ensure_logging_ready() { LOGGING_DONE.get_or_init(|| { - utils::logging::init( + let _ = utils::logging::init( utils::logging::LogFormat::Test, utils::logging::TracingErrorLayerEnablement::Disabled, utils::logging::Output::Stdout, diff --git a/libs/remote_storage/tests/test_real_s3.rs b/libs/remote_storage/tests/test_real_s3.rs index 48f00e0106..63e74b4597 100644 --- a/libs/remote_storage/tests/test_real_s3.rs +++ b/libs/remote_storage/tests/test_real_s3.rs @@ -207,7 +207,7 @@ async fn s3_delete_objects_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> fn ensure_logging_ready() { LOGGING_DONE.get_or_init(|| { - utils::logging::init( + let _ = utils::logging::init( utils::logging::LogFormat::Test, utils::logging::TracingErrorLayerEnablement::Disabled, utils::logging::Output::Stdout, diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index ccf6f4f2d7..ffff9e90a4 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -49,6 +49,7 @@ const_format.workspace = true # to use tokio channels as streams, this is faster to compile than async_stream # why is it only here? no other crate should use it, streams are rarely needed. tokio-stream = { version = "0.1.14" } +tracing-chrome = "0.7.1" [dev-dependencies] byteorder.workspace = true diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index 2f09c2f3ea..7d2c1f9c5d 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -73,11 +73,17 @@ pub enum Output { Stderr, } +/// Keep alive and drop it before the program terminates. +#[must_use] +pub struct FlushGuard { + _tracing_chrome_layer: Option, +} + pub fn init( log_format: LogFormat, tracing_error_layer_enablement: TracingErrorLayerEnablement, output: Output, -) -> anyhow::Result<()> { +) -> anyhow::Result { // We fall back to printing all spans at info-level or above if // the RUST_LOG environment variable is not set. let rust_log_env_filter = || { @@ -85,11 +91,41 @@ pub fn init( .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")) }; + // WIP: lift it up as an argument + let enable_tracing_chrome = match std::env::var("NEON_PAGESERVER_ENABLE_TRACING_CHROME") { + Ok(s) if s != "0" => true, + Ok(_s) => false, + Err(std::env::VarError::NotPresent) => false, + Err(std::env::VarError::NotUnicode(_)) => { + panic!("env var NEON_PAGESERVER_ENABLE_TRACING_CHROME not unicode") + } + }; + // NB: the order of the with() calls does not matter. // See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering use tracing_subscriber::prelude::*; - let r = tracing_subscriber::registry(); - let r = r.with({ + + // https://users.rust-lang.org/t/how-can-i-init-tracing-registry-dynamically-with-multiple-outputs/94307/6 + #[derive(Default)] + struct LayerStack { + layers: + Option + Sync + Send>>, + } + impl LayerStack { + fn add_layer(&mut self, new_layer: L) + where + L: tracing_subscriber::Layer + Send + Sync, + { + let new = match self.layers.take() { + Some(layers) => Some(layers.and_then(new_layer).boxed()), + None => Some(new_layer.boxed()), + }; + self.layers = new; + } + } + let mut layers = LayerStack::default(); + + layers.add_layer({ let log_layer = tracing_subscriber::fmt::layer() .with_target(false) .with_ansi(false) @@ -106,15 +142,31 @@ pub fn init( }; log_layer.with_filter(rust_log_env_filter()) }); - let r = r.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter())); + + layers + .add_layer(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter())); + + let tracing_chrome_layer_flush_guard = if enable_tracing_chrome { + let (layer, guard) = tracing_chrome::ChromeLayerBuilder::new().build(); + layers.add_layer(layer); + Some(guard) + } else { + None + }; + match tracing_error_layer_enablement { - TracingErrorLayerEnablement::EnableWithRustLogFilter => r - .with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())) - .init(), - TracingErrorLayerEnablement::Disabled => r.init(), + TracingErrorLayerEnablement::EnableWithRustLogFilter => layers + .add_layer(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())), + TracingErrorLayerEnablement::Disabled => (), } - Ok(()) + let r = tracing_subscriber::registry(); + r.with(layers.layers.expect("we add at least one layer")) + .init(); + + Ok(FlushGuard { + _tracing_chrome_layer: tracing_chrome_layer_flush_guard, + }) } /// Disable the default rust panic hook by using `set_hook`. diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 2f2336a578..aff889d20d 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -84,6 +84,7 @@ strum.workspace = true strum_macros.workspace = true tokio-stream.workspace = true tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } +tracing-chrome = "0.7.1" [dev-dependencies] criterion.workspace = true diff --git a/pageserver/pagebench/src/basebackup.rs b/pageserver/pagebench/src/basebackup.rs index 2beb1f450c..0582b64796 100644 --- a/pageserver/pagebench/src/basebackup.rs +++ b/pageserver/pagebench/src/basebackup.rs @@ -140,7 +140,7 @@ thread_local! { } pub(crate) fn main(args: Args) -> anyhow::Result<()> { - logging::init( + let _guard = logging::init( logging::LogFormat::Plain, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stderr, diff --git a/pageserver/pagebench/src/getpage_latest_lsn.rs b/pageserver/pagebench/src/getpage_latest_lsn.rs index 61867a13ae..bf302c75e8 100644 --- a/pageserver/pagebench/src/getpage_latest_lsn.rs +++ b/pageserver/pagebench/src/getpage_latest_lsn.rs @@ -137,7 +137,7 @@ thread_local! { } pub(crate) fn main(args: Args) -> anyhow::Result<()> { - logging::init( + let _guard = logging::init( logging::LogFormat::Plain, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stderr, diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index f971b0a88d..cb05072e68 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -103,7 +103,7 @@ fn main() -> anyhow::Result<()> { } else { TracingErrorLayerEnablement::Disabled }; - logging::init( + let _guard = logging::init( conf.log_format, tracing_error_layer_enablement, logging::Output::Stdout, diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 0b5bb22c8b..58c05ab98d 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -199,7 +199,7 @@ async fn main() -> anyhow::Result<()> { // 1. init logging // 2. tracing panic hook // 3. sentry - logging::init( + let _guard = logging::init( LogFormat::from_config(&args.log_format)?, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout, diff --git a/setup_bench_repo_dir.bash b/setup_bench_repo_dir.bash index d999717c80..cf1dbfd819 100644 --- a/setup_bench_repo_dir.bash +++ b/setup_bench_repo_dir.bash @@ -20,6 +20,8 @@ sudo mount --bind /mnt/bench_repo_dir bench_repo_dir mkdir /mnt/test_output +mkdir /mnt/many_tenants + echo run the following commands cat < Result<(), Box> { // 1. init logging // 2. tracing panic hook // 3. sentry - logging::init( + let _guard = logging::init( LogFormat::from_config(&args.log_format)?, logging::TracingErrorLayerEnablement::Disabled, logging::Output::Stdout,