From cdb95d1a3523e0e1652c849d0a7f8b9b6203fb94 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Fri, 7 Mar 2025 18:06:10 +0100 Subject: [PATCH] utils: optionally enable otel tracing in common logging utils This patch augments `utils::logging::init` with the ability to set up OTEL tracing infrastructure. The end goal is for the pageserver to use this in order to export perf traces. Note that an entirely different tracing subscriber is used. This is to avoid interference with the existing tracing set-up. For now, no service uses this functionality. --- Cargo.lock | 2 ++ compute_tools/src/bin/fast_import.rs | 1 + libs/desim/tests/reliable_copy_test.rs | 1 + libs/remote_storage/tests/common/mod.rs | 1 + libs/utils/Cargo.toml | 1 + libs/utils/src/logging.rs | 46 ++++++++++++++++++++++-- pageserver/Cargo.toml | 1 + pageserver/compaction/tests/tests.rs | 3 +- pageserver/ctl/src/main.rs | 1 + pageserver/pagebench/src/main.rs | 1 + pageserver/src/bin/pageserver.rs | 2 ++ pageserver/src/tenant.rs | 3 +- safekeeper/src/bin/safekeeper.rs | 1 + storage_broker/src/bin/storage_broker.rs | 1 + storage_controller/src/main.rs | 1 + 15 files changed, 62 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f15c6e857f..4ac93bba67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4301,6 +4301,7 @@ dependencies = [ "tokio-util", "toml_edit", "tracing", + "tracing-utils", "url", "utils", "uuid", @@ -7837,6 +7838,7 @@ dependencies = [ "tracing", "tracing-error", "tracing-subscriber", + "tracing-utils", "walkdir", ] diff --git a/compute_tools/src/bin/fast_import.rs b/compute_tools/src/bin/fast_import.rs index 47558be7a0..265bf8112b 100644 --- a/compute_tools/src/bin/fast_import.rs +++ b/compute_tools/src/bin/fast_import.rs @@ -592,6 +592,7 @@ pub(crate) async fn main() -> anyhow::Result<()> { utils::logging::init( utils::logging::LogFormat::Json, utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter, + utils::logging::OtelEnablement::Disabled, 
utils::logging::Output::Stdout, )?; diff --git a/libs/desim/tests/reliable_copy_test.rs b/libs/desim/tests/reliable_copy_test.rs index 1ddf9844de..aa73b276af 100644 --- a/libs/desim/tests/reliable_copy_test.rs +++ b/libs/desim/tests/reliable_copy_test.rs @@ -158,6 +158,7 @@ mod reliable_copy_test { utils::logging::init( utils::logging::LogFormat::Test, utils::logging::TracingErrorLayerEnablement::Disabled, + utils::logging::OtelEnablement::Disabled, utils::logging::Output::Stdout, ) .expect("logging init failed"); diff --git a/libs/remote_storage/tests/common/mod.rs b/libs/remote_storage/tests/common/mod.rs index daab05d91a..d720d61dae 100644 --- a/libs/remote_storage/tests/common/mod.rs +++ b/libs/remote_storage/tests/common/mod.rs @@ -208,6 +208,7 @@ pub(crate) fn ensure_logging_ready() { utils::logging::init( utils::logging::LogFormat::Test, utils::logging::TracingErrorLayerEnablement::Disabled, + utils::logging::OtelEnablement::Disabled, utils::logging::Output::Stdout, ) .expect("logging init failed"); diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index ac44300a51..4180602ac7 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -42,6 +42,7 @@ toml_edit = { workspace = true, features = ["serde"] } tracing.workspace = true tracing-error.workspace = true tracing-subscriber = { workspace = true, features = ["json", "registry"] } +tracing-utils.workspace = true rand.workspace = true scopeguard.workspace = true strum.workspace = true diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index 881f1e765d..a494985bc0 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -7,7 +7,9 @@ use metrics::{IntCounter, IntCounterVec}; use once_cell::sync::Lazy; use strum_macros::{EnumString, VariantNames}; use tokio::time::Instant; +use tracing::Dispatch; use tracing::info; +use tracing::level_filters::LevelFilter; /// Logs a critical error, similarly to `tracing::error!`. 
This will: /// @@ -125,6 +127,15 @@ pub enum TracingErrorLayerEnablement { EnableWithRustLogFilter, } +pub enum OtelEnablement { + Disabled, + Enabled { + service_name: String, + export_config: tracing_utils::ExportConfig, + runtime: &'static tokio::runtime::Runtime, + }, +} + /// Where the logging should output to. #[derive(Clone, Copy)] pub enum Output { @@ -132,11 +143,22 @@ pub enum Output { Stderr, } +pub struct OtelGuard { + pub dispatch: Dispatch, +} + +impl Drop for OtelGuard { + fn drop(&mut self) { + tracing_utils::shutdown_tracing(); + } +} + pub fn init( log_format: LogFormat, tracing_error_layer_enablement: TracingErrorLayerEnablement, + otel_enablement: OtelEnablement, output: Output, -) -> anyhow::Result<()> { +) -> anyhow::Result<Option<OtelGuard>> { // We fall back to printing all spans at info-level or above if // the RUST_LOG environment variable is not set. let rust_log_env_filter = || { @@ -165,6 +187,7 @@ pub fn init( }; log_layer.with_filter(rust_log_env_filter()) }); + let r = r.with( TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()), ); @@ -175,7 +198,26 @@ pub fn init( TracingErrorLayerEnablement::Disabled => r.init(), } - Ok(()) + let otel_subscriber = match otel_enablement { + OtelEnablement::Disabled => None, + OtelEnablement::Enabled { + service_name, + export_config, + runtime, + } => { + let otel_layer = runtime + .block_on(tracing_utils::init_tracing(&service_name, export_config)) + .with_filter(LevelFilter::INFO); + let otel_subscriber = tracing_subscriber::registry().with(otel_layer); + let otel_dispatch = Dispatch::new(otel_subscriber); + + Some(otel_dispatch) + } + }; + + let otel_guard = otel_subscriber.map(|dispatch| OtelGuard { dispatch }); + + Ok(otel_guard) } /// Disable the default rust panic hook by using `set_hook`. 
diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index fa16090170..31b31f5c1a 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -66,6 +66,7 @@ tokio-stream.workspace = true tokio-util.workspace = true toml_edit = { workspace = true, features = [ "serde" ] } tracing.workspace = true +tracing-utils.workspace = true url.workspace = true walkdir.workspace = true metrics.workspace = true diff --git a/pageserver/compaction/tests/tests.rs b/pageserver/compaction/tests/tests.rs index bd8b54a286..7db1e0e2d6 100644 --- a/pageserver/compaction/tests/tests.rs +++ b/pageserver/compaction/tests/tests.rs @@ -10,9 +10,10 @@ pub(crate) fn setup_logging() { logging::init( logging::LogFormat::Test, logging::TracingErrorLayerEnablement::EnableWithRustLogFilter, + utils::logging::OtelEnablement::Disabled, logging::Output::Stdout, ) - .expect("Failed to init test logging") + .expect("Failed to init test logging"); }); } diff --git a/pageserver/ctl/src/main.rs b/pageserver/ctl/src/main.rs index 72a120a69b..957537cc8e 100644 --- a/pageserver/ctl/src/main.rs +++ b/pageserver/ctl/src/main.rs @@ -117,6 +117,7 @@ async fn main() -> anyhow::Result<()> { logging::init( LogFormat::Plain, TracingErrorLayerEnablement::EnableWithRustLogFilter, + utils::logging::OtelEnablement::Disabled, logging::Output::Stdout, )?; diff --git a/pageserver/pagebench/src/main.rs b/pageserver/pagebench/src/main.rs index 5527557450..fb017de119 100644 --- a/pageserver/pagebench/src/main.rs +++ b/pageserver/pagebench/src/main.rs @@ -35,6 +35,7 @@ fn main() { logging::init( logging::LogFormat::Plain, logging::TracingErrorLayerEnablement::Disabled, + utils::logging::OtelEnablement::Disabled, logging::Output::Stderr, ) .unwrap(); diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 703629aed5..baa70f8083 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -110,9 +110,11 @@ fn main() -> anyhow::Result<()> { } else { 
TracingErrorLayerEnablement::Disabled }; + logging::init( conf.log_format, tracing_error_layer_enablement, + utils::logging::OtelEnablement::Disabled, logging::Output::Stdout, )?; diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index c78d15c9b5..d877d2eb1c 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -5718,9 +5718,10 @@ pub(crate) mod harness { // enable it in case the tests exercise code paths that use // debug_assert_current_span_has_tenant_and_timeline_id logging::TracingErrorLayerEnablement::EnableWithRustLogFilter, + utils::logging::OtelEnablement::Disabled, logging::Output::Stdout, ) - .expect("Failed to init test logging") + .expect("Failed to init test logging"); }); } diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 10fc4a4b59..73e8eae57e 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -255,6 +255,7 @@ async fn main() -> anyhow::Result<()> { logging::init( LogFormat::from_config(&args.log_format)?, logging::TracingErrorLayerEnablement::Disabled, + utils::logging::OtelEnablement::Disabled, logging::Output::Stdout, )?; logging::replace_panic_hook_with_tracing_panic_hook().forget(); diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index cc33ec20ff..61962e1e94 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -643,6 +643,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { logging::init( LogFormat::from_config(&args.log_format)?, logging::TracingErrorLayerEnablement::Disabled, + utils::logging::OtelEnablement::Disabled, logging::Output::Stdout, )?; logging::replace_panic_hook_with_tracing_panic_hook().forget(); diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 1d49cd85ca..5c954b6838 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -240,6 +240,7 @@ fn main() -> anyhow::Result<()> { 
logging::init( LogFormat::Plain, logging::TracingErrorLayerEnablement::Disabled, + utils::logging::OtelEnablement::Disabled, logging::Output::Stdout, )?;