utils: allow for setting up OTEL tracing subscriber

This commit is contained in:
Vlad Lazar
2025-02-10 23:26:27 +01:00
parent 74a7f68da0
commit ac1159cb33
16 changed files with 82 additions and 8 deletions

2
Cargo.lock generated
View File

@@ -4301,6 +4301,7 @@ dependencies = [
"tokio-util",
"toml_edit",
"tracing",
"tracing-utils",
"url",
"utils",
"wal_decoder",
@@ -7808,6 +7809,7 @@ dependencies = [
"tracing",
"tracing-error",
"tracing-subscriber",
"tracing-utils",
"walkdir",
]

View File

@@ -592,6 +592,7 @@ pub(crate) async fn main() -> anyhow::Result<()> {
utils::logging::init(
utils::logging::LogFormat::Json,
utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
utils::logging::OtelEnablement::Disabled,
utils::logging::Output::Stdout,
)?;

View File

@@ -158,6 +158,7 @@ mod reliable_copy_test {
utils::logging::init(
utils::logging::LogFormat::Test,
utils::logging::TracingErrorLayerEnablement::Disabled,
utils::logging::OtelEnablement::Disabled,
utils::logging::Output::Stdout,
)
.expect("logging init failed");

View File

@@ -208,6 +208,7 @@ pub(crate) fn ensure_logging_ready() {
utils::logging::init(
utils::logging::LogFormat::Test,
utils::logging::TracingErrorLayerEnablement::Disabled,
utils::logging::OtelEnablement::Disabled,
utils::logging::Output::Stdout,
)
.expect("logging init failed");

View File

@@ -28,7 +28,7 @@ use core::{
task::{Context, Poll},
};
use pin_project_lite::pin_project;
use tracing::{field, span::Span, Dispatch};
use tracing::{Dispatch, field, span::Span};
#[derive(Debug, Clone)]
pub struct PerfSpan {

View File

@@ -42,6 +42,7 @@ toml_edit = { workspace = true, features = ["serde"] }
tracing.workspace = true
tracing-error.workspace = true
tracing-subscriber = { workspace = true, features = ["json", "registry"] }
tracing-utils.workspace = true
rand.workspace = true
scopeguard.workspace = true
strum.workspace = true

View File

@@ -7,7 +7,9 @@ use metrics::{IntCounter, IntCounterVec};
use once_cell::sync::Lazy;
use strum_macros::{EnumString, VariantNames};
use tokio::time::Instant;
use tracing::Dispatch;
use tracing::info;
use tracing::level_filters::LevelFilter;
/// Logs a critical error, similarly to `tracing::error!`. This will:
///
@@ -125,6 +127,15 @@ pub enum TracingErrorLayerEnablement {
EnableWithRustLogFilter,
}
pub enum OtelEnablement {
Disabled,
Enabled {
service_name: String,
export_config: tracing_utils::ExportConfig,
runtime: &'static tokio::runtime::Runtime,
},
}
/// Where the logging should output to.
#[derive(Clone, Copy)]
pub enum Output {
@@ -132,11 +143,24 @@ pub enum Output {
Stderr,
}
pub struct OtelGuard {
pub dispatch: Dispatch,
}
impl Drop for OtelGuard {
fn drop(&mut self) {
tracing_utils::shutdown_tracing();
}
}
pub const PERF_TRACE_TARGET: &str = "P";
pub fn init(
log_format: LogFormat,
tracing_error_layer_enablement: TracingErrorLayerEnablement,
otel_enablement: OtelEnablement,
output: Output,
) -> anyhow::Result<()> {
) -> anyhow::Result<Option<OtelGuard>> {
// We fall back to printing all spans at info-level or above if
// the RUST_LOG environment variable is not set.
let rust_log_env_filter = || {
@@ -165,6 +189,7 @@ pub fn init(
};
log_layer.with_filter(rust_log_env_filter())
});
let r = r.with(
TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()),
);
@@ -175,7 +200,26 @@ pub fn init(
TracingErrorLayerEnablement::Disabled => r.init(),
}
Ok(())
let otel_subscriber = match otel_enablement {
OtelEnablement::Disabled => None,
OtelEnablement::Enabled {
service_name,
export_config,
runtime,
} => {
let otel_layer = runtime
.block_on(tracing_utils::init_tracing(&service_name, export_config))
.with_filter(LevelFilter::INFO);
let otel_subscriber = tracing_subscriber::registry().with(otel_layer);
let otel_dispatch = Dispatch::new(otel_subscriber);
Some(otel_dispatch)
}
};
let otel_guard = otel_subscriber.map(|dispatch| OtelGuard { dispatch });
Ok(otel_guard)
}
/// Disable the default rust panic hook by using `set_hook`.

View File

@@ -66,6 +66,7 @@ tokio-stream.workspace = true
tokio-util.workspace = true
toml_edit = { workspace = true, features = [ "serde" ] }
tracing.workspace = true
tracing-utils.workspace = true
url.workspace = true
walkdir.workspace = true
metrics.workspace = true

View File

@@ -10,9 +10,10 @@ pub(crate) fn setup_logging() {
logging::init(
logging::LogFormat::Test,
logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
utils::logging::OtelEnablement::Disabled,
logging::Output::Stdout,
)
.expect("Failed to init test logging")
.expect("Failed to init test logging");
});
}

View File

@@ -117,6 +117,7 @@ async fn main() -> anyhow::Result<()> {
logging::init(
LogFormat::Plain,
TracingErrorLayerEnablement::EnableWithRustLogFilter,
utils::logging::OtelEnablement::Disabled,
logging::Output::Stdout,
)?;

View File

@@ -35,6 +35,7 @@ fn main() {
logging::init(
logging::LogFormat::Plain,
logging::TracingErrorLayerEnablement::Disabled,
utils::logging::OtelEnablement::Disabled,
logging::Output::Stderr,
)
.unwrap();

View File

@@ -21,7 +21,8 @@ use pageserver::deletion_queue::DeletionQueue;
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
use pageserver::task_mgr::{
BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, OTEL_RUNTIME,
WALRECEIVER_RUNTIME,
};
use pageserver::tenant::{TenantSharedResources, mgr, secondary};
use pageserver::{
@@ -36,7 +37,7 @@ use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::auth::{JwtAuth, SwappableJwtAuth};
use utils::crashsafe::syncfs;
use utils::logging::TracingErrorLayerEnablement;
use utils::logging::{OtelGuard, TracingErrorLayerEnablement};
use utils::sentry_init::init_sentry;
use utils::{failpoint_support, logging, project_build_tag, project_git_version, tcp_listener};
@@ -110,12 +111,27 @@ fn main() -> anyhow::Result<()> {
} else {
TracingErrorLayerEnablement::Disabled
};
logging::init(
let otel_enablement = match &conf.tracing {
Some(cfg) => utils::logging::OtelEnablement::Enabled {
service_name: "pageserver".to_string(),
export_config: (&cfg.export_config).into(),
runtime: *OTEL_RUNTIME,
},
None => utils::logging::OtelEnablement::Disabled,
};
let otel_guard = logging::init(
conf.log_format,
tracing_error_layer_enablement,
otel_enablement,
logging::Output::Stdout,
)?;
if otel_guard.is_some() {
info!(?conf.tracing, "starting with OTEL tracing enabled");
}
// mind the order required here: 1. logging, 2. panic_hook, 3. sentry.
// disarming this hook on pageserver, because we never tear down tracing.
logging::replace_panic_hook_with_tracing_panic_hook().forget();

View File

@@ -5718,9 +5718,10 @@ pub(crate) mod harness {
// enable it in case the tests exercise code paths that use
// debug_assert_current_span_has_tenant_and_timeline_id
logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
utils::logging::OtelEnablement::Disabled,
logging::Output::Stdout,
)
.expect("Failed to init test logging")
.expect("Failed to init test logging");
});
}

View File

@@ -255,6 +255,7 @@ async fn main() -> anyhow::Result<()> {
logging::init(
LogFormat::from_config(&args.log_format)?,
logging::TracingErrorLayerEnablement::Disabled,
utils::logging::OtelEnablement::Disabled,
logging::Output::Stdout,
)?;
logging::replace_panic_hook_with_tracing_panic_hook().forget();

View File

@@ -643,6 +643,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
logging::init(
LogFormat::from_config(&args.log_format)?,
logging::TracingErrorLayerEnablement::Disabled,
utils::logging::OtelEnablement::Disabled,
logging::Output::Stdout,
)?;
logging::replace_panic_hook_with_tracing_panic_hook().forget();

View File

@@ -232,6 +232,7 @@ fn main() -> anyhow::Result<()> {
logging::init(
LogFormat::Plain,
logging::TracingErrorLayerEnablement::Disabled,
utils::logging::OtelEnablement::Disabled,
logging::Output::Stdout,
)?;