mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-03 13:30:38 +00:00
utils: allow for setting up OTEL tracing subscriber
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -4301,6 +4301,7 @@ dependencies = [
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"tracing-utils",
|
||||
"url",
|
||||
"utils",
|
||||
"wal_decoder",
|
||||
@@ -7808,6 +7809,7 @@ dependencies = [
|
||||
"tracing",
|
||||
"tracing-error",
|
||||
"tracing-subscriber",
|
||||
"tracing-utils",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
|
||||
@@ -592,6 +592,7 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
utils::logging::init(
|
||||
utils::logging::LogFormat::Json,
|
||||
utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
|
||||
utils::logging::OtelEnablement::Disabled,
|
||||
utils::logging::Output::Stdout,
|
||||
)?;
|
||||
|
||||
|
||||
@@ -158,6 +158,7 @@ mod reliable_copy_test {
|
||||
utils::logging::init(
|
||||
utils::logging::LogFormat::Test,
|
||||
utils::logging::TracingErrorLayerEnablement::Disabled,
|
||||
utils::logging::OtelEnablement::Disabled,
|
||||
utils::logging::Output::Stdout,
|
||||
)
|
||||
.expect("logging init failed");
|
||||
|
||||
@@ -208,6 +208,7 @@ pub(crate) fn ensure_logging_ready() {
|
||||
utils::logging::init(
|
||||
utils::logging::LogFormat::Test,
|
||||
utils::logging::TracingErrorLayerEnablement::Disabled,
|
||||
utils::logging::OtelEnablement::Disabled,
|
||||
utils::logging::Output::Stdout,
|
||||
)
|
||||
.expect("logging init failed");
|
||||
|
||||
@@ -28,7 +28,7 @@ use core::{
|
||||
task::{Context, Poll},
|
||||
};
|
||||
use pin_project_lite::pin_project;
|
||||
use tracing::{field, span::Span, Dispatch};
|
||||
use tracing::{Dispatch, field, span::Span};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PerfSpan {
|
||||
|
||||
@@ -42,6 +42,7 @@ toml_edit = { workspace = true, features = ["serde"] }
|
||||
tracing.workspace = true
|
||||
tracing-error.workspace = true
|
||||
tracing-subscriber = { workspace = true, features = ["json", "registry"] }
|
||||
tracing-utils.workspace = true
|
||||
rand.workspace = true
|
||||
scopeguard.workspace = true
|
||||
strum.workspace = true
|
||||
|
||||
@@ -7,7 +7,9 @@ use metrics::{IntCounter, IntCounterVec};
|
||||
use once_cell::sync::Lazy;
|
||||
use strum_macros::{EnumString, VariantNames};
|
||||
use tokio::time::Instant;
|
||||
use tracing::Dispatch;
|
||||
use tracing::info;
|
||||
use tracing::level_filters::LevelFilter;
|
||||
|
||||
/// Logs a critical error, similarly to `tracing::error!`. This will:
|
||||
///
|
||||
@@ -125,6 +127,15 @@ pub enum TracingErrorLayerEnablement {
|
||||
EnableWithRustLogFilter,
|
||||
}
|
||||
|
||||
pub enum OtelEnablement {
|
||||
Disabled,
|
||||
Enabled {
|
||||
service_name: String,
|
||||
export_config: tracing_utils::ExportConfig,
|
||||
runtime: &'static tokio::runtime::Runtime,
|
||||
},
|
||||
}
|
||||
|
||||
/// Where the logging should output to.
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum Output {
|
||||
@@ -132,11 +143,24 @@ pub enum Output {
|
||||
Stderr,
|
||||
}
|
||||
|
||||
pub struct OtelGuard {
|
||||
pub dispatch: Dispatch,
|
||||
}
|
||||
|
||||
impl Drop for OtelGuard {
|
||||
fn drop(&mut self) {
|
||||
tracing_utils::shutdown_tracing();
|
||||
}
|
||||
}
|
||||
|
||||
pub const PERF_TRACE_TARGET: &str = "P";
|
||||
|
||||
pub fn init(
|
||||
log_format: LogFormat,
|
||||
tracing_error_layer_enablement: TracingErrorLayerEnablement,
|
||||
otel_enablement: OtelEnablement,
|
||||
output: Output,
|
||||
) -> anyhow::Result<()> {
|
||||
) -> anyhow::Result<Option<OtelGuard>> {
|
||||
// We fall back to printing all spans at info-level or above if
|
||||
// the RUST_LOG environment variable is not set.
|
||||
let rust_log_env_filter = || {
|
||||
@@ -165,6 +189,7 @@ pub fn init(
|
||||
};
|
||||
log_layer.with_filter(rust_log_env_filter())
|
||||
});
|
||||
|
||||
let r = r.with(
|
||||
TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()),
|
||||
);
|
||||
@@ -175,7 +200,26 @@ pub fn init(
|
||||
TracingErrorLayerEnablement::Disabled => r.init(),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
let otel_subscriber = match otel_enablement {
|
||||
OtelEnablement::Disabled => None,
|
||||
OtelEnablement::Enabled {
|
||||
service_name,
|
||||
export_config,
|
||||
runtime,
|
||||
} => {
|
||||
let otel_layer = runtime
|
||||
.block_on(tracing_utils::init_tracing(&service_name, export_config))
|
||||
.with_filter(LevelFilter::INFO);
|
||||
let otel_subscriber = tracing_subscriber::registry().with(otel_layer);
|
||||
let otel_dispatch = Dispatch::new(otel_subscriber);
|
||||
|
||||
Some(otel_dispatch)
|
||||
}
|
||||
};
|
||||
|
||||
let otel_guard = otel_subscriber.map(|dispatch| OtelGuard { dispatch });
|
||||
|
||||
Ok(otel_guard)
|
||||
}
|
||||
|
||||
/// Disable the default rust panic hook by using `set_hook`.
|
||||
|
||||
@@ -66,6 +66,7 @@ tokio-stream.workspace = true
|
||||
tokio-util.workspace = true
|
||||
toml_edit = { workspace = true, features = [ "serde" ] }
|
||||
tracing.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
url.workspace = true
|
||||
walkdir.workspace = true
|
||||
metrics.workspace = true
|
||||
|
||||
@@ -10,9 +10,10 @@ pub(crate) fn setup_logging() {
|
||||
logging::init(
|
||||
logging::LogFormat::Test,
|
||||
logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
|
||||
utils::logging::OtelEnablement::Disabled,
|
||||
logging::Output::Stdout,
|
||||
)
|
||||
.expect("Failed to init test logging")
|
||||
.expect("Failed to init test logging");
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -117,6 +117,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
logging::init(
|
||||
LogFormat::Plain,
|
||||
TracingErrorLayerEnablement::EnableWithRustLogFilter,
|
||||
utils::logging::OtelEnablement::Disabled,
|
||||
logging::Output::Stdout,
|
||||
)?;
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ fn main() {
|
||||
logging::init(
|
||||
logging::LogFormat::Plain,
|
||||
logging::TracingErrorLayerEnablement::Disabled,
|
||||
utils::logging::OtelEnablement::Disabled,
|
||||
logging::Output::Stderr,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -21,7 +21,8 @@ use pageserver::deletion_queue::DeletionQueue;
|
||||
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
|
||||
use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
|
||||
use pageserver::task_mgr::{
|
||||
BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
|
||||
BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, OTEL_RUNTIME,
|
||||
WALRECEIVER_RUNTIME,
|
||||
};
|
||||
use pageserver::tenant::{TenantSharedResources, mgr, secondary};
|
||||
use pageserver::{
|
||||
@@ -36,7 +37,7 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::auth::{JwtAuth, SwappableJwtAuth};
|
||||
use utils::crashsafe::syncfs;
|
||||
use utils::logging::TracingErrorLayerEnablement;
|
||||
use utils::logging::{OtelGuard, TracingErrorLayerEnablement};
|
||||
use utils::sentry_init::init_sentry;
|
||||
use utils::{failpoint_support, logging, project_build_tag, project_git_version, tcp_listener};
|
||||
|
||||
@@ -110,12 +111,27 @@ fn main() -> anyhow::Result<()> {
|
||||
} else {
|
||||
TracingErrorLayerEnablement::Disabled
|
||||
};
|
||||
logging::init(
|
||||
|
||||
let otel_enablement = match &conf.tracing {
|
||||
Some(cfg) => utils::logging::OtelEnablement::Enabled {
|
||||
service_name: "pageserver".to_string(),
|
||||
export_config: (&cfg.export_config).into(),
|
||||
runtime: *OTEL_RUNTIME,
|
||||
},
|
||||
None => utils::logging::OtelEnablement::Disabled,
|
||||
};
|
||||
|
||||
let otel_guard = logging::init(
|
||||
conf.log_format,
|
||||
tracing_error_layer_enablement,
|
||||
otel_enablement,
|
||||
logging::Output::Stdout,
|
||||
)?;
|
||||
|
||||
if otel_guard.is_some() {
|
||||
info!(?conf.tracing, "starting with OTEL tracing enabled");
|
||||
}
|
||||
|
||||
// mind the order required here: 1. logging, 2. panic_hook, 3. sentry.
|
||||
// disarming this hook on pageserver, because we never tear down tracing.
|
||||
logging::replace_panic_hook_with_tracing_panic_hook().forget();
|
||||
|
||||
@@ -5718,9 +5718,10 @@ pub(crate) mod harness {
|
||||
// enable it in case the tests exercise code paths that use
|
||||
// debug_assert_current_span_has_tenant_and_timeline_id
|
||||
logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
|
||||
utils::logging::OtelEnablement::Disabled,
|
||||
logging::Output::Stdout,
|
||||
)
|
||||
.expect("Failed to init test logging")
|
||||
.expect("Failed to init test logging");
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -255,6 +255,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
logging::init(
|
||||
LogFormat::from_config(&args.log_format)?,
|
||||
logging::TracingErrorLayerEnablement::Disabled,
|
||||
utils::logging::OtelEnablement::Disabled,
|
||||
logging::Output::Stdout,
|
||||
)?;
|
||||
logging::replace_panic_hook_with_tracing_panic_hook().forget();
|
||||
|
||||
@@ -643,6 +643,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
logging::init(
|
||||
LogFormat::from_config(&args.log_format)?,
|
||||
logging::TracingErrorLayerEnablement::Disabled,
|
||||
utils::logging::OtelEnablement::Disabled,
|
||||
logging::Output::Stdout,
|
||||
)?;
|
||||
logging::replace_panic_hook_with_tracing_panic_hook().forget();
|
||||
|
||||
@@ -232,6 +232,7 @@ fn main() -> anyhow::Result<()> {
|
||||
logging::init(
|
||||
LogFormat::Plain,
|
||||
logging::TracingErrorLayerEnablement::Disabled,
|
||||
utils::logging::OtelEnablement::Disabled,
|
||||
logging::Output::Stdout,
|
||||
)?;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user