diff --git a/Cargo.lock b/Cargo.lock index adfd811f2b..f503b45577 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1097,7 +1097,7 @@ checksum = "975982cdb7ad6a142be15bdf84aea7ec6a9e5d4d797c004d43185b24cfe4e684" dependencies = [ "clap", "heck 0.5.0", - "indexmap 2.9.0", + "indexmap 2.10.0", "log", "proc-macro2", "quote", @@ -1313,7 +1313,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", - "indexmap 2.9.0", + "indexmap 2.10.0", "jsonwebtoken", "regex", "remote_storage", @@ -1350,7 +1350,7 @@ dependencies = [ "http-body-util", "hyper 1.4.1", "hyper-util", - "indexmap 2.9.0", + "indexmap 2.10.0", "itertools 0.10.5", "jsonwebtoken", "metrics", @@ -1383,7 +1383,7 @@ dependencies = [ "tokio-postgres", "tokio-stream", "tokio-util", - "tonic 0.13.1", + "tonic", "tower 0.5.2", "tower-http", "tower-otel", @@ -2659,7 +2659,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.9", - "indexmap 2.9.0", + "indexmap 2.10.0", "slab", "tokio", "tokio-util", @@ -2678,7 +2678,7 @@ dependencies = [ "futures-sink", "futures-util", "http 1.3.1", - "indexmap 2.9.0", + "indexmap 2.10.0", "slab", "tokio", "tokio-util", @@ -2937,7 +2937,7 @@ dependencies = [ "pprof", "regex", "routerify", - "rustls 0.23.27", + "rustls 0.23.29", "rustls-pemfile 2.1.1", "serde", "serde_json", @@ -3274,9 +3274,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -3302,7 +3302,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" dependencies = [ "ahash", - "indexmap 2.9.0", + "indexmap 2.10.0", "is-terminal", "itoa", "log", @@ -3325,7 +3325,7 @@ dependencies = [ "crossbeam-utils", "dashmap 6.1.0", "env_logger", - "indexmap 2.9.0", + "indexmap 2.10.0", "itoa", "log", "num-format", @@ -4162,23 +4162,23 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "opentelemetry" -version = "0.27.1" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab70038c28ed37b97d8ed414b6429d343a8bbf44c9f79ec854f3a643029ba6d7" +checksum = "aaf416e4cb72756655126f7dd7bb0af49c674f4c1b9903e80c009e0c37e552e6" dependencies = [ "futures-core", "futures-sink", "js-sys", "pin-project-lite", - "thiserror 1.0.69", + "thiserror 2.0.11", "tracing", ] [[package]] name = "opentelemetry-http" -version = "0.27.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a8a7f5f6ba7c1b286c2fbca0454eaba116f63bbe69ed250b642d36fbb04d80" +checksum = "50f6639e842a97dbea8886e3439710ae463120091e2e064518ba8e716e6ac36d" dependencies = [ "async-trait", "bytes", @@ -4189,12 +4189,10 @@ dependencies = [ [[package]] name = "opentelemetry-otlp" -version = "0.27.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91cf61a1868dacc576bf2b2a1c3e9ab150af7272909e80085c3173384fe11f76" +checksum = "dbee664a43e07615731afc539ca60c6d9f1a9425e25ca09c57bc36c87c55852b" dependencies = [ - "async-trait", - "futures-core", "http 1.3.1", "opentelemetry", "opentelemetry-http", @@ -4202,46 +4200,43 @@ dependencies = [ "opentelemetry_sdk", "prost 0.13.5", "reqwest", - "thiserror 1.0.69", + "thiserror 2.0.11", ] [[package]] name = "opentelemetry-proto" -version = "0.27.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6" +checksum = "2e046fd7660710fe5a05e8748e70d9058dc15c94ba914e7c4faa7c728f0e8ddc" dependencies = [ "opentelemetry", "opentelemetry_sdk", "prost 0.13.5", - "tonic 0.12.3", + "tonic", ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.27.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc1b6902ff63b32ef6c489e8048c5e253e2e4a803ea3ea7e783914536eb15c52" +checksum = "83d059a296a47436748557a353c5e6c5705b9470ef6c95cfc52c21a8814ddac2" [[package]] name = "opentelemetry_sdk" -version = "0.27.1" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "231e9d6ceef9b0b2546ddf52335785ce41252bc7474ee8ba05bfad277be13ab8" +checksum = "11f644aa9e5e31d11896e024305d7e3c98a88884d9f8919dbf37a9991bc47a4b" dependencies = [ - "async-trait", "futures-channel", "futures-executor", "futures-util", - "glob", "opentelemetry", "percent-encoding", - "rand 0.8.5", + "rand 0.9.1", "serde_json", - "thiserror 1.0.69", + "thiserror 2.0.11", "tokio", "tokio-stream", - "tracing", ] [[package]] @@ -4368,7 +4363,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "tonic 0.13.1", + "tonic", "tracing", "url", "utils", @@ -4465,7 +4460,7 @@ dependencies = [ "reqwest", "rpds", "rstest", - "rustls 0.23.27", + "rustls 0.23.29", "scopeguard", "send-future", "serde", @@ -4489,7 +4484,7 @@ dependencies = [ "tokio-tar", "tokio-util", "toml_edit", - "tonic 0.13.1", + "tonic", "tonic-reflection", "tower 0.5.2", "tracing", @@ -4575,7 +4570,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "tonic 0.13.1", + "tonic", "tracing", "utils", "workspace_hack", @@ -4620,7 +4615,7 @@ dependencies = [ "thiserror 1.0.69", "tokio", "tokio-util", - "tonic 0.13.1", + "tonic", "tonic-build", "utils", "workspace_hack", @@ -5002,7 +4997,7 @@ dependencies = [ "bytes", "once_cell", "pq_proto", - "rustls 0.23.27", + "rustls 0.23.29", "rustls-pemfile 2.1.1", "serde", "thiserror 1.0.69", @@ -5401,7 +5396,7 @@ dependencies = [ "hyper 0.14.30", "hyper 1.4.1", "hyper-util", - "indexmap 2.9.0", + "indexmap 2.10.0", "ipnet", "itertools 0.10.5", "itoa", @@ -5438,7 +5433,7 @@ dependencies = [ "rsa", "rstest", "rustc-hash 2.1.1", - "rustls 0.23.27", + "rustls 0.23.29", "rustls-native-certs 0.8.0", "rustls-pemfile 2.1.1", "scopeguard", @@ -5717,7 +5712,7 @@ dependencies = [ "num-bigint", "percent-encoding", "pin-project-lite", - "rustls 0.23.27", + "rustls 0.23.29", "rustls-native-certs 0.8.0", "ryu", "sha1_smol", @@ -5946,9 +5941,9 @@ dependencies = [ [[package]] name = "reqwest-tracing" -version = "0.5.5" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73e6153390585f6961341b50e5a1931d6be6dee4292283635903c26ef9d980d2" +checksum = "d70ea85f131b2ee9874f0b160ac5976f8af75f3c9badfe0d955880257d10bd83" dependencies = [ "anyhow", "async-trait", @@ -6173,15 +6168,15 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.27" +version = "0.23.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "730944ca083c1c233a75c09f199e973ca499344a2b7ba9e755c457e86fb4a321" +checksum = "2491382039b29b9b11ff08b76ff6c97cf287671dbb74f0be44bda389fffe9bd1" dependencies = [ "log", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.3", + "rustls-webpki 0.103.4", "subtle", "zeroize", ] @@ -6245,9 +6240,12 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" +checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +dependencies = [ + "zeroize", +] [[package]] name = "rustls-webpki" @@ -6272,9 +6270,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.3" +version = "0.103.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435" +checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc" dependencies = [ "ring", "rustls-pki-types", @@ -6335,7 +6333,7 @@ dependencies = [ "regex", "remote_storage", "reqwest", - "rustls 0.23.27", + "rustls 0.23.29", "safekeeper_api", "safekeeper_client", "scopeguard", @@ -6525,7 +6523,7 @@ checksum = "255914a8e53822abd946e2ce8baa41d4cded6b8e938913b7f7b9da5b7ab44335" dependencies = [ "httpdate", "reqwest", - "rustls 0.23.27", + "rustls 0.23.29", "sentry-backtrace", "sentry-contexts", "sentry-core", @@ -6657,7 +6655,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d2de91cf02bbc07cde38891769ccd5d4f073d22a40683aa4bc7a95781aaa2c4" dependencies = [ "form_urlencoded", - "indexmap 2.9.0", + "indexmap 2.10.0", "itoa", "ryu", "serde", @@ -6738,7 +6736,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.9.0", + "indexmap 2.10.0", "serde", "serde_derive", "serde_json", @@ -6981,10 +6979,10 @@ dependencies = [ "once_cell", "parking_lot 0.12.1", "prost 0.13.5", - "rustls 0.23.27", + "rustls 0.23.29", "tokio", "tokio-rustls 0.26.2", - "tonic 0.13.1", + "tonic", "tonic-build", "tracing", "utils", @@ -7029,7 +7027,7 @@ dependencies = [ "regex", "reqwest", "routerify", - "rustls 0.23.27", + "rustls 0.23.29", "rustls-native-certs 0.8.0", "safekeeper_api", "safekeeper_client", @@ -7083,7 +7081,7 @@ dependencies = [ "postgres_ffi", "remote_storage", "reqwest", - "rustls 0.23.27", + "rustls 0.23.29", "rustls-native-certs 0.8.0", "serde", "serde_json", @@ -7622,7 +7620,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04fb792ccd6bbcd4bba408eb8a292f70fc4a3589e5d793626f45190e6454b6ab" dependencies = [ "ring", - "rustls 0.23.27", + "rustls 0.23.29", "tokio", "tokio-postgres", "tokio-rustls 0.26.2", @@ -7673,7 +7671,7 @@ version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ - "rustls 0.23.27", + "rustls 0.23.29", "tokio", ] @@ -7772,34 +7770,13 @@ version = "0.22.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f21c7aaf97f1bd9ca9d4f9e73b0a6c74bd5afef56f2bc931943a6e1c37e04e38" dependencies = [ - "indexmap 2.9.0", + "indexmap 2.10.0", "serde", "serde_spanned", "toml_datetime", "winnow", ] -[[package]] -name = "tonic" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" -dependencies = [ - "async-trait", - "base64 0.22.1", - "bytes", - "http 1.3.1", - "http-body 1.0.0", - "http-body-util", - "percent-encoding", - "pin-project", - "prost 0.13.5", - "tokio-stream", - "tower-layer", - "tower-service", - "tracing", -] - [[package]] name = "tonic" version = "0.13.1" @@ -7857,7 +7834,7 @@ dependencies = [ "prost-types 0.13.5", "tokio", "tokio-stream", - "tonic 0.13.1", + "tonic", ] [[package]] @@ -7883,7 +7860,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", - "indexmap 2.9.0", + "indexmap 2.10.0", "pin-project-lite", "slab", "sync_wrapper 1.0.1", @@ -7921,10 +7898,14 @@ checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-otel" -version = "0.2.0" -source = "git+https://github.com/mattiapenati/tower-otel?rev=56a7321053bcb72443888257b622ba0d43a11fcd#56a7321053bcb72443888257b622ba0d43a11fcd" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "345000ea5ae33222624a8ccfdd88892c30db4d413a39c2d4bd714b77e0a4b23c" dependencies = [ + "axum", + "cfg-if", "http 1.3.1", + "http-body 1.0.0", "opentelemetry", "pin-project", "tower-layer", @@ -8006,9 +7987,9 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.28.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97a971f6058498b5c0f1affa23e7ea202057a7301dbff68e968b2d578bcbd053" +checksum = "ddcf5959f39507d0d04d6413119c04f33b623f4f951ebcbdddddfad2d0623a9c" dependencies = [ "js-sys", "once_cell", @@ -8216,7 +8197,7 @@ dependencies = [ "base64 0.22.1", "log", "once_cell", - "rustls 0.23.27", + "rustls 0.23.29", "rustls-pki-types", "url", "webpki-roots", @@ -8888,7 +8869,7 @@ dependencies = [ "hyper 0.14.30", "hyper 1.4.1", "hyper-util", - "indexmap 2.9.0", + "indexmap 2.10.0", "itertools 0.12.1", "lazy_static", "libc", @@ -8911,14 +8892,14 @@ dependencies = [ "proc-macro2", "prost 0.13.5", "quote", - "rand 0.8.5", + "rand 0.9.1", "regex", "regex-automata 0.4.9", "regex-syntax 0.8.5", "reqwest", - "rustls 0.23.27", + "rustls 0.23.29", "rustls-pki-types", - "rustls-webpki 0.103.3", + "rustls-webpki 0.103.4", "scopeguard", "sec1 0.7.3", "serde", @@ -8931,6 +8912,7 @@ dependencies = [ "subtle", "syn 2.0.100", "sync_wrapper 0.1.2", + "thiserror 2.0.11", "tikv-jemalloc-ctl", "tikv-jemalloc-sys", "time", @@ -8940,6 +8922,7 @@ dependencies = [ "tokio-stream", "tokio-util", "toml_edit", + "tonic", "tower 0.5.2", "tracing", "tracing-core", diff --git a/Cargo.toml b/Cargo.toml index 3a57976cd8..00efe79554 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -143,10 +143,10 @@ notify = "6.0.0" num_cpus = "1.15" num-traits = "0.2.19" once_cell = "1.13" -opentelemetry = "0.27" -opentelemetry_sdk = "0.27" -opentelemetry-otlp = { version = "0.27", default-features = false, features = ["http-proto", "trace", "http", "reqwest-client"] } -opentelemetry-semantic-conventions = "0.27" +opentelemetry = "0.30" +opentelemetry_sdk = "0.30" +opentelemetry-otlp = { version = "0.30", default-features = false, features = ["http-proto", "trace", "http", "reqwest-client"] } +opentelemetry-semantic-conventions = "0.30" parking_lot = "0.12" parquet = { version = "53", default-features = false, features = ["zstd"] } parquet_derive = "53" @@ -164,7 +164,7 @@ rand_core = "=0.6" redis = { version = "0.29.2", features = ["tokio-rustls-comp", "keep-alive"] } regex = "1.10.2" reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } -reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_27"] } +reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_30"] } reqwest-middleware = "0.4" reqwest-retry = "0.7" routerify = "3" @@ -214,15 +214,12 @@ tonic = { version = "0.13.1", default-features = false, features = ["channel", " tonic-reflection = { version = "0.13.1", features = ["server"] } tower = { version = "0.5.2", default-features = false } tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] } - -# This revision uses opentelemetry 0.27. There's no tag for it. -tower-otel = { git = "https://github.com/mattiapenati/tower-otel", rev = "56a7321053bcb72443888257b622ba0d43a11fcd" } - +tower-otel = { version = "0.6", features = ["axum"] } tower-service = "0.3.3" tracing = "0.1" tracing-error = "0.2" tracing-log = "0.2" -tracing-opentelemetry = "0.28" +tracing-opentelemetry = "0.31" tracing-serde = "0.2.0" tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] } try-lock = "0.2.5" diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 78e2c6308f..3d07a2ece8 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -188,7 +188,7 @@ fn main() -> Result<()> { .build()?; let _rt_guard = runtime.enter(); - runtime.block_on(init(cli.dev))?; + let tracing_provider = init(cli.dev)?; // enable core dumping for all child processes setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?; @@ -227,11 +227,11 @@ fn main() -> Result<()> { scenario.teardown(); - deinit_and_exit(exit_code); + deinit_and_exit(tracing_provider, exit_code); } -async fn init(dev_mode: bool) -> Result<()> { - init_tracing_and_logging(DEFAULT_LOG_LEVEL).await?; +fn init(dev_mode: bool) -> Result> { + let provider = init_tracing_and_logging(DEFAULT_LOG_LEVEL)?; let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?; thread::spawn(move || { @@ -242,7 +242,7 @@ async fn init(dev_mode: bool) -> Result<()> { info!("compute build_tag: {}", &BUILD_TAG.to_string()); - Ok(()) + Ok(provider) } fn get_config(cli: &Cli) -> Result { @@ -267,25 +267,27 @@ fn get_config(cli: &Cli) -> Result { } } -fn deinit_and_exit(exit_code: Option) -> ! { - // Shutdown trace pipeline gracefully, so that it has a chance to send any - // pending traces before we exit. Shutting down OTEL tracing provider may - // hang for quite some time, see, for example: - // - https://github.com/open-telemetry/opentelemetry-rust/issues/868 - // - and our problems with staging https://github.com/neondatabase/cloud/issues/3707#issuecomment-1493983636 - // - // Yet, we want computes to shut down fast enough, as we may need a new one - // for the same timeline ASAP. So wait no longer than 2s for the shutdown to - // complete, then just error out and exit the main thread. - info!("shutting down tracing"); - let (sender, receiver) = mpsc::channel(); - let _ = thread::spawn(move || { - tracing_utils::shutdown_tracing(); - sender.send(()).ok() - }); - let shutdown_res = receiver.recv_timeout(Duration::from_millis(2000)); - if shutdown_res.is_err() { - error!("timed out while shutting down tracing, exiting anyway"); +fn deinit_and_exit(tracing_provider: Option, exit_code: Option) -> ! { + if let Some(p) = tracing_provider { + // Shutdown trace pipeline gracefully, so that it has a chance to send any + // pending traces before we exit. Shutting down OTEL tracing provider may + // hang for quite some time, see, for example: + // - https://github.com/open-telemetry/opentelemetry-rust/issues/868 + // - and our problems with staging https://github.com/neondatabase/cloud/issues/3707#issuecomment-1493983636 + // + // Yet, we want computes to shut down fast enough, as we may need a new one + // for the same timeline ASAP. So wait no longer than 2s for the shutdown to + // complete, then just error out and exit the main thread. + info!("shutting down tracing"); + let (sender, receiver) = mpsc::channel(); + let _ = thread::spawn(move || { + _ = p.shutdown(); + sender.send(()).ok() + }); + let shutdown_res = receiver.recv_timeout(Duration::from_millis(2000)); + if shutdown_res.is_err() { + error!("timed out while shutting down tracing, exiting anyway"); + } } info!("shutting down"); diff --git a/compute_tools/src/logger.rs b/compute_tools/src/logger.rs index c36f302f99..cd076472a6 100644 --- a/compute_tools/src/logger.rs +++ b/compute_tools/src/logger.rs @@ -13,7 +13,9 @@ use tracing_subscriber::prelude::*; /// set `OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4318`. See /// `tracing-utils` package description. /// -pub async fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> { +pub fn init_tracing_and_logging( + default_log_level: &str, +) -> anyhow::Result> { // Initialize Logging let env_filter = tracing_subscriber::EnvFilter::try_from_default_env() .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_log_level)); @@ -24,8 +26,9 @@ pub async fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result .with_writer(std::io::stderr); // Initialize OpenTelemetry - let otlp_layer = - tracing_utils::init_tracing("compute_ctl", tracing_utils::ExportConfig::default()).await; + let provider = + tracing_utils::init_tracing("compute_ctl", tracing_utils::ExportConfig::default()); + let otlp_layer = provider.as_ref().map(tracing_utils::layer); // Put it all together tracing_subscriber::registry() @@ -37,7 +40,7 @@ pub async fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result utils::logging::replace_panic_hook_with_tracing_panic_hook().forget(); - Ok(()) + Ok(provider) } /// Replace all newline characters with a special character to make it diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index f01c65d1bd..2a8d05f51e 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -394,7 +394,7 @@ impl From<&OtelExporterConfig> for tracing_utils::ExportConfig { tracing_utils::ExportConfig { endpoint: Some(val.endpoint.clone()), protocol: val.protocol.into(), - timeout: val.timeout, + timeout: Some(val.timeout), } } } diff --git a/libs/tracing-utils/src/lib.rs b/libs/tracing-utils/src/lib.rs index 0893aa173b..76782339da 100644 --- a/libs/tracing-utils/src/lib.rs +++ b/libs/tracing-utils/src/lib.rs @@ -1,11 +1,5 @@ //! Helper functions to set up OpenTelemetry tracing. //! -//! This comes in two variants, depending on whether you have a Tokio runtime available. -//! If you do, call `init_tracing()`. It sets up the trace processor and exporter to use -//! the current tokio runtime. If you don't have a runtime available, or you don't want -//! to share the runtime with the tracing tasks, call `init_tracing_without_runtime()` -//! instead. It sets up a dedicated single-threaded Tokio runtime for the tracing tasks. -//! //! Example: //! //! ```rust,no_run @@ -21,7 +15,8 @@ //! .with_writer(std::io::stderr); //! //! // Initialize OpenTelemetry. Exports tracing spans as OpenTelemetry traces -//! let otlp_layer = tracing_utils::init_tracing("my_application", tracing_utils::ExportConfig::default()).await; +//! let provider = tracing_utils::init_tracing("my_application", tracing_utils::ExportConfig::default()); +//! let otlp_layer = provider.as_ref().map(tracing_utils::layer); //! //! // Put it all together //! tracing_subscriber::registry() @@ -36,16 +31,18 @@ pub mod http; pub mod perf_span; -use opentelemetry::KeyValue; use opentelemetry::trace::TracerProvider; use opentelemetry_otlp::WithExportConfig; pub use opentelemetry_otlp::{ExportConfig, Protocol}; +use opentelemetry_sdk::trace::SdkTracerProvider; use tracing::level_filters::LevelFilter; use tracing::{Dispatch, Subscriber}; use tracing_subscriber::Layer; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::registry::LookupSpan; +pub type Provider = SdkTracerProvider; + /// Set up OpenTelemetry exporter, using configuration from environment variables. /// /// `service_name` is set as the OpenTelemetry 'service.name' resource (see @@ -70,16 +67,7 @@ use tracing_subscriber::registry::LookupSpan; /// If you need some other setting, please test if it works first. And perhaps /// add a comment in the list above to save the effort of testing for the next /// person. -/// -/// This doesn't block, but is marked as 'async' to hint that this must be called in -/// asynchronous execution context. -pub async fn init_tracing( - service_name: &str, - export_config: ExportConfig, -) -> Option> -where - S: Subscriber + for<'span> LookupSpan<'span>, -{ +pub fn init_tracing(service_name: &str, export_config: ExportConfig) -> Option { if std::env::var("OTEL_SDK_DISABLED") == Ok("true".to_string()) { return None; }; @@ -89,52 +77,14 @@ where )) } -/// Like `init_tracing`, but creates a separate tokio Runtime for the tracing -/// tasks. -pub fn init_tracing_without_runtime( - service_name: &str, - export_config: ExportConfig, -) -> Option> +pub fn layer(p: &Provider) -> impl Layer where S: Subscriber + for<'span> LookupSpan<'span>, { - if std::env::var("OTEL_SDK_DISABLED") == Ok("true".to_string()) { - return None; - }; - - // The opentelemetry batch processor and the OTLP exporter needs a Tokio - // runtime. Create a dedicated runtime for them. One thread should be - // enough. - // - // (Alternatively, instead of batching, we could use the "simple - // processor", which doesn't need Tokio, and use "reqwest-blocking" - // feature for the OTLP exporter, which also doesn't need Tokio. However, - // batching is considered best practice, and also I have the feeling that - // the non-Tokio codepaths in the opentelemetry crate are less used and - // might be more buggy, so better to stay on the well-beaten path.) - // - // We leak the runtime so that it keeps running after we exit the - // function. - let runtime = Box::leak(Box::new( - tokio::runtime::Builder::new_multi_thread() - .enable_all() - .thread_name("otlp runtime thread") - .worker_threads(1) - .build() - .unwrap(), - )); - let _guard = runtime.enter(); - - Some(init_tracing_internal( - service_name.to_string(), - export_config, - )) + tracing_opentelemetry::layer().with_tracer(p.tracer("global")) } -fn init_tracing_internal(service_name: String, export_config: ExportConfig) -> impl Layer -where - S: Subscriber + for<'span> LookupSpan<'span>, -{ +fn init_tracing_internal(service_name: String, export_config: ExportConfig) -> Provider { // Sets up exporter from the provided [`ExportConfig`] parameter. // If the endpoint is not specified, it is loaded from the // OTEL_EXPORTER_OTLP_ENDPOINT environment variable. @@ -153,22 +103,14 @@ where opentelemetry_sdk::propagation::TraceContextPropagator::new(), ); - let tracer = opentelemetry_sdk::trace::TracerProvider::builder() - .with_batch_exporter(exporter, opentelemetry_sdk::runtime::Tokio) - .with_resource(opentelemetry_sdk::Resource::new(vec![KeyValue::new( - opentelemetry_semantic_conventions::resource::SERVICE_NAME, - service_name, - )])) + Provider::builder() + .with_batch_exporter(exporter) + .with_resource( + opentelemetry_sdk::Resource::builder() + .with_service_name(service_name) + .build(), + ) .build() - .tracer("global"); - - tracing_opentelemetry::layer().with_tracer(tracer) -} - -// Shutdown trace pipeline gracefully, so that it has a chance to send any -// pending traces before we exit. -pub fn shutdown_tracing() { - opentelemetry::global::shutdown_tracer_provider(); } pub enum OtelEnablement { @@ -176,17 +118,17 @@ pub enum OtelEnablement { Enabled { service_name: String, export_config: ExportConfig, - runtime: &'static tokio::runtime::Runtime, }, } pub struct OtelGuard { + provider: Provider, pub dispatch: Dispatch, } impl Drop for OtelGuard { fn drop(&mut self) { - shutdown_tracing(); + _ = self.provider.shutdown(); } } @@ -199,22 +141,19 @@ impl Drop for OtelGuard { /// The lifetime of the guard should match taht of the application. On drop, it tears down the /// OTEL infra. pub fn init_performance_tracing(otel_enablement: OtelEnablement) -> Option { - let otel_subscriber = match otel_enablement { + match otel_enablement { OtelEnablement::Disabled => None, OtelEnablement::Enabled { service_name, export_config, - runtime, } => { - let otel_layer = runtime - .block_on(init_tracing(&service_name, export_config)) - .with_filter(LevelFilter::INFO); + let provider = init_tracing(&service_name, export_config)?; + + let otel_layer = layer(&provider).with_filter(LevelFilter::INFO); let otel_subscriber = tracing_subscriber::registry().with(otel_layer); - let otel_dispatch = Dispatch::new(otel_subscriber); + let dispatch = Dispatch::new(otel_subscriber); - Some(otel_dispatch) + Some(OtelGuard { dispatch, provider }) } - }; - - otel_subscriber.map(|dispatch| OtelGuard { dispatch }) + } } diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index dfb8b437c3..855af7009c 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -126,7 +126,6 @@ fn main() -> anyhow::Result<()> { Some(cfg) => tracing_utils::OtelEnablement::Enabled { service_name: "pageserver".to_string(), export_config: (&cfg.export_config).into(), - runtime: *COMPUTE_REQUEST_RUNTIME, }, None => tracing_utils::OtelEnablement::Disabled, }; diff --git a/proxy/src/binary/pg_sni_router.rs b/proxy/src/binary/pg_sni_router.rs index 4ac8b6a995..f3782312dc 100644 --- a/proxy/src/binary/pg_sni_router.rs +++ b/proxy/src/binary/pg_sni_router.rs @@ -76,7 +76,7 @@ fn cli() -> clap::Command { } pub async fn run() -> anyhow::Result<()> { - let _logging_guard = crate::logging::init().await?; + let _logging_guard = crate::logging::init()?; let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); diff --git a/proxy/src/binary/proxy.rs b/proxy/src/binary/proxy.rs index 255f6fbbee..4148f4bc62 100644 --- a/proxy/src/binary/proxy.rs +++ b/proxy/src/binary/proxy.rs @@ -334,7 +334,7 @@ struct PgSniRouterArgs { } pub async fn run() -> anyhow::Result<()> { - let _logging_guard = crate::logging::init().await?; + let _logging_guard = crate::logging::init()?; let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); diff --git a/proxy/src/logging.rs b/proxy/src/logging.rs index d4fd826c13..0abb500608 100644 --- a/proxy/src/logging.rs +++ b/proxy/src/logging.rs @@ -26,7 +26,7 @@ use crate::metrics::Metrics; /// configuration from environment variables. For example, to change the /// destination, set `OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4318`. /// See -pub async fn init() -> anyhow::Result { +pub fn init() -> anyhow::Result { let logfmt = LogFormat::from_env()?; let env_filter = EnvFilter::builder() @@ -43,8 +43,8 @@ pub async fn init() -> anyhow::Result { .expect("this should be a valid filter directive"), ); - let otlp_layer = - tracing_utils::init_tracing("proxy", tracing_utils::ExportConfig::default()).await; + let provider = tracing_utils::init_tracing("proxy", tracing_utils::ExportConfig::default()); + let otlp_layer = provider.as_ref().map(tracing_utils::layer); let json_log_layer = if logfmt == LogFormat::Json { Some(JsonLoggingLayer::new( @@ -76,7 +76,7 @@ pub async fn init() -> anyhow::Result { .with(text_log_layer) .try_init()?; - Ok(LoggingGuard) + Ok(LoggingGuard(provider)) } /// Initialize logging for local_proxy with log prefix and no opentelemetry. @@ -97,7 +97,7 @@ pub fn init_local_proxy() -> anyhow::Result { .with(fmt_layer) .try_init()?; - Ok(LoggingGuard) + Ok(LoggingGuard(None)) } pub struct LocalProxyFormatter(Format); @@ -118,14 +118,16 @@ where } } -pub struct LoggingGuard; +pub struct LoggingGuard(Option); impl Drop for LoggingGuard { fn drop(&mut self) { - // Shutdown trace pipeline gracefully, so that it has a chance to send any - // pending traces before we exit. - tracing::info!("shutting down the tracing machinery"); - tracing_utils::shutdown_tracing(); + if let Some(p) = &self.0 { + // Shutdown trace pipeline gracefully, so that it has a chance to send any + // pending traces before we exit. + tracing::info!("shutting down the tracing machinery"); + drop(p.shutdown()); + } } } diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index d6d64a2045..f5984d3ac3 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -74,7 +74,7 @@ once_cell = { version = "1" } p256 = { version = "0.13", features = ["jwk"] } parquet = { version = "53", default-features = false, features = ["zstd"] } prost = { version = "0.13", features = ["no-recursion-limit", "prost-derive"] } -rand = { version = "0.8", features = ["small_rng"] } +rand = { version = "0.9" } regex = { version = "1" } regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8" } @@ -93,6 +93,7 @@ spki = { version = "0.7", default-features = false, features = ["pem", "std"] } stable_deref_trait = { version = "1" } subtle = { version = "2" } sync_wrapper = { version = "0.1", default-features = false, features = ["futures"] } +thiserror = { version = "2" } tikv-jemalloc-ctl = { version = "0.6", features = ["stats", "use_std"] } tikv-jemalloc-sys = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] } time = { version = "0.3", features = ["macros", "serde-well-known"] } @@ -101,6 +102,7 @@ tokio-rustls = { version = "0.26", default-features = false, features = ["loggin tokio-stream = { version = "0.1", features = ["net", "sync"] } tokio-util = { version = "0.7", features = ["codec", "compat", "io-util", "rt"] } toml_edit = { version = "0.22", features = ["serde"] } +tonic = { version = "0.13", default-features = false, features = ["codegen", "gzip", "prost", "router", "server", "tls-native-roots", "tls-ring", "zstd"] } tower = { version = "0.5", default-features = false, features = ["balance", "buffer", "limit", "log"] } tracing = { version = "0.1", features = ["log"] } tracing-core = { version = "0.1" }