From 2d9d6792382fd0ba00cd7c666761537109c9504a Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Tue, 30 May 2023 14:07:54 +0300 Subject: [PATCH] to be dropped: tracing-flame usage attempt for our purposes the folded need post processing: ``` cat .neon/tracing.folded \ | sed -Ee 's/^ThreadId\([0-9]+\)-//' \ | inferno-flamegraph >| flamegraph.svg ``` --- Cargo.lock | 12 ++++++++++ libs/utils/Cargo.toml | 1 + libs/utils/src/logging.rs | 41 ++++++++++++++++++++++++++++++++ pageserver/src/bin/pageserver.rs | 10 ++++---- pageserver/src/lib.rs | 7 ++++-- pageserver/src/task_mgr.rs | 2 +- 6 files changed, 66 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d390df94e0..581faaf6c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4590,6 +4590,17 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "tracing-flame" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bae117ee14789185e129aaee5d93750abe67fdc5a9a62650452bfe4e122a3a9" +dependencies = [ + "lazy_static", + "tracing", + "tracing-subscriber", +] + [[package]] name = "tracing-futures" version = "0.2.5" @@ -4843,6 +4854,7 @@ dependencies = [ "tokio", "tracing", "tracing-error", + "tracing-flame", "tracing-subscriber", "url", "uuid", diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 8239ffff57..98fd5dc63f 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -40,6 +40,7 @@ uuid.workspace = true pq_proto.workspace = true metrics.workspace = true workspace_hack.workspace = true +tracing-flame = "0.2" [dev-dependencies] byteorder.workspace = true diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index 2b8c852d86..6935cbace6 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -104,6 +104,47 @@ pub fn init( Ok(()) } +pub fn init_with_flame( + log_format: LogFormat, + tracing_error_layer_enablement: TracingErrorLayerEnablement, +) -> anyhow::Result { + // We fall back to printing all spans at info-level or above if + // the RUST_LOG environment variable is not set. + let rust_log_env_filter = || { + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")) + }; + + // NB: the order of the with() calls does not matter. + // See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering + use tracing_subscriber::prelude::*; + let r = tracing_subscriber::registry(); + let r = r.with({ + let log_layer = tracing_subscriber::fmt::layer() + .with_target(false) + .with_ansi(atty::is(atty::Stream::Stdout)) + .with_writer(std::io::stdout); + let log_layer = match log_format { + LogFormat::Json => log_layer.json().boxed(), + LogFormat::Plain => log_layer.boxed(), + LogFormat::Test => log_layer.with_test_writer().boxed(), + }; + log_layer.with_filter(rust_log_env_filter()) + }); + let r = r.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter())); + let (flame_layer, guard) = tracing_flame::FlameLayer::with_file("./tracing.folded").unwrap(); + let r = r.with(flame_layer); + match tracing_error_layer_enablement { + TracingErrorLayerEnablement::EnableWithRustLogFilter => { + r.with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())) + .init(); + } + TracingErrorLayerEnablement::Disabled => r.init(), + } + + Ok(guard) +} + /// Disable the default rust panic hook by using `set_hook`. /// /// For neon binaries, the assumption is that tracing is configured before with [`init`], after diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index a2cebffc83..c412ae38e8 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -98,7 +98,7 @@ fn main() -> anyhow::Result<()> { } else { TracingErrorLayerEnablement::Disabled }; - logging::init(conf.log_format, tracing_error_layer_enablement)?; + let _guard = logging::init_with_flame(conf.log_format, tracing_error_layer_enablement)?; // mind the order required here: 1. logging, 2. panic_hook, 3. sentry. // disarming this hook on pageserver, because we never tear down tracing. @@ -127,7 +127,7 @@ fn main() -> anyhow::Result<()> { virtual_file::init(conf.max_file_descriptors); page_cache::init(conf.page_cache_size); - start_pageserver(launch_ts, conf).context("Failed to start pageserver")?; + start_pageserver(launch_ts, conf, _guard).context("Failed to start pageserver")?; scenario.teardown(); Ok(()) @@ -225,6 +225,7 @@ fn initialize_config( fn start_pageserver( launch_ts: &'static LaunchTimestamp, conf: &'static PageServerConf, + guard: impl Drop, ) -> anyhow::Result<()> { // Print version and launch timestamp to the log, // and expose them as prometheus metrics. @@ -484,7 +485,8 @@ fn start_pageserver( } // All started up! Now just sit and wait for shutdown signal. - ShutdownSignals::handle(|signal| match signal { + let mut guard = Some(guard); + ShutdownSignals::handle(move |signal| match signal { Signal::Quit => { info!( "Got {}. Terminating in immediate shutdown mode", @@ -498,7 +500,7 @@ fn start_pageserver( "Got {}. Terminating gracefully in fast shutdown mode", signal.name() ); - BACKGROUND_RUNTIME.block_on(pageserver::shutdown_pageserver(0)); + BACKGROUND_RUNTIME.block_on(pageserver::shutdown_pageserver(0, guard.take())); unreachable!() } }) diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index 776cf0dac1..95851618ce 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -45,8 +45,8 @@ static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]); pub use crate::metrics::preinitialize_metrics; -#[tracing::instrument] -pub async fn shutdown_pageserver(exit_code: i32) { +#[tracing::instrument(skip(guard))] +pub async fn shutdown_pageserver(exit_code: i32, guard: Option) { // Shut down the libpq endpoint task. This prevents new connections from // being accepted. task_mgr::shutdown_tasks(Some(TaskKind::LibpqEndpointListener), None, None).await; @@ -72,6 +72,9 @@ pub async fn shutdown_pageserver(exit_code: i32) { // There should be nothing left, but let's be sure task_mgr::shutdown_tasks(None, None, None).await; info!("Shut down successfully completed"); + + drop(guard); + std::process::exit(exit_code); } diff --git a/pageserver/src/task_mgr.rs b/pageserver/src/task_mgr.rs index 82aebc6c07..b58f51665c 100644 --- a/pageserver/src/task_mgr.rs +++ b/pageserver/src/task_mgr.rs @@ -433,7 +433,7 @@ async fn task_finish( } if shutdown_process { - shutdown_pageserver(1).await; + shutdown_pageserver(1, None::>).await; } }