to be dropped: tracing-flame usage attempt

for our purposes the folded need post processing:

```
cat .neon/tracing.folded \
  | sed -Ee 's/^ThreadId\([0-9]+\)-//' \
  | inferno-flamegraph >| flamegraph.svg
```
This commit is contained in:
Joonas Koivunen
2023-05-30 14:07:54 +03:00
parent f4db85de40
commit 2d9d679238
6 changed files with 66 additions and 7 deletions

12
Cargo.lock generated
View File

@@ -4590,6 +4590,17 @@ dependencies = [
"tracing-subscriber",
]
[[package]]
name = "tracing-flame"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bae117ee14789185e129aaee5d93750abe67fdc5a9a62650452bfe4e122a3a9"
dependencies = [
"lazy_static",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "tracing-futures"
version = "0.2.5"
@@ -4843,6 +4854,7 @@ dependencies = [
"tokio",
"tracing",
"tracing-error",
"tracing-flame",
"tracing-subscriber",
"url",
"uuid",

View File

@@ -40,6 +40,7 @@ uuid.workspace = true
pq_proto.workspace = true
metrics.workspace = true
workspace_hack.workspace = true
tracing-flame = "0.2"
[dev-dependencies]
byteorder.workspace = true

View File

@@ -104,6 +104,47 @@ pub fn init(
Ok(())
}
pub fn init_with_flame(
log_format: LogFormat,
tracing_error_layer_enablement: TracingErrorLayerEnablement,
) -> anyhow::Result<impl Drop> {
// We fall back to printing all spans at info-level or above if
// the RUST_LOG environment variable is not set.
let rust_log_env_filter = || {
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"))
};
// NB: the order of the with() calls does not matter.
// See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering
use tracing_subscriber::prelude::*;
let r = tracing_subscriber::registry();
let r = r.with({
let log_layer = tracing_subscriber::fmt::layer()
.with_target(false)
.with_ansi(atty::is(atty::Stream::Stdout))
.with_writer(std::io::stdout);
let log_layer = match log_format {
LogFormat::Json => log_layer.json().boxed(),
LogFormat::Plain => log_layer.boxed(),
LogFormat::Test => log_layer.with_test_writer().boxed(),
};
log_layer.with_filter(rust_log_env_filter())
});
let r = r.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter()));
let (flame_layer, guard) = tracing_flame::FlameLayer::with_file("./tracing.folded").unwrap();
let r = r.with(flame_layer);
match tracing_error_layer_enablement {
TracingErrorLayerEnablement::EnableWithRustLogFilter => {
r.with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter()))
.init();
}
TracingErrorLayerEnablement::Disabled => r.init(),
}
Ok(guard)
}
/// Disable the default rust panic hook by using `set_hook`.
///
/// For neon binaries, the assumption is that tracing is configured before with [`init`], after

View File

@@ -98,7 +98,7 @@ fn main() -> anyhow::Result<()> {
} else {
TracingErrorLayerEnablement::Disabled
};
logging::init(conf.log_format, tracing_error_layer_enablement)?;
let _guard = logging::init_with_flame(conf.log_format, tracing_error_layer_enablement)?;
// mind the order required here: 1. logging, 2. panic_hook, 3. sentry.
// disarming this hook on pageserver, because we never tear down tracing.
@@ -127,7 +127,7 @@ fn main() -> anyhow::Result<()> {
virtual_file::init(conf.max_file_descriptors);
page_cache::init(conf.page_cache_size);
start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;
start_pageserver(launch_ts, conf, _guard).context("Failed to start pageserver")?;
scenario.teardown();
Ok(())
@@ -225,6 +225,7 @@ fn initialize_config(
fn start_pageserver(
launch_ts: &'static LaunchTimestamp,
conf: &'static PageServerConf,
guard: impl Drop,
) -> anyhow::Result<()> {
// Print version and launch timestamp to the log,
// and expose them as prometheus metrics.
@@ -484,7 +485,8 @@ fn start_pageserver(
}
// All started up! Now just sit and wait for shutdown signal.
ShutdownSignals::handle(|signal| match signal {
let mut guard = Some(guard);
ShutdownSignals::handle(move |signal| match signal {
Signal::Quit => {
info!(
"Got {}. Terminating in immediate shutdown mode",
@@ -498,7 +500,7 @@ fn start_pageserver(
"Got {}. Terminating gracefully in fast shutdown mode",
signal.name()
);
BACKGROUND_RUNTIME.block_on(pageserver::shutdown_pageserver(0));
BACKGROUND_RUNTIME.block_on(pageserver::shutdown_pageserver(0, guard.take()));
unreachable!()
}
})

View File

@@ -45,8 +45,8 @@ static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);
pub use crate::metrics::preinitialize_metrics;
#[tracing::instrument]
pub async fn shutdown_pageserver(exit_code: i32) {
#[tracing::instrument(skip(guard))]
pub async fn shutdown_pageserver(exit_code: i32, guard: Option<impl Drop>) {
// Shut down the libpq endpoint task. This prevents new connections from
// being accepted.
task_mgr::shutdown_tasks(Some(TaskKind::LibpqEndpointListener), None, None).await;
@@ -72,6 +72,9 @@ pub async fn shutdown_pageserver(exit_code: i32) {
// There should be nothing left, but let's be sure
task_mgr::shutdown_tasks(None, None, None).await;
info!("Shut down successfully completed");
drop(guard);
std::process::exit(exit_code);
}

View File

@@ -433,7 +433,7 @@ async fn task_finish(
}
if shutdown_process {
shutdown_pageserver(1).await;
shutdown_pageserver(1, None::<Box<u32>>).await;
}
}