mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-29 19:10:38 +00:00
pageserver: tweaks to slow/hung task logging (#6098)
## Problem

- `shutdown_tasks` would log when a particular task was taking a long time to shut down, but not when it eventually completed. That left one uncertain as to whether the slow task was the source of a hang, or just a precursor.

## Summary of changes

- Add a log line after a slow task shutdown.
- Add an equivalent in Gate's `warn_if_stuck`, in case we ever need it. This isn't related to the original issue but was noticed when checking through these logging paths.
This commit is contained in:
@@ -30,18 +30,32 @@ async fn warn_if_stuck<Fut: std::future::Future>(
|
||||
|
||||
let mut fut = std::pin::pin!(fut);
|
||||
|
||||
loop {
|
||||
let mut warned = false;
|
||||
let ret = loop {
|
||||
match tokio::time::timeout(warn_period, &mut fut).await {
|
||||
Ok(ret) => return ret,
|
||||
Ok(ret) => break ret,
|
||||
Err(_) => {
|
||||
tracing::warn!(
|
||||
gate = name,
|
||||
elapsed_ms = started.elapsed().as_millis(),
|
||||
"still waiting, taking longer than expected..."
|
||||
);
|
||||
warned = true;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// If we emitted a warning for slowness, also emit a message when we complete, so that
|
||||
// someone debugging a shutdown can know for sure whether we have moved past this operation.
|
||||
if warned {
|
||||
tracing::info!(
|
||||
gate = name,
|
||||
elapsed_ms = started.elapsed().as_millis(),
|
||||
"completed, after taking longer than expected"
|
||||
)
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -518,12 +518,13 @@ pub async fn shutdown_tasks(
|
||||
{
|
||||
// allow some time to elapse before logging to cut down the number of log
|
||||
// lines.
|
||||
info!("waiting for {} to shut down", task.name);
|
||||
info!("waiting for task {} to shut down", task.name);
|
||||
// we never handled this return value, but:
|
||||
// - we don't deschedule which would lead to is_cancelled
|
||||
// - panics are already logged (is_panicked)
|
||||
// - task errors are already logged in the wrapper
|
||||
let _ = join_handle.await;
|
||||
info!("task {} completed", task.name);
|
||||
}
|
||||
} else {
|
||||
// Possibly one of:
|
||||
|
||||
Reference in New Issue
Block a user