From fac5db3c8de25b6f44b267365926fd122c901a44 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 14 Feb 2025 15:37:03 +0100 Subject: [PATCH] page_service: emit periodic log message while response flush is slow (#10813) The logic might seem a bit intricate / over-optimized, but I recently spent time benchmarking this code path in the context of a nightly pagebench regression (https://github.com/neondatabase/cloud/issues/21759) and I want to avoid regressing it any further. Ideally would also log the socket send & recv queue length like we do on the compute side in - https://github.com/neondatabase/neon/pull/10673 But that is proving difficult due to the Rust abstractions that wrap the socket fd. Work in progress on that is happening in - https://github.com/neondatabase/neon/pull/10823 Regarding production impact, I am worried at a theoretical level that the additional logging may cause a downward spiral in the case where a pageserver is slow to flush because there is not enough CPU. The logging would consume more CPU and thereby slow down flushes even more. However, I don't think this matters practically speaking. # Refs - context: https://neondb.slack.com/archives/C08DE6Q9C3B/p1739464533762049?thread_ts=1739462628.361019&cid=C08DE6Q9C3B - fixes https://github.com/neondatabase/neon/issues/10668 - part of https://github.com/neondatabase/cloud/issues/23515 # Testing Tested locally by running ``` ./target/debug/pagebench get-page-latest-lsn --num-clients=1000 --queue-depth=1000 ``` in one terminal, waiting a bit, then ``` pkill -STOP pagebench ``` then wait for slow logs to show up in `pageserver.log`. To see that the completion log message is logged, run ``` pkill -CONT pagebench ``` --- pageserver/src/metrics.rs | 42 +++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 983a3079e4..6a5dc3e749 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1439,27 +1439,43 @@ impl Drop for SmgrOpTimer { } impl SmgrOpFlushInProgress { - pub(crate) async fn measure(self, mut started_at: Instant, mut fut: Fut) -> O + pub(crate) async fn measure(self, started_at: Instant, mut fut: Fut) -> O where Fut: std::future::Future, { let mut fut = std::pin::pin!(fut); - // Whenever observe_guard gets called, or dropped, - // it adds the time elapsed since its last call to metrics. - // Last call is tracked in `now`. + let mut logged = false; + let mut last_counter_increment_at = started_at; let mut observe_guard = scopeguard::guard( - || { + |is_timeout| { let now = Instant::now(); - let elapsed = now - started_at; - self.global_micros - .inc_by(u64::try_from(elapsed.as_micros()).unwrap()); - self.per_timeline_micros - .inc_by(u64::try_from(elapsed.as_micros()).unwrap()); - started_at = now; + + // Increment counter + { + let elapsed_since_last_observe = now - last_counter_increment_at; + self.global_micros + .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap()); + self.per_timeline_micros + .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap()); + last_counter_increment_at = now; + } + + // Log something on every timeout, and on completion but only if we hit a timeout. + if is_timeout || logged { + logged = true; + let elapsed_total = now - started_at; + let msg = if is_timeout { + "slow flush ongoing" + } else { + "slow flush completed or cancelled" + }; + let elapsed_total_secs = format!("{:.6}", elapsed_total.as_secs_f64()); + tracing::info!(elapsed_total_secs, msg); + } }, |mut observe| { - observe(); + observe(false); }, ); @@ -1467,7 +1483,7 @@ impl SmgrOpFlushInProgress { match tokio::time::timeout(Duration::from_secs(10), &mut fut).await { Ok(v) => return v, Err(_timeout) => { - (*observe_guard)(); + (*observe_guard)(true); } } }