From fac5db3c8de25b6f44b267365926fd122c901a44 Mon Sep 17 00:00:00 2001
From: Christian Schwarz <christian@neon.tech>
Date: Fri, 14 Feb 2025 15:37:03 +0100
Subject: [PATCH] page_service: emit periodic log message while response flush
 is slow (#10813)

The logic might seem a bit intricate / over-optimized, but I recently
spent time benchmarking this code path in the context of a nightly
pagebench regression
(https://github.com/neondatabase/cloud/issues/21759)
and I want to avoid regressing it any further.

Ideally, we would also log the socket send & recv queue lengths, like we do
on the compute side in
- https://github.com/neondatabase/neon/pull/10673

But that is proving difficult due to the Rust abstractions that wrap the
socket fd.
Work in progress on that is happening in
- https://github.com/neondatabase/neon/pull/10823

Regarding production impact, I am worried at a theoretical level that
the additional logging may cause a downward spiral in the case where a
pageserver is slow to flush because there is not enough CPU. The logging
would consume more CPU and thereby slow down flushes even more. However,
I don't think this matters practically speaking.


# Refs

- context:
https://neondb.slack.com/archives/C08DE6Q9C3B/p1739464533762049?thread_ts=1739462628.361019&cid=C08DE6Q9C3B
- fixes https://github.com/neondatabase/neon/issues/10668
- part of https://github.com/neondatabase/cloud/issues/23515

# Testing

Tested locally by running

```
./target/debug/pagebench get-page-latest-lsn --num-clients=1000 --queue-depth=1000
```
in one terminal, waiting a bit, then
```
pkill -STOP pagebench
```
then wait for slow logs to show up in `pageserver.log`.
To see that the completion log message is logged, run
```
pkill -CONT pagebench
```
---
 pageserver/src/metrics.rs | 42 +++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 13 deletions(-)
diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index 983a3079e4..6a5dc3e749 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1439,27 +1439,43 @@ impl Drop for SmgrOpTimer {
 }
 
 impl SmgrOpFlushInProgress {
-    pub(crate) async fn measure<Fut, O>(self, mut started_at: Instant, mut fut: Fut) -> O
+    pub(crate) async fn measure<Fut, O>(self, started_at: Instant, mut fut: Fut) -> O
     where
         Fut: std::future::Future<Output = O>,
     {
         let mut fut = std::pin::pin!(fut);
 
-        // Whenever observe_guard gets called, or dropped,
-        // it adds the time elapsed since its last call to metrics.
-        // Last call is tracked in `now`.
+        let mut logged = false;
+        let mut last_counter_increment_at = started_at;
         let mut observe_guard = scopeguard::guard(
-            || {
+            |is_timeout| {
                 let now = Instant::now();
-                let elapsed = now - started_at;
-                self.global_micros
-                    .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
-                self.per_timeline_micros
-                    .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
-                started_at = now;
+
+                // Increment counter
+                {
+                    let elapsed_since_last_observe = now - last_counter_increment_at;
+                    self.global_micros
+                        .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());
+                    self.per_timeline_micros
+                        .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());
+                    last_counter_increment_at = now;
+                }
+
+                // Log something on every timeout, and on completion but only if we hit a timeout.
+                if is_timeout || logged {
+                    logged = true;
+                    let elapsed_total = now - started_at;
+                    let msg = if is_timeout {
+                        "slow flush ongoing"
+                    } else {
+                        "slow flush completed or cancelled"
+                    };
+                    let elapsed_total_secs = format!("{:.6}", elapsed_total.as_secs_f64());
+                    tracing::info!(elapsed_total_secs, msg);
+                }
             },
             |mut observe| {
-                observe();
+                observe(false);
             },
         );
 
@@ -1467,7 +1483,7 @@ impl SmgrOpFlushInProgress {
             match tokio::time::timeout(Duration::from_secs(10), &mut fut).await {
                 Ok(v) => return v,
                 Err(_timeout) => {
-                    (*observe_guard)();
+                    (*observe_guard)(true);
                 }
             }
         }