pageserver: rate limit log for loads of layers visited (#8228)

## Problem At high percentiles we see more than 800 layers being visited by the read path. We need the tenant/timeline to investigate. ## Summary of changes Add a rate limited log line when the average number of layers visited per key is in the last specified histogram bucket. I plan to use this to identify tenants in us-east-2 staging that exhibit this behaviour. Will revert before next week's release.
2026-01-16 01:42:55 +00:00 · 2024-07-02 14:14:10 +01:00
parent 631d39bb78
commit 7d75cf89bc
2 changed files with 29 additions and 3 deletions
--- a/libs/pageserver_api/src/keyspace.rs
+++ b/libs/pageserver_api/src/keyspace.rs
@@ -17,6 +17,16 @@ pub struct KeySpace {
    pub ranges: Vec<Range<Key>>,
 }

+impl std::fmt::Display for KeySpace {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "[")?;
+        for range in &self.ranges {
+            write!(f, "{}..{},", range.start, range.end)?;
+        }
+        write!(f, "]")
+    }
+}
+
 /// A wrapper type for sparse keyspaces.
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct SparseKeySpace(pub KeySpace);
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -996,6 +996,7 @@ impl Timeline {
    }

    pub(crate) const MAX_GET_VECTORED_KEYS: u64 = 32;
+    pub(crate) const VEC_GET_LAYERS_VISITED_WARN_THRESH: f64 = 512.0;

    /// Look up multiple page versions at a given LSN
    ///
@@ -1228,7 +1229,7 @@ impl Timeline {
        let get_data_timer = crate::metrics::GET_RECONSTRUCT_DATA_TIME
            .for_get_kind(get_kind)
            .start_timer();
-        self.get_vectored_reconstruct_data(keyspace, lsn, reconstruct_state, ctx)
+        self.get_vectored_reconstruct_data(keyspace.clone(), lsn, reconstruct_state, ctx)
            .await?;
        get_data_timer.stop_and_record();

@@ -1258,11 +1259,26 @@ impl Timeline {
        // (this is a requirement, not a bug). Skip updating the metric in these cases
        // to avoid infinite results.
        if !results.is_empty() {
+            let avg = layers_visited as f64 / results.len() as f64;
+            if avg >= Self::VEC_GET_LAYERS_VISITED_WARN_THRESH {
+                use utils::rate_limit::RateLimit;
+                static LOGGED: Lazy<Mutex<RateLimit>> =
+                    Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
+                let mut rate_limit = LOGGED.lock().unwrap();
+                rate_limit.call(|| {
+                    tracing::info!(
+                    tenant_id = %self.tenant_shard_id.tenant_id,
+                    shard_id = %self.tenant_shard_id.shard_slug(),
+                    timeline_id = %self.timeline_id,
+                    "Vectored read for {} visited {} layers on average per key and {} in total. {}/{} pages were returned",
+                    keyspace, avg, layers_visited, results.len(), keyspace.total_raw_size());
+                });
+            }
+
            // Note that this is an approximation. Tracking the exact number of layers visited
            // per key requires virtually unbounded memory usage and is inefficient
            // (i.e. segment tree tracking each range queried from a layer)
-            crate::metrics::VEC_READ_NUM_LAYERS_VISITED
-                .observe(layers_visited as f64 / results.len() as f64);
+            crate::metrics::VEC_READ_NUM_LAYERS_VISITED.observe(avg);
        }

        Ok(results)