initial logical size: remove CALLS metric from hot path (#6018)

Only introduced a few hours ago (#5995), I took a look at the numbers from staging and realized that `get_current_logical_size()` is on the walingest hot path: we call it for every `ReplicationMessage::XLogData` that we receive. Since the metric is global, it would be quite a busy cache line. This PR replaces it with a new metric purpose-built for what's most interesting right now.
2026-01-14 00:42:54 +00:00 · 2023-12-01 22:45:04 +01:00
parent c1295bfb3a
commit e43cde7aba
3 changed files with 35 additions and 23 deletions
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -511,24 +511,16 @@ pub(crate) mod initial_logical_size {
        }
    }

-    pub(crate) struct Calls {
-        pub(crate) approximate: IntCounter,
-        pub(crate) exact: IntCounter,
-    }
-
-    pub(crate) static CALLS: Lazy<Calls> = Lazy::new(|| {
-        let vec = register_int_counter_vec!(
-            "pageserver_initial_logical_size_calls",
-            "Incremented each time some code asks for incremental logical size.\
-             The label records the accuracy of the result.",
-            &["accuracy"]
-        )
-        .unwrap();
-        Calls {
-            approximate: vec.with_label_values(&["approximate"]),
-            exact: vec.with_label_values(&["exact"]),
-        }
-    });
+    // context: https://github.com/neondatabase/neon/issues/5963
+    pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
+        Lazy::new(|| {
+            register_int_counter!(
+                "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
+                "Counter for the following event: walreceiver calls\
+                 Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
+            )
+            .unwrap()
+        });
 }

 pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -868,6 +868,24 @@ impl Timeline {
            self.try_spawn_size_init_task(initial_part_end, ctx);
        }

+        if let CurrentLogicalSize::Approximate(_) = &current_size {
+            if ctx.task_kind() == TaskKind::WalReceiverConnectionHandler {
+                let first = self
+                    .current_logical_size
+                    .did_return_approximate_to_walreceiver
+                    .compare_exchange(
+                        false,
+                        true,
+                        AtomicOrdering::Relaxed,
+                        AtomicOrdering::Relaxed,
+                    )
+                    .is_ok();
+                if first {
+                    crate::metrics::initial_logical_size::TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE.inc();
+                }
+            }
+        }
+
        current_size
    }

--- a/pageserver/src/tenant/timeline/logical_size.rs
+++ b/pageserver/src/tenant/timeline/logical_size.rs
@@ -4,7 +4,7 @@ use once_cell::sync::OnceCell;
 use tokio::sync::Semaphore;
 use utils::lsn::Lsn;

-use std::sync::atomic::{AtomicI64, Ordering as AtomicOrdering};
+use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering};
 use std::sync::Arc;

 /// Internal structure to hold all data needed for logical size calculation.
@@ -55,6 +55,9 @@ pub(super) struct LogicalSize {
    /// see `current_logical_size_gauge`. Use the `update_current_logical_size`
    /// to modify this, it will also keep the prometheus metric in sync.
    pub size_added_after_initial: AtomicI64,
+
+    /// For [`crate::metrics::initial_logical_size::TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE`].
+    pub(super) did_return_approximate_to_walreceiver: AtomicBool,
 }

 /// Normalized current size, that the data in pageserver occupies.
@@ -119,6 +122,7 @@ impl LogicalSize {
            initial_size_computation: Arc::new(Semaphore::new(0)),
            initial_part_end: None,
            size_added_after_initial: AtomicI64::new(0),
+            did_return_approximate_to_walreceiver: AtomicBool::new(false),
        }
    }

@@ -128,6 +132,7 @@ impl LogicalSize {
            initial_size_computation: Arc::new(Semaphore::new(1)),
            initial_part_end: Some(compute_to),
            size_added_after_initial: AtomicI64::new(0),
+            did_return_approximate_to_walreceiver: AtomicBool::new(false),
        }
    }

@@ -137,15 +142,12 @@ impl LogicalSize {
        //                  we change the type.
        match self.initial_logical_size.get() {
            Some((initial_size, _)) => {
-                crate::metrics::initial_logical_size::CALLS.exact.inc();
                CurrentLogicalSize::Exact(Exact(initial_size.checked_add_signed(size_increment)
                    .with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}"))
                    .unwrap()))
            }
            None => {
-                crate::metrics::initial_logical_size::CALLS
-                    .approximate
-                    .inc();
+
                let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0);
                CurrentLogicalSize::Approximate(Approximate(non_negative_size_increment))
            }