initial logical size calculation: add a bunch of metrics (#5995)

These will help us answer questions such as: - when & at what do calculations get started after PS restart? - how often is the api to get current incrementally-computed logical size called, and does it return Exact vs Approximate? I'd also be interested in a histogram of how much wall clock time size calculations take, but, I don't know good bucket sizes, and, logging it would introduce yet another per-timeline log message during startup; don't think that's worth it just yet. Context - https://neondb.slack.com/archives/C033RQ5SPDH/p1701197668789769 - https://github.com/neondatabase/neon/issues/5962 - https://github.com/neondatabase/neon/issues/5963 - https://github.com/neondatabase/neon/pull/5955 - https://github.com/neondatabase/cloud/issues/7408
2026-01-10 23:12:54 +00:00 · 2023-12-01 12:52:59 +01:00
parent 1ce1c82d78
commit 1c88824ed0
3 changed files with 154 additions and 7 deletions
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -403,6 +403,134 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
    .expect("failed to define current logical size metric")
 });

+pub(crate) mod initial_logical_size {
+    use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
+    use once_cell::sync::Lazy;
+
+    use crate::task_mgr::TaskKind;
+
+    pub(crate) struct StartCalculation(IntCounterVec);
+    pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
+        StartCalculation(
+            register_int_counter_vec!(
+                "pageserver_initial_logical_size_start_calculation",
+                "Incremented each time we start an initial logical size calculation attempt. \
+                 The `task_kind` label is for the task kind that caused this attempt.",
+                &["attempt", "task_kind"]
+            )
+            .unwrap(),
+        )
+    });
+
+    struct DropCalculation {
+        first: IntCounter,
+        retry: IntCounter,
+    }
+
+    static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
+        let vec = register_int_counter_vec!(
+            "pageserver_initial_logical_size_drop_calculation",
+            "Incremented each time we abort a started size calculation attmpt.",
+            &["attempt"]
+        )
+        .unwrap();
+        DropCalculation {
+            first: vec.with_label_values(&["first"]),
+            retry: vec.with_label_values(&["retry"]),
+        }
+    });
+
+    pub(crate) struct Calculated {
+        pub(crate) births: IntCounter,
+        pub(crate) deaths: IntCounter,
+    }
+
+    pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
+        births: register_int_counter!(
+            "pageserver_initial_logical_size_finish_calculation",
+            "Incremented every time we finish calculation of initial logical size.\
+             If everything is working well, this should happen at most once per Timeline object."
+        )
+        .unwrap(),
+        deaths: register_int_counter!(
+            "pageserver_initial_logical_size_drop_finished_calculation",
+            "Incremented when we drop a finished initial logical size calculation result.\
+             Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
+        )
+        .unwrap(),
+    });
+
+    pub(crate) struct OngoingCalculationGuard {
+        inc_drop_calculation: Option<IntCounter>,
+    }
+
+    impl StartCalculation {
+        pub(crate) fn first(&self, causing_task_kind: Option<TaskKind>) -> OngoingCalculationGuard {
+            let task_kind_label: &'static str =
+                causing_task_kind.map(|k| k.into()).unwrap_or_default();
+            self.0.with_label_values(&["first", task_kind_label]);
+            OngoingCalculationGuard {
+                inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
+            }
+        }
+        pub(crate) fn retry(&self, causing_task_kind: Option<TaskKind>) -> OngoingCalculationGuard {
+            let task_kind_label: &'static str =
+                causing_task_kind.map(|k| k.into()).unwrap_or_default();
+            self.0.with_label_values(&["retry", task_kind_label]);
+            OngoingCalculationGuard {
+                inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
+            }
+        }
+    }
+
+    impl Drop for OngoingCalculationGuard {
+        fn drop(&mut self) {
+            if let Some(counter) = self.inc_drop_calculation.take() {
+                counter.inc();
+            }
+        }
+    }
+
+    impl OngoingCalculationGuard {
+        pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
+            drop(self.inc_drop_calculation.take());
+            CALCULATED.births.inc();
+            FinishedCalculationGuard {
+                inc_on_drop: CALCULATED.deaths.clone(),
+            }
+        }
+    }
+
+    pub(crate) struct FinishedCalculationGuard {
+        inc_on_drop: IntCounter,
+    }
+
+    impl Drop for FinishedCalculationGuard {
+        fn drop(&mut self) {
+            self.inc_on_drop.inc();
+        }
+    }
+
+    pub(crate) struct Calls {
+        pub(crate) approximate: IntCounter,
+        pub(crate) exact: IntCounter,
+    }
+
+    pub(crate) static CALLS: Lazy<Calls> = Lazy::new(|| {
+        let vec = register_int_counter_vec!(
+            "pageserver_initial_logical_size_calls",
+            "Incremented each time some code asks for incremental logical size.\
+             The label records the accuracy of the result.",
+            &["accuracy"]
+        )
+        .unwrap();
+        Calls {
+            approximate: vec.with_label_values(&["approximate"]),
+            exact: vec.with_label_values(&["exact"]),
+        }
+    });
+}
+
 pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
    register_uint_gauge_vec!(
        "pageserver_tenant_states_count",
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -1812,6 +1812,7 @@ impl Timeline {
            "spawning logical size computation from context of task kind {:?}",
            ctx.task_kind()
        );
+        let causing_task_kind = ctx.task_kind();
        // We need to start the computation task.
        // It gets a separate context since it will outlive the request that called this function.
        let self_clone = Arc::clone(self);
@@ -1839,6 +1840,8 @@ impl Timeline {
                    _ = completion::Barrier::maybe_wait(self_clone.initial_logical_size_can_start.clone()) => {}
                };

+
+
                // hold off background tasks from starting until all timelines get to try at least
                // once initial logical size calculation; though retry will rarely be useful.
                // holding off is done because heavier tasks execute blockingly on the same
@@ -1846,7 +1849,12 @@ impl Timeline {
                //
                // dropping this at every outcome is probably better than trying to cling on to it,
                // delay will be terminated by a timeout regardless.
-                let _completion = { self_clone.initial_logical_size_attempt.lock().expect("unexpected initial_logical_size_attempt poisoned").take() };
+                let completion = { self_clone.initial_logical_size_attempt.lock().expect("unexpected initial_logical_size_attempt poisoned").take() };
+
+                let metrics_guard = match &completion {
+                    Some(_) => crate::metrics::initial_logical_size::START_CALCULATION.first(Some(causing_task_kind)),
+                    None => crate::metrics::initial_logical_size::START_CALCULATION.retry(Some(causing_task_kind)),
+                };

                let calculated_size = match self_clone
                    .logical_size_calculation_task(lsn, LogicalSizeCalculationCause::Initial, &background_ctx)
@@ -1891,11 +1899,11 @@ impl Timeline {
                match self_clone
                    .current_logical_size
                    .initial_logical_size
-                    .set(calculated_size)
+                    .set((calculated_size, metrics_guard.calculation_result_saved()))
                {
                    Ok(()) => (),
                    Err(_what_we_just_attempted_to_set) => {
-                        let existing_size = self_clone
+                        let (existing_size, _) = self_clone
                            .current_logical_size
                            .initial_logical_size
                            .get()
--- a/pageserver/src/tenant/timeline/logical_size.rs
+++ b/pageserver/src/tenant/timeline/logical_size.rs
@@ -23,7 +23,10 @@ pub(super) struct LogicalSize {
    ///
    /// NOTE: size at a given LSN is constant, but after a restart we will calculate
    /// the initial size at a different LSN.
-    pub initial_logical_size: OnceCell<u64>,
+    pub initial_logical_size: OnceCell<(
+        u64,
+        crate::metrics::initial_logical_size::FinishedCalculationGuard,
+    )>,

    /// Semaphore to track ongoing calculation of `initial_logical_size`.
    pub initial_size_computation: Arc<tokio::sync::Semaphore>,
@@ -78,7 +81,11 @@ impl CurrentLogicalSize {
 impl LogicalSize {
    pub(super) fn empty_initial() -> Self {
        Self {
-            initial_logical_size: OnceCell::with_value(0),
+            initial_logical_size: OnceCell::with_value((0, {
+                crate::metrics::initial_logical_size::START_CALCULATION
+                    .first(None)
+                    .calculation_result_saved()
+            })),
            //  initial_logical_size already computed, so, don't admit any calculations
            initial_size_computation: Arc::new(Semaphore::new(0)),
            initial_part_end: None,
@@ -100,12 +107,16 @@ impl LogicalSize {
        //                  ^^^ keep this type explicit so that the casts in this function break if
        //                  we change the type.
        match self.initial_logical_size.get() {
-            Some(initial_size) => {
+            Some((initial_size, _)) => {
+                crate::metrics::initial_logical_size::CALLS.exact.inc();
                initial_size.checked_add_signed(size_increment)
                    .with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}"))
                    .map(CurrentLogicalSize::Exact)
            }
            None => {
+                crate::metrics::initial_logical_size::CALLS
+                    .approximate
+                    .inc();
                let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0);
                Ok(CurrentLogicalSize::Approximate(non_negative_size_increment))
            }
@@ -121,7 +132,7 @@ impl LogicalSize {
    /// available for re-use. This doesn't contain the incremental part.
    pub(super) fn initialized_size(&self, lsn: Lsn) -> Option<u64> {
        match self.initial_part_end {
-            Some(v) if v == lsn => self.initial_logical_size.get().copied(),
+            Some(v) if v == lsn => self.initial_logical_size.get().map(|(s, _)| *s),
            _ => None,
        }
    }