logical size: better represent level of accuracy in the type system (#5999)

I would love to not expose the in-accurate value int he mgmt API at all, and in fact control plane doesn't use it [^1]. But our tests do, and I have no desire to change them at this time. [^1]: https://github.com/neondatabase/cloud/pull/8317
2026-01-16 18:02:56 +00:00 · 2023-11-29 16:55:13 +00:00
parent 8cd28e1718
commit ce1652990d
6 changed files with 63 additions and 39 deletions
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -384,7 +384,9 @@ pub struct TimelineInfo {
    /// The LSN that we are advertizing to safekeepers
    pub remote_consistent_lsn_visible: Lsn,

-    pub current_logical_size: Option<u64>, // is None when timeline is Unloaded
+    pub current_logical_size: u64,
+    pub current_logical_size_is_accurate: bool,
+
    /// Sum of the size of all layer files.
    /// If a layer is present in both local FS and S3, it counts only once.
    pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
--- a/pageserver/src/consumption_metrics/metrics.rs
+++ b/pageserver/src/consumption_metrics/metrics.rs
@@ -1,5 +1,4 @@
-use crate::context::RequestContext;
-use anyhow::Context;
+use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogicalSize};
 use chrono::{DateTime, Utc};
 use consumption_metrics::EventType;
 use futures::stream::StreamExt;
@@ -352,13 +351,11 @@ impl TimelineSnapshot {

            let current_exact_logical_size = {
                let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_shard_id.tenant_id, timeline_id = %t.timeline_id);
-                let res = span
-                    .in_scope(|| t.get_current_logical_size(ctx))
-                    .context("get_current_logical_size");
-                match res? {
+                let size = span.in_scope(|| t.get_current_logical_size(ctx));
+                match size {
                    // Only send timeline logical size when it is fully calculated.
-                    (size, is_exact) if is_exact => Some(size),
-                    (_, _) => None,
+                    CurrentLogicalSize::Exact(ref size) => Some(size.into()),
+                    CurrentLogicalSize::Approximate(_) => None,
                }
            };

--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -338,13 +338,7 @@ async fn build_timeline_info_common(
        Lsn(0) => None,
        lsn @ Lsn(_) => Some(lsn),
    };
-    let current_logical_size = match timeline.get_current_logical_size(ctx) {
-        Ok((size, _)) => Some(size),
-        Err(err) => {
-            error!("Timeline info creation failed to get current logical size: {err:?}");
-            None
-        }
-    };
+    let current_logical_size = timeline.get_current_logical_size(ctx);
    let current_physical_size = Some(timeline.layer_size_sum().await);
    let state = timeline.current_state();
    let remote_consistent_lsn_projected = timeline
@@ -368,7 +362,11 @@ async fn build_timeline_info_common(
        last_record_lsn,
        prev_record_lsn: Some(timeline.get_prev_record_lsn()),
        latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
-        current_logical_size,
+        current_logical_size: current_logical_size.size_dont_care_about_accuracy(),
+        current_logical_size_is_accurate: match current_logical_size.accuracy() {
+            tenant::timeline::logical_size::Accuracy::Approximate => false,
+            tenant::timeline::logical_size::Accuracy::Exact => true,
+        },
        current_physical_size,
        current_logical_size_non_incremental: None,
        timeline_dir_layer_file_size_sum: None,
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -2,7 +2,7 @@ pub mod delete;
 mod eviction_task;
 mod init;
 pub mod layer_manager;
-mod logical_size;
+pub(crate) mod logical_size;
 pub mod span;
 pub mod uninit;
 mod walreceiver;
@@ -855,23 +855,20 @@ impl Timeline {
    /// the initial size calculation has not been run (gets triggered on the first size access).
    ///
    /// return size and boolean flag that shows if the size is exact
-    pub fn get_current_logical_size(
+    pub(crate) fn get_current_logical_size(
        self: &Arc<Self>,
        ctx: &RequestContext,
-    ) -> anyhow::Result<(u64, bool)> {
+    ) -> logical_size::CurrentLogicalSize {
        let current_size = self.current_logical_size.current_size();
        debug!("Current size: {current_size:?}");

-        let mut is_exact = true;
-        let size = current_size.size();
        if let (CurrentLogicalSize::Approximate(_), Some(initial_part_end)) =
            (current_size, self.current_logical_size.initial_part_end)
        {
-            is_exact = false;
            self.try_spawn_size_init_task(initial_part_end, ctx);
        }

-        Ok((size, is_exact))
+        current_size
    }

    /// Check if more than 'checkpoint_distance' of WAL has been accumulated in
@@ -2057,10 +2054,10 @@ impl Timeline {
        // one value while current_logical_size is set to the
        // other.
        match logical_size.current_size() {
-            CurrentLogicalSize::Exact(new_current_size) => self
+            CurrentLogicalSize::Exact(ref new_current_size) => self
                .metrics
                .current_logical_size_gauge
-                .set(new_current_size),
+                .set(new_current_size.into()),
            CurrentLogicalSize::Approximate(_) => {
                // don't update the gauge yet, this allows us not to update the gauge back and
                // forth between the initial size calculation task.
--- a/pageserver/src/tenant/timeline/logical_size.rs
+++ b/pageserver/src/tenant/timeline/logical_size.rs
@@ -59,21 +59,50 @@ pub(super) struct LogicalSize {

 /// Normalized current size, that the data in pageserver occupies.
 #[derive(Debug, Clone, Copy)]
-pub(super) enum CurrentLogicalSize {
+pub(crate) enum CurrentLogicalSize {
    /// The size is not yet calculated to the end, this is an intermediate result,
    /// constructed from walreceiver increments and normalized: logical data could delete some objects, hence be negative,
    /// yet total logical size cannot be below 0.
-    Approximate(u64),
+    Approximate(Approximate),
    // Fully calculated logical size, only other future walreceiver increments are changing it, and those changes are
    // available for observation without any calculations.
-    Exact(u64),
+    Exact(Exact),
+}
+
+#[derive(Debug, Copy, Clone)]
+pub(crate) enum Accuracy {
+    Approximate,
+    Exact,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub(crate) struct Approximate(u64);
+#[derive(Debug, Clone, Copy)]
+pub(crate) struct Exact(u64);
+
+impl From<&Approximate> for u64 {
+    fn from(value: &Approximate) -> Self {
+        value.0
+    }
+}
+
+impl From<&Exact> for u64 {
+    fn from(val: &Exact) -> Self {
+        val.0
+    }
 }

 impl CurrentLogicalSize {
-    pub(super) fn size(&self) -> u64 {
-        *match self {
-            Self::Approximate(size) => size,
-            Self::Exact(size) => size,
+    pub(crate) fn size_dont_care_about_accuracy(&self) -> u64 {
+        match self {
+            Self::Approximate(size) => size.into(),
+            Self::Exact(size) => size.into(),
+        }
+    }
+    pub(crate) fn accuracy(&self) -> Accuracy {
+        match self {
+            Self::Approximate(_) => Accuracy::Approximate,
+            Self::Exact(_) => Accuracy::Exact,
        }
    }
 }
@@ -109,16 +138,16 @@ impl LogicalSize {
        match self.initial_logical_size.get() {
            Some((initial_size, _)) => {
                crate::metrics::initial_logical_size::CALLS.exact.inc();
-                CurrentLogicalSize::Exact(initial_size.checked_add_signed(size_increment)
+                CurrentLogicalSize::Exact(Exact(initial_size.checked_add_signed(size_increment)
                    .with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}"))
-                    .unwrap())
+                    .unwrap()))
            }
            None => {
                crate::metrics::initial_logical_size::CALLS
                    .approximate
                    .inc();
                let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0);
-                CurrentLogicalSize::Approximate(non_negative_size_increment)
+                CurrentLogicalSize::Approximate(Approximate(non_negative_size_increment))
            }
        }
    }
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -396,11 +396,12 @@ pub(super) async fn handle_walreceiver_connection(

            // Send the replication feedback message.
            // Regular standby_status_update fields are put into this message.
-            let (timeline_logical_size, _) = timeline
+            let current_timeline_size = timeline
                .get_current_logical_size(&ctx)
-                .context("Status update creation failed to get current logical size")?;
+                // FIXME: https://github.com/neondatabase/neon/issues/5963
+                .size_dont_care_about_accuracy();
            let status_update = PageserverFeedback {
-                current_timeline_size: timeline_logical_size,
+                current_timeline_size,
                last_received_lsn,
                disk_consistent_lsn,
                remote_consistent_lsn,