pageserver: add branch-local consumption metrics

Fix test override with PITR disabled
Code tweak
2026-05-15 20:20:38 +00:00 · 2025-05-21 11:27:12 +02:00 · 2025-05-21 11:01:11 +02:00 · 2025-05-21 10:47:43 +02:00 · 2025-05-21 10:38:49 +02:00 · 2025-05-20 17:29:17 +02:00
9 changed files with 446 additions and 78 deletions
--- a/pageserver/src/consumption_metrics/metrics.rs
+++ b/pageserver/src/consumption_metrics/metrics.rs
@@ -18,12 +18,25 @@ use crate::tenant::timeline::logical_size::CurrentLogicalSize;
 // management.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
 pub(super) enum Name {
-    /// Timeline last_record_lsn, absolute
+    /// Timeline last_record_lsn, absolute.
    #[serde(rename = "written_size")]
    WrittenSize,
    /// Timeline last_record_lsn, incremental
    #[serde(rename = "written_data_bytes_delta")]
    WrittenSizeDelta,
+    /// Written bytes only on this timeline (not including ancestors):
+    /// written_size - ancestor_lsn
+    ///
+    /// On the root branch, this is equivalent to `written_size`.
+    #[serde(rename = "written_size_since_parent")]
+    WrittenSizeSinceParent,
+    /// PITR history size only on this timeline (not including ancestors):
+    /// last_record_lsn - max(pitr_cutoff, ancestor_lsn).
+    ///
+    /// On the root branch, this is its entire PITR history size. Not emitted if GC hasn't computed
+    /// the PITR cutoff yet. 0 if PITR is disabled.
+    #[serde(rename = "pitr_history_size_since_parent")]
+    PitrHistorySizeSinceParent,
    /// Timeline logical size
    #[serde(rename = "timeline_logical_size")]
    LogicalSize,
@@ -157,6 +170,32 @@ impl MetricsKey {
        .incremental_values()
    }

+    /// `written_size` - `ancestor_lsn`.
+    const fn written_size_since_parent(
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> AbsoluteValueFactory {
+        MetricsKey {
+            tenant_id,
+            timeline_id: Some(timeline_id),
+            metric: Name::WrittenSizeSinceParent,
+        }
+        .absolute_values()
+    }
+
+    /// `written_size` - max(`pitr_cutoff`, `ancestor_lsn`).
+    const fn pitr_history_size_since_parent(
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> AbsoluteValueFactory {
+        MetricsKey {
+            tenant_id,
+            timeline_id: Some(timeline_id),
+            metric: Name::PitrHistorySizeSinceParent,
+        }
+        .absolute_values()
+    }
+
    /// Exact [`Timeline::get_current_logical_size`].
    ///
    /// [`Timeline::get_current_logical_size`]: crate::tenant::Timeline::get_current_logical_size
@@ -334,7 +373,13 @@ impl TenantSnapshot {
 struct TimelineSnapshot {
    loaded_at: (Lsn, SystemTime),
    last_record_lsn: Lsn,
+    ancestor_lsn: Lsn,
    current_exact_logical_size: Option<u64>,
+    /// Whether PITR is enabled (pitr_interval > 0).
+    pitr_enabled: bool,
+    /// The PITR cutoff LSN. None if not yet initialized. If PITR is disabled, this is approximately
+    /// Some(last_record_lsn), but may lag behind it since it's computed periodically.
+    pitr_cutoff: Option<Lsn>,
 }

 impl TimelineSnapshot {
@@ -354,6 +399,9 @@ impl TimelineSnapshot {
        } else {
            let loaded_at = t.loaded_at;
            let last_record_lsn = t.get_last_record_lsn();
+            let ancestor_lsn = t.get_ancestor_lsn();
+            let pitr_enabled = !t.get_pitr_interval().is_zero();
+            let pitr_cutoff = t.gc_info.read().unwrap().cutoffs.time;

            let current_exact_logical_size = {
                let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_shard_id.tenant_id, timeline_id = %t.timeline_id);
@@ -373,7 +421,10 @@ impl TimelineSnapshot {
            Ok(Some(TimelineSnapshot {
                loaded_at,
                last_record_lsn,
+                ancestor_lsn,
                current_exact_logical_size,
+                pitr_enabled,
+                pitr_cutoff,
            }))
        }
    }
@@ -424,6 +475,8 @@ impl TimelineSnapshot {

        let up_to = now;

+        let written_size_last = written_size_now.value.max(prev.1); // don't regress
+
        if let Some(delta) = written_size_now.value.checked_sub(prev.1) {
            let key_value = written_size_delta_key.from_until(prev.0, up_to, delta);
            // written_size_delta
@@ -441,6 +494,27 @@ impl TimelineSnapshot {
            });
        }

+        // Compute the branch-local written size.
+        let written_size_since_parent_key =
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id);
+        metrics.push(
+            written_size_since_parent_key
+                .at(now, written_size_last.saturating_sub(self.ancestor_lsn.0)),
+        );
+
+        // Compute the branch-local PITR history size. Not emitted if GC hasn't yet computed the
+        // PITR cutoff. 0 if PITR is disabled.
+        let pitr_history_size_since_parent_key =
+            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id);
+        if !self.pitr_enabled {
+            metrics.push(pitr_history_size_since_parent_key.at(now, 0));
+        } else if let Some(pitr_cutoff) = self.pitr_cutoff {
+            metrics.push(pitr_history_size_since_parent_key.at(
+                now,
+                written_size_last.saturating_sub(pitr_cutoff.max(self.ancestor_lsn).0),
+            ));
+        }
+
        {
            let factory = MetricsKey::timeline_logical_size(tenant_id, timeline_id);
            let current_or_previous = self
--- a/pageserver/src/consumption_metrics/metrics/tests.rs
+++ b/pageserver/src/consumption_metrics/metrics/tests.rs
@@ -12,12 +12,17 @@ fn startup_collected_timeline_metrics_before_advancing() {
    let cache = HashMap::new();

    let initdb_lsn = Lsn(0x10000);
+    let pitr_cutoff = Lsn(0x11000);
    let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
+    let logical_size = 0x42000;

    let snap = TimelineSnapshot {
        loaded_at: (disk_consistent_lsn, SystemTime::now()),
        last_record_lsn: disk_consistent_lsn,
-        current_exact_logical_size: Some(0x42000),
+        ancestor_lsn: Lsn(0),
+        current_exact_logical_size: Some(logical_size),
+        pitr_enabled: true,
+        pitr_cutoff: Some(pitr_cutoff),
    };

    let now = DateTime::<Utc>::from(SystemTime::now());
@@ -33,7 +38,11 @@ fn startup_collected_timeline_metrics_before_advancing() {
                0
            ),
            MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
-            MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000)
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id)
+                .at(now, disk_consistent_lsn.0),
+            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id)
+                .at(now, disk_consistent_lsn.0 - pitr_cutoff.0),
+            MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
        ]
    );
 }
@@ -49,7 +58,9 @@ fn startup_collected_timeline_metrics_second_round() {
    let before = DateTime::<Utc>::from(before);

    let initdb_lsn = Lsn(0x10000);
+    let pitr_cutoff = Lsn(0x11000);
    let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
+    let logical_size = 0x42000;

    let mut metrics = Vec::new();
    let cache = HashMap::from([MetricsKey::written_size(tenant_id, timeline_id)
@@ -59,7 +70,10 @@ fn startup_collected_timeline_metrics_second_round() {
    let snap = TimelineSnapshot {
        loaded_at: (disk_consistent_lsn, init),
        last_record_lsn: disk_consistent_lsn,
-        current_exact_logical_size: Some(0x42000),
+        ancestor_lsn: Lsn(0),
+        current_exact_logical_size: Some(logical_size),
+        pitr_enabled: true,
+        pitr_cutoff: Some(pitr_cutoff),
    };

    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
@@ -69,7 +83,11 @@ fn startup_collected_timeline_metrics_second_round() {
        &[
            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
            MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
-            MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000)
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id)
+                .at(now, disk_consistent_lsn.0),
+            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id)
+                .at(now, disk_consistent_lsn.0 - pitr_cutoff.0),
+            MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
        ]
    );
 }
@@ -86,7 +104,9 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() {
    let before = DateTime::<Utc>::from(before);

    let initdb_lsn = Lsn(0x10000);
+    let pitr_cutoff = Lsn(0x11000);
    let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
+    let logical_size = 0x42000;

    let mut metrics = Vec::new();
    let cache = HashMap::from([
@@ -103,7 +123,10 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() {
    let snap = TimelineSnapshot {
        loaded_at: (disk_consistent_lsn, init),
        last_record_lsn: disk_consistent_lsn,
-        current_exact_logical_size: Some(0x42000),
+        ancestor_lsn: Lsn(0),
+        current_exact_logical_size: Some(logical_size),
+        pitr_enabled: true,
+        pitr_cutoff: Some(pitr_cutoff),
    };

    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
@@ -113,16 +136,18 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() {
        &[
            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(just_before, now, 0),
            MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
-            MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000)
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id)
+                .at(now, disk_consistent_lsn.0),
+            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id)
+                .at(now, disk_consistent_lsn.0 - pitr_cutoff.0),
+            MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
        ]
    );
 }

+/// Tests that written sizes do not regress across restarts, e.g. after losing in-memory layers.
 #[test]
 fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
-    // it can happen that we lose the inmemorylayer but have previously sent metrics and we
-    // should never go backwards
-
    let tenant_id = TenantId::generate();
    let timeline_id = TimelineId::generate();

@@ -140,7 +165,10 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
    let snap = TimelineSnapshot {
        loaded_at: (Lsn(50), at_restart),
        last_record_lsn: Lsn(50),
+        ancestor_lsn: Lsn(0),
        current_exact_logical_size: None,
+        pitr_enabled: true,
+        pitr_cutoff: Some(Lsn(20)),
    };

    let mut cache = HashMap::from([
@@ -169,6 +197,8 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
                0
            ),
            MetricsKey::written_size(tenant_id, timeline_id).at(now, 100),
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(now, 100),
+            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(now, 80),
        ]
    );

@@ -183,6 +213,157 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
        &[
            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(now, later, 0),
            MetricsKey::written_size(tenant_id, timeline_id).at(later, 100),
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(later, 100),
+            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(later, 80),
+        ]
+    );
+}
+
+/// Tests that written sizes do not regress across restarts, even on child branches.
+#[test]
+fn post_restart_written_sizes_with_rolled_back_last_record_lsn_and_ancestor_lsn() {
+    let tenant_id = TenantId::generate();
+    let timeline_id = TimelineId::generate();
+
+    let [later, now, at_restart] = time_backwards();
+
+    // FIXME: tests would be so much easier if we did not need to juggle back and forth
+    // SystemTime and DateTime::<Utc> ... Could do the conversion only at upload time?
+    let now = DateTime::<Utc>::from(now);
+    let later = DateTime::<Utc>::from(later);
+    let before_restart = at_restart - std::time::Duration::from_secs(5 * 60);
+    let way_before = before_restart - std::time::Duration::from_secs(10 * 60);
+    let before_restart = DateTime::<Utc>::from(before_restart);
+    let way_before = DateTime::<Utc>::from(way_before);
+
+    let snap = TimelineSnapshot {
+        loaded_at: (Lsn(50), at_restart),
+        last_record_lsn: Lsn(50),
+        ancestor_lsn: Lsn(40),
+        current_exact_logical_size: None,
+        pitr_enabled: true,
+        pitr_cutoff: Some(Lsn(20)),
+    };
+
+    let mut cache = HashMap::from([
+        MetricsKey::written_size(tenant_id, timeline_id)
+            .at(before_restart, 100)
+            .to_kv_pair(),
+        MetricsKey::written_size_delta(tenant_id, timeline_id)
+            .from_until(
+                way_before,
+                before_restart,
+                // not taken into account, but the timestamps are important
+                999_999_999,
+            )
+            .to_kv_pair(),
+    ]);
+
+    let mut metrics = Vec::new();
+    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
+
+    assert_eq!(
+        metrics,
+        &[
+            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(
+                before_restart,
+                now,
+                0
+            ),
+            MetricsKey::written_size(tenant_id, timeline_id).at(now, 100),
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(now, 60),
+            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(now, 60),
+        ]
+    );
+
+    // now if we cache these metrics, and re-run while "still in recovery"
+    cache.extend(metrics.drain(..).map(|x| x.to_kv_pair()));
+
+    // "still in recovery", because our snapshot did not change
+    snap.to_metrics(tenant_id, timeline_id, later, &mut metrics, &cache);
+
+    assert_eq!(
+        metrics,
+        &[
+            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(now, later, 0),
+            MetricsKey::written_size(tenant_id, timeline_id).at(later, 100),
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(later, 60),
+            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(later, 60),
+        ]
+    );
+}
+
+/// Tests that written sizes do not regress across restarts, even on child branches and
+/// with a PITR cutoff after the branch point.
+#[test]
+fn post_restart_written_sizes_with_rolled_back_last_record_lsn_and_ancestor_lsn_and_pitr_cutoff() {
+    let tenant_id = TenantId::generate();
+    let timeline_id = TimelineId::generate();
+
+    let [later, now, at_restart] = time_backwards();
+
+    // FIXME: tests would be so much easier if we did not need to juggle back and forth
+    // SystemTime and DateTime::<Utc> ... Could do the conversion only at upload time?
+    let now = DateTime::<Utc>::from(now);
+    let later = DateTime::<Utc>::from(later);
+    let before_restart = at_restart - std::time::Duration::from_secs(5 * 60);
+    let way_before = before_restart - std::time::Duration::from_secs(10 * 60);
+    let before_restart = DateTime::<Utc>::from(before_restart);
+    let way_before = DateTime::<Utc>::from(way_before);
+
+    let snap = TimelineSnapshot {
+        loaded_at: (Lsn(50), at_restart),
+        last_record_lsn: Lsn(50),
+        ancestor_lsn: Lsn(30),
+        current_exact_logical_size: None,
+        pitr_enabled: true,
+        pitr_cutoff: Some(Lsn(40)),
+    };
+
+    let mut cache = HashMap::from([
+        MetricsKey::written_size(tenant_id, timeline_id)
+            .at(before_restart, 100)
+            .to_kv_pair(),
+        MetricsKey::written_size_delta(tenant_id, timeline_id)
+            .from_until(
+                way_before,
+                before_restart,
+                // not taken into account, but the timestamps are important
+                999_999_999,
+            )
+            .to_kv_pair(),
+    ]);
+
+    let mut metrics = Vec::new();
+    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
+
+    assert_eq!(
+        metrics,
+        &[
+            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(
+                before_restart,
+                now,
+                0
+            ),
+            MetricsKey::written_size(tenant_id, timeline_id).at(now, 100),
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(now, 70),
+            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(now, 60),
+        ]
+    );
+
+    // now if we cache these metrics, and re-run while "still in recovery"
+    cache.extend(metrics.drain(..).map(|x| x.to_kv_pair()));
+
+    // "still in recovery", because our snapshot did not change
+    snap.to_metrics(tenant_id, timeline_id, later, &mut metrics, &cache);
+
+    assert_eq!(
+        metrics,
+        &[
+            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(now, later, 0),
+            MetricsKey::written_size(tenant_id, timeline_id).at(later, 100),
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(later, 70),
+            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(later, 60),
        ]
    );
 }
@@ -201,7 +382,10 @@ fn post_restart_current_exact_logical_size_uses_cached() {
    let snap = TimelineSnapshot {
        loaded_at: (Lsn(50), at_restart),
        last_record_lsn: Lsn(50),
+        ancestor_lsn: Lsn(0),
        current_exact_logical_size: None,
+        pitr_enabled: true,
+        pitr_cutoff: None,
    };

    let cache = HashMap::from([MetricsKey::timeline_logical_size(tenant_id, timeline_id)
@@ -286,16 +470,101 @@ fn time_backwards<const N: usize>() -> [std::time::SystemTime; N] {
    times
 }

+/// Tests that disabled PITR reports a history size of 0, even when the PITR cutoff indicates
+/// otherwise.
+#[test]
+fn pitr_disabled_yields_no_history_size() {
+    let tenant_id = TenantId::generate();
+    let timeline_id = TimelineId::generate();
+
+    let mut metrics = Vec::new();
+    let cache = HashMap::new();
+
+    let initdb_lsn = Lsn(0x10000);
+    let pitr_cutoff = Lsn(0x11000);
+    let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
+
+    let snap = TimelineSnapshot {
+        loaded_at: (disk_consistent_lsn, SystemTime::now()),
+        last_record_lsn: disk_consistent_lsn,
+        ancestor_lsn: Lsn(0),
+        current_exact_logical_size: None,
+        pitr_enabled: false,
+        pitr_cutoff: Some(pitr_cutoff),
+    };
+
+    let now = DateTime::<Utc>::from(SystemTime::now());
+
+    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
+
+    assert_eq!(
+        metrics,
+        &[
+            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(
+                snap.loaded_at.1.into(),
+                now,
+                0
+            ),
+            MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id)
+                .at(now, disk_consistent_lsn.0),
+            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(now, 0),
+        ]
+    );
+}
+
+/// Tests that uninitialized PITR cutoff does not emit any history size metric at all.
+#[test]
+fn pitr_uninitialized_does_not_emit_history_size() {
+    let tenant_id = TenantId::generate();
+    let timeline_id = TimelineId::generate();
+
+    let mut metrics = Vec::new();
+    let cache = HashMap::new();
+
+    let initdb_lsn = Lsn(0x10000);
+    let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
+
+    let snap = TimelineSnapshot {
+        loaded_at: (disk_consistent_lsn, SystemTime::now()),
+        last_record_lsn: disk_consistent_lsn,
+        ancestor_lsn: Lsn(0),
+        current_exact_logical_size: None,
+        pitr_enabled: true,
+        pitr_cutoff: None,
+    };
+
+    let now = DateTime::<Utc>::from(SystemTime::now());
+
+    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
+
+    assert_eq!(
+        metrics,
+        &[
+            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(
+                snap.loaded_at.1.into(),
+                now,
+                0
+            ),
+            MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
+            MetricsKey::written_size_since_parent(tenant_id, timeline_id)
+                .at(now, disk_consistent_lsn.0),
+        ]
+    );
+}
+
 pub(crate) const fn metric_examples_old(
    tenant_id: TenantId,
    timeline_id: TimelineId,
    now: DateTime<Utc>,
    before: DateTime<Utc>,
-) -> [RawMetric; 5] {
+) -> [RawMetric; 7] {
    [
        MetricsKey::written_size(tenant_id, timeline_id).at_old_format(now, 0),
        MetricsKey::written_size_delta(tenant_id, timeline_id)
            .from_until_old_format(before, now, 0),
+        MetricsKey::written_size_since_parent(tenant_id, timeline_id).at_old_format(now, 0),
+        MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at_old_format(now, 0),
        MetricsKey::timeline_logical_size(tenant_id, timeline_id).at_old_format(now, 0),
        MetricsKey::remote_storage_size(tenant_id).at_old_format(now, 0),
        MetricsKey::synthetic_size(tenant_id).at_old_format(now, 1),
@@ -307,10 +576,12 @@ pub(crate) const fn metric_examples(
    timeline_id: TimelineId,
    now: DateTime<Utc>,
    before: DateTime<Utc>,
-) -> [NewRawMetric; 5] {
+) -> [NewRawMetric; 7] {
    [
        MetricsKey::written_size(tenant_id, timeline_id).at(now, 0),
        MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
+        MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(now, 0),
+        MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(now, 0),
        MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0),
        MetricsKey::remote_storage_size(tenant_id).at(now, 0),
        MetricsKey::synthetic_size(tenant_id).at(now, 1),
--- a/pageserver/src/consumption_metrics/upload.rs
+++ b/pageserver/src/consumption_metrics/upload.rs
@@ -513,6 +513,14 @@ mod tests {
                line!(),
                r#"{"type":"incremental","start_time":"2023-09-14T00:00:00.123456789Z","stop_time":"2023-09-15T00:00:00.123456789Z","metric":"written_data_bytes_delta","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000","timeline_id":"ffffffffffffffffffffffffffffffff"}"#,
            ),
+            (
+                line!(),
+                r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"written_size_since_parent","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000","timeline_id":"ffffffffffffffffffffffffffffffff"}"#,
+            ),
+            (
+                line!(),
+                r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"pitr_history_size_since_parent","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000","timeline_id":"ffffffffffffffffffffffffffffffff"}"#,
+            ),
            (
                line!(),
                r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"timeline_logical_size","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000","timeline_id":"ffffffffffffffffffffffffffffffff"}"#,
@@ -560,7 +568,7 @@ mod tests {
        assert_eq!(upgraded_samples, new_samples);
    }

-    fn metric_samples_old() -> [RawMetric; 5] {
+    fn metric_samples_old() -> [RawMetric; 7] {
        let tenant_id = TenantId::from_array([0; 16]);
        let timeline_id = TimelineId::from_array([0xff; 16]);

@@ -572,7 +580,7 @@ mod tests {
        super::super::metrics::metric_examples_old(tenant_id, timeline_id, now, before)
    }

-    fn metric_samples() -> [NewRawMetric; 5] {
+    fn metric_samples() -> [NewRawMetric; 7] {
        let tenant_id = TenantId::from_array([0; 16]);
        let timeline_id = TimelineId::from_array([0xff; 16]);

--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -449,7 +449,7 @@ async fn build_timeline_info_common(
    // Internally we distinguish between the planned GC cutoff (PITR point) and the "applied" GC cutoff (where we
    // actually trimmed data to), which can pass each other when PITR is changed.
    let min_readable_lsn = std::cmp::max(
-        timeline.get_gc_cutoff_lsn(),
+        timeline.get_gc_cutoff_lsn().unwrap_or_default(),
        *timeline.get_applied_gc_cutoff_lsn(),
    );

--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -4587,7 +4587,7 @@ impl TenantShard {

            target.cutoffs = GcCutoffs {
                space: space_cutoff,
-                time: Lsn::INVALID,
+                time: None,
            };
        }
    }
@@ -4670,8 +4670,8 @@ impl TenantShard {
                // Look up parent's PITR cutoff to update the child's knowledge of whether it is within parent's PITR
                if let Some(ancestor_id) = timeline.get_ancestor_timeline_id() {
                    if let Some(ancestor_gc_cutoffs) = gc_cutoffs.get(&ancestor_id) {
-                        target.within_ancestor_pitr =
-                            timeline.get_ancestor_lsn() >= ancestor_gc_cutoffs.time;
+                        target.within_ancestor_pitr = timeline.get_ancestor_lsn()
+                            >= ancestor_gc_cutoffs.time.unwrap_or_default();
                    }
                }

@@ -4684,13 +4684,15 @@ impl TenantShard {
                    } else {
                        0
                    });
-                timeline.metrics.pitr_history_size.set(
-                    timeline
-                        .get_last_record_lsn()
-                        .checked_sub(target.cutoffs.time)
-                        .unwrap_or(Lsn(0))
-                        .0,
-                );
+                if let Some(time_cutoff) = target.cutoffs.time {
+                    timeline.metrics.pitr_history_size.set(
+                        timeline
+                            .get_last_record_lsn()
+                            .checked_sub(time_cutoff)
+                            .unwrap_or_default()
+                            .0,
+                    );
+                }

                // Apply the cutoffs we found to the Timeline's GcInfo.  Why might we _not_ have cutoffs for a timeline?
                // - this timeline was created while we were finding cutoffs
@@ -4699,8 +4701,8 @@ impl TenantShard {
                    let original_cutoffs = target.cutoffs.clone();
                    // GC cutoffs should never go back
                    target.cutoffs = GcCutoffs {
-                        space: Lsn(cutoffs.space.0.max(original_cutoffs.space.0)),
-                        time: Lsn(cutoffs.time.0.max(original_cutoffs.time.0)),
+                        space: cutoffs.space.max(original_cutoffs.space),
+                        time: cutoffs.time.max(original_cutoffs.time),
                    }
                }
            }
@@ -8937,7 +8939,7 @@ mod tests {
                .await;
            // Update GC info
            let mut guard = tline.gc_info.write().unwrap();
-            guard.cutoffs.time = Lsn(0x30);
+            guard.cutoffs.time = Some(Lsn(0x30));
            guard.cutoffs.space = Lsn(0x30);
        }

@@ -9045,7 +9047,7 @@ mod tests {
                .await;
            // Update GC info
            let mut guard = tline.gc_info.write().unwrap();
-            guard.cutoffs.time = Lsn(0x40);
+            guard.cutoffs.time = Some(Lsn(0x40));
            guard.cutoffs.space = Lsn(0x40);
        }
        tline
@@ -9463,7 +9465,7 @@ mod tests {
            *guard = GcInfo {
                retain_lsns: vec![],
                cutoffs: GcCutoffs {
-                    time: Lsn(0x30),
+                    time: Some(Lsn(0x30)),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -9547,7 +9549,7 @@ mod tests {
                .await;
            // Update GC info
            let mut guard = tline.gc_info.write().unwrap();
-            guard.cutoffs.time = Lsn(0x40);
+            guard.cutoffs.time = Some(Lsn(0x40));
            guard.cutoffs.space = Lsn(0x40);
        }
        tline
@@ -10018,7 +10020,7 @@ mod tests {
                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
                ],
                cutoffs: GcCutoffs {
-                    time: Lsn(0x30),
+                    time: Some(Lsn(0x30)),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -10081,7 +10083,7 @@ mod tests {
        let verify_result = || async {
            let gc_horizon = {
                let gc_info = tline.gc_info.read().unwrap();
-                gc_info.cutoffs.time
+                gc_info.cutoffs.time.unwrap_or_default()
            };
            for idx in 0..10 {
                assert_eq!(
@@ -10159,7 +10161,7 @@ mod tests {
                .await;
            // Update GC info
            let mut guard = tline.gc_info.write().unwrap();
-            guard.cutoffs.time = Lsn(0x38);
+            guard.cutoffs.time = Some(Lsn(0x38));
            guard.cutoffs.space = Lsn(0x38);
        }
        tline
@@ -10267,7 +10269,7 @@ mod tests {
                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
                ],
                cutoffs: GcCutoffs {
-                    time: Lsn(0x30),
+                    time: Some(Lsn(0x30)),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -10330,7 +10332,7 @@ mod tests {
        let verify_result = || async {
            let gc_horizon = {
                let gc_info = tline.gc_info.read().unwrap();
-                gc_info.cutoffs.time
+                gc_info.cutoffs.time.unwrap_or_default()
            };
            for idx in 0..10 {
                assert_eq!(
@@ -10516,7 +10518,7 @@ mod tests {
            *guard = GcInfo {
                retain_lsns: vec![(Lsn(0x18), branch_tline.timeline_id, MaybeOffloaded::No)],
                cutoffs: GcCutoffs {
-                    time: Lsn(0x10),
+                    time: Some(Lsn(0x10)),
                    space: Lsn(0x10),
                },
                leases: Default::default(),
@@ -10536,7 +10538,7 @@ mod tests {
            *guard = GcInfo {
                retain_lsns: vec![(Lsn(0x40), branch_tline.timeline_id, MaybeOffloaded::No)],
                cutoffs: GcCutoffs {
-                    time: Lsn(0x50),
+                    time: Some(Lsn(0x50)),
                    space: Lsn(0x50),
                },
                leases: Default::default(),
@@ -11257,7 +11259,7 @@ mod tests {
            *guard = GcInfo {
                retain_lsns: vec![(Lsn(0x20), tline.timeline_id, MaybeOffloaded::No)],
                cutoffs: GcCutoffs {
-                    time: Lsn(0x30),
+                    time: Some(Lsn(0x30)),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -11646,7 +11648,7 @@ mod tests {
                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
                ],
                cutoffs: GcCutoffs {
-                    time: Lsn(0x30),
+                    time: Some(Lsn(0x30)),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -11709,7 +11711,7 @@ mod tests {
        let verify_result = || async {
            let gc_horizon = {
                let gc_info = tline.gc_info.read().unwrap();
-                gc_info.cutoffs.time
+                gc_info.cutoffs.time.unwrap_or_default()
            };
            for idx in 0..10 {
                assert_eq!(
@@ -11898,7 +11900,7 @@ mod tests {
                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
                ],
                cutoffs: GcCutoffs {
-                    time: Lsn(0x30),
+                    time: Some(Lsn(0x30)),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -11961,7 +11963,7 @@ mod tests {
        let verify_result = || async {
            let gc_horizon = {
                let gc_info = tline.gc_info.read().unwrap();
-                gc_info.cutoffs.time
+                gc_info.cutoffs.time.unwrap_or_default()
            };
            for idx in 0..10 {
                assert_eq!(
@@ -12224,7 +12226,7 @@ mod tests {
            *guard = GcInfo {
                retain_lsns: vec![],
                cutoffs: GcCutoffs {
-                    time: Lsn(0x30),
+                    time: Some(Lsn(0x30)),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -235,7 +235,7 @@ pub(super) async fn gather_inputs(
        // than our internal space cutoff.  This means that if someone drops a database and waits for their
        // PITR interval, they will see synthetic size decrease, even if we are still storing data inside
        // the space cutoff.
-        let mut next_pitr_cutoff = gc_info.cutoffs.time;
+        let mut next_pitr_cutoff = gc_info.cutoffs.time.unwrap_or_default(); // TODO: handle None

        // If the caller provided a shorter retention period, use that instead of the GC cutoff.
        let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period {
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -537,29 +537,24 @@ impl GcInfo {
 /// The `GcInfo` component describing which Lsns need to be retained.  Functionally, this
 /// is a single number (the oldest LSN which we must retain), but it internally distinguishes
 /// between time-based and space-based retention for observability and consumption metrics purposes.
-#[derive(Debug, Clone)]
+#[derive(Clone, Debug, Default)]
 pub(crate) struct GcCutoffs {
    /// Calculated from the [`pageserver_api::models::TenantConfig::gc_horizon`], this LSN indicates how much
    /// history we must keep to retain a specified number of bytes of WAL.
    pub(crate) space: Lsn,

-    /// Calculated from [`pageserver_api::models::TenantConfig::pitr_interval`], this LSN indicates how much
-    /// history we must keep to enable reading back at least the PITR interval duration.
-    pub(crate) time: Lsn,
-}
-
-impl Default for GcCutoffs {
-    fn default() -> Self {
-        Self {
-            space: Lsn::INVALID,
-            time: Lsn::INVALID,
-        }
-    }
+    /// Calculated from [`pageserver_api::models::TenantConfig::pitr_interval`], this LSN indicates
+    /// how much history we must keep to enable reading back at least the PITR interval duration.
+    ///
+    /// None indicates that the PITR cutoff has not been computed. A PITR interval of 0 will yield
+    /// Some(last_record_lsn).
+    pub(crate) time: Option<Lsn>,
 }

 impl GcCutoffs {
    fn select_min(&self) -> Lsn {
-        std::cmp::min(self.space, self.time)
+        // NB: an uncomputed PITR cutoff defaults to Lsn(0), so we can't GC anything yet.
+        self.space.min(self.time.unwrap_or_default())
    }
 }

@@ -1096,11 +1091,14 @@ impl Timeline {
    /// Get the bytes written since the PITR cutoff on this branch, and
    /// whether this branch's ancestor_lsn is within its parent's PITR.
    pub(crate) fn get_pitr_history_stats(&self) -> (u64, bool) {
+        // TODO: for backwards compatibility, we return the full history back to 0 when the PITR
+        // cutoff has not yet been initialized. This should return None instead, but this is exposed
+        // in external HTTP APIs and callers may not handle a null value.
        let gc_info = self.gc_info.read().unwrap();
        let history = self
            .get_last_record_lsn()
-            .checked_sub(gc_info.cutoffs.time)
-            .unwrap_or(Lsn(0))
+            .checked_sub(gc_info.cutoffs.time.unwrap_or_default())
+            .unwrap_or_default()
            .0;
        (history, gc_info.within_ancestor_pitr)
    }
@@ -1110,9 +1108,10 @@ impl Timeline {
        self.applied_gc_cutoff_lsn.read()
    }

-    /// Read timeline's planned GC cutoff: this is the logical end of history that users
-    /// are allowed to read (based on configured PITR), even if physically we have more history.
-    pub(crate) fn get_gc_cutoff_lsn(&self) -> Lsn {
+    /// Read timeline's planned GC cutoff: this is the logical end of history that users are allowed
+    /// to read (based on configured PITR), even if physically we have more history. Returns None
+    /// if the PITR cutoff has not yet been initialized.
+    pub(crate) fn get_gc_cutoff_lsn(&self) -> Option<Lsn> {
        self.gc_info.read().unwrap().cutoffs.time
    }

@@ -2545,6 +2544,13 @@ impl Timeline {
            .unwrap_or(self.conf.default_tenant_conf.checkpoint_timeout)
    }

+    pub(crate) fn get_pitr_interval(&self) -> Duration {
+        let tenant_conf = &self.tenant_conf.load().tenant_conf;
+        tenant_conf
+            .pitr_interval
+            .unwrap_or(self.conf.default_tenant_conf.pitr_interval)
+    }
+
    fn get_compaction_period(&self) -> Duration {
        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();
        tenant_conf
@@ -6230,14 +6236,12 @@ impl Timeline {

        pausable_failpoint!("Timeline::find_gc_cutoffs-pausable");

-        if cfg!(test) {
+        if cfg!(test) && pitr == Duration::ZERO {
            // Unit tests which specify zero PITR interval expect to avoid doing any I/O for timestamp lookup
-            if pitr == Duration::ZERO {
-                return Ok(GcCutoffs {
-                    time: self.get_last_record_lsn(),
-                    space: space_cutoff,
-                });
-            }
+            return Ok(GcCutoffs {
+                time: Some(self.get_last_record_lsn()),
+                space: space_cutoff,
+            });
        }

        // Calculate a time-based limit on how much to retain:
@@ -6251,14 +6255,14 @@ impl Timeline {
                // PITR is not set. Retain the size-based limit, or the default time retention,
                // whichever requires less data.
                GcCutoffs {
-                    time: self.get_last_record_lsn(),
+                    time: Some(self.get_last_record_lsn()),
                    space: std::cmp::max(time_cutoff, space_cutoff),
                }
            }
            (Duration::ZERO, None) => {
                // PITR is not set, and time lookup failed
                GcCutoffs {
-                    time: self.get_last_record_lsn(),
+                    time: Some(self.get_last_record_lsn()),
                    space: space_cutoff,
                }
            }
@@ -6266,7 +6270,7 @@ impl Timeline {
                // PITR interval is set & we didn't look up a timestamp successfully.  Conservatively assume PITR
                // cannot advance beyond what was already GC'd, and respect space-based retention
                GcCutoffs {
-                    time: *self.get_applied_gc_cutoff_lsn(),
+                    time: Some(*self.get_applied_gc_cutoff_lsn()),
                    space: space_cutoff,
                }
            }
@@ -6274,7 +6278,7 @@ impl Timeline {
                // PITR interval is set and we looked up timestamp successfully.  Ignore
                // size based retention and make time cutoff authoritative
                GcCutoffs {
-                    time: time_cutoff,
+                    time: Some(time_cutoff),
                    space: time_cutoff,
                }
            }
@@ -6327,7 +6331,7 @@ impl Timeline {
            )
        };

-        let mut new_gc_cutoff = Lsn::min(space_cutoff, time_cutoff);
+        let mut new_gc_cutoff = space_cutoff.min(time_cutoff.unwrap_or_default());
        let standby_horizon = self.standby_horizon.load();
        // Hold GC for the standby, but as a safety guard do it only within some
        // reasonable lag.
@@ -6376,7 +6380,7 @@ impl Timeline {
    async fn gc_timeline(
        &self,
        space_cutoff: Lsn,
-        time_cutoff: Lsn,
+        time_cutoff: Option<Lsn>, // None if uninitialized
        retain_lsns: Vec<Lsn>,
        max_lsn_with_valid_lease: Option<Lsn>,
        new_gc_cutoff: Lsn,
@@ -6395,6 +6399,12 @@ impl Timeline {
            return Ok(result);
        }

+        let Some(time_cutoff) = time_cutoff else {
+            // The PITR cutoff should have been computed by now, but let's be defensive.
+            info!("Nothing to GC: time_cutoff not yet computed");
+            return Ok(result);
+        };
+
        // We need to ensure that no one tries to read page versions or create
        // branches at a point before latest_gc_cutoff_lsn. See branch_timeline()
        // for details. This will block until the old value is no longer in use.
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -1526,7 +1526,7 @@ impl Timeline {
        info!(
            "starting shard ancestor compaction, rewriting {} layers and dropping {} layers, \
                checked {layers_checked}/{layers_total} layers \
-                (latest_gc_cutoff={} pitr_cutoff={})",
+                (latest_gc_cutoff={} pitr_cutoff={:?})",
            layers_to_rewrite.len(),
            drop_layers.len(),
            *latest_gc_cutoff,
--- a/test_runner/regress/test_pageserver_metric_collection.py
+++ b/test_runner/regress/test_pageserver_metric_collection.py
@@ -508,6 +508,9 @@ PER_METRIC_VERIFIERS = {
    "remote_storage_size": CannotVerifyAnything,
    "written_size": WrittenDataVerifier,
    "written_data_bytes_delta": WrittenDataDeltaVerifier,
+    "written_size_since_parent": WrittenDataVerifier,  # same as written_size on root
+    "pitr_cutoff": CannotVerifyAnything,
+    "pitr_history_size_since_parent": WrittenDataVerifier,  # same as written_size on root w/o GC
    "timeline_logical_size": CannotVerifyAnything,
    "synthetic_storage_size": SyntheticSizeVerifier,
 }
Author	SHA1	Message	Date
Erik Grinaker	072aa1445e	pageserver: add branch-local consumption metrics	2025-05-21 11:27:12 +02:00
Erik Grinaker	6c46275f32	Fix test override with PITR disabled	2025-05-21 11:01:11 +02:00
Erik Grinaker	e6404cf2b3	Code tweak	2025-05-21 10:47:43 +02:00
Erik Grinaker	85ae6b54ff	Default to 0 in `GcCutoffs::select_min()`	2025-05-21 10:38:49 +02:00
Erik Grinaker	d20178259c	Fix build under `feature = "testing"`	2025-05-20 17:29:17 +02:00
Erik Grinaker	aae1b526a9	pageserver: use an `Option` for `GcCutoffs::time`	2025-05-20 17:06:37 +02:00