Rewrite as pitr_cutoff, don't regress

Erik Grinaker
2025-04-29 12:34:22 +02:00
parent 697c67ea85
commit c2afddbcba
3 changed files with 75 additions and 32 deletions

View File

@@ -24,9 +24,12 @@ pub(super) enum Name {
/// Timeline last_record_lsn, incremental
#[serde(rename = "written_data_bytes_delta")]
WrittenSizeDelta,
/// Timeline last_record_lsn - gc_cutoffs.time (i.e. pitr_interval)
#[serde(rename = "pitr_history_size")]
PitrHistorySize,
/// Timeline's PITR cutoff LSN (now - pitr_interval). If PITR is disabled
/// (pitr_interval = 0), this will equal WrittenSize.
///
/// Updated periodically during GC. Does not regress.
#[serde(rename = "pitr_cutoff")]
PitrCutoff,
/// Timeline logical size
#[serde(rename = "timeline_logical_size")]
LogicalSize,
@@ -163,18 +166,14 @@ impl MetricsKey {
.incremental_values()
}
/// [`Timeline::get_last_record_lsn`] - [`GcCutoffs::time`] (i.e. `pitr_interval`).
/// [`GcCutoffs::time`] (i.e. `now` - `pitr_interval`)
///
/// [`Timeline::get_last_record_lsn`]: crate::tenant::Timeline::get_last_record_lsn
/// [`GcCutoffs::time`]: crate::tenant::timeline::GcCutoffs::time
const fn pitr_history_size(
tenant_id: TenantId,
timeline_id: TimelineId,
) -> AbsoluteValueFactory {
const fn pitr_cutoff(tenant_id: TenantId, timeline_id: TimelineId) -> AbsoluteValueFactory {
MetricsKey {
tenant_id,
timeline_id: Some(timeline_id),
metric: Name::PitrHistorySize,
metric: Name::PitrCutoff,
}
.absolute_values()
}
@@ -501,17 +500,24 @@ impl TimelineSnapshot {
});
}
// Compute the PITR history size.
// Compute the PITR cutoff. We have to make sure this doesn't regress:
// on restart, GC may not run for a while, and the PITR cutoff will be
// reported as 0 (which would bill the user for the entire history from
// 0 to last_record_lsn). We therefore clamp this to the cached value
// (which persists across restarts).
//
// TODO: verify that the GC cutoffs don't severely regress, e.g. to 0 such that we bill the
// entire history. Also verify that it's okay for this to regress on restart, unlike e.g.
// written_size above.
let pitr_history_size_key = MetricsKey::pitr_history_size(tenant_id, timeline_id);
let pitr_history_size = self
// If pitr_interval is disabled (pitr_cutoff is None), the cutoff is the
// last record LSN, by definition. We don't use the written_size
// clamping from above, because our own caching should result in
// equivalent clamping.
let pitr_cutoff_key = MetricsKey::pitr_cutoff(tenant_id, timeline_id);
let mut pitr_cutoff = self
.pitr_cutoff
.map(|pitr| self.last_record_lsn.saturating_sub(pitr))
.unwrap_or_default();
metrics.push(pitr_history_size_key.at(now, pitr_history_size.into()));
.unwrap_or(self.last_record_lsn /* PITR disabled */);
if let Some(prev) = cache.get(pitr_cutoff_key.key()) {
pitr_cutoff = pitr_cutoff.max(Lsn(prev.value)); // don't regress
}
metrics.push(pitr_cutoff_key.at(now, pitr_cutoff.into()));
{
let factory = MetricsKey::timeline_logical_size(tenant_id, timeline_id);
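Since the direct pitr_history_size metric goes away, a consumer that still wants the retained-history size has to derive it from the two absolute values that remain: the written-size metric and the new pitr_cutoff. A minimal sketch of that derivation, with illustrative struct and field names rather than the actual billing-pipeline types:

// Illustrative only: recover the old pitr_history_size value from the two
// absolute metrics this commit leaves behind. Names here are assumptions.
struct TimelinePoint {
    written_size: u64, // absolute written size (last_record_lsn as bytes)
    pitr_cutoff: u64,  // "pitr_cutoff" (never regresses, per this commit)
}

impl TimelinePoint {
    /// Bytes of history retained for PITR. Zero when PITR is disabled,
    /// since pitr_cutoff then equals the written size.
    fn pitr_history_size(&self) -> u64 {
        self.written_size.saturating_sub(self.pitr_cutoff)
    }
}

fn main() {
    let point = TimelinePoint { written_size: 100, pitr_cutoff: 70 };
    assert_eq!(point.pitr_history_size(), 30);
}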

View File

@@ -36,8 +36,7 @@ fn startup_collected_timeline_metrics_before_advancing() {
0
),
MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
MetricsKey::pitr_history_size(tenant_id, timeline_id)
.at(now, disk_consistent_lsn.0 - pitr_cutoff.0),
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, pitr_cutoff.0),
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
]
);
@@ -77,8 +76,7 @@ fn startup_collected_timeline_metrics_second_round() {
&[
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
MetricsKey::pitr_history_size(tenant_id, timeline_id)
.at(now, disk_consistent_lsn.0 - pitr_cutoff.0),
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, pitr_cutoff.0),
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
]
);
@@ -126,15 +124,14 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() {
&[
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(just_before, now, 0),
MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
MetricsKey::pitr_history_size(tenant_id, timeline_id)
.at(now, disk_consistent_lsn.0 - pitr_cutoff.0),
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, pitr_cutoff.0),
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
]
);
}
#[test]
fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
fn post_restart_written_sizes_with_rolled_back_last_record_lsn_and_pitr_cutoff() {
// it can happen that we lose the inmemorylayer but have previously sent metrics and we
// should never go backwards
@@ -156,7 +153,7 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
loaded_at: (Lsn(50), at_restart),
last_record_lsn: Lsn(50),
current_exact_logical_size: None,
pitr_cutoff: Some(Lsn(0)),
pitr_cutoff: Some(Lsn(20)),
};
let mut cache = HashMap::from([
@@ -171,6 +168,9 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
999_999_999,
)
.to_kv_pair(),
MetricsKey::pitr_cutoff(tenant_id, timeline_id)
.at(before_restart, 70)
.to_kv_pair(),
]);
let mut metrics = Vec::new();
@@ -185,7 +185,7 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
0
),
MetricsKey::written_size(tenant_id, timeline_id).at(now, 100),
MetricsKey::pitr_history_size(tenant_id, timeline_id).at(now, 50), // does regress
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, 70),
]
);
@@ -200,7 +200,7 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
&[
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(now, later, 0),
MetricsKey::written_size(tenant_id, timeline_id).at(later, 100),
MetricsKey::pitr_history_size(tenant_id, timeline_id).at(later, 50),
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(later, 70),
]
);
}
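To make the expected values above concrete: GC reports a cutoff of Lsn(20), but 70 was already reported before the restart, so the clamp keeps 70 on both rounds. A plain-integer restatement of the clamp from the metrics.rs hunk, with a made-up helper name and no Lsn or cache types:

// Simplified restatement of the non-regressing cutoff computation.
// Illustrative only; the real code works on Lsn values and the metrics cache.
fn reported_pitr_cutoff(last_record_lsn: u64, gc_cutoff: Option<u64>, cached: Option<u64>) -> u64 {
    // No cutoff from GC means PITR is disabled: fall back to last_record_lsn.
    let mut cutoff = gc_cutoff.unwrap_or(last_record_lsn);
    // Never report less than a previously reported value.
    if let Some(prev) = cached {
        cutoff = cutoff.max(prev);
    }
    cutoff
}

fn main() {
    // The post-restart scenario above: the cached 70 wins over the fresh Lsn(20).
    assert_eq!(reported_pitr_cutoff(50, Some(20), Some(70)), 70);
    // With PITR disabled and nothing cached, last_record_lsn is reported as-is.
    assert_eq!(reported_pitr_cutoff(50, None, None), 50);
}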
@@ -309,16 +309,53 @@ fn time_backwards<const N: usize>() -> [std::time::SystemTime; N] {
times
}
#[test]
fn pitr_cutoff_none_yields_last_record_lsn() {
let tenant_id = TenantId::generate();
let timeline_id = TimelineId::generate();
let mut metrics = Vec::new();
let cache = HashMap::new();
let initdb_lsn = Lsn(0x10000);
let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
let snap = TimelineSnapshot {
loaded_at: (disk_consistent_lsn, SystemTime::now()),
last_record_lsn: disk_consistent_lsn,
current_exact_logical_size: None,
pitr_cutoff: None,
};
let now = DateTime::<Utc>::from(SystemTime::now());
snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
assert_eq!(
metrics,
&[
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(
snap.loaded_at.1.into(),
now,
0
),
MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
]
);
}
pub(crate) const fn metric_examples_old(
tenant_id: TenantId,
timeline_id: TimelineId,
now: DateTime<Utc>,
before: DateTime<Utc>,
) -> [RawMetric; 6] {
) -> [RawMetric; 7] {
[
MetricsKey::written_size(tenant_id, timeline_id).at_old_format(now, 0),
MetricsKey::written_size_delta(tenant_id, timeline_id)
.from_until_old_format(before, now, 0),
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at_old_format(now, 0),
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at_old_format(now, 0),
MetricsKey::remote_storage_size(tenant_id).at_old_format(now, 0),
MetricsKey::resident_size(tenant_id).at_old_format(now, 0),
@@ -335,7 +372,7 @@ pub(crate) const fn metric_examples(
[
MetricsKey::written_size(tenant_id, timeline_id).at(now, 0),
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
MetricsKey::pitr_history_size(tenant_id, timeline_id).at(now, 0),
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, 0),
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0),
MetricsKey::remote_storage_size(tenant_id).at(now, 0),
MetricsKey::resident_size(tenant_id).at(now, 0),

View File

@@ -568,7 +568,7 @@ mod tests {
assert_eq!(upgraded_samples, new_samples);
}
fn metric_samples_old() -> [RawMetric; 6] {
fn metric_samples_old() -> [RawMetric; 7] {
let tenant_id = TenantId::from_array([0; 16]);
let timeline_id = TimelineId::from_array([0xff; 16]);