From f2aa8c5db96a6ba7c546a98ef07b049e513efdb3 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 29 Apr 2025 11:34:37 +0200 Subject: [PATCH] Check for pitr_interval == 0 --- pageserver/src/consumption_metrics/metrics.rs | 16 ++++++++++------ .../src/consumption_metrics/metrics/tests.rs | 19 +++++-------------- pageserver/src/tenant.rs | 2 +- pageserver/src/tenant/timeline.rs | 9 ++++++++- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/pageserver/src/consumption_metrics/metrics.rs b/pageserver/src/consumption_metrics/metrics.rs index 9515f375ed..03445f4280 100644 --- a/pageserver/src/consumption_metrics/metrics.rs +++ b/pageserver/src/consumption_metrics/metrics.rs @@ -1,5 +1,5 @@ use std::sync::Arc; -use std::time::SystemTime; +use std::time::{Duration, SystemTime}; use chrono::{DateTime, Utc}; use consumption_metrics::EventType; @@ -10,7 +10,6 @@ use utils::lsn::Lsn; use super::{Cache, NewRawMetric}; use crate::context::RequestContext; use crate::tenant::mgr::TenantManager; -use crate::tenant::timeline::GcCutoffs; use crate::tenant::timeline::logical_size::CurrentLogicalSize; /// Name of the metric, used by `MetricsKey` factory methods and `deserialize_cached_events` @@ -390,7 +389,8 @@ struct TimelineSnapshot { loaded_at: (Lsn, SystemTime), last_record_lsn: Lsn, current_exact_logical_size: Option, - gc_cutoffs: GcCutoffs, + /// The PITR cutoff LSN. None if PITR is disabled. + pitr_cutoff: Option, } impl TimelineSnapshot { @@ -410,7 +410,8 @@ impl TimelineSnapshot { } else { let loaded_at = t.loaded_at; let last_record_lsn = t.get_last_record_lsn(); - let gc_cutoffs = t.gc_info.read().unwrap().cutoffs; // NB: assume periodically updated + let pitr_cutoff = (t.get_pitr_interval() != Duration::ZERO) + .then(|| t.gc_info.read().unwrap().cutoffs.time); let current_exact_logical_size = { let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_shard_id.tenant_id, timeline_id = %t.timeline_id); @@ -431,7 +432,7 @@ impl TimelineSnapshot { loaded_at, last_record_lsn, current_exact_logical_size, - gc_cutoffs, + pitr_cutoff, })) } } @@ -505,7 +506,10 @@ impl TimelineSnapshot { // entire history. Also verify that it's okay for this to regress on restart, unlike e.g. // written_size above. let pitr_history_size_key = MetricsKey::pitr_history_size(tenant_id, timeline_id); - let pitr_history_size = self.last_record_lsn.saturating_sub(self.gc_cutoffs.time); + let pitr_history_size = self + .pitr_cutoff + .map(|pitr| self.last_record_lsn.saturating_sub(pitr)) + .unwrap_or_default(); metrics.push(pitr_history_size_key.at(now, pitr_history_size.into())); { diff --git a/pageserver/src/consumption_metrics/metrics/tests.rs b/pageserver/src/consumption_metrics/metrics/tests.rs index b537efe4d8..da30f7ecaf 100644 --- a/pageserver/src/consumption_metrics/metrics/tests.rs +++ b/pageserver/src/consumption_metrics/metrics/tests.rs @@ -20,10 +20,7 @@ fn startup_collected_timeline_metrics_before_advancing() { loaded_at: (disk_consistent_lsn, SystemTime::now()), last_record_lsn: disk_consistent_lsn, current_exact_logical_size: Some(logical_size), - gc_cutoffs: GcCutoffs { - space: Lsn::INVALID, - time: pitr_cutoff, - }, + pitr_cutoff: Some(pitr_cutoff), }; let now = DateTime::::from(SystemTime::now()); @@ -70,10 +67,7 @@ fn startup_collected_timeline_metrics_second_round() { loaded_at: (disk_consistent_lsn, init), last_record_lsn: disk_consistent_lsn, current_exact_logical_size: Some(logical_size), - gc_cutoffs: GcCutoffs { - space: Lsn::INVALID, - time: pitr_cutoff, - }, + pitr_cutoff: Some(pitr_cutoff), }; snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache); @@ -122,10 +116,7 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() { loaded_at: (disk_consistent_lsn, init), last_record_lsn: disk_consistent_lsn, current_exact_logical_size: Some(logical_size), - gc_cutoffs: GcCutoffs { - space: Lsn::INVALID, - time: pitr_cutoff, - }, + pitr_cutoff: Some(pitr_cutoff), }; snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache); @@ -165,7 +156,7 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() { loaded_at: (Lsn(50), at_restart), last_record_lsn: Lsn(50), current_exact_logical_size: None, - gc_cutoffs: GcCutoffs::default(), + pitr_cutoff: Some(Lsn(0)), }; let mut cache = HashMap::from([ @@ -229,7 +220,7 @@ fn post_restart_current_exact_logical_size_uses_cached() { loaded_at: (Lsn(50), at_restart), last_record_lsn: Lsn(50), current_exact_logical_size: None, - gc_cutoffs: GcCutoffs::default(), + pitr_cutoff: None, }; let cache = HashMap::from([MetricsKey::timeline_logical_size(tenant_id, timeline_id) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 330ac7db49..698579e8fb 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -4718,7 +4718,7 @@ impl TenantShard { // - this timeline was created while we were finding cutoffs // - lsn for timestamp search fails for this timeline repeatedly if let Some(cutoffs) = gc_cutoffs.get(&timeline.timeline_id) { - let original_cutoffs = target.cutoffs; + let original_cutoffs = target.cutoffs.clone(); // GC cutoffs should never go back target.cutoffs = GcCutoffs { space: Lsn(cutoffs.space.0.max(original_cutoffs.space.0)), diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 33512145d4..f3d01b7605 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -537,7 +537,7 @@ impl GcInfo { /// The `GcInfo` component describing which Lsns need to be retained. Functionally, this /// is a single number (the oldest LSN which we must retain), but it internally distinguishes /// between time-based and space-based retention for observability and consumption metrics purposes. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone)] pub(crate) struct GcCutoffs { /// Calculated from the [`pageserver_api::models::TenantConfig::gc_horizon`], this LSN indicates how much /// history we must keep to retain a specified number of bytes of WAL. @@ -2543,6 +2543,13 @@ impl Timeline { .unwrap_or(self.conf.default_tenant_conf.checkpoint_timeout) } + pub(crate) fn get_pitr_interval(&self) -> Duration { + let tenant_conf = &self.tenant_conf.load().tenant_conf; + tenant_conf + .pitr_interval + .unwrap_or(self.conf.default_tenant_conf.pitr_interval) + } + fn get_compaction_period(&self) -> Duration { let tenant_conf = self.tenant_conf.load().tenant_conf.clone(); tenant_conf