diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 5315f0b936..27e25d8e32 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -525,6 +525,15 @@ static LAST_RECORD_LSN: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static STANDBY_HORIZON: Lazy = Lazy::new(|| { + register_int_gauge_vec!( + "pageserver_standby_horizon", + "Standby apply LSN for which GC is hold off, by timeline.", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + static RESIDENT_PHYSICAL_SIZE: Lazy = Lazy::new(|| { register_uint_gauge_vec!( "pageserver_resident_physical_size", @@ -2098,6 +2107,7 @@ pub(crate) struct TimelineMetrics { pub garbage_collect_histo: StorageTimeMetrics, pub find_gc_cutoffs_histo: StorageTimeMetrics, pub last_record_gauge: IntGauge, + pub standby_horizon_gauge: IntGauge, pub resident_physical_size_gauge: UIntGauge, /// copy of LayeredTimeline.current_logical_size pub current_logical_size_gauge: UIntGauge, @@ -2167,6 +2177,9 @@ impl TimelineMetrics { let last_record_gauge = LAST_RECORD_LSN .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); + let standby_horizon_gauge = STANDBY_HORIZON + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); @@ -2212,6 +2225,7 @@ impl TimelineMetrics { find_gc_cutoffs_histo, load_layer_map_histo, last_record_gauge, + standby_horizon_gauge, resident_physical_size_gauge, current_logical_size_gauge, aux_file_size_gauge, @@ -2246,6 +2260,7 @@ impl TimelineMetrics { let timeline_id = &self.timeline_id; let shard_id = &self.shard_id; let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); + let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]); { RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get()); let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 2c43c26359..b0e5275b5f 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -4871,6 +4871,9 @@ impl Timeline { // It is an easy way to unset it when standby disappears without adding // more conf options. self.standby_horizon.store(Lsn::INVALID); + self.metrics + .standby_horizon_gauge + .set(Lsn::INVALID.0 as i64); let res = self .gc_timeline(horizon_cutoff, pitr_cutoff, retain_lsns, new_gc_cutoff) diff --git a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs index a3c7adae44..1d2ffec08f 100644 --- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs +++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs @@ -735,6 +735,10 @@ impl ConnectionManagerState { self.timeline .standby_horizon .store(Lsn(timeline_update.standby_horizon)); + self.timeline + .metrics + .standby_horizon_gauge + .set(timeline_update.standby_horizon as i64); } let new_safekeeper_id = NodeId(timeline_update.safekeeper_id); diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 8fa67e75c9..8b8075f8c1 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -142,6 +142,7 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = ( "pageserver_resident_physical_size", "pageserver_io_operations_bytes_total", "pageserver_last_record_lsn", + "pageserver_standby_horizon", "pageserver_smgr_query_seconds_bucket", "pageserver_smgr_query_seconds_count", "pageserver_smgr_query_seconds_sum",