diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index b178024b9a..c15b1fce63 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1253,6 +1253,15 @@ pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
     .unwrap()
 });
 
+pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
+        "pageserver_wal_redo_process_launch_duration",
+        "Histogram of the duration of successful WalRedoProcess::launch calls",
+        redo_histogram_time_buckets!(),
+    )
+    .expect("failed to define a metric")
+});
+
 pub(crate) struct WalRedoProcessCounters {
     pub(crate) started: IntCounter,
     pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
@@ -1962,6 +1971,7 @@ pub fn preinitialize_metrics() {
         &WAL_REDO_TIME,
         &WAL_REDO_RECORDS_HISTOGRAM,
         &WAL_REDO_BYTES_HISTOGRAM,
+        &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
     ]
     .into_iter()
     .for_each(|h| {
diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs
index ed468f220e..edce158e75 100644
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -47,7 +47,8 @@ use pageserver_api::shard::TenantShardId;
 use crate::config::PageServerConf;
 use crate::metrics::{
     WalRedoKillCause, WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_COUNTERS,
-    WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME,
+    WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM,
+    WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME,
 };
 use crate::pgdatadir_mapping::{key_to_rel_block, key_to_slru_block};
 use crate::repository::Key;
@@ -241,10 +242,16 @@ impl PostgresRedoManager {
                         let mut proc_guard = self.redo_process.write().unwrap();
                         match &*proc_guard {
                             None => {
+                                // Time the launch by hand: a `HistogramTimer` guard would also
+                                // record on drop when `?` below returns early, but this
+                                // histogram only counts successful launches.
+                                let launch_started_at = std::time::Instant::now();
                                 let proc = Arc::new(
                                     WalRedoProcess::launch(self.conf, self.tenant_id, pg_version)
                                         .context("launch walredo process")?,
                                 );
+                                WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM
+                                    .observe(launch_started_at.elapsed().as_secs_f64());
                                 *proc_guard = Some(Arc::clone(&proc));
                                 proc
                             }