From ca597206b8ebd52b29cec8ddc1e2591211fb22d6 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 29 Nov 2023 19:44:37 +0100 Subject: [PATCH] walredo: latency histogram for spawn duration (#5925) fixes https://github.com/neondatabase/neon/issues/5891 --- pageserver/src/metrics.rs | 10 ++++++++++ pageserver/src/walredo.rs | 6 +++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index b178024b9a..c15b1fce63 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1253,6 +1253,15 @@ pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy = Lazy::new(|| { .unwrap() }); +pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy = Lazy::new(|| { + register_histogram!( + "pageserver_wal_redo_process_launch_duration", + "Histogram of the duration of successful WalRedoProcess::launch calls", + redo_histogram_time_buckets!(), + ) + .expect("failed to define a metric") +}); + pub(crate) struct WalRedoProcessCounters { pub(crate) started: IntCounter, pub(crate) killed_by_cause: enum_map::EnumMap, @@ -1962,6 +1971,7 @@ pub fn preinitialize_metrics() { &WAL_REDO_TIME, &WAL_REDO_RECORDS_HISTOGRAM, &WAL_REDO_BYTES_HISTOGRAM, + &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, ] .into_iter() .for_each(|h| { diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index ed468f220e..edce158e75 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -47,7 +47,8 @@ use pageserver_api::shard::TenantShardId; use crate::config::PageServerConf; use crate::metrics::{ WalRedoKillCause, WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_COUNTERS, - WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME, + WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM, + WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME, }; use crate::pgdatadir_mapping::{key_to_rel_block, key_to_slru_block}; use crate::repository::Key; @@ -241,10 +242,13 @@ impl PostgresRedoManager { let mut proc_guard = self.redo_process.write().unwrap(); match &*proc_guard { None => { + let timer = + WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.start_timer(); let proc = Arc::new( WalRedoProcess::launch(self.conf, self.tenant_id, pg_version) .context("launch walredo process")?, ); + timer.observe_duration(); *proc_guard = Some(Arc::clone(&proc)); proc }