walredo: latency histogram for spawn duration (#5925)

fixes https://github.com/neondatabase/neon/issues/5891
This commit is contained in:
Christian Schwarz
2023-11-29 19:44:37 +01:00
committed by GitHub
parent 46f20faa0d
commit ca597206b8
2 changed files with 15 additions and 1 deletions

View File

@@ -1253,6 +1253,15 @@ pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
.unwrap()
});
pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_wal_redo_process_launch_duration",
"Histogram of the duration of successful WalRedoProcess::launch calls",
redo_histogram_time_buckets!(),
)
.expect("failed to define a metric")
});
pub(crate) struct WalRedoProcessCounters {
pub(crate) started: IntCounter,
pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
@@ -1962,6 +1971,7 @@ pub fn preinitialize_metrics() {
&WAL_REDO_TIME,
&WAL_REDO_RECORDS_HISTOGRAM,
&WAL_REDO_BYTES_HISTOGRAM,
&WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
]
.into_iter()
.for_each(|h| {

View File

@@ -47,7 +47,8 @@ use pageserver_api::shard::TenantShardId;
use crate::config::PageServerConf;
use crate::metrics::{
WalRedoKillCause, WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_COUNTERS,
WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME,
WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM,
WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME,
};
use crate::pgdatadir_mapping::{key_to_rel_block, key_to_slru_block};
use crate::repository::Key;
@@ -241,10 +242,13 @@ impl PostgresRedoManager {
let mut proc_guard = self.redo_process.write().unwrap();
match &*proc_guard {
None => {
let timer =
WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.start_timer();
let proc = Arc::new(
WalRedoProcess::launch(self.conf, self.tenant_id, pg_version)
.context("launch walredo process")?,
);
timer.observe_duration();
*proc_guard = Some(Arc::clone(&proc));
proc
}