diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index f85f525630..92ce654b7e 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -94,15 +94,35 @@ pub(crate) static READ_NUM_FS_LAYERS: Lazy = Lazy::new(|| { }); // Metrics collected on operations on the storage repository. -pub(crate) static RECONSTRUCT_TIME: Lazy = Lazy::new(|| { - register_histogram!( + +pub(crate) struct ReconstructTimeMetrics { + ok: Histogram, + err: Histogram, +} + +pub(crate) static RECONSTRUCT_TIME: Lazy = Lazy::new(|| { + let inner = register_histogram_vec!( "pageserver_getpage_reconstruct_seconds", "Time spent in reconstruct_value (reconstruct a page from deltas)", + &["result"], CRITICAL_OP_BUCKETS.into(), ) - .expect("failed to define a metric") + .expect("failed to define a metric"); + ReconstructTimeMetrics { + ok: inner.get_metric_with_label_values(&["ok"]).unwrap(), + err: inner.get_metric_with_label_values(&["err"]).unwrap(), + } }); +impl ReconstructTimeMetrics { + pub(crate) fn for_result(&self, result: &Result) -> &Histogram { + match result { + Ok(_) => &self.ok, + Err(_) => &self.err, + } + } +} + pub(crate) static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy = Lazy::new(|| { register_int_counter!( "pageserver_materialized_cache_hits_direct_total", @@ -1856,7 +1876,6 @@ pub fn preinitialize_metrics() { // histograms [ &READ_NUM_FS_LAYERS, - &RECONSTRUCT_TIME, &WAIT_LSN_TIME, &WAL_REDO_TIME, &WAL_REDO_WAIT_TIME, @@ -1867,4 +1886,7 @@ pub fn preinitialize_metrics() { .for_each(|h| { Lazy::force(h); }); + + // Custom + Lazy::force(&RECONSTRUCT_TIME); } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 008b0195ff..8a3f6df2ae 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -56,7 +56,7 @@ use crate::config::PageServerConf; use crate::keyspace::{KeyPartitioning, KeySpace, KeySpaceRandomAccum}; use crate::metrics::{ TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT, - RECONSTRUCT_TIME, UNEXPECTED_ONDEMAND_DOWNLOADS, + UNEXPECTED_ONDEMAND_DOWNLOADS, }; use crate::pgdatadir_mapping::LsnForTimestamp; use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key}; @@ -501,9 +501,12 @@ impl Timeline { .await?; timer.stop_and_record(); - let timer = RECONSTRUCT_TIME.start_timer(); + let start = Instant::now(); let res = self.reconstruct_value(key, lsn, reconstruct_state).await; - timer.stop_and_record(); + let elapsed = start.elapsed(); + crate::metrics::RECONSTRUCT_TIME + .for_result(&res) + .observe(elapsed.as_secs_f64()); if cfg!(feature = "testing") && res.is_err() { // it can only be walredo issue