From 845e2965628a931e9caf8b7233cac3d4b834a6c8 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 11 May 2023 17:02:19 +0200 Subject: [PATCH] eviction: add global histogram for iteration durations (#4212) I would like to know whether and by how much the eviction iterations spike in the $period-sized window that happens every $threshold, when all the timelines perform their imitate accesses. refs https://github.com/neondatabase/neon/issues/4154 --- pageserver/src/metrics.rs | 10 ++++++++++ pageserver/src/tenant/timeline/eviction_task.rs | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 542fd511e1..c04f6e054b 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -187,6 +187,16 @@ static PERSISTENT_BYTES_WRITTEN: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +pub(crate) static EVICTION_ITERATION_DURATION: Lazy = Lazy::new(|| { + register_histogram_vec!( + "pageserver_eviction_iteration_duration_seconds_global", + "Time spent on a single eviction iteration", + &["period_secs", "threshold_secs"], + STORAGE_OP_BUCKETS.into(), + ) + .expect("failed to define a metric") +}); + static EVICTIONS: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_evictions", diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index eb04e7e579..5ea3d5b14d 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -120,6 +120,13 @@ impl Timeline { } let elapsed = start.elapsed(); crate::tenant::tasks::warn_when_period_overrun(elapsed, p.period, "eviction"); + crate::metrics::EVICTION_ITERATION_DURATION + .get_metric_with_label_values(&[ + &format!("{}", p.period.as_secs()), + &format!("{}", p.threshold.as_secs()), + ]) + .unwrap() + .observe(elapsed.as_secs_f64()); ControlFlow::Continue(start + p.period) } }