diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 1bc9352256..0a1a22b6e8 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1862,16 +1862,64 @@ pub(crate) static TENANT_TASK_EVENTS: Lazy = Lazy::new(|| { .expect("Failed to register tenant_task_events metric") }); -pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE: Lazy = Lazy::new(|| { - register_int_counter_pair_vec!( - "pageserver_background_loop_semaphore_wait_start_count", - "Counter for background loop concurrency-limiting semaphore acquire calls started", - "pageserver_background_loop_semaphore_wait_finish_count", - "Counter for background loop concurrency-limiting semaphore acquire calls finished", - &["task"], - ) - .unwrap() -}); +pub struct BackgroundLoopSemaphoreMetrics { /* Metrics for acquisitions of the background loop concurrency-limiting semaphore, indexed by BackgroundLoopKind. */ + /* Started/finished counter pair, one per task kind. */ counters: EnumMap, + /* Accumulated wall clock wait time in seconds, one counter per task kind. */ durations: EnumMap, +} + +pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy = Lazy::new( /* Lazily registers both metric families with a "task" label; a failed registration panics through unwrap(). */ + || { + let counters = register_int_counter_pair_vec!( + "pageserver_background_loop_semaphore_wait_start_count", + "Counter for background loop concurrency-limiting semaphore acquire calls started", + "pageserver_background_loop_semaphore_wait_finish_count", + "Counter for background loop concurrency-limiting semaphore acquire calls finished", + &["task"], + ) + .unwrap(); + + let durations = register_counter_vec!( + "pageserver_background_loop_semaphore_wait_duration_seconds", + "Sum of wall clock time spent waiting on the background loop concurrency-limiting semaphore acquire calls", + &["task"], + ) + .unwrap(); + + /* Resolve the labelled child metric for every enum index up front, so measure_acquisition() only has to index into the maps. */ BackgroundLoopSemaphoreMetrics { + counters: enum_map::EnumMap::from_array(std::array::from_fn(|i| { + let kind = ::from_usize(i); + counters.with_label_values(&[kind.into()]) + })), + durations: enum_map::EnumMap::from_array(std::array::from_fn(|i| { + let kind = ::from_usize(i); + durations.with_label_values(&[kind.into()]) + })), + } + }, +); + +impl BackgroundLoopSemaphoreMetrics { + /** Starts measuring one semaphore acquisition for `task` and returns a guard borrowing `self`. The guard takes a counter-pair guard for `task` and notes the current Instant; when it is dropped, the seconds elapsed since this call are added to the duration counter of `task`. NOTE(review): the counter-pair guard presumably bumps the started counter here and the finished counter on drop - confirm against metrics::IntCounterPair. */ pub(crate) fn measure_acquisition(&self, task: BackgroundLoopKind) -> 
impl Drop + '_ { + struct Record<'a> { /* Guard type handed back to the caller; all bookkeeping happens in its Drop impl. */ + metrics: &'a BackgroundLoopSemaphoreMetrics, + task: BackgroundLoopKind, + /* Never read; stored only so that it is dropped together with Record. */ _counter_guard: metrics::IntCounterPairGuard, + start: Instant, + } + impl Drop for Record<'_> { + fn drop(&mut self) { + /* Covers the whole span from measure_acquisition() until the guard is dropped, as fractional seconds. */ let elapsed = self.start.elapsed().as_secs_f64(); + self.metrics.durations[self.task].inc_by(elapsed); + } + } + Record { + metrics: self, + task, + _counter_guard: self.counters[task].guard(), + start: Instant::now(), + } + } +} pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy = Lazy::new(|| { register_int_counter_vec!( @@ -2553,6 +2601,7 @@ use std::time::{Duration, Instant}; use crate::context::{PageContentKind, RequestContext}; use crate::task_mgr::TaskKind; use crate::tenant::mgr::TenantSlot; +use crate::tenant::tasks::BackgroundLoopKind; /// Maintain a per timeline gauge in addition to the global gauge. pub(crate) struct PerTimelineRemotePhysicalSizeGauge { diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 3972685a8e..12f080f3c1 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -61,21 +61,12 @@ impl BackgroundLoopKind { } } -static PERMIT_GAUGES: once_cell::sync::Lazy< - enum_map::EnumMap, -> = once_cell::sync::Lazy::new(|| { - enum_map::EnumMap::from_array(std::array::from_fn(|i| { - let kind = ::from_usize(i); - crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE.with_label_values(&[kind.into()]) - })) -}); - /// Cancellation safe. pub(crate) async fn concurrent_background_tasks_rate_limit_permit( loop_kind: BackgroundLoopKind, _ctx: &RequestContext, ) -> tokio::sync::SemaphorePermit<'static> { - let _guard = PERMIT_GAUGES[loop_kind].guard(); + let _guard = crate::metrics::BACKGROUND_LOOP_SEMAPHORE.measure_acquisition(loop_kind); pausable_failpoint!( "initial-size-calculation-permit-pause",