From 2943590694c57bde91eed71aa92bc96c18abb152 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 6 Feb 2025 18:17:47 +0100 Subject: [PATCH] pageserver: use histogram for background job semaphore waits (#10697) ## Problem We don't have visibility into how long an individual background job is waiting for a semaphore permit. ## Summary of changes * Make `pageserver_background_loop_semaphore_wait_seconds` a histogram rather than a sum. * Add a paced warning when a task takes more than 10 minutes to get a permit (for now). * Drive-by cleanup of some `EnumMap` usage. --- pageserver/src/metrics.rs | 58 ++++++++++++++++++---------------- pageserver/src/tenant/tasks.rs | 38 ++++++++++++++++------ 2 files changed, 58 insertions(+), 38 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 1cc18d83ce..3b8612a3fa 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -6,7 +6,7 @@ use std::sync::{Arc, Mutex}; use std::task::{Context, Poll}; use std::time::{Duration, Instant}; -use enum_map::EnumMap; +use enum_map::{Enum as _, EnumMap}; use futures::Future; use metrics::{ register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec, @@ -104,7 +104,7 @@ pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy = Lazy::n .expect("failed to define a metric") }); -// Buckets for background operations like compaction, GC, size calculation +// Buckets for background operation duration in seconds, like compaction, GC, size calculation. const STORAGE_OP_BUCKETS: &[f64] = &[0.010, 0.100, 1.0, 10.0, 100.0, 1000.0]; pub(crate) static STORAGE_TIME_GLOBAL: Lazy = Lazy::new(|| { @@ -236,7 +236,7 @@ pub(crate) static GET_VECTORED_LATENCY: Lazy = Lazy::new(|| GetVectoredLatency { map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| { - let task_kind = ::from_usize(task_kind_idx); + let task_kind = TaskKind::from_usize(task_kind_idx); if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) { let task_kind = task_kind.into(); @@ -259,7 +259,7 @@ pub(crate) static SCAN_LATENCY: Lazy = Lazy::new(|| { ScanLatency { map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| { - let task_kind = ::from_usize(task_kind_idx); + let task_kind = TaskKind::from_usize(task_kind_idx); if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) { let task_kind = task_kind.into(); @@ -300,10 +300,10 @@ static PAGE_CACHE_READ_ACCESSES: Lazy = Lazy::new(|| { pub(crate) static PAGE_CACHE: Lazy = Lazy::new(|| PageCacheMetrics { map: EnumMap::from_array(std::array::from_fn(|task_kind| { - let task_kind = ::from_usize(task_kind); + let task_kind = TaskKind::from_usize(task_kind); let task_kind: &'static str = task_kind.into(); EnumMap::from_array(std::array::from_fn(|content_kind| { - let content_kind = ::from_usize(content_kind); + let content_kind = PageContentKind::from_usize(content_kind); let content_kind: &'static str = content_kind.into(); PageCacheMetricsForTaskKind { read_accesses_immutable: { @@ -1913,7 +1913,7 @@ pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy = Lazy ComputeCommandCounters { map: EnumMap::from_array(std::array::from_fn(|i| { - let command = ::from_usize(i); + let command = ComputeCommandKind::from_usize(i); let command_str: &'static str = command.into(); inner.with_label_values(&[command_str]) })), @@ -2213,13 +2213,13 @@ pub(crate) static TENANT_TASK_EVENTS: Lazy = Lazy::new(|| { pub struct BackgroundLoopSemaphoreMetrics { counters: EnumMap, - durations: EnumMap, + durations: EnumMap, waiting_tasks: EnumMap, running_tasks: EnumMap, } -pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy = Lazy::new( - || { +pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy = + Lazy::new(|| { let counters = register_int_counter_pair_vec!( "pageserver_background_loop_semaphore_wait_start_count", "Counter for background loop concurrency-limiting semaphore acquire calls started", @@ -2229,10 +2229,11 @@ pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy::from_usize(i); + counters: EnumMap::from_array(std::array::from_fn(|i| { + let kind = BackgroundLoopKind::from_usize(i); counters.with_label_values(&[kind.into()]) })), - durations: enum_map::EnumMap::from_array(std::array::from_fn(|i| { - let kind = ::from_usize(i); + durations: EnumMap::from_array(std::array::from_fn(|i| { + let kind = BackgroundLoopKind::from_usize(i); durations.with_label_values(&[kind.into()]) })), - waiting_tasks: enum_map::EnumMap::from_array(std::array::from_fn(|i| { - let kind = ::from_usize(i); + waiting_tasks: EnumMap::from_array(std::array::from_fn(|i| { + let kind = BackgroundLoopKind::from_usize(i); waiting_tasks.with_label_values(&[kind.into()]) })), - running_tasks: enum_map::EnumMap::from_array(std::array::from_fn(|i| { - let kind = ::from_usize(i); + running_tasks: EnumMap::from_array(std::array::from_fn(|i| { + let kind = BackgroundLoopKind::from_usize(i); running_tasks.with_label_values(&[kind.into()]) })), } - }, -); + }); impl BackgroundLoopSemaphoreMetrics { /// Starts recording semaphore metrics. Call `acquired()` on the returned recorder when the @@ -2304,11 +2304,13 @@ impl<'a> BackgroundLoopSemaphoreMetricsRecorder<'a> { } /// Signals that the semaphore has been acquired, and updates relevant metrics. - pub fn acquired(&mut self) { + pub fn acquired(&mut self) -> Duration { + let waited = self.start.elapsed(); self.wait_counter_guard.take().expect("already acquired"); - self.metrics.durations[self.task].inc_by(self.start.elapsed().as_secs_f64()); + self.metrics.durations[self.task].observe(waited.as_secs_f64()); self.metrics.waiting_tasks[self.task].dec(); self.metrics.running_tasks[self.task].inc(); + waited } } @@ -2317,7 +2319,7 @@ impl Drop for BackgroundLoopSemaphoreMetricsRecorder<'_> { fn drop(&mut self) { if self.wait_counter_guard.take().is_some() { // Waiting. - self.metrics.durations[self.task].inc_by(self.start.elapsed().as_secs_f64()); + self.metrics.durations[self.task].observe(self.start.elapsed().as_secs_f64()); self.metrics.waiting_tasks[self.task].dec(); } else { // Running. @@ -2570,7 +2572,7 @@ pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy = pub(crate) struct WalRedoProcessCounters { pub(crate) started: IntCounter, - pub(crate) killed_by_cause: enum_map::EnumMap, + pub(crate) killed_by_cause: EnumMap, pub(crate) active_stderr_logger_tasks_started: IntCounter, pub(crate) active_stderr_logger_tasks_finished: IntCounter, } @@ -2612,7 +2614,7 @@ impl Default for WalRedoProcessCounters { Self { started, killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| { - let cause = ::from_usize(i); + let cause = WalRedoKillCause::from_usize(i); let cause_str: &'static str = cause.into(); killed.with_label_values(&[cause_str]) })), diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 1c3237d0bd..0f10dd7e10 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -3,7 +3,7 @@ use std::ops::ControlFlow; use std::str::FromStr; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; use crate::context::{DownloadBehavior, RequestContext}; @@ -14,9 +14,11 @@ use crate::tenant::throttle::Stats; use crate::tenant::timeline::compaction::CompactionOutcome; use crate::tenant::timeline::CompactionError; use crate::tenant::{Tenant, TenantState}; +use once_cell::sync::Lazy; use rand::Rng; use tokio_util::sync::CancellationToken; use tracing::*; +use utils::rate_limit::RateLimit; use utils::{backoff, completion, pausable_failpoint}; static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy = @@ -41,7 +43,16 @@ static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy &'static str { - self.into() - } -} - pub struct BackgroundLoopSemaphorePermit<'a> { _permit: tokio::sync::SemaphorePermit<'static>, _recorder: BackgroundLoopSemaphoreMetricsRecorder<'a>, @@ -71,6 +76,11 @@ pub(crate) async fn concurrent_background_tasks_rate_limit_permit( loop_kind: BackgroundLoopKind, _ctx: &RequestContext, ) -> BackgroundLoopSemaphorePermit<'static> { + // TODO: use a lower threshold and remove the pacer once we resolve some blockage. + const WARN_THRESHOLD: Duration = Duration::from_secs(600); + static WARN_PACER: Lazy> = + Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10)))); + let mut recorder = crate::metrics::BACKGROUND_LOOP_SEMAPHORE.record(loop_kind); if loop_kind == BackgroundLoopKind::InitialLogicalSizeCalculation { @@ -82,7 +92,15 @@ pub(crate) async fn concurrent_background_tasks_rate_limit_permit( .acquire() .await .expect("should never close"); - recorder.acquired(); + + let waited = recorder.acquired(); + if waited >= WARN_THRESHOLD { + let waited = waited.as_secs_f64(); + WARN_PACER + .lock() + .unwrap() + .call(|| warn!("{loop_kind} task waited {waited:.3}s for semaphore permit")); + } BackgroundLoopSemaphorePermit { _permit: permit, @@ -628,7 +646,7 @@ pub(crate) fn warn_when_period_overrun( "task iteration took longer than the configured period" ); crate::metrics::BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT - .with_label_values(&[task.as_static_str(), &format!("{}", period.as_secs())]) + .with_label_values(&[task.into(), &format!("{}", period.as_secs())]) .inc(); } }