diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 80113220ae..dc7f89cf3a 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -285,6 +285,63 @@ pub static PAGE_CACHE_SIZE: Lazy = Lazy::new(|| PageCacheS }, }); +pub(crate) mod page_cache_eviction_metrics { + use std::num::NonZeroUsize; + + use metrics::{register_int_counter_vec, IntCounter, IntCounterVec}; + use once_cell::sync::Lazy; + + #[derive(Clone, Copy)] + pub(crate) enum Outcome { + FoundSlotUnused { iters: NonZeroUsize }, + FoundSlotEvicted { iters: NonZeroUsize }, + ItersExceeded { iters: NonZeroUsize }, + } + + static ITERS_TOTAL_VEC: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_page_cache_find_victim_iters_total", + "Counter for the number of iterations in the find_victim loop", + &["outcome"], + ) + .expect("failed to define a metric") + }); + + static CALLS_VEC: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_page_cache_find_victim_calls", + "Incremented at the end of each find_victim() call.\ + Filter by outcome to get e.g., eviction rate.", + &["outcome"] + ) + .unwrap() + }); + + pub(crate) fn observe(outcome: Outcome) { + macro_rules! dry { + ($label:literal, $iters:expr) => {{ + static LABEL: &'static str = $label; + static ITERS_TOTAL: Lazy = + Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL])); + static CALLS: Lazy = + Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL])); + ITERS_TOTAL.inc_by(($iters.get()) as u64); + CALLS.inc(); + }}; + } + match outcome { + Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters), + Outcome::FoundSlotEvicted { iters } => { + dry!("found_evicted", iters) + } + Outcome::ItersExceeded { iters } => { + dry!("err_iters_exceeded", iters); + super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit); + } + } + } +} + pub(crate) static PAGE_CACHE_ACQUIRE_PINNED_SLOT_TIME: Lazy = Lazy::new(|| { register_histogram!( "pageserver_page_cache_acquire_pinned_slot_seconds", @@ -294,14 +351,6 @@ pub(crate) static PAGE_CACHE_ACQUIRE_PINNED_SLOT_TIME: Lazy = Lazy::n .expect("failed to define a metric") }); -pub(crate) static PAGE_CACHE_FIND_VICTIMS_ITERS_TOTAL: Lazy = Lazy::new(|| { - register_int_counter!( - "pageserver_page_cache_find_victim_iters_total", - "Counter for the number of iterations in the find_victim loop", - ) - .expect("failed to define a metric") -}); - static PAGE_CACHE_ERRORS: Lazy = Lazy::new(|| { register_int_counter_vec!( "page_cache_errors_total", diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs index 0702057766..dbd85d2dcf 100644 --- a/pageserver/src/page_cache.rs +++ b/pageserver/src/page_cache.rs @@ -88,7 +88,11 @@ use utils::{ lsn::Lsn, }; -use crate::{context::RequestContext, metrics::PageCacheSizeMetrics, repository::Key}; +use crate::{ + context::RequestContext, + metrics::{page_cache_eviction_metrics, PageCacheSizeMetrics}, + repository::Key, +}; static PAGE_CACHE: OnceCell = OnceCell::new(); const TEST_PAGE_CACHE_SIZE: usize = 50; @@ -897,8 +901,10 @@ impl PageCache { // Note that just yielding to tokio during iteration without such // priority boosting is likely counter-productive. We'd just give more opportunities // for B to bump usage count, further starving A. - crate::metrics::page_cache_errors_inc( - crate::metrics::PageCacheErrorKind::EvictIterLimit, + page_cache_eviction_metrics::observe( + page_cache_eviction_metrics::Outcome::ItersExceeded { + iters: iters.try_into().unwrap(), + }, ); anyhow::bail!("exceeded evict iter limit"); } @@ -909,8 +915,18 @@ impl PageCache { // remove mapping for old buffer self.remove_mapping(old_key); inner.key = None; + page_cache_eviction_metrics::observe( + page_cache_eviction_metrics::Outcome::FoundSlotEvicted { + iters: iters.try_into().unwrap(), + }, + ); + } else { + page_cache_eviction_metrics::observe( + page_cache_eviction_metrics::Outcome::FoundSlotUnused { + iters: iters.try_into().unwrap(), + }, + ); } - crate::metrics::PAGE_CACHE_FIND_VICTIMS_ITERS_TOTAL.inc_by(iters as u64); return Ok((slot_idx, inner)); } }