diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index b4e20cb8b9..bd75cf98f5 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -3559,11 +3559,16 @@ pub mod tokio_epoll_uring { } pub(crate) mod tenant_throttling { + use std::num::NonZeroUsize; + use metrics::{register_int_counter_vec, IntCounter}; use once_cell::sync::Lazy; use utils::shard::TenantShardId; - use crate::tenant::{self}; + use crate::{ + assert_u64_eq_usize::UsizeIsU64, + tenant::{self}, + }; struct GlobalAndPerTenantIntCounter { global: IntCounter, @@ -3571,10 +3576,6 @@ pub(crate) mod tenant_throttling { } impl GlobalAndPerTenantIntCounter { - #[inline(always)] - pub(crate) fn inc(&self) { - self.inc_by(1) - } #[inline(always)] pub(crate) fn inc_by(&self, n: u64) { self.global.inc_by(n); @@ -3592,7 +3593,7 @@ pub(crate) mod tenant_throttling { static COUNT_ACCOUNTED_START: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_tenant_throttling_count_accounted_start_global", - "Count of tenant throttling starts, by kind of throttle.", + "Like pageserver_tenant_throttling_count_accounted_start, but aggregated to the instance.", &["kind"] ) .unwrap() @@ -3600,7 +3601,8 @@ pub(crate) mod tenant_throttling { static COUNT_ACCOUNTED_START_PER_TENANT: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_tenant_throttling_count_accounted_start", - "Count of tenant throttling starts, by kind of throttle.", + "Counter incremented for each key that enters the throttling stage. + A batched request will increment this counter by the number of requests in the batch.", &["kind", "tenant_id", "shard_id"] ) .unwrap() @@ -3608,7 +3610,7 @@ pub(crate) mod tenant_throttling { static COUNT_ACCOUNTED_FINISH: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_tenant_throttling_count_accounted_finish_global", - "Count of tenant throttling finishes, by kind of throttle.", + "Like pageserver_tenant_throttling_count_accounted_finish, but aggregated to the instance.", &["kind"] ) .unwrap() @@ -3616,7 +3618,8 @@ pub(crate) mod tenant_throttling { static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_tenant_throttling_count_accounted_finish", - "Count of tenant throttling finishes, by kind of throttle.", + "Counter incremented for each key that exits the throttling stage. + A batched request will increment this counter by the number of requests in the batch.", &["kind", "tenant_id", "shard_id"] ) .unwrap() @@ -3624,7 +3627,7 @@ pub(crate) mod tenant_throttling { static WAIT_USECS: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_tenant_throttling_wait_usecs_sum_global", - "Sum of microseconds that spent waiting throttle by kind of throttle.", + "Like pageserver_tenant_throttling_wait_usecs_sum, but aggregated to the instance.", &["kind"] ) .unwrap() @@ -3632,7 +3635,8 @@ pub(crate) mod tenant_throttling { static WAIT_USECS_PER_TENANT: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_tenant_throttling_wait_usecs_sum", - "Sum of microseconds that spent waiting throttle by kind of throttle.", + "Wall clock time spent waiting on the throttle. + A batched request counts as one ", &["kind", "tenant_id", "shard_id"] ) .unwrap() @@ -3724,21 +3728,26 @@ pub(crate) mod tenant_throttling { impl tenant::throttle::Metric for Metrics { #[inline(always)] - fn accounting_start(&self) { - self.count_accounted_start.inc(); + fn accounting_start(&self, key_count: NonZeroUsize) { + self.count_accounted_start + .inc_by(key_count.get().into_u64()); } #[inline(always)] - fn accounting_finish(&self) { - self.count_accounted_finish.inc(); + fn accounting_finish(&self, key_count: NonZeroUsize) { + self.count_accounted_finish + .inc_by(key_count.get().into_u64()); } #[inline(always)] fn observe_throttling( &self, - tenant::throttle::Observation { wait_time }: &tenant::throttle::Observation, + tenant::throttle::Observation { + key_count, + wait_time, + }: &tenant::throttle::Observation, ) { let val = u64::try_from(wait_time.as_micros()).unwrap(); self.wait_time.inc_by(val); - self.count_throttled.inc(); + self.count_throttled.inc_by(key_count.get().into_u64()); } } } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index d00ec11a76..19131fb33e 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -588,13 +588,14 @@ impl BatchedFeMessage { shard, // 1 token is probably under-estimating because these // request handlers typically do several Timeline::get calls. - 1, + NonZeroUsize::new(1).unwrap(), itertools::Either::Left(std::iter::once(timer)), ) } BatchedFeMessage::GetPage { shard, pages, .. } => ( shard, - pages.len(), + NonZeroUsize::new(pages.len()) + .expect("a batch has always at least one request in it"), itertools::Either::Right(pages.iter_mut().map(|(_, _, timer)| timer)), ), BatchedFeMessage::RespondError { .. } => return Ok(()), diff --git a/pageserver/src/tenant/throttle.rs b/pageserver/src/tenant/throttle.rs index 8ab6a0e060..fb68c619ec 100644 --- a/pageserver/src/tenant/throttle.rs +++ b/pageserver/src/tenant/throttle.rs @@ -1,4 +1,5 @@ use std::{ + num::NonZeroUsize, sync::{ atomic::{AtomicU64, Ordering}, Arc, @@ -9,6 +10,8 @@ use std::{ use arc_swap::ArcSwap; use utils::leaky_bucket::{LeakyBucketConfig, RateLimiter}; +use crate::assert_u64_eq_usize::UsizeIsU64; + /// Throttle for `async` functions. /// /// Runtime reconfigurable. @@ -37,11 +40,12 @@ pub struct Inner { pub type Config = pageserver_api::models::ThrottleConfig; pub struct Observation { + pub key_count: NonZeroUsize, pub wait_time: Duration, } pub trait Metric { - fn accounting_start(&self); - fn accounting_finish(&self); + fn accounting_start(&self, key_count: NonZeroUsize); + fn accounting_finish(&self, key_count: NonZeroUsize); fn observe_throttling(&self, observation: &Observation); } @@ -127,7 +131,7 @@ where self.inner.load().rate_limiter.steady_rps() } - pub async fn throttle(&self, key_count: usize) -> ThrottleResult { + pub async fn throttle(&self, key_count: NonZeroUsize) -> ThrottleResult { let inner = self.inner.load_full(); // clones the `Inner` Arc let start = std::time::Instant::now(); @@ -136,11 +140,13 @@ where return ThrottleResult::NotThrottled { start }; } - self.metric.accounting_start(); - self.count_accounted_start.fetch_add(1, Ordering::Relaxed); - let did_throttle = inner.rate_limiter.acquire(key_count).await; - self.count_accounted_finish.fetch_add(1, Ordering::Relaxed); - self.metric.accounting_finish(); + self.metric.accounting_start(key_count); + self.count_accounted_start + .fetch_add(key_count.get().into_u64(), Ordering::Relaxed); + let did_throttle = inner.rate_limiter.acquire(key_count.get()).await; + self.count_accounted_finish + .fetch_add(key_count.get().into_u64(), Ordering::Relaxed); + self.metric.accounting_finish(key_count); if did_throttle { self.count_throttled.fetch_add(1, Ordering::Relaxed); @@ -148,7 +154,10 @@ where let wait_time = now - start; self.sum_throttled_usecs .fetch_add(wait_time.as_micros() as u64, Ordering::Relaxed); - let observation = Observation { wait_time }; + let observation = Observation { + key_count, + wait_time, + }; self.metric.observe_throttling(&observation); ThrottleResult::Throttled { start, end: now } } else {