From e68c43c19b6a44cb90e0db29e445a1dd2b3335c2 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 21 Jun 2024 11:36:04 +0000 Subject: [PATCH] DNM: rip out throttling code (the recorder's borrowing of ctx obviously breaks &mut RequestContext passing) --- pageserver/src/context.rs | 3 +- pageserver/src/metrics.rs | 52 +++---------------------------- pageserver/src/page_service.rs | 10 +++--- pageserver/src/tenant/throttle.rs | 15 +-------- pageserver/src/tenant/timeline.rs | 17 ++-------- 5 files changed, 14 insertions(+), 83 deletions(-) diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index 4e53ba7888..10db047aea 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -97,9 +97,10 @@ pub struct RequestContext { download_behavior: DownloadBehavior, access_stats_behavior: AccessStatsBehavior, page_content_kind: PageContentKind, - pub micros_spent_throttled: optional_counter::MicroSecondsCounterU32, } +pub(crate) struct MicrosSpentThrottled(optional_counter::MicroSecondsCounterU32); + /// The kind of access to the page cache. #[derive(Clone, Copy, PartialEq, Eq, Debug, enum_map::Enum, strum_macros::IntoStaticStr)] pub enum PageContentKind { diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index ea7a8d3752..493d0cf2cf 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1085,7 +1085,6 @@ impl GlobalAndPerTimelineHistogram { struct GlobalAndPerTimelineHistogramTimer<'a, 'c> { h: &'a GlobalAndPerTimelineHistogram, - ctx: &'c RequestContext, start: std::time::Instant, op: SmgrQueryType, } @@ -1093,32 +1092,11 @@ struct GlobalAndPerTimelineHistogramTimer<'a, 'c> { impl<'a, 'c> Drop for GlobalAndPerTimelineHistogramTimer<'a, 'c> { fn drop(&mut self) { let elapsed = self.start.elapsed(); - let ex_throttled = self - .ctx - .micros_spent_throttled - .close_and_checked_sub_from(elapsed); - let ex_throttled = match ex_throttled { - Ok(res) => res, - Err(error) => { - use utils::rate_limit::RateLimit; - static LOGGED: Lazy>> = - Lazy::new(|| { - Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| { - RateLimit::new(Duration::from_secs(10)) - }))) - }); - let mut guard = LOGGED.lock().unwrap(); - let rate_limit = &mut guard[self.op]; - rate_limit.call(|| { - warn!(op=?self.op, error, "error deducting time spent throttled; this message is logged at a global rate limit"); - }); - elapsed - } - }; - self.h.observe(ex_throttled.as_secs_f64()); + self.h.observe(elapsed.as_secs_f64()); } } + #[derive( Debug, Clone, @@ -1233,33 +1211,11 @@ impl SmgrQueryTimePerTimeline { }); Self { metrics } } - pub(crate) fn start_timer<'c: 'a, 'a>( - &'a self, - op: SmgrQueryType, - ctx: &'c RequestContext, - ) -> impl Drop + 'a { + pub(crate) fn start_timer<'a>(&'a self, op: SmgrQueryType) -> impl Drop + 'a { let metric = &self.metrics[op as usize]; let start = Instant::now(); - match ctx.micros_spent_throttled.open() { - Ok(()) => (), - Err(error) => { - use utils::rate_limit::RateLimit; - static LOGGED: Lazy>> = - Lazy::new(|| { - Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| { - RateLimit::new(Duration::from_secs(10)) - }))) - }); - let mut guard = LOGGED.lock().unwrap(); - let rate_limit = &mut guard[op]; - rate_limit.call(|| { - warn!(?op, error, "error opening micros_spent_throttled; this message is logged at a global rate limit"); - }); - } - } GlobalAndPerTimelineHistogramTimer { h: metric, - ctx, start, op, } @@ -1326,7 +1282,7 @@ mod smgr_query_time_tests { assert_eq!(pre_per_tenant_timeline, 0); let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Download); - let timer = metrics.start_timer(*op, &ctx); + let timer = metrics.start_timer(*op); drop(timer); let (post_global, post_per_tenant_timeline) = get_counts(); diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index ccb3ee3f88..9c3dbba332 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -963,7 +963,7 @@ impl PageServerHandler { let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?; let _timer = timeline .query_metrics - .start_timer(metrics::SmgrQueryType::GetRelExists, ctx); + .start_timer(metrics::SmgrQueryType::GetRelExists); let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( @@ -996,7 +996,7 @@ impl PageServerHandler { let _timer = timeline .query_metrics - .start_timer(metrics::SmgrQueryType::GetRelSize, ctx); + .start_timer(metrics::SmgrQueryType::GetRelSize); let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( @@ -1029,7 +1029,7 @@ impl PageServerHandler { let _timer = timeline .query_metrics - .start_timer(metrics::SmgrQueryType::GetDbSize, ctx); + .start_timer(metrics::SmgrQueryType::GetDbSize); let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( @@ -1206,7 +1206,7 @@ impl PageServerHandler { let _timer = timeline .query_metrics - .start_timer(metrics::SmgrQueryType::GetPageAtLsn, ctx); + .start_timer(metrics::SmgrQueryType::GetPageAtLsn); let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( @@ -1239,7 +1239,7 @@ impl PageServerHandler { let _timer = timeline .query_metrics - .start_timer(metrics::SmgrQueryType::GetSlruSegment, ctx); + .start_timer(metrics::SmgrQueryType::GetSlruSegment); let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( diff --git a/pageserver/src/tenant/throttle.rs b/pageserver/src/tenant/throttle.rs index 6ee79dbdf0..987561993c 100644 --- a/pageserver/src/tenant/throttle.rs +++ b/pageserver/src/tenant/throttle.rs @@ -130,7 +130,7 @@ where self.inner.load().config.steady_rps() } - pub async fn throttle(&self, ctx: &mut RequestContext, key_count: usize) -> Option { + pub async fn throttle(&self, ctx: &RequestContext, key_count: usize) -> Option { let inner = self.inner.load_full(); // clones the `Inner` Arc if !inner.task_kinds.contains(ctx.task_kind()) { return None; @@ -157,19 +157,6 @@ where .fetch_add(wait_time.as_micros() as u64, Ordering::Relaxed); let observation = Observation { wait_time }; self.metric.observe_throttling(&observation); - match ctx.micros_spent_throttled.add(wait_time) { - Ok(res) => res, - Err(error) => { - use once_cell::sync::Lazy; - use utils::rate_limit::RateLimit; - static WARN_RATE_LIMIT: Lazy> = - Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10)))); - let mut guard = WARN_RATE_LIMIT.lock().unwrap(); - guard.call(move || { - warn!(error, "error adding time spent throttled; this message is logged at a global rate limit"); - }); - } - } Some(wait_time) } else { None diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 3a128aaabd..cc187a219c 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -882,8 +882,6 @@ impl Timeline { // page_service. debug_assert!(!self.shard_identity.is_key_disposable(&key)); - self.timeline_get_throttle.throttle(ctx, 1).await; - match self.conf.get_impl { GetImpl::Legacy => { let reconstruct_state = ValueReconstructState { @@ -1035,12 +1033,7 @@ impl Timeline { .for_task_kind(ctx.task_kind()) .map(|metric| (metric, Instant::now())); - // start counting after throttle so that throttle time - // is always less than observation time - let throttled = self - .timeline_get_throttle - .throttle(ctx, key_count as usize) - .await; + let throttled = None; let res = match self.conf.get_vectored_impl { GetVectoredImpl::Sequential => { @@ -1129,13 +1122,7 @@ impl Timeline { .for_task_kind(ctx.task_kind()) .map(ScanLatencyOngoingRecording::start_recording); - // start counting after throttle so that throttle time - // is always less than observation time - let throttled = self - .timeline_get_throttle - // assume scan = 1 quota for now until we find a better way to process this - .throttle(ctx, 1) - .await; + let throttled = None; let vectored_res = self .get_vectored_impl(