From e68c43c19b6a44cb90e0db29e445a1dd2b3335c2 Mon Sep 17 00:00:00 2001
From: Christian Schwarz <christian@neon.tech>
Date: Fri, 21 Jun 2024 11:36:04 +0000
Subject: [PATCH] DNM: rip out throttling code (the recorder's borrowing of ctx
 obviously breaks &mut RequestContext passing)

---
 pageserver/src/context.rs         |  3 +-
 pageserver/src/metrics.rs         | 52 +++----------------------------
 pageserver/src/page_service.rs    | 10 +++---
 pageserver/src/tenant/throttle.rs | 15 +--------
 pageserver/src/tenant/timeline.rs | 17 ++--------
 5 files changed, 14 insertions(+), 83 deletions(-)

diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs
index 4e53ba7888..10db047aea 100644
--- a/pageserver/src/context.rs
+++ b/pageserver/src/context.rs
@@ -97,9 +97,10 @@ pub struct RequestContext {
     download_behavior: DownloadBehavior,
     access_stats_behavior: AccessStatsBehavior,
     page_content_kind: PageContentKind,
-    pub micros_spent_throttled: optional_counter::MicroSecondsCounterU32,
 }
 
+pub(crate) struct MicrosSpentThrottled(optional_counter::MicroSecondsCounterU32);
+
 /// The kind of access to the page cache.
 #[derive(Clone, Copy, PartialEq, Eq, Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
 pub enum PageContentKind {
diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index ea7a8d3752..493d0cf2cf 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1085,7 +1085,6 @@ impl GlobalAndPerTimelineHistogram {
 
 struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
     h: &'a GlobalAndPerTimelineHistogram,
-    ctx: &'c RequestContext,
     start: std::time::Instant,
     op: SmgrQueryType,
 }
@@ -1093,32 +1092,11 @@ struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
 impl<'a, 'c> Drop for GlobalAndPerTimelineHistogramTimer<'a, 'c> {
     fn drop(&mut self) {
         let elapsed = self.start.elapsed();
-        let ex_throttled = self
-            .ctx
-            .micros_spent_throttled
-            .close_and_checked_sub_from(elapsed);
-        let ex_throttled = match ex_throttled {
-            Ok(res) => res,
-            Err(error) => {
-                use utils::rate_limit::RateLimit;
-                static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
-                    Lazy::new(|| {
-                        Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| {
-                            RateLimit::new(Duration::from_secs(10))
-                        })))
-                    });
-                let mut guard = LOGGED.lock().unwrap();
-                let rate_limit = &mut guard[self.op];
-                rate_limit.call(|| {
-                    warn!(op=?self.op, error, "error deducting time spent throttled; this message is logged at a global rate limit");
-                });
-                elapsed
-            }
-        };
-        self.h.observe(ex_throttled.as_secs_f64());
+        self.h.observe(elapsed.as_secs_f64());
     }
 }
 
+
 #[derive(
     Debug,
     Clone,
@@ -1233,33 +1211,11 @@ impl SmgrQueryTimePerTimeline {
         });
         Self { metrics }
     }
-    pub(crate) fn start_timer<'c: 'a, 'a>(
-        &'a self,
-        op: SmgrQueryType,
-        ctx: &'c RequestContext,
-    ) -> impl Drop + 'a {
+    pub(crate) fn start_timer<'a>(&'a self, op: SmgrQueryType) -> impl Drop + 'a {
         let metric = &self.metrics[op as usize];
         let start = Instant::now();
-        match ctx.micros_spent_throttled.open() {
-            Ok(()) => (),
-            Err(error) => {
-                use utils::rate_limit::RateLimit;
-                static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
-                    Lazy::new(|| {
-                        Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| {
-                            RateLimit::new(Duration::from_secs(10))
-                        })))
-                    });
-                let mut guard = LOGGED.lock().unwrap();
-                let rate_limit = &mut guard[op];
-                rate_limit.call(|| {
-                    warn!(?op, error, "error opening micros_spent_throttled; this message is logged at a global rate limit");
-                });
-            }
-        }
         GlobalAndPerTimelineHistogramTimer {
             h: metric,
-            ctx,
             start,
             op,
         }
@@ -1326,7 +1282,7 @@ mod smgr_query_time_tests {
             assert_eq!(pre_per_tenant_timeline, 0);
 
             let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Download);
-            let timer = metrics.start_timer(*op, &ctx);
+            let timer = metrics.start_timer(*op);
             drop(timer);
 
             let (post_global, post_per_tenant_timeline) = get_counts();
diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index ccb3ee3f88..9c3dbba332 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -963,7 +963,7 @@ impl PageServerHandler {
         let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?;
         let _timer = timeline
             .query_metrics
-            .start_timer(metrics::SmgrQueryType::GetRelExists, ctx);
+            .start_timer(metrics::SmgrQueryType::GetRelExists);
 
         let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
         let lsn = Self::wait_or_get_last_lsn(
@@ -996,7 +996,7 @@ impl PageServerHandler {
 
         let _timer = timeline
             .query_metrics
-            .start_timer(metrics::SmgrQueryType::GetRelSize, ctx);
+            .start_timer(metrics::SmgrQueryType::GetRelSize);
 
         let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
         let lsn = Self::wait_or_get_last_lsn(
@@ -1029,7 +1029,7 @@ impl PageServerHandler {
 
         let _timer = timeline
             .query_metrics
-            .start_timer(metrics::SmgrQueryType::GetDbSize, ctx);
+            .start_timer(metrics::SmgrQueryType::GetDbSize);
 
         let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
         let lsn = Self::wait_or_get_last_lsn(
@@ -1206,7 +1206,7 @@ impl PageServerHandler {
 
         let _timer = timeline
             .query_metrics
-            .start_timer(metrics::SmgrQueryType::GetPageAtLsn, ctx);
+            .start_timer(metrics::SmgrQueryType::GetPageAtLsn);
 
         let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
         let lsn = Self::wait_or_get_last_lsn(
@@ -1239,7 +1239,7 @@ impl PageServerHandler {
 
         let _timer = timeline
             .query_metrics
-            .start_timer(metrics::SmgrQueryType::GetSlruSegment, ctx);
+            .start_timer(metrics::SmgrQueryType::GetSlruSegment);
 
         let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
         let lsn = Self::wait_or_get_last_lsn(
diff --git a/pageserver/src/tenant/throttle.rs b/pageserver/src/tenant/throttle.rs
index 6ee79dbdf0..987561993c 100644
--- a/pageserver/src/tenant/throttle.rs
+++ b/pageserver/src/tenant/throttle.rs
@@ -130,7 +130,7 @@ where
         self.inner.load().config.steady_rps()
     }
 
-    pub async fn throttle(&self, ctx: &mut RequestContext, key_count: usize) -> Option<Duration> {
+    pub async fn throttle(&self, ctx: &RequestContext, key_count: usize) -> Option<Duration> {
         let inner = self.inner.load_full(); // clones the `Inner` Arc
         if !inner.task_kinds.contains(ctx.task_kind()) {
             return None;
@@ -157,19 +157,6 @@ where
                 .fetch_add(wait_time.as_micros() as u64, Ordering::Relaxed);
             let observation = Observation { wait_time };
             self.metric.observe_throttling(&observation);
-            match ctx.micros_spent_throttled.add(wait_time) {
-                Ok(res) => res,
-                Err(error) => {
-                    use once_cell::sync::Lazy;
-                    use utils::rate_limit::RateLimit;
-                    static WARN_RATE_LIMIT: Lazy<Mutex<RateLimit>> =
-                        Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
-                    let mut guard = WARN_RATE_LIMIT.lock().unwrap();
-                    guard.call(move || {
-                        warn!(error, "error adding time spent throttled; this message is logged at a global rate limit");
-                    });
-                }
-            }
             Some(wait_time)
         } else {
             None
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 3a128aaabd..cc187a219c 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -882,8 +882,6 @@ impl Timeline {
         // page_service.
         debug_assert!(!self.shard_identity.is_key_disposable(&key));
 
-        self.timeline_get_throttle.throttle(ctx, 1).await;
-
         match self.conf.get_impl {
             GetImpl::Legacy => {
                 let reconstruct_state = ValueReconstructState {
@@ -1035,12 +1033,7 @@ impl Timeline {
             .for_task_kind(ctx.task_kind())
             .map(|metric| (metric, Instant::now()));
 
-        // start counting after throttle so that throttle time
-        // is always less than observation time
-        let throttled = self
-            .timeline_get_throttle
-            .throttle(ctx, key_count as usize)
-            .await;
+        let throttled = None;
 
         let res = match self.conf.get_vectored_impl {
             GetVectoredImpl::Sequential => {
@@ -1129,13 +1122,7 @@ impl Timeline {
             .for_task_kind(ctx.task_kind())
             .map(ScanLatencyOngoingRecording::start_recording);
 
-        // start counting after throttle so that throttle time
-        // is always less than observation time
-        let throttled = self
-            .timeline_get_throttle
-            // assume scan = 1 quota for now until we find a better way to process this
-            .throttle(ctx, 1)
-            .await;
+        let throttled = None;
 
         let vectored_res = self
             .get_vectored_impl(