proxy+pageserver: shared leaky bucket impl (#8539)

In proxy I switched to a leaky-bucket impl based on the GCRA algorithm. I
figured I could share that code with pageserver and drop the
leaky_bucket crate dependency, replacing it with some very basic tokio
timers and queues for fairness.
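
To give a feel for the GCRA formulation: instead of tracking a fill level and a
last-drain timestamp, the state is a single `empty_at` instant, the time at which
the bucket will have fully drained. Each token costs `1/rps` of wall time, and the
bucket holds at most `max * cost` worth of undrained work. Here is a minimal
sketch consistent with the call sites and tests in the diff below; the real
`utils::leaky_bucket` module isn't shown in these hunks, so the bodies are my
reconstruction, and the `Result<(), ()>` error type is a simplification:

```rust
use std::time::Duration;
use tokio::time::Instant;

pub struct LeakyBucketConfig {
    /// wall-clock cost of a single token: 1/rps.
    pub cost: Duration,
    /// how much undrained work the bucket can hold: max * cost.
    pub bucket_width: Duration,
}

impl LeakyBucketConfig {
    pub fn new(rps: f64, max: f64) -> Self {
        let cost = Duration::from_secs_f64(1.0 / rps);
        let bucket_width = cost.mul_f64(max);
        Self { cost, bucket_width }
    }
}

pub struct LeakyBucketState {
    /// instant at which the bucket will have fully drained.
    pub empty_at: Instant,
}

impl LeakyBucketState {
    /// a bucket whose drain deadline has passed holds nothing.
    pub fn bucket_is_empty(&self, now: Instant) -> bool {
        self.empty_at <= now
    }

    /// adding n tokens pushes the drain deadline forward by n * cost;
    /// the request is rejected, leaving the state untouched, if that
    /// would put the deadline more than bucket_width into the future.
    pub fn add_tokens(
        &mut self,
        config: &LeakyBucketConfig,
        now: Instant,
        n: f64,
    ) -> Result<(), ()> {
        // an already-drained bucket starts filling from now, not the past.
        let start = self.empty_at.max(now);
        let end = start + config.cost.mul_f64(n);
        if end - now > config.bucket_width {
            Err(())
        } else {
            self.empty_at = end;
            Ok(())
        }
    }
}
```

Rejecting without mutating `empty_at` is what the test below relies on when it
asserts `bucket.empty_at - Instant::now() == config.bucket_width` right after an
`unwrap_err()`.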

How the underlying algorithm works should be fairly clear from the
comments I have left in the code.
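
On the pageserver side, where callers wait rather than get rejected, the
timers-and-queues part amounts to sleeping until the bucket can take the tokens,
with a fair (FIFO) lock providing the queue. A hypothetical sketch of that shape,
built on the state type sketched above; `ThrottledBucket` and `acquire` are names
I've made up for illustration, not the actual pageserver API:

```rust
use tokio::time::{sleep_until, Instant};

/// hypothetical wrapper: not the actual pageserver type.
pub struct ThrottledBucket {
    // tokio's Mutex queues waiters in FIFO order, which is what
    // gives acquirers fairness.
    state: tokio::sync::Mutex<LeakyBucketState>,
    config: LeakyBucketConfig,
}

impl ThrottledBucket {
    /// wait until `n` tokens fit in the bucket, then consume them.
    /// (assumes n * cost <= bucket_width, otherwise they never fit.)
    pub async fn acquire(&self, n: f64) {
        let mut state = self.state.lock().await;
        loop {
            let now = Instant::now();
            if state.add_tokens(&self.config, now, n).is_ok() {
                return;
            }
            // earliest instant at which n tokens will fit again: the
            // drain deadline must come back within bucket_width - n * cost.
            let ready_at =
                state.empty_at + self.config.cost.mul_f64(n) - self.config.bucket_width;
            sleep_until(ready_at).await;
        }
    }
}
```

Holding the lock across the sleep is deliberate in this sketch: it serializes
waiters, so tokens are granted in arrival order rather than racing on wake-up.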

---

In benchmarking pageserver, @problame found that the new implementation
fixes a getpage throughput discontinuity under the
`pagebench get-page-latest-lsn` benchmark with the clickbench dataset
(`test_perf_olap.py`).
The discontinuity was that for any of `--num-clients={2,3,4}`, getpage
throughput stayed at 10k, and with `--num-clients=5` and greater it then
jumped to the configured 20k rate limit.
With the changes in this PR, the discontinuity is gone, and throughput
scales linearly with `--num-clients` up to the configured rate limit.

More context in
https://github.com/neondatabase/cloud/issues/16886#issuecomment-2315257641.

closes https://github.com/neondatabase/cloud/issues/16886

---------

Co-authored-by: Joonas Koivunen <joonas@neon.tech>
Co-authored-by: Christian Schwarz <christian@neon.tech>

Author: Conrad Ludgate
Date: 2024-08-29 12:26:52 +01:00 (committed by GitHub)
Commit: a644f01b6a (parent c2f8fdccd7)
12 changed files with 395 additions and 114 deletions

@@ -10,7 +10,5 @@ pub(crate) use limit_algorithm::{
 };
 pub(crate) use limiter::GlobalRateLimiter;
-pub use leaky_bucket::{
-    EndpointRateLimiter, LeakyBucketConfig, LeakyBucketRateLimiter, LeakyBucketState,
-};
+pub use leaky_bucket::{EndpointRateLimiter, LeakyBucketConfig, LeakyBucketRateLimiter};
 pub use limiter::{BucketRateLimiter, RateBucketInfo, WakeComputeRateLimiter};

@@ -8,6 +8,7 @@ use dashmap::DashMap;
 use rand::{thread_rng, Rng};
 use tokio::time::Instant;
 use tracing::info;
+use utils::leaky_bucket::LeakyBucketState;
 
 use crate::intern::EndpointIdInt;
@@ -16,7 +17,7 @@ pub type EndpointRateLimiter = LeakyBucketRateLimiter<EndpointIdInt>;
 
 pub struct LeakyBucketRateLimiter<Key> {
     map: DashMap<Key, LeakyBucketState, RandomState>,
-    config: LeakyBucketConfig,
+    config: utils::leaky_bucket::LeakyBucketConfig,
     access_count: AtomicUsize,
 }
@@ -29,7 +30,7 @@ impl<K: Hash + Eq> LeakyBucketRateLimiter<K> {
     pub fn new_with_shards(config: LeakyBucketConfig, shards: usize) -> Self {
         Self {
             map: DashMap::with_hasher_and_shard_amount(RandomState::new(), shards),
-            config,
+            config: config.into(),
             access_count: AtomicUsize::new(0),
         }
     }
@@ -42,12 +43,12 @@ impl<K: Hash + Eq> LeakyBucketRateLimiter<K> {
             self.do_gc(now);
         }
 
-        let mut entry = self.map.entry(key).or_insert_with(|| LeakyBucketState {
-            time: now,
-            filled: 0.0,
-        });
+        let mut entry = self
+            .map
+            .entry(key)
+            .or_insert_with(|| LeakyBucketState { empty_at: now });
 
-        entry.check(&self.config, now, n as f64)
+        entry.add_tokens(&self.config, now, n as f64).is_ok()
     }
 
     fn do_gc(&self, now: Instant) {
@@ -59,7 +60,7 @@ impl<K: Hash + Eq> LeakyBucketRateLimiter<K> {
         let shard = thread_rng().gen_range(0..n);
         self.map.shards()[shard]
             .write()
-            .retain(|_, value| !value.get_mut().update(&self.config, now));
+            .retain(|_, value| !value.get().bucket_is_empty(now));
     }
 }
@@ -68,11 +69,6 @@ pub struct LeakyBucketConfig {
     pub max: f64,
 }
 
-pub struct LeakyBucketState {
-    filled: f64,
-    time: Instant,
-}
-
 #[cfg(test)]
 impl LeakyBucketConfig {
     pub(crate) fn new(rps: f64, max: f64) -> Self {
@@ -82,40 +78,9 @@ impl LeakyBucketConfig {
     }
 }
 
-impl LeakyBucketState {
-    pub(crate) fn new() -> Self {
-        Self {
-            filled: 0.0,
-            time: Instant::now(),
-        }
-    }
-
-    /// updates the timer and returns true if the bucket is empty
-    fn update(&mut self, info: &LeakyBucketConfig, now: Instant) -> bool {
-        let drain = now.duration_since(self.time);
-        let drain = drain.as_secs_f64() * info.rps;
-        self.filled = (self.filled - drain).clamp(0.0, info.max);
-        self.time = now;
-        self.filled == 0.0
-    }
-
-    pub(crate) fn check(&mut self, info: &LeakyBucketConfig, now: Instant, n: f64) -> bool {
-        self.update(info, now);
-        if self.filled + n > info.max {
-            return false;
-        }
-        self.filled += n;
-        true
-    }
-}
-
-impl Default for LeakyBucketState {
-    fn default() -> Self {
-        Self::new()
+impl From<LeakyBucketConfig> for utils::leaky_bucket::LeakyBucketConfig {
+    fn from(config: LeakyBucketConfig) -> Self {
+        utils::leaky_bucket::LeakyBucketConfig::new(config.rps, config.max)
     }
 }
@@ -125,48 +90,55 @@ mod tests {
     use std::time::Duration;
 
     use tokio::time::Instant;
+    use utils::leaky_bucket::LeakyBucketState;
 
-    use super::{LeakyBucketConfig, LeakyBucketState};
+    use super::LeakyBucketConfig;
 
     #[tokio::test(start_paused = true)]
     async fn check() {
-        let info = LeakyBucketConfig::new(500.0, 2000.0);
-        let mut bucket = LeakyBucketState::new();
+        let config: utils::leaky_bucket::LeakyBucketConfig =
+            LeakyBucketConfig::new(500.0, 2000.0).into();
+        assert_eq!(config.cost, Duration::from_millis(2));
+        assert_eq!(config.bucket_width, Duration::from_secs(4));
+
+        let mut bucket = LeakyBucketState {
+            empty_at: Instant::now(),
+        };
 
         // should work for 2000 requests this second
         for _ in 0..2000 {
-            assert!(bucket.check(&info, Instant::now(), 1.0));
+            bucket.add_tokens(&config, Instant::now(), 1.0).unwrap();
         }
-        assert!(!bucket.check(&info, Instant::now(), 1.0));
-        assert_eq!(bucket.filled, 2000.0);
+        bucket.add_tokens(&config, Instant::now(), 1.0).unwrap_err();
+        assert_eq!(bucket.empty_at - Instant::now(), config.bucket_width);
 
         // in 1ms we should drain 0.5 tokens.
        // make sure we don't lose any tokens
         tokio::time::advance(Duration::from_millis(1)).await;
-        assert!(!bucket.check(&info, Instant::now(), 1.0));
+        bucket.add_tokens(&config, Instant::now(), 1.0).unwrap_err();
         tokio::time::advance(Duration::from_millis(1)).await;
-        assert!(bucket.check(&info, Instant::now(), 1.0));
+        bucket.add_tokens(&config, Instant::now(), 1.0).unwrap();
 
         // in 10ms we should drain 5 tokens
         tokio::time::advance(Duration::from_millis(10)).await;
         for _ in 0..5 {
-            assert!(bucket.check(&info, Instant::now(), 1.0));
+            bucket.add_tokens(&config, Instant::now(), 1.0).unwrap();
         }
-        assert!(!bucket.check(&info, Instant::now(), 1.0));
+        bucket.add_tokens(&config, Instant::now(), 1.0).unwrap_err();
 
         // in 10s we should drain 5000 tokens
         // but cap is only 2000
         tokio::time::advance(Duration::from_secs(10)).await;
         for _ in 0..2000 {
-            assert!(bucket.check(&info, Instant::now(), 1.0));
+            bucket.add_tokens(&config, Instant::now(), 1.0).unwrap();
         }
-        assert!(!bucket.check(&info, Instant::now(), 1.0));
+        bucket.add_tokens(&config, Instant::now(), 1.0).unwrap_err();
 
         // should sustain 500rps
         for _ in 0..2000 {
             tokio::time::advance(Duration::from_millis(10)).await;
             for _ in 0..5 {
-                assert!(bucket.check(&info, Instant::now(), 1.0));
+                bucket.add_tokens(&config, Instant::now(), 1.0).unwrap();
             }
         }
     }