// Origin: commit f8d8c656fc1aa55f8fa54745b5642e403f201fcd, "libs: add CircuitBreaker type"
// (John Spray, Tue, 13 Feb 2024). Contents of libs/utils/src/circuit_breaker.rs.
// The same commit registers this module in libs/utils/src/lib.rs via
// `pub mod circuit_breaker;`.

use std::time::{Duration, Instant};

/// Circuit breakers are for operations that are expensive and fallible: if they fail repeatedly,
/// we will stop attempting them for some period of time, to avoid denial-of-service from retries,
/// and to mitigate the log spam from repeated failures.
///
/// Intended usage: call [`CircuitBreaker::is_broken`] before attempting the operation and skip it
/// when that returns `true`; afterwards report the outcome with [`CircuitBreaker::success`] or
/// [`CircuitBreaker::fail`].
#[derive(Debug)]
pub struct CircuitBreaker {
    /// Consecutive failures since last success
    fail_count: usize,

    /// How many consecutive failures before we break the circuit
    fail_threshold: usize,

    /// If circuit is broken, when was it broken?
    broken_at: Option<Instant>,

    /// If set, we will auto-reset the circuit this long after it was broken. If None, broken
    /// circuits stay broken forever, or until success() is called.
    reset_period: Option<Duration>,

    /// If this is true, no actual circuit-breaking happens. This is for overriding a circuit
    /// breaker to permit something to keep running even if it would otherwise have tripped it.
    short_circuit: bool,
}

impl CircuitBreaker {
    /// Create a closed (not broken) circuit breaker.
    ///
    /// * `fail_threshold` - number of consecutive `fail()` calls after which the circuit breaks.
    /// * `reset_period` - if `Some`, a broken circuit is reset once more than this much time has
    ///   passed since it broke; if `None`, only `success()` un-breaks it.
    pub fn new(fail_threshold: usize, reset_period: Option<Duration>) -> Self {
        Self {
            fail_count: 0,
            fail_threshold,
            broken_at: None,
            reset_period,
            short_circuit: false,
        }
    }

    /// Create a circuit breaker that never breaks: `fail()` is ignored and `is_broken()` always
    /// returns `false`.
    pub fn short_circuit() -> Self {
        Self {
            fail_threshold: 0,
            fail_count: 0,
            broken_at: None,
            reset_period: None,
            short_circuit: true,
        }
    }

    /// Call this after an operation failed. Breaks the circuit once the number of consecutive
    /// failures reaches the threshold. Does nothing on a short-circuited breaker.
    pub fn fail(&mut self) {
        if self.short_circuit {
            return;
        }

        // Saturate rather than overflow: once past the threshold the exact count is irrelevant.
        self.fail_count = self.fail_count.saturating_add(1);

        // Only stamp the break time on the transition, so the reset period is measured from the
        // moment the circuit first broke rather than from the most recent failure.
        if self.broken_at.is_none() && self.fail_count >= self.fail_threshold {
            self.break_circuit();
        }
    }

    /// Call this after successfully executing an operation
    pub fn success(&mut self) {
        self.fail_count = 0;
        self.broken_at = None;
    }

    /// Call this before attempting an operation, and skip the operation if we are currently broken.
    ///
    /// Takes `&mut self` because an expired reset period is applied lazily here: the circuit is
    /// reset (and `false` returned) when it is queried after the period has elapsed.
    pub fn is_broken(&mut self) -> bool {
        if self.short_circuit {
            return false;
        }

        let Some(broken_at) = self.broken_at else {
            return false;
        };

        match self.reset_period {
            Some(reset_period) if broken_at.elapsed() > reset_period => {
                self.reset_circuit();
                false
            }
            _ => true,
        }
    }

    /// Mark the circuit as broken as of now.
    fn break_circuit(&mut self) {
        self.broken_at = Some(Instant::now());
    }

    /// Return the circuit to its initial, closed state.
    fn reset_circuit(&mut self) {
        self.broken_at = None;
        self.fail_count = 0;
    }
}