diff --git a/libs/vm_monitor/src/runner.rs b/libs/vm_monitor/src/runner.rs index 82055fda2e..8f904b879d 100644 --- a/libs/vm_monitor/src/runner.rs +++ b/libs/vm_monitor/src/runner.rs @@ -5,6 +5,7 @@ //! all functionality. use std::sync::Arc; +use std::time::{Duration, Instant}; use std::{fmt::Debug, mem}; use anyhow::{bail, Context}; @@ -36,6 +37,8 @@ pub struct Runner { /// by us vs the autoscaler-agent. counter: usize, + last_upscale_request_at: Option<Instant>, + /// A signal to kill the main thread produced by `self.run()`. This is triggered /// when the server receives a new connection. When the thread receives the /// signal off this channel, it will gracefully shutdown. @@ -99,6 +102,7 @@ impl Runner { cgroup: None, dispatcher, counter: 1, // NB: must be odd, see the comment about the field for more. + last_upscale_request_at: None, kill, }; @@ -397,6 +401,20 @@ impl Runner { if request.is_none() { bail!("failed to listen for upscale event from cgroup") } + + // If it's been less than 1 second since the last time we requested upscaling, + // ignore the event, to avoid spamming the agent (otherwise, this can happen + // ~1k times per second). + if let Some(t) = self.last_upscale_request_at { + let elapsed = t.elapsed(); + if elapsed < Duration::from_secs(1) { + info!(elapsed_millis = elapsed.as_millis(), "cgroup asked for upscale but too soon to forward the request, ignoring"); + continue; + } + } + + self.last_upscale_request_at = Some(Instant::now()); + info!("cgroup asking for upscale; forwarding request"); self.counter += 2; // Increment, preserving parity (i.e. keep the // counter odd). See the field comment for more.