vm-monitor: Fix cgroup throttling (#5303)

I believe this (not actual IO problems) is the cause of the "disk speed
issue" that we've had for VMs recently. See e.g.:

1. https://neondb.slack.com/archives/C03H1K0PGKH/p1694287808046179?thread_ts=1694271790.580099&cid=C03H1K0PGKH
2. https://neondb.slack.com/archives/C03H1K0PGKH/p1694511932560659

The vm-informant (and now the vm-monitor, its replacement) is supposed
to gradually increase the `neon-postgres` cgroup's memory.high value,
because once the cgroup's memory usage crosses memory.high, the kernel
throttles all the processes in the cgroup.
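
For background, memory.high is just a control file in the cgroup v2
hierarchy, and raising it is a single write. A rough sketch of what
"increase memory.high" means here (not the actual vm-monitor code, which
goes through `CgroupWatcher::set_high_bytes`; the mount point, helper
name, and step size are assumptions):

```rust
use std::fs;
use std::io;

/// Hypothetical helper: raise a cgroup's memory.high by `step` bytes.
/// Assumes cgroup v2 is mounted at /sys/fs/cgroup; the file contains
/// either "max" (no limit) or a byte count.
fn increase_memory_high(cgroup: &str, step: u64) -> io::Result<()> {
    let path = format!("/sys/fs/cgroup/{cgroup}/memory.high");
    let current = fs::read_to_string(&path)?;
    let current = current.trim();
    if current == "max" {
        // No limit set; nothing to raise.
        return Ok(());
    }
    let bytes: u64 = current
        .parse()
        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
    // Writing a plain byte count back sets the new throttling threshold.
    fs::write(&path, (bytes + step).to_string())
}

fn main() -> io::Result<()> {
    // e.g. bump the neon-postgres cgroup by 64 MiB
    increase_memory_high("neon-postgres", 64 << 20)
}
```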

This PR fixes a bug in the vm-monitor's implementation of this
behavior: the rate limit on memory.high increases is now tracked with an
explicit `last_memory_high_increase_at: Option<Instant>` that is cleared
whenever an upscale arrives, instead of the `wait_to_increase_memory_high`
sleep timer.
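
Distilled from the diff below, the fixed rate-limit pattern looks
roughly like this (a sketch reusing names from the patch, not the actual
`CgroupWatcher` event loop; `IncreaseLimiter` is a hypothetical wrapper):

```rust
use std::time::{Duration, Instant};

/// Sketch: rate-limit memory.high increases with a timestamp instead of
/// a timer. `None` means "no increase since the last upscale", so the
/// next memory.high event may act immediately.
struct IncreaseLimiter {
    memory_high_increase_every: Duration,
    last_memory_high_increase_at: Option<Instant>,
}

impl IncreaseLimiter {
    /// Called when upscale arrives: clear the timestamp entirely.
    fn on_upscale(&mut self) {
        self.last_memory_high_increase_at = None;
    }

    /// Called on a memory.high event: returns true (and records the
    /// time) if we're allowed to increase memory.high now.
    fn try_increase(&mut self) -> bool {
        let can_increase = match self.last_memory_high_increase_at {
            None => true,
            Some(t) => t.elapsed() > self.memory_high_increase_every,
        };
        if can_increase {
            self.last_memory_high_increase_at = Some(Instant::now());
        }
        can_increase
    }
}

fn main() {
    let mut limiter = IncreaseLimiter {
        memory_high_increase_every: Duration::from_secs(10),
        last_memory_high_increase_at: None,
    };
    assert!(limiter.try_increase());  // first event: allowed
    assert!(!limiter.try_increase()); // immediately after: rate-limited
    limiter.on_upscale();
    assert!(limiter.try_increase());  // upscale resets the limit
}
```

Tracking a timestamp rather than a pinned `tokio::time::Sleep` makes the
"reset on upscale" case a plain assignment.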

---

Other references, for the vm-informant's implementation:

- Original issue: neondatabase/autoscaling#44
- Original PR: neondatabase/autoscaling#223
Author: Em Sharnoff
Date: 2023-09-14 03:21:50 -07:00
Committed by: GitHub
Parent: ffd146c3e5
Commit: 3895829bda

@@ -315,12 +315,8 @@ impl CgroupWatcher {
     where
         E: Stream<Item = Sequenced<u64>>,
     {
-        // There are several actions we might do when receiving a `memory.high`,
-        // such as freezing the cgroup, or increasing its `memory.high`. We don't
-        // want to do these things too often (because postgres needs to run, and
-        // we only have so much memory). These timers serve as rate limits for this.
         let mut wait_to_freeze = pin!(tokio::time::sleep(Duration::ZERO));
-        let mut wait_to_increase_memory_high = pin!(tokio::time::sleep(Duration::ZERO));
+        let mut last_memory_high_increase_at: Option<Instant> = None;
         let mut events = pin!(events);
         // Are we waiting to be upscaled? Could be true if we request upscale due
@@ -332,6 +328,8 @@ impl CgroupWatcher {
             upscale = upscales.recv() => {
                 let Sequenced { seqnum, data } = upscale
                     .context("failed to listen on upscale notification channel")?;
+                waiting_on_upscale = false;
+                last_memory_high_increase_at = None;
                 self.last_upscale_seqnum.store(seqnum, Ordering::Release);
                 info!(cpu = data.cpu, mem_bytes = data.mem, "received upscale");
             }
@@ -396,12 +394,17 @@ impl CgroupWatcher {
                         .send(())
                         .await
                         .context("failed to request upscale")?;
                     waiting_on_upscale = true;
                     continue;
                 }
                 // Shoot, we can't freeze and we're still waiting on upscale;
                 // increase memory.high to reduce throttling
-                if wait_to_increase_memory_high.is_elapsed() {
+                let can_increase_memory_high = match last_memory_high_increase_at {
+                    None => true,
+                    Some(t) => t.elapsed() > self.config.memory_high_increase_every,
+                };
+                if can_increase_memory_high {
                     info!(
                         "received memory.high event, \
                         but too soon to refreeze and already requested upscale \
@@ -437,12 +440,11 @@ impl CgroupWatcher {
                     );
                     self.set_high_bytes(new_high)
                         .context("failed to set memory.high")?;
-                    wait_to_increase_memory_high
-                        .as_mut()
-                        .reset(Instant::now() + self.config.memory_high_increase_every);
+                    last_memory_high_increase_at = Some(Instant::now());
                     continue;
                 }
                 // we can't do anything
                 info!("received memory.high event, but can't do anything");
             }
         };
     }