storcon: make heartbeats restart aware (#8222)

## Problem Re-attach blocks the pageserver http server from starting up. Hence, it can't reply to heartbeats until that's done. This makes the storage controller mark the node off-line (not good). We worked around this by setting the interval after which nodes are marked offline to 5 minutes. This isn't a long term solution. ## Summary of changes * Introduce a new `NodeAvailability` state: `WarmingUp`. This state models the following time interval: * From receiving the re-attach request until the pageserver replies to the first heartbeat post re-attach * The heartbeat delta generator becomes aware of this state and uses a separate longer interval * Flag `max-warming-up-interval` now models the longer timeout and `max-offline-interval` the shorter one to match the names of the states Closes https://github.com/neondatabase/neon/issues/7552
2026-05-30 11:30:37 +00:00 · 2024-07-25 14:09:12 +01:00
parent f76a4e0ad2
commit 9c5ad21341
17 changed files with 508 additions and 179 deletions
--- a/storage_controller/src/node.rs
+++ b/storage_controller/src/node.rs
@@ -3,7 +3,7 @@ use std::{str::FromStr, time::Duration};
 use pageserver_api::{
    controller_api::{
        NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy,
-        TenantLocateResponseShard, UtilizationScore,
+        TenantLocateResponseShard,
    },
    shard::TenantShardId,
 };
@@ -46,6 +46,8 @@ pub(crate) struct Node {
 /// whether/how they changed it.
 pub(crate) enum AvailabilityTransition {
    ToActive,
+    ToWarmingUpFromActive,
+    ToWarmingUpFromOffline,
    ToOffline,
    Unchanged,
 }
@@ -90,22 +92,34 @@ impl Node {
        }
    }

+    pub(crate) fn get_availability(&self) -> NodeAvailability {
+        self.availability
+    }
+
    pub(crate) fn set_availability(&mut self, availability: NodeAvailability) {
+        use AvailabilityTransition::*;
+        use NodeAvailability::WarmingUp;
+
        match self.get_availability_transition(availability) {
-            AvailabilityTransition::ToActive => {
+            ToActive => {
                // Give the node a new cancellation token, effectively resetting it to un-cancelled.  Any
                // users of previously-cloned copies of the node will still see the old cancellation
                // state.  For example, Reconcilers in flight will have to complete and be spawned
                // again to realize that the node has become available.
                self.cancel = CancellationToken::new();
            }
-            AvailabilityTransition::ToOffline => {
+            ToOffline | ToWarmingUpFromActive => {
                // Fire the node's cancellation token to cancel any in-flight API requests to it
                self.cancel.cancel();
            }
-            AvailabilityTransition::Unchanged => {}
+            Unchanged | ToWarmingUpFromOffline => {}
+        }
+
+        if let (WarmingUp(crnt), WarmingUp(proposed)) = (self.availability, availability) {
+            self.availability = WarmingUp(std::cmp::max(crnt, proposed));
+        } else {
+            self.availability = availability;
        }
-        self.availability = availability;
    }

    /// Without modifying the availability of the node, convert the intended availability
@@ -120,16 +134,10 @@ impl Node {
        match (self.availability, availability) {
            (Offline, Active(_)) => ToActive,
            (Active(_), Offline) => ToOffline,
-            // Consider the case when the storage controller handles the re-attach of a node
-            // before the heartbeats detect that the node is back online. We still need
-            // [`Service::node_configure`] to attempt reconciliations for shards with an
-            // unknown observed location.
-            // The unsavoury match arm below handles this situation.
-            (Active(lhs), Active(rhs))
-                if lhs == UtilizationScore::worst() && rhs < UtilizationScore::worst() =>
-            {
-                ToActive
-            }
+            (Active(_), WarmingUp(_)) => ToWarmingUpFromActive,
+            (WarmingUp(_), Offline) => ToOffline,
+            (WarmingUp(_), Active(_)) => ToActive,
+            (Offline, WarmingUp(_)) => ToWarmingUpFromOffline,
            _ => Unchanged,
        }
    }
@@ -147,7 +155,7 @@ impl Node {
    pub(crate) fn may_schedule(&self) -> MaySchedule {
        let score = match self.availability {
            NodeAvailability::Active(score) => score,
-            NodeAvailability::Offline => return MaySchedule::No,
+            NodeAvailability::Offline | NodeAvailability::WarmingUp(_) => return MaySchedule::No,
        };

        match self.scheduling {