mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-27 18:10:37 +00:00
storcon: handle reattach and heartbeat race
Consider the case when the storage controller handles the re-attach of a node before the heartbeats detect that the node is back online. We still need to reconfigure the node (by calling `Service::node_configure`) to migrate attachments back onto the node. In order to determine if node reconfiguration is required, we call into `Node::get_availability_transition`. This commit updates the function to consider the transition from "node just re-attached" (with no utilisation score) to "node responded to the first heartbeat after a period of unavailability" (with some utilisation score).
This commit is contained in:
@@ -3,7 +3,7 @@ use std::{str::FromStr, time::Duration};
|
||||
use pageserver_api::{
|
||||
controller_api::{
|
||||
NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy,
|
||||
TenantLocateResponseShard,
|
||||
TenantLocateResponseShard, UtilizationScore,
|
||||
},
|
||||
shard::TenantShardId,
|
||||
};
|
||||
@@ -116,6 +116,15 @@ impl Node {
|
||||
match (self.availability, availability) {
|
||||
(Offline, Active(_)) => ToActive,
|
||||
(Active(_), Offline) => ToOffline,
|
||||
// Consider the case when the storage controller handles the re-attach of a node
|
||||
// before the heartbeats detect that the node is back online. We still need
|
||||
// [`Service::node_configure`] to migrate attachments back onto the node.
|
||||
// The unsavoury match arm below handles this situation.
|
||||
(Active(lhs), Active(rhs))
|
||||
if lhs == UtilizationScore::worst() && rhs < UtilizationScore::worst() =>
|
||||
{
|
||||
ToActive
|
||||
}
|
||||
_ => Unchanged,
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user