storcon: make heartbeats restart aware (#8222)

## Problem
Re-attach blocks the pageserver http server from starting up. Hence, it
can't reply to heartbeats
until that's done. This makes the storage controller mark the node
off-line (not good). We worked
around this by setting the interval after which nodes are marked offline
to 5 minutes. This isn't a
long term solution.

## Summary of changes
* Introduce a new `NodeAvailability` state: `WarmingUp`. This state
models the following time interval:
* From receiving the re-attach request until the pageserver replies to
the first heartbeat post re-attach
* The heartbeat delta generator becomes aware of this state and uses a
separate longer interval
* Flag `max-warming-up-interval` now models the longer timeout and
`max-offline-interval` the shorter one to
match the names of the states

Closes https://github.com/neondatabase/neon/issues/7552
This commit is contained in:
Vlad Lazar
2024-07-25 14:09:12 +01:00
committed by GitHub
parent f76a4e0ad2
commit 9c5ad21341
17 changed files with 508 additions and 179 deletions

View File

@@ -3,7 +3,7 @@ use std::{str::FromStr, time::Duration};
use pageserver_api::{
controller_api::{
NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy,
TenantLocateResponseShard, UtilizationScore,
TenantLocateResponseShard,
},
shard::TenantShardId,
};
@@ -46,6 +46,8 @@ pub(crate) struct Node {
/// whether/how they changed it.
pub(crate) enum AvailabilityTransition {
ToActive,
ToWarmingUpFromActive,
ToWarmingUpFromOffline,
ToOffline,
Unchanged,
}
@@ -90,22 +92,34 @@ impl Node {
}
}
pub(crate) fn get_availability(&self) -> NodeAvailability {
self.availability
}
pub(crate) fn set_availability(&mut self, availability: NodeAvailability) {
use AvailabilityTransition::*;
use NodeAvailability::WarmingUp;
match self.get_availability_transition(availability) {
AvailabilityTransition::ToActive => {
ToActive => {
// Give the node a new cancellation token, effectively resetting it to un-cancelled. Any
// users of previously-cloned copies of the node will still see the old cancellation
// state. For example, Reconcilers in flight will have to complete and be spawned
// again to realize that the node has become available.
self.cancel = CancellationToken::new();
}
AvailabilityTransition::ToOffline => {
ToOffline | ToWarmingUpFromActive => {
// Fire the node's cancellation token to cancel any in-flight API requests to it
self.cancel.cancel();
}
AvailabilityTransition::Unchanged => {}
Unchanged | ToWarmingUpFromOffline => {}
}
if let (WarmingUp(crnt), WarmingUp(proposed)) = (self.availability, availability) {
self.availability = WarmingUp(std::cmp::max(crnt, proposed));
} else {
self.availability = availability;
}
self.availability = availability;
}
/// Without modifying the availability of the node, convert the intended availability
@@ -120,16 +134,10 @@ impl Node {
match (self.availability, availability) {
(Offline, Active(_)) => ToActive,
(Active(_), Offline) => ToOffline,
// Consider the case when the storage controller handles the re-attach of a node
// before the heartbeats detect that the node is back online. We still need
// [`Service::node_configure`] to attempt reconciliations for shards with an
// unknown observed location.
// The unsavoury match arm below handles this situation.
(Active(lhs), Active(rhs))
if lhs == UtilizationScore::worst() && rhs < UtilizationScore::worst() =>
{
ToActive
}
(Active(_), WarmingUp(_)) => ToWarmingUpFromActive,
(WarmingUp(_), Offline) => ToOffline,
(WarmingUp(_), Active(_)) => ToActive,
(Offline, WarmingUp(_)) => ToWarmingUpFromOffline,
_ => Unchanged,
}
}
@@ -147,7 +155,7 @@ impl Node {
pub(crate) fn may_schedule(&self) -> MaySchedule {
let score = match self.availability {
NodeAvailability::Active(score) => score,
NodeAvailability::Offline => return MaySchedule::No,
NodeAvailability::Offline | NodeAvailability::WarmingUp(_) => return MaySchedule::No,
};
match self.scheduling {