mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-03 19:42:55 +00:00
storcon: validate intent state before applying optimization (#12593)
## Problem In the gap between picking an optimization and applying it, something might insert a change to the intent state that makes it incompatible. If the change is done via the `schedule()` method, we are covered by the increased sequence number, but otherwise we can panic if we violate the intent state invariants. ## Summary of Changes Validate the optimization right before applying it. Since we hold the service lock at that point, nothing else can sneak in. Closes LKB-65
This commit is contained in:
@@ -1272,7 +1272,9 @@ impl TenantShard {
|
||||
}
|
||||
|
||||
/// Return true if the optimization was really applied: it will not be applied if the optimization's
|
||||
/// sequence is behind this tenant shard's
|
||||
/// sequence is behind this tenant shard's or if the intent state proposed by the optimization
|
||||
/// is not compatible with the current intent state. The later may happen when the background
|
||||
/// reconcile loops runs concurrently with HTTP driven optimisations.
|
||||
pub(crate) fn apply_optimization(
|
||||
&mut self,
|
||||
scheduler: &mut Scheduler,
|
||||
@@ -1282,6 +1284,15 @@ impl TenantShard {
|
||||
return false;
|
||||
}
|
||||
|
||||
if !self.validate_optimization(&optimization) {
|
||||
tracing::info!(
|
||||
"Skipping optimization for {} because it does not match current intent: {:?}",
|
||||
self.tenant_shard_id,
|
||||
optimization,
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
metrics::METRICS_REGISTRY
|
||||
.metrics_group
|
||||
.storage_controller_schedule_optimization
|
||||
@@ -1322,6 +1333,34 @@ impl TenantShard {
|
||||
true
|
||||
}
|
||||
|
||||
/// Check that the desired modifications to the intent state are compatible with
|
||||
/// the current intent state
|
||||
fn validate_optimization(&self, optimization: &ScheduleOptimization) -> bool {
|
||||
match optimization.action {
|
||||
ScheduleOptimizationAction::MigrateAttachment(MigrateAttachment {
|
||||
old_attached_node_id,
|
||||
new_attached_node_id,
|
||||
}) => {
|
||||
self.intent.attached == Some(old_attached_node_id)
|
||||
&& self.intent.secondary.contains(&new_attached_node_id)
|
||||
}
|
||||
ScheduleOptimizationAction::ReplaceSecondary(ReplaceSecondary {
|
||||
old_node_id: _,
|
||||
new_node_id,
|
||||
}) => {
|
||||
// It's legal to remove a secondary that is not present in the intent state
|
||||
!self.intent.secondary.contains(&new_node_id)
|
||||
}
|
||||
ScheduleOptimizationAction::CreateSecondary(new_node_id) => {
|
||||
!self.intent.secondary.contains(&new_node_id)
|
||||
}
|
||||
ScheduleOptimizationAction::RemoveSecondary(_) => {
|
||||
// It's legal to remove a secondary that is not present in the intent state
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// When a shard has several secondary locations, we need to pick one in situations where
|
||||
/// we promote one of them to an attached location:
|
||||
/// - When draining a node for restart
|
||||
|
||||
Reference in New Issue
Block a user