Merge remote-tracking branch 'origin' into vlad/hadron-jwt

This commit is contained in:
Vlad Lazar
2025-07-29 09:43:09 +01:00
165 changed files with 6694 additions and 2262 deletions

View File

@@ -981,6 +981,7 @@ impl Reconciler {
));
}
let mut first_err = None;
for (node, conf) in changes {
if self.cancel.is_cancelled() {
return Err(ReconcileError::Cancel);
@@ -990,7 +991,12 @@ impl Reconciler {
// shard _available_ (the attached location), and configuring secondary locations
// can be done lazily when the node becomes available (via background reconciliation).
if node.is_available() {
self.location_config(&node, conf, None, false).await?;
let res = self.location_config(&node, conf, None, false).await;
if let Err(err) = res {
if first_err.is_none() {
first_err = Some(err);
}
}
} else {
// If the node is unavailable, we skip and consider the reconciliation successful: this
// is a common case where a pageserver is marked unavailable: we demote a location on
@@ -1002,6 +1008,10 @@ impl Reconciler {
}
}
if let Some(err) = first_err {
return Err(err);
}
// The condition below identifies a detach. We must have no attached intent and
// must have been attached to something previously. Pass this information to
// the [`ComputeHook`] such that it can update its tenant-wide state.

View File

@@ -1536,10 +1536,19 @@ impl Service {
// so that waiters will see the correct error after waiting.
tenant.set_last_error(result.sequence, e);
// Skip deletions on reconcile failures
let upsert_deltas =
deltas.filter(|delta| matches!(delta, ObservedStateDelta::Upsert(_)));
tenant.apply_observed_deltas(upsert_deltas);
// If the reconciliation failed, don't clear the observed state for places where we
// detached. Instead, mark the observed state as uncertain.
let failed_reconcile_deltas = deltas.map(|delta| {
if let ObservedStateDelta::Delete(node_id) = delta {
ObservedStateDelta::Upsert(Box::new((
node_id,
ObservedStateLocation { conf: None },
)))
} else {
delta
}
});
tenant.apply_observed_deltas(failed_reconcile_deltas);
}
}

View File

@@ -249,6 +249,10 @@ impl IntentState {
}
pub(crate) fn push_secondary(&mut self, scheduler: &mut Scheduler, new_secondary: NodeId) {
// Every assertion here should probably have a corresponding check in
// `validate_optimization` unless it is an invariant that should never be violated. Note
// that the lock is not held between planning optimizations and applying them, so you have to
// assume any valid state transition of the intent state may have occurred.
assert!(!self.secondary.contains(&new_secondary));
assert!(self.attached != Some(new_secondary));
scheduler.update_node_ref_counts(
@@ -1335,8 +1339,9 @@ impl TenantShard {
true
}
/// Check that the desired modifications to the intent state are compatible with
/// the current intent state
/// Check that the desired modifications to the intent state are compatible with the current
/// intent state. Note that the lock is not held between planning optimizations and applying
/// them so any valid state transition of the intent state may have occurred.
fn validate_optimization(&self, optimization: &ScheduleOptimization) -> bool {
match optimization.action {
ScheduleOptimizationAction::MigrateAttachment(MigrateAttachment {
@@ -1352,6 +1357,9 @@ impl TenantShard {
}) => {
// It's legal to remove a secondary that is not present in the intent state
!self.intent.secondary.contains(&new_node_id)
// Ensure the secondary hasn't already been promoted to attached by a concurrent
// optimization/migration.
&& self.intent.attached != Some(new_node_id)
}
ScheduleOptimizationAction::CreateSecondary(new_node_id) => {
!self.intent.secondary.contains(&new_node_id)