From 8e77e10aa35f01ccf7f149ec179f5b233754eeec Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Tue, 7 Jan 2025 16:53:30 +0100 Subject: [PATCH] storage_controller: don't drop observed state on unavailable detach The observed state removal may race with the inline updates of the observed state done from `Service::node_activate_reconcile`. This was intended to work as follows: 1. Detaches while the node is unavailable remove the entry from the observed state. 2. `Service::node_activate_reconcile` diffs the locations returned by the pageserver with the observed state and detaches in-line when required. This commit removes step (1) and lets background reconciliations deal with the mismatch between the intent and observed state. --- storage_controller/src/reconciler.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs index 8c3ff13249..e0a854fff7 100644 --- a/storage_controller/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -211,11 +211,12 @@ impl Reconciler { lazy: bool, ) -> Result<(), ReconcileError> { if !node.is_available() && config.mode == LocationConfigMode::Detached { - // Attempts to detach from offline nodes may be imitated without doing I/O: a node which is offline - // will get fully reconciled wrt the shard's intent state when it is reactivated, irrespective of - // what we put into `observed`, in [`crate::service::Service::node_activate_reconcile`] - tracing::info!("Node {node} is unavailable during detach: proceeding anyway, it will be detached on next activation"); - self.observed.locations.remove(&node.get_id()); + // [`crate::service::Service::node_activate_reconcile`] will update the observed state + // when the node comes back online. At that point, the intent and observed states will + // be mismatched and a background reconciliation will detach. + tracing::info!( + "Node {node} is unavailable during detach: proceeding anyway, it will be detached via background reconciliation" + ); return Ok(()); }