diff --git a/control_plane/attachment_service/src/compute_hook.rs b/control_plane/attachment_service/src/compute_hook.rs index 0d3610aafa..5bd1b6bf09 100644 --- a/control_plane/attachment_service/src/compute_hook.rs +++ b/control_plane/attachment_service/src/compute_hook.rs @@ -240,7 +240,7 @@ impl ComputeHook { let client = reqwest::Client::new(); backoff::retry( || self.do_notify_iteration(&client, url, &reconfigure_request, cancel), - |e| matches!(e, NotifyError::Fatal(_)), + |e| matches!(e, NotifyError::Fatal(_) | NotifyError::Unexpected(_)), 3, 10, "Send compute notification", diff --git a/control_plane/attachment_service/src/service.rs b/control_plane/attachment_service/src/service.rs index febee1aa0d..1db1906df8 100644 --- a/control_plane/attachment_service/src/service.rs +++ b/control_plane/attachment_service/src/service.rs @@ -989,7 +989,15 @@ impl Service { .collect(); } else { // This was an update, wait for reconciliation - self.await_waiters(waiters).await?; + if let Err(e) = self.await_waiters(waiters).await { + // Do not treat a reconcile error as fatal: we have already applied any requested + // Intent changes, and the reconcile can fail for external reasons like unavailable + // compute notification API. In these cases, it is important that we do not + // cause the cloud control plane to retry forever on this API. + tracing::warn!( + "Failed to reconcile after /location_config: {e}, returning success anyway" + ); + } } Ok(result)