diff --git a/control_plane/attachment_service/src/service.rs b/control_plane/attachment_service/src/service.rs index 828ec460fd..a8f97eeaa4 100644 --- a/control_plane/attachment_service/src/service.rs +++ b/control_plane/attachment_service/src/service.rs @@ -163,6 +163,10 @@ pub struct Service { // - Take in shared mode for operations that need the set of shards to stay the same to complete reliably (e.g. timeline CRUD) tenant_locks: IdLockMap, + // Locking for nodes: take exclusively for operations that modify the node's persistent state, or + // that transition it to/from Active. + node_locks: IdLockMap, + // Process shutdown will fire this token cancel: CancellationToken, @@ -835,6 +839,7 @@ impl Service { cancel: CancellationToken::new(), gate: Gate::default(), tenant_locks: Default::default(), + node_locks: Default::default(), }); let result_task_this = this.clone(); @@ -3151,6 +3156,8 @@ impl Service { &self, register_req: NodeRegisterRequest, ) -> Result<(), ApiError> { + let _node_lock = self.node_locks.exclusive(register_req.node_id).await; + // Pre-check for an already-existing node { let locked = self.inner.read().unwrap(); @@ -3212,6 +3219,8 @@ impl Service { &self, config_req: NodeConfigureRequest, ) -> Result<(), ApiError> { + let _node_lock = self.node_locks.exclusive(config_req.node_id).await; + if let Some(scheduling) = config_req.scheduling { // Scheduling is a persistent part of Node: we must write updates to the database before // applying them in memory