storcon: deny external node configuration if an operation is ongoing (#8727)

Per #8674, disallow node configuration while a drain or fill is ongoing.
Implement this by adding an HTTP-only wrapper,
`Service::external_node_configure`, which checks for an ongoing operation
before configuring.

Additionally:
- allow cancelling drain/fill after a pageserver has restarted and
transitioned to WarmingUp

Fixes: #8674
Author: Joonas Koivunen
Date: 2024-08-15 12:54:05 +03:00
Committed by: GitHub
Parent: a9c28be7d0
Commit: d9a57aeed9

4 changed files with 70 additions and 21 deletions
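For illustration only, the guard pattern the commit message describes condenses to roughly the sketch below. The `Operation`, `ServiceInner`, and `Service` shapes here are simplified stand-ins rather than the storage controller's real types; the actual change is in the `Service::external_node_configure` hunk further down.

    use std::sync::RwLock;

    // Simplified stand-ins for the storage controller's state.
    #[derive(Clone, Copy, Debug)]
    enum Operation {
        Drain,
        Fill,
    }

    struct ServiceInner {
        ongoing_operation: Option<Operation>,
    }

    struct Service {
        inner: RwLock<ServiceInner>,
    }

    impl Service {
        // Internal entry point: applies the configuration unconditionally.
        fn node_configure(&self) -> Result<(), String> {
            Ok(())
        }

        // HTTP-only wrapper: refuse to touch node configuration while a
        // background drain/fill is in flight, otherwise delegate.
        fn external_node_configure(&self) -> Result<(), String> {
            if let Some(op) = self.inner.read().unwrap().ongoing_operation {
                return Err(format!(
                    "Ongoing background operation forbids configuring: {op:?}"
                ));
            }
            self.node_configure()
        }
    }

    fn main() {
        let svc = Service {
            inner: RwLock::new(ServiceInner {
                ongoing_operation: Some(Operation::Drain),
            }),
        };
        // While a drain is ongoing, external configuration is rejected.
        assert!(svc.external_node_configure().is_err());
    }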


@@ -313,20 +313,17 @@ pub struct MetadataHealthUpdateRequest {
 pub struct MetadataHealthUpdateResponse {}
 
 #[derive(Serialize, Deserialize, Debug)]
 pub struct MetadataHealthListUnhealthyResponse {
     pub unhealthy_tenant_shards: Vec<TenantShardId>,
 }
 
 #[derive(Serialize, Deserialize, Debug)]
 pub struct MetadataHealthListOutdatedRequest {
     #[serde(with = "humantime_serde")]
     pub not_scrubbed_for: Duration,
 }
 
 #[derive(Serialize, Deserialize, Debug)]
 pub struct MetadataHealthListOutdatedResponse {
     pub health_records: Vec<MetadataHealthRecord>,
 }


@@ -500,7 +500,7 @@ async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>,
         StatusCode::OK,
         state
             .service
-            .node_configure(
+            .external_node_configure(
                 config_req.node_id,
                 config_req.availability.map(NodeAvailability::from),
                 config_req.scheduling,


@@ -4912,6 +4912,26 @@ impl Service {
         Ok(())
     }
 
+    /// Wrapper around [`Self::node_configure`] which only allows changes while there is no ongoing
+    /// operation for HTTP api.
+    pub(crate) async fn external_node_configure(
+        &self,
+        node_id: NodeId,
+        availability: Option<NodeAvailability>,
+        scheduling: Option<NodeSchedulingPolicy>,
+    ) -> Result<(), ApiError> {
+        {
+            let locked = self.inner.read().unwrap();
+            if let Some(op) = locked.ongoing_operation.as_ref().map(|op| op.operation) {
+                return Err(ApiError::PreconditionFailed(
+                    format!("Ongoing background operation forbids configuring: {op}").into(),
+                ));
+            }
+        }
+
+        self.node_configure(node_id, availability, scheduling).await
+    }
+
     pub(crate) async fn start_node_drain(
         self: &Arc<Self>,
         node_id: NodeId,
@@ -5017,14 +5037,14 @@ impl Service {
     }
 
     pub(crate) async fn cancel_node_drain(&self, node_id: NodeId) -> Result<(), ApiError> {
-        let (node_available, node_policy) = {
+        let node_available = {
             let locked = self.inner.read().unwrap();
             let nodes = &locked.nodes;
             let node = nodes.get(&node_id).ok_or(ApiError::NotFound(
                 anyhow::anyhow!("Node {} not registered", node_id).into(),
             ))?;
 
-            (node.is_available(), node.get_scheduling())
+            node.is_available()
         };
 
         if !node_available {
@@ -5033,12 +5053,6 @@ impl Service {
             ));
         }
 
-        if !matches!(node_policy, NodeSchedulingPolicy::Draining) {
-            return Err(ApiError::PreconditionFailed(
-                format!("Node {node_id} has no drain in progress").into(),
-            ));
-        }
-
         if let Some(op_handler) = self.inner.read().unwrap().ongoing_operation.as_ref() {
             if let Operation::Drain(drain) = op_handler.operation {
                 if drain.node_id == node_id {
@@ -5152,14 +5166,14 @@ impl Service {
     }
 
     pub(crate) async fn cancel_node_fill(&self, node_id: NodeId) -> Result<(), ApiError> {
-        let (node_available, node_policy) = {
+        let node_available = {
             let locked = self.inner.read().unwrap();
             let nodes = &locked.nodes;
             let node = nodes.get(&node_id).ok_or(ApiError::NotFound(
                 anyhow::anyhow!("Node {} not registered", node_id).into(),
             ))?;
 
-            (node.is_available(), node.get_scheduling())
+            node.is_available()
         };
 
         if !node_available {
@@ -5168,12 +5182,6 @@ impl Service {
             ));
         }
 
-        if !matches!(node_policy, NodeSchedulingPolicy::Filling) {
-            return Err(ApiError::PreconditionFailed(
-                format!("Node {node_id} has no fill in progress").into(),
-            ));
-        }
-
         if let Some(op_handler) = self.inner.read().unwrap().ongoing_operation.as_ref() {
             if let Operation::Fill(fill) = op_handler.operation {
                 if fill.node_id == node_id {
@@ -5982,7 +5990,7 @@ impl Service {
                 .await_waiters_remainder(waiters, SHORT_RECONCILE_TIMEOUT)
                 .await;
 
-            failpoint_support::sleep_millis_async!("sleepy-drain-loop");
+            failpoint_support::sleep_millis_async!("sleepy-drain-loop", &cancel);
         }
 
         while !waiters.is_empty() {


@@ -2091,3 +2091,47 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
         )
         == 0
     )
+
+
+def test_storage_controller_ps_restarted_during_drain(neon_env_builder: NeonEnvBuilder):
+    # single unsharded tenant, two locations
+    neon_env_builder.num_pageservers = 2
+
+    env = neon_env_builder.init_start()
+
+    env.storage_controller.tenant_policy_update(env.initial_tenant, {"placement": {"Attached": 1}})
+    env.storage_controller.reconcile_until_idle()
+
+    attached_id = int(env.storage_controller.locate(env.initial_tenant)[0]["node_id"])
+    attached = next((ps for ps in env.pageservers if ps.id == attached_id))
+
+    def attached_is_draining():
+        details = env.storage_controller.node_status(attached.id)
+        assert details["scheduling"] == "Draining"
+
+    env.storage_controller.configure_failpoints(("sleepy-drain-loop", "return(10000)"))
+    env.storage_controller.node_drain(attached.id)
+    wait_until(10, 0.5, attached_is_draining)
+
+    attached.restart()
+
+    # we are unable to reconfigure node while the operation is still ongoing
+    with pytest.raises(
+        StorageControllerApiException,
+        match="Precondition failed: Ongoing background operation forbids configuring: drain.*",
+    ):
+        env.storage_controller.node_configure(attached.id, {"scheduling": "Pause"})
+
+    with pytest.raises(
+        StorageControllerApiException,
+        match="Precondition failed: Ongoing background operation forbids configuring: drain.*",
+    ):
+        env.storage_controller.node_configure(attached.id, {"availability": "Offline"})
+
+    env.storage_controller.cancel_node_drain(attached.id)
+
+    def reconfigure_node_again():
+        env.storage_controller.node_configure(attached.id, {"scheduling": "Pause"})
+
+    # allow for small delay between actually having cancelled and being able reconfigure again
+    wait_until(4, 0.5, reconfigure_node_again)