storcon: deny external node configuration if an operation is ongoing (#8727)

Per #8674, disallow node configuration while a drain or fill is ongoing.
Implement this by adding an HTTP-only wrapper,
`Service::external_node_configure`, which checks for an ongoing operation
before configuring.

Additionally:
- allow cancelling drain/fill after a pageserver has restarted and
transitioned to WarmingUp

Fixes: #8674
Author: Joonas Koivunen
Date: 2024-08-15 12:54:05 +03:00
Committed by: GitHub
Parent: a9c28be7d0
Commit: d9a57aeed9

4 changed files with 70 additions and 21 deletions
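For illustration only, the guard pattern the commit message describes condenses to roughly the sketch below. The `Operation`, `ServiceInner`, and `Service` shapes here are simplified stand-ins rather than the storage controller's real types; the actual change is in the `Service::external_node_configure` hunk further down.

    use std::sync::RwLock;

    // Simplified stand-ins for the storage controller's state.
    #[derive(Clone, Copy, Debug)]
    enum Operation {
        Drain,
        Fill,
    }

    struct ServiceInner {
        ongoing_operation: Option<Operation>,
    }

    struct Service {
        inner: RwLock<ServiceInner>,
    }

    impl Service {
        // Internal entry point: applies the configuration unconditionally.
        fn node_configure(&self) -> Result<(), String> {
            Ok(())
        }

        // HTTP-only wrapper: refuse to touch node configuration while a
        // background drain/fill is in flight, otherwise delegate.
        fn external_node_configure(&self) -> Result<(), String> {
            if let Some(op) = self.inner.read().unwrap().ongoing_operation {
                return Err(format!(
                    "Ongoing background operation forbids configuring: {op:?}"
                ));
            }
            self.node_configure()
        }
    }

    fn main() {
        let svc = Service {
            inner: RwLock::new(ServiceInner {
                ongoing_operation: Some(Operation::Drain),
            }),
        };
        // While a drain is ongoing, external configuration is rejected.
        assert!(svc.external_node_configure().is_err());
    }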


@@ -313,20 +313,17 @@ pub struct MetadataHealthUpdateRequest {
 pub struct MetadataHealthUpdateResponse {}
 
 #[derive(Serialize, Deserialize, Debug)]
 pub struct MetadataHealthListUnhealthyResponse {
     pub unhealthy_tenant_shards: Vec<TenantShardId>,
 }
 
 #[derive(Serialize, Deserialize, Debug)]
 pub struct MetadataHealthListOutdatedRequest {
     #[serde(with = "humantime_serde")]
     pub not_scrubbed_for: Duration,
 }
 
 #[derive(Serialize, Deserialize, Debug)]
 pub struct MetadataHealthListOutdatedResponse {
     pub health_records: Vec<MetadataHealthRecord>,
 }


@@ -500,7 +500,7 @@ async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>,
         StatusCode::OK,
         state
             .service
-            .node_configure(
+            .external_node_configure(
                 config_req.node_id,
                 config_req.availability.map(NodeAvailability::from),
                 config_req.scheduling,


@@ -4912,6 +4912,26 @@ impl Service {
         Ok(())
     }
 
+    /// Wrapper around [`Self::node_configure`] which only allows changes while there is no ongoing
+    /// operation for HTTP api.
+    pub(crate) async fn external_node_configure(
+        &self,
+        node_id: NodeId,
+        availability: Option<NodeAvailability>,
+        scheduling: Option<NodeSchedulingPolicy>,
+    ) -> Result<(), ApiError> {
+        {
+            let locked = self.inner.read().unwrap();
+            if let Some(op) = locked.ongoing_operation.as_ref().map(|op| op.operation) {
+                return Err(ApiError::PreconditionFailed(
+                    format!("Ongoing background operation forbids configuring: {op}").into(),
+                ));
+            }
+        }
+
+        self.node_configure(node_id, availability, scheduling).await
+    }
+
     pub(crate) async fn start_node_drain(
         self: &Arc<Self>,
         node_id: NodeId,
@@ -5017,14 +5037,14 @@ impl Service {
     }
 
     pub(crate) async fn cancel_node_drain(&self, node_id: NodeId) -> Result<(), ApiError> {
-        let (node_available, node_policy) = {
+        let node_available = {
             let locked = self.inner.read().unwrap();
             let nodes = &locked.nodes;
             let node = nodes.get(&node_id).ok_or(ApiError::NotFound(
                 anyhow::anyhow!("Node {} not registered", node_id).into(),
             ))?;
 
-            (node.is_available(), node.get_scheduling())
+            node.is_available()
         };
 
         if !node_available {
@@ -5033,12 +5053,6 @@ impl Service {
             ));
         }
 
-        if !matches!(node_policy, NodeSchedulingPolicy::Draining) {
-            return Err(ApiError::PreconditionFailed(
-                format!("Node {node_id} has no drain in progress").into(),
-            ));
-        }
-
         if let Some(op_handler) = self.inner.read().unwrap().ongoing_operation.as_ref() {
             if let Operation::Drain(drain) = op_handler.operation {
                 if drain.node_id == node_id {
@@ -5152,14 +5166,14 @@ impl Service {
     }
 
     pub(crate) async fn cancel_node_fill(&self, node_id: NodeId) -> Result<(), ApiError> {
-        let (node_available, node_policy) = {
+        let node_available = {
             let locked = self.inner.read().unwrap();
             let nodes = &locked.nodes;
             let node = nodes.get(&node_id).ok_or(ApiError::NotFound(
                 anyhow::anyhow!("Node {} not registered", node_id).into(),
             ))?;
 
-            (node.is_available(), node.get_scheduling())
+            node.is_available()
         };
 
         if !node_available {
@@ -5168,12 +5182,6 @@ impl Service {
             ));
         }
 
-        if !matches!(node_policy, NodeSchedulingPolicy::Filling) {
-            return Err(ApiError::PreconditionFailed(
-                format!("Node {node_id} has no fill in progress").into(),
-            ));
-        }
-
         if let Some(op_handler) = self.inner.read().unwrap().ongoing_operation.as_ref() {
             if let Operation::Fill(fill) = op_handler.operation {
                 if fill.node_id == node_id {
@@ -5982,7 +5990,7 @@ impl Service {
                 .await_waiters_remainder(waiters, SHORT_RECONCILE_TIMEOUT)
                 .await;
 
-            failpoint_support::sleep_millis_async!("sleepy-drain-loop");
+            failpoint_support::sleep_millis_async!("sleepy-drain-loop", &cancel);
         }
 
         while !waiters.is_empty() {


@@ -2091,3 +2091,47 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
         )
         == 0
     )
+
+
+def test_storage_controller_ps_restarted_during_drain(neon_env_builder: NeonEnvBuilder):
+    # single unsharded tenant, two locations
+    neon_env_builder.num_pageservers = 2
+
+    env = neon_env_builder.init_start()
+
+    env.storage_controller.tenant_policy_update(env.initial_tenant, {"placement": {"Attached": 1}})
+    env.storage_controller.reconcile_until_idle()
+
+    attached_id = int(env.storage_controller.locate(env.initial_tenant)[0]["node_id"])
+    attached = next((ps for ps in env.pageservers if ps.id == attached_id))
+
+    def attached_is_draining():
+        details = env.storage_controller.node_status(attached.id)
+        assert details["scheduling"] == "Draining"
+
+    env.storage_controller.configure_failpoints(("sleepy-drain-loop", "return(10000)"))
+    env.storage_controller.node_drain(attached.id)
+    wait_until(10, 0.5, attached_is_draining)
+
+    attached.restart()
+
+    # we are unable to reconfigure node while the operation is still ongoing
+    with pytest.raises(
+        StorageControllerApiException,
+        match="Precondition failed: Ongoing background operation forbids configuring: drain.*",
+    ):
+        env.storage_controller.node_configure(attached.id, {"scheduling": "Pause"})
+
+    with pytest.raises(
+        StorageControllerApiException,
+        match="Precondition failed: Ongoing background operation forbids configuring: drain.*",
+    ):
+        env.storage_controller.node_configure(attached.id, {"availability": "Offline"})
+
+    env.storage_controller.cancel_node_drain(attached.id)
+
+    def reconfigure_node_again():
+        env.storage_controller.node_configure(attached.id, {"scheduling": "Pause"})
+
+    # allow for small delay between actually having cancelled and being able reconfigure again
+    wait_until(4, 0.5, reconfigure_node_again)