storcon: implement endpoints for cancellation of drain and fill operations (#8029)

## Problem
There's no way to cancel drain and fill operations.

## Summary of changes
Implement HTTP endpoints to allow cancelling of background operations.
When the operation is cancelled successfully, the node scheduling policy will revert to
`Active`.
This commit is contained in:
Vlad Lazar
2024-06-21 17:13:51 +01:00
committed by GitHub
parent 15728be0e1
commit ee3081863e
4 changed files with 290 additions and 48 deletions

View File

@@ -502,6 +502,17 @@ async fn handle_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiErro
json_response(StatusCode::ACCEPTED, ())
}
/// HTTP handler that cancels an in-progress drain of the node named by the
/// `node_id` path parameter.
///
/// The caller must hold `Scope::Admin`. On success the response carries
/// `StatusCode::ACCEPTED` with a unit payload; any error from the permission
/// check, parameter parsing or `Service::cancel_node_drain` is propagated.
async fn handle_cancel_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

    let http_state = get_state(&req);
    let target: NodeId = parse_request_param(&req, "node_id")?;

    http_state.service.cancel_node_drain(target).await?;

    json_response(StatusCode::ACCEPTED, ())
}
async fn handle_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
@@ -513,6 +524,17 @@ async fn handle_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError
json_response(StatusCode::ACCEPTED, ())
}
/// HTTP handler that cancels an in-progress fill of the node named by the
/// `node_id` path parameter.
///
/// The caller must hold `Scope::Admin`. On success the response carries
/// `StatusCode::ACCEPTED` with a unit payload; any error from the permission
/// check, parameter parsing or `Service::cancel_node_fill` is propagated.
async fn handle_cancel_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

    let http_state = get_state(&req);
    let target: NodeId = parse_request_param(&req, "node_id")?;

    http_state.service.cancel_node_fill(target).await?;

    json_response(StatusCode::ACCEPTED, ())
}
async fn handle_tenant_shard_split(
service: Arc<Service>,
mut req: Request<Body>,
@@ -871,9 +893,23 @@ pub fn make_router(
.put("/control/v1/node/:node_id/drain", |r| {
named_request_span(r, handle_node_drain, RequestName("control_v1_node_drain"))
})
.delete("/control/v1/node/:node_id/drain", |r| {
named_request_span(
r,
handle_cancel_node_drain,
RequestName("control_v1_cancel_node_drain"),
)
})
.put("/control/v1/node/:node_id/fill", |r| {
named_request_span(r, handle_node_fill, RequestName("control_v1_node_fill"))
})
.delete("/control/v1/node/:node_id/fill", |r| {
named_request_span(
r,
handle_cancel_node_fill,
RequestName("control_v1_cancel_node_fill"),
)
})
// TODO(vlad): endpoint for cancelling drain and fill
// Tenant Shard operations
.put("/control/v1/tenant/:tenant_shard_id/migrate", |r| {

View File

@@ -4541,7 +4541,8 @@ impl Service {
self.node_configure(node_id, None, Some(NodeSchedulingPolicy::Draining))
.await?;
let cancel = CancellationToken::new();
let cancel = self.cancel.child_token();
let gate_guard = self.gate.enter().map_err(|_| ApiError::ShuttingDown)?;
self.inner.write().unwrap().ongoing_operation = Some(OperationHandler {
operation: Operation::Drain(Drain { node_id }),
@@ -4552,6 +4553,8 @@ impl Service {
let service = self.clone();
let cancel = cancel.clone();
async move {
let _gate_guard = gate_guard;
scopeguard::defer! {
let prev = service.inner.write().unwrap().ongoing_operation.take();
@@ -4593,6 +4596,44 @@ impl Service {
Ok(())
}
/// Request cancellation of the background drain registered for `node_id`.
///
/// This only calls `cancel()` on the matching operation handler's `cancel`
/// field and then returns; it does not wait for the background task.
///
/// # Errors
///
/// * `ApiError::NotFound` if `node_id` is not present in the node map.
/// * `ApiError::ResourceUnavailable` if the node reports itself unavailable.
/// * `ApiError::PreconditionFailed` if the node's scheduling policy is not
///   `Draining`, or if the ongoing operation is absent, is not a drain, or
///   targets a different node.
pub(crate) async fn cancel_node_drain(&self, node_id: NodeId) -> Result<(), ApiError> {
    // Both "nothing to cancel" exits share one message; build it in one place.
    let no_drain_in_progress = || {
        ApiError::PreconditionFailed(format!("Node {node_id} has no drain in progress").into())
    };

    // Snapshot the node's state under a short-lived read lock.
    let (is_available, scheduling) = {
        let inner = self.inner.read().unwrap();
        match inner.nodes.get(&node_id) {
            Some(node) => (node.is_available(), node.get_scheduling()),
            None => {
                return Err(ApiError::NotFound(
                    anyhow::anyhow!("Node {} not registered", node_id).into(),
                ));
            }
        }
    };

    if !is_available {
        return Err(ApiError::ResourceUnavailable(
            format!("Node {node_id} is currently unavailable").into(),
        ));
    }

    if !matches!(scheduling, NodeSchedulingPolicy::Draining) {
        return Err(no_drain_in_progress());
    }

    // Re-acquire the lock to inspect the ongoing operation; the policy check
    // above and this lookup are not performed under the same lock acquisition.
    let cancelled = {
        let inner = self.inner.read().unwrap();
        match inner.ongoing_operation.as_ref() {
            Some(handler) => match handler.operation {
                Operation::Drain(drain) if drain.node_id == node_id => {
                    tracing::info!("Cancelling background drain operation for node {node_id}");
                    handler.cancel.cancel();
                    true
                }
                _ => false,
            },
            None => false,
        }
    };

    if cancelled {
        Ok(())
    } else {
        Err(no_drain_in_progress())
    }
}
pub(crate) async fn start_node_fill(self: &Arc<Self>, node_id: NodeId) -> Result<(), ApiError> {
let (ongoing_op, node_available, node_policy, total_nodes_count) = {
let locked = self.inner.read().unwrap();
@@ -4635,7 +4676,8 @@ impl Service {
self.node_configure(node_id, None, Some(NodeSchedulingPolicy::Filling))
.await?;
let cancel = CancellationToken::new();
let cancel = self.cancel.child_token();
let gate_guard = self.gate.enter().map_err(|_| ApiError::ShuttingDown)?;
self.inner.write().unwrap().ongoing_operation = Some(OperationHandler {
operation: Operation::Fill(Fill { node_id }),
@@ -4646,6 +4688,8 @@ impl Service {
let service = self.clone();
let cancel = cancel.clone();
async move {
let _gate_guard = gate_guard;
scopeguard::defer! {
let prev = service.inner.write().unwrap().ongoing_operation.take();
@@ -4687,6 +4731,44 @@ impl Service {
Ok(())
}
/// Request cancellation of the background fill registered for `node_id`.
///
/// This only calls `cancel()` on the matching operation handler's `cancel`
/// field and then returns; it does not wait for the background task.
///
/// # Errors
///
/// * `ApiError::NotFound` if `node_id` is not present in the node map.
/// * `ApiError::ResourceUnavailable` if the node reports itself unavailable.
/// * `ApiError::PreconditionFailed` if the node's scheduling policy is not
///   `Filling`, or if the ongoing operation is absent, is not a fill, or
///   targets a different node.
pub(crate) async fn cancel_node_fill(&self, node_id: NodeId) -> Result<(), ApiError> {
    // Snapshot the node's state under a short-lived read lock.
    let (node_available, node_policy) = {
        let locked = self.inner.read().unwrap();
        // Build the error lazily so the happy path does not allocate it.
        let node = locked.nodes.get(&node_id).ok_or_else(|| {
            ApiError::NotFound(anyhow::anyhow!("Node {} not registered", node_id).into())
        })?;

        (node.is_available(), node.get_scheduling())
    };

    if !node_available {
        return Err(ApiError::ResourceUnavailable(
            format!("Node {node_id} is currently unavailable").into(),
        ));
    }

    if !matches!(node_policy, NodeSchedulingPolicy::Filling) {
        return Err(ApiError::PreconditionFailed(
            format!("Node {node_id} has no fill in progress").into(),
        ));
    }

    // The policy says a fill is running; confirm the registered background
    // operation really is a fill for this node before cancelling it.
    if let Some(op_handler) = self.inner.read().unwrap().ongoing_operation.as_ref() {
        if let Operation::Fill(fill) = op_handler.operation {
            if fill.node_id == node_id {
                // Log the operation actually being cancelled (a fill, not a drain).
                tracing::info!("Cancelling background fill operation for node {node_id}");
                op_handler.cancel.cancel();
                return Ok(());
            }
        }
    }

    Err(ApiError::PreconditionFailed(
        format!("Node {node_id} has no fill in progress").into(),
    ))
}
/// Helper for methods that will try and call pageserver APIs for
/// a tenant, such as timeline CRUD: they cannot proceed unless the tenant
/// is attached somewhere.
@@ -5286,7 +5368,21 @@ impl Service {
while !inspected_all_shards {
if cancel.is_cancelled() {
return Err(OperationError::Cancelled);
match self
.node_configure(node_id, None, Some(NodeSchedulingPolicy::Active))
.await
{
Ok(()) => return Err(OperationError::Cancelled),
Err(err) => {
return Err(OperationError::FinalizeError(
format!(
"Failed to finalise drain cancel of {} by setting scheduling policy to Active: {}",
node_id, err
)
.into(),
));
}
}
}
{
@@ -5356,9 +5452,29 @@ impl Service {
waiters = self
.await_waiters_remainder(waiters, SHORT_RECONCILE_TIMEOUT)
.await;
failpoint_support::sleep_millis_async!("sleepy-drain-loop");
}
while !waiters.is_empty() {
if cancel.is_cancelled() {
match self
.node_configure(node_id, None, Some(NodeSchedulingPolicy::Active))
.await
{
Ok(()) => return Err(OperationError::Cancelled),
Err(err) => {
return Err(OperationError::FinalizeError(
format!(
"Failed to finalise drain cancel of {} by setting scheduling policy to Active: {}",
node_id, err
)
.into(),
));
}
}
}
tracing::info!("Awaiting {} pending drain reconciliations", waiters.len());
waiters = self
@@ -5495,7 +5611,21 @@ impl Service {
// we validate to ensure that it has not gone stale in the meantime.
while !tids_to_promote.is_empty() {
if cancel.is_cancelled() {
return Err(OperationError::Cancelled);
match self
.node_configure(node_id, None, Some(NodeSchedulingPolicy::Active))
.await
{
Ok(()) => return Err(OperationError::Cancelled),
Err(err) => {
return Err(OperationError::FinalizeError(
format!(
"Failed to finalise drain cancel of {} by setting scheduling policy to Active: {}",
node_id, err
)
.into(),
));
}
}
}
{
@@ -5563,6 +5693,24 @@ impl Service {
}
while !waiters.is_empty() {
if cancel.is_cancelled() {
match self
.node_configure(node_id, None, Some(NodeSchedulingPolicy::Active))
.await
{
Ok(()) => return Err(OperationError::Cancelled),
Err(err) => {
return Err(OperationError::FinalizeError(
format!(
"Failed to finalise drain cancel of {} by setting scheduling policy to Active: {}",
node_id, err
)
.into(),
));
}
}
}
tracing::info!("Awaiting {} pending fill reconciliations", waiters.len());
waiters = self