mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-27 01:50:38 +00:00
storcon: Make node deletion process cancellable (#12320)
## Problem The current deletion operation is synchronous and blocking, which is unsuitable for potentially long-running tasks like node deletion. In such cases, the standard HTTP request-response pattern is not a good fit. ## Summary of Changes - Added new `storcon_cli` commands: `NodeStartDelete` and `NodeCancelDelete` to initiate and cancel deletion asynchronously. - Added corresponding `storcon` HTTP handlers to support the new start/cancel deletion flow. - Introduced a new type of background operation: `Delete`, to track and manage the deletion process outside the request lifecycle. --------- Co-authored-by: Aleksandr Sarantsev <aleksandr.sarantsev@databricks.com>
This commit is contained in:
committed by
GitHub
parent
225267b3ae
commit
b2705cfee6
@@ -635,18 +635,23 @@ impl Persistence {
|
||||
let updated = self
|
||||
.with_measured_conn(DatabaseOperation::ReAttach, move |conn| {
|
||||
Box::pin(async move {
|
||||
// Check if the node is not marked as deleted
|
||||
let deleted_node: i64 = nodes
|
||||
let node: Option<NodePersistence> = nodes
|
||||
.filter(node_id.eq(input_node_id.0 as i64))
|
||||
.filter(lifecycle.eq(String::from(NodeLifecycle::Deleted)))
|
||||
.count()
|
||||
.get_result(conn)
|
||||
.await?;
|
||||
if deleted_node > 0 {
|
||||
return Err(DatabaseError::Logical(format!(
|
||||
"Node {input_node_id} is marked as deleted, re-attach is not allowed"
|
||||
)));
|
||||
}
|
||||
.first::<NodePersistence>(conn)
|
||||
.await
|
||||
.optional()?;
|
||||
|
||||
// Check if the node is not marked as deleted
|
||||
match node {
|
||||
Some(node) if matches!(NodeLifecycle::from_str(&node.lifecycle), Ok(NodeLifecycle::Deleted)) => {
|
||||
return Err(DatabaseError::Logical(format!(
|
||||
"Node {input_node_id} is marked as deleted, re-attach is not allowed"
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
// go through
|
||||
}
|
||||
};
|
||||
|
||||
let rows_updated = diesel::update(tenant_shards)
|
||||
.filter(generation_pageserver.eq(input_node_id.0 as i64))
|
||||
@@ -664,21 +669,23 @@ impl Persistence {
|
||||
.load(conn)
|
||||
.await?;
|
||||
|
||||
// If the node went through a drain and restart phase before re-attaching,
|
||||
// then reset its node scheduling policy to active.
|
||||
diesel::update(nodes)
|
||||
.filter(node_id.eq(input_node_id.0 as i64))
|
||||
.filter(
|
||||
scheduling_policy
|
||||
.eq(String::from(NodeSchedulingPolicy::PauseForRestart))
|
||||
.or(scheduling_policy
|
||||
.eq(String::from(NodeSchedulingPolicy::Draining)))
|
||||
.or(scheduling_policy
|
||||
.eq(String::from(NodeSchedulingPolicy::Filling))),
|
||||
)
|
||||
.set(scheduling_policy.eq(String::from(NodeSchedulingPolicy::Active)))
|
||||
.execute(conn)
|
||||
.await?;
|
||||
if let Some(node) = node {
|
||||
let old_scheduling_policy =
|
||||
NodeSchedulingPolicy::from_str(&node.scheduling_policy).unwrap();
|
||||
let new_scheduling_policy = match old_scheduling_policy {
|
||||
NodeSchedulingPolicy::Active => NodeSchedulingPolicy::Active,
|
||||
NodeSchedulingPolicy::PauseForRestart => NodeSchedulingPolicy::Active,
|
||||
NodeSchedulingPolicy::Draining => NodeSchedulingPolicy::Active,
|
||||
NodeSchedulingPolicy::Filling => NodeSchedulingPolicy::Active,
|
||||
NodeSchedulingPolicy::Pause => NodeSchedulingPolicy::Pause,
|
||||
NodeSchedulingPolicy::Deleting => NodeSchedulingPolicy::Pause,
|
||||
};
|
||||
diesel::update(nodes)
|
||||
.filter(node_id.eq(input_node_id.0 as i64))
|
||||
.set(scheduling_policy.eq(String::from(new_scheduling_policy)))
|
||||
.execute(conn)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(updated)
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user