Make node deletion context-aware (#12494)

## Problem

The node deletion process does not calculate preferred nodes correctly: it
doesn't take the current tenant-shard layout across all pageservers into account.

## Summary of changes

- Added a schedule context calculation for node deletion, so rescheduling considers the tenant's existing shard placements (a simplified sketch follows below)
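
For illustration, a minimal, self-contained sketch of the idea using simplified stand-in types (`TenantId`, `NodeId`, `Shard`, and this `ScheduleContext` are hypothetical; the real storage controller types such as `TenantShardId` and `IntentState` are richer): the schedule context is built once from the placements of every shard in the affected tenant, so rescheduling a shard off the deleted node can steer away from pageservers that already host the tenant.

```rust
use std::collections::{BTreeMap, HashMap};

// Hypothetical stand-ins for the storage controller's real types.
type TenantId = u64;
type NodeId = u64;

#[derive(Default)]
struct ScheduleContext {
    // How many of the tenant's shards already live on each pageserver.
    attached: HashMap<NodeId, usize>,
}

impl ScheduleContext {
    // Mark nodes that already host shards of this tenant so the scheduler
    // prefers other pageservers when picking replacement locations.
    fn avoid(&mut self, nodes: &[NodeId]) {
        for node in nodes {
            *self.attached.entry(*node).or_default() += 1;
        }
    }
}

struct Shard {
    tenant_id: TenantId,
    // Pageservers this shard currently intends to use (attached + secondaries).
    placements: Vec<NodeId>,
}

// Build one context from *all* shards of the affected tenant, rather than a
// fresh, empty context per shard as the old code did.
fn schedule_context_for_tenant(
    shards: &BTreeMap<(TenantId, u8), Shard>,
    tenant_id: TenantId,
) -> ScheduleContext {
    let mut ctx = ScheduleContext::default();
    for shard in shards.values().filter(|s| s.tenant_id == tenant_id) {
        ctx.avoid(&shard.placements);
    }
    ctx
}

fn main() {
    let mut shards = BTreeMap::new();
    shards.insert((1, 0), Shard { tenant_id: 1, placements: vec![10, 11] });
    shards.insert((1, 1), Shard { tenant_id: 1, placements: vec![11, 12] });

    let ctx = schedule_context_for_tenant(&shards, 1);
    // Node 11 already hosts two of the tenant's shards, so a reschedule
    // triggered by deleting another node should steer away from it.
    assert_eq!(ctx.attached[&11], 2);
}
```

In the actual change, the same accumulation is done via `schedule_context.avoid(&shard.intent.all_pageservers())` over `TenantShardId::tenant_range(tid.tenant_id)`, as shown in the diff below.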

Co-authored-by: Aleksandr Sarantsev <aleksandr.sarantsev@databricks.com>
Aleksandr Sarantsev authored on 2025-07-08 17:15:14 +04:00, committed by GitHub
commit 38384c37ac · parent 2b2a547671


```diff
@@ -7208,6 +7208,12 @@ impl Service {
         let mut locked = self.inner.write().unwrap();
         let (nodes, tenants, scheduler) = locked.parts_mut();
 
+        // Calculate a schedule context here to avoid borrow checker issues.
+        let mut schedule_context = ScheduleContext::default();
+        for (_, shard) in tenants.range(TenantShardId::tenant_range(tid.tenant_id)) {
+            schedule_context.avoid(&shard.intent.all_pageservers());
+        }
+
         let tenant_shard = match tenants.get_mut(&tid) {
             Some(tenant_shard) => tenant_shard,
             None => {
@@ -7233,9 +7239,6 @@
         }
 
         if tenant_shard.deref_node(node_id) {
-            // TODO(ephemeralsad): we should process all shards in a tenant at once, so
-            // we can avoid settling the tenant unevenly.
-            let mut schedule_context = ScheduleContext::new(ScheduleMode::Normal);
             if let Err(e) = tenant_shard.schedule(scheduler, &mut schedule_context) {
                 tracing::error!(
                     "Refusing to delete node, shard {} can't be rescheduled: {e}",
```