From d182ff294c85f01e77bd0cf4e5345e3357986f23 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 6 Nov 2024 15:14:43 +0000
Subject: [PATCH] storcon: respect tenant scheduling policy in drain/fill (#9657)

## Problem

Pinning a tenant by setting the Pause scheduling policy doesn't work,
because the drain/fill code moves the tenant around during deploys.

Closes: https://github.com/neondatabase/neon/issues/9612

## Summary of changes

- In drain, only move a tenant if it is in Active or Essential mode.
- In fill, only move a tenant if it is in Active mode.

The asymmetry is a bit annoying, but it faithfully respects the purposes of
the modes: Essential is meant to keep the tenant available, which means it
needs to be drained but doesn't need to be migrated during fills.
---
 storage_controller/src/drain_utils.rs | 16 +++++++++++++++-
 storage_controller/src/service.rs     | 10 ++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/storage_controller/src/drain_utils.rs b/storage_controller/src/drain_utils.rs
index dea1f04649..47f4276ff2 100644
--- a/storage_controller/src/drain_utils.rs
+++ b/storage_controller/src/drain_utils.rs
@@ -3,7 +3,7 @@ use std::{
     sync::Arc,
 };
 
-use pageserver_api::controller_api::NodeSchedulingPolicy;
+use pageserver_api::controller_api::{NodeSchedulingPolicy, ShardSchedulingPolicy};
 use utils::{id::NodeId, shard::TenantShardId};
 
 use crate::{
@@ -98,6 +98,20 @@ impl TenantShardDrain {
             return None;
         }
 
+        // Only tenants with a normal (Active) scheduling policy are proactively moved
+        // around during a node drain. Shards which have been manually configured to a different
+        // policy are only rescheduled by manual intervention.
+        match tenant_shard.get_scheduling_policy() {
+            ShardSchedulingPolicy::Active | ShardSchedulingPolicy::Essential => {
+                // A migration during drain is classed as 'essential' because it is required to
+                // uphold our availability goals for the tenant: this shard is eligible for migration.
+            }
+            ShardSchedulingPolicy::Pause | ShardSchedulingPolicy::Stop => {
+                // If we have been asked to avoid rescheduling this shard, then do not migrate it during a drain.
+                return None;
+            }
+        }
+
         match scheduler.node_preferred(tenant_shard.intent.get_secondary()) {
             Some(node) => Some(node),
             None => {
diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index 3f6cbfef59..e3a147bc06 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -6721,6 +6721,16 @@ impl Service {
             .tenants
             .iter_mut()
             .filter_map(|(tid, tenant_shard)| {
+                if !matches!(
+                    tenant_shard.get_scheduling_policy(),
+                    ShardSchedulingPolicy::Active
+                ) {
+                    // Only include tenants in fills if they have a normal (Active) scheduling policy. We
+                    // even exclude Essential, because moving to fill a node is not essential to keeping this
+                    // tenant available.
+                    return None;
+                }
+
                 if tenant_shard.intent.get_secondary().contains(&node_id) {
                     if let Some(primary) = tenant_shard.intent.get_attached() {
                         return Some((*primary, *tid));
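
The asymmetric eligibility rules from the summary above boil down to two small predicates. The sketch below is illustrative only: `eligible_for_drain` and `eligible_for_fill` are hypothetical helper names, and `ShardSchedulingPolicy` is mirrored locally so the snippet compiles on its own (the real enum lives in `pageserver_api::controller_api`).

```rust
// Local mirror of the policy enum, for illustration only.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ShardSchedulingPolicy {
    Active,
    Essential,
    Pause,
    Stop,
}

/// Drain: moving a shard off a node that is being drained is needed to keep
/// the tenant available, so both Active and Essential shards are eligible.
fn eligible_for_drain(policy: ShardSchedulingPolicy) -> bool {
    matches!(
        policy,
        ShardSchedulingPolicy::Active | ShardSchedulingPolicy::Essential
    )
}

/// Fill: moving a shard onto a freshly restarted node is an optimisation, not
/// required for availability, so only Active shards are eligible.
fn eligible_for_fill(policy: ShardSchedulingPolicy) -> bool {
    matches!(policy, ShardSchedulingPolicy::Active)
}

fn main() {
    // A paused (pinned) or stopped shard is never moved by drain or fill.
    assert!(!eligible_for_drain(ShardSchedulingPolicy::Pause));
    assert!(!eligible_for_fill(ShardSchedulingPolicy::Pause));
    assert!(!eligible_for_drain(ShardSchedulingPolicy::Stop));
    assert!(!eligible_for_fill(ShardSchedulingPolicy::Stop));

    // An Essential shard is drained for availability, but not shuffled during fills.
    assert!(eligible_for_drain(ShardSchedulingPolicy::Essential));
    assert!(!eligible_for_fill(ShardSchedulingPolicy::Essential));
}
```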