storcon: respect tenant scheduling policy in drain/fill (#9657)

## Problem Pinning a tenant by setting Pause scheduling policy doesn't work because drain/fill code moves the tenant around during deploys. Closes: https://github.com/neondatabase/neon/issues/9612 ## Summary of changes - In drain, only move a tenant if it is in Active or Essential mode - In fill, only move a tenant if it is in Active mode. The asymmetry is a bit annoying, but it faithfully respects the purposes of the modes: Essential is meant to endeavor to keep the tenant available, which means it needs to be drained but doesn't need to be migrated during fills.
2026-01-03 19:42:55 +00:00 · 2024-11-06 15:14:43 +00:00
parent 4dfa0c221b
commit d182ff294c
2 changed files with 25 additions and 1 deletions
--- a/storage_controller/src/drain_utils.rs
+++ b/storage_controller/src/drain_utils.rs
@@ -3,7 +3,7 @@ use std::{
    sync::Arc,
 };

-use pageserver_api::controller_api::NodeSchedulingPolicy;
+use pageserver_api::controller_api::{NodeSchedulingPolicy, ShardSchedulingPolicy};
 use utils::{id::NodeId, shard::TenantShardId};

 use crate::{
@@ -98,6 +98,20 @@ impl TenantShardDrain {
            return None;
        }

+        // Only tenants with a normal (Active) scheduling policy are proactively moved
+        // around during a node drain.  Shards which have been manually configured to a different
+        // policy are only rescheduled by manual intervention.
+        match tenant_shard.get_scheduling_policy() {
+            ShardSchedulingPolicy::Active | ShardSchedulingPolicy::Essential => {
+                // A migration during drain is classed as 'essential' because it is required to
+                // uphold our availability goals for the tenant: this shard is elegible for migration.
+            }
+            ShardSchedulingPolicy::Pause | ShardSchedulingPolicy::Stop => {
+                // If we have been asked to avoid rescheduling this shard, then do not migrate it during a drain
+                return None;
+            }
+        }
+
        match scheduler.node_preferred(tenant_shard.intent.get_secondary()) {
            Some(node) => Some(node),
            None => {
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -6721,6 +6721,16 @@ impl Service {
            .tenants
            .iter_mut()
            .filter_map(|(tid, tenant_shard)| {
+                if !matches!(
+                    tenant_shard.get_scheduling_policy(),
+                    ShardSchedulingPolicy::Active
+                ) {
+                    // Only include tenants in fills if they have a normal (Active) scheduling policy.  We
+                    // even exclude Essential, because moving to fill a node is not essential to keeping this
+                    // tenant available.
+                    return None;
+                }
+
                if tenant_shard.intent.get_secondary().contains(&node_id) {
                    if let Some(primary) = tenant_shard.intent.get_attached() {
                        return Some((*primary, *tid));