From 3c16bd6e0bbc5e39111188cfca571b5033d3a377 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 1 Nov 2024 16:47:20 +0000 Subject: [PATCH] storcon: skip non-active projects in chaos injection (#9606) ## Problem We may sometimes use scheduling policies like `Pause` to pin a tenant in its current location for operational reasons. It is undesirable for the chaos task to make any changes to such tenants. ## Summary of changes - Add a check for the shard's scheduling policy: shards whose policy is not `Active` are skipped - Add a log line when we do choose to do a chaos action for a tenant: this will help us understand which operations originate from the chaos task. --- storage_controller/src/service/chaos_injector.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/storage_controller/src/service/chaos_injector.rs b/storage_controller/src/service/chaos_injector.rs index 99961d691c..0e551beaa7 100644 --- a/storage_controller/src/service/chaos_injector.rs +++ b/storage_controller/src/service/chaos_injector.rs @@ -1,5 +1,6 @@ use std::{sync::Arc, time::Duration}; +use pageserver_api::controller_api::ShardSchedulingPolicy; use rand::seq::SliceRandom; use rand::thread_rng; use tokio_util::sync::CancellationToken; @@ -47,6 +48,16 @@ impl ChaosInjector { .get_mut(victim) .expect("Held lock between choosing ID and this get"); + if !matches!(shard.get_scheduling_policy(), ShardSchedulingPolicy::Active) { + // Skip non-active scheduling policies, so that a shard with a policy like Pause can + // be pinned without being disrupted by us. + tracing::info!( + "Skipping shard {victim}: scheduling policy is {:?}", + shard.get_scheduling_policy() + ); + continue; + } + // Pick a secondary to promote let Some(new_location) = shard .intent @@ -63,6 +74,8 @@ impl ChaosInjector { continue; }; + tracing::info!("Injecting chaos: migrate {victim} {old_location}->{new_location}"); + shard.intent.demote_attached(scheduler, old_location); shard.intent.promote_attached(scheduler, new_location); self.service.maybe_reconcile_shard(shard, nodes);