storcon: skip non-active projects in chaos injection (#9606)

## Problem

We may sometimes use scheduling policies like `Pause` to pin a tenant in
its current location for operational reasons. It is undesirable for the
chaos task to make any changes to such tenants.

## Summary of changes

- Add a check for the scheduling policy: the chaos task now skips any shard whose policy is not `Active`
- Add a log line when we do choose to do a chaos action for a tenant:
this will help us understand which operations originate from the chaos
task.
This commit is contained in:
John Spray
2024-11-01 16:47:20 +00:00
committed by GitHub
parent 123816e99a
commit 3c16bd6e0b

View File

@@ -1,5 +1,6 @@
use std::{sync::Arc, time::Duration};
use pageserver_api::controller_api::ShardSchedulingPolicy;
use rand::seq::SliceRandom;
use rand::thread_rng;
use tokio_util::sync::CancellationToken;
@@ -47,6 +48,16 @@ impl ChaosInjector {
.get_mut(victim)
.expect("Held lock between choosing ID and this get");
if !matches!(shard.get_scheduling_policy(), ShardSchedulingPolicy::Active) {
// Skip non-active scheduling policies, so that a shard with a policy like Pause can
// be pinned without being disrupted by us.
tracing::info!(
"Skipping shard {victim}: scheduling policy is {:?}",
shard.get_scheduling_policy()
);
continue;
}
// Pick a secondary to promote
let Some(new_location) = shard
.intent
@@ -63,6 +74,8 @@ impl ChaosInjector {
continue;
};
tracing::info!("Injecting chaos: migrate {victim} {old_location}->{new_location}");
shard.intent.demote_attached(scheduler, old_location);
shard.intent.promote_attached(scheduler, new_location);
self.service.maybe_reconcile_shard(shard, nodes);