From b30e80512e4b947cb5e6b4a908068b06cc646016 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Thu, 20 Jun 2024 14:03:08 +0100 Subject: [PATCH] storcon: don't call schedule --- storage_controller/src/service.rs | 66 ++++++++------------------ storage_controller/src/tenant_shard.rs | 50 ++++++++----------- 2 files changed, 40 insertions(+), 76 deletions(-) diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 298bac25ff..3bf3ccc2ce 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -5281,7 +5281,6 @@ impl Service { let mut last_inspected_shard: Option = None; let mut inspected_all_shards = false; let mut waiters = Vec::new(); - let mut schedule_context = ScheduleContext::default(); while !inspected_all_shards { if cancel.is_cancelled() { @@ -5322,34 +5321,22 @@ impl Service { } }; - // Reset the scheduling context if we have moved over to a new tenant. - // This is required since the affinity scores stored in the scheduling - // context should be tenant specific. Note that we are relying on - // [`ServiceState::tenants`] being ordered by tenant id. - if last_inspected_shard.map(|tid| tid.tenant_id) != Some(tid.tenant_id) { - schedule_context = ScheduleContext::default(); - } - - match tenant_shard.reschedule_to_secondary( - node_id, - scheduler, - &mut schedule_context, - ) { - Err(e) => { - tracing::warn!( - tenant_id=%tid.tenant_id, shard_id=%tid.shard_slug(), - "Scheduling error when draining pageserver {} : {e}", node_id - ); - } - Ok(()) => { - let scheduled_to = tenant_shard.intent.get_attached(); - tracing::info!( - tenant_id=%tid.tenant_id, shard_id=%tid.shard_slug(), - "Rescheduled shard while draining node {}: {} -> {:?}", - node_id, - node_id, - scheduled_to - ); + match tenant_shard.reschedule_to_secondary(Some(node_id), None, scheduler) { + Err(e) => { + tracing::warn!( + tenant_id=%tid.tenant_id, shard_id=%tid.shard_slug(), + "Scheduling error when draining pageserver {} : {e}", node_id + ); + } + Ok(()) => { + let scheduled_to = tenant_shard.intent.get_attached(); + tracing::info!( + tenant_id=%tid.tenant_id, shard_id=%tid.shard_slug(), + "Rescheduled shard while draining node {}: {} -> {:?}", + node_id, + node_id, + scheduled_to + ); let waiter = self.maybe_reconcile_shard(tenant_shard, nodes); if let Some(some) = waiter { @@ -5496,9 +5483,7 @@ impl Service { // secondaries are warm. This is not always true (e.g. we just migrated the // tenant). Take that into consideration by checking the secondary status. let mut tids_to_promote = self.fill_node_plan(node_id); - let mut waiters = Vec::new(); - let mut schedule_context = ScheduleContext::default(); // Execute the plan we've composed above. Before aplying each move from the plan, // we validate to ensure that it has not gone stale in the meantime. @@ -5524,17 +5509,8 @@ impl Service { )); } - let mut last_inspected_tenant = None; while waiters.len() < MAX_RECONCILES_PER_OPERATION { if let Some(tid) = tids_to_promote.pop() { - // Reset the scheduling context if we have moved over to a new tenant. - // This is required since the affinity scores stored in the scheduling - // context should be tenant specific. Note that we are relying on the - // result [`Service::fill_node_plan`] being ordered by tenant id. - if last_inspected_tenant != Some(tid.tenant_id) { - schedule_context = ScheduleContext::default(); - } - if let Some(tenant_shard) = tenants.get_mut(&tid) { // If the node being filled is not a secondary anymore, // skip the promotion. @@ -5543,9 +5519,11 @@ impl Service { } let previously_attached_to = *tenant_shard.intent.get_attached(); - - tenant_shard.intent.promote_attached(scheduler, node_id); - match tenant_shard.schedule(scheduler, &mut schedule_context) { + match tenant_shard.reschedule_to_secondary( + previously_attached_to, + Some(node_id), + scheduler, + ) { Err(e) => { tracing::warn!( tenant_id=%tid.tenant_id, shard_id=%tid.shard_slug(), @@ -5569,8 +5547,6 @@ impl Service { } } } - - last_inspected_tenant = Some(tid.tenant_id); } else { break; } diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index 456d7292c5..a5415b2cbd 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -646,46 +646,34 @@ impl TenantShard { Ok(()) } - /// Attempt to reschedule the tenant shard to one of its secondary locations. - /// If no schedulable secondary is found, roll back any changes to the intent state. - /// If the `attached_to` node argument doesn't match the intent state, no changes - /// are made and the function returns successfully. + // TODO: comment pub(crate) fn reschedule_to_secondary( &mut self, - attached_to: NodeId, + attached_to: Option, + promote_to: Option, scheduler: &mut Scheduler, - context: &mut ScheduleContext, ) -> Result<(), ScheduleError> { - let original_secondaries = self.intent.get_secondary().clone(); - - if self.intent.demote_attached(scheduler, attached_to) { - let res = match self.schedule(scheduler, context) { - Ok(()) => { - let rescheduled_to_secondary = - self.intent.get_attached().map_or(false, |node| { - original_secondaries.iter().any(|sec| *sec == node) - }); - - if rescheduled_to_secondary { - Ok(()) - } else { - Err(ScheduleError::ImpossibleConstraint) - } + let promote_to = match promote_to { + Some(node) => node, + None => match scheduler.node_preferred(self.intent.get_secondary()) { + Some(node) => node, + None => { + return Err(ScheduleError::ImpossibleConstraint); } - Err(e) => Err(e), - }; + }, + }; - if res.is_err() { - self.intent.set_attached(scheduler, Some(attached_to)); - self.intent.clear_secondary(scheduler); - for sec in original_secondaries { - self.intent.push_secondary(scheduler, sec); - } + assert!(self.intent.get_secondary().contains(&promote_to)); + + if let Some(node) = attached_to { + let demoted = self.intent.demote_attached(scheduler, node); + if !demoted { + return Err(ScheduleError::ImpossibleConstraint); } - - return res; } + self.intent.promote_attached(scheduler, promote_to); + Ok(()) }