diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index c0c5bc371a..222cb9fdd4 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -3572,6 +3572,11 @@ impl Service { .iter() .any(|i| i.generation.is_none() || i.generation_pageserver.is_none()) { + let shard_generations = generations + .into_iter() + .map(|i| (i.tenant_shard_id, (i.generation, i.generation_pageserver))) + .collect::>(); + // One or more shards has not been attached to a pageserver. Check if this is because it's configured // to be detached (409: caller should give up), or because it's meant to be attached but isn't yet (503: caller should retry) let locked = self.inner.read().unwrap(); @@ -3582,6 +3587,28 @@ impl Service { PlacementPolicy::Attached(_) => { // This shard is meant to be attached: the caller is not wrong to try and // use this function, but we can't service the request right now. + let Some(generation) = shard_generations.get(shard_id) else { + // This can only happen if there is a split brain controller modifying the database. This should + // never happen when testing, and if it happens in production we can only log the issue. + debug_assert!(false); + tracing::error!("Shard {shard_id} not found in generation state! Is another rogue controller running?"); + continue; + }; + let (generation, generation_pageserver) = generation; + if let Some(generation) = generation { + if generation_pageserver.is_none() { + // This is legitimate only in a very narrow window where the shard was only just configured into + // Attached mode after being created in Secondary or Detached mode, and it has had its generation + // set but not yet had a Reconciler run (reconciler is the only thing that sets generation_pageserver). + tracing::warn!("Shard {shard_id} generation is set ({generation:?}) but generation_pageserver is None, reconciler not run yet?"); + } + } else { + // This should never happen: a shard with no generation is only permitted when it was created in some state + // other than PlacementPolicy::Attached (and generation is always written to DB before setting Attached in memory) + debug_assert!(false); + tracing::error!("Shard {shard_id} generation is None, but it is in PlacementPolicy::Attached mode!"); + continue; + } } PlacementPolicy::Secondary | PlacementPolicy::Detached => { return Err(ApiError::Conflict(format!(