storcon: safety check when completing shard split (#11256)

## Problem

There is a rare race between controller graceful deployment and shard splitting where we may incorrectly both abort _and_ complete the split (on different pods), and thereby leave no shards at all in the database.

Related: #11254

## Summary of changes

- In `complete_shard_split`, refuse to delete anything if child shards are not found
2026-01-07 21:42:56 +00:00 · 2025-03-14 20:08:24 +00:00
parent 53d50c7ea5
commit a674ed8caf
2 changed files with 17 additions and 1 deletions
--- a/storage_controller/src/persistence.rs
+++ b/storage_controller/src/persistence.rs
@@ -967,10 +967,26 @@ impl Persistence {
        &self,
        split_tenant_id: TenantId,
        old_shard_count: ShardCount,
+        new_shard_count: ShardCount,
    ) -> DatabaseResult<()> {
        use crate::schema::tenant_shards::dsl::*;
        self.with_measured_conn(DatabaseOperation::CompleteShardSplit, move |conn| {
            Box::pin(async move {
+                // Sanity: child shards must still exist, as we're deleting parent shards
+                let child_shards_query = tenant_shards
+                    .filter(tenant_id.eq(split_tenant_id.to_string()))
+                    .filter(shard_count.eq(new_shard_count.literal() as i32));
+                let child_shards = child_shards_query
+                    .load::<TenantShardPersistence>(conn)
+                    .await?;
+                if child_shards.len() != new_shard_count.count() as usize {
+                    return Err(DatabaseError::Logical(format!(
+                        "Unexpected child shard count {} while completing split to \
+                            count {new_shard_count:?} on tenant {split_tenant_id}",
+                        child_shards.len()
+                    )));
+                }
+
                // Drop parent shards
                diesel::delete(tenant_shards)
                    .filter(tenant_id.eq(split_tenant_id.to_string()))
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -5753,7 +5753,7 @@ impl Service {
        //  it doesn't match, but that requires more retry logic on this side)

        self.persistence
-            .complete_shard_split(tenant_id, old_shard_count)
+            .complete_shard_split(tenant_id, old_shard_count, new_shard_count)
            .await?;

        fail::fail_point!("shard-split-post-complete", |_| Err(