diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index 4a97aac125..85d9c574a1 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -967,10 +967,26 @@ impl Persistence { &self, split_tenant_id: TenantId, old_shard_count: ShardCount, + new_shard_count: ShardCount, ) -> DatabaseResult<()> { use crate::schema::tenant_shards::dsl::*; self.with_measured_conn(DatabaseOperation::CompleteShardSplit, move |conn| { Box::pin(async move { + // Sanity: child shards must still exist, as we're deleting parent shards + let child_shards_query = tenant_shards + .filter(tenant_id.eq(split_tenant_id.to_string())) + .filter(shard_count.eq(new_shard_count.literal() as i32)); + let child_shards = child_shards_query + .load::(conn) + .await?; + if child_shards.len() != new_shard_count.count() as usize { + return Err(DatabaseError::Logical(format!( + "Unexpected child shard count {} while completing split to \ + count {new_shard_count:?} on tenant {split_tenant_id}", + child_shards.len() + ))); + } + // Drop parent shards diesel::delete(tenant_shards) .filter(tenant_id.eq(split_tenant_id.to_string())) diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index f33408a89b..4e00136e1b 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -5753,7 +5753,7 @@ impl Service { // it doesn't match, but that requires more retry logic on this side) self.persistence - .complete_shard_split(tenant_id, old_shard_count) + .complete_shard_split(tenant_id, old_shard_count, new_shard_count) .await?; fail::fail_point!("shard-split-post-complete", |_| Err(