From 529d661532939a01ec74e594cac9ada54ebb2586 Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 29 May 2025 12:07:09 +0100 Subject: [PATCH] storcon: skip offline nodes in get_top_tenant_shards (#12057) ## Summary The optimiser background loop could get delayed a lot by waiting for timeouts trying to talk to offline nodes. Fixes: #12056 ## Solution - Skip offline nodes in `get_top_tenant_shards` Link to Devin run: https://app.devin.ai/sessions/065afd6756734d33bbd4d012428c4b6e Requested by: John Spray (john@neon.tech) Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: John Spray --- storage_controller/src/service.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index d284747f73..823f4dadfa 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -8538,8 +8538,9 @@ impl Service { Some(ShardCount(new_shard_count)) } - /// Fetches the top tenant shards from every node, in descending order of - /// max logical size. Any node errors will be logged and ignored. + /// Fetches the top tenant shards from every available node, in descending order of + /// max logical size. Offline nodes are skipped, and any errors from available nodes + /// will be logged and ignored. async fn get_top_tenant_shards( &self, request: &TopTenantShardsRequest, @@ -8550,6 +8551,7 @@ impl Service { .unwrap() .nodes .values() + .filter(|node| node.is_available()) .cloned() .collect_vec();