diff --git a/storage_controller/src/scheduler.rs b/storage_controller/src/scheduler.rs
index 0bd2eeac35..843159010d 100644
--- a/storage_controller/src/scheduler.rs
+++ b/storage_controller/src/scheduler.rs
@@ -391,7 +391,7 @@ impl Scheduler {
             return Err(ScheduleError::NoPageservers);
         }
 
-        let mut scores: Vec<(NodeId, AffinityScore, usize)> = self
+        let mut scores: Vec<(NodeId, AffinityScore, usize, usize)> = self
             .nodes
             .iter()
             .filter_map(|(k, v)| {
@@ -402,6 +402,7 @@ impl Scheduler {
                         *k,
                         context.nodes.get(k).copied().unwrap_or(AffinityScore::FREE),
                         v.shard_count,
+                        v.attached_shard_count,
                     ))
                 }
             })
@@ -409,9 +410,12 @@ impl Scheduler {
 
         // Sort by, in order of precedence:
         //  1st: Affinity score. We should never pick a higher-score node if a lower-score node is available
-        //  2nd: Utilization. Within nodes with the same affinity, use the least loaded nodes.
-        //  3rd: Node ID. This is a convenience to make selection deterministic in tests and empty systems.
-        scores.sort_by_key(|i| (i.1, i.2, i.0));
+        //  2nd: Attached shard count. Within nodes with the same affinity, we always pick the node with
+        //       the least number of attached shards.
+        //  3rd: Total shard count. Within nodes with the same affinity and attached shard count, use nodes
+        //       with the lower total shard count.
+        //  4th: Node ID. This is a convenience to make selection deterministic in tests and empty systems.
+        scores.sort_by_key(|i| (i.1, i.3, i.2, i.0));
 
         if scores.is_empty() {
             // After applying constraints, no pageservers were left.
diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs
index d1b632755f..840bcbb81d 100644
--- a/storage_controller/src/tenant_shard.rs
+++ b/storage_controller/src/tenant_shard.rs
@@ -1632,14 +1632,10 @@ pub(crate) mod tests {
 
         // We should see equal number of locations on the two nodes.
         assert_eq!(scheduler.get_node_shard_count(NodeId(1)), 4);
-        // Scheduling does not consider the number of attachments picking the initial
-        // pageserver to attach to (hence the assertion that all primaries are on the
-        // same node)
-        // TODO: Tweak the scheduling to evenly distribute attachments for new shards.
-        assert_eq!(scheduler.get_node_attached_shard_count(NodeId(1)), 4);
+        assert_eq!(scheduler.get_node_attached_shard_count(NodeId(1)), 2);
 
         assert_eq!(scheduler.get_node_shard_count(NodeId(2)), 4);
-        assert_eq!(scheduler.get_node_attached_shard_count(NodeId(2)), 0);
+        assert_eq!(scheduler.get_node_attached_shard_count(NodeId(2)), 2);
 
         // Add another two nodes: we should see the shards spread out when their optimize
         // methods are called
diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index 30f96ceee8..c6450df186 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -1527,13 +1527,7 @@ def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder):
     )
 
     # Give things a chance to settle.
-    # A call to `reconcile_until_idle` could be used here instead,
-    # however since all attachments are placed on the same node,
-    # we'd have to wait for a long time (2 minutes-ish) for optimizations
-    # to quiesce.
-    # TODO: once the initial attachment selection is fixed, update this
-    # to use `reconcile_until_idle`.
-    time.sleep(2)
+    env.storage_controller.reconcile_until_idle(timeout_secs=30)
 
     nodes = env.storage_controller.node_list()
    assert len(nodes) == 2
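
To illustrate the effect of the new four-part sort key, here is a minimal, self-contained sketch. It does not use the storage controller's real types: `NodeId` and `AffinityScore` below are simplified stand-ins, and the candidate values are made up. It only demonstrates how ties on affinity are now broken by attached shard count before total shard count and node ID.

```rust
// Simplified stand-ins for illustration only (not the storage controller's types).
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct NodeId(u64);

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct AffinityScore(usize);

fn main() {
    // Tuple layout mirrors the patched scheduler:
    // (node id, affinity score, total shard count, attached shard count)
    let mut scores: Vec<(NodeId, AffinityScore, usize, usize)> = vec![
        (NodeId(1), AffinityScore(0), 4, 4), // same affinity/total, but all shards attached here
        (NodeId(2), AffinityScore(0), 4, 0), // same affinity/total, no attachments yet
        (NodeId(3), AffinityScore(1), 1, 0), // lightly loaded, but worse affinity
    ];

    // Precedence: affinity, then attached shard count, then total shard count, then node ID.
    scores.sort_by_key(|i| (i.1, i.3, i.2, i.0));

    // Node 2 wins: it ties node 1 on affinity and total shards, but has fewer attachments.
    assert_eq!(scores[0].0, NodeId(2));
    println!("selected {:?}", scores[0].0);
}
```

Under the old key `(i.1, i.2, i.0)`, nodes 1 and 2 would have tied on affinity and total shard count and node 1 would have won on node ID, piling further attachments onto the node that already holds them; the new key steers new attachments to the emptier node, which is what the updated `tenant_shard.rs` assertions and the switch to `reconcile_until_idle` in the Python test rely on.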