mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-04 20:12:54 +00:00
storcon: adjust AZ selection for heterogeneous AZs (#12296)
## Problem

The scheduler uses total shards per AZ to select the AZ for newly created or attached tenants. This makes bad decisions when we have different node counts per AZ -- we might have 2 very busy pageservers in one AZ, and 4 more lightly loaded pageservers in other AZs, and the scheduler picks the busy pageservers because the total shard count in their AZ is lower.

## Summary of changes

- Divide the shard count by the number of nodes in the AZ when scoring in `get_az_for_new_tenant`

---------

Co-authored-by: John Spray <john.spray@databricks.com>
This commit is contained in:
@@ -825,6 +825,7 @@ impl Scheduler {
|
||||
struct AzScore {
|
||||
home_shard_count: usize,
|
||||
scheduleable: bool,
|
||||
node_count: usize,
|
||||
}
|
||||
|
||||
let mut azs: HashMap<&AvailabilityZone, AzScore> = HashMap::new();
|
||||
@@ -832,6 +833,7 @@ impl Scheduler {
|
||||
let az = azs.entry(&node.az).or_default();
|
||||
az.home_shard_count += node.home_shard_count;
|
||||
az.scheduleable |= matches!(node.may_schedule, MaySchedule::Yes(_));
|
||||
az.node_count += 1;
|
||||
}
|
||||
|
||||
// If any AZs are schedulable, then filter out the non-schedulable ones (i.e. AZs where
|
||||
@@ -840,10 +842,20 @@ impl Scheduler {
|
||||
azs.retain(|_, i| i.scheduleable);
|
||||
}
|
||||
|
||||
// We will multiply up shard counts by the max node count for scoring, before dividing
|
||||
// by each AZ's own node count, to get a normalized score that doesn't collapse to zero
|
||||
// when the absolute shard count is less than the node count.
|
||||
let max_node_count = azs.values().map(|i| i.node_count).max().unwrap_or(0);
|
||||
|
||||
// Find the AZ with the lowest number of shards per node currently allocated
|
||||
Some(
|
||||
azs.into_iter()
|
||||
.min_by_key(|i| (i.1.home_shard_count, i.0))
|
||||
.min_by_key(|i| {
|
||||
(
|
||||
(i.1.home_shard_count * max_node_count) / i.1.node_count,
|
||||
i.0,
|
||||
)
|
||||
})
|
||||
.unwrap()
|
||||
.0
|
||||
.clone(),
|
||||
|
||||
Reference in New Issue
Block a user