mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-14 03:30:36 +00:00
Compare commits
3 Commits
release-73
...
jcsp/issue
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
62aa8f2ba2 | ||
|
|
efc3f1cfe5 | ||
|
|
b66bf890fa |
102
Cargo.lock
generated
102
Cargo.lock
generated
@@ -82,12 +82,27 @@ dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"anstyle-wincon 1.0.1",
|
||||
"colorchoice",
|
||||
"is-terminal",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon 3.0.4",
|
||||
"colorchoice",
|
||||
"is_terminal_polyfill",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.8"
|
||||
@@ -122,6 +137,16 @@ dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.71"
|
||||
@@ -1185,7 +1210,7 @@ version = "4.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f423e341edefb78c9caba2d9c7f7687d0e72e89df3ce3394554754393ac3990"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstream 0.3.2",
|
||||
"anstyle",
|
||||
"bitflags 1.3.2",
|
||||
"clap_lex",
|
||||
@@ -1924,6 +1949,15 @@ dependencies = [
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_filter"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab"
|
||||
dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.10.2"
|
||||
@@ -1937,6 +1971,18 @@ dependencies = [
|
||||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d"
|
||||
dependencies = [
|
||||
"anstream 0.6.15",
|
||||
"anstyle",
|
||||
"env_filter",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
@@ -2811,6 +2857,12 @@ dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.5"
|
||||
@@ -3254,6 +3306,16 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.46.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
|
||||
dependencies = [
|
||||
"overload",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num"
|
||||
version = "0.4.1"
|
||||
@@ -3537,6 +3599,12 @@ version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
|
||||
|
||||
[[package]]
|
||||
name = "overload"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||
|
||||
[[package]]
|
||||
name = "p256"
|
||||
version = "0.11.1"
|
||||
@@ -4119,7 +4187,7 @@ dependencies = [
|
||||
"bindgen",
|
||||
"bytes",
|
||||
"crc32c",
|
||||
"env_logger",
|
||||
"env_logger 0.10.2",
|
||||
"log",
|
||||
"memoffset 0.9.0",
|
||||
"once_cell",
|
||||
@@ -4312,7 +4380,7 @@ dependencies = [
|
||||
"consumption_metrics",
|
||||
"dashmap",
|
||||
"ecdsa 0.16.9",
|
||||
"env_logger",
|
||||
"env_logger 0.10.2",
|
||||
"fallible-iterator",
|
||||
"framed-websockets",
|
||||
"futures",
|
||||
@@ -5787,6 +5855,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"test-log",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
@@ -6039,6 +6108,28 @@ dependencies = [
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "test-log"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3dffced63c2b5c7be278154d76b479f9f9920ed34e7574201407f0b14e2bbb93"
|
||||
dependencies = [
|
||||
"env_logger 0.11.2",
|
||||
"test-log-macros",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "test-log-macros"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.57"
|
||||
@@ -6570,6 +6661,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
|
||||
dependencies = [
|
||||
"matchers",
|
||||
"nu-ansi-term",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"serde",
|
||||
@@ -6874,7 +6966,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"camino-tempfile",
|
||||
"clap",
|
||||
"env_logger",
|
||||
"env_logger 0.10.2",
|
||||
"log",
|
||||
"postgres",
|
||||
"postgres_ffi",
|
||||
|
||||
@@ -56,3 +56,6 @@ utils = { path = "../libs/utils/" }
|
||||
metrics = { path = "../libs/metrics/" }
|
||||
control_plane = { path = "../control_plane" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
test-log = "*"
|
||||
@@ -206,6 +206,10 @@ pub(crate) struct NodeSecondarySchedulingScore {
|
||||
/// The number of shards belonging to the tenant currently being
|
||||
/// scheduled that are attached to this node.
|
||||
affinity_score: AffinityScore,
|
||||
/// Size of [`ScheduleContext::attached_nodes`] for the current node.
|
||||
/// This normally tracks the number of attached shards belonging to the
|
||||
/// tenant being scheduled that are already on this node.
|
||||
secondary_shards_in_context: usize,
|
||||
/// Utilisation score that combines shard count and disk utilisation
|
||||
utilization_score: u64,
|
||||
/// Total number of shards attached to this node. When nodes have identical utilisation, this
|
||||
@@ -231,6 +235,7 @@ impl NodeSchedulingScore for NodeSecondarySchedulingScore {
|
||||
|
||||
Some(Self {
|
||||
az_match: SecondaryAzMatch(AzMatch::new(&node.az, preferred_az.as_ref())),
|
||||
secondary_shards_in_context: context.secondary_nodes.get(node_id).copied().unwrap_or(0),
|
||||
affinity_score: context
|
||||
.nodes
|
||||
.get(node_id)
|
||||
@@ -327,6 +332,9 @@ pub(crate) struct ScheduleContext {
|
||||
/// Specifically how many _attached_ locations are on each node
|
||||
pub(crate) attached_nodes: HashMap<NodeId, usize>,
|
||||
|
||||
/// Specifically how many _secondary_ locations are on each node
|
||||
pub(crate) secondary_nodes: HashMap<NodeId, usize>,
|
||||
|
||||
pub(crate) mode: ScheduleMode,
|
||||
}
|
||||
|
||||
@@ -345,6 +353,11 @@ impl ScheduleContext {
|
||||
*entry += 1;
|
||||
}
|
||||
|
||||
pub(crate) fn push_secondary(&mut self, node_id: NodeId) {
|
||||
let entry = self.secondary_nodes.entry(node_id).or_default();
|
||||
*entry += 1;
|
||||
}
|
||||
|
||||
pub(crate) fn get_node_affinity(&self, node_id: NodeId) -> AffinityScore {
|
||||
self.nodes
|
||||
.get(&node_id)
|
||||
@@ -786,7 +799,14 @@ pub(crate) mod test_utils {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use pageserver_api::{controller_api::NodeAvailability, models::utilization::test_utilization};
|
||||
use pageserver_api::{
|
||||
controller_api::NodeAvailability, models::utilization::test_utilization,
|
||||
shard::ShardIdentity, shard::TenantShardId,
|
||||
};
|
||||
use utils::{
|
||||
id::TenantId,
|
||||
shard::{ShardCount, ShardNumber},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -1074,4 +1094,171 @@ mod tests {
|
||||
intent.clear(&mut scheduler);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn repro_foo() {
|
||||
let az_tag = AvailabilityZone("az-a".to_string());
|
||||
|
||||
let nodes = test_utils::make_test_nodes(
|
||||
5,
|
||||
&[
|
||||
az_tag.clone(),
|
||||
az_tag.clone(),
|
||||
az_tag.clone(),
|
||||
az_tag.clone(),
|
||||
az_tag.clone(),
|
||||
],
|
||||
);
|
||||
let mut scheduler = Scheduler::new(nodes.values());
|
||||
|
||||
// Need to keep these alive because they contribute to shard counts via RAII
|
||||
let mut scheduled_shards = Vec::new();
|
||||
|
||||
let mut context = ScheduleContext::default();
|
||||
|
||||
fn schedule_shard(
|
||||
tenant_shard_id: TenantShardId,
|
||||
expect_attached: NodeId,
|
||||
expect_secondary: NodeId,
|
||||
scheduled_shards: &mut Vec<TenantShard>,
|
||||
scheduler: &mut Scheduler,
|
||||
context: &mut ScheduleContext,
|
||||
) {
|
||||
let shard_identity = ShardIdentity::new(
|
||||
tenant_shard_id.shard_number,
|
||||
tenant_shard_id.shard_count,
|
||||
pageserver_api::shard::ShardStripeSize(1),
|
||||
)
|
||||
.unwrap();
|
||||
let mut shard = TenantShard::new(
|
||||
tenant_shard_id,
|
||||
shard_identity,
|
||||
pageserver_api::controller_api::PlacementPolicy::Attached(1),
|
||||
);
|
||||
|
||||
shard.schedule(scheduler, context).unwrap();
|
||||
|
||||
assert_eq!(shard.intent.get_attached().unwrap(), expect_attached);
|
||||
assert_eq!(
|
||||
shard.intent.get_secondary().first().unwrap(),
|
||||
&expect_secondary
|
||||
);
|
||||
|
||||
scheduled_shards.push(shard);
|
||||
}
|
||||
|
||||
let tenant_id = TenantId::generate();
|
||||
|
||||
schedule_shard(
|
||||
TenantShardId {
|
||||
tenant_id,
|
||||
shard_number: ShardNumber(0),
|
||||
shard_count: ShardCount(8),
|
||||
},
|
||||
NodeId(1),
|
||||
NodeId(2),
|
||||
&mut scheduled_shards,
|
||||
&mut scheduler,
|
||||
&mut context,
|
||||
);
|
||||
|
||||
schedule_shard(
|
||||
TenantShardId {
|
||||
tenant_id,
|
||||
shard_number: ShardNumber(1),
|
||||
shard_count: ShardCount(8),
|
||||
},
|
||||
NodeId(3),
|
||||
NodeId(4),
|
||||
&mut scheduled_shards,
|
||||
&mut scheduler,
|
||||
&mut context,
|
||||
);
|
||||
|
||||
schedule_shard(
|
||||
TenantShardId {
|
||||
tenant_id,
|
||||
shard_number: ShardNumber(2),
|
||||
shard_count: ShardCount(8),
|
||||
},
|
||||
NodeId(5),
|
||||
NodeId(1),
|
||||
&mut scheduled_shards,
|
||||
&mut scheduler,
|
||||
&mut context,
|
||||
);
|
||||
|
||||
schedule_shard(
|
||||
TenantShardId {
|
||||
tenant_id,
|
||||
shard_number: ShardNumber(3),
|
||||
shard_count: ShardCount(8),
|
||||
},
|
||||
NodeId(2),
|
||||
NodeId(3),
|
||||
&mut scheduled_shards,
|
||||
&mut scheduler,
|
||||
&mut context,
|
||||
);
|
||||
|
||||
schedule_shard(
|
||||
TenantShardId {
|
||||
tenant_id,
|
||||
shard_number: ShardNumber(4),
|
||||
shard_count: ShardCount(8),
|
||||
},
|
||||
NodeId(4),
|
||||
NodeId(5),
|
||||
&mut scheduled_shards,
|
||||
&mut scheduler,
|
||||
&mut context,
|
||||
);
|
||||
|
||||
schedule_shard(
|
||||
TenantShardId {
|
||||
tenant_id,
|
||||
shard_number: ShardNumber(5),
|
||||
shard_count: ShardCount(8),
|
||||
},
|
||||
NodeId(1),
|
||||
NodeId(2),
|
||||
&mut scheduled_shards,
|
||||
&mut scheduler,
|
||||
&mut context,
|
||||
);
|
||||
|
||||
schedule_shard(
|
||||
TenantShardId {
|
||||
tenant_id,
|
||||
shard_number: ShardNumber(6),
|
||||
shard_count: ShardCount(8),
|
||||
},
|
||||
NodeId(3),
|
||||
NodeId(4),
|
||||
&mut scheduled_shards,
|
||||
&mut scheduler,
|
||||
&mut context,
|
||||
);
|
||||
|
||||
schedule_shard(
|
||||
TenantShardId {
|
||||
tenant_id,
|
||||
shard_number: ShardNumber(7),
|
||||
shard_count: ShardCount(8),
|
||||
},
|
||||
NodeId(5),
|
||||
NodeId(1),
|
||||
&mut scheduled_shards,
|
||||
&mut scheduler,
|
||||
&mut context,
|
||||
);
|
||||
|
||||
for shard in &scheduled_shards {
|
||||
assert_eq!(shard.optimize_attachment(&nodes, &context), None);
|
||||
}
|
||||
|
||||
for mut shard in scheduled_shards {
|
||||
shard.intent.clear(&mut scheduler);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5858,10 +5858,7 @@ impl Service {
|
||||
|
||||
// Accumulate the schedule context for all the shards in a tenant: we must have
|
||||
// the total view of all shards before we can try to optimize any of them.
|
||||
schedule_context.avoid(&shard.intent.all_pageservers());
|
||||
if let Some(attached) = shard.intent.get_attached() {
|
||||
schedule_context.push_attached(*attached);
|
||||
}
|
||||
shard.populate_context(&mut schedule_context);
|
||||
tenant_shards.push(shard);
|
||||
|
||||
// Once we have seen the last shard in the tenant, proceed to search across all shards
|
||||
|
||||
@@ -566,10 +566,7 @@ impl TenantShard {
|
||||
) -> Result<(), ScheduleError> {
|
||||
let r = self.do_schedule(scheduler, context);
|
||||
|
||||
context.avoid(&self.intent.all_pageservers());
|
||||
if let Some(attached) = self.intent.get_attached() {
|
||||
context.push_attached(*attached);
|
||||
}
|
||||
self.populate_context(context);
|
||||
|
||||
r
|
||||
}
|
||||
@@ -676,6 +673,19 @@ impl TenantShard {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// When building the ScheduleContext of a tenant, call this on each shard to
|
||||
/// add its contribution to the context.
|
||||
pub(crate) fn populate_context(&self, context: &mut ScheduleContext) {
|
||||
context.avoid(&self.intent.all_pageservers());
|
||||
|
||||
if let Some(attached) = self.intent.get_attached() {
|
||||
context.push_attached(*attached);
|
||||
}
|
||||
for secondary in self.intent.get_secondary() {
|
||||
context.push_secondary(*secondary);
|
||||
}
|
||||
}
|
||||
|
||||
/// Reschedule this tenant shard to one of its secondary locations. Returns a scheduling error
|
||||
/// if the swap is not possible and leaves the intent state in its original state.
|
||||
///
|
||||
@@ -823,10 +833,13 @@ impl TenantShard {
|
||||
continue;
|
||||
};
|
||||
|
||||
// TODO: make this AZ aware: secondary should be chosen "As if I am an attachment, but
|
||||
// in a different AZ to my actual preferred AZ"
|
||||
|
||||
// Let the scheduler suggest a node, where it would put us if we were scheduling afresh
|
||||
// This implicitly limits the choice to nodes that are available, and prefers nodes
|
||||
// with lower utilization.
|
||||
let Ok(candidate_node) = scheduler.schedule_shard::<SecondaryShardTag>(
|
||||
let Ok(candidate_node) = scheduler.schedule_shard::<AttachedShardTag>(
|
||||
&self.intent.all_pageservers(),
|
||||
&self.preferred_az_id,
|
||||
schedule_context,
|
||||
@@ -1632,10 +1645,8 @@ pub(crate) mod tests {
|
||||
shard_b.intent.push_secondary(&mut scheduler, NodeId(3));
|
||||
|
||||
let mut schedule_context = ScheduleContext::default();
|
||||
schedule_context.avoid(&shard_a.intent.all_pageservers());
|
||||
schedule_context.push_attached(shard_a.intent.get_attached().unwrap());
|
||||
schedule_context.avoid(&shard_b.intent.all_pageservers());
|
||||
schedule_context.push_attached(shard_b.intent.get_attached().unwrap());
|
||||
shard_a.populate_context(&mut schedule_context);
|
||||
shard_b.populate_context(&mut schedule_context);
|
||||
|
||||
let optimization_a = shard_a.optimize_attachment(&nodes, &schedule_context);
|
||||
|
||||
@@ -1699,10 +1710,8 @@ pub(crate) mod tests {
|
||||
shard_b.intent.push_secondary(&mut scheduler, NodeId(3));
|
||||
|
||||
let mut schedule_context = ScheduleContext::default();
|
||||
schedule_context.avoid(&shard_a.intent.all_pageservers());
|
||||
schedule_context.push_attached(shard_a.intent.get_attached().unwrap());
|
||||
schedule_context.avoid(&shard_b.intent.all_pageservers());
|
||||
schedule_context.push_attached(shard_b.intent.get_attached().unwrap());
|
||||
shard_a.populate_context(&mut schedule_context);
|
||||
shard_b.populate_context(&mut schedule_context);
|
||||
|
||||
let optimization_a = shard_a.optimize_secondary(&mut scheduler, &schedule_context);
|
||||
|
||||
@@ -1744,10 +1753,7 @@ pub(crate) mod tests {
|
||||
let mut any_changed = false;
|
||||
|
||||
for shard in shards.iter() {
|
||||
schedule_context.avoid(&shard.intent.all_pageservers());
|
||||
if let Some(attached) = shard.intent.get_attached() {
|
||||
schedule_context.push_attached(*attached);
|
||||
}
|
||||
shard.populate_context(&mut schedule_context);
|
||||
}
|
||||
|
||||
for shard in shards.iter_mut() {
|
||||
|
||||
Reference in New Issue
Block a user