1use api::v1::meta::heartbeat_request::NodeWorkloads;
16use api::v1::meta::{ErrorCode, ResponseHeader};
17use common_meta::cluster::{NodeInfo, NodeStatus};
18use common_time::util::SystemTimer;
19use tonic::{Code, Status};
20
21pub(crate) fn is_unreachable(status: &Status) -> bool {
22 status.code() == Code::Unavailable || status.code() == Code::DeadlineExceeded
23}
24
25pub(crate) fn is_not_leader(header: &Option<ResponseHeader>) -> bool {
26 let Some(header) = header else {
27 return false;
28 };
29
30 let Some(err) = header.error.as_ref() else {
31 return false;
32 };
33
34 err.code == ErrorCode::NotLeader as i32
35}
36
37fn is_active_node(
38 timer: &impl SystemTimer,
39 last_activity_ts: i64,
40 active_duration: std::time::Duration,
41) -> bool {
42 let now = timer.current_time_millis();
43 let elapsed = now.checked_sub(last_activity_ts).unwrap_or(0) as u64;
44 elapsed < active_duration.as_millis() as u64
45}
46
47pub(crate) fn alive_frontends(
48 timer: &impl SystemTimer,
49 nodes: Vec<NodeInfo>,
50 active_duration: std::time::Duration,
51) -> Vec<NodeInfo> {
52 nodes
53 .into_iter()
54 .filter_map(|node| {
55 if matches!(node.status, NodeStatus::Frontend(_))
56 && is_active_node(timer, node.last_activity_ts, active_duration)
57 {
58 Some(node)
59 } else {
60 None
61 }
62 })
63 .collect()
64}
65
66pub(crate) fn alive_datanodes(
67 timer: &impl SystemTimer,
68 nodes: Vec<NodeInfo>,
69 active_duration: std::time::Duration,
70 filter: Option<for<'a> fn(&'a NodeWorkloads) -> bool>,
71) -> Vec<NodeInfo> {
72 let filter = filter.unwrap_or(|_| true);
73
74 nodes
75 .into_iter()
76 .filter_map(|node| {
77 if let NodeStatus::Datanode(status) = &node.status
78 && is_active_node(timer, node.last_activity_ts, active_duration)
79 {
80 let workloads = NodeWorkloads::Datanode(status.workloads.clone());
81 filter(&workloads).then_some(node)
82 } else {
83 None
84 }
85 })
86 .collect()
87}
88
89pub(crate) fn alive_flownodes(
90 timer: &impl SystemTimer,
91 nodes: Vec<NodeInfo>,
92 active_duration: std::time::Duration,
93 filter: Option<for<'a> fn(&'a NodeWorkloads) -> bool>,
94) -> Vec<NodeInfo> {
95 let filter = filter.unwrap_or(|_| true);
96
97 nodes
98 .into_iter()
99 .filter_map(|node| {
100 if let NodeStatus::Flownode(status) = &node.status
101 && is_active_node(timer, node.last_activity_ts, active_duration)
102 {
103 let workloads = NodeWorkloads::Flownode(status.workloads.clone());
104 filter(&workloads).then_some(node)
105 } else {
106 None
107 }
108 })
109 .collect()
110}
111
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use api::v1::meta::heartbeat_request::NodeWorkloads;
    use api::v1::meta::{DatanodeWorkloads, FlownodeWorkloads, Peer};
    use common_meta::cluster::{
        DatanodeStatus, FlownodeStatus, FrontendStatus, MetasrvStatus, NodeInfo, NodeStatus, Role,
    };
    use common_time::util::SystemTimer;

    use super::*;

    /// "Current time" reported by the mock timer in every test.
    const NOW: i64 = 100;

    /// Activity window used by every test: with `NOW == 100`, a timestamp of
    /// 95 falls inside the window and 89 falls outside.
    const ACTIVE_WINDOW: Duration = Duration::from_millis(10);

    /// Timer stub that always reports the millisecond value it was built with.
    struct MockSystemTimer {
        now_millis: i64,
    }

    impl MockSystemTimer {
        fn new(now: i64) -> Self {
            Self { now_millis: now }
        }
    }

    impl SystemTimer for MockSystemTimer {
        fn current_time_millis(&self) -> i64 {
            self.now_millis
        }

        fn current_time_rfc3339(&self) -> String {
            "1970-01-01T00:00:00Z".to_string()
        }
    }

    /// Builds a `NodeInfo` for `role` with empty workloads and zeroed metrics.
    fn node_info(role: Role, id: u64, addr: &str, last_activity_ts: i64) -> NodeInfo {
        let status = match role {
            Role::Frontend => NodeStatus::Frontend(FrontendStatus {}),
            Role::Datanode => NodeStatus::Datanode(DatanodeStatus {
                rcus: 0,
                wcus: 0,
                leader_regions: 0,
                follower_regions: 0,
                workloads: DatanodeWorkloads { types: vec![] },
            }),
            Role::Flownode => NodeStatus::Flownode(FlownodeStatus {
                workloads: FlownodeWorkloads { types: vec![] },
            }),
            Role::Metasrv => NodeStatus::Metasrv(MetasrvStatus { is_leader: false }),
        };

        NodeInfo {
            peer: Peer::new(id, addr),
            last_activity_ts,
            status,
            version: String::new(),
            git_commit: String::new(),
            start_time_ms: 0,
            total_cpu_millicores: 0,
            total_memory_bytes: 0,
            cpu_usage_millicores: 0,
            memory_usage_bytes: 0,
            hostname: String::new(),
            env_vars: Default::default(),
        }
    }

    /// Builds a datanode `NodeInfo` whose status carries the given workloads.
    fn datanode_with(
        id: u64,
        addr: &str,
        last_activity_ts: i64,
        workloads: DatanodeWorkloads,
    ) -> NodeInfo {
        let mut node = node_info(Role::Datanode, id, addr, last_activity_ts);
        if let NodeStatus::Datanode(status) = &mut node.status {
            status.workloads = workloads;
        }
        node
    }

    /// Builds a flownode `NodeInfo` whose status carries the given workloads.
    fn flownode_with(
        id: u64,
        addr: &str,
        last_activity_ts: i64,
        workloads: FlownodeWorkloads,
    ) -> NodeInfo {
        let mut node = node_info(Role::Flownode, id, addr, last_activity_ts);
        if let NodeStatus::Flownode(status) = &mut node.status {
            status.workloads = workloads;
        }
        node
    }

    /// Accepts datanode workloads whose type list is exactly `[1]`.
    fn ingest_only(workloads: &NodeWorkloads) -> bool {
        match workloads {
            NodeWorkloads::Datanode(datanode) => datanode.types.as_slice() == [1],
            _ => false,
        }
    }

    /// Accepts flownode workloads whose type list is empty.
    fn empty_flownode_workloads(workloads: &NodeWorkloads) -> bool {
        match workloads {
            NodeWorkloads::Flownode(flownode) => flownode.types.is_empty(),
            _ => false,
        }
    }

    /// Accepts flownode workloads whose type list is exactly `[7]`.
    fn workload_type_is_7(workloads: &NodeWorkloads) -> bool {
        match workloads {
            NodeWorkloads::Flownode(flownode) => flownode.types.as_slice() == [7],
            _ => false,
        }
    }

    #[test]
    fn test_alive_frontends_filters_by_activity_and_role() {
        let timer = MockSystemTimer::new(NOW);
        let candidates = vec![
            node_info(Role::Frontend, 1, "127.0.0.1:3001", 95),
            node_info(Role::Frontend, 2, "127.0.0.1:3002", 89),
            node_info(Role::Datanode, 3, "127.0.0.1:4001", 99),
        ];

        let peers = alive_frontends(&timer, candidates, ACTIVE_WINDOW);

        // Node 2 is stale and node 3 has the wrong role.
        let ids: Vec<_> = peers.iter().map(|node| node.peer.id).collect();
        assert_eq!(vec![1], ids);
    }

    #[test]
    fn test_alive_datanodes_filters_by_activity_and_workload() {
        let timer = MockSystemTimer::new(NOW);
        let candidates = vec![
            datanode_with(1, "127.0.0.1:4001", 95, DatanodeWorkloads { types: vec![1] }),
            datanode_with(2, "127.0.0.1:4002", 95, DatanodeWorkloads { types: vec![2] }),
            node_info(Role::Datanode, 3, "127.0.0.1:4003", 89),
        ];

        let peers = alive_datanodes(&timer, candidates, ACTIVE_WINDOW, Some(ingest_only));

        // Node 2 fails the workload filter and node 3 is stale.
        let ids: Vec<_> = peers.iter().map(|node| node.peer.id).collect();
        assert_eq!(vec![1], ids);
    }

    #[test]
    fn test_alive_flownodes_uses_empty_workload_semantics() {
        let timer = MockSystemTimer::new(NOW);
        let candidates = vec![
            node_info(Role::Flownode, 1, "127.0.0.1:5001", 95),
            node_info(Role::Flownode, 2, "127.0.0.1:5002", 89),
            node_info(Role::Frontend, 3, "127.0.0.1:3001", 99),
        ];

        let peers = alive_flownodes(
            &timer,
            candidates,
            ACTIVE_WINDOW,
            Some(empty_flownode_workloads),
        );

        // Node 2 is stale and node 3 has the wrong role.
        let ids: Vec<_> = peers.iter().map(|node| node.peer.id).collect();
        assert_eq!(vec![1], ids);
    }

    #[test]
    fn test_alive_flownodes_filters_by_workloads() {
        let timer = MockSystemTimer::new(NOW);
        let candidates = vec![
            flownode_with(1, "127.0.0.1:5001", 95, FlownodeWorkloads { types: vec![7] }),
            flownode_with(2, "127.0.0.1:5002", 95, FlownodeWorkloads { types: vec![8] }),
        ];

        let peers = alive_flownodes(&timer, candidates, ACTIVE_WINDOW, Some(workload_type_is_7));

        // Both nodes are active; only node 1 carries workload type 7.
        let ids: Vec<_> = peers.iter().map(|node| node.peer.id).collect();
        assert_eq!(vec![1], ids);
    }
}