1use std::collections::HashMap;
16use std::fmt::{Display, Formatter};
17use std::hash::{DefaultHasher, Hash, Hasher};
18use std::str::FromStr;
19
20use api::v1::meta::{DatanodeWorkloads, FlownodeWorkloads, FrontendWorkloads, HeartbeatRequest};
21use common_error::ext::ErrorExt;
22use lazy_static::lazy_static;
23use regex::Regex;
24use serde::{Deserialize, Serialize};
25use snafu::{OptionExt, ResultExt, ensure};
26
27use crate::datanode::RegionStat;
28use crate::error::{
29 DecodeJsonSnafu, EncodeJsonSnafu, Error, FromUtf8Snafu, InvalidNodeInfoKeySnafu,
30 InvalidRoleSnafu, ParseNumSnafu, Result,
31};
32use crate::key::flow::flow_state::FlowStat;
33use crate::peer::Peer;
34
/// Common prefix of every encoded [`NodeInfoKey`].
const CLUSTER_NODE_INFO_PREFIX: &str = "__meta_cluster_node_info";

lazy_static! {
    /// Matches a complete encoded key: the prefix followed by three `-`-separated
    /// decimal groups. `NodeInfoKey::from_str` reads group 2 as the role code and
    /// group 3 as the node id; group 1 is matched but not read there.
    // The pattern is assembled from a constant, so `unwrap` can only fail if the
    // pattern text itself is wrong.
    static ref CLUSTER_NODE_INFO_PREFIX_PATTERN: Regex = Regex::new(&format!(
        "^{CLUSTER_NODE_INFO_PREFIX}-([0-9]+)-([0-9]+)-([0-9]+)$"
    ))
    .unwrap();
}
43
/// Asynchronous queries for cluster membership and statistics.
///
/// Implementors choose their own error type through [`ClusterInfo::Error`].
#[async_trait::async_trait]
pub trait ClusterInfo {
    /// Error returned by every query of this trait.
    type Error: ErrorExt;

    /// Returns the known nodes as [`NodeInfo`] records.
    ///
    /// NOTE(review): `role` presumably restricts the result to one role, with
    /// `None` meaning "all roles" — confirm against the implementors.
    async fn list_nodes(
        &self,
        role: Option<Role>,
    ) -> std::result::Result<Vec<NodeInfo>, Self::Error>;

    /// Returns a list of [`RegionStat`] values.
    async fn list_region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error>;

    /// Returns a [`FlowStat`] if one is available, otherwise `None`.
    async fn list_flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error>;

    }
63
/// Key identifying one node's [`NodeInfo`].
///
/// Its byte form is `__meta_cluster_node_info-0-{role}-{node_id}`, where
/// `{role}` is the `i32` code of the role and `{node_id}` is decimal.
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize, PartialOrd, Ord)]
pub struct NodeInfoKey {
    /// Role of the node.
    pub role: Role,
    /// Id of the node. For frontends built through [`NodeInfoKey::new`] this is
    /// a hash of the peer address rather than the peer's own id.
    pub node_id: u64,
}
72
73impl NodeInfoKey {
74 pub fn new(request: &HeartbeatRequest) -> Option<Self> {
77 let HeartbeatRequest { header, peer, .. } = request;
78 let header = header.as_ref()?;
79 let peer = peer.as_ref()?;
80
81 let role = header.role.try_into().ok()?;
82 let node_id = match role {
83 Role::Frontend => calculate_node_id(&peer.addr),
86 _ => peer.id,
87 };
88
89 Some(NodeInfoKey { role, node_id })
90 }
91
92 pub fn key_prefix() -> String {
93 format!("{}-0-", CLUSTER_NODE_INFO_PREFIX)
94 }
95
96 pub fn key_prefix_with_role(role: Role) -> String {
97 format!("{}-0-{}-", CLUSTER_NODE_INFO_PREFIX, i32::from(role))
98 }
99}
100
/// Derives a node id from an address by hashing it with [`DefaultHasher`].
///
/// The same address always yields the same id within one build.
///
/// NOTE(review): the standard library does not specify `DefaultHasher`'s
/// algorithm across Rust releases, so ids may differ between binaries built
/// with different toolchains — confirm this is acceptable for stored keys.
fn calculate_node_id(addr: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(addr, &mut state);
    Hasher::finish(&state)
}
107
/// Information recorded about one node of the cluster.
///
/// Serialized as JSON by the conversions in this file. Fields marked
/// `#[serde(default)]` may be absent from the JSON and then take their
/// type's default value.
#[derive(Debug, Serialize, Deserialize)]
pub struct NodeInfo {
    /// The node's peer (id and address).
    pub peer: Peer,
    /// Timestamp of the node's last activity.
    /// NOTE(review): unit is not visible here (presumably milliseconds) — confirm.
    pub last_activity_ts: i64,
    /// Role-specific status of the node.
    pub status: NodeStatus,
    /// Version string of the node.
    pub version: String,
    /// Git commit string of the node.
    pub git_commit: String,
    /// Start time of the node, in milliseconds as the name indicates.
    pub start_time_ms: u64,
    /// Total CPU, in millicores as the name indicates.
    #[serde(default)]
    pub total_cpu_millicores: i64,
    /// Total memory, in bytes as the name indicates.
    #[serde(default)]
    pub total_memory_bytes: i64,
    /// CPU usage, in millicores as the name indicates.
    #[serde(default)]
    pub cpu_usage_millicores: i64,
    /// Memory usage, in bytes as the name indicates.
    #[serde(default)]
    pub memory_usage_bytes: i64,
    /// Hostname of the node.
    #[serde(default)]
    pub hostname: String,
    /// Environment variables reported for the node, as name/value pairs.
    #[serde(default)]
    pub env_vars: HashMap<String, String>,
}
142
/// Role of a node in the cluster.
///
/// Each role has an `i32` code (see `From<Role> for i32`), which is the value
/// embedded in encoded [`NodeInfoKey`]s.
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize, PartialOrd, Ord)]
pub enum Role {
    /// Datanode; code `0`.
    Datanode,
    /// Frontend; code `1`.
    Frontend,
    /// Flownode; code `2`.
    Flownode,
    /// Metasrv; code `99`.
    Metasrv,
}
150
/// Role-specific status carried by a [`NodeInfo`].
#[derive(Debug, Serialize, Deserialize)]
pub enum NodeStatus {
    /// Status of a datanode.
    Datanode(DatanodeStatus),
    /// Status of a frontend.
    Frontend(FrontendStatus),
    /// Status of a flownode.
    Flownode(FlownodeStatus),
    /// Status of a metasrv.
    Metasrv(MetasrvStatus),
    /// Standalone node; carries no additional status.
    Standalone,
}
159
160impl NodeStatus {
161 pub fn role_name(&self) -> &str {
163 match self {
164 NodeStatus::Datanode(_) => "DATANODE",
165 NodeStatus::Frontend(_) => "FRONTEND",
166 NodeStatus::Flownode(_) => "FLOWNODE",
167 NodeStatus::Metasrv(_) => "METASRV",
168 NodeStatus::Standalone => "STANDALONE",
169 }
170 }
171}
172
/// Status of a datanode.
#[derive(Debug, Serialize, Deserialize)]
pub struct DatanodeStatus {
    /// NOTE(review): presumably read capacity units — confirm.
    pub rcus: i64,
    /// NOTE(review): presumably write capacity units — confirm.
    pub wcus: i64,
    /// Number of leader regions.
    pub leader_regions: usize,
    /// Number of follower regions.
    pub follower_regions: usize,
    /// Workloads of the datanode.
    pub workloads: DatanodeWorkloads,
}
187
/// Status of a frontend.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct FrontendStatus {
    /// Workloads of the frontend; defaults when the field is absent from the
    /// serialized form.
    #[serde(default)]
    pub workloads: FrontendWorkloads,
}
195
/// Status of a flownode.
#[derive(Debug, Serialize, Deserialize)]
pub struct FlownodeStatus {
    /// Workloads of the flownode; defaults when the field is absent from the
    /// serialized form, so payloads without `workloads` still deserialize.
    #[serde(default)]
    pub workloads: FlownodeWorkloads,
}
203
/// Status of a metasrv.
#[derive(Debug, Serialize, Deserialize)]
pub struct MetasrvStatus {
    /// Whether this metasrv is the leader.
    pub is_leader: bool,
}
209
210impl FromStr for NodeInfoKey {
211 type Err = Error;
212
213 fn from_str(key: &str) -> Result<Self> {
214 let caps = CLUSTER_NODE_INFO_PREFIX_PATTERN
215 .captures(key)
216 .context(InvalidNodeInfoKeySnafu { key })?;
217 ensure!(caps.len() == 4, InvalidNodeInfoKeySnafu { key });
218
219 let role = caps[2].to_string();
220 let node_id = caps[3].to_string();
221 let role: i32 = role.parse().context(ParseNumSnafu {
222 err_msg: format!("invalid role {role}"),
223 })?;
224 let role = Role::try_from(role)?;
225 let node_id: u64 = node_id.parse().context(ParseNumSnafu {
226 err_msg: format!("invalid node_id: {node_id}"),
227 })?;
228
229 Ok(Self { role, node_id })
230 }
231}
232
233impl TryFrom<Vec<u8>> for NodeInfoKey {
234 type Error = Error;
235
236 fn try_from(bytes: Vec<u8>) -> Result<Self> {
237 String::from_utf8(bytes)
238 .context(FromUtf8Snafu {
239 name: "NodeInfoKey",
240 })
241 .map(|x| x.parse())?
242 }
243}
244
245impl From<&NodeInfoKey> for Vec<u8> {
246 fn from(key: &NodeInfoKey) -> Self {
247 format!(
248 "{}-0-{}-{}",
249 CLUSTER_NODE_INFO_PREFIX,
250 i32::from(key.role),
251 key.node_id
252 )
253 .into_bytes()
254 }
255}
256
257impl Display for NodeInfoKey {
258 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
259 write!(f, "{:?}-{}", self.role, self.node_id)
260 }
261}
262
263impl FromStr for NodeInfo {
264 type Err = Error;
265
266 fn from_str(value: &str) -> Result<Self> {
267 serde_json::from_str(value).context(DecodeJsonSnafu)
268 }
269}
270
271impl TryFrom<Vec<u8>> for NodeInfo {
272 type Error = Error;
273
274 fn try_from(bytes: Vec<u8>) -> Result<Self> {
275 String::from_utf8(bytes)
276 .context(FromUtf8Snafu { name: "NodeInfo" })
277 .map(|x| x.parse())?
278 }
279}
280
281impl TryFrom<NodeInfo> for Vec<u8> {
282 type Error = Error;
283
284 fn try_from(info: NodeInfo) -> Result<Self> {
285 Ok(serde_json::to_string(&info)
286 .context(EncodeJsonSnafu)?
287 .into_bytes())
288 }
289}
290
291impl From<Role> for i32 {
292 fn from(role: Role) -> Self {
293 match role {
294 Role::Datanode => 0,
295 Role::Frontend => 1,
296 Role::Flownode => 2,
297 Role::Metasrv => 99,
298 }
299 }
300}
301
302impl TryFrom<i32> for Role {
303 type Error = Error;
304
305 fn try_from(role: i32) -> Result<Self> {
306 match role {
307 0 => Ok(Self::Datanode),
308 1 => Ok(Self::Frontend),
309 2 => Ok(Self::Flownode),
310 99 => Ok(Self::Metasrv),
311 _ => InvalidRoleSnafu { role }.fail(),
312 }
313 }
314}
315
#[cfg(test)]
mod tests {
    use std::assert_matches;

    use common_workload::DatanodeWorkloadType;

    use super::*;
    use crate::cluster::Role::{Datanode, Frontend};
    use crate::cluster::{DatanodeStatus, FlownodeStatus, NodeInfo, NodeInfoKey, NodeStatus};
    use crate::peer::Peer;

    /// Builds a `NodeInfo` with the peer, timestamps and resource numbers that
    /// all round-trip tests share; only the varying parts are parameters.
    fn node_info_fixture(
        status: NodeStatus,
        hostname: &str,
        env_vars: HashMap<String, String>,
    ) -> NodeInfo {
        NodeInfo {
            peer: Peer {
                id: 1,
                addr: "127.0.0.1".to_string(),
            },
            last_activity_ts: 123,
            status,
            version: "".to_string(),
            git_commit: "".to_string(),
            start_time_ms: 1,
            total_cpu_millicores: 0,
            total_memory_bytes: 0,
            cpu_usage_millicores: 0,
            memory_usage_bytes: 0,
            hostname: hostname.to_string(),
            env_vars,
        }
    }

    #[test]
    fn test_node_info_key_round_trip() {
        let key = NodeInfoKey {
            role: Datanode,
            node_id: 2,
        };

        let key_bytes: Vec<u8> = (&key).into();
        // Pin the encoded form as well, not just the decoded fields.
        assert_eq!(key_bytes, b"__meta_cluster_node_info-0-0-2".to_vec());

        let new_key: NodeInfoKey = key_bytes.try_into().unwrap();
        assert_eq!(key, new_key);
    }

    #[test]
    fn test_node_info_key_rejects_malformed_input() {
        // Does not match the key pattern at all.
        assert!("not-a-node-info-key".parse::<NodeInfoKey>().is_err());
        // Role code without a matching `Role`.
        assert!(
            "__meta_cluster_node_info-0-7-1"
                .parse::<NodeInfoKey>()
                .is_err()
        );
        // Node id that does not fit into `u64`.
        assert!(
            "__meta_cluster_node_info-0-0-99999999999999999999999"
                .parse::<NodeInfoKey>()
                .is_err()
        );
        // Bytes that are not valid UTF-8.
        assert!(NodeInfoKey::try_from(vec![0xff_u8, 0xfe]).is_err());
    }

    #[test]
    fn test_node_info_key_display() {
        let key = NodeInfoKey {
            role: Frontend,
            node_id: 7,
        };
        assert_eq!(key.to_string(), "Frontend-7");
    }

    #[test]
    fn test_role_code_round_trip() {
        for role in [Datanode, Frontend, Role::Flownode, Role::Metasrv] {
            assert_eq!(Role::try_from(i32::from(role)).unwrap(), role);
        }
        assert!(Role::try_from(3).is_err());
    }

    #[test]
    fn test_node_info_round_trip() {
        let node_info = node_info_fixture(
            NodeStatus::Datanode(DatanodeStatus {
                rcus: 1,
                wcus: 2,
                leader_regions: 3,
                follower_regions: 4,
                workloads: DatanodeWorkloads {
                    types: vec![DatanodeWorkloadType::Hybrid.to_i32()],
                },
            }),
            "test_hostname",
            Default::default(),
        );

        let node_info_bytes: Vec<u8> = node_info.try_into().unwrap();
        let new_node_info: NodeInfo = node_info_bytes.try_into().unwrap();

        // Checked before `assert_matches!`, which moves `workloads` out of the value.
        assert_eq!(new_node_info.peer.addr, "127.0.0.1");
        assert_eq!(new_node_info.hostname, "test_hostname");

        assert_matches!(
            new_node_info,
            NodeInfo {
                peer: Peer { id: 1, .. },
                last_activity_ts: 123,
                status: NodeStatus::Datanode(DatanodeStatus {
                    rcus: 1,
                    wcus: 2,
                    leader_regions: 3,
                    follower_regions: 4,
                    workloads,
                }),
                start_time_ms: 1,
                ..
            } if workloads.types == vec![DatanodeWorkloadType::Hybrid.to_i32()]
        );
    }

    #[test]
    fn test_node_info_key_prefix() {
        let prefix = NodeInfoKey::key_prefix();
        assert_eq!(prefix, "__meta_cluster_node_info-0-");

        let prefix = NodeInfoKey::key_prefix_with_role(Frontend);
        assert_eq!(prefix, "__meta_cluster_node_info-0-1-");
    }

    #[test]
    fn test_calculate_node_id_from_addr() {
        assert_eq!(calculate_node_id(""), calculate_node_id(""));

        let addr1 = "127.0.0.1:8080";
        let id1 = calculate_node_id(addr1);
        let id2 = calculate_node_id(addr1);
        assert_eq!(id1, id2);

        let addr2 = "127.0.0.1:8081";
        let id3 = calculate_node_id(addr2);
        assert_ne!(id1, id3);

        // A hash may legitimately be any `u64`, including 0, so only check
        // that a long address hashes deterministically.
        let long_addr = "very.long.domain.name.example.com:9999";
        let id4 = calculate_node_id(long_addr);
        assert_eq!(id4, calculate_node_id(long_addr));
    }

    #[test]
    fn test_flownode_status_backward_compatible_without_workloads() {
        let raw = r#"{
            "peer":{"id":1,"addr":"127.0.0.1"},
            "last_activity_ts":123,
            "status":{"Flownode":{}},
            "version":"",
            "git_commit":"",
            "start_time_ms":1,
            "total_cpu_millicores":0,
            "total_memory_bytes":0,
            "cpu_usage_millicores":0,
            "memory_usage_bytes":0,
            "hostname":""
        }"#;

        let node_info: NodeInfo = raw.parse().unwrap();
        assert_matches!(
            node_info.status,
            NodeStatus::Flownode(FlownodeStatus { workloads }) if workloads.types.is_empty()
        );
    }

    #[test]
    fn test_flownode_status_round_trip_with_workloads() {
        let node_info = node_info_fixture(
            NodeStatus::Flownode(FlownodeStatus {
                workloads: FlownodeWorkloads { types: vec![7] },
            }),
            "test_hostname",
            Default::default(),
        );

        let node_info_bytes: Vec<u8> = node_info.try_into().unwrap();
        let new_node_info: NodeInfo = node_info_bytes.try_into().unwrap();

        assert_matches!(
            new_node_info,
            NodeInfo {
                status: NodeStatus::Flownode(FlownodeStatus { workloads }),
                ..
            } if workloads.types == vec![7]
        );
    }

    #[test]
    fn test_node_info_backward_compatible_without_env_vars() {
        let raw = r#"{
            "peer":{"id":1,"addr":"127.0.0.1"},
            "last_activity_ts":123,
            "status":{"Datanode":{"rcus":0,"wcus":0,"leader_regions":0,"follower_regions":0,"workloads":{"types":[0]}}},
            "version":"",
            "git_commit":"",
            "start_time_ms":1,
            "total_cpu_millicores":0,
            "total_memory_bytes":0,
            "cpu_usage_millicores":0,
            "memory_usage_bytes":0,
            "hostname":"test"
        }"#;

        let node_info: NodeInfo = raw.parse().unwrap();
        assert!(node_info.env_vars.is_empty());
    }

    #[test]
    fn test_node_info_with_env_vars_round_trip() {
        let mut env_vars = HashMap::new();
        env_vars.insert("AZ".to_string(), "us-east-1a".to_string());

        let node_info = node_info_fixture(
            NodeStatus::Datanode(DatanodeStatus {
                rcus: 0,
                wcus: 0,
                leader_regions: 0,
                follower_regions: 0,
                workloads: DatanodeWorkloads { types: vec![] },
            }),
            "test",
            env_vars,
        );

        let node_info_bytes: Vec<u8> = node_info.try_into().unwrap();
        let new_node_info: NodeInfo = node_info_bytes.try_into().unwrap();
        assert_eq!(new_node_info.env_vars.len(), 1);
        assert_eq!(new_node_info.env_vars.get("AZ").unwrap(), "us-east-1a");
    }
}