feat: refine failure detector (#7005)

* feat: refine failure detector

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix format

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* revert back default value

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* revert change of test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2025-09-23 18:43:22 -07:00
committed by GitHub
parent 80c8ab42b0
commit f65dcd12cc
4 changed files with 18 additions and 44 deletions

View File

@@ -378,10 +378,9 @@
| `procedure.max_metadata_value_size` | String | `1500KiB` | Automatically split large values.<br/>GreptimeDB procedure uses etcd as the default metadata storage backend.<br/>The maximum size of any etcd request is 1.5 MiB.<br/>1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key)<br/>Comment out `max_metadata_value_size` to avoid splitting large values (no limit). |
| `procedure.max_running_procedures` | Integer | `128` | Max running procedures.<br/>The maximum number of procedures that can be running at the same time.<br/>If the number of running procedures exceeds this limit, the procedure will be rejected. |
| `failure_detector` | -- | -- | -- |
| `failure_detector.threshold` | Float | `8.0` | The threshold value used by the failure detector to determine failure conditions. |
| `failure_detector.min_std_deviation` | String | `100ms` | The minimum standard deviation of the heartbeat intervals, used to calculate acceptable variations. |
| `failure_detector.acceptable_heartbeat_pause` | String | `10000ms` | The acceptable pause duration between heartbeats, used to determine if a heartbeat interval is acceptable. |
| `failure_detector.first_heartbeat_estimate` | String | `1000ms` | The initial estimate of the heartbeat interval used by the failure detector. |
| `failure_detector.threshold` | Float | `8.0` | Maximum acceptable φ before the peer is treated as failed.<br/>Lower values react faster but yield more false positives. |
| `failure_detector.min_std_deviation` | String | `100ms` | The minimum standard deviation of the heartbeat intervals.<br/>This floor keeps tiny variations from making φ explode, preventing hypersensitivity when heartbeat intervals barely vary. |
| `failure_detector.acceptable_heartbeat_pause` | String | `10000ms` | The acceptable pause duration between heartbeats.<br/>An extra grace period added to the learned mean interval before φ rises, absorbing temporary network hiccups or GC pauses. |
| `datanode` | -- | -- | Datanode options. |
| `datanode.client` | -- | -- | Datanode client options. |
| `datanode.client.timeout` | String | `10s` | Operation timeout. |

View File

@@ -149,20 +149,18 @@ max_metadata_value_size = "1500KiB"
max_running_procedures = 128
# Failure detector options.
# GreptimeDB uses the Phi Accrual Failure Detector algorithm to detect datanode failures.
[failure_detector]
## The threshold value used by the failure detector to determine failure conditions.
## Maximum acceptable φ before the peer is treated as failed.
## Lower values react faster but yield more false positives.
threshold = 8.0
## The minimum standard deviation of the heartbeat intervals, used to calculate acceptable variations.
## The minimum standard deviation of the heartbeat intervals.
## This floor keeps tiny variations from making φ explode, preventing hypersensitivity when heartbeat intervals barely vary.
min_std_deviation = "100ms"
## The acceptable pause duration between heartbeats, used to determine if a heartbeat interval is acceptable.
## The acceptable pause duration between heartbeats.
## An extra grace period added to the learned mean interval before φ rises, absorbing temporary network hiccups or GC pauses.
acceptable_heartbeat_pause = "10000ms"
## The initial estimate of the heartbeat interval used by the failure detector.
first_heartbeat_estimate = "1000ms"
## Datanode options.
[datanode]

View File

@@ -399,7 +399,6 @@ mod tests {
threshold = 8.0
min_std_deviation = "100ms"
acceptable_heartbeat_pause = "3000ms"
first_heartbeat_estimate = "1000ms"
"#;
write!(file, "{}", toml_str).unwrap();
@@ -430,13 +429,6 @@ mod tests {
.acceptable_heartbeat_pause
.as_millis()
);
assert_eq!(
1000,
options
.failure_detector
.first_heartbeat_estimate
.as_millis()
);
assert_eq!(
options.procedure.max_metadata_value_size,
Some(ReadableSize::kb(1500))

View File

@@ -18,6 +18,8 @@ use std::time::Duration;
use common_meta::distributed_time_constants;
use serde::{Deserialize, Serialize};
const FIRST_HEARTBEAT_ESTIMATE_MILLIS: i64 = 1000;
/// This is our port of Akka's "[PhiAccrualFailureDetector](https://github.com/akka/akka/blob/v2.6.21/akka-remote/src/main/scala/akka/remote/PhiAccrualFailureDetector.scala)"
/// under Apache License 2.0.
///
@@ -56,10 +58,6 @@ pub(crate) struct PhiAccrualFailureDetector {
/// arrivals, due to for example network drop.
acceptable_heartbeat_pause_millis: u32,
/// Bootstrap the stats with heartbeats that corresponds to this duration, with a rather high
/// standard deviation (since environment is unknown in the beginning).
first_heartbeat_estimate_millis: u32,
heartbeat_history: HeartbeatHistory,
last_heartbeat_millis: Option<i64>,
}
@@ -72,8 +70,6 @@ pub struct PhiAccrualFailureDetectorOptions {
pub min_std_deviation: Duration,
#[serde(with = "humantime_serde")]
pub acceptable_heartbeat_pause: Duration,
#[serde(with = "humantime_serde")]
pub first_heartbeat_estimate: Duration,
}
impl Default for PhiAccrualFailureDetectorOptions {
@@ -86,7 +82,6 @@ impl Default for PhiAccrualFailureDetectorOptions {
acceptable_heartbeat_pause: Duration::from_secs(
distributed_time_constants::DATANODE_LEASE_SECS,
),
first_heartbeat_estimate: Duration::from_millis(1000),
}
}
}
@@ -104,7 +99,6 @@ impl PhiAccrualFailureDetector {
min_std_deviation_millis: options.min_std_deviation.as_millis() as f32,
acceptable_heartbeat_pause_millis: options.acceptable_heartbeat_pause.as_millis()
as u32,
first_heartbeat_estimate_millis: options.first_heartbeat_estimate.as_millis() as u32,
heartbeat_history: HeartbeatHistory::new(1000),
last_heartbeat_millis: None,
}
@@ -124,11 +118,11 @@ impl PhiAccrualFailureDetector {
// guess statistics for first heartbeat,
// important so that connections with only one heartbeat become unavailable
// bootstrap with 2 entries with rather high standard deviation
let std_deviation = self.first_heartbeat_estimate_millis / 4;
let std_deviation = FIRST_HEARTBEAT_ESTIMATE_MILLIS / 4;
self.heartbeat_history
.add((self.first_heartbeat_estimate_millis - std_deviation) as _);
.add((FIRST_HEARTBEAT_ESTIMATE_MILLIS - std_deviation) as _);
self.heartbeat_history
.add((self.first_heartbeat_estimate_millis + std_deviation) as _);
.add((FIRST_HEARTBEAT_ESTIMATE_MILLIS + std_deviation) as _);
}
let _ = self.last_heartbeat_millis.insert(ts_millis);
}
@@ -367,7 +361,6 @@ mod tests {
threshold: 8.0,
min_std_deviation_millis: 100.0,
acceptable_heartbeat_pause_millis: 0,
first_heartbeat_estimate_millis: 1000,
heartbeat_history: HeartbeatHistory::new(1000),
last_heartbeat_millis: None,
};
@@ -381,14 +374,13 @@ mod tests {
threshold: 8.0,
min_std_deviation_millis: 100.0,
acceptable_heartbeat_pause_millis: 0,
first_heartbeat_estimate_millis: 1000,
heartbeat_history: HeartbeatHistory::new(1000),
last_heartbeat_millis: None,
};
fd.heartbeat(0);
assert!((fd.phi(1000)).abs() - 0.3 < 0.2);
assert!((fd.phi(2000)).abs() - 4.5 < 0.3);
assert!((fd.phi(3000)).abs() > 15.0);
assert!((fd.phi(FIRST_HEARTBEAT_ESTIMATE_MILLIS)).abs() - 0.3 < 0.2);
assert!((fd.phi(FIRST_HEARTBEAT_ESTIMATE_MILLIS * 2)).abs() - 4.5 < 0.3);
assert!((fd.phi(FIRST_HEARTBEAT_ESTIMATE_MILLIS * 3)).abs() > 15.0);
}
#[test]
@@ -397,7 +389,6 @@ mod tests {
threshold: 8.0,
min_std_deviation_millis: 100.0,
acceptable_heartbeat_pause_millis: 0,
first_heartbeat_estimate_millis: 1000,
heartbeat_history: HeartbeatHistory::new(1000),
last_heartbeat_millis: None,
};
@@ -413,7 +404,6 @@ mod tests {
threshold: 8.0,
min_std_deviation_millis: 100.0,
acceptable_heartbeat_pause_millis: 0,
first_heartbeat_estimate_millis: 1000,
heartbeat_history: HeartbeatHistory::new(1000),
last_heartbeat_millis: None,
};
@@ -431,7 +421,6 @@ mod tests {
threshold: 3.0,
min_std_deviation_millis: 100.0,
acceptable_heartbeat_pause_millis: 0,
first_heartbeat_estimate_millis: 1000,
heartbeat_history: HeartbeatHistory::new(1000),
last_heartbeat_millis: None,
};
@@ -449,7 +438,6 @@ mod tests {
threshold: 8.0,
min_std_deviation_millis: 100.0,
acceptable_heartbeat_pause_millis: 3000,
first_heartbeat_estimate_millis: 1000,
heartbeat_history: HeartbeatHistory::new(1000),
last_heartbeat_millis: None,
};
@@ -488,7 +476,6 @@ mod tests {
threshold: 8.0,
min_std_deviation_millis: 100.0,
acceptable_heartbeat_pause_millis: 3000,
first_heartbeat_estimate_millis: 1000,
heartbeat_history: HeartbeatHistory::new(1000),
last_heartbeat_millis: None,
};
@@ -507,7 +494,6 @@ mod tests {
threshold: 8.0,
min_std_deviation_millis: 100.0,
acceptable_heartbeat_pause_millis: 3000,
first_heartbeat_estimate_millis: 1000,
heartbeat_history: HeartbeatHistory::new(1000),
last_heartbeat_millis: None,
};
@@ -528,7 +514,6 @@ mod tests {
threshold: 8.0,
min_std_deviation_millis: 100.0,
acceptable_heartbeat_pause_millis: 0,
first_heartbeat_estimate_millis: 1000,
heartbeat_history: HeartbeatHistory::new(3),
last_heartbeat_millis: None,
};