chore: add logs and metrics (#2858)

* chore: add logs and metrics

* feat: add the timer to track heartbeat intervel

* feat: add the gauge to track region leases

* refactor: use gauge instead of the timer

* chore: apply suggestions from CR

* feat: add hit rate and etcd txn metrics
This commit is contained in:
Weny Xu
2023-12-04 11:51:30 +09:00
committed by GitHub
parent 781f2422b3
commit c26f2f94c0
8 changed files with 105 additions and 5 deletions

View File

@@ -129,8 +129,12 @@ impl RegionAliveKeeper {
let (role, region_id) = (region.role().into(), RegionId::from(region.region_id));
if let Some(handle) = self.find_handle(region_id).await {
handle.reset_deadline(role, deadline).await;
} else {
warn!(
"Trying to renew the lease for region {region_id}, the keeper handler is not found!"
);
// Else the region alive keeper might be triggered by lagging messages, we can safely ignore it.
}
// Else the region alive keeper might be triggered by lagging messages, we can safely ignore it.
}
}

View File

@@ -37,6 +37,7 @@ use crate::alive_keeper::RegionAliveKeeper;
use crate::config::DatanodeOptions;
use crate::error::{self, MetaClientInitSnafu, Result};
use crate::event_listener::RegionServerEventReceiver;
use crate::metrics;
use crate::region_server::RegionServer;
pub(crate) mod handler;
@@ -72,9 +73,9 @@ impl HeartbeatTask {
opts.heartbeat.interval.as_millis() as u64,
));
let resp_handler_executor = Arc::new(HandlerGroupExecutor::new(vec![
region_alive_keeper.clone(),
Arc::new(ParseMailboxMessageHandler),
Arc::new(RegionHeartbeatResponseHandler::new(region_server.clone())),
region_alive_keeper.clone(),
]));
Ok(Self {
@@ -101,8 +102,10 @@ impl HeartbeatTask {
quit_signal: Arc<Notify>,
) -> Result<HeartbeatSender> {
let client_id = meta_client.id();
let (tx, mut rx) = meta_client.heartbeat().await.context(MetaClientInitSnafu)?;
let mut last_received_lease = Instant::now();
let _handle = common_runtime::spawn_bg(async move {
while let Some(res) = match rx.message().await {
Ok(m) => m,
@@ -114,6 +117,28 @@ impl HeartbeatTask {
if let Some(msg) = res.mailbox_message.as_ref() {
info!("Received mailbox message: {msg:?}, meta_client id: {client_id:?}");
}
if let Some(lease) = res.region_lease.as_ref() {
metrics::LAST_RECEIVED_HEARTBEAT_ELAPSED
.set(last_received_lease.elapsed().as_millis() as i64);
// Resets the timer.
last_received_lease = Instant::now();
let mut leader_region_lease_count = 0;
let mut follower_region_lease_count = 0;
for lease in &lease.regions {
match lease.role() {
RegionRole::Leader => leader_region_lease_count += 1,
RegionRole::Follower => follower_region_lease_count += 1,
}
}
metrics::HEARTBEAT_REGION_LEASES
.with_label_values(&["leader"])
.set(leader_region_lease_count);
metrics::HEARTBEAT_REGION_LEASES
.with_label_values(&["follower"])
.set(follower_region_lease_count);
}
let ctx = HeartbeatResponseHandlerContext::new(mailbox.clone(), res);
if let Err(e) = Self::handle_response(ctx, handler_executor.clone()).await {
error!(e; "Error while handling heartbeat response");

View File

@@ -18,6 +18,8 @@ use prometheus::*;
/// Region request type label.
pub const REGION_REQUEST_TYPE: &str = "datanode_region_request_type";
pub const REGION_ROLE: &str = "region_role";
lazy_static! {
/// The elapsed time of handling a request in the region_server.
pub static ref HANDLE_REGION_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
@@ -26,4 +28,17 @@ lazy_static! {
&[REGION_REQUEST_TYPE]
)
.unwrap();
/// The elapsed time since the last received heartbeat.
pub static ref LAST_RECEIVED_HEARTBEAT_ELAPSED: IntGauge = register_int_gauge!(
"last_received_heartbeat_lease_elapsed",
"last received heartbeat lease elapsed",
)
.unwrap();
/// The received region leases via heartbeat.
pub static ref HEARTBEAT_REGION_LEASES: IntGaugeVec = register_int_gauge_vec!(
"heartbeat_region_leases",
"received region leases via heartbeat",
&[REGION_ROLE]
)
.unwrap();
}