diff --git a/src/datanode/src/metrics.rs b/src/datanode/src/metrics.rs index 1b0e513375..4e763f5858 100644 --- a/src/datanode/src/metrics.rs +++ b/src/datanode/src/metrics.rs @@ -75,4 +75,20 @@ lazy_static! { &[RESULT_TYPE] ) .unwrap(); + + /// Total count of failed region server requests. + pub static ref REGION_SERVER_REQUEST_FAILURE_COUNT: IntCounterVec = register_int_counter_vec!( + "greptime_datanode_region_request_fail_count", + "failed region server requests count", + &[REGION_REQUEST_TYPE] + ) + .unwrap(); + + /// Total count of failed insert requests to region server. + pub static ref REGION_SERVER_INSERT_FAIL_COUNT: IntCounterVec = register_int_counter_vec!( + "greptime_datanode_region_failed_insert_count", + "failed region server insert requests count", + &[REGION_REQUEST_TYPE] + ) + .unwrap(); } diff --git a/src/datanode/src/region_server.rs b/src/datanode/src/region_server.rs index 03f90bd0dc..ff80c8b10a 100644 --- a/src/datanode/src/region_server.rs +++ b/src/datanode/src/region_server.rs @@ -600,6 +600,8 @@ impl RegionServer { #[async_trait] impl RegionServerHandler for RegionServer { async fn handle(&self, request: region_request::Body) -> ServerResult { + let failed_requests_cnt = crate::metrics::REGION_SERVER_REQUEST_FAILURE_COUNT + .with_label_values(&[request.as_ref()]); let response = match &request { region_request::Body::Creates(_) | region_request::Body::Drops(_) @@ -617,6 +619,9 @@ impl RegionServerHandler for RegionServer { _ => self.handle_requests_in_serial(request).await, } .map_err(BoxedError::new) + .inspect_err(|_| { + failed_requests_cnt.inc(); + }) .context(ExecuteGrpcRequestSnafu)?; Ok(RegionResponseV1 { @@ -1230,6 +1235,11 @@ impl RegionServerInner { }) } Err(err) => { + if matches!(region_change, RegionChange::Ingest) { + crate::metrics::REGION_SERVER_INSERT_FAIL_COUNT + .with_label_values(&[request_type]) + .inc(); + } // Removes the region status if the operation fails. self.unset_region_status(region_id, &engine, region_change); Err(err)