chore(metrics): add region server request failure count metrics (#7173)

* chore/add-region-insert-failure-metric: Add metric for failed insert requests to region server in datanode module

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>

* chore/add-region-insert-failure-metric:
 Add metric for tracking failed region server requests

 - Introduce a new metric `REGION_SERVER_REQUEST_FAILURE_COUNT` to count failed region server requests.
 - Update `REGION_SERVER_INSERT_FAIL_COUNT` metric description for consistency.
 - Implement error handling in `RegionServerHandler` to increment the new failure metric on request errors.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>

---------

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
This commit is contained in:
Lei, HUANG
2025-11-04 23:23:40 -08:00
committed by GitHub
parent aac3ede261
commit abcfbd7f41
2 changed files with 26 additions and 0 deletions

View File

@@ -75,4 +75,20 @@ lazy_static! {
&[RESULT_TYPE]
)
.unwrap();
/// Total count of failed region server requests.
///
/// Carries a single label, `REGION_REQUEST_TYPE`, so failures can be broken
/// down per request type. `RegionServer`'s `RegionServerHandler::handle`
/// increments it whenever handling a request ends in an error.
///
/// Registration is unwrapped, so a registration failure panics on first access.
pub static ref REGION_SERVER_REQUEST_FAILURE_COUNT: IntCounterVec = register_int_counter_vec!(
"greptime_datanode_region_request_fail_count",
"failed region server requests count",
&[REGION_REQUEST_TYPE]
)
.unwrap();
/// Total count of failed insert requests to region server.
///
/// Carries a single label, `REGION_REQUEST_TYPE`. `RegionServerInner`
/// increments it when a request whose region change is `RegionChange::Ingest`
/// returns an error.
///
/// Registration is unwrapped, so a registration failure panics on first access.
pub static ref REGION_SERVER_INSERT_FAIL_COUNT: IntCounterVec = register_int_counter_vec!(
"greptime_datanode_region_failed_insert_count",
"failed region server insert requests count",
&[REGION_REQUEST_TYPE]
)
.unwrap();
}

View File

@@ -600,6 +600,8 @@ impl RegionServer {
#[async_trait]
impl RegionServerHandler for RegionServer {
async fn handle(&self, request: region_request::Body) -> ServerResult<RegionResponseV1> {
let failed_requests_cnt = crate::metrics::REGION_SERVER_REQUEST_FAILURE_COUNT
.with_label_values(&[request.as_ref()]);
let response = match &request {
region_request::Body::Creates(_)
| region_request::Body::Drops(_)
@@ -617,6 +619,9 @@ impl RegionServerHandler for RegionServer {
_ => self.handle_requests_in_serial(request).await,
}
.map_err(BoxedError::new)
.inspect_err(|_| {
failed_requests_cnt.inc();
})
.context(ExecuteGrpcRequestSnafu)?;
Ok(RegionResponseV1 {
@@ -1230,6 +1235,11 @@ impl RegionServerInner {
})
}
Err(err) => {
if matches!(region_change, RegionChange::Ingest) {
crate::metrics::REGION_SERVER_INSERT_FAIL_COUNT
.with_label_values(&[request_type])
.inc();
}
// Removes the region status if the operation fails.
self.unset_region_status(region_id, &engine, region_change);
Err(err)