chore: update flush failure metric name and update grafana dashboard (#6138)

* 1. rename `greptime_mito_flush_errors_total` metric to `greptime_mito_flush_failure_total` for consistency
2. update grafana dashboard to add the following panels:
  - compaction input/output bytes
  - bulk insert handle elapsed time in frontend and region worker
This commit is contained in:
Lei, HUANG
2025-05-20 20:05:54 +08:00
committed by GitHub
parent 7ae0e150e5
commit eaf7b4b9dd
8 changed files with 8524 additions and 7550 deletions

View File

@@ -33,7 +33,7 @@ use crate::error::{
};
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
use crate::metrics::{
FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_ERRORS_TOTAL, FLUSH_REQUESTS_TOTAL,
FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_FAILURE_TOTAL, FLUSH_REQUESTS_TOTAL,
INFLIGHT_FLUSH_COUNT,
};
use crate::read::Source;
@@ -601,7 +601,7 @@ impl FlushScheduler {
pub(crate) fn on_flush_failed(&mut self, region_id: RegionId, err: Arc<Error>) {
error!(err; "Region {} failed to flush, cancel all pending tasks", region_id);
FLUSH_ERRORS_TOTAL.inc();
FLUSH_FAILURE_TOTAL.inc();
// Remove this region.
let Some(flush_status) = self.region_status.remove(&region_id) else {

View File

@@ -70,8 +70,8 @@ lazy_static! {
)
.unwrap();
/// Counter of scheduled failed flush jobs.
pub static ref FLUSH_ERRORS_TOTAL: IntCounter =
register_int_counter!("greptime_mito_flush_errors_total", "mito flush errors total").unwrap();
pub static ref FLUSH_FAILURE_TOTAL: IntCounter =
register_int_counter!("greptime_mito_flush_failure_total", "mito flush failure total").unwrap();
/// Elapsed time of a flush job.
pub static ref FLUSH_ELAPSED: HistogramVec = register_histogram_vec!(
"greptime_mito_flush_elapsed",