diff --git a/grafana/dashboards/metrics/cluster/dashboard.json b/grafana/dashboards/metrics/cluster/dashboard.json
index c99a91178d..2ebe91f530 100644
--- a/grafana/dashboards/metrics/cluster/dashboard.json
+++ b/grafana/dashboards/metrics/cluster/dashboard.json
@@ -25,7 +25,7 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
- "id": 5,
+ "id": 7,
"links": [],
"panels": [
{
@@ -4476,7 +4476,7 @@
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
- "drawStyle": "line",
+ "drawStyle": "points",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
@@ -4553,9 +4553,22 @@
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-p99",
"range": true,
"refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{instance=~\"$datanode\"}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{instance=~\"$datanode\"}[$__rate_interval]))",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-avg",
+ "range": true,
+ "refId": "B"
}
],
- "title": "Compaction P99 per Instance by Stage",
+ "title": "Compaction Elapsed Time per Instance by Stage",
"type": "timeseries"
},
{
@@ -5546,13 +5559,131 @@
"title": "Region Worker Handle Bulk Insert Requests",
"type": "timeseries"
},
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Per-stage elapsed time for region worker to decode requests.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "points",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 117
+ },
+ "id": 338,
+ "options": {
+ "legend": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.0.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "disableTextWrap": false,
+ "editorMode": "builder",
+ "expr": "histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))",
+ "fullMetaSearch": false,
+ "includeNullMetadata": true,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-P95",
+ "range": true,
+ "refId": "A",
+ "useBackend": false
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Region Worker Convert Requests",
+ "type": "timeseries"
+ },
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
- "y": 117
+ "y": 125
},
"id": 313,
"panels": [
@@ -6682,7 +6813,7 @@
"h": 1,
"w": 24,
"x": 0,
- "y": 118
+ "y": 126
},
"id": 324,
"panels": [
@@ -6979,7 +7110,7 @@
"h": 1,
"w": 24,
"x": 0,
- "y": 119
+ "y": 127
},
"id": 328,
"panels": [
@@ -7627,6 +7758,5 @@
"timezone": "",
"title": "GreptimeDB",
"uid": "dejf3k5e7g2kgb",
- "version": 3,
- "weekStart": ""
+ "version": 3
}
diff --git a/grafana/dashboards/metrics/cluster/dashboard.md b/grafana/dashboards/metrics/cluster/dashboard.md
index 9552fe946c..d7ffd4544d 100644
--- a/grafana/dashboards/metrics/cluster/dashboard.md
+++ b/grafana/dashboards/metrics/cluster/dashboard.md
@@ -60,7 +60,7 @@
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
-| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
+| Compaction Elapsed Time per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))`
`sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{instance=~"$datanode"}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`
`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`
`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
| Cached Bytes per Instance | `greptime_mito_cache_bytes{instance=~"$datanode"}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
@@ -70,6 +70,7 @@
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
| Compaction Input/Output Bytes | `sum by(instance, pod) (greptime_mito_compaction_input_bytes)`
`sum by(instance, pod) (greptime_mito_compaction_output_bytes)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-input` |
| Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`
`sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
+| Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`
`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
# OpenDAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
diff --git a/grafana/dashboards/metrics/cluster/dashboard.yaml b/grafana/dashboards/metrics/cluster/dashboard.yaml
index b70b62cc26..1de99330fc 100644
--- a/grafana/dashboards/metrics/cluster/dashboard.yaml
+++ b/grafana/dashboards/metrics/cluster/dashboard.yaml
@@ -487,7 +487,7 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{pod}}]'
- - title: Compaction P99 per Instance by Stage
+ - title: Compaction Elapsed Time per Instance by Stage
type: timeseries
description: Compaction latency by stage
unit: s
@@ -497,6 +497,11 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
+ - expr: sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{instance=~"$datanode"}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-avg'
- title: Compaction P99 per Instance
type: timeseries
description: Compaction P99 per Instance.
@@ -607,6 +612,21 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
+ - title: Region Worker Convert Requests
+ type: timeseries
+ description: Per-stage elapsed time for region worker to decode requests.
+ unit: s
+ queries:
+ - expr: histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-P95'
+ - expr: sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
- title: OpenDAL
panels:
- title: QPS per Instance
diff --git a/grafana/dashboards/metrics/standalone/dashboard.json b/grafana/dashboards/metrics/standalone/dashboard.json
index 5cad7b6197..e9f4f34ea6 100644
--- a/grafana/dashboards/metrics/standalone/dashboard.json
+++ b/grafana/dashboards/metrics/standalone/dashboard.json
@@ -25,7 +25,7 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
- "id": 5,
+ "id": 7,
"links": [],
"panels": [
{
@@ -4476,7 +4476,7 @@
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
- "drawStyle": "line",
+ "drawStyle": "points",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
@@ -4553,9 +4553,22 @@
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-p99",
"range": true,
"refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{}[$__rate_interval]))",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-avg",
+ "range": true,
+ "refId": "B"
}
],
- "title": "Compaction P99 per Instance by Stage",
+ "title": "Compaction Elapsed Time per Instance by Stage",
"type": "timeseries"
},
{
@@ -5546,13 +5559,131 @@
"title": "Region Worker Handle Bulk Insert Requests",
"type": "timeseries"
},
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Per-stage elapsed time for region worker to decode requests.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "points",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 117
+ },
+ "id": 338,
+ "options": {
+ "legend": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.0.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "disableTextWrap": false,
+ "editorMode": "builder",
+ "expr": "histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))",
+ "fullMetaSearch": false,
+ "includeNullMetadata": true,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-P95",
+ "range": true,
+ "refId": "A",
+ "useBackend": false
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Region Worker Convert Requests",
+ "type": "timeseries"
+ },
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
- "y": 117
+ "y": 125
},
"id": 313,
"panels": [
@@ -6682,7 +6813,7 @@
"h": 1,
"w": 24,
"x": 0,
- "y": 118
+ "y": 126
},
"id": 324,
"panels": [
@@ -6979,7 +7110,7 @@
"h": 1,
"w": 24,
"x": 0,
- "y": 119
+ "y": 127
},
"id": 328,
"panels": [
@@ -7627,6 +7758,5 @@
"timezone": "",
"title": "GreptimeDB",
"uid": "dejf3k5e7g2kgb",
- "version": 3,
- "weekStart": ""
+ "version": 3
}
diff --git a/grafana/dashboards/metrics/standalone/dashboard.md b/grafana/dashboards/metrics/standalone/dashboard.md
index 5674fb0ae5..5e05d46ecb 100644
--- a/grafana/dashboards/metrics/standalone/dashboard.md
+++ b/grafana/dashboards/metrics/standalone/dashboard.md
@@ -60,7 +60,7 @@
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
-| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
+| Compaction Elapsed Time per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))`
`sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`
`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`
`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
| Cached Bytes per Instance | `greptime_mito_cache_bytes{}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
@@ -70,6 +70,7 @@
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
| Compaction Input/Output Bytes | `sum by(instance, pod) (greptime_mito_compaction_input_bytes)`
`sum by(instance, pod) (greptime_mito_compaction_output_bytes)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-input` |
| Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`
`sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
+| Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`
`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
# OpenDAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
diff --git a/grafana/dashboards/metrics/standalone/dashboard.yaml b/grafana/dashboards/metrics/standalone/dashboard.yaml
index f602424178..bfe37a5ed5 100644
--- a/grafana/dashboards/metrics/standalone/dashboard.yaml
+++ b/grafana/dashboards/metrics/standalone/dashboard.yaml
@@ -487,7 +487,7 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{pod}}]'
- - title: Compaction P99 per Instance by Stage
+ - title: Compaction Elapsed Time per Instance by Stage
type: timeseries
description: Compaction latency by stage
unit: s
@@ -497,6 +497,11 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
+ - expr: sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{}[$__rate_interval]))
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-avg'
- title: Compaction P99 per Instance
type: timeseries
description: Compaction P99 per Instance.
@@ -607,6 +612,21 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
+ - title: Region Worker Convert Requests
+ type: timeseries
+ description: Per-stage elapsed time for region worker to decode requests.
+ unit: s
+ queries:
+ - expr: histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-P95'
+ - expr: sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
- title: OpenDAL
panels:
- title: QPS per Instance
diff --git a/grafana/scripts/gen-dashboards.sh b/grafana/scripts/gen-dashboards.sh
index 891eb7f7f5..9cf8cbd2a3 100755
--- a/grafana/scripts/gen-dashboards.sh
+++ b/grafana/scripts/gen-dashboards.sh
@@ -6,7 +6,7 @@ DAC_IMAGE=ghcr.io/zyy17/dac:20250423-522bd35
remove_instance_filters() {
# Remove the instance filters for the standalone dashboards.
- sed 's/instance=~\\"$datanode\\",//; s/instance=~\\"$datanode\\"//; s/instance=~\\"$frontend\\",//; s/instance=~\\"$frontend\\"//; s/instance=~\\"$metasrv\\",//; s/instance=~\\"$metasrv\\"//; s/instance=~\\"$flownode\\",//; s/instance=~\\"$flownode\\"//;' $CLUSTER_DASHBOARD_DIR/dashboard.json > $STANDALONE_DASHBOARD_DIR/dashboard.json
+ sed -E 's/instance=~\\"(\$datanode|\$frontend|\$metasrv|\$flownode)\\",?//g' "$CLUSTER_DASHBOARD_DIR/dashboard.json" > "$STANDALONE_DASHBOARD_DIR/dashboard.json"
}
generate_intermediate_dashboards_and_docs() {