diff --git a/grafana/dashboards/metrics/cluster/dashboard.json b/grafana/dashboards/metrics/cluster/dashboard.json index c99a91178d..2ebe91f530 100644 --- a/grafana/dashboards/metrics/cluster/dashboard.json +++ b/grafana/dashboards/metrics/cluster/dashboard.json @@ -25,7 +25,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 5, + "id": 7, "links": [], "panels": [ { @@ -4476,7 +4476,7 @@ "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, - "drawStyle": "line", + "drawStyle": "points", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { @@ -4553,9 +4553,22 @@ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-p99", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{instance=~\"$datanode\"}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{instance=~\"$datanode\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-avg", + "range": true, + "refId": "B" } ], - "title": "Compaction P99 per Instance by Stage", + "title": "Compaction Elapsed Time per Instance by Stage", "type": "timeseries" }, { @@ -5546,13 +5559,131 @@ "title": "Region Worker Handle Bulk Insert Requests", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Per-stage elapsed time for region worker to decode requests.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 117 + }, + "id": 338, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-P95", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG", + "range": true, + "refId": "B" + } + ], + "title": "Region Worker Convert Requests", + "type": "timeseries" + }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 117 + "y": 125 }, "id": 313, "panels": [ @@ -6682,7 +6813,7 @@ "h": 1, "w": 24, "x": 0, - "y": 118 + "y": 126 }, "id": 324, "panels": [ @@ -6979,7 +7110,7 @@ "h": 1, "w": 24, "x": 0, - "y": 119 + "y": 127 }, "id": 328, "panels": [ @@ -7627,6 +7758,5 @@ "timezone": "", "title": "GreptimeDB", "uid": "dejf3k5e7g2kgb", - "version": 3, - "weekStart": "" + "version": 3 } diff --git a/grafana/dashboards/metrics/cluster/dashboard.md b/grafana/dashboards/metrics/cluster/dashboard.md index 9552fe946c..d7ffd4544d 100644 --- a/grafana/dashboards/metrics/cluster/dashboard.md +++ b/grafana/dashboards/metrics/cluster/dashboard.md @@ -60,7 +60,7 @@ | Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` | | Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` | | Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` | -| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` | +| Compaction Elapsed Time per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))`
`sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{instance=~"$datanode"}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` | | Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` | | WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`
`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`
`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` | | Cached Bytes per Instance | `greptime_mito_cache_bytes{instance=~"$datanode"}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` | @@ -70,6 +70,7 @@ | Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` | | Compaction Input/Output Bytes | `sum by(instance, pod) (greptime_mito_compaction_input_bytes)`
`sum by(instance, pod) (greptime_mito_compaction_output_bytes)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-input` | | Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`
`sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` | +| Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`
`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` | # OpenDAL | Title | Query | Type | Description | Datasource | Unit | Legend Format | | --- | --- | --- | --- | --- | --- | --- | diff --git a/grafana/dashboards/metrics/cluster/dashboard.yaml b/grafana/dashboards/metrics/cluster/dashboard.yaml index b70b62cc26..1de99330fc 100644 --- a/grafana/dashboards/metrics/cluster/dashboard.yaml +++ b/grafana/dashboards/metrics/cluster/dashboard.yaml @@ -487,7 +487,7 @@ groups: type: prometheus uid: ${metrics} legendFormat: '[{{ instance }}]-[{{pod}}]' - - title: Compaction P99 per Instance by Stage + - title: Compaction Elapsed Time per Instance by Stage type: timeseries description: Compaction latency by stage unit: s @@ -497,6 +497,11 @@ groups: type: prometheus uid: ${metrics} legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99' + - expr: sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{instance=~"$datanode"}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{instance=~"$datanode"}[$__rate_interval])) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-avg' - title: Compaction P99 per Instance type: timeseries description: Compaction P99 per Instance. @@ -607,6 +612,21 @@ groups: type: prometheus uid: ${metrics} legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG' + - title: Region Worker Convert Requests + type: timeseries + description: Per-stage elapsed time for region worker to decode requests. + unit: s + queries: + - expr: histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval]))) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-P95' + - expr: sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval])) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG' - title: OpenDAL panels: - title: QPS per Instance diff --git a/grafana/dashboards/metrics/standalone/dashboard.json b/grafana/dashboards/metrics/standalone/dashboard.json index 5cad7b6197..e9f4f34ea6 100644 --- a/grafana/dashboards/metrics/standalone/dashboard.json +++ b/grafana/dashboards/metrics/standalone/dashboard.json @@ -25,7 +25,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 5, + "id": 7, "links": [], "panels": [ { @@ -4476,7 +4476,7 @@ "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, - "drawStyle": "line", + "drawStyle": "points", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { @@ -4553,9 +4553,22 @@ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-p99", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-avg", + "range": true, + "refId": "B" } ], - "title": "Compaction P99 per Instance by Stage", + "title": "Compaction Elapsed Time per Instance by Stage", "type": "timeseries" }, { @@ -5546,13 +5559,131 @@ "title": "Region Worker Handle Bulk Insert Requests", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Per-stage elapsed time for region worker to decode requests.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 117 + }, + "id": 338, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-P95", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG", + "range": true, + "refId": "B" + } + ], + "title": "Region Worker Convert Requests", + "type": "timeseries" + }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 117 + "y": 125 }, "id": 313, "panels": [ @@ -6682,7 +6813,7 @@ "h": 1, "w": 24, "x": 0, - "y": 118 + "y": 126 }, "id": 324, "panels": [ @@ -6979,7 +7110,7 @@ "h": 1, "w": 24, "x": 0, - "y": 119 + "y": 127 }, "id": 328, "panels": [ @@ -7627,6 +7758,5 @@ "timezone": "", "title": "GreptimeDB", "uid": "dejf3k5e7g2kgb", - "version": 3, - "weekStart": "" + "version": 3 } diff --git a/grafana/dashboards/metrics/standalone/dashboard.md b/grafana/dashboards/metrics/standalone/dashboard.md index 5674fb0ae5..5e05d46ecb 100644 --- a/grafana/dashboards/metrics/standalone/dashboard.md +++ b/grafana/dashboards/metrics/standalone/dashboard.md @@ -60,7 +60,7 @@ | Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` | | Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` | | Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` | -| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` | +| Compaction Elapsed Time per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))`
`sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` | | Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` | | WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`
`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`
`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` | | Cached Bytes per Instance | `greptime_mito_cache_bytes{}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` | @@ -70,6 +70,7 @@ | Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` | | Compaction Input/Output Bytes | `sum by(instance, pod) (greptime_mito_compaction_input_bytes)`
`sum by(instance, pod) (greptime_mito_compaction_output_bytes)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-input` | | Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`
`sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` | +| Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`
`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` | # OpenDAL | Title | Query | Type | Description | Datasource | Unit | Legend Format | | --- | --- | --- | --- | --- | --- | --- | diff --git a/grafana/dashboards/metrics/standalone/dashboard.yaml b/grafana/dashboards/metrics/standalone/dashboard.yaml index f602424178..bfe37a5ed5 100644 --- a/grafana/dashboards/metrics/standalone/dashboard.yaml +++ b/grafana/dashboards/metrics/standalone/dashboard.yaml @@ -487,7 +487,7 @@ groups: type: prometheus uid: ${metrics} legendFormat: '[{{ instance }}]-[{{pod}}]' - - title: Compaction P99 per Instance by Stage + - title: Compaction Elapsed Time per Instance by Stage type: timeseries description: Compaction latency by stage unit: s @@ -497,6 +497,11 @@ groups: type: prometheus uid: ${metrics} legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99' + - expr: sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{}[$__rate_interval])) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-avg' - title: Compaction P99 per Instance type: timeseries description: Compaction P99 per Instance. @@ -607,6 +612,21 @@ groups: type: prometheus uid: ${metrics} legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG' + - title: Region Worker Convert Requests + type: timeseries + description: Per-stage elapsed time for region worker to decode requests. + unit: s + queries: + - expr: histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval]))) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-P95' + - expr: sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval])) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG' - title: OpenDAL panels: - title: QPS per Instance diff --git a/grafana/scripts/gen-dashboards.sh b/grafana/scripts/gen-dashboards.sh index 891eb7f7f5..9cf8cbd2a3 100755 --- a/grafana/scripts/gen-dashboards.sh +++ b/grafana/scripts/gen-dashboards.sh @@ -6,7 +6,7 @@ DAC_IMAGE=ghcr.io/zyy17/dac:20250423-522bd35 remove_instance_filters() { # Remove the instance filters for the standalone dashboards. - sed 's/instance=~\\"$datanode\\",//; s/instance=~\\"$datanode\\"//; s/instance=~\\"$frontend\\",//; s/instance=~\\"$frontend\\"//; s/instance=~\\"$metasrv\\",//; s/instance=~\\"$metasrv\\"//; s/instance=~\\"$flownode\\",//; s/instance=~\\"$flownode\\"//;' $CLUSTER_DASHBOARD_DIR/dashboard.json > $STANDALONE_DASHBOARD_DIR/dashboard.json + sed -E 's/instance=~\\"(\$datanode|\$frontend|\$metasrv|\$flownode)\\",?//g' "$CLUSTER_DASHBOARD_DIR/dashboard.json" > "$STANDALONE_DASHBOARD_DIR/dashboard.json" } generate_intermediate_dashboards_and_docs() {