diff --git a/grafana/dashboards/metrics/cluster/dashboard.json b/grafana/dashboards/metrics/cluster/dashboard.json index d56df06a3b..01253bd83b 100644 --- a/grafana/dashboards/metrics/cluster/dashboard.json +++ b/grafana/dashboards/metrics/cluster/dashboard.json @@ -10308,12 +10308,7 @@ "show": true }, "showHeader": true, - "sortBy": [ - { - "desc": true, - "displayName": "table_schema" - } - ] + "sortBy": [] }, "pluginVersion": "11.6.0", "targets": [ @@ -10695,6 +10690,864 @@ ], "title": "Datanode Data Distribution", "type": "piechart" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 105 + }, + "id": 370, + "panels": [], + "title": "Autopilot", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Region balancer action count", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 106 + }, + "id": 371, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (result) (\n changes(greptime_region_balancer_actions_total[$__rate_interval])\n)", + "hide": false, + "instant": false, + "legendFormat": "{{result}}", + "range": true, + "refId": "A" + } + ], + "title": "Region Balancer Actions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Region balancer gate stop count by gate and reason", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 106 + }, + "id": 372, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (gate, reason) (changes(greptime_region_balancer_gate_stop_total[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{gate}} / {{reason}}", + "range": true, + "refId": "A" + } + ], + "title": "Region Balancer Gate Stops", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Region balancer datanode count by state", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 114 + }, + "id": 373, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (state) (greptime_region_balancer_datanodes)", + "hide": false, + "instant": false, + "legendFormat": "{{state}}", + "range": true, + "refId": "A" + } + ], + "title": "Region Balancer Datanodes", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Region balancer region count by state", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 114 + }, + "id": 374, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (state) (greptime_region_balancer_regions)", + "hide": false, + "instant": false, + "legendFormat": "{{state}}", + "range": true, + "refId": "A" + } + ], + "title": "Region Balancer Regions", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Region balancer datanode stability statistics by state", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "binBps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*count$" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*ratio$" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 122 + }, + "id": 375, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "builder", + "expr": "sum by (state) (greptime_region_balancer_datanode_stability)", + "hide": false, + "instant": false, + "legendFormat": "{{state}}", + "range": true, + "refId": "A" + } + ], + "title": "Region Balancer Datanode Stability", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Auto repartition action count by result", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": -1, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 130 + }, + "id": 376, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (result) (changes(greptime_auto_repartition_actions_total[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{result}}", + "range": true, + "refId": "A" + } + ], + "title": "Auto Repartition Actions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Auto repartition gate stop count by gate and reason", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 130 + }, + "id": 377, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (gate, reason) (changes(greptime_auto_repartition_gate_stop_total[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{gate}} / {{reason}}", + "range": true, + "refId": "A" + } + ], + "title": "Auto Repartition Gate Stops", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Auto repartition sampling elapsed time by stage", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 138 + }, + "id": 378, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_sampling_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{stage}}", + "range": true, + "refId": "A" + } + ], + "title": "Auto Repartition Sampling P99", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Auto repartition executor elapsed time by stage", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 138 + }, + "id": 379, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_executor_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{stage}}", + "range": true, + "refId": "A" + } + ], + "title": "Auto Repartition Executor P99", + "type": "timeseries" } ], "preload": false, @@ -10828,12 +11681,12 @@ ] }, "time": { - "from": "now-2h", + "from": "now-1h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "GreptimeDB Copy", "uid": "dflfbxbwvvchsa", - "version": 37 + "version": 67 } \ No newline at end of file diff --git a/grafana/dashboards/metrics/cluster/dashboard.md b/grafana/dashboards/metrics/cluster/dashboard.md index c398ea7f90..2204c6d0a7 100644 --- a/grafana/dashboards/metrics/cluster/dashboard.md +++ b/grafana/dashboards/metrics/cluster/dashboard.md @@ -226,3 +226,17 @@ SELECT FROM leader_regions GROUP BY datanode ORDER BY data_size DESC;` | `piechart` | Distribution of leader regions and data size across datanodes. | `mysql` | `bytes` | -- | +# Autopilot +| Title | Query | Type | Description | Datasource | Unit | Legend Format | +| --- | --- | --- | --- | --- | --- | --- | +| Region Balancer Actions | `sum by (result) ( + changes(greptime_region_balancer_actions_total[$__rate_interval]) +)` | `timeseries` | Region balancer action count | `prometheus` | `short` | `{{result}}` | +| Region Balancer Gate Stops | `sum by (gate, reason) (changes(greptime_region_balancer_gate_stop_total[$__rate_interval]))` | `timeseries` | Region balancer gate stop count by gate and reason | `prometheus` | `short` | `{{gate}} / {{reason}}` | +| Region Balancer Datanodes | `sum by (state) (greptime_region_balancer_datanodes)` | `stat` | Region balancer datanode count by state | `prometheus` | `short` | `{{state}}` | +| Region Balancer Regions | `sum by (state) (greptime_region_balancer_regions)` | `stat` | Region balancer region count by state | `prometheus` | `short` | `{{state}}` | +| Region Balancer Datanode Stability | `sum by (state) (greptime_region_balancer_datanode_stability)` | `stat` | Region balancer datanode stability statistics by state | `prometheus` | `binBps` | `{{state}}` | +| Auto Repartition Actions | `sum by (result) (changes(greptime_auto_repartition_actions_total[$__rate_interval]))` | `timeseries` | Auto repartition action count by result | `prometheus` | `short` | `{{result}}` | +| Auto Repartition Gate Stops | `sum by (gate, reason) (changes(greptime_auto_repartition_gate_stop_total[$__rate_interval]))` | `timeseries` | Auto repartition gate stop count by gate and reason | `prometheus` | `short` | `{{gate}} / {{reason}}` | +| Auto Repartition Sampling P99 | `histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_sampling_elapsed_bucket[$__rate_interval])))` | `timeseries` | Auto repartition sampling elapsed time by stage | `prometheus` | `s` | `{{stage}}` | +| Auto Repartition Executor P99 | `histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_executor_elapsed_bucket[$__rate_interval])))` | `timeseries` | Auto repartition executor elapsed time by stage | `prometheus` | `s` | `{{stage}}` | diff --git a/grafana/dashboards/metrics/cluster/dashboard.yaml b/grafana/dashboards/metrics/cluster/dashboard.yaml index 2617016083..ca8f88267f 100644 --- a/grafana/dashboards/metrics/cluster/dashboard.yaml +++ b/grafana/dashboards/metrics/cluster/dashboard.yaml @@ -1215,3 +1215,98 @@ groups: datasource: type: mysql uid: ${information_schema} + - title: Autopilot + panels: + - title: Region Balancer Actions + type: timeseries + description: Region balancer action count + unit: short + queries: + - expr: |- + sum by (result) ( + changes(greptime_region_balancer_actions_total[$__rate_interval]) + ) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{result}}' + - title: Region Balancer Gate Stops + type: timeseries + description: Region balancer gate stop count by gate and reason + unit: short + queries: + - expr: sum by (gate, reason) (changes(greptime_region_balancer_gate_stop_total[$__rate_interval])) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{gate}} / {{reason}}' + - title: Region Balancer Datanodes + type: stat + description: Region balancer datanode count by state + unit: short + queries: + - expr: sum by (state) (greptime_region_balancer_datanodes) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{state}}' + - title: Region Balancer Regions + type: stat + description: Region balancer region count by state + unit: short + queries: + - expr: sum by (state) (greptime_region_balancer_regions) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{state}}' + - title: Region Balancer Datanode Stability + type: stat + description: Region balancer datanode stability statistics by state + unit: binBps + queries: + - expr: sum by (state) (greptime_region_balancer_datanode_stability) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{state}}' + - title: Auto Repartition Actions + type: timeseries + description: Auto repartition action count by result + unit: short + queries: + - expr: sum by (result) (changes(greptime_auto_repartition_actions_total[$__rate_interval])) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{result}}' + - title: Auto Repartition Gate Stops + type: timeseries + description: Auto repartition gate stop count by gate and reason + unit: short + queries: + - expr: sum by (gate, reason) (changes(greptime_auto_repartition_gate_stop_total[$__rate_interval])) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{gate}} / {{reason}}' + - title: Auto Repartition Sampling P99 + type: timeseries + description: Auto repartition sampling elapsed time by stage + unit: s + queries: + - expr: histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_sampling_elapsed_bucket[$__rate_interval]))) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{stage}}' + - title: Auto Repartition Executor P99 + type: timeseries + description: Auto repartition executor elapsed time by stage + unit: s + queries: + - expr: histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_executor_elapsed_bucket[$__rate_interval]))) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{stage}}' diff --git a/grafana/dashboards/metrics/standalone/dashboard.json b/grafana/dashboards/metrics/standalone/dashboard.json index 229bdd30ea..c8313ecb7a 100644 --- a/grafana/dashboards/metrics/standalone/dashboard.json +++ b/grafana/dashboards/metrics/standalone/dashboard.json @@ -10308,12 +10308,7 @@ "show": true }, "showHeader": true, - "sortBy": [ - { - "desc": true, - "displayName": "table_schema" - } - ] + "sortBy": [] }, "pluginVersion": "11.6.0", "targets": [ @@ -10695,6 +10690,864 @@ ], "title": "Datanode Data Distribution", "type": "piechart" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 105 + }, + "id": 370, + "panels": [], + "title": "Autopilot", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Region balancer action count", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 106 + }, + "id": 371, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (result) (\n changes(greptime_region_balancer_actions_total[$__rate_interval])\n)", + "hide": false, + "instant": false, + "legendFormat": "{{result}}", + "range": true, + "refId": "A" + } + ], + "title": "Region Balancer Actions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Region balancer gate stop count by gate and reason", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 106 + }, + "id": 372, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (gate, reason) (changes(greptime_region_balancer_gate_stop_total[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{gate}} / {{reason}}", + "range": true, + "refId": "A" + } + ], + "title": "Region Balancer Gate Stops", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Region balancer datanode count by state", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 114 + }, + "id": 373, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (state) (greptime_region_balancer_datanodes)", + "hide": false, + "instant": false, + "legendFormat": "{{state}}", + "range": true, + "refId": "A" + } + ], + "title": "Region Balancer Datanodes", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Region balancer region count by state", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 114 + }, + "id": 374, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (state) (greptime_region_balancer_regions)", + "hide": false, + "instant": false, + "legendFormat": "{{state}}", + "range": true, + "refId": "A" + } + ], + "title": "Region Balancer Regions", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Region balancer datanode stability statistics by state", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "binBps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*count$" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*ratio$" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 122 + }, + "id": 375, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "builder", + "expr": "sum by (state) (greptime_region_balancer_datanode_stability)", + "hide": false, + "instant": false, + "legendFormat": "{{state}}", + "range": true, + "refId": "A" + } + ], + "title": "Region Balancer Datanode Stability", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Auto repartition action count by result", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": -1, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 130 + }, + "id": 376, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (result) (changes(greptime_auto_repartition_actions_total[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{result}}", + "range": true, + "refId": "A" + } + ], + "title": "Auto Repartition Actions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Auto repartition gate stop count by gate and reason", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 130 + }, + "id": 377, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "sum by (gate, reason) (changes(greptime_auto_repartition_gate_stop_total[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{gate}} / {{reason}}", + "range": true, + "refId": "A" + } + ], + "title": "Auto Repartition Gate Stops", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Auto repartition sampling elapsed time by stage", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 138 + }, + "id": 378, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_sampling_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{stage}}", + "range": true, + "refId": "A" + } + ], + "title": "Auto Repartition Sampling P99", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "description": "Auto repartition executor elapsed time by stage", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 138 + }, + "id": 379, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.6.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_executor_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{stage}}", + "range": true, + "refId": "A" + } + ], + "title": "Auto Repartition Executor P99", + "type": "timeseries" } ], "preload": false, @@ -10828,12 +11681,12 @@ ] }, "time": { - "from": "now-2h", + "from": "now-1h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "GreptimeDB Copy", "uid": "dflfbxbwvvchsa", - "version": 37 + "version": 67 } \ No newline at end of file diff --git a/grafana/dashboards/metrics/standalone/dashboard.md b/grafana/dashboards/metrics/standalone/dashboard.md index d2a4ddba0a..672a2a8364 100644 --- a/grafana/dashboards/metrics/standalone/dashboard.md +++ b/grafana/dashboards/metrics/standalone/dashboard.md @@ -226,3 +226,17 @@ SELECT FROM leader_regions GROUP BY datanode ORDER BY data_size DESC;` | `piechart` | Distribution of leader regions and data size across datanodes. | `mysql` | `bytes` | -- | +# Autopilot +| Title | Query | Type | Description | Datasource | Unit | Legend Format | +| --- | --- | --- | --- | --- | --- | --- | +| Region Balancer Actions | `sum by (result) ( + changes(greptime_region_balancer_actions_total[$__rate_interval]) +)` | `timeseries` | Region balancer action count | `prometheus` | `short` | `{{result}}` | +| Region Balancer Gate Stops | `sum by (gate, reason) (changes(greptime_region_balancer_gate_stop_total[$__rate_interval]))` | `timeseries` | Region balancer gate stop count by gate and reason | `prometheus` | `short` | `{{gate}} / {{reason}}` | +| Region Balancer Datanodes | `sum by (state) (greptime_region_balancer_datanodes)` | `stat` | Region balancer datanode count by state | `prometheus` | `short` | `{{state}}` | +| Region Balancer Regions | `sum by (state) (greptime_region_balancer_regions)` | `stat` | Region balancer region count by state | `prometheus` | `short` | `{{state}}` | +| Region Balancer Datanode Stability | `sum by (state) (greptime_region_balancer_datanode_stability)` | `stat` | Region balancer datanode stability statistics by state | `prometheus` | `binBps` | `{{state}}` | +| Auto Repartition Actions | `sum by (result) (changes(greptime_auto_repartition_actions_total[$__rate_interval]))` | `timeseries` | Auto repartition action count by result | `prometheus` | `short` | `{{result}}` | +| Auto Repartition Gate Stops | `sum by (gate, reason) (changes(greptime_auto_repartition_gate_stop_total[$__rate_interval]))` | `timeseries` | Auto repartition gate stop count by gate and reason | `prometheus` | `short` | `{{gate}} / {{reason}}` | +| Auto Repartition Sampling P99 | `histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_sampling_elapsed_bucket[$__rate_interval])))` | `timeseries` | Auto repartition sampling elapsed time by stage | `prometheus` | `s` | `{{stage}}` | +| Auto Repartition Executor P99 | `histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_executor_elapsed_bucket[$__rate_interval])))` | `timeseries` | Auto repartition executor elapsed time by stage | `prometheus` | `s` | `{{stage}}` | diff --git a/grafana/dashboards/metrics/standalone/dashboard.yaml b/grafana/dashboards/metrics/standalone/dashboard.yaml index db1d2b6a7a..c23501c074 100644 --- a/grafana/dashboards/metrics/standalone/dashboard.yaml +++ b/grafana/dashboards/metrics/standalone/dashboard.yaml @@ -1215,3 +1215,98 @@ groups: datasource: type: mysql uid: ${information_schema} + - title: Autopilot + panels: + - title: Region Balancer Actions + type: timeseries + description: Region balancer action count + unit: short + queries: + - expr: |- + sum by (result) ( + changes(greptime_region_balancer_actions_total[$__rate_interval]) + ) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{result}}' + - title: Region Balancer Gate Stops + type: timeseries + description: Region balancer gate stop count by gate and reason + unit: short + queries: + - expr: sum by (gate, reason) (changes(greptime_region_balancer_gate_stop_total[$__rate_interval])) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{gate}} / {{reason}}' + - title: Region Balancer Datanodes + type: stat + description: Region balancer datanode count by state + unit: short + queries: + - expr: sum by (state) (greptime_region_balancer_datanodes) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{state}}' + - title: Region Balancer Regions + type: stat + description: Region balancer region count by state + unit: short + queries: + - expr: sum by (state) (greptime_region_balancer_regions) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{state}}' + - title: Region Balancer Datanode Stability + type: stat + description: Region balancer datanode stability statistics by state + unit: binBps + queries: + - expr: sum by (state) (greptime_region_balancer_datanode_stability) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{state}}' + - title: Auto Repartition Actions + type: timeseries + description: Auto repartition action count by result + unit: short + queries: + - expr: sum by (result) (changes(greptime_auto_repartition_actions_total[$__rate_interval])) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{result}}' + - title: Auto Repartition Gate Stops + type: timeseries + description: Auto repartition gate stop count by gate and reason + unit: short + queries: + - expr: sum by (gate, reason) (changes(greptime_auto_repartition_gate_stop_total[$__rate_interval])) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{gate}} / {{reason}}' + - title: Auto Repartition Sampling P99 + type: timeseries + description: Auto repartition sampling elapsed time by stage + unit: s + queries: + - expr: histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_sampling_elapsed_bucket[$__rate_interval]))) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{stage}}' + - title: Auto Repartition Executor P99 + type: timeseries + description: Auto repartition executor elapsed time by stage + unit: s + queries: + - expr: histogram_quantile(0.99, sum by (le, stage) (rate(greptime_auto_repartition_executor_elapsed_bucket[$__rate_interval]))) + datasource: + type: prometheus + uid: ${metrics} + legendFormat: '{{stage}}'