From 28255d8aded717fd170394f70e251fa105ff4c30 Mon Sep 17 00:00:00 2001
From: fys <40801205+fengys1996@users.noreply.github.com>
Date: Thu, 8 Jan 2026 14:47:46 +0800
Subject: [PATCH] chore: add grafana dashboard about trigger (#7536)
* chore: add grafana dashboard about trigger
* fix: ci
* fix: ci
* fix: row title
* fix: ci
* modify desc
* fix: wrong legend
* revert some
---
.../dashboards/metrics/cluster/dashboard.json | 755 +++++++++++++++++-
.../dashboards/metrics/cluster/dashboard.md | 26 +-
.../dashboards/metrics/cluster/dashboard.yaml | 90 ++-
.../metrics/standalone/dashboard.json | 755 +++++++++++++++++-
.../metrics/standalone/dashboard.md | 26 +-
.../metrics/standalone/dashboard.yaml | 90 ++-
6 files changed, 1728 insertions(+), 14 deletions(-)
diff --git a/grafana/dashboards/metrics/cluster/dashboard.json b/grafana/dashboards/metrics/cluster/dashboard.json
index 79c037dc12..918eb3c4e8 100644
--- a/grafana/dashboards/metrics/cluster/dashboard.json
+++ b/grafana/dashboards/metrics/cluster/dashboard.json
@@ -8863,7 +8863,7 @@
"type": "prometheus",
"uid": "${metrics}"
},
- "description": "Elapsed of Reconciliation steps ",
+ "description": "Elapsed of Reconciliation steps",
"fieldConfig": {
"defaults": {
"color": {
@@ -9366,7 +9366,7 @@
"editorMode": "code",
"expr": "greptime_flow_input_buf_size",
"instant": false,
- "legendFormat": "[{{instance}}]-[{{pod}]",
+ "legendFormat": "[{{instance}}]-[{{pod}}]",
"range": true,
"refId": "A"
}
@@ -9472,6 +9472,755 @@
],
"title": "Flownode",
"type": "row"
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 187
+ },
+ "id": 357,
+ "panels": [],
+ "title": "Trigger",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "bf9fzta69bhtsa"
+ },
+ "description": "Total number of triggers currently defined.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 188
+ },
+ "id": 358,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "greptime_trigger_count{}",
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Trigger Count",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Elapsed time for trigger evaluation, including query execution and condition evaluation.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 196
+ },
+ "id": 359,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.99, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-p99",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.75, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-p75",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Trigger Eval Elapsed",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Rate of failed trigger evaluations.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 196
+ },
+ "id": 360,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "rate(greptime_trigger_evaluate_failure_count[$__rate_interval])",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Trigger Eval Failure Rate",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Elapsed time to send trigger alerts to notification channels.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 204
+ },
+ "id": 361,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.99, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.75, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p75",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Send Alert Elapsed",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Rate of failures when sending trigger alerts.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 204
+ },
+ "id": 364,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "rate(greptime_trigger_send_alert_failure_count[$__rate_interval])",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Send Alert Failure Rate",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Elapsed time to persist trigger alert records.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 212
+ },
+ "id": 363,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.99, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.75, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p75",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Save Alert Elapsed",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Rate of failures when persisting trigger alert records.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 212
+ },
+ "id": 362,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Save Alert Failure Rate",
+ "type": "timeseries"
}
],
"preload": false,
@@ -9613,4 +10362,4 @@
"title": "GreptimeDB",
"uid": "dejf3k5e7g2kgb",
"version": 15
-}
\ No newline at end of file
+}
diff --git a/grafana/dashboards/metrics/cluster/dashboard.md b/grafana/dashboards/metrics/cluster/dashboard.md
index e57276779d..c0bc49770b 100644
--- a/grafana/dashboards/metrics/cluster/dashboard.md
+++ b/grafana/dashboards/metrics/cluster/dashboard.md
@@ -111,12 +111,34 @@
| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`
`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`
`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`
`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`
`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`
`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
| Reconciliation stats | `greptime_meta_reconciliation_stats` | `timeseries` | Reconciliation stats | `prometheus` | `s` | `{{pod}}-{{table_type}}-{{type}}` |
-| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
+| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
# Flownode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`
`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`
`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
-| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}]` |
+| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
+# Trigger
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Trigger Count | `greptime_trigger_count{}` | `timeseries` | Total number of triggers currently defined. | `prometheus` | -- | `__auto` |
+| Trigger Eval Elapsed | `histogram_quantile(0.99,
+ rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])
+)`
`histogram_quantile(0.75,
+ rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])
+)` | `timeseries` | Elapsed time for trigger evaluation, including query execution and condition evaluation. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
+| Trigger Eval Failure Rate | `rate(greptime_trigger_evaluate_failure_count[$__rate_interval])` | `timeseries` | Rate of failed trigger evaluations. | `prometheus` | `none` | `__auto` |
+| Send Alert Elapsed | `histogram_quantile(0.99,
+ rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])
+)`
`histogram_quantile(0.75,
+ rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])
+)` | `timeseries` | Elapsed time to send trigger alerts to notification channels. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99` |
+| Send Alert Failure Rate | `rate(greptime_trigger_send_alert_failure_count[$__rate_interval])` | `timeseries` | Rate of failures when sending trigger alerts. | `prometheus` | `none` | `__auto` |
+| Save Alert Elapsed | `histogram_quantile(0.99,
+ rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])
+)`
`histogram_quantile(0.75,
+ rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])
+)` | `timeseries` | Elapsed time to persist trigger alert records. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99` |
+| Save Alert Failure Rate | `rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])` | `timeseries` | Rate of failures when persisting trigger alert records. | `prometheus` | `none` | `__auto` |
diff --git a/grafana/dashboards/metrics/cluster/dashboard.yaml b/grafana/dashboards/metrics/cluster/dashboard.yaml
index 0173ef456b..934cf58c0c 100644
--- a/grafana/dashboards/metrics/cluster/dashboard.yaml
+++ b/grafana/dashboards/metrics/cluster/dashboard.yaml
@@ -1002,7 +1002,7 @@ groups:
legendFormat: '{{pod}}-{{table_type}}-{{type}}'
- title: Reconciliation steps
type: timeseries
- description: 'Elapsed of Reconciliation steps '
+ description: Elapsed of Reconciliation steps
unit: s
queries:
- expr: histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)
@@ -1057,7 +1057,7 @@ groups:
datasource:
type: prometheus
uid: ${metrics}
- legendFormat: '[{{instance}}]-[{{pod}]'
+ legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flow Processing Error per Instance
type: timeseries
description: Flow Processing Error per Instance.
@@ -1067,3 +1067,89 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
+ - title: Trigger
+ panels:
+ - title: Trigger Count
+ type: timeseries
+ description: Total number of triggers currently defined.
+ queries:
+ - expr: greptime_trigger_count{}
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: __auto
+ - title: Trigger Eval Elapsed
+ type: timeseries
+ description: Elapsed time for trigger evaluation, including query execution and condition evaluation.
+ unit: s
+ queries:
+ - expr: "histogram_quantile(0.99, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-p99'
+ - expr: "histogram_quantile(0.75, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-p75'
+ - title: Trigger Eval Failure Rate
+ type: timeseries
+ description: Rate of failed trigger evaluations.
+ unit: none
+ queries:
+ - expr: rate(greptime_trigger_evaluate_failure_count[$__rate_interval])
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: __auto
+ - title: Send Alert Elapsed
+ type: timeseries
+ description: Elapsed time to send trigger alerts to notification channels.
+ unit: s
+ queries:
+ - expr: "histogram_quantile(0.99, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99'
+ - expr: "histogram_quantile(0.75, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p75'
+ - title: Send Alert Failure Rate
+ type: timeseries
+ description: Rate of failures when sending trigger alerts.
+ unit: none
+ queries:
+ - expr: rate(greptime_trigger_send_alert_failure_count[$__rate_interval])
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: __auto
+ - title: Save Alert Elapsed
+ type: timeseries
+ description: Elapsed time to persist trigger alert records.
+ unit: s
+ queries:
+ - expr: "histogram_quantile(0.99, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99'
+ - expr: "histogram_quantile(0.75, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p75'
+ - title: Save Alert Failure Rate
+ type: timeseries
+ description: Rate of failures when persisting trigger alert records.
+ unit: none
+ queries:
+ - expr: rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: __auto
diff --git a/grafana/dashboards/metrics/standalone/dashboard.json b/grafana/dashboards/metrics/standalone/dashboard.json
index a88f784101..28f38f9dd7 100644
--- a/grafana/dashboards/metrics/standalone/dashboard.json
+++ b/grafana/dashboards/metrics/standalone/dashboard.json
@@ -8863,7 +8863,7 @@
"type": "prometheus",
"uid": "${metrics}"
},
- "description": "Elapsed of Reconciliation steps ",
+ "description": "Elapsed of Reconciliation steps",
"fieldConfig": {
"defaults": {
"color": {
@@ -9366,7 +9366,7 @@
"editorMode": "code",
"expr": "greptime_flow_input_buf_size",
"instant": false,
- "legendFormat": "[{{instance}}]-[{{pod}]",
+ "legendFormat": "[{{instance}}]-[{{pod}}]",
"range": true,
"refId": "A"
}
@@ -9472,6 +9472,755 @@
],
"title": "Flownode",
"type": "row"
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 187
+ },
+ "id": 357,
+ "panels": [],
+ "title": "Trigger",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "bf9fzta69bhtsa"
+ },
+ "description": "Total number of triggers currently defined.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 188
+ },
+ "id": 358,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "greptime_trigger_count{}",
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Trigger Count",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Elapsed time for trigger evaluation, including query execution and condition evaluation.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 196
+ },
+ "id": 359,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.99, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-p99",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.75, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-p75",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Trigger Eval Elapsed",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Rate of failed trigger evaluations.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 196
+ },
+ "id": 360,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "rate(greptime_trigger_evaluate_failure_count[$__rate_interval])",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Trigger Eval Failure Rate",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Elapsed time to send trigger alerts to notification channels.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 204
+ },
+ "id": 361,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.99, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.75, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p75",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Send Alert Elapsed",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Rate of failures when sending trigger alerts.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 204
+ },
+ "id": 364,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "rate(greptime_trigger_send_alert_failure_count[$__rate_interval])",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Send Alert Failure Rate",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Elapsed time to persist trigger alert records.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 212
+ },
+ "id": 363,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.99, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.75, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p75",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Save Alert Elapsed",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "description": "Rate of failures when persisting trigger alert records.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 212
+ },
+ "id": 362,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "11.6.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${metrics}"
+ },
+ "editorMode": "code",
+ "expr": "rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Save Alert Failure Rate",
+ "type": "timeseries"
}
],
"preload": false,
@@ -9613,4 +10362,4 @@
"title": "GreptimeDB",
"uid": "dejf3k5e7g2kgb",
"version": 15
-}
\ No newline at end of file
+}
diff --git a/grafana/dashboards/metrics/standalone/dashboard.md b/grafana/dashboards/metrics/standalone/dashboard.md
index 09a62681db..63b45208c5 100644
--- a/grafana/dashboards/metrics/standalone/dashboard.md
+++ b/grafana/dashboards/metrics/standalone/dashboard.md
@@ -111,12 +111,34 @@
| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`
`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`
`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`
`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`
`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`
`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
| Reconciliation stats | `greptime_meta_reconciliation_stats` | `timeseries` | Reconciliation stats | `prometheus` | `s` | `{{pod}}-{{table_type}}-{{type}}` |
-| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
+| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
# Flownode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`
`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`
`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
-| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}]` |
+| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
+# Trigger
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Trigger Count | `greptime_trigger_count{}` | `timeseries` | Total number of triggers currently defined. | `prometheus` | -- | `__auto` |
+| Trigger Eval Elapsed | `histogram_quantile(0.99,
+ rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])
+)`
`histogram_quantile(0.75,
+ rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])
+)` | `timeseries` | Elapsed time for trigger evaluation, including query execution and condition evaluation. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
+| Trigger Eval Failure Rate | `rate(greptime_trigger_evaluate_failure_count[$__rate_interval])` | `timeseries` | Rate of failed trigger evaluations. | `prometheus` | `none` | `__auto` |
+| Send Alert Elapsed | `histogram_quantile(0.99,
+ rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])
+)`
`histogram_quantile(0.75,
+ rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])
+)` | `timeseries` | Elapsed time to send trigger alerts to notification channels. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99` |
+| Send Alert Failure Rate | `rate(greptime_trigger_send_alert_failure_count[$__rate_interval])` | `timeseries` | Rate of failures when sending trigger alerts. | `prometheus` | `none` | `__auto` |
+| Save Alert Elapsed | `histogram_quantile(0.99,
+ rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])
+)`
`histogram_quantile(0.75,
+ rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])
+)` | `timeseries` | Elapsed time to persist trigger alert records. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99` |
+| Save Alert Failure Rate | `rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])` | `timeseries` | Rate of failures when persisting trigger alert records. | `prometheus` | `none` | `__auto` |
diff --git a/grafana/dashboards/metrics/standalone/dashboard.yaml b/grafana/dashboards/metrics/standalone/dashboard.yaml
index f3013589cd..32f771f66f 100644
--- a/grafana/dashboards/metrics/standalone/dashboard.yaml
+++ b/grafana/dashboards/metrics/standalone/dashboard.yaml
@@ -1002,7 +1002,7 @@ groups:
legendFormat: '{{pod}}-{{table_type}}-{{type}}'
- title: Reconciliation steps
type: timeseries
- description: 'Elapsed of Reconciliation steps '
+ description: Elapsed of Reconciliation steps
unit: s
queries:
- expr: histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)
@@ -1057,7 +1057,7 @@ groups:
datasource:
type: prometheus
uid: ${metrics}
- legendFormat: '[{{instance}}]-[{{pod}]'
+ legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flow Processing Error per Instance
type: timeseries
description: Flow Processing Error per Instance.
@@ -1067,3 +1067,89 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
+ - title: Trigger
+ panels:
+ - title: Trigger Count
+ type: timeseries
+ description: Total number of triggers currently defined.
+ queries:
+ - expr: greptime_trigger_count{}
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: __auto
+ - title: Trigger Eval Elapsed
+ type: timeseries
+ description: Elapsed time for trigger evaluation, including query execution and condition evaluation.
+ unit: s
+ queries:
+ - expr: "histogram_quantile(0.99, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-p99'
+ - expr: "histogram_quantile(0.75, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-p75'
+ - title: Trigger Eval Failure Rate
+ type: timeseries
+ description: Rate of failed trigger evaluations.
+ unit: none
+ queries:
+ - expr: rate(greptime_trigger_evaluate_failure_count[$__rate_interval])
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: __auto
+ - title: Send Alert Elapsed
+ type: timeseries
+ description: Elapsed time to send trigger alerts to notification channels.
+ unit: s
+ queries:
+ - expr: "histogram_quantile(0.99, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99'
+ - expr: "histogram_quantile(0.75, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p75'
+ - title: Send Alert Failure Rate
+ type: timeseries
+ description: Rate of failures when sending trigger alerts.
+ unit: none
+ queries:
+ - expr: rate(greptime_trigger_send_alert_failure_count[$__rate_interval])
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: __auto
+ - title: Save Alert Elapsed
+ type: timeseries
+ description: Elapsed time to persist trigger alert records.
+ unit: s
+ queries:
+ - expr: "histogram_quantile(0.99, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99'
+ - expr: "histogram_quantile(0.75, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)"
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: '[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p75'
+ - title: Save Alert Failure Rate
+ type: timeseries
+ description: Rate of failures when persisting trigger alert records.
+ unit: none
+ queries:
+ - expr: rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])
+ datasource:
+ type: prometheus
+ uid: ${metrics}
+ legendFormat: __auto