feat: add flow batching metrics to grafana dashboard (#8353)

Signed-off-by: evenyag <realevenyag@gmail.com>
2026-07-03 12:30:40 +00:00 · 2026-06-24 17:35:25 +08:00
parent 619460f742
commit 8513d8d6b6
6 changed files with 1780 additions and 2 deletions
--- a/grafana/dashboards/metrics/cluster/dashboard.json
+++ b/grafana/dashboards/metrics/cluster/dashboard.json
@@ -3636,6 +3636,19 @@
          "legendFormat": "trigger-save-alert",
          "range": true,
          "refId": "D"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(greptime_flow_batching_error_count[$__rate_interval]))",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "flow-batching",
+          "range": true,
+          "refId": "E"
        }
      ],
      "title": "Flow and Trigger Failures",
@@ -17027,6 +17040,770 @@
          ],
          "title": "Flow Processing Error per Instance",
          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode task attempt, error, and slow-query rates by flow.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ops"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 16,
+            "y": 178
+          },
+          "id": 558,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (rate(greptime_flow_batching_start_query_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-attempt",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (rate(greptime_flow_batching_error_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-error",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_slow_query_secs_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-slow",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Flow Batching Attempt / Error Rate",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode query latency by flow.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 0,
+            "y": 186
+          },
+          "id": 559,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.99, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p99",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-avg",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Flow Batching Query Latency",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode query window count by flow.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 8,
+            "y": 186
+          },
+          "id": 560,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_bucket[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-avg",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Flow Batching Query Window Count",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode query and stalled window sizes by flow.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 16,
+            "y": 186
+          },
+          "id": 561,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_size_secs_bucket[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-query-p95",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_stalled_window_size_secs_bucket[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-stalled-p95",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Flow Batching Window Size",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode dirty time-window count marked by bulk inserts.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 0,
+            "y": 194
+          },
+          "id": 562,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (greptime_flow_batching_engine_bulk_mark_time_window)",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Flow Batching Bulk Dirty Windows",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode checkpoint state-machine decision rate.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ops"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 8,
+            "y": 194
+          },
+          "id": 563,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id,mode,decision,reason) (rate(greptime_flow_batching_checkpoint_decision_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]-[{{decision}}]-[{{reason}}]",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Flow Batching Checkpoint Decisions",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode query attempts by checkpoint mode.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ops"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 16,
+            "y": 194
+          },
+          "id": 564,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id,mode) (rate(greptime_flow_batching_query_mode_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Flow Batching Query Mode",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
        }
      ],
      "title": "Flownode",
--- a/grafana/dashboards/metrics/cluster/dashboard.md
+++ b/grafana/dashboards/metrics/cluster/dashboard.md
@@ -36,7 +36,7 @@
 | Scan and Compaction Memory Rejects | `sum(rate(greptime_mito_scan_requests_rejected_total{instance=~"$datanode"}[$__rate_interval]))`<br/>`sum(rate(greptime_mito_scan_memory_exhausted_total{instance=~"$datanode"}[$__rate_interval]))`<br/>`sum(rate(greptime_mito_compaction_memory_rejected_total{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Datanode scan and compaction memory rejection/exhaustion counters. | `prometheus` | `rps` | `scan-rejected` |
 | OpenDAL Errors | `sum by (scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode",error!="NotFound"}[$__rate_interval]))` | `timeseries` | Object-store errors by scheme, operation, and error, excluding NotFound noise. | `prometheus` | `eps` | `{{scheme}}-{{operation}}-{{error}}` |
 | Metasrv Failures | `sum(rate(greptime_meta_region_migration_fail[$__rate_interval]))`<br/>`sum(rate(greptime_meta_reconciliation_procedure_error[$__rate_interval]))` | `timeseries` | Region migration and reconciliation failures in metasrv. | `prometheus` | `eps` | `migration-fail` |
-| Flow and Trigger Failures | `sum by (code) (rate(greptime_flow_errors[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_evaluate_failure_count[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_send_alert_failure_count[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval]))` | `timeseries` | Derived-data and alerting pipeline failures. | `prometheus` | `eps` | `flow-{{code}}` |
+| Flow and Trigger Failures | `sum by (code) (rate(greptime_flow_errors[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_evaluate_failure_count[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_send_alert_failure_count[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval]))`<br/>`sum(rate(greptime_flow_batching_error_count[$__rate_interval]))` | `timeseries` | Derived-data and alerting pipeline failures. | `prometheus` | `eps` | `flow-{{code}}` |
 | Mito GC Failures | `sum(rate(greptime_mito_gc_errors_total{instance=~"$datanode"}[$__rate_interval]))`<br/>`sum(rate(greptime_mito_gc_orphaned_index_files{instance=~"$datanode"}[$__rate_interval]))`<br/>`sum(rate(greptime_mito_gc_skipped_unparsable_files{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Mito garbage-collection errors and skipped/orphaned files on datanodes. | `prometheus` | `short` | `gc-errors` |
 # Capacity
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
@@ -282,6 +282,13 @@ ORDER BY data_size DESC;` | `piechart` | Distribution of leader regions and data
 | Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
 | Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
 | Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
+| Flow Batching Attempt / Error Rate | `sum by(instance,pod,flow_id) (rate(greptime_flow_batching_start_query_count[$__rate_interval]))`<br/>`sum by(instance,pod,flow_id) (rate(greptime_flow_batching_error_count[$__rate_interval]))`<br/>`sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_slow_query_secs_count[$__rate_interval]))` | `timeseries` | Flow batching mode task attempt, error, and slow-query rates by flow. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-attempt` |
+| Flow Batching Query Latency | `histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))`<br/>`sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_count[$__rate_interval]))` | `timeseries` | Flow batching mode query latency by flow. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95` |
+| Flow Batching Query Window Count | `histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_bucket[$__rate_interval])))`<br/>`sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_count[$__rate_interval]))` | `timeseries` | Flow batching mode query window count by flow. | `prometheus` | `short` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95` |
+| Flow Batching Window Size | `histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_size_secs_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_stalled_window_size_secs_bucket[$__rate_interval])))` | `timeseries` | Flow batching mode query and stalled window sizes by flow. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-query-p95` |
+| Flow Batching Bulk Dirty Windows | `sum by(instance,pod,flow_id) (greptime_flow_batching_engine_bulk_mark_time_window)` | `timeseries` | Flow batching mode dirty time-window count marked by bulk inserts. | `prometheus` | `short` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]` |
+| Flow Batching Checkpoint Decisions | `sum by(instance,pod,flow_id,mode,decision,reason) (rate(greptime_flow_batching_checkpoint_decision_count[$__rate_interval]))` | `timeseries` | Flow batching mode checkpoint state-machine decision rate. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]-[{{decision}}]-[{{reason}}]` |
+| Flow Batching Query Mode | `sum by(instance,pod,flow_id,mode) (rate(greptime_flow_batching_query_mode_count[$__rate_interval]))` | `timeseries` | Flow batching mode query attempts by checkpoint mode. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]` |
 # Trigger
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
--- a/grafana/dashboards/metrics/cluster/dashboard.yaml
+++ b/grafana/dashboards/metrics/cluster/dashboard.yaml
@@ -535,6 +535,11 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: trigger-save-alert
+            - expr: sum(rate(greptime_flow_batching_error_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: flow-batching
        - title: Mito GC Failures
          type: timeseries
          description: Mito garbage-collection errors and skipped/orphaned files on datanodes.
@@ -2245,6 +2250,106 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
+        - title: Flow Batching Attempt / Error Rate
+          type: timeseries
+          description: Flow batching mode task attempt, error, and slow-query rates by flow.
+          unit: ops
+          queries:
+            - expr: sum by(instance,pod,flow_id) (rate(greptime_flow_batching_start_query_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-attempt'
+            - expr: sum by(instance,pod,flow_id) (rate(greptime_flow_batching_error_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-error'
+            - expr: sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_slow_query_secs_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-slow'
+        - title: Flow Batching Query Latency
+          type: timeseries
+          description: Flow batching mode query latency by flow.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95'
+            - expr: histogram_quantile(0.99, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p99'
+            - expr: sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-avg'
+        - title: Flow Batching Query Window Count
+          type: timeseries
+          description: Flow batching mode query window count by flow.
+          unit: short
+          queries:
+            - expr: histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95'
+            - expr: sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-avg'
+        - title: Flow Batching Window Size
+          type: timeseries
+          description: Flow batching mode query and stalled window sizes by flow.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_size_secs_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-query-p95'
+            - expr: histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_stalled_window_size_secs_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-stalled-p95'
+        - title: Flow Batching Bulk Dirty Windows
+          type: timeseries
+          description: Flow batching mode dirty time-window count marked by bulk inserts.
+          unit: short
+          queries:
+            - expr: sum by(instance,pod,flow_id) (greptime_flow_batching_engine_bulk_mark_time_window)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]'
+        - title: Flow Batching Checkpoint Decisions
+          type: timeseries
+          description: Flow batching mode checkpoint state-machine decision rate.
+          unit: ops
+          queries:
+            - expr: sum by(instance,pod,flow_id,mode,decision,reason) (rate(greptime_flow_batching_checkpoint_decision_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]-[{{decision}}]-[{{reason}}]'
+        - title: Flow Batching Query Mode
+          type: timeseries
+          description: Flow batching mode query attempts by checkpoint mode.
+          unit: ops
+          queries:
+            - expr: sum by(instance,pod,flow_id,mode) (rate(greptime_flow_batching_query_mode_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]'
    - title: Trigger
      panels:
        - title: Trigger Count
--- a/grafana/dashboards/metrics/standalone/dashboard.json
+++ b/grafana/dashboards/metrics/standalone/dashboard.json
@@ -3636,6 +3636,19 @@
          "legendFormat": "trigger-save-alert",
          "range": true,
          "refId": "D"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(greptime_flow_batching_error_count[$__rate_interval]))",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "flow-batching",
+          "range": true,
+          "refId": "E"
        }
      ],
      "title": "Flow and Trigger Failures",
@@ -17027,6 +17040,770 @@
          ],
          "title": "Flow Processing Error per Instance",
          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode task attempt, error, and slow-query rates by flow.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ops"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 16,
+            "y": 178
+          },
+          "id": 558,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (rate(greptime_flow_batching_start_query_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-attempt",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (rate(greptime_flow_batching_error_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-error",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_slow_query_secs_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-slow",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Flow Batching Attempt / Error Rate",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode query latency by flow.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 0,
+            "y": 186
+          },
+          "id": 559,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.99, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p99",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-avg",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Flow Batching Query Latency",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode query window count by flow.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 8,
+            "y": 186
+          },
+          "id": 560,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_bucket[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-avg",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Flow Batching Query Window Count",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode query and stalled window sizes by flow.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 16,
+            "y": 186
+          },
+          "id": 561,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_size_secs_bucket[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-query-p95",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_stalled_window_size_secs_bucket[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-stalled-p95",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Flow Batching Window Size",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode dirty time-window count marked by bulk inserts.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 0,
+            "y": 194
+          },
+          "id": 562,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id) (greptime_flow_batching_engine_bulk_mark_time_window)",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Flow Batching Bulk Dirty Windows",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode checkpoint state-machine decision rate.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ops"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 8,
+            "y": 194
+          },
+          "id": 563,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id,mode,decision,reason) (rate(greptime_flow_batching_checkpoint_decision_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]-[{{decision}}]-[{{reason}}]",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Flow Batching Checkpoint Decisions",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${metrics}"
+          },
+          "description": "Flow batching mode query attempts by checkpoint mode.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ops"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 8,
+            "x": 16,
+            "y": 194
+          },
+          "id": 564,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${metrics}"
+              },
+              "editorMode": "code",
+              "expr": "sum by(instance,pod,flow_id,mode) (rate(greptime_flow_batching_query_mode_count[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Flow Batching Query Mode",
+          "type": "timeseries",
+          "pluginVersion": "11.6.0"
        }
      ],
      "title": "Flownode",
--- a/grafana/dashboards/metrics/standalone/dashboard.md
+++ b/grafana/dashboards/metrics/standalone/dashboard.md
@@ -36,7 +36,7 @@
 | Scan and Compaction Memory Rejects | `sum(rate(greptime_mito_scan_requests_rejected_total[$__rate_interval]))`<br/>`sum(rate(greptime_mito_scan_memory_exhausted_total[$__rate_interval]))`<br/>`sum(rate(greptime_mito_compaction_memory_rejected_total[$__rate_interval]))` | `timeseries` | Datanode scan and compaction memory rejection/exhaustion counters. | `prometheus` | `rps` | `scan-rejected` |
 | OpenDAL Errors | `sum by (scheme, operation, error) (rate(opendal_operation_errors_total{error!="NotFound"}[$__rate_interval]))` | `timeseries` | Object-store errors by scheme, operation, and error, excluding NotFound noise. | `prometheus` | `eps` | `{{scheme}}-{{operation}}-{{error}}` |
 | Metasrv Failures | `sum(rate(greptime_meta_region_migration_fail[$__rate_interval]))`<br/>`sum(rate(greptime_meta_reconciliation_procedure_error[$__rate_interval]))` | `timeseries` | Region migration and reconciliation failures in metasrv. | `prometheus` | `eps` | `migration-fail` |
-| Flow and Trigger Failures | `sum by (code) (rate(greptime_flow_errors[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_evaluate_failure_count[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_send_alert_failure_count[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval]))` | `timeseries` | Derived-data and alerting pipeline failures. | `prometheus` | `eps` | `flow-{{code}}` |
+| Flow and Trigger Failures | `sum by (code) (rate(greptime_flow_errors[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_evaluate_failure_count[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_send_alert_failure_count[$__rate_interval]))`<br/>`sum(rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval]))`<br/>`sum(rate(greptime_flow_batching_error_count[$__rate_interval]))` | `timeseries` | Derived-data and alerting pipeline failures. | `prometheus` | `eps` | `flow-{{code}}` |
 | Mito GC Failures | `sum(rate(greptime_mito_gc_errors_total[$__rate_interval]))`<br/>`sum(rate(greptime_mito_gc_orphaned_index_files[$__rate_interval]))`<br/>`sum(rate(greptime_mito_gc_skipped_unparsable_files[$__rate_interval]))` | `timeseries` | Mito garbage-collection errors and skipped/orphaned files on datanodes. | `prometheus` | `short` | `gc-errors` |
 # Capacity
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
@@ -282,6 +282,13 @@ ORDER BY data_size DESC;` | `piechart` | Distribution of leader regions and data
 | Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
 | Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
 | Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
+| Flow Batching Attempt / Error Rate | `sum by(instance,pod,flow_id) (rate(greptime_flow_batching_start_query_count[$__rate_interval]))`<br/>`sum by(instance,pod,flow_id) (rate(greptime_flow_batching_error_count[$__rate_interval]))`<br/>`sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_slow_query_secs_count[$__rate_interval]))` | `timeseries` | Flow batching mode task attempt, error, and slow-query rates by flow. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-attempt` |
+| Flow Batching Query Latency | `histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))`<br/>`sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_count[$__rate_interval]))` | `timeseries` | Flow batching mode query latency by flow. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95` |
+| Flow Batching Query Window Count | `histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_bucket[$__rate_interval])))`<br/>`sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_count[$__rate_interval]))` | `timeseries` | Flow batching mode query window count by flow. | `prometheus` | `short` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95` |
+| Flow Batching Window Size | `histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_size_secs_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_stalled_window_size_secs_bucket[$__rate_interval])))` | `timeseries` | Flow batching mode query and stalled window sizes by flow. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-query-p95` |
+| Flow Batching Bulk Dirty Windows | `sum by(instance,pod,flow_id) (greptime_flow_batching_engine_bulk_mark_time_window)` | `timeseries` | Flow batching mode dirty time-window count marked by bulk inserts. | `prometheus` | `short` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]` |
+| Flow Batching Checkpoint Decisions | `sum by(instance,pod,flow_id,mode,decision,reason) (rate(greptime_flow_batching_checkpoint_decision_count[$__rate_interval]))` | `timeseries` | Flow batching mode checkpoint state-machine decision rate. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]-[{{decision}}]-[{{reason}}]` |
+| Flow Batching Query Mode | `sum by(instance,pod,flow_id,mode) (rate(greptime_flow_batching_query_mode_count[$__rate_interval]))` | `timeseries` | Flow batching mode query attempts by checkpoint mode. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]` |
 # Trigger
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
--- a/grafana/dashboards/metrics/standalone/dashboard.yaml
+++ b/grafana/dashboards/metrics/standalone/dashboard.yaml
@@ -535,6 +535,11 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: trigger-save-alert
+            - expr: sum(rate(greptime_flow_batching_error_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: flow-batching
        - title: Mito GC Failures
          type: timeseries
          description: Mito garbage-collection errors and skipped/orphaned files on datanodes.
@@ -2245,6 +2250,106 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
+        - title: Flow Batching Attempt / Error Rate
+          type: timeseries
+          description: Flow batching mode task attempt, error, and slow-query rates by flow.
+          unit: ops
+          queries:
+            - expr: sum by(instance,pod,flow_id) (rate(greptime_flow_batching_start_query_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-attempt'
+            - expr: sum by(instance,pod,flow_id) (rate(greptime_flow_batching_error_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-error'
+            - expr: sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_slow_query_secs_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-slow'
+        - title: Flow Batching Query Latency
+          type: timeseries
+          description: Flow batching mode query latency by flow.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95'
+            - expr: histogram_quantile(0.99, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p99'
+            - expr: sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_time_secs_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-avg'
+        - title: Flow Batching Query Window Count
+          type: timeseries
+          description: Flow batching mode query window count by flow.
+          unit: short
+          queries:
+            - expr: histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-p95'
+            - expr: sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_sum[$__rate_interval])) / sum by(instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_cnt_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-avg'
+        - title: Flow Batching Window Size
+          type: timeseries
+          description: Flow batching mode query and stalled window sizes by flow.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_query_window_size_secs_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-query-p95'
+            - expr: histogram_quantile(0.95, sum by(le,instance,pod,flow_id) (rate(greptime_flow_batching_engine_stalled_window_size_secs_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-stalled-p95'
+        - title: Flow Batching Bulk Dirty Windows
+          type: timeseries
+          description: Flow batching mode dirty time-window count marked by bulk inserts.
+          unit: short
+          queries:
+            - expr: sum by(instance,pod,flow_id) (greptime_flow_batching_engine_bulk_mark_time_window)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]'
+        - title: Flow Batching Checkpoint Decisions
+          type: timeseries
+          description: Flow batching mode checkpoint state-machine decision rate.
+          unit: ops
+          queries:
+            - expr: sum by(instance,pod,flow_id,mode,decision,reason) (rate(greptime_flow_batching_checkpoint_decision_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]-[{{decision}}]-[{{reason}}]'
+        - title: Flow Batching Query Mode
+          type: timeseries
+          description: Flow batching mode query attempts by checkpoint mode.
+          unit: ops
+          queries:
+            - expr: sum by(instance,pod,flow_id,mode) (rate(greptime_flow_batching_query_mode_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[flow={{flow_id}}]-[{{mode}}]'
    - title: Trigger
      panels:
        - title: Trigger Count