Files
greptimedb/grafana/scripts/gen-dashboards.sh
Yingwen 9ed84cc26f fix: improve Grafana metrics dashboards (#8298)
* chore: initial changes

Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: improve troubleshooting dashboard

Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: rm troubleshooting-dashboard.md

Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: optimize metrics dashboard

Signed-off-by: evenyag <realevenyag@gmail.com>

* docs: move troubleshooting-dashboard.md

Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: move mito gc duration panel

Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: cleanup the dashboard

- Overview trend panels are now aggregate-only:
    - Total Ingestion Rate Trend
    - Total Query Rate Trend

- Protocol breakdowns remain in Ingestion and Queries.
- Mito Backpressure and Failures no longer duplicates scan/GC signals.
- Removed Write Stall per Instance.
- Split Object Store and WAL into collapsed Object Store and collapsed
  WAL.
- Moved WAL/logstore panels out of Storage into WAL.
- Normalized OpenDAL “other request” matchers.
- Normalized trigger elapsed p99/p75/avg aggregation.
- Regenerated standalone JSON and dashboard YAML/Markdown.
- Updated docs/troubleshooting-dashboard.md.

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: rearrange metasrv dashboard panels

Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: improve troubleshooting dashboard layout

Signed-off-by: evenyag <realevenyag@gmail.com>

* docs: remove obsolete troubleshooting dashboard doc

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: correct cluster dashboard panel queries (missing _bucket, raw counters, rate normalization)

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: correct trigger panel datasource, collapse flush/compaction, split request latency panels

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: update grafana metrics dashboard panels

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: correct Grafana dashboard units

Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: regenerate Grafana dashboards

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: use throughput unit for index IO bytes

Signed-off-by: evenyag <realevenyag@gmail.com>

---------

Signed-off-by: evenyag <realevenyag@gmail.com>
2026-06-16 07:13:04 +00:00

30 lines
1.2 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Regenerates the standalone metrics dashboard from the cluster dashboard and
# then runs the DAC container image over both dashboard directories.
#
# Usage: gen-dashboards.sh [CLUSTER_DASHBOARD_DIR] [STANDALONE_DASHBOARD_DIR]
#
# Both directories are appended to the container-side mount point of the
# current working directory, so they must be paths relative to the directory
# the script is started from.

# Abort on the first failing command, on use of an unset variable, and when any
# stage of a pipeline fails, so a broken step cannot silently produce an empty
# or stale dashboard while the script still exits 0.
set -euo pipefail

# Directory holding the cluster dashboard (first argument, optional).
readonly CLUSTER_DASHBOARD_DIR=${1:-grafana/dashboards/metrics/cluster}
# Directory receiving the standalone dashboard (second argument, optional).
readonly STANDALONE_DASHBOARD_DIR=${2:-grafana/dashboards/metrics/standalone}
# Pinned container image used to convert the dashboards.
readonly DAC_IMAGE=ghcr.io/zyy17/dac:20250423-522bd35
#######################################
# Build the standalone dashboard JSON from the cluster dashboard JSON by
# removing the instance matchers that reference the $datanode, $frontend,
# $metasrv and $flownode dashboard variables.
# Globals:
#   CLUSTER_DASHBOARD_DIR    (read) directory containing dashboard.json
#   STANDALONE_DASHBOARD_DIR (read) directory receiving dashboard.json
# Arguments:
#   None
# Outputs:
#   Writes $STANDALONE_DASHBOARD_DIR/dashboard.json; diagnostics go to stderr.
# Returns:
#   0 on success; 1 if the cluster dashboard is not readable (the standalone
#   file is left untouched in that case); otherwise the exit status of sed.
#######################################
remove_instance_filters() {
  local src="$CLUSTER_DASHBOARD_DIR/dashboard.json"
  local dst="$STANDALONE_DASHBOARD_DIR/dashboard.json"

  # Check before the redirection below truncates the destination: a missing
  # source must not leave an empty standalone dashboard behind.
  if [[ ! -r "$src" ]]; then
    printf 'error: cannot read %s\n' "$src" >&2
    return 1
  fi

  # A single sed invocation applies the expressions to each line in order and,
  # unlike a chain of piped seds, reports a read failure via its exit status.
  #   1. Drop the JSON-escaped matcher instance=~\"$role\" together with an
  #      optional trailing comma and surrounding whitespace.
  #   2. Remove a comma left directly after an opening brace.
  #   3. Remove a comma left directly before a closing brace.
  #   4. Drop a now-empty {} selector that follows a metric name.
  sed -E \
    -e 's/instance=~\\"(\$datanode|\$frontend|\$metasrv|\$flownode)\\"[[:space:]]*,?[[:space:]]*//g' \
    -e 's/\{[[:space:]]*,[[:space:]]*/{/g' \
    -e 's/,[[:space:]]*\}/}/g' \
    -e 's/([A-Za-z_:][A-Za-z0-9_:]*)\{[[:space:]]*\}/\1/g' \
    "$src" > "$dst"
}
#######################################
# Run the DAC container once for the cluster dashboard directory and once for
# the standalone dashboard directory. Each run receives dashboard.json via -i,
# dashboard.yaml via -o and dashboard.md via -m.
# Globals:
#   CLUSTER_DASHBOARD_DIR    (read)
#   STANDALONE_DASHBOARD_DIR (read)
#   DAC_IMAGE                (read) container image to run
#   PWD                      (read) mounted into the container at /greptimedb
# Arguments:
#   None
# Returns:
#   0 if both container runs succeed; otherwise the exit status of the last
#   run that failed. Both directories are always attempted.
#######################################
generate_intermediate_dashboards_and_docs() {
  local dashboard_dir
  local status=0

  for dashboard_dir in "$CLUSTER_DASHBOARD_DIR" "$STANDALONE_DASHBOARD_DIR"; do
    # Every expansion is quoted so a working directory or dashboard path that
    # contains whitespace is passed to docker as a single argument.
    docker run -v "${PWD}:/greptimedb" --rm "${DAC_IMAGE}" \
      -i "/greptimedb/${dashboard_dir}/dashboard.json" \
      -o "/greptimedb/${dashboard_dir}/dashboard.yaml" \
      -m "/greptimedb/${dashboard_dir}/dashboard.md" || status=$?
  done

  # Surface a failure of either run instead of only the last one's status.
  return "$status"
}
# Entry point: derive the standalone dashboard first, then run the generator
# over both dashboards. Generation is skipped when the first step fails so it
# never runs against a missing or stale standalone dashboard, and the failure
# becomes the script's exit status.
main() {
  remove_instance_filters || return
  generate_intermediate_dashboards_and_docs
}

main "$@"