From 962990009cbf269e36f170caec23fad91b3cec8b Mon Sep 17 00:00:00 2001
From: LFC <990479+MichaelScofield@users.noreply.github.com>
Date: Wed, 10 Jun 2026 16:51:37 +0800
Subject: [PATCH] ci: notify jsonbench result (#8273)

Signed-off-by: luofucong <luofc@foxmail.com>
---
 .../find-previous-workflow-artifact.sh        | 184 +++++++++++++++
 .github/scripts/jsonbench-summary.py          | 217 ++++++++++++++++++
 .github/workflows/nightly-jsonbench.yaml      |  90 +++++++-
 3 files changed, 490 insertions(+), 1 deletion(-)
 create mode 100755 .github/scripts/find-previous-workflow-artifact.sh
 create mode 100644 .github/scripts/jsonbench-summary.py

diff --git a/.github/scripts/find-previous-workflow-artifact.sh b/.github/scripts/find-previous-workflow-artifact.sh
new file mode 100755
index 0000000000..f45123f2bc
--- /dev/null
+++ b/.github/scripts/find-previous-workflow-artifact.sh
@@ -0,0 +1,184 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+usage() {  # Print the help text below on stdout; error paths call it with >&2.
+  cat <<'EOF'
+Find the most recent previous successful workflow run that has a non-expired artifact.
+
+Usage:
+  find-previous-workflow-artifact.sh --workflow-path PATH --artifact-name NAME [options]
+
+Options:
+  --repo OWNER/REPO          GitHub repository. Defaults to GITHUB_REPOSITORY.
+  --current-run-id ID        Current workflow run id to exclude. Defaults to GITHUB_RUN_ID.
+  --workflow-path PATH       Workflow path, for example .github/workflows/nightly-jsonbench.yaml.
+  --artifact-name NAME       Artifact name to find.
+  --status STATUS            Workflow run status filter. Defaults to success.
+  --per-page N               GitHub API page size. Defaults to 100.
+  --run-id-only              Print only the run id. This is the default.
+  --artifact-id-only         Print only the artifact id.
+  --json                     Print a JSON object with run_id and artifact_id.
+  --debug                    Print GitHub API requests and responses to stderr.
+  -h, --help                 Show this help.
+
+The script uses gh CLI and jq. Provide GH_TOKEN or authenticate gh before running it.
+EOF
+}
+
+repo="${GITHUB_REPOSITORY:-}"  # empty when unset; rejected after option parsing
+current_run_id="${GITHUB_RUN_ID:-}"  # run to skip; when empty no run is excluded
+workflow_path=""  # required, set by --workflow-path
+artifact_name=""  # required, set by --artifact-name
+status="success"  # sent as the status query parameter
+per_page="100"  # sent as the per_page query parameter
+output_format="run_id"  # one of: run_id, artifact_id, json
+debug="false"  # "true" enables debug_log output
+
+# Write "[debug] <arguments>" to stderr when --debug is on; otherwise do nothing.
+debug_log() {
+  [[ "${debug}" == "true" ]] || return 0
+  printf '[debug] %s\n' "$*" >&2
+}
+
+# Replay the stderr captured in err_file through debug_log, then empty the file.
+log_stderr_file() {
+  local line
+  [[ "${debug}" == "true" && -s "${err_file}" ]] || return 0
+  # IFS= keeps indentation; the || test keeps a last line that has no newline.
+  while IFS= read -r line || [[ -n "${line}" ]]; do
+    debug_log "stderr: ${line}"
+  done < "${err_file}"
+  : > "${err_file}"
+}
+
+# Parse command-line options. Options that take a value share one arity check,
+# so a trailing flag such as "--repo" is reported as a usage error (exit 2)
+# instead of tripping `set -u` on an unbound "$2".
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    # Options that consume the following argument.
+    --repo | --current-run-id | --workflow-path | --artifact-name | --status | --per-page)
+      if [[ $# -lt 2 ]]; then
+        echo "Missing value for $1" >&2
+        usage >&2
+        exit 2
+      fi
+      case "$1" in
+        --repo) repo="$2" ;;
+        --current-run-id) current_run_id="$2" ;;
+        --workflow-path) workflow_path="$2" ;;
+        --artifact-name) artifact_name="$2" ;;
+        --status) status="$2" ;;
+        --per-page) per_page="$2" ;;
+      esac
+      shift 2
+      ;;
+    # Output selectors: the last one on the command line wins.
+    --run-id-only)
+      output_format="run_id"
+      shift
+      ;;
+    --artifact-id-only)
+      output_format="artifact_id"
+      shift
+      ;;
+    --json)
+      output_format="json"
+      shift
+      ;;
+    # Debug output goes to stderr, so stdout stays limited to the requested ids.
+    --debug)
+      debug="true"
+      shift
+      ;;
+    # Help requested explicitly: print to stdout and exit successfully.
+    -h | --help)
+      usage
+      exit 0
+      ;;
+    # Anything else is a usage error.
+    *)
+      echo "Unknown argument: $1" >&2
+      usage >&2
+      exit 2
+      ;;
+  esac
+done
+
+# Mandatory settings are verified before any GitHub API call is made.
+# require_value VALUE MESSAGE: print MESSAGE on stderr and exit 2 if VALUE is empty.
+require_value() {
+  if [[ -n "$1" ]]; then
+    return 0
+  fi
+  echo "$2" >&2
+  exit 2
+}
+
+require_value "${repo}" "--repo is required when GITHUB_REPOSITORY is not set."
+require_value "${workflow_path}" "--workflow-path is required."
+require_value "${artifact_name}" "--artifact-name is required."
+
+
+err_file=$(mktemp)
+trap 'rm -f "${err_file}"' EXIT
+# Ask for this workflow's runs only (file name as workflow id); errors just yield no candidates.
+runs_endpoint="repos/${repo}/actions/workflows/${workflow_path##*/}/runs"
+debug_log "request: gh api --method GET ${runs_endpoint} -f status=${status} -f per_page=${per_page} --paginate"
+candidate_run_ids=$(
+  {
+    gh api --method GET "${runs_endpoint}" \
+      -f "status=${status}" -f "per_page=${per_page}" --paginate \
+      | jq -r --arg path "${workflow_path}" '.workflow_runs[]? | select(.path == $path) | .id'
+  } 2> "${err_file}" || true
+)
+log_stderr_file
+debug_log "response run ids: ${candidate_run_ids:-<none>}"
+
+while read -r run_id; do  # one candidate id per line; presumably newest first (confirm API order)
+  if [[ -z "${run_id}" || "${run_id}" == "${current_run_id}" ]]; then
+    debug_log "skip run id: ${run_id:-<empty>}"
+    continue  # blank line (empty candidate list) or the run being excluded
+  fi
+
+  debug_log "request: gh api repos/${repo}/actions/runs/${run_id}/artifacts"
+  artifacts_response=$(
+    gh api "repos/${repo}/actions/runs/${run_id}/artifacts" \
+      2> "${err_file}" || true
+  )  # request errors are ignored here; a response without a match is skipped below
+  log_stderr_file
+  debug_log "response for run ${run_id}: ${artifacts_response}"
+
+  artifact_id=$(
+    printf '%s\n' "${artifacts_response}" \
+      | jq -r --arg name "${artifact_name}" '.artifacts[]? | select(.name == $name and (.expired | not)) | .id' \
+      | head -n 1 || true
+  )  # first non-expired artifact with the requested name, or empty
+  debug_log "artifact id for run ${run_id}: ${artifact_id:-<none>}"
+
+  if [[ -z "${artifact_id}" ]]; then
+    continue  # no usable artifact in this run; try the next candidate
+  fi
+
+  case "${output_format}" in
+    run_id)
+      echo "${run_id}"
+      ;;
+    artifact_id)
+      echo "${artifact_id}"
+      ;;
+    json)
+      printf '{"run_id":"%s","artifact_id":"%s"}\n' "${run_id}" "${artifact_id}"
+      ;;
+  esac
+  exit 0  # the first candidate with a matching artifact wins
+done <<< "${candidate_run_ids}"
+
+debug_log "no previous workflow run with artifact '${artifact_name}' found"
+
+case "${output_format}" in  # nothing found: only json mode prints, with empty fields
+  json)
+    printf '{"run_id":"","artifact_id":""}\n'
+    ;;
+esac
diff --git a/.github/scripts/jsonbench-summary.py b/.github/scripts/jsonbench-summary.py
new file mode 100644
index 0000000000..6a0fff70c4
--- /dev/null
+++ b/.github/scripts/jsonbench-summary.py
@@ -0,0 +1,217 @@
+# Copyright 2023 Greptime Team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/usr/bin/env python3
+
+import argparse
+import ast
+import json
+import pathlib
+import re
+
+
+def read_number(result_dir, patterns):
+    """Return the first number (as text) in files matching ``patterns``, or None."""
+    number = re.compile(r"\d+(?:\.\d+)?")
+    for file_path in (p for pattern in patterns for p in sorted(result_dir.rglob(pattern))):
+        found = number.search(file_path.read_text(encoding="utf-8", errors="replace"))
+        if found:
+            return found.group(0)
+    return None
+
+
+def format_gb(value):
+    """Render a byte count as decimal gigabytes ("1.23 GB"), or "N/A" if unusable."""
+    if value is None:
+        return "N/A"
+    try:
+        gigabytes = float(value) / 1000 / 1000 / 1000
+    except ValueError:
+        return "N/A"
+
+    return f"{gigabytes:.2f} GB"
+
+
+def format_dataset(choice):
+    """Translate a dataset choice ("1".."4") into its size label; None is "N/A"."""
+    if choice is None:
+        return "N/A"
+
+    # Unknown choices are echoed back as "choice <value>".
+    labels = {"1": "1M", "2": "10M", "3": "100M", "4": "1000M"}
+    if choice in labels:
+        return labels[choice]
+    return f"choice {choice}"
+
+
+def read_runtime_text(result_dir):
+    """Concatenate the runtime output found under ``result_dir`` into one string.
+
+    ``*.results_runtime`` files are preferred; ``*.log`` files are read only
+    when no such file exists. Files are joined with newlines in sorted order.
+    """
+    sources = sorted(result_dir.rglob("*.results_runtime"))
+    if not sources:
+        sources = sorted(result_dir.rglob("*.log"))
+
+    return "\n".join(
+        source.read_text(encoding="utf-8", errors="replace") for source in sources
+    )
+
+
+def parse_query_rows(text):
+    """Extract (query_index, "cold" | "hot", seconds) rows from benchmark output.
+
+    A "Running query:" line starts the next query (numbered from 1); the first
+    bracketed list after it gives the timings ([0] is "cold", [1] is "hot").
+    JSON lists (``null``) are accepted; non-numeric entries are skipped.
+    """
+    rows = []
+    query_index = 0
+    has_timings = False
+
+    for line in text.splitlines():
+        stripped = line.strip()
+        if stripped.startswith("Running query:"):
+            query_index += 1
+            has_timings = False
+        elif query_index > 0 and not has_timings and (
+            stripped.startswith("[") and stripped.endswith("]")
+        ):
+            try:
+                timings = ast.literal_eval(stripped)
+            except (SyntaxError, ValueError):
+                try:
+                    timings = json.loads(stripped)
+                except ValueError:
+                    continue
+            has_timings = True
+            for label, value in zip(("cold", "hot"), timings):
+                try:
+                    rows.append((query_index, label, float(value)))
+                except (TypeError, ValueError):
+                    continue  # None or non-numeric: no timing for this label
+
+    return rows
+
+
+def query_rows_to_map(query_rows):  # regroups rows as {query_index: {label: value}}
+    queries = {}
+    for query_index, label, value in query_rows:
+        queries.setdefault(query_index, {})[label] = value  # a repeated (index, label) keeps the last value
+    return queries
+
+
+def format_duration(value):
+    """Render a duration in seconds with three decimals; None becomes "N/A"."""
+    missing = value is None
+    return "N/A" if missing else f"{value:.3f}"
+
+
+def format_delta(current, last):
+    """Signed percent change vs. last; "N/A" if not computable, "0" within 0.1%."""
+    if current is None or last is None or last == 0:
+        return "N/A"
+
+    percent = (current - last) / last * 100
+    if abs(percent) <= 0.1:
+        return "0"
+
+    return f"{percent:+.1f}".rstrip("0").rstrip(".")
+
+
+def format_query_table(query_rows, previous_query_rows):
+    """Build a pipe-delimited text table of cold/hot timings per query.
+
+    Every query gets its current cold and hot durations plus the percent
+    change against the same query in previous_query_rows ("N/A" if absent).
+    Columns are padded to the widest cell; the first is left-aligned and the
+    others are right-aligned.
+    """
+    current = query_rows_to_map(query_rows)
+    previous = query_rows_to_map(previous_query_rows)
+
+    table = [("Query", "Cold (s)", "Cold Last (%)", "Hot (s)", "Hot Last (%)")]
+    for query_index in sorted(current):
+        now = current[query_index]
+        before = previous.get(query_index, {})
+        table.append(
+            (
+                f"Q{query_index}",
+                format_duration(now.get("cold")),
+                format_delta(now.get("cold"), before.get("cold")),
+                format_duration(now.get("hot")),
+                format_delta(now.get("hot"), before.get("hot")),
+            )
+        )
+
+    widths = [max(len(cells[i]) for cells in table) for i in range(5)]
+    table.insert(1, tuple("-" * width for width in widths))
+
+    lines = []
+    for cells in table:
+        padded = [f"{cells[0]:<{widths[0]}}"]
+        padded.extend(f"{cell:>{width}}" for cell, width in zip(cells[1:], widths[1:]))
+        lines.append("| " + " | ".join(padded) + " |")
+    return "\n".join(lines)
+
+
+def build_payload(result_dir, previous_result_dir, result, run_url):
+    """Build the notification payload, a dict with a single "text" entry.
+
+    Any result other than "success" yields a short failure message linking to
+    run_url. Otherwise the text carries dataset, data size, count and the
+    query table, compared with previous_result_dir when that directory exists.
+    """
+    if result != "success":
+        return {"text": f"Nightly JSONBench failed, please check {run_url}."}
+
+    dataset = format_dataset(read_number(result_dir, ["*.dataset"]))
+    data_size = format_gb(read_number(result_dir, ["*.total_size", "*.data_size"]))
+    count = read_number(result_dir, ["*.count"]) or "N/A"
+
+    current_rows = parse_query_rows(read_runtime_text(result_dir))
+    baseline_rows = []
+    if previous_result_dir and previous_result_dir.exists():
+        baseline_rows = parse_query_rows(read_runtime_text(previous_result_dir))
+    table = format_query_table(current_rows, baseline_rows)
+
+    summary = f"Dataset: {dataset}\nData size: {data_size}\nCount: {count}"
+    headline = "Nightly JSONBench has completed successfully.\n"
+    link = f"<{run_url}|Workflow run>\n"
+    return {"text": f"{headline}{link}```{summary}\n\n{table}```"}
+
+
+def main():
+    """Parse the command line and print the payload as JSON on stdout."""
+    parser = argparse.ArgumentParser()
+    # Both directory options are converted to pathlib.Path by argparse.
+    parser.add_argument("--result-dir", required=True, type=pathlib.Path)
+    parser.add_argument("--previous-result-dir", type=pathlib.Path)
+    parser.add_argument("--result", required=True)
+    parser.add_argument("--run-url", required=True)
+    options = parser.parse_args()
+
+    payload = build_payload(
+        options.result_dir,
+        options.previous_result_dir,
+        options.result,
+        options.run_url,
+    )
+    # json.dumps escapes newlines inside strings, so the output is one line.
+    print(json.dumps(payload))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/nightly-jsonbench.yaml b/.github/workflows/nightly-jsonbench.yaml
index a9ce4dd363..886dfabccd 100644
--- a/.github/workflows/nightly-jsonbench.yaml
+++ b/.github/workflows/nightly-jsonbench.yaml
@@ -140,6 +140,7 @@ jobs:
 
           export JSONBENCH_DATA_DIR="/root/data/bluesky"
           echo "Use JSONBench data directory ${JSONBENCH_DATA_DIR}"
+          echo "${JSONBENCH_DATASET}" > jsonbench.dataset
 
           echo "Cloning JSONBench"
           git clone --branch greptimedb-new-json --depth 1 https://github.com/GreptimeTeam/JSONBench.git JSONBench
@@ -186,6 +187,7 @@ jobs:
           name: jsonbench-results
           path: |
             ./greptimedb.log
+            ./jsonbench.dataset
             ./JSONBench/greptimedb/*.log
             ./JSONBench/greptimedb/*.total_size
             ./JSONBench/greptimedb/*.data_size
@@ -198,7 +200,7 @@ jobs:
 
   stop-linux-arm64-runner:
     name: Stop Linux ARM64 runner
-    # It's always run as the last job in the workflow to make sure that the runner is released.
+    # It always runs after JSONBench to make sure that the runner is released.
     if: ${{ always() && needs.allocate-runner.outputs.linux-arm64-ec2-runner-instance-id != '' }}
     runs-on: ubuntu-latest
     needs: [
@@ -221,3 +223,89 @@ jobs:
           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
           aws-region: ${{ vars.EC2_RUNNER_REGION }}
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+
+  notification:
+    name: Send JSONBench notification
+    if: ${{ github.repository == 'GreptimeTeam/greptimedb' && always() }}
+    needs: [ jsonbench, stop-linux-arm64-runner ]
+    runs-on: ubuntu-latest
+    env:
+      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_JSON_CHANNEL }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      - name: Download JSONBench results
+        if: ${{ needs.jsonbench.result == 'success' }}
+        uses: actions/download-artifact@v4
+        with:
+          name: jsonbench-results
+          path: jsonbench-results
+
+      - name: Find previous JSONBench results
+        id: previous_jsonbench
+        if: ${{ needs.jsonbench.result == 'success' }}
+        shell: bash
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          CURRENT_RUN_ID: ${{ github.run_id }}
+          JSONBENCH_PREVIOUS_RESULT_DEBUG: ${{ vars.JSONBENCH_PREVIOUS_RESULT_DEBUG }}
+        run: |
+          set -euo pipefail
+
+          debug_args=()
+          if [[ "${JSONBENCH_PREVIOUS_RESULT_DEBUG:-false}" == "true" ]]; then
+            debug_args+=(--debug)
+          fi
+
+          previous_run_id=$(
+            bash .github/scripts/find-previous-workflow-artifact.sh \
+              --repo "${GITHUB_REPOSITORY}" \
+              --current-run-id "${CURRENT_RUN_ID}" \
+              --workflow-path ".github/workflows/nightly-jsonbench.yaml" \
+              --artifact-name "jsonbench-results" \
+              "${debug_args[@]}" \
+              --run-id-only
+          )
+
+          echo "run_id=${previous_run_id}" >> "${GITHUB_OUTPUT}"
+
+      - name: Download previous JSONBench results
+        if: ${{ steps.previous_jsonbench.outputs.run_id != '' }}
+        continue-on-error: true  # comparison data is optional; the notification must still go out
+        uses: actions/download-artifact@v4
+        with:
+          name: jsonbench-results
+          path: previous-jsonbench-results
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          run-id: ${{ steps.previous_jsonbench.outputs.run_id }}
+
+      - name: Prepare JSONBench summary
+        id: jsonbench-summary
+        shell: bash
+        env:
+          JSONBENCH_RESULT: ${{ needs.jsonbench.result }}
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          set -euo pipefail
+
+          payload=$(
+            python3 .github/scripts/jsonbench-summary.py \
+              --result-dir jsonbench-results \
+              --previous-result-dir previous-jsonbench-results \
+              --result "${JSONBENCH_RESULT}" \
+              --run-url "${RUN_URL}"
+          )
+          delimiter="jsonbench_payload_${RANDOM}_${RANDOM}"
+          {
+            echo "payload<<${delimiter}"
+            echo "${payload}"
+            echo "${delimiter}"
+          } >> "${GITHUB_OUTPUT}"
+
+      - name: Notify JSONBench result
+        uses: slackapi/slack-github-action@v1.23.0
+        with:
+          payload: ${{ steps.jsonbench-summary.outputs.payload }}