#!/usr/bin/env bash
#
# Locate the most recent earlier run of a workflow that still has a
# non-expired artifact with the requested name, and print its run id (default),
# the artifact id, or both as JSON.
#
# The lookup is best effort: GitHub API failures are reported on stderr but do
# not fail the script, so a caller can treat "no output" as "nothing to compare
# against". Only stdout carries the result.

set -euo pipefail

usage() {
  cat <<'EOF'
Find the most recent previous successful workflow run that has a non-expired artifact.

Usage:
  find-previous-workflow-artifact.sh --workflow-path PATH --artifact-name NAME [options]

Options:
  --repo OWNER/REPO       GitHub repository. Defaults to GITHUB_REPOSITORY.
  --current-run-id ID     Current workflow run id to exclude. Defaults to GITHUB_RUN_ID.
  --workflow-path PATH    Workflow path, for example .github/workflows/nightly-jsonbench.yaml.
  --artifact-name NAME    Artifact name to find.
  --status STATUS         Workflow run status filter. Defaults to success.
  --per-page N            GitHub API page size. Defaults to 100.
  --run-id-only           Print only the run id. This is the default.
  --artifact-id-only      Print only the artifact id.
  --json                  Print a JSON object with run_id and artifact_id.
  --debug                 Print GitHub API requests and responses to stderr.
  -h, --help              Show this help.

The script uses gh CLI and jq. Provide GH_TOKEN or authenticate gh before running it.
EOF
}

repo="${GITHUB_REPOSITORY:-}"
current_run_id="${GITHUB_RUN_ID:-}"
workflow_path=""
artifact_name=""
status="success"
per_page="100"
output_format="run_id"
debug="false"

# Print a diagnostic line to stderr, only when --debug was given.
debug_log() {
  if [[ "${debug}" == "true" ]]; then
    printf '[debug] %s\n' "$*" >&2
  fi
}

# Replay the stderr captured from the last gh call through debug_log, then
# truncate the capture file so the next call starts clean.
log_stderr_file() {
  if [[ "${debug}" != "true" || ! -s "${err_file}" ]]; then
    return
  fi

  # IFS= keeps leading whitespace; the `|| -n` clause keeps a final line that
  # has no trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    debug_log "stderr: ${line}"
  done < "${err_file}"
  : > "${err_file}"
}

# Report a failed gh call on stderr regardless of --debug, and consume the
# captured stderr so log_stderr_file does not print it a second time.
warn_api_failure() {
  echo "warning: $1" >&2
  if [[ -s "${err_file}" ]]; then
    cat "${err_file}" >&2
    : > "${err_file}"
  fi
}

# Fail with a readable message when an option that takes a value is the last
# argument (otherwise `set -u` aborts with "unbound variable").
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Option $1 requires a value." >&2
    usage >&2
    exit 2
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --repo)
      require_value "$@"
      repo="$2"
      shift 2
      ;;
    --current-run-id)
      require_value "$@"
      current_run_id="$2"
      shift 2
      ;;
    --workflow-path)
      require_value "$@"
      workflow_path="$2"
      shift 2
      ;;
    --artifact-name)
      require_value "$@"
      artifact_name="$2"
      shift 2
      ;;
    --status)
      require_value "$@"
      status="$2"
      shift 2
      ;;
    --per-page)
      require_value "$@"
      per_page="$2"
      shift 2
      ;;
    --run-id-only)
      output_format="run_id"
      shift
      ;;
    --artifact-id-only)
      output_format="artifact_id"
      shift
      ;;
    --json)
      output_format="json"
      shift
      ;;
    --debug)
      debug="true"
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done

if [[ -z "${repo}" ]]; then
  echo "--repo is required when GITHUB_REPOSITORY is not set." >&2
  exit 2
fi

if [[ -z "${workflow_path}" ]]; then
  echo "--workflow-path is required." >&2
  exit 2
fi

if [[ -z "${artifact_name}" ]]; then
  echo "--artifact-name is required." >&2
  exit 2
fi

err_file=$(mktemp)
trap 'rm -f "${err_file}"' EXIT

# Query the runs of this workflow only. The repository-wide runs listing would
# page through every successful run of every workflow, and GitHub caps
# status-filtered listings at 1000 results, so in a busy repository the
# previous run of a nightly workflow can fall outside that window.
# The API accepts the workflow file name in place of the numeric workflow id.
workflow_file="${workflow_path##*/}"
runs_endpoint="repos/${repo}/actions/workflows/${workflow_file}/runs"

debug_log "request: gh api --method GET ${runs_endpoint} -f status=${status} -f per_page=${per_page} --paginate"
# The path filter is kept as a guard in case two workflow files in different
# directories share a file name.
if ! candidate_run_ids=$(
  gh api --method GET "${runs_endpoint}" \
    -f "status=${status}" \
    -f "per_page=${per_page}" \
    --paginate \
    --jq ".workflow_runs[] | select(.path == \"${workflow_path}\") | .id" \
    2> "${err_file}"
); then
  # Best effort: keep whatever ids were returned before the failure.
  warn_api_failure "failed to list runs of ${workflow_path}; results may be incomplete"
fi
log_stderr_file
debug_log "response run ids: ${candidate_run_ids:-}"

# Runs are listed newest first, so the first run with a live artifact wins.
while read -r run_id; do
  if [[ -z "${run_id}" || "${run_id}" == "${current_run_id}" ]]; then
    debug_log "skip run id: ${run_id:-}"
    continue
  fi

  debug_log "request: gh api repos/${repo}/actions/runs/${run_id}/artifacts"
  if ! artifacts_response=$(
    gh api "repos/${repo}/actions/runs/${run_id}/artifacts" 2> "${err_file}"
  ); then
    warn_api_failure "failed to list artifacts of run ${run_id}"
  fi
  log_stderr_file
  debug_log "response for run ${run_id}: ${artifacts_response}"

  # `|| true` absorbs jq parse errors and the SIGPIPE that `head` can cause
  # under pipefail; an unusable response simply yields no artifact id.
  artifact_id=$(
    printf '%s\n' "${artifacts_response}" \
      | jq -r --arg name "${artifact_name}" '.artifacts[]? | select(.name == $name and (.expired | not)) | .id' \
      | head -n 1 || true
  )
  debug_log "artifact id for run ${run_id}: ${artifact_id:-}"

  if [[ -z "${artifact_id}" ]]; then
    continue
  fi

  case "${output_format}" in
    run_id)
      echo "${run_id}"
      ;;
    artifact_id)
      echo "${artifact_id}"
      ;;
    json)
      printf '{"run_id":"%s","artifact_id":"%s"}\n' "${run_id}" "${artifact_id}"
      ;;
  esac
  exit 0
done <<< "${candidate_run_ids}"

debug_log "no previous workflow run with artifact '${artifact_name}' found"

# Nothing found: the plain formats print nothing, JSON prints empty fields so
# consumers can always parse the output.
case "${output_format}" in
  json)
    printf '{"run_id":"","artifact_id":""}\n'
    ;;
esac
#!/usr/bin/env python3
"""Build the Slack notification payload for the nightly JSONBench workflow.

The script reads the benchmark result files of the current run (and, when
available, of a previous run), renders a summary plus a per-query timing
table, and prints a JSON object of the form ``{"text": ...}`` to stdout.
"""

import argparse
import ast
import json
import pathlib
import re


def read_number(result_dir, patterns):
    """Return the first number found in the files matching *patterns*.

    Patterns are tried in order; for each pattern the matching files under
    *result_dir* (searched recursively) are visited in sorted order. The
    number is returned as the matched string, or ``None`` when no file
    contains one.
    """
    for pattern in patterns:
        for path in sorted(result_dir.rglob(pattern)):
            text = path.read_text(encoding="utf-8", errors="replace")
            match = re.search(r"\d+(?:\.\d+)?", text)
            if match:
                return match.group(0)
    return None


def format_gb(value):
    """Format a byte count (number or numeric string) as decimal gigabytes."""
    if value is None:
        return "N/A"

    try:
        bytes_size = float(value)
    except ValueError:
        return "N/A"

    return f"{bytes_size / 1000 / 1000 / 1000:.2f} GB"


def format_dataset(choice):
    """Map the dataset choice recorded by the workflow to a readable size."""
    datasets = {
        "1": "1M",
        "2": "10M",
        "3": "100M",
        "4": "1000M",
    }
    if choice is None:
        return "N/A"
    return datasets.get(choice, f"choice {choice}")


def read_runtime_text(result_dir):
    """Return the concatenated text that holds the query timings.

    ``*.results_runtime`` files are preferred; when none exist, all ``*.log``
    files under *result_dir* are used instead.
    """
    runtime_files = sorted(result_dir.rglob("*.results_runtime"))
    if runtime_files:
        return "\n".join(
            path.read_text(encoding="utf-8", errors="replace")
            for path in runtime_files
        )

    log_files = sorted(result_dir.rglob("*.log"))
    return "\n".join(
        path.read_text(encoding="utf-8", errors="replace")
        for path in log_files
    )


def _to_seconds(value):
    """Convert one timing entry to ``float``; ``None`` (no result) stays ``None``.

    Raises ``TypeError`` or ``ValueError`` for entries that are not numeric.
    """
    return None if value is None else float(value)


def parse_query_rows(text):
    """Extract ``(query_index, label, seconds)`` rows from benchmark output.

    Each ``Running query:`` line starts a new query (numbered from 1). The
    first following line that is a list literal of timings provides the
    ``cold`` (first entry) and ``hot`` (second entry) values; later list lines
    of the same query are ignored. Bracketed lines that are not a list of
    numbers (ordinary log lines, for example) are skipped instead of aborting
    the whole summary, and do not count as the query's timing line.
    """
    rows = []
    query_index = 0
    has_timings = False

    for line in text.splitlines():
        stripped = line.strip()
        if stripped.startswith("Running query:"):
            query_index += 1
            has_timings = False
            continue

        if query_index == 0 or has_timings:
            continue
        if not (stripped.startswith("[") and stripped.endswith("]")):
            continue

        try:
            timings = ast.literal_eval(stripped)
            if not isinstance(timings, list):
                continue
            # Validate before committing to this line, so that a malformed
            # candidate cannot mask the real timing line that follows it.
            cold_hot = [_to_seconds(value) for value in timings[:2]]
        except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
            continue

        has_timings = True
        for label, value in zip(("cold", "hot"), cold_hot):
            if value is not None:
                rows.append((query_index, label, value))

    return rows


def query_rows_to_map(query_rows):
    """Group rows into ``{query_index: {label: seconds}}``."""
    queries = {}
    for query_index, label, value in query_rows:
        queries.setdefault(query_index, {})[label] = value
    return queries


def format_duration(value):
    """Format a duration in seconds with millisecond precision."""
    if value is None:
        return "N/A"
    return f"{value:.3f}"


def format_delta(current, last):
    """Format the relative change from *last* to *current* in percent.

    Returns ``"N/A"`` when either value is missing or *last* is zero, ``"0"``
    for changes within 0.1 percent, and otherwise a signed number with at
    most one decimal place (a trailing ``.0`` is dropped).
    """
    if current is None or last in (None, 0):
        return "N/A"

    percent = (current - last) / last * 100
    if abs(percent) <= 0.1:
        return "0"

    formatted = f"{percent:+.1f}"
    # The string always ends in ".<digit>", so this only removes a ".0" tail.
    return formatted.rstrip("0").rstrip(".")


def format_query_table(query_rows, previous_query_rows):
    """Render the per-query timings as an aligned, pipe-delimited table.

    The "Last (%)" columns show the change relative to the previous run.
    """
    queries = query_rows_to_map(query_rows)
    previous_queries = query_rows_to_map(previous_query_rows)

    rows = [("Query", "Cold (s)", "Cold Last (%)", "Hot (s)", "Hot Last (%)")]
    rows.extend(
        (
            f"Q{query_index}",
            format_duration(values.get("cold")),
            format_delta(
                values.get("cold"),
                previous_queries.get(query_index, {}).get("cold"),
            ),
            format_duration(values.get("hot")),
            format_delta(
                values.get("hot"),
                previous_queries.get(query_index, {}).get("hot"),
            ),
        )
        for query_index, values in sorted(queries.items())
    )

    widths = [max(len(row[column]) for row in rows) for column in range(5)]
    separator = tuple("-" * width for width in widths)
    rows.insert(1, separator)

    return "\n".join(
        (
            f"| {query:<{widths[0]}} | {cold:>{widths[1]}} | "
            f"{cold_delta:>{widths[2]}} | {hot:>{widths[3]}} | "
            f"{hot_delta:>{widths[4]}} |"
        )
        for query, cold, cold_delta, hot, hot_delta in rows
    )


def build_payload(result_dir, previous_result_dir, result, run_url):
    """Build the Slack message payload for a benchmark run.

    Any *result* other than ``"success"`` yields a short failure message and
    the result directories are not read. *previous_result_dir* may be
    ``None`` or missing, in which case no comparison values are shown.
    """
    if result != "success":
        return {"text": f"Nightly JSONBench failed, please check {run_url}."}

    data_size = read_number(result_dir, ["*.total_size", "*.data_size"])
    count = read_number(result_dir, ["*.count"])
    dataset = read_number(result_dir, ["*.dataset"])
    query_rows = parse_query_rows(read_runtime_text(result_dir))
    previous_query_rows = []
    if previous_result_dir and previous_result_dir.exists():
        previous_query_rows = parse_query_rows(read_runtime_text(previous_result_dir))

    summary = (
        f"Dataset: {format_dataset(dataset)}\n"
        f"Data size: {format_gb(data_size)}\n"
        f"Count: {count or 'N/A'}"
    )
    table = format_query_table(query_rows, previous_query_rows)
    text = (
        "Nightly JSONBench has completed successfully.\n"
        f"<{run_url}|Workflow run>\n"
        f"```{summary}\n\n{table}```"
    )
    return {"text": text}


def main():
    """Parse the command line and print the payload as JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--result-dir", required=True, type=pathlib.Path)
    parser.add_argument("--previous-result-dir", type=pathlib.Path)
    parser.add_argument("--result", required=True)
    parser.add_argument("--run-url", required=True)
    args = parser.parse_args()

    print(
        json.dumps(
            build_payload(
                args.result_dir,
                args.previous_result_dir,
                args.result,
                args.run_url,
            )
        )
    )


if __name__ == "__main__":
    main()
if: ${{ always() && needs.allocate-runner.outputs.linux-arm64-ec2-runner-instance-id != '' }} runs-on: ubuntu-latest needs: [ @@ -221,3 +223,89 @@ jobs: aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: ${{ vars.EC2_RUNNER_REGION }} github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} + + notification: + name: Send JSONBench notification + if: ${{ github.repository == 'GreptimeTeam/greptimedb' && always() }} + needs: [ jsonbench, stop-linux-arm64-runner ] + runs-on: ubuntu-latest + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_JSON_CHANNEL }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + persist-credentials: false + + - name: Download JSONBench results + if: ${{ needs.jsonbench.result == 'success' }} + uses: actions/download-artifact@v4 + with: + name: jsonbench-results + path: jsonbench-results + + - name: Find previous JSONBench results + id: previous_jsonbench + if: ${{ needs.jsonbench.result == 'success' }} + shell: bash + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CURRENT_RUN_ID: ${{ github.run_id }} + JSONBENCH_PREVIOUS_RESULT_DEBUG: ${{ vars.JSONBENCH_PREVIOUS_RESULT_DEBUG }} + run: | + set -euo pipefail + + debug_args=() + if [[ "${JSONBENCH_PREVIOUS_RESULT_DEBUG:-false}" == "true" ]]; then + debug_args+=(--debug) + fi + + previous_run_id=$( + bash .github/scripts/find-previous-workflow-artifact.sh \ + --repo "${GITHUB_REPOSITORY}" \ + --current-run-id "${CURRENT_RUN_ID}" \ + --workflow-path ".github/workflows/nightly-jsonbench.yaml" \ + --artifact-name "jsonbench-results" \ + "${debug_args[@]}" \ + --run-id-only + ) + + echo "run_id=${previous_run_id}" >> "${GITHUB_OUTPUT}" + + - name: Download previous JSONBench results + if: ${{ steps.previous_jsonbench.outputs.run_id != '' }} + uses: actions/download-artifact@v4 + with: + name: jsonbench-results + path: previous-jsonbench-results + github-token: ${{ secrets.GITHUB_TOKEN }} + run-id: ${{ steps.previous_jsonbench.outputs.run_id }} + 
+ - name: Prepare JSONBench summary + id: jsonbench-summary + shell: bash + env: + JSONBENCH_RESULT: ${{ needs.jsonbench.result }} + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + set -euo pipefail + + payload=$( + python3 .github/scripts/jsonbench-summary.py \ + --result-dir jsonbench-results \ + --previous-result-dir previous-jsonbench-results \ + --result "${JSONBENCH_RESULT}" \ + --run-url "${RUN_URL}" + ) + delimiter="jsonbench_payload_${RANDOM}_${RANDOM}" + { + echo "payload<<${delimiter}" + echo "${payload}" + echo "${delimiter}" + } >> "${GITHUB_OUTPUT}" + + - name: Notify JSONBench result + uses: slackapi/slack-github-action@v1.23.0 + with: + payload: ${{ steps.jsonbench-summary.outputs.payload }}