ci: notify jsonbench result (#8273)

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
LFC
2026-06-10 16:51:37 +08:00
committed by GitHub
parent 05c4588f90
commit 962990009c
3 changed files with 490 additions and 1 deletions

View File

@@ -0,0 +1,184 @@
#!/usr/bin/env bash
set -euo pipefail
# Print the command-line help text to stdout.
# Callers that report an error redirect this output to stderr themselves.
usage() {
cat <<'EOF'
Find the most recent previous successful workflow run that has a non-expired artifact.
Usage:
find-previous-workflow-artifact.sh --workflow-path PATH --artifact-name NAME [options]
Options:
--repo OWNER/REPO GitHub repository. Defaults to GITHUB_REPOSITORY.
--current-run-id ID Current workflow run id to exclude. Defaults to GITHUB_RUN_ID.
--workflow-path PATH Workflow path, for example .github/workflows/nightly-jsonbench.yaml.
--artifact-name NAME Artifact name to find.
--status STATUS Workflow run status filter. Defaults to success.
--per-page N GitHub API page size. Defaults to 100.
--run-id-only Print only the run id. This is the default.
--artifact-id-only Print only the artifact id.
--json Print a JSON object with run_id and artifact_id.
--debug Print GitHub API requests and responses to stderr.
-h, --help Show this help.
The script uses gh CLI and jq. Provide GH_TOKEN or authenticate gh before running it.
EOF
}
# Option defaults; each may be overridden by the matching command-line flag.
# repo and current_run_id fall back to the GitHub Actions environment and are
# left empty when those variables are not set.
repo="${GITHUB_REPOSITORY:-}"
current_run_id="${GITHUB_RUN_ID:-}"
# Required inputs, filled in by --workflow-path and --artifact-name.
workflow_path=""
artifact_name=""
status="success"
per_page="100"
# One of: run_id, artifact_id, json.
output_format="run_id"
# "true" enables debug_log output on stderr.
debug="false"
# Write the arguments as a single "[debug] ..." line to stderr.
# Does nothing unless the global `debug` flag is "true".
debug_log() {
  if [[ "${debug}" != "true" ]]; then
    return 0
  fi
  printf '[debug] %s\n' "$*" >&2
}
# Replay the stderr captured in ${err_file} through debug_log, one line at a
# time, then truncate the file so the next command starts with a clean capture.
# Does nothing when debug output is disabled or the capture file is empty.
log_stderr_file() {
  if [[ "${debug}" != "true" || ! -s "${err_file}" ]]; then
    return
  fi
  # Keep the loop variable out of the global namespace.
  local line
  # IFS= preserves leading/trailing whitespace of each captured line; the
  # `|| [[ -n ... ]]` clause still emits a final line that has no trailing
  # newline (read returns non-zero at EOF even though it filled the variable).
  while IFS= read -r line || [[ -n "${line}" ]]; do
    debug_log "stderr: ${line}"
  done < "${err_file}"
  : > "${err_file}"
}
# Parse command-line options into the global settings variables.
# Unknown options, and value-taking options given without a value, print the
# usage text to stderr and exit with status 2.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --repo | --current-run-id | --workflow-path | --artifact-name | --status | --per-page)
      # Under `set -u`, touching a missing "$2" would abort with a bare
      # "unbound variable" error; check first and report it like any other
      # usage mistake.
      if [[ $# -lt 2 ]]; then
        echo "Missing value for argument: $1" >&2
        usage >&2
        exit 2
      fi
      case "$1" in
        --repo) repo="$2" ;;
        --current-run-id) current_run_id="$2" ;;
        --workflow-path) workflow_path="$2" ;;
        --artifact-name) artifact_name="$2" ;;
        --status) status="$2" ;;
        --per-page) per_page="$2" ;;
      esac
      shift 2
      ;;
    --run-id-only)
      output_format="run_id"
      shift
      ;;
    --artifact-id-only)
      output_format="artifact_id"
      shift
      ;;
    --json)
      output_format="json"
      shift
      ;;
    --debug)
      debug="true"
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done
# Refuse to continue without the repository, workflow path and artifact name.
# Each missing input is reported on stderr and ends the script with status 2.
[[ -n "${repo}" ]] || {
  echo "--repo is required when GITHUB_REPOSITORY is not set." >&2
  exit 2
}
[[ -n "${workflow_path}" ]] || {
  echo "--workflow-path is required." >&2
  exit 2
}
[[ -n "${artifact_name}" ]] || {
  echo "--artifact-name is required." >&2
  exit 2
}
# Scratch file that captures stderr of the API calls so it can be replayed in
# debug mode; removed when the script exits.
err_file=$(mktemp)
trap 'rm -f "${err_file}"' EXIT

# List the repository's workflow runs with the requested status and keep the
# ids of those whose workflow file path matches --workflow-path.
debug_log "request: gh api --method GET repos/${repo}/actions/runs -f status=${status} -f per_page=${per_page} --paginate"
candidate_run_ids=$(
  {
    # The workflow path is handed to jq as data via --arg instead of being
    # spliced into the filter text, so quotes or backslashes in the path can
    # neither break nor alter the jq program. jq reads the paginated output as
    # a stream of JSON documents, one per page.
    gh api --method GET "repos/${repo}/actions/runs" \
      -f "status=${status}" \
      -f "per_page=${per_page}" \
      --paginate \
      | jq -r --arg path "${workflow_path}" \
        '.workflow_runs[] | select(.path == $path) | .id'
  } 2> "${err_file}" || true # best effort: a failed lookup yields no candidates
)
log_stderr_file
debug_log "response run ids: ${candidate_run_ids:-<none>}"
# Walk the candidate runs in the order the API returned them (presumably
# newest first -- confirm against the GitHub API) and stop at the first run
# that still has a non-expired artifact with the requested name.
while read -r run_id; do
  # An empty candidate list yields one blank line; the current run is never
  # a valid "previous" run.
  if [[ -z "${run_id}" ]] || [[ "${run_id}" == "${current_run_id}" ]]; then
    debug_log "skip run id: ${run_id:-<empty>}"
    continue
  fi

  debug_log "request: gh api repos/${repo}/actions/runs/${run_id}/artifacts"
  # Best effort: a failed request leaves the response empty and the run is skipped.
  artifacts_response=$(gh api "repos/${repo}/actions/runs/${run_id}/artifacts" 2> "${err_file}" || true)
  log_stderr_file
  debug_log "response for run ${run_id}: ${artifacts_response}"

  # First artifact of this run that matches by name and has not expired.
  artifact_id=$(
    jq -r --arg name "${artifact_name}" \
      '.artifacts[]? | select(.name == $name and (.expired | not)) | .id' \
      <<< "${artifacts_response}" \
      | head -n 1 || true
  )
  debug_log "artifact id for run ${run_id}: ${artifact_id:-<none>}"
  [[ -n "${artifact_id}" ]] || continue

  # Found it: print in the requested format and finish successfully.
  if [[ "${output_format}" == "run_id" ]]; then
    echo "${run_id}"
  elif [[ "${output_format}" == "artifact_id" ]]; then
    echo "${artifact_id}"
  elif [[ "${output_format}" == "json" ]]; then
    printf '{"run_id":"%s","artifact_id":"%s"}\n' "${run_id}" "${artifact_id}"
  fi
  exit 0
done <<< "${candidate_run_ids}"

# Nothing matched. Only the JSON format prints a placeholder object; the
# plain formats print nothing. The script still ends with status 0.
debug_log "no previous workflow run with artifact '${artifact_name}' found"
if [[ "${output_format}" == "json" ]]; then
  printf '{"run_id":"","artifact_id":""}\n'
fi

217
.github/scripts/jsonbench-summary.py vendored Normal file
View File

@@ -0,0 +1,217 @@
# Copyright 2023 Greptime Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!/usr/bin/env python3
import argparse
import ast
import json
import pathlib
import re
def read_number(result_dir, patterns):
    """Return the first number found in files matching *patterns*.

    Patterns are tried in the given order; for each one, the files found
    recursively under *result_dir* are scanned in sorted path order. The
    result is the first unsigned integer or decimal literal found, as a
    string, or ``None`` when no matching file contains a number.

    Directories whose names happen to match a pattern are skipped rather
    than read, so they cannot abort the summary.
    """
    number = re.compile(r"\d+(?:\.\d+)?")
    for pattern in patterns:
        for path in sorted(result_dir.rglob(pattern)):
            # rglob also yields directories; reading one would raise.
            if not path.is_file():
                continue
            text = path.read_text(encoding="utf-8", errors="replace")
            match = number.search(text)
            if match:
                return match.group(0)
    return None
def format_gb(value):
    """Render a byte count as decimal gigabytes, e.g. ``"1.50 GB"``.

    *value* is anything ``float()`` accepts. ``None`` and strings that do
    not parse as a number are rendered as ``"N/A"``.
    """
    if value is None:
        return "N/A"
    try:
        size_in_bytes = float(value)
    except ValueError:
        return "N/A"
    gigabytes = size_in_bytes / 1000 / 1000 / 1000
    return f"{gigabytes:.2f} GB"
def format_dataset(choice):
    """Translate a dataset choice string into its size label.

    ``"1"``..``"4"`` map to ``"1M"``, ``"10M"``, ``"100M"`` and ``"1000M"``.
    ``None`` becomes ``"N/A"``; any other value is echoed back as
    ``"choice <value>"``.
    """
    if choice is None:
        return "N/A"
    labels = dict(zip(("1", "2", "3", "4"), ("1M", "10M", "100M", "1000M")))
    try:
        return labels[choice]
    except KeyError:
        return f"choice {choice}"
def read_runtime_text(result_dir):
    """Return the concatenated benchmark output found under *result_dir*.

    ``*.results_runtime`` files are preferred; when none exist, the
    ``*.log`` files are used instead. Files are read in sorted path order
    and joined with newlines. An empty string is returned when neither kind
    of file is present.

    Directories whose names match either pattern are ignored, since reading
    them would raise.
    """

    def read_all(pattern):
        # rglob also yields directories, so keep regular files only.
        return [
            path.read_text(encoding="utf-8", errors="replace")
            for path in sorted(result_dir.rglob(pattern))
            if path.is_file()
        ]

    runtime_texts = read_all("*.results_runtime")
    if runtime_texts:
        return "\n".join(runtime_texts)
    return "\n".join(read_all("*.log"))
def _is_timing_value(value):
    """Return True for ``None`` or a real number (booleans excluded)."""
    if value is None:
        return True
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def parse_query_rows(text):
    """Extract per-query cold/hot timings from benchmark output.

    Every line starting with ``Running query:`` opens the next query
    (numbered from 1). The first following line that is a bracketed list
    literal of numbers supplies that query's timings: element 0 is the
    "cold" run and element 1 the "hot" run. ``None`` or missing elements
    are skipped.

    Bracketed lines that are not valid literals, or whose first two
    elements are not numeric (for example ordinary log lines picked up from
    ``*.log`` files), are ignored rather than raising, and the search for
    the query's timings continues on later lines.

    Returns a list of ``(query_index, label, seconds)`` tuples.
    """
    rows = []
    query_index = 0
    has_timings = False
    for line in text.splitlines():
        stripped = line.strip()
        if stripped.startswith("Running query:"):
            query_index += 1
            has_timings = False
            continue
        # Only the first timing line after a query header is relevant.
        if query_index == 0 or has_timings:
            continue
        if not (stripped.startswith("[") and stripped.endswith("]")):
            continue
        try:
            timings = ast.literal_eval(stripped)
        except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
            # literal_eval is documented to raise any of these on bad input.
            continue
        # Only the first two elements are used, so only they are validated.
        if not isinstance(timings, (list, tuple)) or not all(
            _is_timing_value(value) for value in timings[:2]
        ):
            continue
        has_timings = True
        for label, value in zip(("cold", "hot"), timings):
            if value is not None:
                rows.append((query_index, label, float(value)))
    return rows
def query_rows_to_map(query_rows):
    """Group ``(query_index, label, value)`` rows by query.

    Returns ``{query_index: {label: value}}``; a later row with the same
    index and label overwrites an earlier one.
    """
    grouped = {}
    for index, kind, seconds in query_rows:
        if index not in grouped:
            grouped[index] = {}
        grouped[index][kind] = seconds
    return grouped
def format_duration(value):
    """Format a duration with three decimals, or ``"N/A"`` for ``None``."""
    return "N/A" if value is None else f"{value:.3f}"
def format_delta(current, last):
    """Describe how *current* changed relative to *last*, in percent.

    Returns ``"N/A"`` when either value is missing or *last* is zero, and
    ``"0"`` when the change is within 0.1 percent. Otherwise the result is
    a signed number with at most one decimal and no trailing ``.0``, such
    as ``"+23.4"`` or ``"-50"``.
    """
    if current is None or last is None or last == 0:
        return "N/A"
    percent = (current - last) / last * 100
    if abs(percent) <= 0.1:
        return "0"
    signed = f"{percent:+.1f}"
    # Drop a trailing ".0" so whole percentages read as integers.
    without_zeros = signed.rstrip("0")
    return without_zeros.rstrip(".")
def format_query_table(query_rows, previous_query_rows):
    """Render current timings and their change since the previous run.

    Both arguments are lists of ``(query_index, label, value)`` rows. The
    result is a pipe-delimited text table with one line per query of the
    current run, ordered by query index, preceded by a header line and a
    dashed separator line. Columns are padded to a common width; the query
    name is left-aligned and all other cells are right-aligned.
    """
    current = query_rows_to_map(query_rows)
    baseline = query_rows_to_map(previous_query_rows)

    table = [("Query", "Cold (s)", "Cold Last (%)", "Hot (s)", "Hot Last (%)")]
    for number in sorted(current):
        timings = current[number]
        reference = baseline.get(number, {})
        cold = timings.get("cold")
        hot = timings.get("hot")
        table.append(
            (
                f"Q{number}",
                format_duration(cold),
                format_delta(cold, reference.get("cold")),
                format_duration(hot),
                format_delta(hot, reference.get("hot")),
            )
        )

    # Column widths are measured before the separator row is inserted.
    widths = [max(len(cell) for cell in column) for column in zip(*table)]
    table.insert(1, tuple("-" * width for width in widths))

    lines = []
    for name, cold, cold_delta, hot, hot_delta in table:
        cells = (
            name.ljust(widths[0]),
            cold.rjust(widths[1]),
            cold_delta.rjust(widths[2]),
            hot.rjust(widths[3]),
            hot_delta.rjust(widths[4]),
        )
        lines.append("| " + " | ".join(cells) + " |")
    return "\n".join(lines)
def build_payload(result_dir, previous_result_dir, result, run_url):
    """Build the notification payload for a nightly JSONBench run.

    When *result* is anything other than ``"success"`` the payload is a
    short failure notice pointing at *run_url*. Otherwise it contains a
    link to the run plus a fenced block holding the dataset summary and the
    per-query timing table. Timings are compared with *previous_result_dir*
    when that directory is given and exists.

    Returns a dict with a single ``"text"`` key.
    """
    if result != "success":
        return {"text": f"Nightly JSONBench failed, please check {run_url}."}

    dataset = read_number(result_dir, ["*.dataset"])
    data_size = read_number(result_dir, ["*.total_size", "*.data_size"])
    count = read_number(result_dir, ["*.count"])

    current_rows = parse_query_rows(read_runtime_text(result_dir))
    if previous_result_dir and previous_result_dir.exists():
        baseline_rows = parse_query_rows(read_runtime_text(previous_result_dir))
    else:
        baseline_rows = []

    summary = "\n".join(
        [
            f"Dataset: {format_dataset(dataset)}",
            f"Data size: {format_gb(data_size)}",
            f"Count: {count or 'N/A'}",
        ]
    )
    table = format_query_table(current_rows, baseline_rows)
    message = "\n".join(
        [
            "Nightly JSONBench has completed successfully.",
            f"<{run_url}|Workflow run>",
            f"```{summary}\n\n{table}```",
        ]
    )
    return {"text": message}
def main():
    """Parse the command line and print the notification payload as JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--result-dir", required=True, type=pathlib.Path)
    parser.add_argument("--previous-result-dir", type=pathlib.Path)
    parser.add_argument("--result", required=True)
    parser.add_argument("--run-url", required=True)
    options = parser.parse_args()

    payload = build_payload(
        options.result_dir,
        options.previous_result_dir,
        options.result,
        options.run_url,
    )
    print(json.dumps(payload))


if __name__ == "__main__":
    main()