mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-07-03 20:40:37 +00:00
ci: notify jsonbench result (#8273)
Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
184
.github/scripts/find-previous-workflow-artifact.sh
vendored
Executable file
184
.github/scripts/find-previous-workflow-artifact.sh
vendored
Executable file
@@ -0,0 +1,184 @@
|
||||
#!/usr/bin/env bash

# Abort on command failure (-e), on use of an unset variable (-u), and when any
# stage of a pipeline fails (pipefail).
set -euo pipefail
|
||||
|
||||
# Print the command-line help text to stdout.
usage() {
  cat <<'EOF'
Find the most recent previous successful workflow run that has a non-expired artifact.

Usage:
  find-previous-workflow-artifact.sh --workflow-path PATH --artifact-name NAME [options]

Options:
  --repo OWNER/REPO       GitHub repository. Defaults to GITHUB_REPOSITORY.
  --current-run-id ID     Current workflow run id to exclude. Defaults to GITHUB_RUN_ID.
  --workflow-path PATH    Workflow path, for example .github/workflows/nightly-jsonbench.yaml.
  --artifact-name NAME    Artifact name to find.
  --status STATUS         Workflow run status filter. Defaults to success.
  --per-page N            GitHub API page size. Defaults to 100.
  --run-id-only           Print only the run id. This is the default.
  --artifact-id-only      Print only the artifact id.
  --json                  Print a JSON object with run_id and artifact_id.
  --debug                 Print GitHub API requests and responses to stderr.
  -h, --help              Show this help.

The script uses gh CLI and jq. Provide GH_TOKEN or authenticate gh before running it.
EOF
}
|
||||
|
||||
# Option defaults. repo and current_run_id fall back to the GITHUB_REPOSITORY
# and GITHUB_RUN_ID environment variables, or to empty when those are unset.
repo="${GITHUB_REPOSITORY:-}"
current_run_id="${GITHUB_RUN_ID:-}"
workflow_path=""
artifact_name=""
status="success"
per_page="100"
# One of: run_id, artifact_id, json.
output_format="run_id"
# "true" enables debug_log output on stderr.
debug="false"
|
||||
|
||||
# Write "[debug] <args>" to stderr when debug mode is enabled; otherwise do
# nothing. Arguments are joined with spaces via "$*".
debug_log() {
  if [[ "${debug}" == "true" ]]; then
    printf '[debug] %s\n' "$*" >&2
  fi
}
|
||||
|
||||
# Replay the captured stderr in ${err_file} through debug_log, one line at a
# time, then truncate the file so the next command starts with a clean capture.
# Does nothing when debug mode is off or the file is empty.
log_stderr_file() {
  if [[ "${debug}" != "true" || ! -s "${err_file}" ]]; then
    return
  fi

  local line
  # IFS= keeps leading/trailing whitespace of each line intact; the extra
  # -n test emits a final line that is not terminated by a newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    debug_log "stderr: ${line}"
  done < "${err_file}"
  : > "${err_file}"
}
|
||||
|
||||
# Parse command-line options into the variables initialised above.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --repo | --current-run-id | --workflow-path | --artifact-name | --status | --per-page)
      # Under "set -u" a missing value would abort with an opaque
      # "unbound variable" error, so report it explicitly instead.
      if [[ $# -lt 2 ]]; then
        echo "Missing value for $1" >&2
        usage >&2
        exit 2
      fi
      case "$1" in
        --repo) repo="$2" ;;
        --current-run-id) current_run_id="$2" ;;
        --workflow-path) workflow_path="$2" ;;
        --artifact-name) artifact_name="$2" ;;
        --status) status="$2" ;;
        --per-page) per_page="$2" ;;
      esac
      shift 2
      ;;
    --run-id-only)
      output_format="run_id"
      shift
      ;;
    --artifact-id-only)
      output_format="artifact_id"
      shift
      ;;
    --json)
      output_format="json"
      shift
      ;;
    --debug)
      debug="true"
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done
|
||||
|
||||
# Validate required inputs; exit status 2 signals a usage error, matching the
# exit code used for unknown arguments.
if [[ -z "${repo}" ]]; then
  echo "--repo is required when GITHUB_REPOSITORY is not set." >&2
  exit 2
fi

if [[ -z "${workflow_path}" ]]; then
  echo "--workflow-path is required." >&2
  exit 2
fi

if [[ -z "${artifact_name}" ]]; then
  echo "--artifact-name is required." >&2
  exit 2
fi
|
||||
|
||||
# Scratch file that captures gh's stderr so log_stderr_file can replay it in
# debug mode; removed when the script exits.
err_file=$(mktemp)
trap 'rm -f "${err_file}"' EXIT

# Encode the workflow path as a JSON string literal before splicing it into the
# jq filter, so quotes or backslashes in the path cannot break or alter it.
workflow_path_literal=$(jq -n --arg path "${workflow_path}" '$path')

debug_log "request: gh api --method GET repos/${repo}/actions/runs -f status=${status} -f per_page=${per_page} --paginate"
# "|| true" keeps the lookup best-effort: a failing API call yields whatever
# ids were printed so far (possibly none) instead of aborting under "set -e".
candidate_run_ids=$(
  gh api --method GET "repos/${repo}/actions/runs" \
    -f "status=${status}" \
    -f "per_page=${per_page}" \
    --paginate \
    --jq ".workflow_runs[] | select(.path == ${workflow_path_literal}) | .id" \
    2> "${err_file}" || true
)
log_stderr_file
debug_log "response run ids: ${candidate_run_ids:-<none>}"

# Visit candidates in the order the API returned them (presumably newest
# first - confirm against the GitHub API docs); the first run that owns a
# matching, non-expired artifact wins and ends the script.
while read -r run_id; do
  if [[ -z "${run_id}" || "${run_id}" == "${current_run_id}" ]]; then
    debug_log "skip run id: ${run_id:-<empty>}"
    continue
  fi

  debug_log "request: gh api repos/${repo}/actions/runs/${run_id}/artifacts"
  artifacts_response=$(
    gh api "repos/${repo}/actions/runs/${run_id}/artifacts" \
      2> "${err_file}" || true
  )
  log_stderr_file
  debug_log "response for run ${run_id}: ${artifacts_response}"

  # "|| true" also absorbs jq parse errors on an empty/failed response and the
  # pipeline failure "head" can cause under pipefail by closing the pipe early.
  artifact_id=$(
    printf '%s\n' "${artifacts_response}" \
      | jq -r --arg name "${artifact_name}" '.artifacts[]? | select(.name == $name and (.expired | not)) | .id' \
      | head -n 1 || true
  )
  debug_log "artifact id for run ${run_id}: ${artifact_id:-<none>}"

  if [[ -z "${artifact_id}" ]]; then
    continue
  fi

  case "${output_format}" in
    run_id)
      echo "${run_id}"
      ;;
    artifact_id)
      echo "${artifact_id}"
      ;;
    json)
      printf '{"run_id":"%s","artifact_id":"%s"}\n' "${run_id}" "${artifact_id}"
      ;;
  esac
  exit 0
done <<< "${candidate_run_ids}"

debug_log "no previous workflow run with artifact '${artifact_name}' found"

# Nothing found: the id-only formats print nothing, the JSON format prints an
# object with empty ids. The script still exits successfully.
case "${output_format}" in
  json)
    printf '{"run_id":"","artifact_id":""}\n'
    ;;
esac
|
||||
217
.github/scripts/jsonbench-summary.py
vendored
Normal file
217
.github/scripts/jsonbench-summary.py
vendored
Normal file
@@ -0,0 +1,217 @@
|
||||
# Copyright 2023 Greptime Team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
import json
|
||||
import pathlib
|
||||
import re
|
||||
|
||||
|
||||
def read_number(result_dir, patterns):
    """Return the first number found in files matching ``patterns``.

    Patterns are tried in the given order. For each pattern, the matching
    paths below ``result_dir`` (searched recursively) are visited in sorted
    order, and the first unsigned integer or decimal literal found in a
    file's text is returned.

    Args:
        result_dir: ``pathlib.Path`` of the directory to search.
        patterns: Iterable of glob patterns passed to ``Path.rglob``.

    Returns:
        The matched number as a string, or ``None`` when no matching file
        contains a number.
    """
    number_re = re.compile(r"\d+(?:\.\d+)?")
    for pattern in patterns:
        for path in sorted(result_dir.rglob(pattern)):
            # rglob also yields directories whose name matches the pattern;
            # calling read_text on one would raise, so skip non-files.
            if not path.is_file():
                continue
            text = path.read_text(encoding="utf-8", errors="replace")
            match = number_re.search(text)
            if match:
                return match.group(0)
    return None
|
||||
|
||||
|
||||
def format_gb(value):
    """Format a byte count as decimal gigabytes with two decimals.

    Args:
        value: Byte count as a number or numeric string, or ``None``.

    Returns:
        A string such as ``"1.50 GB"`` (1 GB = 1000**3 bytes), or ``"N/A"``
        when ``value`` is ``None`` or cannot be converted to a float.
    """
    if value is None:
        return "N/A"

    try:
        bytes_size = float(value)
    except (TypeError, ValueError):
        # TypeError covers inputs float() rejects by type (e.g. a list).
        return "N/A"

    return f"{bytes_size / 1000 / 1000 / 1000:.2f} GB"
|
||||
|
||||
|
||||
def format_dataset(choice):
    """Translate a dataset menu choice into its size label.

    Args:
        choice: The choice as a string (``"1"`` to ``"4"``), or ``None``.

    Returns:
        ``"1M"``, ``"10M"``, ``"100M"`` or ``"1000M"`` for a known choice,
        ``"N/A"`` for ``None``, and ``"choice <value>"`` for anything else.
    """
    if choice is None:
        return "N/A"

    datasets = {
        "1": "1M",
        "2": "10M",
        "3": "100M",
        "4": "1000M",
    }
    return datasets.get(choice, f"choice {choice}")
|
||||
|
||||
|
||||
def read_runtime_text(result_dir):
    """Return the concatenated runtime output found under ``result_dir``.

    ``*.results_runtime`` files take precedence: when at least one exists,
    their contents are joined with newlines in sorted path order. Otherwise
    the contents of all ``*.log`` files are joined the same way.

    Args:
        result_dir: ``pathlib.Path`` of the directory to search recursively.

    Returns:
        The joined text, or an empty string when no file matches.
    """

    def read_all(pattern):
        # rglob also yields matching directories; only regular files can be
        # read, so filter before sorting.
        files = sorted(p for p in result_dir.rglob(pattern) if p.is_file())
        return [
            p.read_text(encoding="utf-8", errors="replace") for p in files
        ]

    runtime_texts = read_all("*.results_runtime")
    if runtime_texts:
        return "\n".join(runtime_texts)
    return "\n".join(read_all("*.log"))
|
||||
|
||||
|
||||
def parse_query_rows(text):
    """Extract per-query cold/hot timings from benchmark output.

    Each line starting with ``Running query:`` begins a new query (numbered
    from 1). The first following line that is a bracketed Python literal
    (``[...]``) is taken as that query's timing list: element 0 is the
    ``cold`` timing and element 1 the ``hot`` timing. Further bracketed
    lines for the same query are ignored.

    Args:
        text: The benchmark output as a single string.

    Returns:
        A list of ``(query_index, label, seconds)`` tuples, where ``label``
        is ``"cold"`` or ``"hot"`` and ``seconds`` is a float. Timings that
        are ``None`` or not convertible to a float are omitted.
    """
    rows = []
    query_index = 0
    has_timings = False

    for line in text.splitlines():
        stripped = line.strip()
        if stripped.startswith("Running query:"):
            query_index += 1
            has_timings = False
            continue

        # Only the first bracketed line after a query header is of interest.
        if query_index == 0 or has_timings:
            continue
        if not (stripped.startswith("[") and stripped.endswith("]")):
            continue

        try:
            timings = ast.literal_eval(stripped)
        except (
            SyntaxError,
            ValueError,
            TypeError,
            MemoryError,
            RecursionError,
        ):
            # literal_eval is documented to raise any of these on malformed
            # input; such a line is not a timing list, so keep looking.
            continue

        has_timings = True
        # zip stops after two labels, i.e. reads timings[0] and timings[1].
        for label, value in zip(("cold", "hot"), timings):
            if value is None:
                continue
            try:
                seconds = float(value)
            except (TypeError, ValueError):
                # A non-numeric entry must not abort the whole summary.
                continue
            rows.append((query_index, label, seconds))

    return rows
|
||||
|
||||
|
||||
def query_rows_to_map(query_rows):
    """Group timing rows by query index.

    Args:
        query_rows: Iterable of ``(query_index, label, value)`` tuples.

    Returns:
        A dict mapping each query index to a ``{label: value}`` dict. When
        the same index/label pair occurs more than once, the last value wins.
    """
    queries = {}
    for query_index, label, value in query_rows:
        queries.setdefault(query_index, {})[label] = value
    return queries
|
||||
|
||||
|
||||
def format_duration(value):
    """Format a duration with three decimals, or ``"N/A"`` for ``None``."""
    if value is None:
        return "N/A"
    return f"{value:.3f}"
|
||||
|
||||
|
||||
def format_delta(current, last):
    """Format the relative change from ``last`` to ``current`` in percent.

    Args:
        current: The current measurement, or ``None``.
        last: The previous measurement, or ``None``.

    Returns:
        ``"N/A"`` when ``current`` is ``None`` or ``last`` is ``None`` or
        zero; ``"0"`` when the change is at most 0.1 percent in magnitude;
        otherwise the signed change rounded to one decimal, with a trailing
        ``.0`` removed (for example ``"+10"``, ``"-3.5"``).
    """
    if current is None or last in (None, 0):
        return "N/A"

    percent = (current - last) / last * 100
    if abs(percent) <= 0.1:
        return "0"

    formatted = f"{percent:+.1f}"
    # Exactly one decimal digit is present, so dropping a ".0" suffix is all
    # that is needed to print whole percentages without a fraction.
    if formatted.endswith(".0"):
        formatted = formatted[:-2]
    return formatted
|
||||
|
||||
|
||||
def format_query_table(query_rows, previous_query_rows):
    """Render current timings and their change versus a previous run.

    Args:
        query_rows: ``(query_index, label, value)`` tuples of the current
            run, as produced by ``parse_query_rows``.
        previous_query_rows: The same kind of tuples for the previous run;
            may be empty.

    Returns:
        A pipe-delimited text table with one row per query (sorted by query
        index), a header row and a dashed separator row. Columns are padded
        to the widest cell; the query column is left-aligned and the value
        columns are right-aligned.
    """
    queries = query_rows_to_map(query_rows)
    previous_queries = query_rows_to_map(previous_query_rows)

    rows = [("Query", "Cold (s)", "Cold Last (%)", "Hot (s)", "Hot Last (%)")]
    for query_index, values in sorted(queries.items()):
        previous = previous_queries.get(query_index, {})
        rows.append(
            (
                f"Q{query_index}",
                format_duration(values.get("cold")),
                format_delta(values.get("cold"), previous.get("cold")),
                format_duration(values.get("hot")),
                format_delta(values.get("hot"), previous.get("hot")),
            )
        )

    # Widths are computed before the separator is inserted so that the
    # separator itself is sized from the real cell contents.
    widths = [max(len(row[column]) for row in rows) for column in range(5)]
    rows.insert(1, tuple("-" * width for width in widths))

    return "\n".join(
        f"| {query:<{widths[0]}} | {cold:>{widths[1]}} | "
        f"{cold_delta:>{widths[2]}} | {hot:>{widths[3]}} | "
        f"{hot_delta:>{widths[4]}} |"
        for query, cold, cold_delta, hot, hot_delta in rows
    )
|
||||
|
||||
|
||||
def build_payload(result_dir, previous_result_dir, result, run_url):
    """Build the notification payload for a nightly JSONBench run.

    Args:
        result_dir: ``pathlib.Path`` holding the current run's result files.
        previous_result_dir: ``pathlib.Path`` holding the previous run's
            results, or ``None``; ignored when it does not exist.
        result: Outcome of the benchmark job; only ``"success"`` produces a
            full report.
        run_url: URL of the workflow run, embedded in the message.

    Returns:
        A dict with a single ``"text"`` key. For a non-successful result the
        text is a short failure notice; otherwise it contains the dataset
        summary and the query timing table inside a code fence.
    """
    if result != "success":
        return {"text": f"Nightly JSONBench failed, please check {run_url}."}

    data_size = read_number(result_dir, ["*.total_size", "*.data_size"])
    count = read_number(result_dir, ["*.count"])
    dataset = read_number(result_dir, ["*.dataset"])
    query_rows = parse_query_rows(read_runtime_text(result_dir))

    # Without a previous run every delta column renders as "N/A".
    previous_query_rows = []
    if previous_result_dir and previous_result_dir.exists():
        previous_query_rows = parse_query_rows(
            read_runtime_text(previous_result_dir)
        )

    summary = (
        f"Dataset: {format_dataset(dataset)}\n"
        f"Data size: {format_gb(data_size)}\n"
        f"Count: {count or 'N/A'}"
    )
    table = format_query_table(query_rows, previous_query_rows)
    # NOTE(review): "<url|label>" looks like Slack link markup - confirm the
    # receiving webhook expects that format.
    text = (
        "Nightly JSONBench has completed successfully.\n"
        f"<{run_url}|Workflow run>\n"
        f"```{summary}\n\n{table}```"
    )
    return {"text": text}
|
||||
|
||||
|
||||
def main():
    """Parse command-line arguments and print the payload as JSON to stdout.

    ``--result-dir``, ``--result`` and ``--run-url`` are required;
    ``--previous-result-dir`` is optional.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--result-dir", required=True, type=pathlib.Path)
    parser.add_argument("--previous-result-dir", type=pathlib.Path)
    parser.add_argument("--result", required=True)
    parser.add_argument("--run-url", required=True)
    args = parser.parse_args()

    payload = build_payload(
        args.result_dir,
        args.previous_result_dir,
        args.result,
        args.run_url,
    )
    print(json.dumps(payload))


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user