mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-07-03 20:40:37 +00:00
283 lines
7.9 KiB
Python
283 lines
7.9 KiB
Python
# Copyright 2023 Greptime Team
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
import argparse
|
|
import ast
|
|
import json
|
|
import pathlib
|
|
import re
|
|
import statistics
|
|
|
|
|
|
def read_number(result_dir, patterns):
|
|
for pattern in patterns:
|
|
for path in sorted(result_dir.rglob(pattern)):
|
|
text = path.read_text(encoding="utf-8", errors="replace")
|
|
match = re.search(r"\d+(?:\.\d+)?", text)
|
|
if match:
|
|
return match.group(0)
|
|
return None
|
|
|
|
|
|
def format_gb(value):
|
|
if value is None:
|
|
return "N/A"
|
|
|
|
try:
|
|
bytes_size = float(value)
|
|
except ValueError:
|
|
return "N/A"
|
|
|
|
return f"{bytes_size / 1000 / 1000 / 1000:.2f} GB"
|
|
|
|
|
|
def format_dataset(choice):
|
|
datasets = {
|
|
"1": "1M",
|
|
"2": "10M",
|
|
"3": "100M",
|
|
"4": "1000M",
|
|
}
|
|
if choice is None:
|
|
return "N/A"
|
|
return datasets.get(choice, f"choice {choice}")
|
|
|
|
|
|
def read_runtime_text(result_dir):
|
|
runtime_files = sorted(result_dir.rglob("*.results_runtime"))
|
|
if runtime_files:
|
|
return "\n".join(
|
|
path.read_text(encoding="utf-8", errors="replace")
|
|
for path in runtime_files
|
|
)
|
|
|
|
log_files = sorted(result_dir.rglob("*.log"))
|
|
return "\n".join(
|
|
path.read_text(encoding="utf-8", errors="replace")
|
|
for path in log_files
|
|
)
|
|
|
|
|
|
def parse_query_rows(text):
|
|
rows = []
|
|
query_index = 0
|
|
has_timings = False
|
|
|
|
for line in text.splitlines():
|
|
stripped = line.strip()
|
|
if stripped.startswith("Running query:"):
|
|
query_index += 1
|
|
has_timings = False
|
|
continue
|
|
|
|
if not (
|
|
query_index > 0
|
|
and not has_timings
|
|
and stripped.startswith("[")
|
|
and stripped.endswith("]")
|
|
):
|
|
continue
|
|
|
|
try:
|
|
timings = ast.literal_eval(stripped)
|
|
except (SyntaxError, ValueError):
|
|
continue
|
|
|
|
has_timings = True
|
|
for label, value in (
|
|
("cold", timings[0] if len(timings) > 0 else None),
|
|
("hot", timings[1] if len(timings) > 1 else None),
|
|
):
|
|
if value is not None:
|
|
rows.append((query_index, label, float(value)))
|
|
|
|
return rows
|
|
|
|
|
|
def parse_load_data_stats(result_dir):
|
|
values = []
|
|
for path in sorted(result_dir.rglob("*.load_data")):
|
|
for line in path.read_text(encoding="utf-8", errors="replace").splitlines():
|
|
stripped = line.strip()
|
|
if not stripped.startswith("{"):
|
|
continue
|
|
|
|
try:
|
|
data = json.loads(stripped)
|
|
except json.JSONDecodeError:
|
|
continue
|
|
|
|
value = data.get("execution_time_ms")
|
|
if isinstance(value, (int, float)):
|
|
values.append(value / 1000)
|
|
|
|
if not values:
|
|
return {}
|
|
|
|
return {
|
|
"Max": max(values),
|
|
"Min": min(values),
|
|
"Avg": sum(values) / len(values),
|
|
"Median": statistics.median(values),
|
|
}
|
|
|
|
|
|
def query_rows_to_map(query_rows):
|
|
queries = {}
|
|
for query_index, label, value in query_rows:
|
|
queries.setdefault(query_index, {})[label] = value
|
|
return queries
|
|
|
|
|
|
def format_duration(value):
|
|
if value is None:
|
|
return "N/A"
|
|
return f"{value:.3f}"
|
|
|
|
|
|
def format_delta(current, last):
|
|
if current is None or last in (None, 0):
|
|
return "N/A"
|
|
|
|
percent = (current - last) / last * 100
|
|
if abs(percent) <= 0.1:
|
|
return "0"
|
|
|
|
formatted = f"{percent:+.1f}"
|
|
return formatted.rstrip("0").rstrip(".")
|
|
|
|
|
|
def format_insert_table(stats, previous_stats):
|
|
rows = [("Indicator", "Value (s)", "Value Last (%)")]
|
|
for label, key in (
|
|
("Max", "Max"),
|
|
("Min", "Min"),
|
|
("Avg", "Avg"),
|
|
("Median", "Median"),
|
|
):
|
|
rows.append(
|
|
(
|
|
label,
|
|
format_duration(stats.get(key)),
|
|
format_delta(stats.get(key), previous_stats.get(key)),
|
|
)
|
|
)
|
|
|
|
widths = [max(len(row[column]) for row in rows) for column in range(3)]
|
|
separator = tuple("-" * width for width in widths)
|
|
rows.insert(1, separator)
|
|
|
|
return "\n".join(
|
|
f"| {indicator:<{widths[0]}} | {value:>{widths[1]}} | "
|
|
f"{delta:>{widths[2]}} |"
|
|
for indicator, value, delta in rows
|
|
)
|
|
|
|
|
|
def format_query_table(query_rows, previous_query_rows):
|
|
queries = query_rows_to_map(query_rows)
|
|
previous_queries = query_rows_to_map(previous_query_rows)
|
|
|
|
rows = [("Query", "Cold (s)", "Cold Last (%)", "Hot (s)", "Hot Last (%)")]
|
|
rows.extend(
|
|
(
|
|
f"Q{query_index}",
|
|
format_duration(values.get("cold")),
|
|
format_delta(
|
|
values.get("cold"),
|
|
previous_queries.get(query_index, {}).get("cold"),
|
|
),
|
|
format_duration(values.get("hot")),
|
|
format_delta(
|
|
values.get("hot"),
|
|
previous_queries.get(query_index, {}).get("hot"),
|
|
),
|
|
)
|
|
for query_index, values in sorted(queries.items())
|
|
)
|
|
|
|
widths = [max(len(row[column]) for row in rows) for column in range(5)]
|
|
separator = tuple("-" * width for width in widths)
|
|
rows.insert(1, separator)
|
|
|
|
return "\n".join(
|
|
(
|
|
f"| {query:<{widths[0]}} | {cold:>{widths[1]}} | "
|
|
f"{cold_delta:>{widths[2]}} | {hot:>{widths[3]}} | "
|
|
f"{hot_delta:>{widths[4]}} |"
|
|
)
|
|
for query, cold, cold_delta, hot, hot_delta in rows
|
|
)
|
|
|
|
|
|
def build_payload(result_dir, previous_result_dir, result, run_url, runner_name):
|
|
title = f"Runner: {runner_name or 'N/A'}\n"
|
|
if result != "success":
|
|
return {
|
|
"text": f"{title}Nightly JSONBench failed, please check {run_url}."
|
|
}
|
|
|
|
data_size = read_number(result_dir, ["*.total_size", "*.data_size"])
|
|
count = read_number(result_dir, ["*.count"])
|
|
dataset = read_number(result_dir, ["*.dataset"])
|
|
query_rows = parse_query_rows(read_runtime_text(result_dir))
|
|
insert_stats = parse_load_data_stats(result_dir)
|
|
previous_query_rows = []
|
|
previous_insert_stats = {}
|
|
if previous_result_dir and previous_result_dir.exists():
|
|
previous_query_rows = parse_query_rows(read_runtime_text(previous_result_dir))
|
|
previous_insert_stats = parse_load_data_stats(previous_result_dir)
|
|
|
|
summary = (
|
|
f"Dataset: {format_dataset(dataset)}\n"
|
|
f"Data size: {format_gb(data_size)}\n"
|
|
f"Count: {count or 'N/A'}"
|
|
)
|
|
insert_table = format_insert_table(insert_stats, previous_insert_stats)
|
|
query_table = format_query_table(query_rows, previous_query_rows)
|
|
text = (
|
|
f"{title}Nightly JSONBench has completed successfully.\n"
|
|
f"<{run_url}|Workflow run>\n"
|
|
f"```{summary}\n\nInsert:\n{insert_table}\n\n{query_table}```"
|
|
)
|
|
return {"text": text}
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("--result-dir", required=True, type=pathlib.Path)
|
|
parser.add_argument("--previous-result-dir", type=pathlib.Path)
|
|
parser.add_argument("--result", required=True)
|
|
parser.add_argument("--run-url", required=True)
|
|
parser.add_argument("--runner-name")
|
|
args = parser.parse_args()
|
|
|
|
print(
|
|
json.dumps(
|
|
build_payload(
|
|
args.result_dir,
|
|
args.previous_result_dir,
|
|
args.result,
|
|
args.run_url,
|
|
args.runner_name,
|
|
)
|
|
)
|
|
)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|