Files
greptimedb/.github/scripts/query-regression-run.py
discord9 c44f8da646 feat: add query regression perf harness (#8406)
* feat: add query regression perf harness

Signed-off-by: discord9 <discord9@163.com>

* feat: extend query regression cases

Signed-off-by: discord9 <discord9@163.com>

* ci: harden query regression workflows

Signed-off-by: discord9 <discord9@163.com>

* fix: address query regression review comments

Signed-off-by: discord9 <discord9@163.com>

* ci: limit query regression PR triggers

Signed-off-by: discord9 <discord9@163.com>

* ci: run full query regression case set

Signed-off-by: discord9 <discord9@163.com>

* refactor: model query regression scenarios

Signed-off-by: discord9 <discord9@163.com>

* fix: avoid unenforced query regression thresholds

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
2026-07-03 09:09:01 +00:00

185 lines
6.4 KiB
Python

#!/usr/bin/env python3
# Copyright 2023 Greptime Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run one or more query regression cases after the binaries are built."""
from __future__ import annotations
import argparse
import os
import re
import subprocess
from pathlib import Path
# Case files that are run when no explicit case is requested, or when the
# selection is the single token "all" (see split_cases). Relative paths are
# later joined onto the candidate source directory by resolve_case_path.
DEFAULT_CASES = [
    "tests/perf/query_cases/smoke_direct_sst/case.toml",
    "tests/perf/query_cases/promql_pushdown_7913/case.toml",
    "tests/perf/query_cases/sql_topk_order_by/case.toml",
    "tests/perf/query_cases/sql_aggregate_order_by/case.toml",
    "tests/perf/query_cases/sql_join_filter_order/case.toml",
]
def split_cases(values: list[str]) -> list[str]:
    """Expand raw case selections into an ordered, de-duplicated list.

    Every entry of *values* may contain several case paths separated by commas
    and/or whitespace. When no token is found at all, or the only token is
    ``all``, a copy of ``DEFAULT_CASES`` is returned.

    Raises:
        ValueError: if ``all`` appears alongside any other token (including a
            repeated ``all``).
    """
    separator = re.compile(r"[\s,]+")
    tokens: list[str] = []
    for raw in values:
        for piece in separator.split(raw.strip()):
            # Splitting an empty string yields an empty piece; skip it.
            if piece:
                tokens.append(piece)

    if tokens in ([], ["all"]):
        return DEFAULT_CASES.copy()
    if "all" in tokens:
        raise ValueError("'all' cannot be mixed with explicit case paths")
    # dict keys keep insertion order, so duplicates are dropped while the
    # first occurrence of each case keeps its position.
    return list(dict.fromkeys(tokens))
def parse_bool(value: str) -> bool:
    """Return True when *value* is ``1``, ``true``, ``yes`` or ``on``.

    The comparison ignores letter case; surrounding whitespace is not
    stripped, so any other text evaluates to False.
    """
    truthy = ("1", "true", "yes", "on")
    return value.lower() in truthy
def profile_dir(cargo_profile: str) -> str:
    """Translate a cargo profile name into its build directory name.

    The ``dev`` profile maps to ``debug``; every other profile name is
    returned unchanged.
    """
    return "debug" if cargo_profile == "dev" else cargo_profile
def resolve_case_path(candidate_src: Path, case: str) -> Path:
    """Resolve a case selection to the path handed to the runner.

    Absolute paths, and paths whose first component already equals the final
    component of *candidate_src*, are returned untouched. Anything else is
    joined onto *candidate_src*.
    """
    case_path = Path(case)
    if case_path.is_absolute():
        return case_path
    # Compare only the leading component so an already-prefixed path is not
    # prefixed a second time.
    leading = case_path.parts[:1]
    if leading == (candidate_src.name,):
        return case_path
    return candidate_src / case_path
def case_slug(case_path: Path) -> str:
    """Derive a directory-safe label for a case file.

    For a file named ``case.toml`` the parent directory name is used;
    otherwise the file stem is used. Runs of characters outside
    ``A-Za-z0-9_.-`` collapse to a single ``-``, leading and trailing ``-`` are
    trimmed, and ``case`` is returned if nothing is left.
    """
    if case_path.name == "case.toml":
        label = case_path.parent.name
    else:
        label = case_path.stem
    cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "-", label).strip("-")
    return cleaned if cleaned else "case"
def append_github_output(path: str | None, status: int) -> None:
    """Append a ``status=<status>`` line to the file at *path*.

    Does nothing when *path* is ``None`` or empty. The file is opened in append
    mode with UTF-8 encoding, so existing content is kept.
    """
    if path:
        with open(path, mode="a", encoding="utf-8") as output:
            output.write(f"status={status}\n")
def append_step_summary(summary: Path) -> None:
    """Append the contents of *summary* to the GitHub step summary file.

    The destination is taken from the ``GITHUB_STEP_SUMMARY`` environment
    variable. Nothing happens when that variable is unset or empty, or when
    *summary* does not exist.
    """
    step_summary = os.environ.get("GITHUB_STEP_SUMMARY")
    if not step_summary or not summary.exists():
        return
    # Read with the same explicit encoding used for writing; relying on the
    # locale default could mis-decode non-ASCII summary text.
    content = summary.read_text(encoding="utf-8")
    with open(step_summary, "a", encoding="utf-8") as out:
        out.write(content)
def run_case(args: argparse.Namespace, case_path: Path, work_dir: Path) -> int:
    """Run the query regression runner for one case and return its exit code.

    The runner script and the fixture generator come from the candidate source
    tree; the ``greptime`` binaries come from the ``target/<profile dir>``
    directory of the base and candidate trees. Output is wrapped in a
    ``::group::`` / ``::endgroup::`` pair, and the group is closed even if
    launching the subprocess raises.
    """
    build_dir = profile_dir(args.cargo_profile)
    runner_script = args.candidate_src / "tests/perf/query_regression_runner.py"
    base_target = args.base_src / "target" / build_dir
    candidate_target = args.candidate_src / "target" / build_dir

    cmd = ["uv", "run", "--no-project", "python", str(runner_script)]
    cmd += ["--case", str(case_path)]
    cmd += ["--base-bin", str(base_target / "greptime")]
    cmd += ["--candidate-bin", str(candidate_target / "greptime")]
    cmd += ["--fixture-generator", str(candidate_target / "query_perf_fixture")]
    cmd += ["--work-dir", str(work_dir)]
    cmd += ["--http-timeout", str(args.http_timeout)]
    # The flag arrives as text (CLI or environment), so it is parsed here.
    if parse_bool(args.allow_large_fixture):
        cmd += ["--allow-large-fixture"]

    print(f"::group::Query regression case: {case_path}", flush=True)
    try:
        # check=False: a failing case is reported through the return code
        # instead of raising.
        completed = subprocess.run(cmd, check=False)
        return completed.returncode
    finally:
        print("::endgroup::", flush=True)
def write_summary(args: argparse.Namespace, reports: list[Path]) -> int:
    """Invoke the summary script over *reports* and return its exit code.

    Each report path is passed as its own ``--report`` option, followed by the
    run URL, case name, base/candidate refs and the output path taken from
    *args*.
    """
    cmd = ["uv", "run", "--no-project", "python", str(args.summary_script)]
    for report in reports:
        cmd += ["--report", str(report)]

    trailing_options = (
        ("--run-url", args.run_url),
        ("--case-name", args.case_name),
        ("--base-ref", args.base_ref),
        ("--candidate-ref", args.candidate_ref),
        ("--output", str(args.summary_output)),
    )
    for flag, value in trailing_options:
        cmd += [flag, value]

    # check=False: the caller decides what to do with a non-zero exit code.
    completed = subprocess.run(cmd, check=False)
    return completed.returncode
def main() -> int:
    """Parse CLI arguments, run every selected case, and publish the results.

    Each case runs in its own sub-directory of ``--work-dir`` (named by
    ``case_slug``). Afterwards the summary script is invoked over the expected
    report path of every case, the summary is appended to the GitHub step
    summary, and the aggregate status is appended to the GitHub output file as
    ``status=<n>``. The aggregate status is the exit code of the last failing
    case, or the summary script's exit code if only the summary failed.

    Returns:
        ``0`` whenever a GitHub output path is configured, because the
        aggregate status is delivered through that file (presumably so the
        calling workflow can act on the ``status`` output in a later step;
        confirm against the workflow). Without an output path the aggregate
        status itself is returned, so a failure is not silently lost.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--cases", action="append", help="'all' or comma/space separated case paths")
    parser.add_argument("--base-src", type=Path, default=Path("base-src"))
    parser.add_argument("--candidate-src", type=Path, default=Path("candidate-src"))
    parser.add_argument("--cargo-profile", default=os.environ.get("CARGO_PROFILE", "nightly"))
    parser.add_argument("--work-dir", default=Path("query-regression-work"), type=Path)
    parser.add_argument("--http-timeout", default=os.environ.get("HTTP_TIMEOUT", "300"))
    parser.add_argument("--allow-large-fixture", default=os.environ.get("ALLOW_LARGE_FIXTURE", "false"))
    parser.add_argument(
        "--summary-script",
        type=Path,
        default=Path("candidate-src/.github/scripts/query-regression-summary.py"),
    )
    parser.add_argument("--summary-output", type=Path, default=Path("query-regression-summary.md"))
    parser.add_argument("--run-url", default=os.environ.get("RUN_URL", ""))
    parser.add_argument("--case-name", default=os.environ.get("CASE_NAME", "default case set"))
    parser.add_argument("--base-ref", default=os.environ.get("BASE_REF", ""))
    parser.add_argument("--candidate-ref", default=os.environ.get("CANDIDATE_REF", ""))
    parser.add_argument("--github-output", default=os.environ.get("GITHUB_OUTPUT"))
    args = parser.parse_args()

    try:
        # Command-line --cases wins; otherwise fall back to CASE_PATHS, then "all".
        cases = split_cases(args.cases or [os.environ.get("CASE_PATHS", "all")])
    except ValueError as err:
        print(f"error: {err}", flush=True)
        append_github_output(args.github_output, 1)
        # With no output file the status line above was a no-op, so surface
        # the failure through the exit code instead.
        return 0 if args.github_output else 1

    reports: list[Path] = []
    status = 0
    for case in cases:
        case_path = resolve_case_path(args.candidate_src, case)
        work_dir = args.work_dir / case_slug(case_path)
        # The report path is recorded even if the case fails; the summary
        # script receives every expected report location.
        reports.append(work_dir / "query-regression-report.json")
        case_status = run_case(args, case_path, work_dir)
        if case_status != 0:
            status = case_status

    summary_status = write_summary(args, reports)
    # A summary failure only matters when no case has already failed.
    if summary_status != 0 and status == 0:
        status = summary_status

    append_step_summary(args.summary_output)
    append_github_output(args.github_output, status)
    return 0 if args.github_output else status
# Script entry point: use main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())