diff --git a/scripts/generate_and_push_perf_report.sh b/scripts/generate_and_push_perf_report.sh index 9e03302b0f..178c570b13 100755 --- a/scripts/generate_and_push_perf_report.sh +++ b/scripts/generate_and_push_perf_report.sh @@ -8,17 +8,3 @@ SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) echo "Uploading perf report to neon pg" # ingest per test results data into neon backed postgres running in staging to build grafana reports on that data DATABASE_URL="$PERF_TEST_RESULT_CONNSTR" poetry run python "$SCRIPT_DIR"/ingest_perf_test_result.py --ingest "$REPORT_FROM" - -# Activate poetry's venv. Needed because git upload does not run in a project dir (it uses tmp to store the repository) -# so the problem occurs because poetry cannot find pyproject.toml in temp dir created by git upload -# shellcheck source=/dev/null -. "$(poetry env info --path)"/bin/activate - -echo "Uploading perf result to zenith-perf-data" -scripts/git-upload \ - --repo=https://"$VIP_VAP_ACCESS_TOKEN"@github.com/neondatabase/zenith-perf-data.git \ - --message="add performance test result for $GITHUB_SHA neon revision" \ - --branch=master \ - copy "$REPORT_FROM" "data/$REPORT_TO" `# COPY FROM TO_RELATIVE`\ - --merge \ - --run-cmd "python $SCRIPT_DIR/generate_perf_report_page.py --input-dir data/$REPORT_TO --out reports/$REPORT_TO.html" diff --git a/scripts/generate_perf_report_page.py b/scripts/generate_perf_report_page.py deleted file mode 100755 index b5b49bb600..0000000000 --- a/scripts/generate_perf_report_page.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, cast - -from jinja2 import Template - -# skip 'input' columns. They are included in the header and just blow the table -EXCLUDE_COLUMNS = frozenset( - { - "scale", - "duration", - "number_of_clients", - "number_of_threads", - "init_start_timestamp", - "init_end_timestamp", - "run_start_timestamp", - "run_end_timestamp", - } -) - -KEY_EXCLUDE_FIELDS = frozenset( - { - "init_start_timestamp", - "init_end_timestamp", - "run_start_timestamp", - "run_end_timestamp", - } -) -NEGATIVE_COLOR = "negative" -POSITIVE_COLOR = "positive" -EPS = 1e-6 - - -@dataclass -class SuitRun: - revision: str - values: Dict[str, Any] - - -@dataclass -class SuitRuns: - platform: str - suit: str - common_columns: List[Tuple[str, str]] - value_columns: List[str] - runs: List[SuitRun] - - -@dataclass -class RowValue: - value: str - color: str - ratio: str - - -def get_columns(values: List[Dict[Any, Any]]) -> Tuple[List[Tuple[str, str]], List[str]]: - value_columns = [] - common_columns = [] - for item in values: - if item["name"] in KEY_EXCLUDE_FIELDS: - continue - if item["report"] != "test_param": - value_columns.append(cast(str, item["name"])) - else: - common_columns.append((cast(str, item["name"]), cast(str, item["value"]))) - value_columns.sort() - common_columns.sort(key=lambda x: x[0]) # sort by name - return common_columns, value_columns - - -def format_ratio(ratio: float, report: str) -> Tuple[str, str]: - color = "" - sign = "+" if ratio > 0 else "" - if abs(ratio) < 0.05: - return f" ({sign}{ratio:.2f})", color - - if report not in {"test_param", "higher_is_better", "lower_is_better"}: - raise ValueError(f"Unknown report type: {report}") - - if report == "test_param": - return f"{ratio:.2f}", color - - if ratio > 0: - if report == "higher_is_better": - color = POSITIVE_COLOR - elif report == "lower_is_better": - color = NEGATIVE_COLOR - elif ratio < 0: - if report == "higher_is_better": - color = NEGATIVE_COLOR - elif report == "lower_is_better": - color = POSITIVE_COLOR - - return f" ({sign}{ratio:.2f})", color - - -def extract_value(name: str, suit_run: SuitRun) -> Optional[Dict[str, Any]]: - for item in suit_run.values["data"]: - if item["name"] == name: - return cast(Dict[str, Any], item) - return None - - -def get_row_values( - columns: List[str], run_result: SuitRun, prev_result: Optional[SuitRun] -) -> List[RowValue]: - row_values = [] - for column in columns: - current_value = extract_value(column, run_result) - if current_value is None: - # should never happen - raise ValueError(f"{column} not found in {run_result.values}") - - value = current_value["value"] - if isinstance(value, float): - value = f"{value:.2f}" - - if prev_result is None: - row_values.append(RowValue(value, "", "")) - continue - - prev_value = extract_value(column, prev_result) - if prev_value is None: - # this might happen when new metric is added and there is no value for it in previous run - # let this be here, TODO add proper handling when this actually happens - raise ValueError(f"{column} not found in previous result") - # adding `EPS` to each term to avoid ZeroDivisionError when the denominator is zero - ratio = (float(value) + EPS) / (float(prev_value["value"]) + EPS) - 1 - ratio_display, color = format_ratio(ratio, current_value["report"]) - row_values.append(RowValue(value, color, ratio_display)) - return row_values - - -@dataclass -class SuiteRunTableRow: - revision: str - values: List[RowValue] - - -def prepare_rows_from_runs(value_columns: List[str], runs: List[SuitRun]) -> List[SuiteRunTableRow]: - rows = [] - prev_run = None - for run in runs: - rows.append( - SuiteRunTableRow( - revision=run.revision, values=get_row_values(value_columns, run, prev_run) - ) - ) - prev_run = run - - return rows - - -def main(args: argparse.Namespace) -> None: - input_dir = Path(args.input_dir) - grouped_runs: Dict[str, SuitRuns] = {} - # we have files in form: _.json - # fill them in the hashmap so we have grouped items for the - # same run configuration (scale, duration etc.) ordered by counter. - for item in sorted(input_dir.iterdir(), key=lambda x: int(x.name.split("_")[0])): - run_data = json.loads(item.read_text()) - revision = run_data["revision"] - - for suit_result in run_data["result"]: - key = "{}{}".format(run_data["platform"], suit_result["suit"]) - # pack total duration as a synthetic value - total_duration = suit_result["total_duration"] - suit_result["data"].append( - { - "name": "total_duration", - "value": total_duration, - "unit": "s", - "report": "lower_is_better", - } - ) - common_columns, value_columns = get_columns(suit_result["data"]) - - grouped_runs.setdefault( - key, - SuitRuns( - platform=run_data["platform"], - suit=suit_result["suit"], - common_columns=common_columns, - value_columns=value_columns, - runs=[], - ), - ) - - grouped_runs[key].runs.append(SuitRun(revision=revision, values=suit_result)) - context = {} - for result in grouped_runs.values(): - suit = result.suit - context[suit] = { - "common_columns": result.common_columns, - "value_columns": result.value_columns, - "platform": result.platform, - # reverse the order so newest results are on top of the table - "rows": reversed(prepare_rows_from_runs(result.value_columns, result.runs)), - } - - template = Template((Path(__file__).parent / "perf_report_template.html").read_text()) - - Path(args.out).write_text(template.render(context=context)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--input-dir", - dest="input_dir", - required=True, - help="Directory with jsons generated by the test suite", - ) - parser.add_argument("--out", required=True, help="Output html file path") - args = parser.parse_args() - main(args) diff --git a/scripts/git-upload b/scripts/git-upload deleted file mode 100755 index d56c0f8e94..0000000000 --- a/scripts/git-upload +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import os -import shlex -import shutil -import subprocess -import sys -import textwrap -from contextlib import contextmanager -from distutils.dir_util import copy_tree -from pathlib import Path -from tempfile import TemporaryDirectory -from typing import Optional - - -def absolute_path(path): - return Path(path).resolve() - - -def relative_path(path): - path = Path(path) - if path.is_absolute(): - raise Exception(f'path `{path}` must be relative!') - return path - - -@contextmanager -def chdir(cwd: Path): - old = os.getcwd() - os.chdir(cwd) - try: - yield cwd - finally: - os.chdir(old) - - -def run(cmd, *args, **kwargs): - print('$', ' '.join(cmd)) - subprocess.check_call(cmd, *args, **kwargs) - - -class GitRepo: - def __init__(self, url, branch: Optional[str] = None): - self.url = url - self.cwd = TemporaryDirectory() - self.branch = branch - - args = [ - 'git', - 'clone', - '--single-branch', - ] - if self.branch: - args.extend(['--branch', self.branch]) - - subprocess.check_call([ - *args, - str(url), - self.cwd.name, - ]) - - def is_dirty(self): - res = subprocess.check_output(['git', 'status', '--porcelain'], text=True).strip() - return bool(res) - - def update(self, message, action, branch=None): - with chdir(self.cwd.name): - if not branch: - cmd = ['git', 'branch', '--show-current'] - branch = subprocess.check_output(cmd, text=True).strip() - - # Run action in repo's directory - action() - - run(['git', 'add', '.']) - - if not self.is_dirty(): - print('No changes detected, quitting') - return - - git_with_user = [ - 'git', - '-c', - 'user.name=vipvap', - '-c', - 'user.email=vipvap@zenith.tech', - ] - run(git_with_user + [ - 'commit', - '--author="vipvap "', - f'--message={message}', - ]) - - for _ in range(5): - try: - run(['git', 'fetch', 'origin', branch]) - run(git_with_user + ['rebase', f'origin/{branch}']) - run(['git', 'push', 'origin', branch]) - return - - except subprocess.CalledProcessError as e: - print(f'failed to update branch `{branch}`: {e}', file=sys.stderr) - - raise Exception(f'failed to update branch `{branch}`') - - -def do_copy(args): - src = args.src - dst = args.dst - - if args.forbid_overwrite and dst.exists(): - raise FileExistsError(f"File exists: '{dst}'") - - if src.is_dir(): - if not args.merge: - shutil.rmtree(dst, ignore_errors=True) - # distutils is deprecated, but this is a temporary workaround before python version bump - # here we need dir_exists_ok=True from shutil.copytree which is available in python 3.8+ - copy_tree(str(src), str(dst)) - else: - shutil.copy(src, dst) - - if args.run_cmd: - run(shlex.split(args.run_cmd)) - - -def main(): - parser = argparse.ArgumentParser(description='Git upload tool') - parser.add_argument('--repo', type=str, metavar='URL', required=True, help='git repo url') - parser.add_argument('--message', type=str, metavar='TEXT', help='commit message') - parser.add_argument('--branch', type=str, metavar='TEXT', help='target git repo branch') - - commands = parser.add_subparsers(title='commands', dest='subparser_name') - - p_copy = commands.add_parser( - 'copy', - help='copy file into the repo', - formatter_class=argparse.RawTextHelpFormatter, - ) - p_copy.add_argument('src', type=absolute_path, help='source path') - p_copy.add_argument('dst', type=relative_path, help='relative dest path') - p_copy.add_argument('--forbid-overwrite', action='store_true', help='do not allow overwrites') - p_copy.add_argument( - '--merge', - action='store_true', - help='when copying a directory do not delete existing data, but add new files') - p_copy.add_argument('--run-cmd', - help=textwrap.dedent('''\ - run arbitrary cmd on top of copied files, - example usage is static content generation - based on current repository state\ - ''')) - - args = parser.parse_args() - - commands = { - 'copy': do_copy, - } - - action = commands.get(args.subparser_name) - if action: - message = args.message or 'update' - GitRepo(args.repo, args.branch).update(message, lambda: action(args)) - else: - parser.print_usage() - - -if __name__ == '__main__': - main()