From 01c57ec547cb701f2253c8c445931644cc9f60b9 Mon Sep 17 00:00:00 2001
From: Abhijeet Patil
Date: Mon, 5 Feb 2024 10:08:20 +0000
Subject: [PATCH] Remove uploading of perf results to git repo
 'zenith-perf-data' (#6590)

## Problem

We were archiving the perf benchmark results in two places:
- the Neon DB
- the git repo `zenith-perf-data`

Because the perf benchmark jobs ran in parallel, concurrent uploads of results to the `zenith-perf-data` git repo caused merge conflicts. This made the runs flaky and, as a side effect, the build started failing. The problem is described in https://github.com/neondatabase/neon/issues/5160.

## Summary of changes

The results stored in the git repo were never consumed, so uploading them there was redundant. This PR removes the uploading of perf results to the git repo.

The shell script `generate_and_push_perf_report.sh` used the Python scripts [git-upload](https://github.com/neondatabase/neon/compare/remove-perf-benchmark-git-upload?expand=1#diff-c6d938e7f060e487367d9dc8055245c82b51a73c1f97956111a495a8a86e9a33) and [scripts/generate_perf_report_page.py](https://github.com/neondatabase/neon/pull/6590/files#diff-81af2147e72d07e4cf8ee4395632596d805d6168ba75c71cab58db2659956ef8), which are not used anywhere else in the repo, so this PR removes them as well.

## Checklist before requesting a review

- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? If so, did you add the relevant metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section.

## Checklist before merging

- [ ] Do not forget to reformat the commit message to not include the above checklist
---
 scripts/generate_and_push_perf_report.sh |  14 --
 scripts/generate_perf_report_page.py     | 219 -----------------------
 scripts/git-upload                       | 170 ------------------
 3 files changed, 403 deletions(-)
 delete mode 100755 scripts/generate_perf_report_page.py
 delete mode 100755 scripts/git-upload

diff --git a/scripts/generate_and_push_perf_report.sh b/scripts/generate_and_push_perf_report.sh
index 9e03302b0f..178c570b13 100755
--- a/scripts/generate_and_push_perf_report.sh
+++ b/scripts/generate_and_push_perf_report.sh
@@ -8,17 +8,3 @@ SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 echo "Uploading perf report to neon pg"
 # ingest per test results data into neon backed postgres running in staging to build grafana reports on that data
 DATABASE_URL="$PERF_TEST_RESULT_CONNSTR" poetry run python "$SCRIPT_DIR"/ingest_perf_test_result.py --ingest "$REPORT_FROM"
-
-# Activate poetry's venv. Needed because git upload does not run in a project dir (it uses tmp to store the repository)
-# so the problem occurs because poetry cannot find pyproject.toml in temp dir created by git upload
-# shellcheck source=/dev/null
-. 
"$(poetry env info --path)"/bin/activate - -echo "Uploading perf result to zenith-perf-data" -scripts/git-upload \ - --repo=https://"$VIP_VAP_ACCESS_TOKEN"@github.com/neondatabase/zenith-perf-data.git \ - --message="add performance test result for $GITHUB_SHA neon revision" \ - --branch=master \ - copy "$REPORT_FROM" "data/$REPORT_TO" `# COPY FROM TO_RELATIVE`\ - --merge \ - --run-cmd "python $SCRIPT_DIR/generate_perf_report_page.py --input-dir data/$REPORT_TO --out reports/$REPORT_TO.html" diff --git a/scripts/generate_perf_report_page.py b/scripts/generate_perf_report_page.py deleted file mode 100755 index b5b49bb600..0000000000 --- a/scripts/generate_perf_report_page.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, cast - -from jinja2 import Template - -# skip 'input' columns. They are included in the header and just blow the table -EXCLUDE_COLUMNS = frozenset( - { - "scale", - "duration", - "number_of_clients", - "number_of_threads", - "init_start_timestamp", - "init_end_timestamp", - "run_start_timestamp", - "run_end_timestamp", - } -) - -KEY_EXCLUDE_FIELDS = frozenset( - { - "init_start_timestamp", - "init_end_timestamp", - "run_start_timestamp", - "run_end_timestamp", - } -) -NEGATIVE_COLOR = "negative" -POSITIVE_COLOR = "positive" -EPS = 1e-6 - - -@dataclass -class SuitRun: - revision: str - values: Dict[str, Any] - - -@dataclass -class SuitRuns: - platform: str - suit: str - common_columns: List[Tuple[str, str]] - value_columns: List[str] - runs: List[SuitRun] - - -@dataclass -class RowValue: - value: str - color: str - ratio: str - - -def get_columns(values: List[Dict[Any, Any]]) -> Tuple[List[Tuple[str, str]], List[str]]: - value_columns = [] - common_columns = [] - for item in values: - if item["name"] in KEY_EXCLUDE_FIELDS: - continue - if item["report"] != "test_param": - value_columns.append(cast(str, item["name"])) - else: - common_columns.append((cast(str, item["name"]), cast(str, item["value"]))) - value_columns.sort() - common_columns.sort(key=lambda x: x[0]) # sort by name - return common_columns, value_columns - - -def format_ratio(ratio: float, report: str) -> Tuple[str, str]: - color = "" - sign = "+" if ratio > 0 else "" - if abs(ratio) < 0.05: - return f" ({sign}{ratio:.2f})", color - - if report not in {"test_param", "higher_is_better", "lower_is_better"}: - raise ValueError(f"Unknown report type: {report}") - - if report == "test_param": - return f"{ratio:.2f}", color - - if ratio > 0: - if report == "higher_is_better": - color = POSITIVE_COLOR - elif report == "lower_is_better": - color = NEGATIVE_COLOR - elif ratio < 0: - if report == "higher_is_better": - color = NEGATIVE_COLOR - elif report == "lower_is_better": - color = POSITIVE_COLOR - - return f" ({sign}{ratio:.2f})", color - - -def extract_value(name: str, suit_run: SuitRun) -> Optional[Dict[str, Any]]: - for item in suit_run.values["data"]: - if item["name"] == name: - return cast(Dict[str, Any], item) - return None - - -def get_row_values( - columns: List[str], run_result: SuitRun, prev_result: Optional[SuitRun] -) -> List[RowValue]: - row_values = [] - for column in columns: - current_value = extract_value(column, run_result) - if current_value is None: - # should never happen - raise ValueError(f"{column} not found in {run_result.values}") - - value = current_value["value"] - if isinstance(value, float): - value = f"{value:.2f}" - - if prev_result 
is None: - row_values.append(RowValue(value, "", "")) - continue - - prev_value = extract_value(column, prev_result) - if prev_value is None: - # this might happen when new metric is added and there is no value for it in previous run - # let this be here, TODO add proper handling when this actually happens - raise ValueError(f"{column} not found in previous result") - # adding `EPS` to each term to avoid ZeroDivisionError when the denominator is zero - ratio = (float(value) + EPS) / (float(prev_value["value"]) + EPS) - 1 - ratio_display, color = format_ratio(ratio, current_value["report"]) - row_values.append(RowValue(value, color, ratio_display)) - return row_values - - -@dataclass -class SuiteRunTableRow: - revision: str - values: List[RowValue] - - -def prepare_rows_from_runs(value_columns: List[str], runs: List[SuitRun]) -> List[SuiteRunTableRow]: - rows = [] - prev_run = None - for run in runs: - rows.append( - SuiteRunTableRow( - revision=run.revision, values=get_row_values(value_columns, run, prev_run) - ) - ) - prev_run = run - - return rows - - -def main(args: argparse.Namespace) -> None: - input_dir = Path(args.input_dir) - grouped_runs: Dict[str, SuitRuns] = {} - # we have files in form: _.json - # fill them in the hashmap so we have grouped items for the - # same run configuration (scale, duration etc.) ordered by counter. - for item in sorted(input_dir.iterdir(), key=lambda x: int(x.name.split("_")[0])): - run_data = json.loads(item.read_text()) - revision = run_data["revision"] - - for suit_result in run_data["result"]: - key = "{}{}".format(run_data["platform"], suit_result["suit"]) - # pack total duration as a synthetic value - total_duration = suit_result["total_duration"] - suit_result["data"].append( - { - "name": "total_duration", - "value": total_duration, - "unit": "s", - "report": "lower_is_better", - } - ) - common_columns, value_columns = get_columns(suit_result["data"]) - - grouped_runs.setdefault( - key, - SuitRuns( - platform=run_data["platform"], - suit=suit_result["suit"], - common_columns=common_columns, - value_columns=value_columns, - runs=[], - ), - ) - - grouped_runs[key].runs.append(SuitRun(revision=revision, values=suit_result)) - context = {} - for result in grouped_runs.values(): - suit = result.suit - context[suit] = { - "common_columns": result.common_columns, - "value_columns": result.value_columns, - "platform": result.platform, - # reverse the order so newest results are on top of the table - "rows": reversed(prepare_rows_from_runs(result.value_columns, result.runs)), - } - - template = Template((Path(__file__).parent / "perf_report_template.html").read_text()) - - Path(args.out).write_text(template.render(context=context)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--input-dir", - dest="input_dir", - required=True, - help="Directory with jsons generated by the test suite", - ) - parser.add_argument("--out", required=True, help="Output html file path") - args = parser.parse_args() - main(args) diff --git a/scripts/git-upload b/scripts/git-upload deleted file mode 100755 index d56c0f8e94..0000000000 --- a/scripts/git-upload +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import os -import shlex -import shutil -import subprocess -import sys -import textwrap -from contextlib import contextmanager -from distutils.dir_util import copy_tree -from pathlib import Path -from tempfile import TemporaryDirectory -from typing import Optional - - -def absolute_path(path): - return 
Path(path).resolve()
-
-
-def relative_path(path):
-    path = Path(path)
-    if path.is_absolute():
-        raise Exception(f'path `{path}` must be relative!')
-    return path
-
-
-@contextmanager
-def chdir(cwd: Path):
-    old = os.getcwd()
-    os.chdir(cwd)
-    try:
-        yield cwd
-    finally:
-        os.chdir(old)
-
-
-def run(cmd, *args, **kwargs):
-    print('$', ' '.join(cmd))
-    subprocess.check_call(cmd, *args, **kwargs)
-
-
-class GitRepo:
-    def __init__(self, url, branch: Optional[str] = None):
-        self.url = url
-        self.cwd = TemporaryDirectory()
-        self.branch = branch
-
-        args = [
-            'git',
-            'clone',
-            '--single-branch',
-        ]
-        if self.branch:
-            args.extend(['--branch', self.branch])
-
-        subprocess.check_call([
-            *args,
-            str(url),
-            self.cwd.name,
-        ])
-
-    def is_dirty(self):
-        res = subprocess.check_output(['git', 'status', '--porcelain'], text=True).strip()
-        return bool(res)
-
-    def update(self, message, action, branch=None):
-        with chdir(self.cwd.name):
-            if not branch:
-                cmd = ['git', 'branch', '--show-current']
-                branch = subprocess.check_output(cmd, text=True).strip()
-
-            # Run action in repo's directory
-            action()
-
-            run(['git', 'add', '.'])
-
-            if not self.is_dirty():
-                print('No changes detected, quitting')
-                return
-
-            git_with_user = [
-                'git',
-                '-c',
-                'user.name=vipvap',
-                '-c',
-                'user.email=vipvap@zenith.tech',
-            ]
-            run(git_with_user + [
-                'commit',
-                '--author="vipvap <vipvap@zenith.tech>"',
-                f'--message={message}',
-            ])
-
-            for _ in range(5):
-                try:
-                    run(['git', 'fetch', 'origin', branch])
-                    run(git_with_user + ['rebase', f'origin/{branch}'])
-                    run(['git', 'push', 'origin', branch])
-                    return
-
-                except subprocess.CalledProcessError as e:
-                    print(f'failed to update branch `{branch}`: {e}', file=sys.stderr)
-
-            raise Exception(f'failed to update branch `{branch}`')
-
-
-def do_copy(args):
-    src = args.src
-    dst = args.dst
-
-    if args.forbid_overwrite and dst.exists():
-        raise FileExistsError(f"File exists: '{dst}'")
-
-    if src.is_dir():
-        if not args.merge:
-            shutil.rmtree(dst, ignore_errors=True)
-        # distutils is deprecated, but this is a temporary workaround before python version bump
-        # here we need dir_exists_ok=True from shutil.copytree which is available in python 3.8+
-        copy_tree(str(src), str(dst))
-    else:
-        shutil.copy(src, dst)
-
-    if args.run_cmd:
-        run(shlex.split(args.run_cmd))
-
-
-def main():
-    parser = argparse.ArgumentParser(description='Git upload tool')
-    parser.add_argument('--repo', type=str, metavar='URL', required=True, help='git repo url')
-    parser.add_argument('--message', type=str, metavar='TEXT', help='commit message')
-    parser.add_argument('--branch', type=str, metavar='TEXT', help='target git repo branch')
-
-    commands = parser.add_subparsers(title='commands', dest='subparser_name')
-
-    p_copy = commands.add_parser(
-        'copy',
-        help='copy file into the repo',
-        formatter_class=argparse.RawTextHelpFormatter,
-    )
-    p_copy.add_argument('src', type=absolute_path, help='source path')
-    p_copy.add_argument('dst', type=relative_path, help='relative dest path')
-    p_copy.add_argument('--forbid-overwrite', action='store_true', help='do not allow overwrites')
-    p_copy.add_argument(
-        '--merge',
-        action='store_true',
-        help='when copying a directory do not delete existing data, but add new files')
-    p_copy.add_argument('--run-cmd',
-                        help=textwrap.dedent('''\
-                        run arbitrary cmd on top of copied files,
-                        example usage is static content generation
-                        based on current repository state\
-                        '''))
-
-    args = parser.parse_args()
-
-    commands = {
-        'copy': do_copy,
-    }
-
-    action = commands.get(args.subparser_name)
-    if action:
-        message = args.message or 'update'
-        GitRepo(args.repo, args.branch).update(message, lambda: action(args))
-    else:
-        parser.print_usage()
-
-
-if __name__ == '__main__':
-    main()
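
A note for readers on the failure mode (an illustrative aside, not part of the patch above): the deleted `scripts/git-upload` already tried to absorb concurrent pushes with a fetch/rebase/push loop capped at five attempts. That recovers when another job has merely pushed first (the push is rejected as non-fast-forward, and the next attempt rebases onto the new tip), but not when two jobs changed the same file, e.g. a shared generated report page: the rebase then stops on a content conflict, and because a conflicted rebase leaves the clone in a mid-rebase state, the remaining attempts fail immediately. A minimal sketch of that loop, condensed from the deleted script (`push_with_retries` is a name introduced here purely for illustration):

```python
import subprocess
import sys


def push_with_retries(branch: str, attempts: int = 5) -> None:
    """Condensed from the update() loop of the deleted scripts/git-upload.

    Assumes the current working directory is a clone of the target repo
    with local commits to publish (git-upload prepared such a clone in a
    temporary directory).
    """
    for _ in range(attempts):
        try:
            subprocess.check_call(["git", "fetch", "origin", branch])
            # Recoverable case: another job pushed first; rebasing onto the
            # new tip lets the subsequent push succeed.
            # Unrecoverable case: both jobs rewrote the same file; the rebase
            # stops on a content conflict (and leaves the clone mid-rebase,
            # so later iterations fail immediately).
            subprocess.check_call(["git", "rebase", f"origin/{branch}"])
            subprocess.check_call(["git", "push", "origin", branch])
            return
        except subprocess.CalledProcessError as e:
            print(f"failed to update branch `{branch}`: {e}", file=sys.stderr)
    raise RuntimeError(f"failed to update branch `{branch}`")
```

Ingesting results into the Neon-backed Postgres database (the path this PR keeps) sidesteps the problem: concurrent inserts from parallel jobs do not contend the way concurrent pushes to a single git branch do.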