From ccf3c8cc3050998b81dab6e27aab29f914e06d4b Mon Sep 17 00:00:00 2001 From: Dmitry Rodionov Date: Mon, 24 Jan 2022 18:07:27 +0300 Subject: [PATCH] store performance test results in our staging cluster to be able to visualize them in grafana --- .circleci/config.yml | 1 + .github/workflows/benchmarking.yml | 10 +- scripts/generate_and_push_perf_report.sh | 7 +- scripts/ingest_perf_test_result.py | 136 +++++++++++++++++++++++ 4 files changed, 149 insertions(+), 5 deletions(-) create mode 100644 scripts/ingest_perf_test_result.py diff --git a/.circleci/config.yml b/.circleci/config.yml index d1a232d3c9..1f0bf1765c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -297,6 +297,7 @@ jobs: - PLATFORM: zenith-local-ci command: | PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)" + rm -rf $PERF_REPORT_DIR || true TEST_SELECTION="test_runner/<< parameters.test_selection >>" EXTRA_PARAMS="<< parameters.extra_params >>" diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index bbdf404a5f..b7e1149cb8 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -2,8 +2,8 @@ name: benchmarking on: # uncomment to run on push for debugging your PR - # push: - # branches: [ mybranch ] + push: + branches: [ dkr/ingest-perf-data ] schedule: # * is a special character in YAML so you have to quote this string # ┌───────────── minute (0 - 59) @@ -41,7 +41,7 @@ jobs: run: | python3 -m pip install --upgrade poetry wheel # since pip/poetry caches are reused there shouldn't be any troubles with install every time - poetry install + ./scripts/pysync - name: Show versions run: | @@ -89,11 +89,15 @@ jobs: BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}" REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally run: | + # just to be sure that no data was cached on self hosted runner + # since it might generate duplicates when calling ingest_perf_test_result.py + rm 
-rf perf-report-staging || true mkdir -p perf-report-staging ./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging - name: Submit result env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" run: | REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh diff --git a/scripts/generate_and_push_perf_report.sh b/scripts/generate_and_push_perf_report.sh index a662aeffeb..3ddf0f9beb 100755 --- a/scripts/generate_and_push_perf_report.sh +++ b/scripts/generate_and_push_perf_report.sh @@ -6,11 +6,14 @@ set -eux -o pipefail SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -git clone https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-perf-data.git +git clone --single-branch --branch testing https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-perf-data.git # TODO (PRIOR MERGE) replace back before merge cd zenith-perf-data mkdir -p reports/ mkdir -p data/$REPORT_TO +# ingest per test results data into zenith backed postgres running in staging to build grafana reports on that data +DATABASE_URL=$PERF_TEST_RESULT_CONNSTR poetry run python $SCRIPT_DIR/ingest_perf_test_result.py --ingest $REPORT_FROM + cp $REPORT_FROM/* data/$REPORT_TO echo "Generating report" @@ -24,4 +27,4 @@ git \ --author="vipvap " \ -m "add performance test result for $GITHUB_SHA zenith revision" -git push https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-perf-data.git master +git push https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-perf-data.git testing # master TODO (PRIOR MERGE) replace back before merge diff --git a/scripts/ingest_perf_test_result.py b/scripts/ingest_perf_test_result.py new file mode 100644 index 0000000000..89463c986a --- /dev/null +++ b/scripts/ingest_perf_test_result.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +import argparse 
from contextlib import contextmanager
import json
import os
import psycopg2
import psycopg2.extras
from pathlib import Path
from datetime import datetime, timezone

# Results table schema; CREATE TABLE IF NOT EXISTS makes `--initdb` idempotent.
CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS perf_test_results (
    id SERIAL PRIMARY KEY,
    suit TEXT,
    revision CHAR(40),
    platform TEXT,
    metric_name TEXT,
    metric_value NUMERIC,
    metric_unit VARCHAR(10),
    metric_report_type TEXT,
    recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW()
)
"""


def err(msg):
    """Print an error message and terminate the process with exit code 1."""
    print(f'error: {msg}')
    exit(1)


@contextmanager
def get_connection_cursor():
    """Yield a cursor on a connection built from the DATABASE_URL env var.

    The psycopg2 connection context manager commits on successful exit and
    rolls back on exception, so callers do not need to commit explicitly.
    """
    connstr = os.getenv('DATABASE_URL')
    if not connstr:
        err('DATABASE_URL environment variable is not set')
    with psycopg2.connect(connstr) as conn:
        with conn.cursor() as cur:
            yield cur


def create_table(cur):
    """Create the perf_test_results table if it does not already exist."""
    cur.execute(CREATE_TABLE)


def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) -> int:
    """Insert every metric value from one perf-report JSON file.

    Args:
        cursor: an open database cursor.
        data_file: report file produced by the perf test harness; expected
            to contain 'revision', 'platform' and a 'result' list of suites,
            each with a 'data' list of metrics.
        recorded_at_timestamp: unix timestamp (seconds) of when the run was
            recorded; stored as an aware UTC timestamp.

    Returns:
        The number of metric rows inserted.
    """
    run_data = json.loads(data_file.read_text())
    revision = run_data['revision']
    platform = run_data['platform']

    args_list = []
    for suit_result in run_data['result']:
        suit = suit_result['suit']

        # Surface the suite's total duration as an ordinary metric so it can
        # be charted alongside the per-test metrics.
        suit_result['data'].append({
            'name': 'total_duration',
            'value': suit_result['total_duration'],
            'unit': 's',
            'report': 'lower_is_better',
        })

        for metric in suit_result['data']:
            args_list.append({
                'suit': suit,
                'revision': revision,
                'platform': platform,
                'metric_name': metric['name'],
                'metric_value': metric['value'],
                'metric_unit': metric['unit'],
                'metric_report_type': metric['report'],
                # The column is TIMESTAMP WITH TIME ZONE: use an aware UTC
                # datetime so the stored instant does not depend on the
                # session time zone.  The previous utcfromtimestamp() call
                # returned a naive datetime (and is deprecated since 3.12).
                'recorded_at_timestamp':
                    datetime.fromtimestamp(recorded_at_timestamp, tz=timezone.utc),
            })

    # Single batched INSERT for all metrics of this report file.
    psycopg2.extras.execute_values(
        cursor,
        """
        INSERT INTO perf_test_results (
            suit,
            revision,
            platform,
            metric_name,
            metric_value,
            metric_unit,
            metric_report_type,
            recorded_at_timestamp
        ) VALUES %s
        """,
        args_list,
        template="""(
            %(suit)s,
            %(revision)s,
            %(platform)s,
            %(metric_name)s,
            %(metric_value)s,
            %(metric_unit)s,
            %(metric_report_type)s,
            %(recorded_at_timestamp)s
        )""",
    )
    return len(args_list)


def main():
    """Parse CLI arguments and ingest perf results into the database."""
    parser = argparse.ArgumentParser(
        description='Perf test result uploader. Database connection string '
                    'should be provided via DATABASE_URL environment variable')
    parser.add_argument(
        '--ingest',
        type=Path,
        help='Path to perf test result file, or directory with perf test result files')
    parser.add_argument('--initdb', action='store_true', help='Initialize database')

    args = parser.parse_args()
    with get_connection_cursor() as cur:
        if args.initdb:
            create_table(cur)

        if args.ingest:
            # Bug fix: only check existence when --ingest was actually given.
            # Previously `args.ingest.exists()` ran unconditionally and
            # crashed with AttributeError on None when the flag was omitted
            # (e.g. a plain `--initdb` invocation).
            if not args.ingest.exists():
                err(f'ingest path {args.ingest} does not exist')

            if args.ingest.is_dir():
                # File names are expected to start with '<unix_ts>_'; process
                # reports in chronological order.
                for item in sorted(args.ingest.iterdir(),
                                   key=lambda p: int(p.name.split('_')[0])):
                    recorded_at_timestamp = int(item.name.split('_')[0])
                    ingested = ingest_perf_test_result(cur, item, recorded_at_timestamp)
                    print(f'Ingested {ingested} metric values from {item}')
            else:
                recorded_at_timestamp = int(args.ingest.name.split('_')[0])
                ingested = ingest_perf_test_result(cur, args.ingest, recorded_at_timestamp)
                print(f'Ingested {ingested} metric values from {args.ingest}')


if __name__ == '__main__':
    main()