From 8e25d3e79e467fff51f09bf027bed1701fe9177b Mon Sep 17 00:00:00 2001
From: Alexander Bayandin <alexander@neon.tech>
Date: Wed, 6 Sep 2023 11:22:57 +0100
Subject: [PATCH] test_runner: add scale parameter to tpc-h tests (#5079)

## Problem

It's hard to find out which DB size we use for OLAP benchmarks (TPC-H in
particular).
This PR adds handling of the `TEST_OLAP_SCALE` env var, which gets added
to a test name as a parameter.

This is required for performing larger periodic benchmarks.

## Summary of changes
- Handle `TEST_OLAP_SCALE` in
`test_runner/performance/test_perf_olap.py`
- Set `TEST_OLAP_SCALE` in `.github/workflows/benchmarking.yml` to a
TPC-H scale
---
 .github/workflows/benchmarking.yml        | 66 +++++++++++++++++++----
 test_runner/performance/test_perf_olap.py | 18 +++++--
 2 files changed, 71 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml
index 172b904331..bea88e67a5 100644
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -117,6 +117,7 @@ jobs:
     outputs:
       pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
       olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}
+      tpch-compare-matrix: ${{ steps.tpch-compare-matrix.outputs.matrix }}
 
     steps:
     - name: Generate matrix for pgbench benchmark
@@ -158,6 +159,25 @@ jobs:
 
         echo "matrix=$(echo $matrix | jq --compact-output '.')" >> $GITHUB_OUTPUT
 
+    - name: Generate matrix for TPC-H benchmarks
+      id: tpch-compare-matrix
+      run: |
+        matrix='{
+          "platform": [
+            "neon-captest-reuse"
+          ],
+          "scale": [
+            "10"
+          ]
+        }'
+
+        if [ "$(date +%A)" = "Saturday" ]; then  # on Saturdays also include the RDS platforms
+          matrix=$(echo $matrix | jq '.include += [{ "platform": "rds-postgres", "scale": "10" },
+                                                   { "platform": "rds-aurora",   "scale": "10" }]')
+        fi
+
+        echo "matrix=$(echo $matrix | jq --compact-output '.')" >> $GITHUB_OUTPUT  # publish compact JSON as the "matrix" step output
+
+
   pgbench-compare:
     needs: [ generate-matrices ]
 
@@ -233,7 +253,11 @@ jobs:
 
         echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
 
-        psql ${CONNSTR} -c "SELECT version();"
+        QUERY="SELECT version();"
+        if [[ "${PLATFORM}" = "neon"* ]]; then  # [[ ]] is needed for the neon* prefix match; [ ] compares literally
+          QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
+        fi
+        psql ${CONNSTR} -c "${QUERY}"
 
     - name: Benchmark init
       uses: ./.github/actions/run-python-test-set
@@ -358,7 +382,11 @@ jobs:
 
         echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
 
-        psql ${CONNSTR} -c "SELECT version();"
+        QUERY="SELECT version();"
+        if [[ "${PLATFORM}" = "neon"* ]]; then  # [[ ]] is needed for the neon* prefix match; [ ] compares literally
+          QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
+        fi
+        psql ${CONNSTR} -c "${QUERY}"
 
     - name: ClickBench benchmark
       uses: ./.github/actions/run-python-test-set
@@ -372,6 +400,7 @@ jobs:
         VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
         PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
         BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
+        TEST_OLAP_SCALE: 10
 
     - name: Create Allure report
       if: ${{ !cancelled() }}
@@ -398,7 +427,7 @@ jobs:
 
     strategy:
       fail-fast: false
-      matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
+      matrix: ${{ fromJson(needs.generate-matrices.outputs.tpch-compare-matrix) }}
 
     env:
       POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
@@ -407,6 +436,7 @@ jobs:
       BUILD_TYPE: remote
       SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
       PLATFORM: ${{ matrix.platform }}
+      TEST_OLAP_SCALE: ${{ matrix.scale }}
 
     runs-on: [ self-hosted, us-east-2, x64 ]
     container:
@@ -428,18 +458,17 @@ jobs:
         ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
         echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
 
-    - name: Set up Connection String
-      id: set-up-connstr
+    - name: Get Connstring Secret Name
       run: |
         case "${PLATFORM}" in
           neon-captest-reuse)
-            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_TPCH_S10_CONNSTR }}
+            ENV_PLATFORM=CAPTEST_TPCH
             ;;
           rds-aurora)
-            CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_TPCH_S10_CONNSTR }}
+            ENV_PLATFORM=RDS_AURORA_TPCH
             ;;
           rds-postgres)
-            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR }}
+            ENV_PLATFORM=RDS_POSTGRES_TPCH
             ;;
           *)
             echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
@@ -447,9 +476,21 @@ jobs:
             ;;
         esac
 
+        CONNSTR_SECRET_NAME="BENCHMARK_${ENV_PLATFORM}_S${TEST_OLAP_SCALE}_CONNSTR"  # e.g. BENCHMARK_CAPTEST_TPCH_S10_CONNSTR
+        echo "CONNSTR_SECRET_NAME=${CONNSTR_SECRET_NAME}" >> $GITHUB_ENV
+
+    - name: Set up Connection String
+      id: set-up-connstr
+      run: |
+        CONNSTR=${{ secrets[env.CONNSTR_SECRET_NAME] }}
+
         echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
 
-        psql ${CONNSTR} -c "SELECT version();"
+        QUERY="SELECT version();"
+        if [[ "${PLATFORM}" = "neon"* ]]; then  # [[ ]] is needed for the neon* prefix match; [ ] compares literally
+          QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
+        fi
+        psql ${CONNSTR} -c "${QUERY}"
 
     - name: Run TPC-H benchmark
       uses: ./.github/actions/run-python-test-set
@@ -463,6 +504,7 @@ jobs:
         VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
         PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
         BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
+        TEST_OLAP_SCALE: ${{ matrix.scale }}
 
     - name: Create Allure report
       if: ${{ !cancelled() }}
@@ -534,7 +576,11 @@ jobs:
 
         echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
 
-        psql ${CONNSTR} -c "SELECT version();"
+        QUERY="SELECT version();"
+        if [[ "${PLATFORM}" = "neon"* ]]; then  # [[ ]] is needed for the neon* prefix match; [ ] compares literally
+          QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
+        fi
+        psql ${CONNSTR} -c "${QUERY}"
 
     - name: Run user examples
       uses: ./.github/actions/run-python-test-set
diff --git a/test_runner/performance/test_perf_olap.py b/test_runner/performance/test_perf_olap.py
index 39aafa80df..0f7615f7ed 100644
--- a/test_runner/performance/test_perf_olap.py
+++ b/test_runner/performance/test_perf_olap.py
@@ -1,5 +1,6 @@
+import os
 from dataclasses import dataclass
-from typing import Dict, Tuple
+from typing import Dict, List, Tuple
 
 import pytest
 from _pytest.mark import ParameterSet
@@ -78,6 +79,15 @@ QUERIES: Tuple[LabelledQuery, ...] = (
 )
 
 
+def get_scale() -> List[str]:
+    """Return the one-element list of scale labels used to parametrize OLAP tests.
+
+    The label (TEST_OLAP_SCALE, default "noscale") only distinguishes test names;
+    databases are pre-created and reached through the BENCHMARK_CONNSTR env variable.
+    """
+    return [os.getenv("TEST_OLAP_SCALE", "noscale")]
+
+
 def run_psql(env: RemoteCompare, labelled_query: LabelledQuery, times: int) -> None:
     # prepare connstr:
     # - cut out password from connstr to pass it via env
@@ -100,9 +110,10 @@ def run_psql(env: RemoteCompare, labelled_query: LabelledQuery, times: int) -> N
             env.pg_bin.run_capture(["psql", connstr, "-c", query], env=environ)
 
 
+@pytest.mark.parametrize("scale", get_scale())
 @pytest.mark.parametrize("query", QUERIES)
 @pytest.mark.remote_cluster
-def test_clickbench(query: LabelledQuery, remote_compare: RemoteCompare):
+def test_clickbench(query: LabelledQuery, remote_compare: RemoteCompare, scale: str):
     """
     An OLAP-style ClickHouse benchmark
 
@@ -128,9 +139,10 @@ def tpch_queuies() -> Tuple[ParameterSet, ...]:
     )
 
 
+@pytest.mark.parametrize("scale", get_scale())
 @pytest.mark.parametrize("query", tpch_queuies())
 @pytest.mark.remote_cluster
-def test_tpch(query: LabelledQuery, remote_compare: RemoteCompare):
+def test_tpch(query: LabelledQuery, remote_compare: RemoteCompare, scale: str):
     """
     TCP-H Benchmark