test_runner: add scale parameter to tpc-h tests (#5079)

## Problem It's hard to find out which DB size we use for OLAP benchmarks (TPC-H in particular). This PR adds handling of the `TEST_OLAP_SCALE` env var, whose value is added to each test name as a parameter. This is required for performing larger periodic benchmarks. ## Summary of changes - Handle `TEST_OLAP_SCALE` in `test_runner/performance/test_perf_olap.py` - Set `TEST_OLAP_SCALE` in `.github/workflows/benchmarking.yml` to a TPC-H scale
2026-05-29 19:10:38 +00:00 · 2023-09-06 11:22:57 +01:00
parent 4fec48f2b5
commit 8e25d3e79e
2 changed files with 71 additions and 13 deletions
--- a/test_runner/performance/test_perf_olap.py
+++ b/test_runner/performance/test_perf_olap.py
@@ -1,5 +1,6 @@
+import os
 from dataclasses import dataclass
-from typing import Dict, Tuple
+from typing import Dict, List, Tuple

 import pytest
 from _pytest.mark import ParameterSet
@@ -78,6 +79,15 @@ QUERIES: Tuple[LabelledQuery, ...] = (
 )


+def get_scale() -> List[str]:
+    # We parametrize each tpc-h and clickbench test with scale
+    # to distinguish them from each other, but don't really use it inside.
+    # Databases are pre-created and passed through BENCHMARK_CONNSTR env variable.
+
+    scale = os.getenv("TEST_OLAP_SCALE", "noscale")
+    return [scale]
+
+
 def run_psql(env: RemoteCompare, labelled_query: LabelledQuery, times: int) -> None:
    # prepare connstr:
    # - cut out password from connstr to pass it via env
@@ -100,9 +110,10 @@ def run_psql(env: RemoteCompare, labelled_query: LabelledQuery, times: int) -> N
            env.pg_bin.run_capture(["psql", connstr, "-c", query], env=environ)


+@pytest.mark.parametrize("scale", get_scale())
@pytest.mark.parametrize("query", QUERIES)
@pytest.mark.remote_cluster
-def test_clickbench(query: LabelledQuery, remote_compare: RemoteCompare):
+def test_clickbench(query: LabelledQuery, remote_compare: RemoteCompare, scale: str):
    """
    An OLAP-style ClickHouse benchmark

@@ -128,9 +139,10 @@ def tpch_queuies() -> Tuple[ParameterSet, ...]:
    )


+@pytest.mark.parametrize("scale", get_scale())
@pytest.mark.parametrize("query", tpch_queuies())
@pytest.mark.remote_cluster
-def test_tpch(query: LabelledQuery, remote_compare: RemoteCompare):
+def test_tpch(query: LabelledQuery, remote_compare: RemoteCompare, scale: str):
    """
    TCP-H Benchmark