diff --git a/.github/actions/allure-report/action.yml b/.github/actions/allure-report/action.yml index dfb314571b..2d4cabdde5 100644 --- a/.github/actions/allure-report/action.yml +++ b/.github/actions/allure-report/action.yml @@ -32,8 +32,8 @@ runs: exit 2 fi - - name: Calculate key - id: calculate-key + - name: Calculate variables + id: calculate-vars shell: bash -euxo pipefail {0} run: | # TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key @@ -41,14 +41,22 @@ runs: pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true) if [ "${pr_number}" != "null" ]; then key=pr-${pr_number} - elif [ "${GITHUB_REF}" = "refs/heads/main" ]; then + elif [ "${GITHUB_REF_NAME}" = "main" ]; then # Shortcut for a special branch key=main + elif [ "${GITHUB_REF_NAME}" = "release" ]; then + # Shortcut for a special branch + key=release else - key=branch-$(echo ${GITHUB_REF#refs/heads/} | tr -c "[:alnum:]._-" "-") + key=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-") fi echo "KEY=${key}" >> $GITHUB_OUTPUT + # Sanitize test selection to remove `/` and any other special characters + # Use printf instead of echo to avoid having `\n` at the end of the string + test_selection=$(printf "${{ inputs.test_selection }}" | tr -c "[:alnum:]._-" "-" ) + echo "TEST_SELECTION=${test_selection}" >> $GITHUB_OUTPUT + - uses: actions/setup-java@v3 if: ${{ inputs.action == 'generate' }} with: @@ -74,10 +82,11 @@ runs: - name: Upload Allure results if: ${{ inputs.action == 'store' }} env: - REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }} - RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }} + REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }} + RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }} TEST_OUTPUT: /tmp/test_output BUCKET: neon-github-public-dev + TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }} shell: bash -euxo pipefail {0} run: | # Add metadata @@ -98,7 +107,7 @@ runs: BUILD_TYPE=${{ inputs.build_type }} EOF - ARCHIVE="${GITHUB_RUN_ID}-${{ inputs.test_selection }}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst" + ARCHIVE="${GITHUB_RUN_ID}-${TEST_SELECTION}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst" ZSTD_NBTHREADS=0 tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd . 
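A note on the sanitization in the `calculate-vars` step above: `tr -c "[:alnum:]._-" "-"` replaces every character that is not a letter, digit, `.`, `_`, or `-` with a dash, so path-like test selections and branch names are safe to embed in S3 keys and archive names, and `printf` avoids the trailing newline that `echo` would add. An illustration with made-up inputs (not part of the workflow itself):

    $ printf 'performance/test_perf_olap.py' | tr -c "[:alnum:]._-" "-"
    performance-test_perf_olap.py
    $ printf 'feature/allure+olap' | tr -c "[:alnum:]._-" "-"
    feature-allure-olap
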
@@ -109,8 +118,9 @@ runs: if: ${{ inputs.action == 'generate' }} shell: bash -euxo pipefail {0} env: - LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt + LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt BUCKET: neon-github-public-dev + TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }} run: | LOCK_TIMEOUT=300 # seconds @@ -123,12 +133,12 @@ runs: fi sleep 1 done - echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" > lock.txt + echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" > lock.txt aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}" # A double-check that exactly WE have acquired the lock aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt - if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then + if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then break fi done @@ -137,8 +147,8 @@ runs: if: ${{ inputs.action == 'generate' }} id: generate-report env: - REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }} - RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }} + REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }} + RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }} TEST_OUTPUT: /tmp/test_output BUCKET: neon-github-public-dev shell: bash -euxo pipefail {0} @@ -192,12 +202,13 @@ runs: if: ${{ inputs.action == 'generate' && always() }} shell: bash -euxo pipefail {0} env: - LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt + LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt BUCKET: neon-github-public-dev + TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }} run: | aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0 - if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then + if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then aws s3 rm "s3://${BUCKET}/${LOCK_FILE}" fi diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index b3008a2aed..5831198654 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -110,8 +110,14 @@ jobs: rm -rf perf-report-staging mkdir -p perf-report-staging # Set --sparse-ordering option of pytest-order plugin to ensure tests are running in order of appears in the file, - # it's important for test_perf_pgbench.py::test_pgbench_remote_* tests - ./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --out-dir perf-report-staging --timeout 5400 + # it's important for test_perf_pgbench.py::test_pgbench_remote_* tests. + # Do not run tests from test_runner/performance/test_perf_olap.py because they require a prepared DB. We run them separately in the `clickbench-compare` job.
+ ./scripts/pytest test_runner/performance/ -v \ + -m "remote_cluster" \ + --sparse-ordering \ + --out-dir perf-report-staging \ + --timeout 5400 \ + --ignore test_runner/performance/test_perf_olap.py - name: Submit result env: @@ -207,7 +213,7 @@ jobs: CONNSTR=${{ steps.create-neon-project.outputs.dsn }} ;; rds-aurora) - CONNSTR=${{ secrets.BENCHMARK_RDS_CONNSTR }} + CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }} ;; rds-postgres) CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }} @@ -292,3 +298,112 @@ jobs: slack-message: "Periodic perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" env: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + + clickbench-compare: + # The ClickBench DBs for rds-aurora and rds-postgres are deployed to the same clusters + # we use for performance testing in pgbench-compare. + # Run this job only after pgbench-compare has finished to avoid overlapping with it. + # We might change it after https://github.com/neondatabase/neon/issues/2900. + # + # *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows + # *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB + + needs: [ pgbench-compare ] + + strategy: + fail-fast: false + matrix: + # neon-captest-prefetch: We have pre-created projects with prefetch enabled + # rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs + # rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage + platform: [ neon-captest-prefetch, rds-postgres, rds-aurora ] + + env: + POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install + DEFAULT_PG_VERSION: 14 + TEST_OUTPUT: /tmp/test_output + BUILD_TYPE: remote + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }} + PLATFORM: ${{ matrix.platform }} + + runs-on: [ self-hosted, dev, x64 ] + container: + image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned + options: --init + + timeout-minutes: 360 # 6h + + steps: + - uses: actions/checkout@v3 + + - name: Download Neon artifact + uses: ./.github/actions/download + with: + name: neon-${{ runner.os }}-release-artifact + path: /tmp/neon/ + prefix: latest + + - name: Add Postgres binaries to PATH + run: | + ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version + echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH + + - name: Set up Connection String + id: set-up-connstr + run: | + case "${PLATFORM}" in + neon-captest-prefetch) + CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }} + ;; + rds-aurora) + CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CLICKBENCH_10M_CONNSTR }} + ;; + rds-postgres) + CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }} + ;; + *) + echo >&2 "Unknown PLATFORM=${PLATFORM}.
Allowed only 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'" + exit 1 + ;; + esac + + echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT + + psql ${CONNSTR} -c "SELECT version();" + + - name: Set database options + if: matrix.platform == 'neon-captest-prefetch' + run: | + psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET enable_seqscan_prefetch=on" + psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET seqscan_prefetch_buffers=10" + env: + BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} + + - name: Benchmark clickbench + uses: ./.github/actions/run-python-test-set + with: + build_type: ${{ env.BUILD_TYPE }} + test_selection: performance/test_perf_olap.py + run_in_parallel: false + save_perf_report: ${{ env.SAVE_PERF_REPORT }} + extra_params: -m remote_cluster --timeout 21600 -k test_clickbench + env: + VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" + BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} + + - name: Create Allure report + if: success() || failure() + uses: ./.github/actions/allure-report + with: + action: generate + build_type: ${{ env.BUILD_TYPE }} + + - name: Post to a Slack channel + if: ${{ github.event.schedule && failure() }} + uses: slackapi/slack-github-action@v1 + with: + channel-id: "C033QLM5P7D" # dev-staging-stream + slack-message: "Periodic OLAP perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} diff --git a/test_runner/performance/test_perf_olap.py b/test_runner/performance/test_perf_olap.py new file mode 100644 index 0000000000..84693325c0 --- /dev/null +++ b/test_runner/performance/test_perf_olap.py @@ -0,0 +1,111 @@ +from dataclasses import dataclass +from typing import Dict, Tuple + +import pytest +from fixtures.compare_fixtures import RemoteCompare +from fixtures.log_helper import log + + +@dataclass +class LabelledQuery: + """An SQL query with a label for the test report.""" + + label: str + query: str + + +# A list of queries to run. +# Please do not alter the label for the query, as it is used to identify it. +# Labels for ClickBench queries match the labels in ClickBench reports +# on https://benchmark.clickhouse.com/ (the DB size may differ). +QUERIES: Tuple[LabelledQuery, ...] 
= ( + # Disable `black` formatting for the list of queries so that it's easier to read + # fmt: off + ### ClickBench queries: + LabelledQuery("Q0", r"SELECT COUNT(*) FROM hits;"), + LabelledQuery("Q1", r"SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;"), + LabelledQuery("Q2", r"SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;"), + LabelledQuery("Q3", r"SELECT AVG(UserID) FROM hits;"), + LabelledQuery("Q4", r"SELECT COUNT(DISTINCT UserID) FROM hits;"), + LabelledQuery("Q5", r"SELECT COUNT(DISTINCT SearchPhrase) FROM hits;"), + LabelledQuery("Q6", r"SELECT MIN(EventDate), MAX(EventDate) FROM hits;"), + LabelledQuery("Q7", r"SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;"), + LabelledQuery("Q8", r"SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;"), + LabelledQuery("Q9", r"SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;"), + LabelledQuery("Q10", r"SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;"), + LabelledQuery("Q11", r"SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;"), + LabelledQuery("Q12", r"SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;"), + LabelledQuery("Q13", r"SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;"), + LabelledQuery("Q14", r"SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;"), + LabelledQuery("Q15", r"SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;"), + LabelledQuery("Q16", r"SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;"), + LabelledQuery("Q17", r"SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;"), + LabelledQuery("Q18", r"SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;"), + LabelledQuery("Q19", r"SELECT UserID FROM hits WHERE UserID = 435090932899640449;"), + LabelledQuery("Q20", r"SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';"), + LabelledQuery("Q21", r"SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;"), + LabelledQuery("Q22", r"SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;"), + LabelledQuery("Q23", r"SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;"), + LabelledQuery("Q24", r"SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;"), + LabelledQuery("Q25", r"SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;"), + LabelledQuery("Q26", r"SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;"), + LabelledQuery("Q27", r"SELECT CounterID, 
AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;"), + LabelledQuery("Q28", r"SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;"), + LabelledQuery("Q29", r"SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits;"), + LabelledQuery("Q30", r"SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;"), + LabelledQuery("Q31", r"SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;"), + LabelledQuery("Q32", r"SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;"), + LabelledQuery("Q33", r"SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;"), + LabelledQuery("Q34", 
r"SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;"), + LabelledQuery("Q35", r"SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;"), + LabelledQuery("Q36", r"SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;"), + LabelledQuery("Q37", r"SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;"), + LabelledQuery("Q38", r"SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;"), + LabelledQuery("Q39", r"SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;"), + LabelledQuery("Q40", r"SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100;"), + LabelledQuery("Q41", r"SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;"), + LabelledQuery("Q42", r"SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000;"), + ### Custom Neon queries: + # I suggest using the NQ prefix (which stands for Neon Query) instead of Q + # to not intersect with the original ClickBench queries if their list is extended. + # + # LabelledQuery("NQ0", r"..."), + # LabelledQuery("NQ1", r"..."), + # ... 
+ # fmt: on +) + + +def run_psql(env: RemoteCompare, labelled_query: LabelledQuery, times: int) -> None: + # prepare connstr: + # - cut out password from connstr to pass it via env + # - add options to connstr + password = env.pg.default_options.get("password", None) + options = f"-cstatement_timeout=0 {env.pg.default_options.get('options', '')}" + connstr = env.pg.connstr(password=None, options=options) + + environ: Dict[str, str] = {} + if password is not None: + environ["PGPASSWORD"] = password + + label, query = labelled_query.label, labelled_query.query + + log.info(f"Running query {label} {times} times") + for i in range(times): + run = i + 1 + log.info(f"Run {run}/{times}") + with env.zenbenchmark.record_duration(f"{label}/{run}"): + env.pg_bin.run_capture(["psql", connstr, "-c", query], env=environ) + + +@pytest.mark.parametrize("query", QUERIES) +@pytest.mark.remote_cluster +def test_clickbench(query: LabelledQuery, remote_compare: RemoteCompare): + """ + An OLAP-style ClickHouse benchmark (ClickBench) + + Based on https://github.com/ClickHouse/ClickBench/tree/c00135ca5b6a0d86fedcdbf998fdaa8ed85c1c3b/aurora-postgresql + The DB is prepared manually in advance. + """ + + run_psql(remote_compare, query, times=3)
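
A note for local debugging: the new `Benchmark clickbench` step should be roughly equivalent to the direct `./scripts/pytest` invocation used by pgbench-compare, pointed at the OLAP test file. A sketch, assuming the `run-python-test-set` action ultimately wraps `./scripts/pytest` the same way and that the connection string below is only a placeholder for an already prepared ClickBench database (the `hits` table must exist):

    # Placeholder connection string; not a real secret.
    export BENCHMARK_CONNSTR='postgres://user:password@host:5432/main'
    ./scripts/pytest test_runner/performance/test_perf_olap.py \
      -m "remote_cluster" \
      -k test_clickbench \
      --timeout 21600

Each recorded iteration then boils down to a single `psql <connstr> -c "<query>"` call with `statement_timeout` disabled and the password passed via `PGPASSWORD`, as implemented in `run_psql` above.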