test_runner: run benchmarks in parallel (#4683)

## Problem A benchmark run takes about an hour on the main branch (in a single job), which delays pipeline results. It takes another hour whenever the job has to be restarted because of failures. ## Summary of changes - Use the `pytest-split` plugin to run benchmarks on separate CI runners in 4 parallel jobs - Add `scripts/benchmark_durations.py`, which fetches benchmark durations from the database to help `pytest-split` schedule tests more evenly. It uses the p99 of the durations recorded over the last 10 days. The current distribution could be better: each worker's duration varies from 9m to 35m, but this can be improved in subsequent PRs.
2026-01-16 01:42:55 +00:00 · 2023-07-17 20:09:45 +01:00
parent e074ccf170
commit 4580f5085a
5 changed files with 204 additions and 4 deletions
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -150,6 +150,14 @@ runs:
          EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
        fi

+        # We use the pytest-split plugin to run benchmarks in parallel on different CI runners
+        if [ "${TEST_SELECTION}" = "test_runner/performance" ] && [ "${{ inputs.build_type }}" != "remote" ]; then
+          mkdir -p $TEST_OUTPUT
+          poetry run ./scripts/benchmark_durations.py "${TEST_RESULT_CONNSTR}" --days 10 --output "$TEST_OUTPUT/benchmark_durations.json"
+
+          EXTRA_PARAMS="--durations-path $TEST_OUTPUT/benchmark_durations.json $EXTRA_PARAMS"
+        fi
+
        if [[ "${{ inputs.build_type }}" == "debug" ]]; then
          cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
        elif [[ "${{ inputs.build_type }}" == "release" ]]; then
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -396,13 +396,11 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
+        pytest_split_group: [ 1, 2, 3, 4 ]
        build_type: [ release ]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 1

      - name: Pytest benchmarks
        uses: ./.github/actions/run-python-test-set
@@ -411,9 +409,11 @@ jobs:
          test_selection: performance
          run_in_parallel: false
          save_perf_report: ${{ github.ref_name == 'main' }}
+          extra_params: --splits ${{ strategy.job-total }} --group ${{ matrix.pytest_split_group }}
        env:
          VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+          TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}"
      # XXX: no coverage data handling here, since benchmarks are run on release builds,
      # while coverage is currently collected for the debug ones