Mirror of https://github.com/neondatabase/neon.git, synced 2026-05-14 11:40:38 +00:00
test_runner: run benchmarks in parallel (#4683)
## Problem

The benchmarks run takes about an hour on the main branch (in a single job), which delays pipeline results. It takes another hour if we have to restart the job because of failures.

## Summary of changes

- Use the `pytest-split` plugin to run benchmarks on separate CI runners in 4 parallel jobs.
- Add `scripts/benchmark_durations.py`, which fetches benchmark durations from the database to help `pytest-split` schedule tests more evenly. It uses the p99 of the last 10 days' durations. The current distribution could be better (each worker's total duration varies from 9m to 35m), but this can be improved in subsequent PRs.
committed by GitHub
parent e074ccf170
commit 4580f5085a
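For orientation before the diffs: this is roughly how one of the four split groups ends up invoking pytest once these changes are in place. The `--splits`, `--group`, and `--durations-path` flags come from the diffs below; the group number and output path in this sketch are illustrative only.

```bash
# A minimal sketch, not the exact CI command line: run split group 2 of 4,
# reusing a durations file produced earlier by scripts/benchmark_durations.py.
poetry run pytest test_runner/performance \
    --splits 4 \
    --group 2 \
    --durations-path ./test_output/benchmark_durations.json
```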
@@ -150,6 +150,14 @@ runs:
  EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
fi

# We use pytest-split plugin to run benchmarks in parallel on different CI runners
if [ "${TEST_SELECTION}" = "test_runner/performance" ] && [ "${{ inputs.build_type }}" != "remote" ]; then
  mkdir -p $TEST_OUTPUT
  poetry run ./scripts/benchmark_durations.py "${TEST_RESULT_CONNSTR}" --days 10 --output "$TEST_OUTPUT/benchmark_durations.json"

  EXTRA_PARAMS="--durations-path $TEST_OUTPUT/benchmark_durations.json $EXTRA_PARAMS"
fi

if [[ "${{ inputs.build_type }}" == "debug" ]]; then
  cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
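The file passed via `--durations-path` is a flat JSON map from pytest node ID to duration in seconds, which pytest-split uses to balance the groups. A hedged illustration of its shape, with two entries borrowed from the fallback table later in this commit:

```bash
# Illustrative excerpt only; a real file has one entry per benchmark.
cat "$TEST_OUTPUT/benchmark_durations.json"
# {
#   "test_runner/performance/test_compaction.py::test_compaction": 77.0,
#   "test_runner/performance/test_gc_feedback.py::test_gc_feedback": 284.0
# }
```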
.github/workflows/build_and_test.yml (vendored, 6 lines changed)
@@ -396,13 +396,11 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
        pytest_split_group: [ 1, 2, 3, 4 ]
        build_type: [ release ]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          submodules: true
          fetch-depth: 1

      - name: Pytest benchmarks
        uses: ./.github/actions/run-python-test-set
@@ -411,9 +409,11 @@ jobs:
          test_selection: performance
          run_in_parallel: false
          save_perf_report: ${{ github.ref_name == 'main' }}
          extra_params: --splits ${{ strategy.job-total }} --group ${{ matrix.pytest_split_group }}
        env:
          VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
          TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}"
      # XXX: no coverage data handling here, since benchmarks are run on release builds,
      # while coverage is currently collected for the debug ones
poetry.lock (generated, 16 lines changed)
@@ -1868,6 +1868,20 @@ files = [
packaging = ">=17.1"
pytest = ">=5.3"

[[package]]
name = "pytest-split"
version = "0.8.1"
description = "Pytest plugin which splits the test suite to equally sized sub suites based on test execution time."
optional = false
python-versions = ">=3.7.1,<4.0"
files = [
    {file = "pytest_split-0.8.1-py3-none-any.whl", hash = "sha256:74b110ea091bd147cc1c5f9665a59506e5cedfa66f96a89fb03e4ab447c2c168"},
    {file = "pytest_split-0.8.1.tar.gz", hash = "sha256:2d88bd3dc528689a7a3f58fc12ea165c3aa62e90795e420dfad920afe5612d6d"},
]

[package.dependencies]
pytest = ">=5,<8"

[[package]]
name = "pytest-timeout"
version = "2.1.0"
@@ -2513,4 +2527,4 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
-content-hash = "fe771b153ef7e308d6d04421d0eb3f97d00780882277d2b4fc1f296054d8db79"
+content-hash = "e16a65d8fdff4e2173610e552e0e7306e301de2c640ae6082ef6cc5755f566d2"
@@ -36,6 +36,7 @@ pytest-httpserver = "^1.0.8"
aiohttp = "3.7.4"
pytest-rerunfailures = "^11.1.2"
types-pytest-lazy-fixture = "^0.6.3.3"
+pytest-split = "^0.8.1"

[tool.poetry.group.dev.dependencies]
black = "^23.3.0"
scripts/benchmark_durations.py (new executable file, 177 lines)
@@ -0,0 +1,177 @@
#! /usr/bin/env python3

import argparse
import json
import logging
from typing import Dict

import psycopg2
import psycopg2.extras

"""
The script fetches the durations of benchmarks from the database and stores them in a file compatible with the pytest-split plugin.
"""

BENCHMARKS_DURATION_QUERY = """
    SELECT
        DISTINCT parent_suite, suite, test,
        PERCENTILE_DISC(%s) WITHIN GROUP (ORDER BY duration_ms) as percentile_ms
    FROM
        (
            SELECT
                jsonb_array_elements(data -> 'children') ->> 'name' as parent_suite,
                jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') ->> 'name' as suite,
                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'name' as test,
                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'status' as status,
                to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'start')::bigint / 1000)::date as timestamp,
                (jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'duration')::int as duration_ms
            FROM
                regress_test_results
            WHERE
                reference = 'refs/heads/main'
        ) data
    WHERE
        timestamp > CURRENT_DATE - INTERVAL '%s' day
        AND parent_suite = 'test_runner.performance'
        AND status = 'passed'
    GROUP BY
        parent_suite, suite, test
    ;
"""

# For our benchmarks, the default distribution across 4 workers produces pretty uneven chunks:
# the total duration varies from 8 to 40 minutes.
# We use some pre-collected durations as a fallback to get a better distribution.
FALLBACK_DURATION = {
    "test_runner/performance/test_branch_creation.py::test_branch_creation_heavy_write[20]": 57.0,
    "test_runner/performance/test_branch_creation.py::test_branch_creation_many_relations": 28.0,
    "test_runner/performance/test_branch_creation.py::test_branch_creation_many[1024]": 71.0,
    "test_runner/performance/test_branching.py::test_compare_child_and_root_pgbench_perf": 27.0,
    "test_runner/performance/test_branching.py::test_compare_child_and_root_read_perf": 11.0,
    "test_runner/performance/test_branching.py::test_compare_child_and_root_write_perf": 30.0,
    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[neon]": 40.0,
    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[vanilla]": 5.0,
    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[1]": 3.0,
    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[5]": 10.0,
    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[10]": 19.0,
    "test_runner/performance/test_bulk_update.py::test_bulk_update[10]": 66.0,
    "test_runner/performance/test_bulk_update.py::test_bulk_update[50]": 30.0,
    "test_runner/performance/test_bulk_update.py::test_bulk_update[100]": 60.0,
    "test_runner/performance/test_compaction.py::test_compaction": 77.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[neon-5-10-100]": 11.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[vanilla-5-10-100]": 16.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[neon-5-10-100]": 11.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[vanilla-5-10-100]": 18.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[neon-5-10-100]": 11.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[vanilla-5-10-100]": 16.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-1]": 11.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-10]": 11.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-1]": 10.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-10]": 10.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[neon-5-10-100]": 11.0,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[vanilla-5-10-100]": 16.0,
    "test_runner/performance/test_copy.py::test_copy[neon]": 12.0,
    "test_runner/performance/test_copy.py::test_copy[vanilla]": 10.0,
    "test_runner/performance/test_gc_feedback.py::test_gc_feedback": 284.0,
    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[neon]": 11.0,
    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[vanilla]": 7.0,
    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[neon-1]": 85.0,
    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[vanilla-1]": 29.0,
    "test_runner/performance/test_layer_map.py::test_layer_map": 44.0,
    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[neon]": 16.0,
    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[vanilla]": 67.0,
    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[neon]": 67.0,
    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[vanilla]": 80.0,
    "test_runner/performance/test_perf_pgbench.py::test_pgbench[neon-45-10]": 102.0,
    "test_runner/performance/test_perf_pgbench.py::test_pgbench[vanilla-45-10]": 99.0,
    "test_runner/performance/test_random_writes.py::test_random_writes[neon]": 9.0,
    "test_runner/performance/test_random_writes.py::test_random_writes[vanilla]": 2.0,
    "test_runner/performance/test_seqscans.py::test_seqscans[neon-100000-100-0]": 4.0,
    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-0]": 80.0,
    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-4]": 68.0,
    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-100000-100-0]": 0.0,
    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-0]": 11.0,
    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-4]": 10.0,
    "test_runner/performance/test_startup.py::test_startup_simple": 2.0,
    "test_runner/performance/test_startup.py::test_startup": 539.0,
    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_off-10-5-5]": 375.0,
    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_on-10-5-5]": 370.0,
    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[vanilla-10-5-5]": 94.0,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_off-1000]": 164.0,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_on-1000]": 274.0,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[vanilla-1000]": 949.0,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_off-45-100]": 142.0,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_on-45-100]": 151.0,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[vanilla-45-100]": 182.0,
    "test_runner/performance/test_write_amplification.py::test_write_amplification[neon]": 13.0,
    "test_runner/performance/test_write_amplification.py::test_write_amplification[vanilla]": 16.0,
}


def main(args: argparse.Namespace):
    connstr = args.connstr
    interval_days = args.days
    output = args.output
    percentile = args.percentile

    res: Dict[str, float] = {}

    try:
        logging.info("connecting to the database...")
        with psycopg2.connect(connstr, connect_timeout=30) as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                logging.info("fetching benchmarks...")
                cur.execute(BENCHMARKS_DURATION_QUERY, (percentile, interval_days))
                rows = cur.fetchall()
    except psycopg2.OperationalError as exc:
        logging.error("cannot fetch benchmarks duration from the DB due to an error: %s", exc)
        rows = []
        res = FALLBACK_DURATION

    for row in rows:
        pytest_name = f"{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['test']}"
        duration = row["percentile_ms"] / 1000
        logging.info(f"\t{pytest_name}: {duration}")
        res[pytest_name] = duration

    logging.info(f"saving results to {output.name}")
    json.dump(res, output, indent=2)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Get <percentile> of benchmarks duration for the last <N> days"
    )
    parser.add_argument(
        "--output",
        type=argparse.FileType("w"),
        default=".test_durations",
        help="path to output json file (default: .test_durations)",
    )
    parser.add_argument(
        "--percentile",
        type=float,
        default="0.99",
        help="percentile (default: 0.99)",
    )
    parser.add_argument(
        "--days",
        required=False,
        default=10,
        type=int,
        help="how many days to look back for (default: 10)",
    )
    parser.add_argument(
        "connstr",
        help="connection string to the test results database",
    )
    args = parser.parse_args()

    level = logging.INFO
    logging.basicConfig(
        format="%(message)s",
        level=level,
    )

    main(args)
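For completeness, a hedged example of running the script by hand; the connection string is a placeholder, and the flags match the argparse definitions above:

```bash
# Fetch the p99 of the last 10 days of benchmark durations and write the file
# that pytest-split will later read via --durations-path.
poetry run ./scripts/benchmark_durations.py \
    "postgresql://user:password@host:5432/test_results" \
    --days 10 \
    --percentile 0.99 \
    --output .test_durations
```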