## Problem

We've got several issues with the current `benchmarks` job setup:
- The `benchmark_durations.json` file (that we generate at runtime to split tests into several jobs[0]) is not consistent between these jobs (and not consistent at all if we rerun a job), i.e. the test selection for each job can differ, which could end up in missed tests in a test run.
- `scripts/benchmark_durations` doesn't fetch all tests from the database (it doesn't expect any extra directories inside `test_runner/performance`).
- For some reason, the current split into 4 groups ends up with the 4th group having no tests to run, which fails the job[1].

- [0] https://github.com/neondatabase/neon/pull/4683
- [1] https://github.com/neondatabase/neon/issues/6629

## Summary of changes

- Generate the `benchmark_durations.json` file once before we start the `benchmarks` jobs (this makes it consistent across the jobs) and pass the file content through a GitHub Actions input (this makes it consistent for reruns); see the sketch below.
- Fix the SQL query in `scripts/benchmark_durations` so it fetches all required tests.
- Split benchmarks into 5 jobs instead of 4.
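For illustration only, here is a minimal sketch of how a durations file like `benchmark_durations.json` can drive a duration-balanced split into 5 groups. The `split_tests` helper and the greedy algorithm are hypothetical and only approximate what pytest-split does internally; the point is that tests missing from the file fall back to a guessed duration, which is why an inconsistent file can shuffle tests between groups.

```python
import json
from typing import Dict, List


def split_tests(durations: Dict[str, float], collected: List[str], splits: int) -> List[List[str]]:
    """Greedy longest-first split; a rough stand-in for pytest-split's grouping."""
    # Tests missing from the durations file get a guessed (average) duration,
    # so an inconsistent durations file can move tests between groups.
    default = sum(durations.values()) / max(len(durations), 1)
    weighted = sorted(collected, key=lambda t: durations.get(t, default), reverse=True)

    groups: List[List[str]] = [[] for _ in range(splits)]
    totals = [0.0] * splits
    for test in weighted:
        idx = totals.index(min(totals))  # put the next-longest test into the lightest group
        groups[idx].append(test)
        totals[idx] += durations.get(test, default)
    return groups


with open("benchmark_durations.json") as f:
    durations = json.load(f)

# In CI the test list would come from pytest collection; here we reuse the known tests.
groups = split_tests(durations, list(durations), splits=5)
for i, group in enumerate(groups, start=1):
    print(f"group {i}: {len(group)} tests, ~{sum(durations.get(t, 0.0) for t in group):.0f}s")
```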
#! /usr/bin/env python3

import argparse
import json
import logging
from typing import Dict

import psycopg2
import psycopg2.extras

"""
The script fetches the durations of benchmarks from the database and stores them in a file compatible with the pytest-split plugin.
"""

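# A hypothetical invocation (the connection string and paths below are placeholders,
# not values used by CI):
#
#   ./scripts/benchmark_durations.py "postgres://user:pass@host/results_db" \
#       --days 10 --percentile 0.99 --output benchmark_durations.json
#
# The resulting JSON maps pytest test IDs to durations in seconds, in the format
# expected by pytest-split (e.g. via its --durations-path option).
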
BENCHMARKS_DURATION_QUERY = """
SELECT
    DISTINCT parent_suite, suite, name,
    PERCENTILE_DISC(%s) WITHIN GROUP (ORDER BY duration) as percentile_ms
FROM results
WHERE
    started_at > CURRENT_DATE - INTERVAL '%s' day
    AND starts_with(parent_suite, 'test_runner.performance')
    AND status = 'passed'
GROUP BY
    parent_suite, suite, name
;
"""

# For our benchmarks the default distribution for 4 workers produces pretty uneven chunks,
# the total duration varies from 8 to 40 minutes.
# We use some pre-collected durations as a fallback to have a better distribution.
FALLBACK_DURATION = {
    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[1-13-30]": 400.15,
    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[1-6-30]": 372.521,
    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[10-13-30]": 420.017,
    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[10-6-30]": 373.769,
    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[100-13-30]": 678.742,
    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[100-6-30]": 512.135,
    "test_runner/performance/test_branch_creation.py::test_branch_creation_heavy_write[20]": 58.036,
    "test_runner/performance/test_branch_creation.py::test_branch_creation_many_relations": 22.104,
    "test_runner/performance/test_branch_creation.py::test_branch_creation_many[1024]": 126.073,
    "test_runner/performance/test_branching.py::test_compare_child_and_root_pgbench_perf": 25.759,
    "test_runner/performance/test_branching.py::test_compare_child_and_root_read_perf": 6.885,
    "test_runner/performance/test_branching.py::test_compare_child_and_root_write_perf": 8.758,
    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[neon]": 18.275,
    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[vanilla]": 9.533,
    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[1]": 12.09,
    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[10]": 35.145,
    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[5]": 22.28,
    "test_runner/performance/test_bulk_update.py::test_bulk_update[10]": 66.353,
    "test_runner/performance/test_bulk_update.py::test_bulk_update[100]": 75.487,
    "test_runner/performance/test_bulk_update.py::test_bulk_update[50]": 54.142,
    "test_runner/performance/test_compaction.py::test_compaction": 110.715,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[neon-5-10-100]": 11.68,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[vanilla-5-10-100]": 16.384,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[neon-5-10-100]": 11.315,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[vanilla-5-10-100]": 18.783,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[neon-5-10-100]": 11.647,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[vanilla-5-10-100]": 17.04,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-1]": 11.01,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-10]": 11.902,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-1]": 10.077,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-10]": 10.4,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[neon-5-10-100]": 11.33,
    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[vanilla-5-10-100]": 16.434,
    "test_runner/performance/test_copy.py::test_copy[neon]": 13.817,
    "test_runner/performance/test_copy.py::test_copy[vanilla]": 11.736,
    "test_runner/performance/test_gc_feedback.py::test_gc_feedback": 575.735,
    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[neon]": 14.868,
    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[vanilla]": 14.393,
    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[neon-1]": 20.588,
    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[vanilla-1]": 30.849,
    "test_runner/performance/test_layer_map.py::test_layer_map": 39.378,
    "test_runner/performance/test_lazy_startup.py::test_lazy_startup": 2848.938,
    "test_runner/performance/test_logical_replication.py::test_logical_replication": 120.952,
    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[neon]": 35.552,
    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[vanilla]": 66.762,
    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[neon]": 85.177,
    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[vanilla]": 92.12,
    "test_runner/performance/test_perf_pgbench.py::test_pgbench[neon-45-10]": 107.009,
    "test_runner/performance/test_perf_pgbench.py::test_pgbench[vanilla-45-10]": 99.582,
    "test_runner/performance/test_random_writes.py::test_random_writes[neon]": 4.737,
    "test_runner/performance/test_random_writes.py::test_random_writes[vanilla]": 2.686,
    "test_runner/performance/test_seqscans.py::test_seqscans[neon-100000-100-0]": 3.271,
    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-0]": 50.719,
    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-4]": 15.992,
    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-100000-100-0]": 0.566,
    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-0]": 13.542,
    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-4]": 13.35,
    "test_runner/performance/test_startup.py::test_startup_simple": 13.043,
    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_off-10-5-5]": 194.841,
    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_on-10-5-5]": 286.667,
    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[vanilla-10-5-5]": 85.577,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_off-1000]": 297.626,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_on-1000]": 646.187,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[vanilla-1000]": 989.776,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_off-45-100]": 125.638,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_on-45-100]": 123.554,
    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[vanilla-45-100]": 190.083,
    "test_runner/performance/test_write_amplification.py::test_write_amplification[neon]": 21.016,
    "test_runner/performance/test_write_amplification.py::test_write_amplification[vanilla]": 23.028,
}


def main(args: argparse.Namespace):
    connstr = args.connstr
    interval_days = args.days
    output = args.output
    percentile = args.percentile

    res: Dict[str, float] = {}

    try:
        logging.info("connecting to the database...")
        with psycopg2.connect(connstr, connect_timeout=30) as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                logging.info("fetching benchmarks...")
                cur.execute(BENCHMARKS_DURATION_QUERY, (percentile, interval_days))
                rows = cur.fetchall()
    except psycopg2.OperationalError as exc:
        logging.error("cannot fetch benchmark durations from the DB due to an error: %s", exc)
        rows = []
        res = FALLBACK_DURATION

    for row in rows:
        pytest_name = f"{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['name']}"
        duration = row["percentile_ms"] / 1000
        logging.info(f"\t{pytest_name}: {duration}")
        res[pytest_name] = duration

    logging.info(f"saving results to {output.name}")
    json.dump(res, output, indent=2)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Get <percentile> of benchmarks duration for the last <N> days"
    )
    parser.add_argument(
        "--output",
        type=argparse.FileType("w"),
        default=".test_durations",
        help="path to output json file (default: .test_durations)",
    )
    parser.add_argument(
        "--percentile",
        type=float,
        default="0.99",
        help="percentile (default: 0.99)",
    )
    parser.add_argument(
        "--days",
        required=False,
        default=10,
        type=int,
        help="how many days to look back for (default: 10)",
    )
    parser.add_argument(
        "connstr",
        help="connection string to the test results database",
    )
    args = parser.parse_args()

    level = logging.INFO
    logging.basicConfig(
        format="%(message)s",
        level=level,
    )

    main(args)