CI(benchmarks): make job split consistent across reruns (#6614)

## Problem We've got several issues with the current `benchmarks` job setup: - The `benchmark_durations.json` file (which we generate at runtime to split tests across several jobs[0]) is not consistent between these jobs (and even less consistent with the file generated when we rerun a job). I.e. the test selection for each job can differ, which could result in tests being missed in a test run. - `scripts/benchmark_durations.py` doesn't fetch all tests from the database (it doesn't expect any extra directories inside `test_runner/performance`) - For some reason, the current split into 4 groups leaves the 4th group with no tests to run, which fails the job[1] - [0] https://github.com/neondatabase/neon/pull/4683 - [1] https://github.com/neondatabase/neon/issues/6629 ## Summary of changes - Generate the `benchmark_durations.json` file once before we start the `benchmarks` jobs (this makes it consistent across the jobs) and pass the file content through the GitHub Actions input (this makes it consistent for reruns) - `scripts/benchmark_durations.py`: fix the SQL query so that it fetches all required tests - Split benchmarks into 5 jobs instead of 4 jobs.
2025-12-22 21:59:59 +00:00 · 2024-02-06 17:00:55 +00:00
parent bb92721168
commit e65f0fe874
3 changed files with 111 additions and 67 deletions
--- a/scripts/benchmark_durations.py
+++ b/scripts/benchmark_durations.py
@@ -20,7 +20,7 @@ BENCHMARKS_DURATION_QUERY = """
    FROM results
    WHERE
        started_at > CURRENT_DATE - INTERVAL '%s' day
-        AND parent_suite = 'test_runner.performance'
+        AND starts_with(parent_suite, 'test_runner.performance')
        AND status = 'passed'
    GROUP BY
        parent_suite, suite, name
@@ -31,68 +31,75 @@ BENCHMARKS_DURATION_QUERY = """
 # the total duration varies from 8 to 40 minutes.
 # We use some pre-collected durations as a fallback to have a better distribution.
 FALLBACK_DURATION = {
-    "test_runner/performance/test_branch_creation.py::test_branch_creation_heavy_write[20]": 62.144,
-    "test_runner/performance/test_branch_creation.py::test_branch_creation_many[1024]": 90.941,
-    "test_runner/performance/test_branch_creation.py::test_branch_creation_many_relations": 26.053,
-    "test_runner/performance/test_branching.py::test_compare_child_and_root_pgbench_perf": 25.67,
-    "test_runner/performance/test_branching.py::test_compare_child_and_root_read_perf": 14.497,
-    "test_runner/performance/test_branching.py::test_compare_child_and_root_write_perf": 18.852,
-    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[neon]": 26.572,
-    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[vanilla]": 6.259,
-    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[10]": 21.206,
-    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[1]": 3.474,
-    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[5]": 11.262,
-    "test_runner/performance/test_bulk_update.py::test_bulk_update[100]": 94.225,
-    "test_runner/performance/test_bulk_update.py::test_bulk_update[10]": 68.159,
-    "test_runner/performance/test_bulk_update.py::test_bulk_update[50]": 76.719,
-    "test_runner/performance/test_compaction.py::test_compaction": 110.222,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[neon-5-10-100]": 10.743,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[vanilla-5-10-100]": 16.541,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[neon-5-10-100]": 11.109,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[vanilla-5-10-100]": 18.121,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[neon-5-10-100]": 11.3,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[vanilla-5-10-100]": 16.086,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-10]": 12.024,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-1]": 11.14,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-10]": 10.375,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-1]": 10.075,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[neon-5-10-100]": 11.147,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[vanilla-5-10-100]": 16.321,
-    "test_runner/performance/test_copy.py::test_copy[neon]": 16.579,
-    "test_runner/performance/test_copy.py::test_copy[vanilla]": 10.094,
-    "test_runner/performance/test_gc_feedback.py::test_gc_feedback": 590.157,
-    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[neon]": 14.102,
-    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[vanilla]": 8.677,
-    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[neon-1]": 31.079,
-    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[vanilla-1]": 38.119,
-    "test_runner/performance/test_layer_map.py::test_layer_map": 24.784,
-    "test_runner/performance/test_logical_replication.py::test_logical_replication": 117.707,
-    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[neon]": 21.194,
-    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[vanilla]": 59.068,
-    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[neon]": 73.235,
-    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[vanilla]": 82.586,
-    "test_runner/performance/test_perf_pgbench.py::test_pgbench[neon-45-10]": 106.536,
-    "test_runner/performance/test_perf_pgbench.py::test_pgbench[vanilla-45-10]": 98.753,
-    "test_runner/performance/test_random_writes.py::test_random_writes[neon]": 6.975,
-    "test_runner/performance/test_random_writes.py::test_random_writes[vanilla]": 3.69,
-    "test_runner/performance/test_seqscans.py::test_seqscans[neon-100000-100-0]": 3.529,
-    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-0]": 64.522,
-    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-4]": 40.964,
-    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-100000-100-0]": 0.55,
-    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-0]": 12.189,
-    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-4]": 13.899,
-    "test_runner/performance/test_startup.py::test_startup_simple": 2.51,
-    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_off-10-5-5]": 527.245,
-    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_on-10-5-5]": 583.46,
-    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[vanilla-10-5-5]": 113.653,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_off-1000]": 233.728,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_on-1000]": 419.093,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[vanilla-1000]": 982.461,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_off-45-100]": 116.522,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_on-45-100]": 115.583,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[vanilla-45-100]": 155.282,
-    "test_runner/performance/test_write_amplification.py::test_write_amplification[neon]": 26.704,
-    "test_runner/performance/test_write_amplification.py::test_write_amplification[vanilla]": 16.088,
+    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[1-13-30]": 400.15,
+    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[1-6-30]": 372.521,
+    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[10-13-30]": 420.017,
+    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[10-6-30]": 373.769,
+    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[100-13-30]": 678.742,
+    "test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_max_throughput_getpage_at_latest_lsn[100-6-30]": 512.135,
+    "test_runner/performance/test_branch_creation.py::test_branch_creation_heavy_write[20]": 58.036,
+    "test_runner/performance/test_branch_creation.py::test_branch_creation_many_relations": 22.104,
+    "test_runner/performance/test_branch_creation.py::test_branch_creation_many[1024]": 126.073,
+    "test_runner/performance/test_branching.py::test_compare_child_and_root_pgbench_perf": 25.759,
+    "test_runner/performance/test_branching.py::test_compare_child_and_root_read_perf": 6.885,
+    "test_runner/performance/test_branching.py::test_compare_child_and_root_write_perf": 8.758,
+    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[neon]": 18.275,
+    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[vanilla]": 9.533,
+    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[1]": 12.09,
+    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[10]": 35.145,
+    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[5]": 22.28,
+    "test_runner/performance/test_bulk_update.py::test_bulk_update[10]": 66.353,
+    "test_runner/performance/test_bulk_update.py::test_bulk_update[100]": 75.487,
+    "test_runner/performance/test_bulk_update.py::test_bulk_update[50]": 54.142,
+    "test_runner/performance/test_compaction.py::test_compaction": 110.715,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[neon-5-10-100]": 11.68,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[vanilla-5-10-100]": 16.384,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[neon-5-10-100]": 11.315,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[vanilla-5-10-100]": 18.783,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[neon-5-10-100]": 11.647,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[vanilla-5-10-100]": 17.04,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-1]": 11.01,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-10]": 11.902,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-1]": 10.077,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-10]": 10.4,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[neon-5-10-100]": 11.33,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[vanilla-5-10-100]": 16.434,
+    "test_runner/performance/test_copy.py::test_copy[neon]": 13.817,
+    "test_runner/performance/test_copy.py::test_copy[vanilla]": 11.736,
+    "test_runner/performance/test_gc_feedback.py::test_gc_feedback": 575.735,
+    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[neon]": 14.868,
+    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[vanilla]": 14.393,
+    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[neon-1]": 20.588,
+    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[vanilla-1]": 30.849,
+    "test_runner/performance/test_layer_map.py::test_layer_map": 39.378,
+    "test_runner/performance/test_lazy_startup.py::test_lazy_startup": 2848.938,
+    "test_runner/performance/test_logical_replication.py::test_logical_replication": 120.952,
+    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[neon]": 35.552,
+    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[vanilla]": 66.762,
+    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[neon]": 85.177,
+    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[vanilla]": 92.12,
+    "test_runner/performance/test_perf_pgbench.py::test_pgbench[neon-45-10]": 107.009,
+    "test_runner/performance/test_perf_pgbench.py::test_pgbench[vanilla-45-10]": 99.582,
+    "test_runner/performance/test_random_writes.py::test_random_writes[neon]": 4.737,
+    "test_runner/performance/test_random_writes.py::test_random_writes[vanilla]": 2.686,
+    "test_runner/performance/test_seqscans.py::test_seqscans[neon-100000-100-0]": 3.271,
+    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-0]": 50.719,
+    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-4]": 15.992,
+    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-100000-100-0]": 0.566,
+    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-0]": 13.542,
+    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-4]": 13.35,
+    "test_runner/performance/test_startup.py::test_startup_simple": 13.043,
+    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_off-10-5-5]": 194.841,
+    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_on-10-5-5]": 286.667,
+    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[vanilla-10-5-5]": 85.577,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_off-1000]": 297.626,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_on-1000]": 646.187,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[vanilla-1000]": 989.776,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_off-45-100]": 125.638,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_on-45-100]": 123.554,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[vanilla-45-100]": 190.083,
+    "test_runner/performance/test_write_amplification.py::test_write_amplification[neon]": 21.016,
+    "test_runner/performance/test_write_amplification.py::test_write_amplification[vanilla]": 23.028,
 }