Switch to querying the new test results DB (#5616)

## Problem We started storing test results in a new format in https://github.com/neondatabase/neon/pull/4549. This PR switches the scripts to query the new DB. (We can completely remove the old DB and its ingestion scripts a couple of weeks after this PR is merged.) ## Summary of changes - `scripts/benchmark_durations.py`: query the new database - `scripts/flaky_tests.py`: query the new database
2026-07-08 22:50:37 +00:00 · 2023-10-25 14:25:13 +01:00
parent 8b8be7bed4
commit 4778b6a12e
4 changed files with 87 additions and 106 deletions
--- a/scripts/benchmark_durations.py
+++ b/scripts/benchmark_durations.py
@@ -15,28 +15,15 @@ The script fetches the durations of benchmarks from the database and stores it i

 BENCHMARKS_DURATION_QUERY = """
    SELECT
-        DISTINCT parent_suite, suite, test,
-        PERCENTILE_DISC(%s) WITHIN GROUP (ORDER BY duration_ms) as percentile_ms
-    FROM
-        (
-            SELECT
-                jsonb_array_elements(data -> 'children') ->> 'name' as parent_suite,
-                jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') ->> 'name' as suite,
-                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'name' as test,
-                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'status' as status,
-                to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'start')::bigint / 1000)::date as timestamp,
-                (jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'duration')::int as duration_ms
-            FROM
-                regress_test_results
-            WHERE
-                reference = 'refs/heads/main'
-        ) data
+        DISTINCT parent_suite, suite, name,
+        PERCENTILE_DISC(%s) WITHIN GROUP (ORDER BY duration) as percentile_ms
+    FROM results
    WHERE
-        timestamp > CURRENT_DATE - INTERVAL '%s' day
+        started_at > CURRENT_DATE - INTERVAL '%s' day
        AND parent_suite = 'test_runner.performance'
        AND status = 'passed'
    GROUP BY
-        parent_suite, suite, test
+        parent_suite, suite, name
    ;
 """

@@ -44,68 +31,69 @@ BENCHMARKS_DURATION_QUERY = """
 # the total duration varies from 8 to 40 minutes.
 # We use some pre-collected durations as a fallback to have a better distribution.
 FALLBACK_DURATION = {
-    "test_runner/performance/test_branch_creation.py::test_branch_creation_heavy_write[20]": 57.0,
-    "test_runner/performance/test_branch_creation.py::test_branch_creation_many_relations": 28.0,
-    "test_runner/performance/test_branch_creation.py::test_branch_creation_many[1024]": 71.0,
-    "test_runner/performance/test_branching.py::test_compare_child_and_root_pgbench_perf": 27.0,
-    "test_runner/performance/test_branching.py::test_compare_child_and_root_read_perf": 11.0,
-    "test_runner/performance/test_branching.py::test_compare_child_and_root_write_perf": 30.0,
-    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[neon]": 40.0,
-    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[vanilla]": 5.0,
-    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[1]": 3.0,
-    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[5]": 10.0,
-    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[10]": 19.0,
-    "test_runner/performance/test_bulk_update.py::test_bulk_update[10]": 66.0,
-    "test_runner/performance/test_bulk_update.py::test_bulk_update[50]": 30.0,
-    "test_runner/performance/test_bulk_update.py::test_bulk_update[100]": 60.0,
-    "test_runner/performance/test_compaction.py::test_compaction": 77.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[neon-5-10-100]": 11.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[vanilla-5-10-100]": 16.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[neon-5-10-100]": 11.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[vanilla-5-10-100]": 18.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[neon-5-10-100]": 11.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[vanilla-5-10-100]": 16.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-1]": 11.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-10]": 11.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-1]": 10.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-10]": 10.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[neon-5-10-100]": 11.0,
-    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[vanilla-5-10-100]": 16.0,
-    "test_runner/performance/test_copy.py::test_copy[neon]": 12.0,
-    "test_runner/performance/test_copy.py::test_copy[vanilla]": 10.0,
-    "test_runner/performance/test_gc_feedback.py::test_gc_feedback": 284.0,
-    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[neon]": 11.0,
-    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[vanilla]": 7.0,
-    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[neon-1]": 85.0,
-    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[vanilla-1]": 29.0,
-    "test_runner/performance/test_layer_map.py::test_layer_map": 44.0,
-    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[neon]": 16.0,
-    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[vanilla]": 67.0,
-    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[neon]": 67.0,
-    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[vanilla]": 80.0,
-    "test_runner/performance/test_perf_pgbench.py::test_pgbench[neon-45-10]": 102.0,
-    "test_runner/performance/test_perf_pgbench.py::test_pgbench[vanilla-45-10]": 99.0,
-    "test_runner/performance/test_random_writes.py::test_random_writes[neon]": 9.0,
-    "test_runner/performance/test_random_writes.py::test_random_writes[vanilla]": 2.0,
-    "test_runner/performance/test_seqscans.py::test_seqscans[neon-100000-100-0]": 4.0,
-    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-0]": 80.0,
-    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-4]": 68.0,
-    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-100000-100-0]": 0.0,
-    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-0]": 11.0,
-    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-4]": 10.0,
-    "test_runner/performance/test_startup.py::test_startup_simple": 2.0,
-    "test_runner/performance/test_startup.py::test_startup": 539.0,
-    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_off-10-5-5]": 375.0,
-    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_on-10-5-5]": 370.0,
-    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[vanilla-10-5-5]": 94.0,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_off-1000]": 164.0,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_on-1000]": 274.0,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[vanilla-1000]": 949.0,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_off-45-100]": 142.0,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_on-45-100]": 151.0,
-    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[vanilla-45-100]": 182.0,
-    "test_runner/performance/test_write_amplification.py::test_write_amplification[neon]": 13.0,
-    "test_runner/performance/test_write_amplification.py::test_write_amplification[vanilla]": 16.0,
+    "test_runner/performance/test_branch_creation.py::test_branch_creation_heavy_write[20]": 62.144,
+    "test_runner/performance/test_branch_creation.py::test_branch_creation_many[1024]": 90.941,
+    "test_runner/performance/test_branch_creation.py::test_branch_creation_many_relations": 26.053,
+    "test_runner/performance/test_branching.py::test_compare_child_and_root_pgbench_perf": 25.67,
+    "test_runner/performance/test_branching.py::test_compare_child_and_root_read_perf": 14.497,
+    "test_runner/performance/test_branching.py::test_compare_child_and_root_write_perf": 18.852,
+    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[neon]": 26.572,
+    "test_runner/performance/test_bulk_insert.py::test_bulk_insert[vanilla]": 6.259,
+    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[10]": 21.206,
+    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[1]": 3.474,
+    "test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[5]": 11.262,
+    "test_runner/performance/test_bulk_update.py::test_bulk_update[100]": 94.225,
+    "test_runner/performance/test_bulk_update.py::test_bulk_update[10]": 68.159,
+    "test_runner/performance/test_bulk_update.py::test_bulk_update[50]": 76.719,
+    "test_runner/performance/test_compaction.py::test_compaction": 110.222,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[neon-5-10-100]": 10.743,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[vanilla-5-10-100]": 16.541,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[neon-5-10-100]": 11.109,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[vanilla-5-10-100]": 18.121,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[neon-5-10-100]": 11.3,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[vanilla-5-10-100]": 16.086,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-10]": 12.024,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-1]": 11.14,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-10]": 10.375,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-1]": 10.075,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[neon-5-10-100]": 11.147,
+    "test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[vanilla-5-10-100]": 16.321,
+    "test_runner/performance/test_copy.py::test_copy[neon]": 16.579,
+    "test_runner/performance/test_copy.py::test_copy[vanilla]": 10.094,
+    "test_runner/performance/test_gc_feedback.py::test_gc_feedback": 590.157,
+    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[neon]": 14.102,
+    "test_runner/performance/test_gist_build.py::test_gist_buffering_build[vanilla]": 8.677,
+    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[neon-1]": 31.079,
+    "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[vanilla-1]": 38.119,
+    "test_runner/performance/test_layer_map.py::test_layer_map": 24.784,
+    "test_runner/performance/test_logical_replication.py::test_logical_replication": 117.707,
+    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[neon]": 21.194,
+    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[vanilla]": 59.068,
+    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[neon]": 73.235,
+    "test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[vanilla]": 82.586,
+    "test_runner/performance/test_perf_pgbench.py::test_pgbench[neon-45-10]": 106.536,
+    "test_runner/performance/test_perf_pgbench.py::test_pgbench[vanilla-45-10]": 98.753,
+    "test_runner/performance/test_random_writes.py::test_random_writes[neon]": 6.975,
+    "test_runner/performance/test_random_writes.py::test_random_writes[vanilla]": 3.69,
+    "test_runner/performance/test_seqscans.py::test_seqscans[neon-100000-100-0]": 3.529,
+    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-0]": 64.522,
+    "test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-4]": 40.964,
+    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-100000-100-0]": 0.55,
+    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-0]": 12.189,
+    "test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-4]": 13.899,
+    "test_runner/performance/test_startup.py::test_startup": 890.114,
+    "test_runner/performance/test_startup.py::test_startup_simple": 2.51,
+    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_off-10-5-5]": 527.245,
+    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_on-10-5-5]": 583.46,
+    "test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[vanilla-10-5-5]": 113.653,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_off-1000]": 233.728,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_on-1000]": 419.093,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[vanilla-1000]": 982.461,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_off-45-100]": 116.522,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_on-45-100]": 115.583,
+    "test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[vanilla-45-100]": 155.282,
+    "test_runner/performance/test_write_amplification.py::test_write_amplification[neon]": 26.704,
+    "test_runner/performance/test_write_amplification.py::test_write_amplification[vanilla]": 16.088,
 }


@@ -130,7 +118,7 @@ def main(args: argparse.Namespace):
        res = FALLBACK_DURATION

    for row in rows:
-        pytest_name = f"{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['test']}"
+        pytest_name = f"{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['name']}"
        duration = row["percentile_ms"] / 1000
        logging.info(f"\t{pytest_name}: {duration}")
        res[pytest_name] = duration
--- a/scripts/flaky_tests.py
+++ b/scripts/flaky_tests.py
@@ -9,28 +9,15 @@ from typing import DefaultDict, Dict
 import psycopg2
 import psycopg2.extras

-# We call the test "flaky" if it failed at least once on the main branch in the last N=10 days.
 FLAKY_TESTS_QUERY = """
    SELECT
-        DISTINCT parent_suite, suite, REGEXP_REPLACE(test, '(release|debug)-pg(\\d+)-?', '') as deparametrized_test
-    FROM
-        (
-            SELECT
-                reference,
-                jsonb_array_elements(data -> 'children') ->> 'name' as parent_suite,
-                jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') ->> 'name' as suite,
-                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'name' as test,
-                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'status' as status,
-                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'retriesStatusChange' as retries_status_change,
-                to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' ->> 'start')::bigint / 1000)::date as timestamp
-            FROM
-                regress_test_results
-        ) data
+        DISTINCT parent_suite, suite, name
+    FROM results
    WHERE
-        timestamp > CURRENT_DATE - INTERVAL '%s' day
+        started_at > CURRENT_DATE - INTERVAL '%s' day
        AND (
            (status IN ('failed', 'broken') AND reference = 'refs/heads/main')
-            OR retries_status_change::boolean
+            OR flaky
        )
    ;
 """
@@ -63,12 +50,14 @@ def main(args: argparse.Namespace):
        if row["parent_suite"] != "test_runner.regress":
            continue

-        deparametrized_test = row["deparametrized_test"]
-        dash_if_needed = "" if deparametrized_test.endswith("[]") else "-"
-        parametrized_test = deparametrized_test.replace(
-            "[",
-            f"[{build_type}-pg{pg_version}{dash_if_needed}",
-        )
+        if row["name"].endswith("]"):
+            parametrized_test = row["name"].replace(
+                "[",
+                f"[{build_type}-pg{pg_version}-",
+            )
+        else:
+            parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}]"
+
        res[row["parent_suite"]][row["suite"]][parametrized_test] = True

        logging.info(