scripts/flaky_tests.py: Improve flaky tests detection (#5094)

## Problem

We still need to rerun some builds manually because flaky tests weren't detected automatically. I found two reasons for it:

- If a test is flaky on a particular build type and a particular Postgres version, there's a high chance that it is flaky on all configurations, but we don't automatically detect such cases.
- We detect flaky tests only on the main branch, so freshly introduced flaky tests still require manually retriggering runs.

Both issues are fixed in this PR.

## Summary of changes

- Spread flakiness of a single test to all configurations
- Detect flaky tests in all branches (not only in main)
- Look back at only 7 days of test history (instead of 10)
2026-01-04 12:02:55 +00:00 · 2023-08-29 11:53:24 +01:00
parent babefdd3f9
commit 7e39a96441
2 changed files with 47 additions and 14 deletions
--- a/scripts/flaky_tests.py
+++ b/scripts/flaky_tests.py
@@ -12,25 +12,26 @@ import psycopg2.extras
 # We call the test "flaky" if it failed at least once on the main branch in the last N=10 days.
 FLAKY_TESTS_QUERY = """
    SELECT
-        DISTINCT parent_suite, suite, test
+        DISTINCT parent_suite, suite, REGEXP_REPLACE(test, '(release|debug)-pg(\\d+)-?', '') as deparametrized_test
    FROM
        (
            SELECT
-                revision,
-                jsonb_array_elements(data -> 'children') -> 'name' as parent_suite,
-                jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'name' as suite,
-                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'name' as test,
-                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'status' as status,
-                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'retriesStatusChange' as retries_status_change,
-                to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'start')::bigint / 1000)::date as timestamp
+                reference,
+                jsonb_array_elements(data -> 'children') ->> 'name' as parent_suite,
+                jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') ->> 'name' as suite,
+                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'name' as test,
+                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'status' as status,
+                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'retriesStatusChange' as retries_status_change,
+                to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' ->> 'start')::bigint / 1000)::date as timestamp
            FROM
                regress_test_results
-            WHERE
-                reference = 'refs/heads/main'
        ) data
    WHERE
        timestamp > CURRENT_DATE - INTERVAL '%s' day
-        AND (status::text IN ('"failed"', '"broken"') OR retries_status_change::boolean)
+        AND (
+            (status IN ('failed', 'broken') AND reference = 'refs/heads/main')
+            OR retries_status_change::boolean
+        )
    ;
 """

@@ -40,6 +41,9 @@ def main(args: argparse.Namespace):
    interval_days = args.days
    output = args.output

+    build_type = args.build_type
+    pg_version = args.pg_version
+
    res: DefaultDict[str, DefaultDict[str, Dict[str, bool]]]
    res = defaultdict(lambda: defaultdict(dict))

@@ -55,8 +59,21 @@ def main(args: argparse.Namespace):
        rows = []

    for row in rows:
-        logging.info(f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['test']}")
-        res[row["parent_suite"]][row["suite"]][row["test"]] = True
+        # We don't want to automatically rerun tests in a performance suite
+        if row["parent_suite"] != "test_runner.regress":
+            continue
+
+        deparametrized_test = row["deparametrized_test"]
+        dash_if_needed = "" if deparametrized_test.endswith("[]") else "-"
+        parametrized_test = deparametrized_test.replace(
+            "[",
+            f"[{build_type}-pg{pg_version}{dash_if_needed}",
+        )
+        res[row["parent_suite"]][row["suite"]][parametrized_test] = True
+
+        logging.info(
+            f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{parametrized_test}"
+        )

    logging.info(f"saving results to {output.name}")
    json.dump(res, output, indent=2)
@@ -77,6 +94,18 @@ if __name__ == "__main__":
        type=int,
        help="how many days to look back for flaky tests (default: 10)",
    )
+    parser.add_argument(
+        "--build-type",
+        required=True,
+        type=str,
+        help="for which build type to create list of flaky tests (debug or release)",
+    )
+    parser.add_argument(
+        "--pg-version",
+        required=True,
+        type=int,
+        help="for which Postgres version to create list of flaky tests (14, 15, etc.)",
+    )
    parser.add_argument(
        "connstr",
        help="connection string to the test results database",