scripts/flaky_tests.py: Improve flaky tests detection (#5094)

## Problem

We still need to rerun some builds manually because flaky tests weren't
detected automatically.
I found two reasons for it:
- If a test is flaky on a particular build type, on a particular
Postgres version, there's a high chance that this test is flaky on all
configurations, but we don't automatically detect such cases.
- We detect flaky tests only on the main branch, which requires manual
retrigger runs for freshly made flaky tests.
Both of them are fixed in the PR.

## Summary of changes
- Spread flakiness of a single test to all configurations
- Detect flaky tests in all branches (not only in the main)
- Look back only at  7 days of test history (instead of 10)
This commit is contained in:
Alexander Bayandin
2023-08-29 11:53:24 +01:00
committed by GitHub
parent babefdd3f9
commit 7e39a96441
2 changed files with 47 additions and 14 deletions

View File

@@ -12,25 +12,26 @@ import psycopg2.extras
# We call the test "flaky" if it failed at least once on the main branch in the last N=10 days.
FLAKY_TESTS_QUERY = """
SELECT
DISTINCT parent_suite, suite, test
DISTINCT parent_suite, suite, REGEXP_REPLACE(test, '(release|debug)-pg(\\d+)-?', '') as deparametrized_test
FROM
(
SELECT
revision,
jsonb_array_elements(data -> 'children') -> 'name' as parent_suite,
jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'name' as suite,
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'name' as test,
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'status' as status,
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'retriesStatusChange' as retries_status_change,
to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'start')::bigint / 1000)::date as timestamp
reference,
jsonb_array_elements(data -> 'children') ->> 'name' as parent_suite,
jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') ->> 'name' as suite,
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'name' as test,
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'status' as status,
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'retriesStatusChange' as retries_status_change,
to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' ->> 'start')::bigint / 1000)::date as timestamp
FROM
regress_test_results
WHERE
reference = 'refs/heads/main'
) data
WHERE
timestamp > CURRENT_DATE - INTERVAL '%s' day
AND (status::text IN ('"failed"', '"broken"') OR retries_status_change::boolean)
AND (
(status IN ('failed', 'broken') AND reference = 'refs/heads/main')
OR retries_status_change::boolean
)
;
"""
@@ -40,6 +41,9 @@ def main(args: argparse.Namespace):
interval_days = args.days
output = args.output
build_type = args.build_type
pg_version = args.pg_version
res: DefaultDict[str, DefaultDict[str, Dict[str, bool]]]
res = defaultdict(lambda: defaultdict(dict))
@@ -55,8 +59,21 @@ def main(args: argparse.Namespace):
rows = []
for row in rows:
logging.info(f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['test']}")
res[row["parent_suite"]][row["suite"]][row["test"]] = True
# We don't want to automatically rerun tests in a performance suite
if row["parent_suite"] != "test_runner.regress":
continue
deparametrized_test = row["deparametrized_test"]
dash_if_needed = "" if deparametrized_test.endswith("[]") else "-"
parametrized_test = deparametrized_test.replace(
"[",
f"[{build_type}-pg{pg_version}{dash_if_needed}",
)
res[row["parent_suite"]][row["suite"]][parametrized_test] = True
logging.info(
f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{parametrized_test}"
)
logging.info(f"saving results to {output.name}")
json.dump(res, output, indent=2)
@@ -77,6 +94,18 @@ if __name__ == "__main__":
type=int,
help="how many days to look back for flaky tests (default: 10)",
)
parser.add_argument(
"--build-type",
required=True,
type=str,
help="for which build type to create list of flaky tests (debug or release)",
)
parser.add_argument(
"--pg-version",
required=True,
type=int,
help="for which Postgres version to create list of flaky tests (14, 15, etc.)",
)
parser.add_argument(
"connstr",
help="connection string to the test results database",