From 7e39a9644185dbe4ea0c755d137cc02c764dc496 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Tue, 29 Aug 2023 11:53:24 +0100 Subject: [PATCH] scripts/flaky_tests.py: Improve flaky tests detection (#5094) ## Problem We still need to rerun some builds manually because flaky tests weren't detected automatically. I found two reasons for it: - If a test is flaky on a particular build type, on a particular Postgres version, there's a high chance that this test is flaky on all configurations, but we don't automatically detect such cases. - We detect flaky tests only on the main branch, so newly introduced flaky tests still require manually retriggering runs. Both are fixed in this PR. ## Summary of changes - Spread flakiness of a single test to all configurations - Detect flaky tests in all branches (not only on main) - Look back only at 7 days of test history (instead of 10) --- .../actions/run-python-test-set/action.yml | 6 +- scripts/flaky_tests.py | 55 ++++++++++++++----- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 60ccc56738..013b446307 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -145,7 +145,11 @@ runs: if [ "${RERUN_FLAKY}" == "true" ]; then mkdir -p $TEST_OUTPUT - poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" --days 10 --output "$TEST_OUTPUT/flaky.json" + poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" \ + --days 7 \ + --output "$TEST_OUTPUT/flaky.json" \ + --pg-version "${DEFAULT_PG_VERSION}" \ + --build-type "${BUILD_TYPE}" EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS" fi diff --git a/scripts/flaky_tests.py b/scripts/flaky_tests.py index a3b29909e5..5ef2f76798 100755 --- a/scripts/flaky_tests.py +++ b/scripts/flaky_tests.py @@ -12,25 +12,26 @@ import psycopg2.extras # We call the test "flaky" if it failed 
at least once on the main branch in the last N=10 days. FLAKY_TESTS_QUERY = """ SELECT - DISTINCT parent_suite, suite, test + DISTINCT parent_suite, suite, REGEXP_REPLACE(test, '(release|debug)-pg(\\d+)-?', '') as deparametrized_test FROM ( SELECT - revision, - jsonb_array_elements(data -> 'children') -> 'name' as parent_suite, - jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'name' as suite, - jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'name' as test, - jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'status' as status, - jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'retriesStatusChange' as retries_status_change, - to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'start')::bigint / 1000)::date as timestamp + reference, + jsonb_array_elements(data -> 'children') ->> 'name' as parent_suite, + jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') ->> 'name' as suite, + jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'name' as test, + jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'status' as status, + jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'retriesStatusChange' as retries_status_change, + to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' ->> 'start')::bigint / 1000)::date as timestamp FROM regress_test_results - WHERE - reference = 'refs/heads/main' ) data WHERE timestamp > CURRENT_DATE - INTERVAL '%s' day - AND 
(status::text IN ('"failed"', '"broken"') OR retries_status_change::boolean) + AND ( + (status IN ('failed', 'broken') AND reference = 'refs/heads/main') + OR retries_status_change::boolean + ) ; """ @@ -40,6 +41,9 @@ def main(args: argparse.Namespace): interval_days = args.days output = args.output + build_type = args.build_type + pg_version = args.pg_version + res: DefaultDict[str, DefaultDict[str, Dict[str, bool]]] res = defaultdict(lambda: defaultdict(dict)) @@ -55,8 +59,21 @@ def main(args: argparse.Namespace): rows = [] for row in rows: - logging.info(f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['test']}") - res[row["parent_suite"]][row["suite"]][row["test"]] = True + # We don't want to automatically rerun tests in a performance suite + if row["parent_suite"] != "test_runner.regress": + continue + + deparametrized_test = row["deparametrized_test"] + dash_if_needed = "" if deparametrized_test.endswith("[]") else "-" + parametrized_test = deparametrized_test.replace( + "[", + f"[{build_type}-pg{pg_version}{dash_if_needed}", + ) + res[row["parent_suite"]][row["suite"]][parametrized_test] = True + + logging.info( + f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{parametrized_test}" + ) logging.info(f"saving results to {output.name}") json.dump(res, output, indent=2) @@ -77,6 +94,18 @@ if __name__ == "__main__": type=int, help="how many days to look back for flaky tests (default: 10)", ) + parser.add_argument( + "--build-type", + required=True, + type=str, + help="for which build type to create list of flaky tests (debug or release)", + ) + parser.add_argument( + "--pg-version", + required=True, + type=int, + help="for which Postgres version to create list of flaky tests (14, 15, etc.)", + ) parser.add_argument( "connstr", help="connection string to the test results database",