From 105b8bb9d36297e0c5bc119c87ed1e94ce18cbd5 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Tue, 4 Apr 2023 12:21:54 +0100 Subject: [PATCH] test_runner: automatically rerun flaky tests (#3880) This PR adds a plugin that automatically reruns (up to 3 times) flaky tests. Internally, it uses data from `TEST_RESULT_CONNSTR` database and `pytest-rerunfailures` plugin. As the first approximation we consider the test flaky if it has failed on the main branch in the last 10 days. Flaky tests are fetched by `scripts/flaky_tests.py` script (it's possible to use it in a standalone mode to learn which tests are flaky), stored to a JSON file, and then the file is passed to the pytest plugin. --- .github/actions/allure-report/action.yml | 4 +- .../actions/run-python-test-set/action.yml | 12 +++ .github/workflows/build_and_test.yml | 3 + poetry.lock | 38 +++++--- pyproject.toml | 6 +- scripts/flaky_tests.py | 87 +++++++++++++++++++ test_runner/conftest.py | 1 + test_runner/fixtures/flaky.py | 58 +++++++++++++ test_runner/fixtures/utils.py | 2 +- 9 files changed, 195 insertions(+), 16 deletions(-) create mode 100755 scripts/flaky_tests.py create mode 100644 test_runner/fixtures/flaky.py diff --git a/.github/actions/allure-report/action.yml b/.github/actions/allure-report/action.yml index 2d4cabdde5..e685006245 100644 --- a/.github/actions/allure-report/action.yml +++ b/.github/actions/allure-report/action.yml @@ -76,8 +76,8 @@ runs: rm -f ${ALLURE_ZIP} fi env: - ALLURE_VERSION: 2.19.0 - ALLURE_ZIP_MD5: ced21401a1a8b9dfb68cee9e4c210464 + ALLURE_VERSION: 2.21.0 + ALLURE_ZIP_MD5: c8db4dd8e2a7882583d569ed2c82879c - name: Upload Allure results if: ${{ inputs.action == 'store' }} diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 29b04a3478..11f5c78f19 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -44,6 +44,10 @@ inputs: description: 'Secret 
access key' required: false default: '' + rerun_flaky: + description: 'Whether to rerun flaky tests' + required: false + default: 'false' runs: using: "composite" @@ -101,6 +105,7 @@ runs: COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14 ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage') ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage') + RERUN_FLAKY: ${{ inputs.rerun_flaky }} shell: bash -euxo pipefail {0} run: | # PLATFORM will be embedded in the perf test report @@ -143,6 +148,13 @@ runs: EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS" fi + if [ "${RERUN_FLAKY}" == "true" ]; then + mkdir -p $TEST_OUTPUT + poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" --days 10 --output "$TEST_OUTPUT/flaky.json" + + EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS" + fi + if [[ "${{ inputs.build_type }}" == "debug" ]]; then cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run) elif [[ "${{ inputs.build_type }}" == "release" ]]; then diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8482341b0c..8c108e7f50 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -335,6 +335,9 @@ jobs: real_s3_region: us-west-2 real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}" real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}" + rerun_flaky: true + env: + TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }} - name: Merge and upload coverage data if: matrix.build_type == 'debug' diff --git a/poetry.lock b/poetry.lock index 011d5d7817..7b368cd3b4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.4.0 and should not be changed by hand. 
+# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -79,37 +79,35 @@ sa = ["sqlalchemy[postgresql-psycopg2binary] (>=1.3,<1.5)"] [[package]] name = "allure-pytest" -version = "2.10.0" +version = "2.13.1" description = "Allure pytest integration" category = "main" optional = false python-versions = "*" files = [ - {file = "allure-pytest-2.10.0.tar.gz", hash = "sha256:3b2ab67629f4cbd8617abd817d2b22292c6eb7efd5584f992d1af8143aea6ee7"}, - {file = "allure_pytest-2.10.0-py3-none-any.whl", hash = "sha256:08274096594758447db54c3b2c382526ee04f1fe12119cdaee92d2d93c84b530"}, + {file = "allure-pytest-2.13.1.tar.gz", hash = "sha256:68d69456eeb65af4061ec06a80bc941163b0616e8216554d36b070a6bf070e08"}, + {file = "allure_pytest-2.13.1-py3-none-any.whl", hash = "sha256:a8de2fc3b3effe2d8f98801646920de3f055b779710f4c806dbee7c613c24633"}, ] [package.dependencies] -allure-python-commons = "2.10.0" +allure-python-commons = "2.13.1" pytest = ">=4.5.0" -six = ">=1.9.0" [[package]] name = "allure-python-commons" -version = "2.10.0" +version = "2.13.1" description = "Common module for integrate allure with python-based frameworks" category = "main" optional = false -python-versions = ">=3.5" +python-versions = ">=3.6" files = [ - {file = "allure-python-commons-2.10.0.tar.gz", hash = "sha256:d4d31344b0f0037a4a11e16b91b28cf0eeb23ffa0e50c27fcfc6aabe72212d3c"}, - {file = "allure_python_commons-2.10.0-py3-none-any.whl", hash = "sha256:2a717e8ca8d296bf89cd57f38fc3c21893bd7ea8cd02a6ae5420e6d1a6eda5d0"}, + {file = "allure-python-commons-2.13.1.tar.gz", hash = "sha256:3fc13e1da8ebb23f9ab5c9c72ad04595023cdd5078dbb8604939997faebed5cb"}, + {file = "allure_python_commons-2.13.1-py3-none-any.whl", hash = "sha256:d08e04867bddf44fef55def3d67f4bc25af58a1bf9fcffcf4ec3331f7f2ef0d0"}, ] [package.dependencies] attrs = ">=16.0.0" pluggy = ">=0.4.0" -six = ">=1.9.0" [[package]] name = "async-timeout" @@ -1932,6 +1930,22 @@ pytest = [ {version = 
">=6.2.4", markers = "python_version >= \"3.10\""}, ] +[[package]] +name = "pytest-rerunfailures" +version = "11.1.2" +description = "pytest plugin to re-run tests to eliminate flaky failures" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-rerunfailures-11.1.2.tar.gz", hash = "sha256:55611661e873f1cafa384c82f08d07883954f4b76435f4b8a5b470c1954573de"}, + {file = "pytest_rerunfailures-11.1.2-py3-none-any.whl", hash = "sha256:d21fe2e46d9774f8ad95f1aa799544ae95cac3a223477af94aa985adfae92b7e"}, +] + +[package.dependencies] +packaging = ">=17.1" +pytest = ">=5.3" + [[package]] name = "pytest-timeout" version = "2.1.0" @@ -2597,4 +2611,4 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>= [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "2515a9320c2960076012fbc036fb33c4f6a23515c8d143785931dc18c6722d91" +content-hash = "b689ffd6eae32b966f1744b5ac3343fe0dd26b31ee1f50e13daf5045ee0623e1" diff --git a/pyproject.toml b/pyproject.toml index f21c12b2e3..a51e91782e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ prometheus-client = "^0.14.1" pytest-timeout = "^2.1.0" Werkzeug = "^2.2.3" pytest-order = "^1.0.1" -allure-pytest = "^2.10.0" +allure-pytest = "^2.13.1" pytest-asyncio = "^0.19.0" toml = "^0.10.2" psutil = "^5.9.4" @@ -34,6 +34,7 @@ types-psutil = "^5.9.5.4" types-toml = "^0.10.8" pytest-httpserver = "^1.0.6" aiohttp = "3.7.4" +pytest-rerunfailures = "^11.1.2" [tool.poetry.group.dev.dependencies] black = "^23.1.0" @@ -69,6 +70,9 @@ strict = true module = [ "asyncpg.*", "pg8000.*", + "allure.*", + "allure_commons.*", + "allure_pytest.*", ] ignore_missing_imports = true diff --git a/scripts/flaky_tests.py b/scripts/flaky_tests.py new file mode 100755 index 0000000000..829cc814e8 --- /dev/null +++ b/scripts/flaky_tests.py @@ -0,0 +1,87 @@ +#! 
/usr/bin/env python3
+
+import argparse
+import contextlib
+import json
+import logging
+from collections import defaultdict
+from typing import DefaultDict, Dict
+
+import psycopg2
+import psycopg2.extras
+
+# We call a test "flaky" if it failed at least once on the main branch in the last N days.
+# N is the only query parameter; its placeholder must stay unquoted (psycopg2 quotes the value).
+FLAKY_TESTS_QUERY = """
+    SELECT
+        DISTINCT parent_suite, suite, test
+    FROM
+        (
+            SELECT
+                revision,
+                jsonb_array_elements(data -> 'children') -> 'name' as parent_suite,
+                jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'name' as suite,
+                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'name' as test,
+                jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'status' as status,
+                to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'start')::bigint / 1000)::date as timestamp
+            FROM
+                regress_test_results
+            WHERE
+                reference = 'refs/heads/main'
+        ) data
+    WHERE
+        timestamp > CURRENT_DATE - %s * INTERVAL '1 day'
+        AND status::text IN ('"failed"', '"broken"')
+    ;
+"""
+
+
+def main(args: argparse.Namespace) -> None:
+    """Save tests that failed on main in the last `args.days` days to `args.output` as JSON."""
+    res: DefaultDict[str, DefaultDict[str, Dict[str, bool]]]
+    res = defaultdict(lambda: defaultdict(dict))
+
+    logging.info("connecting to the database...")
+    # psycopg2's `with conn` only ends the transaction, `closing` is what closes the connection
+    with contextlib.closing(psycopg2.connect(args.connstr, connect_timeout=10)) as conn:
+        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+            logging.info("fetching flaky tests...")
+            cur.execute(FLAKY_TESTS_QUERY, (args.days,))
+            rows = cur.fetchall()
+
+    for row in rows:
+        logging.info(f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['test']}")
+        res[row["parent_suite"]][row["suite"]][row["test"]] = True
+
+    logging.info(f"saving results to {args.output.name}")
+    json.dump(res, args.output, indent=2)
+
+
+if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Detect flaky tests in the last N days") + parser.add_argument( + "--output", + type=argparse.FileType("w"), + default="flaky.json", + help="path to output json file (default: flaky.json)", + ) + parser.add_argument( + "--days", + required=False, + default=10, + type=int, + help="how many days to look back for flaky tests (default: 10)", + ) + parser.add_argument( + "connstr", + help="connection string to the test results database", + ) + args = parser.parse_args() + + level = logging.INFO + logging.basicConfig( + format="%(message)s", + level=level, + ) + + main(args) diff --git a/test_runner/conftest.py b/test_runner/conftest.py index 8b7f6a2eea..75242b84ce 100644 --- a/test_runner/conftest.py +++ b/test_runner/conftest.py @@ -4,4 +4,5 @@ pytest_plugins = ( "fixtures.pg_stats", "fixtures.compare_fixtures", "fixtures.slow", + "fixtures.flaky", )
diff --git a/test_runner/fixtures/flaky.py b/test_runner/fixtures/flaky.py
new file mode 100644
index 0000000000..9d7f8ead9a
--- /dev/null
+++ b/test_runner/fixtures/flaky.py
@@ -0,0 +1,68 @@
+import json
+from pathlib import Path
+from typing import List
+
+import pytest
+from _pytest.config import Config
+from _pytest.config.argparsing import Parser
+from allure_commons.types import LabelType
+from allure_pytest.utils import allure_name, allure_suite_labels
+
+from fixtures.log_helper import log
+
+"""
+The plugin reruns flaky tests.
+It uses `pytest.mark.flaky` provided by `pytest-rerunfailures` plugin and flaky tests detected by `scripts/flaky_tests.py`
+
+Note: the logic of getting flaky tests is extracted to a separate script to avoid running it for each of N xdist workers
+"""
+
+
+def pytest_addoption(parser: Parser) -> None:
+    """Register the `--flaky-tests-json` command line option."""
+    parser.addoption(
+        "--flaky-tests-json",
+        action="store",
+        type=Path,
+        help="Path to json file with flaky tests generated by scripts/flaky_tests.py",
+    )
+
+
+def pytest_collection_modifyitems(config: Config, items: List[pytest.Item]) -> None:
+    """Add the `flaky` marker to collected tests listed in the `--flaky-tests-json` file.
+
+    The file is looked up as a nested `{parent_suite: {suite: {test_name: ...}}}` mapping.
+    If the option isn't set, or the file can't be read or parsed, no test is marked.
+    """
+    flaky_json = config.getoption("--flaky-tests-json")
+    if not flaky_json:
+        return
+
+    # Errors with getting flaky tests aren't critical, so just do not rerun any tests.
+    # The file is read inside `try` so an unreadable file is handled like an unparsable one.
+    try:
+        flaky_tests = json.loads(flaky_json.read_text())
+    except FileNotFoundError:
+        # A missing file isn't worth an error message
+        return
+    except (OSError, ValueError) as e:
+        log.error(f"Can't load flaky tests from {flaky_json}: {e}")
+        return
+
+    if not isinstance(flaky_tests, dict):
+        log.error(f"Can't use {flaky_json}: expected a json object at the top level")
+        return
+
+    for item in items:
+        # Use the same logic for constructing test name as Allure does (we store allure-provided data in DB)
+        # Ref https://github.com/allure-framework/allure-python/blob/2.13.1/allure-pytest/src/listener.py#L98-L100
+        allure_labels = dict(allure_suite_labels(item))
+        parent_suite = str(allure_labels.get(LabelType.PARENT_SUITE))
+        suite = str(allure_labels.get(LabelType.SUITE))
+        params = item.callspec.params if hasattr(item, "callspec") else {}
+        name = allure_name(item, params)
+
+        if flaky_tests.get(parent_suite, {}).get(suite, {}).get(name, False):
+            # Up to 3 runs = 1 original run + 2 reruns
+            log.info(f"Marking {item.nodeid} as flaky. It will be run up to 3 times")
+            item.add_marker(pytest.mark.flaky(reruns=2))
diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index ce03658e8f..1e15fea3c2 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -7,7 +7,7 @@ import time from pathlib import Path from typing import Any, Callable, Dict, List, Tuple, TypeVar -import allure # type: ignore +import allure from psycopg2.extensions import cursor from fixtures.log_helper import log