test_runner: automatically rerun flaky tests (#3880)

This PR adds a plugin that automatically reruns (up to 3 times) flaky
tests. Internally, it uses data from `TEST_RESULT_CONNSTR` database and
`pytest-rerunfailures` plugin.

As the first approximation we consider the test flaky if it has failed on 
the main branch in the last 10 days.

Flaky tests are fetched by `scripts/flaky_tests.py` script (it's
possible to use it in a standalone mode to learn which tests are flaky),
stored to a JSON file, and then the file is passed to the pytest plugin.
This commit is contained in:
Alexander Bayandin
2023-04-04 12:21:54 +01:00
committed by GitHub
parent 846532112c
commit 105b8bb9d3
9 changed files with 195 additions and 16 deletions

View File

@@ -76,8 +76,8 @@ runs:
rm -f ${ALLURE_ZIP}
fi
env:
ALLURE_VERSION: 2.19.0
ALLURE_ZIP_MD5: ced21401a1a8b9dfb68cee9e4c210464
ALLURE_VERSION: 2.21.0
ALLURE_ZIP_MD5: c8db4dd8e2a7882583d569ed2c82879c
- name: Upload Allure results
if: ${{ inputs.action == 'store' }}

View File

@@ -44,6 +44,10 @@ inputs:
description: 'Secret access key'
required: false
default: ''
rerun_flaky:
description: 'Whether to rerun flaky tests'
required: false
default: 'false'
runs:
using: "composite"
@@ -101,6 +105,7 @@ runs:
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
RERUN_FLAKY: ${{ inputs.rerun_flaky }}
shell: bash -euxo pipefail {0}
run: |
# PLATFORM will be embedded in the perf test report
@@ -143,6 +148,13 @@ runs:
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
fi
if [ "${RERUN_FLAKY}" == "true" ]; then
mkdir -p $TEST_OUTPUT
poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" --days 10 --output "$TEST_OUTPUT/flaky.json"
EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
fi
if [[ "${{ inputs.build_type }}" == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then

View File

@@ -335,6 +335,9 @@ jobs:
real_s3_region: us-west-2
real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
rerun_flaky: true
env:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'

38
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.4.0 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand.
[[package]]
name = "aiohttp"
@@ -79,37 +79,35 @@ sa = ["sqlalchemy[postgresql-psycopg2binary] (>=1.3,<1.5)"]
[[package]]
name = "allure-pytest"
version = "2.10.0"
version = "2.13.1"
description = "Allure pytest integration"
category = "main"
optional = false
python-versions = "*"
files = [
{file = "allure-pytest-2.10.0.tar.gz", hash = "sha256:3b2ab67629f4cbd8617abd817d2b22292c6eb7efd5584f992d1af8143aea6ee7"},
{file = "allure_pytest-2.10.0-py3-none-any.whl", hash = "sha256:08274096594758447db54c3b2c382526ee04f1fe12119cdaee92d2d93c84b530"},
{file = "allure-pytest-2.13.1.tar.gz", hash = "sha256:68d69456eeb65af4061ec06a80bc941163b0616e8216554d36b070a6bf070e08"},
{file = "allure_pytest-2.13.1-py3-none-any.whl", hash = "sha256:a8de2fc3b3effe2d8f98801646920de3f055b779710f4c806dbee7c613c24633"},
]
[package.dependencies]
allure-python-commons = "2.10.0"
allure-python-commons = "2.13.1"
pytest = ">=4.5.0"
six = ">=1.9.0"
[[package]]
name = "allure-python-commons"
version = "2.10.0"
version = "2.13.1"
description = "Common module for integrate allure with python-based frameworks"
category = "main"
optional = false
python-versions = ">=3.5"
python-versions = ">=3.6"
files = [
{file = "allure-python-commons-2.10.0.tar.gz", hash = "sha256:d4d31344b0f0037a4a11e16b91b28cf0eeb23ffa0e50c27fcfc6aabe72212d3c"},
{file = "allure_python_commons-2.10.0-py3-none-any.whl", hash = "sha256:2a717e8ca8d296bf89cd57f38fc3c21893bd7ea8cd02a6ae5420e6d1a6eda5d0"},
{file = "allure-python-commons-2.13.1.tar.gz", hash = "sha256:3fc13e1da8ebb23f9ab5c9c72ad04595023cdd5078dbb8604939997faebed5cb"},
{file = "allure_python_commons-2.13.1-py3-none-any.whl", hash = "sha256:d08e04867bddf44fef55def3d67f4bc25af58a1bf9fcffcf4ec3331f7f2ef0d0"},
]
[package.dependencies]
attrs = ">=16.0.0"
pluggy = ">=0.4.0"
six = ">=1.9.0"
[[package]]
name = "async-timeout"
@@ -1932,6 +1930,22 @@ pytest = [
{version = ">=6.2.4", markers = "python_version >= \"3.10\""},
]
[[package]]
name = "pytest-rerunfailures"
version = "11.1.2"
description = "pytest plugin to re-run tests to eliminate flaky failures"
category = "main"
optional = false
python-versions = ">=3.7"
files = [
{file = "pytest-rerunfailures-11.1.2.tar.gz", hash = "sha256:55611661e873f1cafa384c82f08d07883954f4b76435f4b8a5b470c1954573de"},
{file = "pytest_rerunfailures-11.1.2-py3-none-any.whl", hash = "sha256:d21fe2e46d9774f8ad95f1aa799544ae95cac3a223477af94aa985adfae92b7e"},
]
[package.dependencies]
packaging = ">=17.1"
pytest = ">=5.3"
[[package]]
name = "pytest-timeout"
version = "2.1.0"
@@ -2597,4 +2611,4 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
content-hash = "2515a9320c2960076012fbc036fb33c4f6a23515c8d143785931dc18c6722d91"
content-hash = "b689ffd6eae32b966f1744b5ac3343fe0dd26b31ee1f50e13daf5045ee0623e1"

View File

@@ -26,7 +26,7 @@ prometheus-client = "^0.14.1"
pytest-timeout = "^2.1.0"
Werkzeug = "^2.2.3"
pytest-order = "^1.0.1"
allure-pytest = "^2.10.0"
allure-pytest = "^2.13.1"
pytest-asyncio = "^0.19.0"
toml = "^0.10.2"
psutil = "^5.9.4"
@@ -34,6 +34,7 @@ types-psutil = "^5.9.5.4"
types-toml = "^0.10.8"
pytest-httpserver = "^1.0.6"
aiohttp = "3.7.4"
pytest-rerunfailures = "^11.1.2"
[tool.poetry.group.dev.dependencies]
black = "^23.1.0"
@@ -69,6 +70,9 @@ strict = true
module = [
"asyncpg.*",
"pg8000.*",
"allure.*",
"allure_commons.*",
"allure_pytest.*",
]
ignore_missing_imports = true

87
scripts/flaky_tests.py Executable file
View File

@@ -0,0 +1,87 @@
#! /usr/bin/env python3
import argparse
import json
import logging
from collections import defaultdict
from typing import DefaultDict, Dict
import psycopg2
import psycopg2.extras
# We call the test "flaky" if it failed at least once on the main branch in the last N=10 days.
FLAKY_TESTS_QUERY = """
SELECT
DISTINCT parent_suite, suite, test
FROM
(
SELECT
revision,
jsonb_array_elements(data -> 'children') -> 'name' as parent_suite,
jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'name' as suite,
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'name' as test,
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'status' as status,
to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'start')::bigint / 1000)::date as timestamp
FROM
regress_test_results
WHERE
reference = 'refs/heads/main'
) data
WHERE
timestamp > CURRENT_DATE - INTERVAL '%s' day
AND status::text IN ('"failed"', '"broken"')
;
"""
def main(args: argparse.Namespace):
connstr = args.connstr
interval_days = args.days
output = args.output
res: DefaultDict[str, DefaultDict[str, Dict[str, bool]]]
res = defaultdict(lambda: defaultdict(dict))
logging.info("connecting to the database...")
with psycopg2.connect(connstr, connect_timeout=10) as conn:
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
logging.info("fetching flaky tests...")
cur.execute(FLAKY_TESTS_QUERY, (interval_days,))
rows = cur.fetchall()
for row in rows:
logging.info(f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['test']}")
res[row["parent_suite"]][row["suite"]][row["test"]] = True
logging.info(f"saving results to {output.name}")
json.dump(res, output, indent=2)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Detect flaky tests in the last N days")
parser.add_argument(
"--output",
type=argparse.FileType("w"),
default="flaky.json",
help="path to output json file (default: flaky.json)",
)
parser.add_argument(
"--days",
required=False,
default=10,
type=int,
help="how many days to look back for flaky tests (default: 10)",
)
parser.add_argument(
"connstr",
help="connection string to the test results database",
)
args = parser.parse_args()
level = logging.INFO
logging.basicConfig(
format="%(message)s",
level=level,
)
main(args)

View File

@@ -4,4 +4,5 @@ pytest_plugins = (
"fixtures.pg_stats",
"fixtures.compare_fixtures",
"fixtures.slow",
"fixtures.flaky",
)

View File

@@ -0,0 +1,58 @@
import json
from pathlib import Path
from typing import List
import pytest
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from allure_commons.types import LabelType
from allure_pytest.utils import allure_name, allure_suite_labels
from fixtures.log_helper import log
"""
The plugin reruns flaky tests.
It uses `pytest.mark.flaky` provided by `pytest-rerunfailures` plugin and flaky tests detected by `scripts/flaky_tests.py`
Note: the logic of getting flaky tests is extracted to a separate script to avoid running it for each of N xdist workers
"""
def pytest_addoption(parser: Parser):
parser.addoption(
"--flaky-tests-json",
action="store",
type=Path,
help="Path to json file with flaky tests generated by scripts/flaky_tests.py",
)
def pytest_collection_modifyitems(config: Config, items: List[pytest.Item]):
if not config.getoption("--flaky-tests-json"):
return
# Any error with getting flaky tests aren't critical, so just do not rerun any tests
flaky_json = config.getoption("--flaky-tests-json")
if not flaky_json.exists():
return
content = flaky_json.read_text()
try:
flaky_tests = json.loads(content)
except ValueError:
log.error(f"Can't parse {content} as json")
return
for item in items:
# Use the same logic for constructing test name as Allure does (we store allure-provided data in DB)
# Ref https://github.com/allure-framework/allure-python/blob/2.13.1/allure-pytest/src/listener.py#L98-L100
allure_labels = dict(allure_suite_labels(item))
parent_suite = str(allure_labels.get(LabelType.PARENT_SUITE))
suite = str(allure_labels.get(LabelType.SUITE))
params = item.callspec.params if hasattr(item, "callspec") else {}
name = allure_name(item, params)
if flaky_tests.get(parent_suite, {}).get(suite, {}).get(name, False):
# Rerun 3 times = 1 original run + 2 reruns
log.info(f"Marking {item.nodeid} as flaky. It will be rerun up to 3 times")
item.add_marker(pytest.mark.flaky(reruns=2))

View File

@@ -7,7 +7,7 @@ import time
from pathlib import Path
from typing import Any, Callable, Dict, List, Tuple, TypeVar
import allure # type: ignore
import allure
from psycopg2.extensions import cursor
from fixtures.log_helper import log