test_runner: rerun all failed tests (#9917)

## Problem

Currently, we rerun only known flaky tests. This approach was chosen to
reduce the number of test failures that go unnoticed (by forcing people to
take a look at failed tests and rerun the job manually), but it has some
drawbacks:
- In PRs, people tend to push new changes without checking the failed tests
  (that's ok)
- On the main branch, failed jobs are simply restarted without anyone looking
  into the failures (understandable)
- Parametrised tests become flaky one by one, i.e. if `test[1]` is flaky,
  `test[2]` is not marked as flaky automatically (which may or may not
  actually be the case).

I suggest rerunning all failed tests to increase the stability of GitHub
jobs, and using the Grafana dashboard of flaky tests for deeper analysis.

## Summary of changes
- Rerun all failed tests, at most twice each (as illustrated below)
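
For illustration only (this snippet is not part of the change): with the
`pytest-rerunfailures` plugin, passing `--reruns 2` makes pytest retry any
failed test up to two more times, so a test is reported as failed only if
all three attempts fail.

```python
# Hypothetical test, used only to illustrate the `--reruns 2` semantics.
import random


def test_sometimes_fails():
    # Fails roughly half the time. With `pytest --reruns 2` this test gets
    # 1 original run + up to 2 reruns and is reported green as soon as one
    # attempt passes, i.e. it fails the job only if all 3 attempts fail.
    assert random.random() < 0.5
```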
Author: Alexander Bayandin
Date: 2024-11-28 19:02:57 +00:00
Committed by: GitHub
Parent: eb520a14ce
Commit: e04dd3be0b
9 changed files with 46 additions and 247 deletions


@@ -36,8 +36,8 @@ inputs:
description: 'Region name for real s3 tests'
required: false
default: ''
-rerun_flaky:
-description: 'Whether to rerun flaky tests'
+rerun_failed:
+description: 'Whether to rerun failed tests'
required: false
default: 'false'
pg_version:
@@ -108,7 +108,7 @@ runs:
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
-RERUN_FLAKY: ${{ inputs.rerun_flaky }}
+RERUN_FAILED: ${{ inputs.rerun_failed }}
PG_VERSION: ${{ inputs.pg_version }}
shell: bash -euxo pipefail {0}
run: |
@@ -154,15 +154,8 @@ runs:
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
fi
if [ "${RERUN_FLAKY}" == "true" ]; then
mkdir -p $TEST_OUTPUT
poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" \
--days 7 \
--output "$TEST_OUTPUT/flaky.json" \
--pg-version "${DEFAULT_PG_VERSION}" \
--build-type "${BUILD_TYPE}"
EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
if [ "${RERUN_FAILED}" == "true" ]; then
EXTRA_PARAMS="--reruns 2 $EXTRA_PARAMS"
fi
# We use pytest-split plugin to run benchmarks in parallel on different CI runners

View File

@@ -293,7 +293,7 @@ jobs:
run_with_real_s3: true
real_s3_bucket: neon-github-ci-tests
real_s3_region: eu-central-1
-rerun_flaky: true
+rerun_failed: true
pg_version: ${{ matrix.pg_version }}
env:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

poetry.lock (generated)

@@ -2563,18 +2563,18 @@ pytest = "*"
[[package]]
name = "pytest-rerunfailures"
version = "13.0"
version = "15.0"
description = "pytest plugin to re-run tests to eliminate flaky failures"
optional = false
python-versions = ">=3.7"
python-versions = ">=3.9"
files = [
{file = "pytest-rerunfailures-13.0.tar.gz", hash = "sha256:e132dbe420bc476f544b96e7036edd0a69707574209b6677263c950d19b09199"},
{file = "pytest_rerunfailures-13.0-py3-none-any.whl", hash = "sha256:34919cb3fcb1f8e5d4b940aa75ccdea9661bade925091873b7c6fa5548333069"},
{file = "pytest-rerunfailures-15.0.tar.gz", hash = "sha256:2d9ac7baf59f4c13ac730b47f6fa80e755d1ba0581da45ce30b72fb3542b4474"},
{file = "pytest_rerunfailures-15.0-py3-none-any.whl", hash = "sha256:dd150c4795c229ef44320adc9a0c0532c51b78bb7a6843a8c53556b9a611df1a"},
]
[package.dependencies]
packaging = ">=17.1"
pytest = ">=7"
pytest = ">=7.4,<8.2.2 || >8.2.2"
[[package]]
name = "pytest-split"
@@ -3524,4 +3524,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "21debe1116843e5d14bdf37d6e265c68c63a98a64ba04ec8b8a02af2e8d9f486"
content-hash = "426c385df93f578ba3537c40a269535e27fbcca1978b3cf266096ecbc298c6a9"


@@ -33,7 +33,7 @@ types-psutil = "^5.9.5.12"
types-toml = "^0.10.8.6"
pytest-httpserver = "^1.0.8"
aiohttp = "3.10.11"
pytest-rerunfailures = "^13.0"
pytest-rerunfailures = "^15.0"
types-pytest-lazy-fixture = "^0.6.3.3"
pytest-split = "^0.8.1"
zstandard = "^0.21.0"


@@ -1,147 +0,0 @@
#! /usr/bin/env python3
from __future__ import annotations
import argparse
import json
import logging
import os
from collections import defaultdict
from typing import TYPE_CHECKING
import psycopg2
import psycopg2.extras
import toml
if TYPE_CHECKING:
from typing import Any
FLAKY_TESTS_QUERY = """
SELECT
DISTINCT parent_suite, suite, name
FROM results
WHERE
started_at > CURRENT_DATE - INTERVAL '%s' day
AND (
(status IN ('failed', 'broken') AND reference = 'refs/heads/main')
OR flaky
)
;
"""
def main(args: argparse.Namespace):
connstr = args.connstr
interval_days = args.days
output = args.output
build_type = args.build_type
pg_version = args.pg_version
res: defaultdict[str, defaultdict[str, dict[str, bool]]]
res = defaultdict(lambda: defaultdict(dict))
try:
logging.info("connecting to the database...")
with psycopg2.connect(connstr, connect_timeout=30) as conn:
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
logging.info("fetching flaky tests...")
cur.execute(FLAKY_TESTS_QUERY, (interval_days,))
rows = cur.fetchall()
except psycopg2.OperationalError as exc:
logging.error("cannot fetch flaky tests from the DB due to an error", exc)
rows = []
# If a test run has non-default PAGESERVER_VIRTUAL_FILE_IO_ENGINE (i.e. not empty, not tokio-epoll-uring),
# use it to parametrize test name along with build_type and pg_version
#
# See test_runner/fixtures/parametrize.py for details
if (io_engine := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in (
"",
"tokio-epoll-uring",
):
pageserver_virtual_file_io_engine_parameter = f"-{io_engine}"
else:
pageserver_virtual_file_io_engine_parameter = ""
# re-use existing records of flaky tests from before parametrization by compaction_algorithm
def get_pageserver_default_tenant_config_compaction_algorithm() -> dict[str, Any] | None:
"""Duplicated from parametrize.py"""
toml_table = os.getenv("PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM")
if toml_table is None:
return None
v = toml.loads(toml_table)
assert isinstance(v, dict)
return v
pageserver_default_tenant_config_compaction_algorithm_parameter = ""
if (
explicit_default := get_pageserver_default_tenant_config_compaction_algorithm()
) is not None:
pageserver_default_tenant_config_compaction_algorithm_parameter = (
f"-{explicit_default['kind']}"
)
for row in rows:
# We don't want to automatically rerun tests in a performance suite
if row["parent_suite"] != "test_runner.regress":
continue
if row["name"].endswith("]"):
parametrized_test = row["name"].replace(
"[",
f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}-",
)
else:
parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}]"
res[row["parent_suite"]][row["suite"]][parametrized_test] = True
logging.info(
f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{parametrized_test}"
)
logging.info(f"saving results to {output.name}")
json.dump(res, output, indent=2)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Detect flaky tests in the last N days")
parser.add_argument(
"--output",
type=argparse.FileType("w"),
default="flaky.json",
help="path to output json file (default: flaky.json)",
)
parser.add_argument(
"--days",
required=False,
default=10,
type=int,
help="how many days to look back for flaky tests (default: 10)",
)
parser.add_argument(
"--build-type",
required=True,
type=str,
help="for which build type to create list of flaky tests (debug or release)",
)
parser.add_argument(
"--pg-version",
required=True,
type=int,
help="for which Postgres version to create list of flaky tests (14, 15, etc.)",
)
parser.add_argument(
"connstr",
help="connection string to the test results database",
)
args = parser.parse_args()
level = logging.INFO
logging.basicConfig(
format="%(message)s",
level=level,
)
main(args)


@@ -13,5 +13,5 @@ pytest_plugins = (
"fixtures.pg_stats",
"fixtures.compare_fixtures",
"fixtures.slow",
"fixtures.flaky",
"fixtures.reruns",
)


@@ -1,78 +0,0 @@
from __future__ import annotations
import json
from collections.abc import MutableMapping
from pathlib import Path
from typing import TYPE_CHECKING, cast
import pytest
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from allure_commons.types import LabelType
from allure_pytest.utils import allure_name, allure_suite_labels
from fixtures.log_helper import log
if TYPE_CHECKING:
from collections.abc import MutableMapping
from typing import Any
"""
The plugin reruns flaky tests.
It uses `pytest.mark.flaky` provided by `pytest-rerunfailures` plugin and flaky tests detected by `scripts/flaky_tests.py`
Note: the logic of getting flaky tests is extracted to a separate script to avoid running it for each of N xdist workers
"""
def pytest_addoption(parser: Parser):
parser.addoption(
"--flaky-tests-json",
action="store",
type=Path,
help="Path to json file with flaky tests generated by scripts/flaky_tests.py",
)
def pytest_collection_modifyitems(config: Config, items: list[pytest.Item]):
if not config.getoption("--flaky-tests-json"):
return
# Any error with getting flaky tests aren't critical, so just do not rerun any tests
flaky_json = config.getoption("--flaky-tests-json")
if not flaky_json.exists():
return
content = flaky_json.read_text()
try:
flaky_tests = json.loads(content)
except ValueError:
log.error(f"Can't parse {content} as json")
return
for item in items:
# Use the same logic for constructing test name as Allure does (we store allure-provided data in DB)
# Ref https://github.com/allure-framework/allure-python/blob/2.13.1/allure-pytest/src/listener.py#L98-L100
allure_labels = dict(allure_suite_labels(item))
parent_suite = str(allure_labels.get(LabelType.PARENT_SUITE))
suite = str(allure_labels.get(LabelType.SUITE))
params = item.callspec.params if hasattr(item, "callspec") else {}
name = allure_name(item, params)
if flaky_tests.get(parent_suite, {}).get(suite, {}).get(name, False):
# Rerun 3 times = 1 original run + 2 reruns
log.info(f"Marking {item.nodeid} as flaky. It will be rerun up to 3 times")
item.add_marker(pytest.mark.flaky(reruns=2))
# pytest-rerunfailures is not compatible with pytest-timeout (timeout is not set for reruns),
# we can workaround it by setting `timeout_func_only` to True[1].
# Unfortunately, setting `timeout_func_only = True` globally in pytest.ini is broken[2],
# but we still can do it using pytest marker.
#
# - [1] https://github.com/pytest-dev/pytest-rerunfailures/issues/99
# - [2] https://github.com/pytest-dev/pytest-timeout/issues/142
timeout_marker = item.get_closest_marker("timeout")
if timeout_marker is not None:
kwargs = cast("MutableMapping[str, Any]", timeout_marker.kwargs)
kwargs["func_only"] = True


@@ -30,7 +30,7 @@ def get_test_dir(request: FixtureRequest, top_output_dir: Path, prefix: str | No
test_name = request.node.name
test_dir = top_output_dir / f"{prefix or ''}{test_name.replace('/', '-')}"
-# We rerun flaky tests multiple times, use a separate directory for each run.
+# We rerun failed tests multiple times, use a separate directory for each run.
if (suffix := getattr(request.node, "execution_count", None)) is not None:
test_dir = test_dir.parent / f"{test_dir.name}-{suffix}"


@@ -0,0 +1,31 @@
from __future__ import annotations
from collections.abc import MutableMapping
from typing import TYPE_CHECKING, cast
import pytest
if TYPE_CHECKING:
from collections.abc import MutableMapping
from typing import Any
from _pytest.config import Config
def pytest_collection_modifyitems(config: Config, items: list[pytest.Item]):
# pytest-rerunfailures is not compatible with pytest-timeout (timeout is not set for reruns),
# we can workaround it by setting `timeout_func_only` to True[1].
# Unfortunately, setting `timeout_func_only = True` globally in pytest.ini is broken[2],
# but we still can do it using pytest marker.
#
# - [1] https://github.com/pytest-dev/pytest-rerunfailures/issues/99
# - [2] https://github.com/pytest-dev/pytest-timeout/issues/142
if not config.getoption("--reruns"):
return
for item in items:
timeout_marker = item.get_closest_marker("timeout")
if timeout_marker is not None:
kwargs = cast("MutableMapping[str, Any]", timeout_marker.kwargs)
kwargs["func_only"] = True