Mirror of https://github.com/neondatabase/neon.git, synced 2025-12-22 21:59:59 +00:00
test_runner: rerun all failed tests (#9917)
## Problem

Currently, we rerun only known flaky tests. This approach was chosen to reduce the number of tests that go unnoticed (by forcing people to look at failed tests and rerun the job manually), but it has some drawbacks:
- In PRs, people tend to push new changes without checking the failed tests (that's ok).
- On main, tests are simply restarted without being checked (understandable).
- Parametrised tests become flaky one by one, i.e. if `test[1]` is flaky, `test[2]` is not marked as flaky automatically (which may or may not be the desired behaviour).

I suggest rerunning all failed tests to increase the stability of GitHub jobs, and using the Grafana dashboard of flaky tests for deeper analysis.

## Summary of changes
- Rerun all failed tests, at most twice each.
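As context for what "at most twice" means in practice, here is a minimal, self-contained sketch of how pytest-rerunfailures handles `--reruns 2`; the file name, counter file, and failure pattern are invented purely for illustration and are not part of this change. A failing test is retried up to two extra times, and the run is reported as passed as soon as one attempt succeeds.

```python
# test_rerun_demo.py -- hypothetical demo, run with: pytest --reruns 2 test_rerun_demo.py
import os

COUNTER = "attempt.count"  # crude cross-attempt counter, demo only


def test_fails_once_then_passes():
    attempt = int(open(COUNTER).read()) if os.path.exists(COUNTER) else 0
    attempt += 1
    with open(COUNTER, "w") as f:
        f.write(str(attempt))
    # The first attempt fails; pytest-rerunfailures reruns the test and the
    # second attempt passes, so the overall job result is green.
    assert attempt >= 2, f"attempt {attempt} failed on purpose"
```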
Committed by: GitHub
Parent: eb520a14ce
Commit: e04dd3be0b
.github/actions/run-python-test-set/action.yml (vendored): 17 lines changed
@@ -36,8 +36,8 @@ inputs:
     description: 'Region name for real s3 tests'
     required: false
     default: ''
-  rerun_flaky:
-    description: 'Whether to rerun flaky tests'
+  rerun_failed:
+    description: 'Whether to rerun failed tests'
     required: false
     default: 'false'
   pg_version:
@@ -108,7 +108,7 @@ runs:
         COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
         ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
         ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
-        RERUN_FLAKY: ${{ inputs.rerun_flaky }}
+        RERUN_FAILED: ${{ inputs.rerun_failed }}
         PG_VERSION: ${{ inputs.pg_version }}
       shell: bash -euxo pipefail {0}
       run: |
@@ -154,15 +154,8 @@ runs:
           EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
         fi

-        if [ "${RERUN_FLAKY}" == "true" ]; then
-          mkdir -p $TEST_OUTPUT
-          poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" \
-            --days 7 \
-            --output "$TEST_OUTPUT/flaky.json" \
-            --pg-version "${DEFAULT_PG_VERSION}" \
-            --build-type "${BUILD_TYPE}"
-
-          EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
+        if [ "${RERUN_FAILED}" == "true" ]; then
+          EXTRA_PARAMS="--reruns 2 $EXTRA_PARAMS"
         fi

         # We use pytest-split plugin to run benchmarks in parallel on different CI runners
@@ -293,7 +293,7 @@ jobs:
           run_with_real_s3: true
           real_s3_bucket: neon-github-ci-tests
           real_s3_region: eu-central-1
-          rerun_flaky: true
+          rerun_failed: true
           pg_version: ${{ matrix.pg_version }}
         env:
           TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
poetry.lock (generated): 12 lines changed
@@ -2563,18 +2563,18 @@ pytest = "*"

 [[package]]
 name = "pytest-rerunfailures"
-version = "13.0"
+version = "15.0"
 description = "pytest plugin to re-run tests to eliminate flaky failures"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.9"
 files = [
-    {file = "pytest-rerunfailures-13.0.tar.gz", hash = "sha256:e132dbe420bc476f544b96e7036edd0a69707574209b6677263c950d19b09199"},
-    {file = "pytest_rerunfailures-13.0-py3-none-any.whl", hash = "sha256:34919cb3fcb1f8e5d4b940aa75ccdea9661bade925091873b7c6fa5548333069"},
+    {file = "pytest-rerunfailures-15.0.tar.gz", hash = "sha256:2d9ac7baf59f4c13ac730b47f6fa80e755d1ba0581da45ce30b72fb3542b4474"},
+    {file = "pytest_rerunfailures-15.0-py3-none-any.whl", hash = "sha256:dd150c4795c229ef44320adc9a0c0532c51b78bb7a6843a8c53556b9a611df1a"},
 ]

 [package.dependencies]
 packaging = ">=17.1"
-pytest = ">=7"
+pytest = ">=7.4,<8.2.2 || >8.2.2"

 [[package]]
 name = "pytest-split"
@@ -3524,4 +3524,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "21debe1116843e5d14bdf37d6e265c68c63a98a64ba04ec8b8a02af2e8d9f486"
+content-hash = "426c385df93f578ba3537c40a269535e27fbcca1978b3cf266096ecbc298c6a9"
@@ -33,7 +33,7 @@ types-psutil = "^5.9.5.12"
 types-toml = "^0.10.8.6"
 pytest-httpserver = "^1.0.8"
 aiohttp = "3.10.11"
-pytest-rerunfailures = "^13.0"
+pytest-rerunfailures = "^15.0"
 types-pytest-lazy-fixture = "^0.6.3.3"
 pytest-split = "^0.8.1"
 zstandard = "^0.21.0"
@@ -1,147 +0,0 @@
#! /usr/bin/env python3

from __future__ import annotations

import argparse
import json
import logging
import os
from collections import defaultdict
from typing import TYPE_CHECKING

import psycopg2
import psycopg2.extras
import toml

if TYPE_CHECKING:
    from typing import Any

FLAKY_TESTS_QUERY = """
    SELECT
        DISTINCT parent_suite, suite, name
    FROM results
    WHERE
        started_at > CURRENT_DATE - INTERVAL '%s' day
        AND (
            (status IN ('failed', 'broken') AND reference = 'refs/heads/main')
            OR flaky
        )
    ;
"""


def main(args: argparse.Namespace):
    connstr = args.connstr
    interval_days = args.days
    output = args.output

    build_type = args.build_type
    pg_version = args.pg_version

    res: defaultdict[str, defaultdict[str, dict[str, bool]]]
    res = defaultdict(lambda: defaultdict(dict))

    try:
        logging.info("connecting to the database...")
        with psycopg2.connect(connstr, connect_timeout=30) as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                logging.info("fetching flaky tests...")
                cur.execute(FLAKY_TESTS_QUERY, (interval_days,))
                rows = cur.fetchall()
    except psycopg2.OperationalError as exc:
        logging.error("cannot fetch flaky tests from the DB due to an error", exc)
        rows = []

    # If a test run has non-default PAGESERVER_VIRTUAL_FILE_IO_ENGINE (i.e. not empty, not tokio-epoll-uring),
    # use it to parametrize test name along with build_type and pg_version
    #
    # See test_runner/fixtures/parametrize.py for details
    if (io_engine := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in (
        "",
        "tokio-epoll-uring",
    ):
        pageserver_virtual_file_io_engine_parameter = f"-{io_engine}"
    else:
        pageserver_virtual_file_io_engine_parameter = ""

    # re-use existing records of flaky tests from before parametrization by compaction_algorithm
    def get_pageserver_default_tenant_config_compaction_algorithm() -> dict[str, Any] | None:
        """Duplicated from parametrize.py"""
        toml_table = os.getenv("PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM")
        if toml_table is None:
            return None
        v = toml.loads(toml_table)
        assert isinstance(v, dict)
        return v

    pageserver_default_tenant_config_compaction_algorithm_parameter = ""
    if (
        explicit_default := get_pageserver_default_tenant_config_compaction_algorithm()
    ) is not None:
        pageserver_default_tenant_config_compaction_algorithm_parameter = (
            f"-{explicit_default['kind']}"
        )

    for row in rows:
        # We don't want to automatically rerun tests in a performance suite
        if row["parent_suite"] != "test_runner.regress":
            continue

        if row["name"].endswith("]"):
            parametrized_test = row["name"].replace(
                "[",
                f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}-",
            )
        else:
            parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}]"

        res[row["parent_suite"]][row["suite"]][parametrized_test] = True

        logging.info(
            f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{parametrized_test}"
        )

    logging.info(f"saving results to {output.name}")
    json.dump(res, output, indent=2)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Detect flaky tests in the last N days")
    parser.add_argument(
        "--output",
        type=argparse.FileType("w"),
        default="flaky.json",
        help="path to output json file (default: flaky.json)",
    )
    parser.add_argument(
        "--days",
        required=False,
        default=10,
        type=int,
        help="how many days to look back for flaky tests (default: 10)",
    )
    parser.add_argument(
        "--build-type",
        required=True,
        type=str,
        help="for which build type to create list of flaky tests (debug or release)",
    )
    parser.add_argument(
        "--pg-version",
        required=True,
        type=int,
        help="for which Postgres version to create list of flaky tests (14, 15, etc.)",
    )
    parser.add_argument(
        "connstr",
        help="connection string to the test results database",
    )
    args = parser.parse_args()

    level = logging.INFO
    logging.basicConfig(
        format="%(message)s",
        level=level,
    )

    main(args)
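To make the name mangling in the deleted script above easier to follow, here is a small standalone sketch of the JSON shape it produced and how a parametrised test name was expanded. The suite and test names, build type, and Postgres version are made-up example values, and the extra io-engine/compaction-algorithm suffixes are omitted.

```python
import json

# Hypothetical inputs, mirroring the logic of the deleted script.
build_type, pg_version = "release", 16
row = {"parent_suite": "test_runner.regress", "suite": "test_example", "name": "test_something[1]"}

# "test_something[1]" -> "test_something[release-pg16-1]"
parametrized_test = row["name"].replace("[", f"[{build_type}-pg{pg_version}-")

flaky = {row["parent_suite"]: {row["suite"]: {parametrized_test: True}}}
print(json.dumps(flaky, indent=2))
# {
#   "test_runner.regress": {
#     "test_example": {
#       "test_something[release-pg16-1]": true
#     }
#   }
# }
```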
@@ -13,5 +13,5 @@ pytest_plugins = (
     "fixtures.pg_stats",
     "fixtures.compare_fixtures",
     "fixtures.slow",
-    "fixtures.flaky",
+    "fixtures.reruns",
 )
@@ -1,78 +0,0 @@
from __future__ import annotations

import json
from collections.abc import MutableMapping
from pathlib import Path
from typing import TYPE_CHECKING, cast

import pytest
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from allure_commons.types import LabelType
from allure_pytest.utils import allure_name, allure_suite_labels

from fixtures.log_helper import log

if TYPE_CHECKING:
    from collections.abc import MutableMapping
    from typing import Any


"""
The plugin reruns flaky tests.
It uses `pytest.mark.flaky` provided by `pytest-rerunfailures` plugin and flaky tests detected by `scripts/flaky_tests.py`

Note: the logic of getting flaky tests is extracted to a separate script to avoid running it for each of N xdist workers
"""


def pytest_addoption(parser: Parser):
    parser.addoption(
        "--flaky-tests-json",
        action="store",
        type=Path,
        help="Path to json file with flaky tests generated by scripts/flaky_tests.py",
    )


def pytest_collection_modifyitems(config: Config, items: list[pytest.Item]):
    if not config.getoption("--flaky-tests-json"):
        return

    # Any error with getting flaky tests aren't critical, so just do not rerun any tests
    flaky_json = config.getoption("--flaky-tests-json")
    if not flaky_json.exists():
        return

    content = flaky_json.read_text()
    try:
        flaky_tests = json.loads(content)
    except ValueError:
        log.error(f"Can't parse {content} as json")
        return

    for item in items:
        # Use the same logic for constructing test name as Allure does (we store allure-provided data in DB)
        # Ref https://github.com/allure-framework/allure-python/blob/2.13.1/allure-pytest/src/listener.py#L98-L100
        allure_labels = dict(allure_suite_labels(item))
        parent_suite = str(allure_labels.get(LabelType.PARENT_SUITE))
        suite = str(allure_labels.get(LabelType.SUITE))
        params = item.callspec.params if hasattr(item, "callspec") else {}
        name = allure_name(item, params)

        if flaky_tests.get(parent_suite, {}).get(suite, {}).get(name, False):
            # Rerun 3 times = 1 original run + 2 reruns
            log.info(f"Marking {item.nodeid} as flaky. It will be rerun up to 3 times")
            item.add_marker(pytest.mark.flaky(reruns=2))

            # pytest-rerunfailures is not compatible with pytest-timeout (timeout is not set for reruns),
            # we can workaround it by setting `timeout_func_only` to True[1].
            # Unfortunately, setting `timeout_func_only = True` globally in pytest.ini is broken[2],
            # but we still can do it using pytest marker.
            #
            # - [1] https://github.com/pytest-dev/pytest-rerunfailures/issues/99
            # - [2] https://github.com/pytest-dev/pytest-timeout/issues/142
            timeout_marker = item.get_closest_marker("timeout")
            if timeout_marker is not None:
                kwargs = cast("MutableMapping[str, Any]", timeout_marker.kwargs)
                kwargs["func_only"] = True
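The marker that the removed plugin above attached programmatically is the same one a test author could apply by hand; a hypothetical example is shown below (`reruns=2` means one original attempt plus up to two retries, i.e. up to three runs in total). With this commit, the blanket `--reruns 2` flag replaces this per-test marking.

```python
import pytest


@pytest.mark.flaky(reruns=2)  # equivalent to the item.add_marker(...) call in the removed plugin
def test_known_to_be_flaky():
    ...
```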
@@ -30,7 +30,7 @@ def get_test_dir(request: FixtureRequest, top_output_dir: Path, prefix: str | No
     test_name = request.node.name
     test_dir = top_output_dir / f"{prefix or ''}{test_name.replace('/', '-')}"

-    # We rerun flaky tests multiple times, use a separate directory for each run.
+    # We rerun failed tests multiple times, use a separate directory for each run.
     if (suffix := getattr(request.node, "execution_count", None)) is not None:
        test_dir = test_dir.parent / f"{test_dir.name}-{suffix}"

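A small sketch of what the suffix logic above produces; the paths, test name, and counter value are hypothetical, assuming `execution_count` is the per-attempt counter that pytest-rerunfailures sets on the item.

```python
from pathlib import Path

top_output_dir = Path("/tmp/test_output")  # hypothetical output root
test_name = "test_example[release-pg16]"
test_dir = top_output_dir / test_name.replace("/", "-")

# On a rerun the item carries an execution counter; appending it gives every
# attempt its own output directory instead of overwriting the previous one.
execution_count = 2  # hypothetical: second attempt (first rerun)
test_dir = test_dir.parent / f"{test_dir.name}-{execution_count}"
print(test_dir)  # /tmp/test_output/test_example[release-pg16]-2
```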
test_runner/fixtures/reruns.py (new file): 31 lines
@@ -0,0 +1,31 @@
from __future__ import annotations

from collections.abc import MutableMapping
from typing import TYPE_CHECKING, cast

import pytest

if TYPE_CHECKING:
    from collections.abc import MutableMapping
    from typing import Any

    from _pytest.config import Config


def pytest_collection_modifyitems(config: Config, items: list[pytest.Item]):
    # pytest-rerunfailures is not compatible with pytest-timeout (timeout is not set for reruns),
    # we can workaround it by setting `timeout_func_only` to True[1].
    # Unfortunately, setting `timeout_func_only = True` globally in pytest.ini is broken[2],
    # but we still can do it using pytest marker.
    #
    # - [1] https://github.com/pytest-dev/pytest-rerunfailures/issues/99
    # - [2] https://github.com/pytest-dev/pytest-timeout/issues/142

    if not config.getoption("--reruns"):
        return

    for item in items:
        timeout_marker = item.get_closest_marker("timeout")
        if timeout_marker is not None:
            kwargs = cast("MutableMapping[str, Any]", timeout_marker.kwargs)
            kwargs["func_only"] = True
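In effect, whenever `--reruns` is set, the new plugin above rewrites every plain timeout marker into its `func_only` form. A hedged before/after sketch for a hypothetical test (the timeout value is an example):

```python
import pytest


# What a test typically declares:
@pytest.mark.timeout(600)
def test_with_timeout():
    ...


# What the hook effectively makes it behave like when --reruns is enabled, so the
# timeout is applied to the test function itself on each attempt (per the upstream
# issues referenced above, the timeout is otherwise not set for reruns):
@pytest.mark.timeout(600, func_only=True)
def test_with_timeout_func_only():
    ...
```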