diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml
index 275f161019..1159627302 100644
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -36,8 +36,8 @@ inputs:
     description: 'Region name for real s3 tests'
     required: false
     default: ''
-  rerun_flaky:
-    description: 'Whether to rerun flaky tests'
+  rerun_failed:
+    description: 'Whether to rerun failed tests'
     required: false
     default: 'false'
   pg_version:
@@ -108,7 +108,7 @@ runs:
         COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
         ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
         ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
-        RERUN_FLAKY: ${{ inputs.rerun_flaky }}
+        RERUN_FAILED: ${{ inputs.rerun_failed }}
         PG_VERSION: ${{ inputs.pg_version }}
       shell: bash -euxo pipefail {0}
       run: |
@@ -154,15 +154,8 @@ runs:
           EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
         fi
 
-        if [ "${RERUN_FLAKY}" == "true" ]; then
-          mkdir -p $TEST_OUTPUT
-          poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" \
-            --days 7 \
-            --output "$TEST_OUTPUT/flaky.json" \
-            --pg-version "${DEFAULT_PG_VERSION}" \
-            --build-type "${BUILD_TYPE}"
-
-          EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
+        if [ "${RERUN_FAILED}" == "true" ]; then
+          EXTRA_PARAMS="--reruns 2 $EXTRA_PARAMS"
         fi
 
         # We use pytest-split plugin to run benchmarks in parallel on different CI runners
diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml
index bdf7c07c6a..42c32a23e3 100644
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -293,7 +293,7 @@ jobs:
           run_with_real_s3: true
           real_s3_bucket: neon-github-ci-tests
           real_s3_region: eu-central-1
-          rerun_flaky: true
+          rerun_failed: true
           pg_version: ${{ matrix.pg_version }}
         env:
           TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
diff --git a/poetry.lock b/poetry.lock
index e2fca7be47..59ae5cf1ca 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2563,18 +2563,18 @@ pytest = "*"
 
 [[package]]
 name = "pytest-rerunfailures"
-version = "13.0"
+version = "15.0"
 description = "pytest plugin to re-run tests to eliminate flaky failures"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.9"
 files = [
-    {file = "pytest-rerunfailures-13.0.tar.gz", hash = "sha256:e132dbe420bc476f544b96e7036edd0a69707574209b6677263c950d19b09199"},
-    {file = "pytest_rerunfailures-13.0-py3-none-any.whl", hash = "sha256:34919cb3fcb1f8e5d4b940aa75ccdea9661bade925091873b7c6fa5548333069"},
+    {file = "pytest-rerunfailures-15.0.tar.gz", hash = "sha256:2d9ac7baf59f4c13ac730b47f6fa80e755d1ba0581da45ce30b72fb3542b4474"},
+    {file = "pytest_rerunfailures-15.0-py3-none-any.whl", hash = "sha256:dd150c4795c229ef44320adc9a0c0532c51b78bb7a6843a8c53556b9a611df1a"},
 ]
 
 [package.dependencies]
 packaging = ">=17.1"
-pytest = ">=7"
+pytest = ">=7.4,<8.2.2 || >8.2.2"
 
 [[package]]
 name = "pytest-split"
@@ -3524,4 +3524,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "21debe1116843e5d14bdf37d6e265c68c63a98a64ba04ec8b8a02af2e8d9f486"
+content-hash = "426c385df93f578ba3537c40a269535e27fbcca1978b3cf266096ecbc298c6a9"
diff --git a/pyproject.toml b/pyproject.toml
index ccd3ab1864..01d15ee6bb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,7 @@ types-psutil = "^5.9.5.12"
 types-toml = "^0.10.8.6"
 pytest-httpserver = "^1.0.8"
 aiohttp = "3.10.11"
-pytest-rerunfailures = "^13.0"
+pytest-rerunfailures = "^15.0"
 types-pytest-lazy-fixture = "^0.6.3.3"
 pytest-split = "^0.8.1"
 zstandard = "^0.21.0"
diff --git a/scripts/flaky_tests.py b/scripts/flaky_tests.py
deleted file mode 100755
index 3fb668ed2d..0000000000
--- a/scripts/flaky_tests.py
+++ /dev/null
@@ -1,147 +0,0 @@
-#! /usr/bin/env python3
-
-from __future__ import annotations
-
-import argparse
-import json
-import logging
-import os
-from collections import defaultdict
-from typing import TYPE_CHECKING
-
-import psycopg2
-import psycopg2.extras
-import toml
-
-if TYPE_CHECKING:
-    from typing import Any
-
-FLAKY_TESTS_QUERY = """
-    SELECT
-        DISTINCT parent_suite, suite, name
-    FROM results
-    WHERE
-        started_at > CURRENT_DATE - INTERVAL '%s' day
-        AND (
-            (status IN ('failed', 'broken') AND reference = 'refs/heads/main')
-            OR flaky
-        )
-    ;
-"""
-
-
-def main(args: argparse.Namespace):
-    connstr = args.connstr
-    interval_days = args.days
-    output = args.output
-
-    build_type = args.build_type
-    pg_version = args.pg_version
-
-    res: defaultdict[str, defaultdict[str, dict[str, bool]]]
-    res = defaultdict(lambda: defaultdict(dict))
-
-    try:
-        logging.info("connecting to the database...")
-        with psycopg2.connect(connstr, connect_timeout=30) as conn:
-            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-                logging.info("fetching flaky tests...")
-                cur.execute(FLAKY_TESTS_QUERY, (interval_days,))
-                rows = cur.fetchall()
-    except psycopg2.OperationalError as exc:
-        logging.error("cannot fetch flaky tests from the DB due to an error", exc)
-        rows = []
-
-    # If a test run has non-default PAGESERVER_VIRTUAL_FILE_IO_ENGINE (i.e. not empty, not tokio-epoll-uring),
-    # use it to parametrize test name along with build_type and pg_version
-    #
-    # See test_runner/fixtures/parametrize.py for details
-    if (io_engine := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in (
-        "",
-        "tokio-epoll-uring",
-    ):
-        pageserver_virtual_file_io_engine_parameter = f"-{io_engine}"
-    else:
-        pageserver_virtual_file_io_engine_parameter = ""
-
-    # re-use existing records of flaky tests from before parametrization by compaction_algorithm
-    def get_pageserver_default_tenant_config_compaction_algorithm() -> dict[str, Any] | None:
-        """Duplicated from parametrize.py"""
-        toml_table = os.getenv("PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM")
-        if toml_table is None:
-            return None
-        v = toml.loads(toml_table)
-        assert isinstance(v, dict)
-        return v
-
-    pageserver_default_tenant_config_compaction_algorithm_parameter = ""
-    if (
-        explicit_default := get_pageserver_default_tenant_config_compaction_algorithm()
-    ) is not None:
-        pageserver_default_tenant_config_compaction_algorithm_parameter = (
-            f"-{explicit_default['kind']}"
-        )
-
-    for row in rows:
-        # We don't want to automatically rerun tests in a performance suite
-        if row["parent_suite"] != "test_runner.regress":
-            continue
-
-        if row["name"].endswith("]"):
-            parametrized_test = row["name"].replace(
-                "[",
-                f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}-",
-            )
-        else:
-            parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}]"
-
-        res[row["parent_suite"]][row["suite"]][parametrized_test] = True
-
-        logging.info(
-            f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{parametrized_test}"
-        )
-
-    logging.info(f"saving results to {output.name}")
-    json.dump(res, output, indent=2)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Detect flaky tests in the last N days")
-    parser.add_argument(
-        "--output",
-        type=argparse.FileType("w"),
-        default="flaky.json",
-        help="path to output json file (default: flaky.json)",
-    )
-    parser.add_argument(
-        "--days",
-        required=False,
-        default=10,
-        type=int,
-        help="how many days to look back for flaky tests (default: 10)",
-    )
-    parser.add_argument(
-        "--build-type",
-        required=True,
-        type=str,
-        help="for which build type to create list of flaky tests (debug or release)",
-    )
-    parser.add_argument(
-        "--pg-version",
-        required=True,
-        type=int,
-        help="for which Postgres version to create list of flaky tests (14, 15, etc.)",
-    )
-    parser.add_argument(
-        "connstr",
-        help="connection string to the test results database",
-    )
-    args = parser.parse_args()
-
-    level = logging.INFO
-    logging.basicConfig(
-        format="%(message)s",
-        level=level,
-    )
-
-    main(args)
diff --git a/test_runner/conftest.py b/test_runner/conftest.py
index 84eda52d33..887bfef478 100644
--- a/test_runner/conftest.py
+++ b/test_runner/conftest.py
@@ -13,5 +13,5 @@ pytest_plugins = (
     "fixtures.pg_stats",
     "fixtures.compare_fixtures",
     "fixtures.slow",
-    "fixtures.flaky",
+    "fixtures.reruns",
 )
diff --git a/test_runner/fixtures/flaky.py b/test_runner/fixtures/flaky.py
deleted file mode 100644
index 01634a29c5..0000000000
--- a/test_runner/fixtures/flaky.py
+++ /dev/null
@@ -1,78 +0,0 @@
-from __future__ import annotations
-
-import json
-from collections.abc import MutableMapping
-from pathlib import Path
-from typing import TYPE_CHECKING, cast
-
-import pytest
-from _pytest.config import Config
-from _pytest.config.argparsing import Parser
-from allure_commons.types import LabelType
-from allure_pytest.utils import allure_name, allure_suite_labels
-
-from fixtures.log_helper import log
-
-if TYPE_CHECKING:
-    from collections.abc import MutableMapping
-    from typing import Any
-
-
-"""
-The plugin reruns flaky tests.
-It uses `pytest.mark.flaky` provided by `pytest-rerunfailures` plugin and flaky tests detected by `scripts/flaky_tests.py`
-
-Note: the logic of getting flaky tests is extracted to a separate script to avoid running it for each of N xdist workers
-"""
-
-
-def pytest_addoption(parser: Parser):
-    parser.addoption(
-        "--flaky-tests-json",
-        action="store",
-        type=Path,
-        help="Path to json file with flaky tests generated by scripts/flaky_tests.py",
-    )
-
-
-def pytest_collection_modifyitems(config: Config, items: list[pytest.Item]):
-    if not config.getoption("--flaky-tests-json"):
-        return
-
-    # Any error with getting flaky tests aren't critical, so just do not rerun any tests
-    flaky_json = config.getoption("--flaky-tests-json")
-    if not flaky_json.exists():
-        return
-
-    content = flaky_json.read_text()
-    try:
-        flaky_tests = json.loads(content)
-    except ValueError:
-        log.error(f"Can't parse {content} as json")
-        return
-
-    for item in items:
-        # Use the same logic for constructing test name as Allure does (we store allure-provided data in DB)
-        # Ref https://github.com/allure-framework/allure-python/blob/2.13.1/allure-pytest/src/listener.py#L98-L100
-        allure_labels = dict(allure_suite_labels(item))
-        parent_suite = str(allure_labels.get(LabelType.PARENT_SUITE))
-        suite = str(allure_labels.get(LabelType.SUITE))
-        params = item.callspec.params if hasattr(item, "callspec") else {}
-        name = allure_name(item, params)
-
-        if flaky_tests.get(parent_suite, {}).get(suite, {}).get(name, False):
-            # Rerun 3 times = 1 original run + 2 reruns
-            log.info(f"Marking {item.nodeid} as flaky. It will be rerun up to 3 times")
-            item.add_marker(pytest.mark.flaky(reruns=2))
-
-            # pytest-rerunfailures is not compatible with pytest-timeout (timeout is not set for reruns),
-            # we can workaround it by setting `timeout_func_only` to True[1].
-            # Unfortunately, setting `timeout_func_only = True` globally in pytest.ini is broken[2],
-            # but we still can do it using pytest marker.
-            #
-            # - [1] https://github.com/pytest-dev/pytest-rerunfailures/issues/99
-            # - [2] https://github.com/pytest-dev/pytest-timeout/issues/142
-            timeout_marker = item.get_closest_marker("timeout")
-            if timeout_marker is not None:
-                kwargs = cast("MutableMapping[str, Any]", timeout_marker.kwargs)
-                kwargs["func_only"] = True
diff --git a/test_runner/fixtures/paths.py b/test_runner/fixtures/paths.py
index 1c71abea19..80777d65e9 100644
--- a/test_runner/fixtures/paths.py
+++ b/test_runner/fixtures/paths.py
@@ -30,7 +30,7 @@ def get_test_dir(request: FixtureRequest, top_output_dir: Path, prefix: str | No
     test_name = request.node.name
     test_dir = top_output_dir / f"{prefix or ''}{test_name.replace('/', '-')}"
 
-    # We rerun flaky tests multiple times, use a separate directory for each run.
+    # We rerun failed tests multiple times, use a separate directory for each run.
     if (suffix := getattr(request.node, "execution_count", None)) is not None:
         test_dir = test_dir.parent / f"{test_dir.name}-{suffix}"
 
diff --git a/test_runner/fixtures/reruns.py b/test_runner/fixtures/reruns.py
new file mode 100644
index 0000000000..f2a25ae8f6
--- /dev/null
+++ b/test_runner/fixtures/reruns.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+from collections.abc import MutableMapping
+from typing import TYPE_CHECKING, cast
+
+import pytest
+
+if TYPE_CHECKING:
+    from collections.abc import MutableMapping
+    from typing import Any
+
+    from _pytest.config import Config
+
+
+def pytest_collection_modifyitems(config: Config, items: list[pytest.Item]):
+    # pytest-rerunfailures is not compatible with pytest-timeout (timeout is not set for reruns),
+    # we can workaround it by setting `timeout_func_only` to True[1].
+    # Unfortunately, setting `timeout_func_only = True` globally in pytest.ini is broken[2],
+    # but we still can do it using pytest marker.
+    #
+    # - [1] https://github.com/pytest-dev/pytest-rerunfailures/issues/99
+    # - [2] https://github.com/pytest-dev/pytest-timeout/issues/142
+
+    if not config.getoption("--reruns"):
+        return
+
+    for item in items:
+        timeout_marker = item.get_closest_marker("timeout")
+        if timeout_marker is not None:
+            kwargs = cast("MutableMapping[str, Any]", timeout_marker.kwargs)
+            kwargs["func_only"] = True
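
Illustrative sketch (not part of the patch): with `--reruns 2`, pytest-rerunfailures retries a failed test up to two more times within the same session, and `fixtures.reruns` flips any `@pytest.mark.timeout` marker to `func_only=True` so the timeout still applies to each attempt instead of being dropped on reruns. The file name, counter, and timeout value below are made up for illustration; it assumes pytest, pytest-rerunfailures, and pytest-timeout are installed.

# test_rerun_sketch.py -- run with:  pytest --reruns 2 test_rerun_sketch.py
import pytest

# Module-level state survives reruns because the test is re-executed in the same process.
ATTEMPTS = {"count": 0}


@pytest.mark.timeout(30)  # fixtures.reruns would flip this marker to func_only=True
def test_flaky_sketch():
    ATTEMPTS["count"] += 1
    # Fails on the first attempt and passes on the rerun, so the session is
    # reported as "1 passed, 1 rerun" rather than a failure.
    assert ATTEMPTS["count"] > 1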