mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-20 14:40:37 +00:00
test_runner: rerun all failed tests (#9917)
## Problem Currently, we rerun only known flaky tests. This approach was chosen to reduce the number of tests that go unnoticed (by forcing people to take a look at failed tests and rerun the job manually), but it has some drawbacks: - In PRs, people tend to push new changes without checking failed tests (that's ok) - In the main, tests are just restarted without checking (understandable) - Parametrised tests become flaky one by one, i.e. if `test[1]` is flaky `, test[2]` is not marked as flaky automatically (which may or may not be the case). I suggest rerunning all failed tests to increase the stability of GitHub jobs and using the Grafana Dashboard with flaky tests for deeper analysis. ## Summary of changes - Rerun all failed tests twice at max
This commit is contained in:
committed by
GitHub
parent
eb520a14ce
commit
e04dd3be0b
@@ -1,147 +0,0 @@
|
||||
#! /usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
import toml
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Any
|
||||
|
||||
FLAKY_TESTS_QUERY = """
|
||||
SELECT
|
||||
DISTINCT parent_suite, suite, name
|
||||
FROM results
|
||||
WHERE
|
||||
started_at > CURRENT_DATE - INTERVAL '%s' day
|
||||
AND (
|
||||
(status IN ('failed', 'broken') AND reference = 'refs/heads/main')
|
||||
OR flaky
|
||||
)
|
||||
;
|
||||
"""
|
||||
|
||||
|
||||
def main(args: argparse.Namespace):
|
||||
connstr = args.connstr
|
||||
interval_days = args.days
|
||||
output = args.output
|
||||
|
||||
build_type = args.build_type
|
||||
pg_version = args.pg_version
|
||||
|
||||
res: defaultdict[str, defaultdict[str, dict[str, bool]]]
|
||||
res = defaultdict(lambda: defaultdict(dict))
|
||||
|
||||
try:
|
||||
logging.info("connecting to the database...")
|
||||
with psycopg2.connect(connstr, connect_timeout=30) as conn:
|
||||
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
||||
logging.info("fetching flaky tests...")
|
||||
cur.execute(FLAKY_TESTS_QUERY, (interval_days,))
|
||||
rows = cur.fetchall()
|
||||
except psycopg2.OperationalError as exc:
|
||||
logging.error("cannot fetch flaky tests from the DB due to an error", exc)
|
||||
rows = []
|
||||
|
||||
# If a test run has non-default PAGESERVER_VIRTUAL_FILE_IO_ENGINE (i.e. not empty, not tokio-epoll-uring),
|
||||
# use it to parametrize test name along with build_type and pg_version
|
||||
#
|
||||
# See test_runner/fixtures/parametrize.py for details
|
||||
if (io_engine := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in (
|
||||
"",
|
||||
"tokio-epoll-uring",
|
||||
):
|
||||
pageserver_virtual_file_io_engine_parameter = f"-{io_engine}"
|
||||
else:
|
||||
pageserver_virtual_file_io_engine_parameter = ""
|
||||
|
||||
# re-use existing records of flaky tests from before parametrization by compaction_algorithm
|
||||
def get_pageserver_default_tenant_config_compaction_algorithm() -> dict[str, Any] | None:
|
||||
"""Duplicated from parametrize.py"""
|
||||
toml_table = os.getenv("PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM")
|
||||
if toml_table is None:
|
||||
return None
|
||||
v = toml.loads(toml_table)
|
||||
assert isinstance(v, dict)
|
||||
return v
|
||||
|
||||
pageserver_default_tenant_config_compaction_algorithm_parameter = ""
|
||||
if (
|
||||
explicit_default := get_pageserver_default_tenant_config_compaction_algorithm()
|
||||
) is not None:
|
||||
pageserver_default_tenant_config_compaction_algorithm_parameter = (
|
||||
f"-{explicit_default['kind']}"
|
||||
)
|
||||
|
||||
for row in rows:
|
||||
# We don't want to automatically rerun tests in a performance suite
|
||||
if row["parent_suite"] != "test_runner.regress":
|
||||
continue
|
||||
|
||||
if row["name"].endswith("]"):
|
||||
parametrized_test = row["name"].replace(
|
||||
"[",
|
||||
f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}-",
|
||||
)
|
||||
else:
|
||||
parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}]"
|
||||
|
||||
res[row["parent_suite"]][row["suite"]][parametrized_test] = True
|
||||
|
||||
logging.info(
|
||||
f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{parametrized_test}"
|
||||
)
|
||||
|
||||
logging.info(f"saving results to {output.name}")
|
||||
json.dump(res, output, indent=2)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Detect flaky tests in the last N days")
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
type=argparse.FileType("w"),
|
||||
default="flaky.json",
|
||||
help="path to output json file (default: flaky.json)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--days",
|
||||
required=False,
|
||||
default=10,
|
||||
type=int,
|
||||
help="how many days to look back for flaky tests (default: 10)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--build-type",
|
||||
required=True,
|
||||
type=str,
|
||||
help="for which build type to create list of flaky tests (debug or release)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pg-version",
|
||||
required=True,
|
||||
type=int,
|
||||
help="for which Postgres version to create list of flaky tests (14, 15, etc.)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"connstr",
|
||||
help="connection string to the test results database",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
level = logging.INFO
|
||||
logging.basicConfig(
|
||||
format="%(message)s",
|
||||
level=level,
|
||||
)
|
||||
|
||||
main(args)
|
||||
Reference in New Issue
Block a user