mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-27 16:12:56 +00:00
This PR allows setting the `PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM` env var to override the `tenant_config.compaction_algorithm` field in the initial `pageserver.toml` for all tests. I tested manually that this works by halting a test using pdb and inspecting the `effective_config` in the tenant status management API. If the env var is set, the tests are parametrized by the `kind` tag field, which allows doing a matrix build in CI and letting Allure summarize everything in a nice report. If the env var is not set, the tests are not parametrized. So, merging this PR doesn't cause problems for flaky test detection. In fact, it doesn't cause any runtime change if the env var is not set. There are some tests in the test suite that used to override the entire tenant_config using `NeonEnvBuilder.pageserver_config_override`. Since config overrides are merged non-recursively, such overrides that don't specify `kind = ` cause a fallback to pageserver's built-in `DEFAULT_COMPACTION_ALGORITHM`. Such cases can be found using ``` ["']tenant_config\s*[='"] ``` We'll deal with these tests in a future PR. closes https://github.com/neondatabase/neon/issues/7555
143 lines
4.6 KiB
Python
Executable File
143 lines
4.6 KiB
Python
Executable File
#! /usr/bin/env python3
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
from collections import defaultdict
|
|
from typing import Any, DefaultDict, Dict, Optional
|
|
|
|
import psycopg2
|
|
import psycopg2.extras
|
|
import toml
|
|
|
|
# SQL used to list tests that were recently flaky: rows from `results` started
# within the look-back window that either failed/broke on the main branch or
# were explicitly marked as flaky.
#
# The single `%s` placeholder is the look-back window in days. It sits inside
# quotes (INTERVAL '%s' day), so it must be bound to an integer: a string
# parameter would be rendered with its own quotes and break the statement.
FLAKY_TESTS_QUERY = """
    SELECT
        DISTINCT parent_suite, suite, name
    FROM results
    WHERE
        started_at > CURRENT_DATE - INTERVAL '%s' day
        AND (
            (status IN ('failed', 'broken') AND reference = 'refs/heads/main')
            OR flaky
        )
    ;
"""
|
|
|
|
|
|
def main(args: argparse.Namespace):
    """Collect recently flaky regression tests and dump them as JSON.

    Runs ``FLAKY_TESTS_QUERY`` against the test results database for the last
    ``args.days`` days, keeps only rows whose ``parent_suite`` is
    ``test_runner.regress``, rewrites every test name so that it carries the
    parametrization prefix of the current run (build type, Postgres version
    and, when selected through environment variables, the pageserver IO engine
    and the compaction algorithm kind), and writes the resulting
    ``{parent_suite: {suite: {test_name: true}}}`` mapping to ``args.output``.

    If the database raises ``psycopg2.OperationalError`` the error is logged
    and an empty mapping is written instead of failing.

    Args:
        args: parsed command line arguments; reads ``connstr``, ``days``,
            ``output`` (a writable file object), ``build_type`` and
            ``pg_version``.
    """
    connstr = args.connstr
    interval_days = args.days
    output = args.output

    build_type = args.build_type
    pg_version = args.pg_version

    res: DefaultDict[str, DefaultDict[str, Dict[str, bool]]]
    res = defaultdict(lambda: defaultdict(dict))

    try:
        logging.info("connecting to the database...")
        conn = psycopg2.connect(connstr, connect_timeout=30)
        try:
            # `with conn` only wraps a transaction (commit/rollback); it does
            # not close the connection, hence the explicit close() below.
            with conn, conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                logging.info("fetching flaky tests...")
                cur.execute(FLAKY_TESTS_QUERY, (interval_days,))
                rows = cur.fetchall()
        finally:
            conn.close()
    except psycopg2.OperationalError as exc:
        # The exception needs a %s placeholder: extra logging arguments without
        # one make the record fail to format and the message is lost.
        logging.error("cannot fetch flaky tests from the DB due to an error: %s", exc)
        rows = []

    # If a test run has non-default PAGESERVER_VIRTUAL_FILE_IO_ENGINE (i.e. not empty, not tokio-epoll-uring),
    # use it to parametrize test name along with build_type and pg_version
    #
    # See test_runner/fixtures/parametrize.py for details
    if (io_engine := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in (
        "",
        "tokio-epoll-uring",
    ):
        pageserver_virtual_file_io_engine_parameter = f"-{io_engine}"
    else:
        pageserver_virtual_file_io_engine_parameter = ""

    # re-use existing records of flaky tests from before parametrization by compaction_algorithm
    def get_pageserver_default_tenant_config_compaction_algorithm() -> Optional[Dict[str, Any]]:
        """Duplicated from parametrize.py"""
        toml_table = os.getenv("PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM")
        if toml_table is None:
            return None
        v = toml.loads(toml_table)
        assert isinstance(v, dict)
        return v

    pageserver_default_tenant_config_compaction_algorithm_parameter = ""
    if (
        explicit_default := get_pageserver_default_tenant_config_compaction_algorithm()
    ) is not None:
        pageserver_default_tenant_config_compaction_algorithm_parameter = (
            f"-{explicit_default['kind']}"
        )

    # The prefix is identical for every row, so build it once.
    run_parameters = (
        f"{build_type}-pg{pg_version}"
        f"{pageserver_virtual_file_io_engine_parameter}"
        f"{pageserver_default_tenant_config_compaction_algorithm_parameter}"
    )

    for row in rows:
        # We don't want to automatically rerun tests in a performance suite
        if row["parent_suite"] != "test_runner.regress":
            continue

        if row["name"].endswith("]"):
            # Already parametrized: put the run parameters in front of the
            # existing ones. Only the first "[" opens the parameter list; any
            # later "[" belongs to a parameter value and must stay untouched.
            parametrized_test = row["name"].replace("[", f"[{run_parameters}-", 1)
        else:
            parametrized_test = f"{row['name']}[{run_parameters}]"

        res[row["parent_suite"]][row["suite"]][parametrized_test] = True

        logging.info(
            f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{parametrized_test}"
        )

    logging.info(f"saving results to {output.name}")
    json.dump(res, output, indent=2)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: declare the command line interface, parse it, set up
    # plain-message logging at INFO level and hand the parsed options to main().
    cli = argparse.ArgumentParser(description="Detect flaky tests in the last N days")

    # Where the JSON result goes; argparse opens the file for writing.
    cli.add_argument(
        "--output",
        default="flaky.json",
        type=argparse.FileType("w"),
        help="path to output json file (default: flaky.json)",
    )
    # Size of the look-back window.
    cli.add_argument(
        "--days",
        type=int,
        default=10,
        required=False,
        help="how many days to look back for flaky tests (default: 10)",
    )
    # Parameters of the current run, used to build the parametrized test names.
    cli.add_argument(
        "--build-type",
        type=str,
        required=True,
        help="for which build type to create list of flaky tests (debug or release)",
    )
    cli.add_argument(
        "--pg-version",
        type=int,
        required=True,
        help="for which Postgres version to create list of flaky tests (14, 15, etc.)",
    )
    # The only positional argument: how to reach the results database.
    cli.add_argument("connstr", help="connection string to the test results database")

    options = cli.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(message)s")

    main(options)
|