From 4eedb3b6f188408cd74397f81e02fc4c416eb5f5 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 14 May 2024 18:03:08 +0200 Subject: [PATCH] test suite: allow overriding default compaction algorithm via env var (#7747) This PR allows setting the `PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM` env var to override the `tenant_config.compaction_algorithm` field in the initial `pageserver.toml` for all tests. I tested manually that this works by halting a test using pdb and inspecting the `effective_config` in the tenant status managment API. If the env var is set, the tests are parametrized by the `kind` tag field, allowing to do a matrix build in CI and let Allure summarize everything in a nice report. If the env var is not set, the tests are not parametrized. So, merging this PR doesn't cause problems for flaky test detection. In fact, it doesn't cause any runtime change if the env var is not set. There are some tests in the test suite that set used to override the entire tenant_config using `NeonEnvBuilder.pageserver_config_override`. Since config overrides are merged non-recursively, such overrides that don't specify `kind = ` cause a fallback to pageserver's built-in `DEFAULT_COMPACTION_ALGORITHM`. Such cases can be found using ``` ["']tenant_config\s*[='"] ``` We'll deal with these tests in a future PR. closes https://github.com/neondatabase/neon/issues/7555 --- scripts/flaky_tests.py | 25 ++++++++++++++++++++++--- test_runner/fixtures/neon_fixtures.py | 18 ++++++++++++++++++ test_runner/fixtures/parametrize.py | 27 ++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/scripts/flaky_tests.py b/scripts/flaky_tests.py index 878840fcee..919a9278a9 100755 --- a/scripts/flaky_tests.py +++ b/scripts/flaky_tests.py @@ -5,10 +5,11 @@ import json import logging import os from collections import defaultdict -from typing import DefaultDict, Dict +from typing import Any, DefaultDict, Dict, Optional import psycopg2 import psycopg2.extras +import toml FLAKY_TESTS_QUERY = """ SELECT @@ -58,6 +59,24 @@ def main(args: argparse.Namespace): else: pageserver_virtual_file_io_engine_parameter = "" + # re-use existing records of flaky tests from before parametrization by compaction_algorithm + def get_pageserver_default_tenant_config_compaction_algorithm() -> Optional[Dict[str, Any]]: + """Duplicated from parametrize.py""" + toml_table = os.getenv("PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM") + if toml_table is None: + return None + v = toml.loads(toml_table) + assert isinstance(v, dict) + return v + + pageserver_default_tenant_config_compaction_algorithm_parameter = "" + if ( + explicit_default := get_pageserver_default_tenant_config_compaction_algorithm() + ) is not None: + pageserver_default_tenant_config_compaction_algorithm_parameter = ( + f"-{explicit_default['kind']}" + ) + for row in rows: # We don't want to automatically rerun tests in a performance suite if row["parent_suite"] != "test_runner.regress": @@ -66,10 +85,10 @@ def main(args: argparse.Namespace): if row["name"].endswith("]"): parametrized_test = row["name"].replace( "[", - f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}-", + f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}-", ) else: - parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}]" + parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}]" res[row["parent_suite"]][row["suite"]][parametrized_test] = True diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 8432655370..62a4b974a3 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -467,6 +467,7 @@ class NeonEnvBuilder: initial_timeline: Optional[TimelineId] = None, pageserver_virtual_file_io_engine: Optional[str] = None, pageserver_aux_file_policy: Optional[AuxFileStore] = None, + pageserver_default_tenant_config_compaction_algorithm: Optional[Dict[str, Any]] = None, ): self.repo_dir = repo_dir self.rust_log_override = rust_log_override @@ -507,6 +508,14 @@ class NeonEnvBuilder: self.pageserver_virtual_file_io_engine: Optional[str] = pageserver_virtual_file_io_engine + self.pageserver_default_tenant_config_compaction_algorithm: Optional[ + Dict[str, Any] + ] = pageserver_default_tenant_config_compaction_algorithm + if self.pageserver_default_tenant_config_compaction_algorithm is not None: + log.debug( + f"Overriding pageserver default compaction algorithm to {self.pageserver_default_tenant_config_compaction_algorithm}" + ) + self.pageserver_get_vectored_impl: Optional[str] = None if os.getenv("PAGESERVER_GET_VECTORED_IMPL", "") == "vectored": self.pageserver_get_vectored_impl = "vectored" @@ -1103,6 +1112,11 @@ class NeonEnv: ps_cfg["get_impl"] = config.pageserver_get_impl if config.pageserver_validate_vectored_get is not None: ps_cfg["validate_vectored_get"] = config.pageserver_validate_vectored_get + if config.pageserver_default_tenant_config_compaction_algorithm is not None: + tenant_config = ps_cfg.setdefault("tenant_config", {}) + tenant_config[ + "compaction_algorithm" + ] = config.pageserver_default_tenant_config_compaction_algorithm if self.pageserver_remote_storage is not None: ps_cfg["remote_storage"] = remote_storage_to_toml_dict( @@ -1304,6 +1318,7 @@ def _shared_simple_env( pg_version: PgVersion, pageserver_virtual_file_io_engine: str, pageserver_aux_file_policy: Optional[AuxFileStore], + pageserver_default_tenant_config_compaction_algorithm: Optional[Dict[str, Any]], ) -> Iterator[NeonEnv]: """ # Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES @@ -1335,6 +1350,7 @@ def _shared_simple_env( test_output_dir=test_output_dir, pageserver_virtual_file_io_engine=pageserver_virtual_file_io_engine, pageserver_aux_file_policy=pageserver_aux_file_policy, + pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm, ) as builder: env = builder.init_start() @@ -1375,6 +1391,7 @@ def neon_env_builder( top_output_dir: Path, pageserver_virtual_file_io_engine: str, pageserver_aux_file_policy: Optional[AuxFileStore] = None, + pageserver_default_tenant_config_compaction_algorithm: Optional[Dict[str, Any]] = None, ) -> Iterator[NeonEnvBuilder]: """ Fixture to create a Neon environment for test. @@ -1409,6 +1426,7 @@ def neon_env_builder( test_output_dir=test_output_dir, test_overlay_dir=test_overlay_dir, pageserver_aux_file_policy=pageserver_aux_file_policy, + pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm, ) as builder: yield builder diff --git a/test_runner/fixtures/parametrize.py b/test_runner/fixtures/parametrize.py index 77523a542b..0227285822 100644 --- a/test_runner/fixtures/parametrize.py +++ b/test_runner/fixtures/parametrize.py @@ -1,7 +1,8 @@ import os -from typing import Optional +from typing import Any, Dict, Optional import pytest +import toml from _pytest.python import Metafunc from fixtures.pg_version import PgVersion @@ -37,6 +38,20 @@ def pageserver_aux_file_policy() -> Optional[AuxFileStore]: return None +def get_pageserver_default_tenant_config_compaction_algorithm() -> Optional[Dict[str, Any]]: + toml_table = os.getenv("PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM") + if toml_table is None: + return None + v = toml.loads(toml_table) + assert isinstance(v, dict) + return v + + +@pytest.fixture(scope="function", autouse=True) +def pageserver_default_tenant_config_compaction_algorithm() -> Optional[Dict[str, Any]]: + return get_pageserver_default_tenant_config_compaction_algorithm() + + def pytest_generate_tests(metafunc: Metafunc): if (bt := os.getenv("BUILD_TYPE")) is None: build_types = ["debug", "release"] @@ -60,6 +75,16 @@ def pytest_generate_tests(metafunc: Metafunc): ): metafunc.parametrize("pageserver_virtual_file_io_engine", [io_engine]) + # Same hack for pageserver_default_tenant_config_compaction_algorithm + if ( + explicit_default := get_pageserver_default_tenant_config_compaction_algorithm() + ) is not None: + metafunc.parametrize( + "pageserver_default_tenant_config_compaction_algorithm", + [explicit_default], + ids=[explicit_default["kind"]], + ) + # For performance tests, parametrize also by platform if ( "test_runner/performance" in metafunc.definition._nodeid