From 091a175a3e02d319468efe15bb9765a3a9e29f4b Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:29:54 +0200 Subject: [PATCH] Test versions mismatch (#9167) ## Problem We faced the problem of incompatibility of the different components of different versions. This should be detected automatically to prevent production bugs. ## Summary of changes The test for this situation was implemented Co-authored-by: Alexander Bayandin --- test_runner/README.md | 12 +++ test_runner/fixtures/neon_fixtures.py | 52 +++++++++++ test_runner/fixtures/paths.py | 37 +++++++- test_runner/fixtures/utils.py | 33 +++++++ test_runner/regress/test_compatibility.py | 91 +++++++++++++------ .../regress/test_storage_controller.py | 12 ++- 6 files changed, 206 insertions(+), 31 deletions(-) diff --git a/test_runner/README.md b/test_runner/README.md index d754e60d17..e087241c1f 100644 --- a/test_runner/README.md +++ b/test_runner/README.md @@ -64,10 +64,12 @@ By default performance tests are excluded. To run them explicitly pass performan Useful environment variables: `NEON_BIN`: The directory where neon binaries can be found. +`COMPATIBILITY_NEON_BIN`: The directory where the previous version of Neon binaries can be found `POSTGRES_DISTRIB_DIR`: The directory where postgres distribution can be found. Since pageserver supports several postgres versions, `POSTGRES_DISTRIB_DIR` must contain a subdirectory for each version with naming convention `v{PG_VERSION}/`. Inside that dir, a `bin/postgres` binary should be present. +`COMPATIBILITY_POSTGRES_DISTRIB_DIR`: The directory where the prevoius version of postgres distribution can be found. `DEFAULT_PG_VERSION`: The version of Postgres to use, This is used to construct full path to the postgres binaries. Format is 2-digit major version nubmer, i.e. `DEFAULT_PG_VERSION=16` @@ -294,6 +296,16 @@ def test_foobar2(neon_env_builder: NeonEnvBuilder): client.timeline_detail(tenant_id=tenant_id, timeline_id=timeline_id) ``` +All the test which rely on NeonEnvBuilder, can check the various version combinations of the components. +To do this yuo may want to add the parametrize decorator with the function fixtures.utils.allpairs_versions() +E.g. + +```python +@pytest.mark.parametrize(**fixtures.utils.allpairs_versions()) +def test_something( +... +``` + For more information about pytest fixtures, see https://docs.pytest.org/en/stable/fixture.html At the end of a test, all the nodes in the environment are automatically stopped, so you diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 9a60de922c..7789855fe4 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -75,6 +75,7 @@ from fixtures.safekeeper.http import SafekeeperHttpClient from fixtures.safekeeper.utils import wait_walreceivers_absent from fixtures.utils import ( ATTACHMENT_NAME_REGEX, + COMPONENT_BINARIES, allure_add_grafana_links, assert_no_errors, get_dir_size, @@ -316,11 +317,14 @@ class NeonEnvBuilder: run_id: uuid.UUID, mock_s3_server: MockS3Server, neon_binpath: Path, + compatibility_neon_binpath: Path, pg_distrib_dir: Path, + compatibility_pg_distrib_dir: Path, pg_version: PgVersion, test_name: str, top_output_dir: Path, test_output_dir: Path, + combination, test_overlay_dir: Optional[Path] = None, pageserver_remote_storage: Optional[RemoteStorage] = None, # toml that will be decomposed into `--config-override` flags during `pageserver --init` @@ -402,6 +406,19 @@ class NeonEnvBuilder: "test_" ), "Unexpectedly instantiated from outside a test function" self.test_name = test_name + self.compatibility_neon_binpath = compatibility_neon_binpath + self.compatibility_pg_distrib_dir = compatibility_pg_distrib_dir + self.version_combination = combination + self.mixdir = self.test_output_dir / "mixdir_neon" + if self.version_combination is not None: + assert ( + self.compatibility_neon_binpath is not None + ), "the environment variable COMPATIBILITY_NEON_BIN is required when using mixed versions" + assert ( + self.compatibility_pg_distrib_dir is not None + ), "the environment variable COMPATIBILITY_POSTGRES_DISTRIB_DIR is required when using mixed versions" + self.mixdir.mkdir(mode=0o755, exist_ok=True) + self._mix_versions() def init_configs(self, default_remote_storage_if_missing: bool = True) -> NeonEnv: # Cannot create more than one environment from one builder @@ -602,6 +619,21 @@ class NeonEnvBuilder: return self.env + def _mix_versions(self): + assert self.version_combination is not None, "version combination must be set" + for component, paths in COMPONENT_BINARIES.items(): + directory = ( + self.neon_binpath + if self.version_combination[component] == "new" + else self.compatibility_neon_binpath + ) + for filename in paths: + destination = self.mixdir / filename + destination.symlink_to(directory / filename) + if self.version_combination["compute"] == "old": + self.pg_distrib_dir = self.compatibility_pg_distrib_dir + self.neon_binpath = self.mixdir + def overlay_mount(self, ident: str, srcdir: Path, dstdir: Path): """ Mount `srcdir` as an overlayfs mount at `dstdir`. @@ -1350,7 +1382,9 @@ def neon_simple_env( top_output_dir: Path, test_output_dir: Path, neon_binpath: Path, + compatibility_neon_binpath: Path, pg_distrib_dir: Path, + compatibility_pg_distrib_dir: Path, pg_version: PgVersion, pageserver_virtual_file_io_engine: str, pageserver_aux_file_policy: Optional[AuxFileStore], @@ -1365,6 +1399,11 @@ def neon_simple_env( # Create the environment in the per-test output directory repo_dir = get_test_repo_dir(request, top_output_dir) + combination = ( + request._pyfuncitem.callspec.params["combination"] + if "combination" in request._pyfuncitem.callspec.params + else None + ) with NeonEnvBuilder( top_output_dir=top_output_dir, @@ -1372,7 +1411,9 @@ def neon_simple_env( port_distributor=port_distributor, mock_s3_server=mock_s3_server, neon_binpath=neon_binpath, + compatibility_neon_binpath=compatibility_neon_binpath, pg_distrib_dir=pg_distrib_dir, + compatibility_pg_distrib_dir=compatibility_pg_distrib_dir, pg_version=pg_version, run_id=run_id, preserve_database_files=cast(bool, pytestconfig.getoption("--preserve-database-files")), @@ -1382,6 +1423,7 @@ def neon_simple_env( pageserver_aux_file_policy=pageserver_aux_file_policy, pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm, pageserver_virtual_file_io_mode=pageserver_virtual_file_io_mode, + combination=combination, ) as builder: env = builder.init_start() @@ -1395,7 +1437,9 @@ def neon_env_builder( port_distributor: PortDistributor, mock_s3_server: MockS3Server, neon_binpath: Path, + compatibility_neon_binpath: Path, pg_distrib_dir: Path, + compatibility_pg_distrib_dir: Path, pg_version: PgVersion, run_id: uuid.UUID, request: FixtureRequest, @@ -1422,6 +1466,11 @@ def neon_env_builder( # Create the environment in the test-specific output dir repo_dir = os.path.join(test_output_dir, "repo") + combination = ( + request._pyfuncitem.callspec.params["combination"] + if "combination" in request._pyfuncitem.callspec.params + else None + ) # Return the builder to the caller with NeonEnvBuilder( @@ -1430,7 +1479,10 @@ def neon_env_builder( port_distributor=port_distributor, mock_s3_server=mock_s3_server, neon_binpath=neon_binpath, + compatibility_neon_binpath=compatibility_neon_binpath, pg_distrib_dir=pg_distrib_dir, + compatibility_pg_distrib_dir=compatibility_pg_distrib_dir, + combination=combination, pg_version=pg_version, run_id=run_id, preserve_database_files=cast(bool, pytestconfig.getoption("--preserve-database-files")), diff --git a/test_runner/fixtures/paths.py b/test_runner/fixtures/paths.py index cffeb47ee8..65f8e432b0 100644 --- a/test_runner/fixtures/paths.py +++ b/test_runner/fixtures/paths.py @@ -95,7 +95,29 @@ def neon_binpath(base_dir: Path, build_type: str) -> Iterator[Path]: if not (binpath / "pageserver").exists(): raise Exception(f"neon binaries not found at '{binpath}'") - yield binpath + yield binpath.absolute() + + +@pytest.fixture(scope="session") +def compatibility_snapshot_dir() -> Iterator[Path]: + if os.getenv("REMOTE_ENV"): + return + compatibility_snapshot_dir_env = os.environ.get("COMPATIBILITY_SNAPSHOT_DIR") + assert ( + compatibility_snapshot_dir_env is not None + ), "COMPATIBILITY_SNAPSHOT_DIR is not set. It should be set to `compatibility_snapshot_pg(PG_VERSION)` path generateted by test_create_snapshot (ideally generated by the previous version of Neon)" + compatibility_snapshot_dir = Path(compatibility_snapshot_dir_env).resolve() + yield compatibility_snapshot_dir + + +@pytest.fixture(scope="session") +def compatibility_neon_binpath() -> Optional[Iterator[Path]]: + if os.getenv("REMOTE_ENV"): + return + comp_binpath = None + if env_compatibility_neon_binpath := os.environ.get("COMPATIBILITY_NEON_BIN"): + comp_binpath = Path(env_compatibility_neon_binpath).resolve().absolute() + yield comp_binpath @pytest.fixture(scope="session") @@ -109,6 +131,19 @@ def pg_distrib_dir(base_dir: Path) -> Iterator[Path]: yield distrib_dir +@pytest.fixture(scope="session") +def compatibility_pg_distrib_dir() -> Optional[Iterator[Path]]: + compat_distrib_dir = None + if env_compat_postgres_bin := os.environ.get("COMPATIBILITY_POSTGRES_DISTRIB_DIR"): + compat_distrib_dir = Path(env_compat_postgres_bin).resolve() + if not compat_distrib_dir.exists(): + raise Exception(f"compatibility postgres directory not found at {compat_distrib_dir}") + + if compat_distrib_dir: + log.info(f"compatibility_pg_distrib_dir is {compat_distrib_dir}") + yield compat_distrib_dir + + @pytest.fixture(scope="session") def top_output_dir(base_dir: Path) -> Iterator[Path]: # Compute the top-level directory for all tests. diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index ca1be35880..76575d330c 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -37,6 +37,23 @@ if TYPE_CHECKING: Fn = TypeVar("Fn", bound=Callable[..., Any]) +COMPONENT_BINARIES = { + "storage_controller": ("storage_controller",), + "storage_broker": ("storage_broker",), + "compute": ("compute_ctl",), + "safekeeper": ("safekeeper",), + "pageserver": ("pageserver", "pagectl"), +} +# Disable auto-formatting for better readability +# fmt: off +VERSIONS_COMBINATIONS = ( + {"storage_controller": "new", "storage_broker": "new", "compute": "new", "safekeeper": "new", "pageserver": "new"}, + {"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "old", "pageserver": "old"}, + {"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "old", "pageserver": "new"}, + {"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "new", "pageserver": "new"}, + {"storage_controller": "old", "storage_broker": "old", "compute": "new", "safekeeper": "new", "pageserver": "new"}, +) +# fmt: on def subprocess_capture( @@ -607,3 +624,19 @@ def human_bytes(amt: float) -> str: amt = amt / 1024 raise RuntimeError("unreachable") + + +def allpairs_versions(): + """ + Returns a dictionary with arguments for pytest parametrize + to test the compatibility with the previous version of Neon components + combinations were pre-computed to test all the pairs of the components with + the different versions. + """ + ids = [] + for pair in VERSIONS_COMBINATIONS: + cur_id = [] + for component in sorted(pair.keys()): + cur_id.append(pair[component][0]) + ids.append(f"combination_{''.join(cur_id)}") + return {"argnames": "combination", "argvalues": VERSIONS_COMBINATIONS, "ids": ids} diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 791e38383e..96ba3dd5a4 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -9,6 +9,7 @@ from dataclasses import dataclass from pathlib import Path from typing import TYPE_CHECKING +import fixtures.utils import pytest import toml from fixtures.common_types import TenantId, TimelineId @@ -93,6 +94,34 @@ if TYPE_CHECKING: # # Run forward compatibility test # ./scripts/pytest -k test_forward_compatibility # +# +# How to run `test_version_mismatch` locally: +# +# export DEFAULT_PG_VERSION=16 +# export BUILD_TYPE=release +# export CHECK_ONDISK_DATA_COMPATIBILITY=true +# export COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE} +# export COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install +# export NEON_BIN=target/release +# export POSTGRES_DISTRIB_DIR=pg_install +# +# # Build previous version of binaries and store them somewhere: +# rm -rf pg_install target +# git checkout +# CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc` +# mkdir -p neon_previous/target +# cp -a target/${BUILD_TYPE} ./neon_previous/target/${BUILD_TYPE} +# cp -a pg_install ./neon_previous/pg_install +# +# # Build current version of binaries and create a data snapshot: +# rm -rf pg_install target +# git checkout +# CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc` +# ./scripts/pytest -k test_create_snapshot +# +# # Run the version mismatch test +# ./scripts/pytest -k test_version_mismatch + check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif( os.environ.get("CHECK_ONDISK_DATA_COMPATIBILITY") is None, @@ -166,16 +195,11 @@ def test_backward_compatibility( neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_version: PgVersion, + compatibility_snapshot_dir: Path, ): """ Test that the new binaries can read old data """ - compatibility_snapshot_dir_env = os.environ.get("COMPATIBILITY_SNAPSHOT_DIR") - assert ( - compatibility_snapshot_dir_env is not None - ), f"COMPATIBILITY_SNAPSHOT_DIR is not set. It should be set to `compatibility_snapshot_pg{pg_version.v_prefixed}` path generateted by test_create_snapshot (ideally generated by the previous version of Neon)" - compatibility_snapshot_dir = Path(compatibility_snapshot_dir_env).resolve() - breaking_changes_allowed = ( os.environ.get("ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE", "false").lower() == "true" ) @@ -214,27 +238,11 @@ def test_forward_compatibility( test_output_dir: Path, top_output_dir: Path, pg_version: PgVersion, + compatibility_snapshot_dir: Path, ): """ Test that the old binaries can read new data """ - compatibility_neon_bin_env = os.environ.get("COMPATIBILITY_NEON_BIN") - assert compatibility_neon_bin_env is not None, ( - "COMPATIBILITY_NEON_BIN is not set. It should be set to a path with Neon binaries " - "(ideally generated by the previous version of Neon)" - ) - compatibility_neon_bin = Path(compatibility_neon_bin_env).resolve() - - compatibility_postgres_distrib_dir_env = os.environ.get("COMPATIBILITY_POSTGRES_DISTRIB_DIR") - assert ( - compatibility_postgres_distrib_dir_env is not None - ), "COMPATIBILITY_POSTGRES_DISTRIB_DIR is not set. It should be set to a pg_install directrory (ideally generated by the previous version of Neon)" - compatibility_postgres_distrib_dir = Path(compatibility_postgres_distrib_dir_env).resolve() - - compatibility_snapshot_dir = ( - top_output_dir / f"compatibility_snapshot_pg{pg_version.v_prefixed}" - ) - breaking_changes_allowed = ( os.environ.get("ALLOW_FORWARD_COMPATIBILITY_BREAKAGE", "false").lower() == "true" ) @@ -245,9 +253,14 @@ def test_forward_compatibility( # Use previous version's production binaries (pageserver, safekeeper, pg_distrib_dir, etc.). # But always use the current version's neon_local binary. # This is because we want to test the compatibility of the data format, not the compatibility of the neon_local CLI. - neon_env_builder.neon_binpath = compatibility_neon_bin - neon_env_builder.pg_distrib_dir = compatibility_postgres_distrib_dir - neon_env_builder.neon_local_binpath = neon_env_builder.neon_local_binpath + assert ( + neon_env_builder.compatibility_neon_binpath is not None + ), "the environment variable COMPATIBILITY_NEON_BIN is required" + assert ( + neon_env_builder.compatibility_pg_distrib_dir is not None + ), "the environment variable COMPATIBILITY_POSTGRES_DISTRIB_DIR is required" + neon_env_builder.neon_binpath = neon_env_builder.compatibility_neon_binpath + neon_env_builder.pg_distrib_dir = neon_env_builder.compatibility_pg_distrib_dir env = neon_env_builder.from_repo_dir( compatibility_snapshot_dir / "repo", @@ -558,3 +571,29 @@ def test_historic_storage_formats( env.pageserver.http_client().timeline_compact( dataset.tenant_id, existing_timeline_id, force_image_layer_creation=True ) + + +@check_ondisk_data_compatibility_if_enabled +@pytest.mark.xdist_group("compatibility") +@pytest.mark.parametrize(**fixtures.utils.allpairs_versions()) +def test_versions_mismatch( + neon_env_builder: NeonEnvBuilder, + test_output_dir: Path, + pg_version: PgVersion, + compatibility_snapshot_dir, + combination, +): + """ + Checks compatibility of different combinations of versions of the components + """ + neon_env_builder.num_safekeepers = 3 + env = neon_env_builder.from_repo_dir( + compatibility_snapshot_dir / "repo", + ) + env.pageserver.allowed_errors.extend( + [".*ingesting record with timestamp lagging more than wait_lsn_timeout.+"] + ) + env.start() + check_neon_works( + env, test_output_dir, compatibility_snapshot_dir / "dump.sql", test_output_dir / "repo" + ) diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 1dcf0b254d..1dcc37c407 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -9,6 +9,7 @@ from datetime import datetime, timezone from enum import Enum from typing import TYPE_CHECKING +import fixtures.utils import pytest from fixtures.auth_tokens import TokenScope from fixtures.common_types import TenantId, TenantShardId, TimelineId @@ -38,7 +39,11 @@ from fixtures.pg_version import PgVersion, run_only_on_default_postgres from fixtures.port_distributor import PortDistributor from fixtures.remote_storage import RemoteStorageKind, s3_storage from fixtures.storage_controller_proxy import StorageControllerProxy -from fixtures.utils import run_pg_bench_small, subprocess_capture, wait_until +from fixtures.utils import ( + run_pg_bench_small, + subprocess_capture, + wait_until, +) from fixtures.workload import Workload from mypy_boto3_s3.type_defs import ( ObjectTypeDef, @@ -60,9 +65,8 @@ def get_node_shard_counts(env: NeonEnv, tenant_ids): return counts -def test_storage_controller_smoke( - neon_env_builder: NeonEnvBuilder, -): +@pytest.mark.parametrize(**fixtures.utils.allpairs_versions()) +def test_storage_controller_smoke(neon_env_builder: NeonEnvBuilder, combination): """ Test the basic lifecycle of a storage controller: - Restarting