Test versions mismatch (#9167)

## Problem
We faced the problem of incompatibility of the different components of
different versions.
This should be detected automatically to prevent production bugs.
## Summary of changes
The test for this situation was implemented

Co-authored-by: Alexander Bayandin <alexander@neon.tech>
This commit is contained in:
a-masterov
2024-10-11 15:29:54 +02:00
committed by GitHub
parent 326cd80f0d
commit 091a175a3e
6 changed files with 206 additions and 31 deletions

View File

@@ -64,10 +64,12 @@ By default performance tests are excluded. To run them explicitly pass performan
Useful environment variables:
`NEON_BIN`: The directory where neon binaries can be found.
`COMPATIBILITY_NEON_BIN`: The directory where the previous version of Neon binaries can be found
`POSTGRES_DISTRIB_DIR`: The directory where postgres distribution can be found.
Since pageserver supports several postgres versions, `POSTGRES_DISTRIB_DIR` must contain
a subdirectory for each version with naming convention `v{PG_VERSION}/`.
Inside that dir, a `bin/postgres` binary should be present.
`COMPATIBILITY_POSTGRES_DISTRIB_DIR`: The directory where the prevoius version of postgres distribution can be found.
`DEFAULT_PG_VERSION`: The version of Postgres to use,
This is used to construct full path to the postgres binaries.
Format is 2-digit major version nubmer, i.e. `DEFAULT_PG_VERSION=16`
@@ -294,6 +296,16 @@ def test_foobar2(neon_env_builder: NeonEnvBuilder):
client.timeline_detail(tenant_id=tenant_id, timeline_id=timeline_id)
```
All the test which rely on NeonEnvBuilder, can check the various version combinations of the components.
To do this yuo may want to add the parametrize decorator with the function fixtures.utils.allpairs_versions()
E.g.
```python
@pytest.mark.parametrize(**fixtures.utils.allpairs_versions())
def test_something(
...
```
For more information about pytest fixtures, see https://docs.pytest.org/en/stable/fixture.html
At the end of a test, all the nodes in the environment are automatically stopped, so you

View File

@@ -75,6 +75,7 @@ from fixtures.safekeeper.http import SafekeeperHttpClient
from fixtures.safekeeper.utils import wait_walreceivers_absent
from fixtures.utils import (
ATTACHMENT_NAME_REGEX,
COMPONENT_BINARIES,
allure_add_grafana_links,
assert_no_errors,
get_dir_size,
@@ -316,11 +317,14 @@ class NeonEnvBuilder:
run_id: uuid.UUID,
mock_s3_server: MockS3Server,
neon_binpath: Path,
compatibility_neon_binpath: Path,
pg_distrib_dir: Path,
compatibility_pg_distrib_dir: Path,
pg_version: PgVersion,
test_name: str,
top_output_dir: Path,
test_output_dir: Path,
combination,
test_overlay_dir: Optional[Path] = None,
pageserver_remote_storage: Optional[RemoteStorage] = None,
# toml that will be decomposed into `--config-override` flags during `pageserver --init`
@@ -402,6 +406,19 @@ class NeonEnvBuilder:
"test_"
), "Unexpectedly instantiated from outside a test function"
self.test_name = test_name
self.compatibility_neon_binpath = compatibility_neon_binpath
self.compatibility_pg_distrib_dir = compatibility_pg_distrib_dir
self.version_combination = combination
self.mixdir = self.test_output_dir / "mixdir_neon"
if self.version_combination is not None:
assert (
self.compatibility_neon_binpath is not None
), "the environment variable COMPATIBILITY_NEON_BIN is required when using mixed versions"
assert (
self.compatibility_pg_distrib_dir is not None
), "the environment variable COMPATIBILITY_POSTGRES_DISTRIB_DIR is required when using mixed versions"
self.mixdir.mkdir(mode=0o755, exist_ok=True)
self._mix_versions()
def init_configs(self, default_remote_storage_if_missing: bool = True) -> NeonEnv:
# Cannot create more than one environment from one builder
@@ -602,6 +619,21 @@ class NeonEnvBuilder:
return self.env
def _mix_versions(self):
assert self.version_combination is not None, "version combination must be set"
for component, paths in COMPONENT_BINARIES.items():
directory = (
self.neon_binpath
if self.version_combination[component] == "new"
else self.compatibility_neon_binpath
)
for filename in paths:
destination = self.mixdir / filename
destination.symlink_to(directory / filename)
if self.version_combination["compute"] == "old":
self.pg_distrib_dir = self.compatibility_pg_distrib_dir
self.neon_binpath = self.mixdir
def overlay_mount(self, ident: str, srcdir: Path, dstdir: Path):
"""
Mount `srcdir` as an overlayfs mount at `dstdir`.
@@ -1350,7 +1382,9 @@ def neon_simple_env(
top_output_dir: Path,
test_output_dir: Path,
neon_binpath: Path,
compatibility_neon_binpath: Path,
pg_distrib_dir: Path,
compatibility_pg_distrib_dir: Path,
pg_version: PgVersion,
pageserver_virtual_file_io_engine: str,
pageserver_aux_file_policy: Optional[AuxFileStore],
@@ -1365,6 +1399,11 @@ def neon_simple_env(
# Create the environment in the per-test output directory
repo_dir = get_test_repo_dir(request, top_output_dir)
combination = (
request._pyfuncitem.callspec.params["combination"]
if "combination" in request._pyfuncitem.callspec.params
else None
)
with NeonEnvBuilder(
top_output_dir=top_output_dir,
@@ -1372,7 +1411,9 @@ def neon_simple_env(
port_distributor=port_distributor,
mock_s3_server=mock_s3_server,
neon_binpath=neon_binpath,
compatibility_neon_binpath=compatibility_neon_binpath,
pg_distrib_dir=pg_distrib_dir,
compatibility_pg_distrib_dir=compatibility_pg_distrib_dir,
pg_version=pg_version,
run_id=run_id,
preserve_database_files=cast(bool, pytestconfig.getoption("--preserve-database-files")),
@@ -1382,6 +1423,7 @@ def neon_simple_env(
pageserver_aux_file_policy=pageserver_aux_file_policy,
pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm,
pageserver_virtual_file_io_mode=pageserver_virtual_file_io_mode,
combination=combination,
) as builder:
env = builder.init_start()
@@ -1395,7 +1437,9 @@ def neon_env_builder(
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
neon_binpath: Path,
compatibility_neon_binpath: Path,
pg_distrib_dir: Path,
compatibility_pg_distrib_dir: Path,
pg_version: PgVersion,
run_id: uuid.UUID,
request: FixtureRequest,
@@ -1422,6 +1466,11 @@ def neon_env_builder(
# Create the environment in the test-specific output dir
repo_dir = os.path.join(test_output_dir, "repo")
combination = (
request._pyfuncitem.callspec.params["combination"]
if "combination" in request._pyfuncitem.callspec.params
else None
)
# Return the builder to the caller
with NeonEnvBuilder(
@@ -1430,7 +1479,10 @@ def neon_env_builder(
port_distributor=port_distributor,
mock_s3_server=mock_s3_server,
neon_binpath=neon_binpath,
compatibility_neon_binpath=compatibility_neon_binpath,
pg_distrib_dir=pg_distrib_dir,
compatibility_pg_distrib_dir=compatibility_pg_distrib_dir,
combination=combination,
pg_version=pg_version,
run_id=run_id,
preserve_database_files=cast(bool, pytestconfig.getoption("--preserve-database-files")),

View File

@@ -95,7 +95,29 @@ def neon_binpath(base_dir: Path, build_type: str) -> Iterator[Path]:
if not (binpath / "pageserver").exists():
raise Exception(f"neon binaries not found at '{binpath}'")
yield binpath
yield binpath.absolute()
@pytest.fixture(scope="session")
def compatibility_snapshot_dir() -> Iterator[Path]:
if os.getenv("REMOTE_ENV"):
return
compatibility_snapshot_dir_env = os.environ.get("COMPATIBILITY_SNAPSHOT_DIR")
assert (
compatibility_snapshot_dir_env is not None
), "COMPATIBILITY_SNAPSHOT_DIR is not set. It should be set to `compatibility_snapshot_pg(PG_VERSION)` path generateted by test_create_snapshot (ideally generated by the previous version of Neon)"
compatibility_snapshot_dir = Path(compatibility_snapshot_dir_env).resolve()
yield compatibility_snapshot_dir
@pytest.fixture(scope="session")
def compatibility_neon_binpath() -> Optional[Iterator[Path]]:
if os.getenv("REMOTE_ENV"):
return
comp_binpath = None
if env_compatibility_neon_binpath := os.environ.get("COMPATIBILITY_NEON_BIN"):
comp_binpath = Path(env_compatibility_neon_binpath).resolve().absolute()
yield comp_binpath
@pytest.fixture(scope="session")
@@ -109,6 +131,19 @@ def pg_distrib_dir(base_dir: Path) -> Iterator[Path]:
yield distrib_dir
@pytest.fixture(scope="session")
def compatibility_pg_distrib_dir() -> Optional[Iterator[Path]]:
compat_distrib_dir = None
if env_compat_postgres_bin := os.environ.get("COMPATIBILITY_POSTGRES_DISTRIB_DIR"):
compat_distrib_dir = Path(env_compat_postgres_bin).resolve()
if not compat_distrib_dir.exists():
raise Exception(f"compatibility postgres directory not found at {compat_distrib_dir}")
if compat_distrib_dir:
log.info(f"compatibility_pg_distrib_dir is {compat_distrib_dir}")
yield compat_distrib_dir
@pytest.fixture(scope="session")
def top_output_dir(base_dir: Path) -> Iterator[Path]:
# Compute the top-level directory for all tests.

View File

@@ -37,6 +37,23 @@ if TYPE_CHECKING:
Fn = TypeVar("Fn", bound=Callable[..., Any])
COMPONENT_BINARIES = {
"storage_controller": ("storage_controller",),
"storage_broker": ("storage_broker",),
"compute": ("compute_ctl",),
"safekeeper": ("safekeeper",),
"pageserver": ("pageserver", "pagectl"),
}
# Disable auto-formatting for better readability
# fmt: off
VERSIONS_COMBINATIONS = (
{"storage_controller": "new", "storage_broker": "new", "compute": "new", "safekeeper": "new", "pageserver": "new"},
{"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "old", "pageserver": "old"},
{"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "old", "pageserver": "new"},
{"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "new", "pageserver": "new"},
{"storage_controller": "old", "storage_broker": "old", "compute": "new", "safekeeper": "new", "pageserver": "new"},
)
# fmt: on
def subprocess_capture(
@@ -607,3 +624,19 @@ def human_bytes(amt: float) -> str:
amt = amt / 1024
raise RuntimeError("unreachable")
def allpairs_versions():
"""
Returns a dictionary with arguments for pytest parametrize
to test the compatibility with the previous version of Neon components
combinations were pre-computed to test all the pairs of the components with
the different versions.
"""
ids = []
for pair in VERSIONS_COMBINATIONS:
cur_id = []
for component in sorted(pair.keys()):
cur_id.append(pair[component][0])
ids.append(f"combination_{''.join(cur_id)}")
return {"argnames": "combination", "argvalues": VERSIONS_COMBINATIONS, "ids": ids}

View File

@@ -9,6 +9,7 @@ from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING
import fixtures.utils
import pytest
import toml
from fixtures.common_types import TenantId, TimelineId
@@ -93,6 +94,34 @@ if TYPE_CHECKING:
# # Run forward compatibility test
# ./scripts/pytest -k test_forward_compatibility
#
#
# How to run `test_version_mismatch` locally:
#
# export DEFAULT_PG_VERSION=16
# export BUILD_TYPE=release
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
# export COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE}
# export COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install
# export NEON_BIN=target/release
# export POSTGRES_DISTRIB_DIR=pg_install
#
# # Build previous version of binaries and store them somewhere:
# rm -rf pg_install target
# git checkout <previous version>
# CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc`
# mkdir -p neon_previous/target
# cp -a target/${BUILD_TYPE} ./neon_previous/target/${BUILD_TYPE}
# cp -a pg_install ./neon_previous/pg_install
#
# # Build current version of binaries and create a data snapshot:
# rm -rf pg_install target
# git checkout <current version>
# CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc`
# ./scripts/pytest -k test_create_snapshot
#
# # Run the version mismatch test
# ./scripts/pytest -k test_version_mismatch
check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif(
os.environ.get("CHECK_ONDISK_DATA_COMPATIBILITY") is None,
@@ -166,16 +195,11 @@ def test_backward_compatibility(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
pg_version: PgVersion,
compatibility_snapshot_dir: Path,
):
"""
Test that the new binaries can read old data
"""
compatibility_snapshot_dir_env = os.environ.get("COMPATIBILITY_SNAPSHOT_DIR")
assert (
compatibility_snapshot_dir_env is not None
), f"COMPATIBILITY_SNAPSHOT_DIR is not set. It should be set to `compatibility_snapshot_pg{pg_version.v_prefixed}` path generateted by test_create_snapshot (ideally generated by the previous version of Neon)"
compatibility_snapshot_dir = Path(compatibility_snapshot_dir_env).resolve()
breaking_changes_allowed = (
os.environ.get("ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE", "false").lower() == "true"
)
@@ -214,27 +238,11 @@ def test_forward_compatibility(
test_output_dir: Path,
top_output_dir: Path,
pg_version: PgVersion,
compatibility_snapshot_dir: Path,
):
"""
Test that the old binaries can read new data
"""
compatibility_neon_bin_env = os.environ.get("COMPATIBILITY_NEON_BIN")
assert compatibility_neon_bin_env is not None, (
"COMPATIBILITY_NEON_BIN is not set. It should be set to a path with Neon binaries "
"(ideally generated by the previous version of Neon)"
)
compatibility_neon_bin = Path(compatibility_neon_bin_env).resolve()
compatibility_postgres_distrib_dir_env = os.environ.get("COMPATIBILITY_POSTGRES_DISTRIB_DIR")
assert (
compatibility_postgres_distrib_dir_env is not None
), "COMPATIBILITY_POSTGRES_DISTRIB_DIR is not set. It should be set to a pg_install directrory (ideally generated by the previous version of Neon)"
compatibility_postgres_distrib_dir = Path(compatibility_postgres_distrib_dir_env).resolve()
compatibility_snapshot_dir = (
top_output_dir / f"compatibility_snapshot_pg{pg_version.v_prefixed}"
)
breaking_changes_allowed = (
os.environ.get("ALLOW_FORWARD_COMPATIBILITY_BREAKAGE", "false").lower() == "true"
)
@@ -245,9 +253,14 @@ def test_forward_compatibility(
# Use previous version's production binaries (pageserver, safekeeper, pg_distrib_dir, etc.).
# But always use the current version's neon_local binary.
# This is because we want to test the compatibility of the data format, not the compatibility of the neon_local CLI.
neon_env_builder.neon_binpath = compatibility_neon_bin
neon_env_builder.pg_distrib_dir = compatibility_postgres_distrib_dir
neon_env_builder.neon_local_binpath = neon_env_builder.neon_local_binpath
assert (
neon_env_builder.compatibility_neon_binpath is not None
), "the environment variable COMPATIBILITY_NEON_BIN is required"
assert (
neon_env_builder.compatibility_pg_distrib_dir is not None
), "the environment variable COMPATIBILITY_POSTGRES_DISTRIB_DIR is required"
neon_env_builder.neon_binpath = neon_env_builder.compatibility_neon_binpath
neon_env_builder.pg_distrib_dir = neon_env_builder.compatibility_pg_distrib_dir
env = neon_env_builder.from_repo_dir(
compatibility_snapshot_dir / "repo",
@@ -558,3 +571,29 @@ def test_historic_storage_formats(
env.pageserver.http_client().timeline_compact(
dataset.tenant_id, existing_timeline_id, force_image_layer_creation=True
)
@check_ondisk_data_compatibility_if_enabled
@pytest.mark.xdist_group("compatibility")
@pytest.mark.parametrize(**fixtures.utils.allpairs_versions())
def test_versions_mismatch(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
pg_version: PgVersion,
compatibility_snapshot_dir,
combination,
):
"""
Checks compatibility of different combinations of versions of the components
"""
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.from_repo_dir(
compatibility_snapshot_dir / "repo",
)
env.pageserver.allowed_errors.extend(
[".*ingesting record with timestamp lagging more than wait_lsn_timeout.+"]
)
env.start()
check_neon_works(
env, test_output_dir, compatibility_snapshot_dir / "dump.sql", test_output_dir / "repo"
)

View File

@@ -9,6 +9,7 @@ from datetime import datetime, timezone
from enum import Enum
from typing import TYPE_CHECKING
import fixtures.utils
import pytest
from fixtures.auth_tokens import TokenScope
from fixtures.common_types import TenantId, TenantShardId, TimelineId
@@ -38,7 +39,11 @@ from fixtures.pg_version import PgVersion, run_only_on_default_postgres
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import RemoteStorageKind, s3_storage
from fixtures.storage_controller_proxy import StorageControllerProxy
from fixtures.utils import run_pg_bench_small, subprocess_capture, wait_until
from fixtures.utils import (
run_pg_bench_small,
subprocess_capture,
wait_until,
)
from fixtures.workload import Workload
from mypy_boto3_s3.type_defs import (
ObjectTypeDef,
@@ -60,9 +65,8 @@ def get_node_shard_counts(env: NeonEnv, tenant_ids):
return counts
def test_storage_controller_smoke(
neon_env_builder: NeonEnvBuilder,
):
@pytest.mark.parametrize(**fixtures.utils.allpairs_versions())
def test_storage_controller_smoke(neon_env_builder: NeonEnvBuilder, combination):
"""
Test the basic lifecycle of a storage controller:
- Restarting