From c4f9f1dc6d7c9ac40c98e00240ff07c7e8ed1bb8 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Thu, 10 Nov 2022 09:06:34 +0000 Subject: [PATCH] Add data format forward compatibility tests (#2766) Add `test_forward_compatibility`, which checks if it's going to be possible to roll back a release to the previous version. The test uses artifacts (Neon & Postgres binaries) from the previous release to start Neon on the repo created by the current version. It performs exactly the same checks as `test_backward_compatibility` does. Single `ALLOW_BREAKING_CHANGES` env var got replaced by `ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE` & `ALLOW_FORWARD_COMPATIBILITY_BREAKAGE` and can be set by `backward compatibility breakage` and `forward compatibility breakage` labels respectively. --- .../actions/run-python-test-set/action.yml | 37 +- .github/workflows/build_and_test.yml | 43 +- poetry.lock | 25 +- pyproject.toml | 2 +- test_runner/fixtures/neon_fixtures.py | 33 ++ test_runner/fixtures/utils.py | 1 - test_runner/regress/test_compatibility.py | 367 +++++++++++------- 7 files changed, 302 insertions(+), 206 deletions(-) diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 3459449e15..97783df444 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -55,6 +55,22 @@ runs: name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact path: /tmp/neon + - name: Download Neon binaries for the previous release + if: inputs.build_type != 'remote' + uses: ./.github/actions/download + with: + name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact + path: /tmp/neon-previous + prefix: latest + + - name: Download compatibility snapshot for Postgres 14 + if: inputs.build_type != 'remote' + uses: ./.github/actions/download + with: + name: compatibility-snapshot-${{ inputs.build_type }}-pg14 + path: /tmp/compatibility_snapshot_pg14 + prefix: latest + - name: 
Checkout if: inputs.needs_postgres_source == 'true' uses: actions/checkout@v3 @@ -73,23 +89,18 @@ runs: shell: bash -euxo pipefail {0} run: ./scripts/pysync - - name: Download compatibility snapshot for Postgres 14 - if: inputs.build_type != 'remote' - uses: ./.github/actions/download - with: - name: compatibility-snapshot-${{ inputs.build_type }}-pg14 - path: /tmp/compatibility_snapshot_pg14 - prefix: latest - - name: Run pytest env: NEON_BIN: /tmp/neon/bin + COMPATIBILITY_NEON_BIN: /tmp/neon-previous/bin + COMPATIBILITY_POSTGRES_DISTRIB_DIR: /tmp/neon-previous/pg_install TEST_OUTPUT: /tmp/test_output BUILD_TYPE: ${{ inputs.build_type }} AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }} AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }} COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14 - ALLOW_BREAKING_CHANGES: contains(github.event.pull_request.labels.*.name, 'breaking changes') + ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage') + ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage') shell: bash -euxo pipefail {0} run: | # PLATFORM will be embedded in the perf test report @@ -150,12 +161,16 @@ runs: # -n4 uses four processes to run tests via pytest-xdist # -s is not used to prevent pytest from capturing output, because tests are running # in parallel and logs are mixed between different tests + # --dist=loadgroup points tests marked with @pytest.mark.xdist_group to the same worker, + # to make @pytest.mark.order work with xdist + # mkdir -p $TEST_OUTPUT/allure/results "${cov_prefix[@]}" ./scripts/pytest \ --junitxml=$TEST_OUTPUT/junit.xml \ --alluredir=$TEST_OUTPUT/allure/results \ --tb=short \ --verbose \ + --dist=loadgroup \ -rA $TEST_SELECTION $EXTRA_PARAMS if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then @@ -169,8 +184,8 @@ runs: uses: ./.github/actions/upload with: name: 
compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }} - # The path includes a test name (test_prepare_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test - path: /tmp/test_output/test_prepare_snapshot/compatibility_snapshot_pg14/ + # The path includes a test name (test_create_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test + path: /tmp/test_output/test_create_snapshot/compatibility_snapshot_pg14/ prefix: latest - name: Create Allure report diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 707b20bcb8..b598949f2b 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -268,32 +268,6 @@ jobs: if: matrix.build_type == 'debug' uses: ./.github/actions/save-coverage-data - upload-latest-artifacts: - runs-on: dev - container: - image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned - options: --init - needs: [ regress-tests ] - if: github.ref_name == 'main' - steps: - - name: Copy Neon artifact to the latest directory - shell: bash -euxo pipefail {0} - env: - BUCKET: neon-github-public-dev - PREFIX: artifacts/${{ github.run_id }} - run: | - for build_type in debug release; do - FILENAME=neon-${{ runner.os }}-${build_type}-artifact.tar.zst - - S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true) - if [ -z "${S3_KEY}" ]; then - echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist" - exit 1 - fi - - time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} s3://${BUCKET}/artifacts/latest/${FILENAME} - done - benchmarks: runs-on: dev container: @@ -942,7 +916,7 @@ jobs: DOCKER_TAG=${{needs.tag.outputs.build-tag}} helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace 
neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s - promote-compatibility-test-snapshot: + promote-compatibility-data: runs-on: dev container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned @@ -956,9 +930,24 @@ jobs: BUCKET: neon-github-public-dev PREFIX: artifacts/latest run: | + # Update compatibility snapshot for the release for build_type in debug release; do OLD_FILENAME=compatibility-snapshot-${build_type}-pg14-${GITHUB_RUN_ID}.tar.zst NEW_FILENAME=compatibility-snapshot-${build_type}-pg14.tar.zst time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME} done + + # Update Neon artifact for the release (reuse already uploaded artifact) + for build_type in debug release; do + OLD_PREFIX=artifacts/${GITHUB_RUN_ID} + FILENAME=neon-${{ runner.os }}-${build_type}-artifact.tar.zst + + S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${OLD_PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true) + if [ -z "${S3_KEY}" ]; then + echo 2>&1 "Neither s3://${BUCKET}/${OLD_PREFIX}/${FILENAME} nor its version from previous attempts exist" + exit 1 + fi + + time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} s3://${BUCKET}/${PREFIX}/${FILENAME} + done diff --git a/poetry.lock b/poetry.lock index 01265aaea1..551b267a87 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1207,18 +1207,6 @@ pytest = ">=6.1.0" [package.extras] testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"] -[[package]] -name = "pytest-forked" -version = "1.4.0" -description = "run tests in isolated forked subprocesses" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -py = "*" -pytest = ">=3.10" - [[package]] name = "pytest-lazy-fixture" version = "0.6.3" @@ 
-1257,7 +1245,7 @@ pytest = ">=5.0.0" [[package]] name = "pytest-xdist" -version = "2.5.0" +version = "3.0.2" description = "pytest xdist plugin for distributed testing and loop-on-failing modes" category = "main" optional = false @@ -1266,7 +1254,6 @@ python-versions = ">=3.6" [package.dependencies] execnet = ">=1.1" pytest = ">=6.2.0" -pytest-forked = "*" [package.extras] psutil = ["psutil (>=3.0)"] @@ -1568,7 +1555,7 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>= [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "9352a89d49d34807f6a58f6c3f898acbd8cf3570e0f45ede973673644bde4d0e" +content-hash = "ebe16714bd4db1f34f005c9b72392f165618b020a2d0948cae20e0e8894c5517" [metadata.files] aiopg = [ @@ -2111,10 +2098,6 @@ pytest-asyncio = [ {file = "pytest-asyncio-0.19.0.tar.gz", hash = "sha256:ac4ebf3b6207259750bc32f4c1d8fcd7e79739edbc67ad0c58dd150b1d072fed"}, {file = "pytest_asyncio-0.19.0-py3-none-any.whl", hash = "sha256:7a97e37cfe1ed296e2e84941384bdd37c376453912d397ed39293e0916f521fa"}, ] -pytest-forked = [ - {file = "pytest-forked-1.4.0.tar.gz", hash = "sha256:8b67587c8f98cbbadfdd804539ed5455b6ed03802203485dd2f53c1422d7440e"}, - {file = "pytest_forked-1.4.0-py3-none-any.whl", hash = "sha256:bbbb6717efc886b9d64537b41fb1497cfaf3c9601276be8da2cccfea5a3c8ad8"}, -] pytest-lazy-fixture = [ {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, @@ -2128,8 +2111,8 @@ pytest-timeout = [ {file = "pytest_timeout-2.1.0-py3-none-any.whl", hash = "sha256:f6f50101443ce70ad325ceb4473c4255e9d74e3c7cd0ef827309dfa4c0d975c6"}, ] pytest-xdist = [ - {file = "pytest-xdist-2.5.0.tar.gz", hash = "sha256:4580deca3ff04ddb2ac53eba39d76cb5dd5edeac050cb6fbc768b0dd712b4edf"}, - {file = "pytest_xdist-2.5.0-py3-none-any.whl", hash = 
"sha256:6fe5c74fec98906deb8f2d2b616b5c782022744978e7bd4695d39c8f42d0ce65"}, + {file = "pytest-xdist-3.0.2.tar.gz", hash = "sha256:688da9b814370e891ba5de650c9327d1a9d861721a524eb917e620eec3e90291"}, + {file = "pytest_xdist-3.0.2-py3-none-any.whl", hash = "sha256:9feb9a18e1790696ea23e1434fa73b325ed4998b0e9fcb221f16fd1945e6df1b"}, ] python-dateutil = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, diff --git a/pyproject.toml b/pyproject.toml index 765e0b97eb..c2e2e2393b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ psycopg2-binary = "^2.9.1" typing-extensions = "^4.1.0" PyJWT = {version = "^2.1.0", extras = ["crypto"]} requests = "^2.26.0" -pytest-xdist = "^2.3.0" +pytest-xdist = "^3.0.2" asyncpg = "^0.24.0" aiopg = "^1.3.1" Jinja2 = "^3.0.2" diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 715c0753af..7a46a08f08 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -228,6 +228,7 @@ def can_bind(host: str, port: int) -> bool: class PortDistributor: def __init__(self, base_port: int, port_number: int): self.iterator = iter(range(base_port, base_port + port_number)) + self.port_map: Dict[int, int] = {} def get_port(self) -> int: for port in self.iterator: @@ -238,6 +239,38 @@ class PortDistributor: "port range configured for test is exhausted, consider enlarging the range" ) + def replace_with_new_port(self, value: Union[int, str]) -> Union[int, str]: + """ + Returns a new port for a port number in a string (like "localhost:1234") or int. + Replacements are memorised, so a substitution for the same port is always the same. 
+ """ + + # TODO: replace with structural pattern matching for Python >= 3.10 + if isinstance(value, int): + return self._replace_port_int(value) + + if isinstance(value, str): + return self._replace_port_str(value) + + raise TypeError(f"unsupported type {type(value)} of {value=}") + + def _replace_port_int(self, value: int) -> int: + known_port = self.port_map.get(value) + if known_port is None: + known_port = self.port_map[value] = self.get_port() + + return known_port + + def _replace_port_str(self, value: str) -> str: + # Use regex to find port in a string + # urllib.parse.urlparse produces inconvenient results for cases without scheme like "localhost:5432" + # See https://bugs.python.org/issue27657 + ports = re.findall(r":(\d+)(?:/|$)", value) + assert len(ports) == 1, f"can't find port in {value}" + port_int = int(ports[0]) + + return value.replace(f":{port_int}", f":{self._replace_port_int(port_int)}") + @pytest.fixture(scope="session") def port_distributor(worker_base_port): diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index e73453f2c4..b04e02d3b8 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -17,7 +17,6 @@ Fn = TypeVar("Fn", bound=Callable[..., Any]) def get_self_dir() -> Path: """Get the path to the directory where this script lives.""" - # return os.path.dirname(os.path.abspath(__file__)) return Path(__file__).resolve().parent diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index b0643ec05e..306aa84040 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -1,12 +1,12 @@ import os -import re import shutil import subprocess from pathlib import Path -from typing import Any, Dict, Union +from typing import Any import pytest -import toml +import toml # TODO: replace with tomllib for Python >= 3.11 +from fixtures.log_helper import log from fixtures.neon_fixtures import ( NeonCli, 
NeonEnvBuilder, @@ -19,94 +19,185 @@ from fixtures.neon_fixtures import ( from fixtures.types import Lsn from pytest import FixtureRequest
-DEFAILT_LOCAL_SNAPSHOT_DIR = "test_output/test_prepare_snapshot/compatibility_snapshot_pg14"
+#
+# A test suite that helps to prevent unintentionally breaking backward or forward compatibility between Neon releases.
+# - `test_create_snapshot` a script wrapped in a test that creates a data snapshot.
+# - `test_backward_compatibility` checks that the current version of Neon can start/read/interact with a data snapshot created by the previous version.
+# The path to the snapshot is configured by COMPATIBILITY_SNAPSHOT_DIR environment variable.
+# If the breakage is intentional, the test can be xfailed by setting ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE=true.
+# - `test_forward_compatibility` checks that a snapshot created by the current version can be started/read/interacted with by the previous version of Neon.
+# Paths to Neon and Postgres are configured by COMPATIBILITY_NEON_BIN and COMPATIBILITY_POSTGRES_DISTRIB_DIR environment variables.
+# If the breakage is intentional, the test can be xfailed by setting ALLOW_FORWARD_COMPATIBILITY_BREAKAGE=true.
+#
+# The file contains a couple of helper functions:
+# - prepare_snapshot copies the snapshot, cleans it up and makes it ready for the current version of Neon (replaces paths and ports in config files).
+# - check_neon_works performs the test itself, feel free to add more checks there.
+#
-def dump_differs(first: Path, second: Path, output: Path) -> bool:
-    """
-    Runs diff(1) command on two SQL dumps and write the output to the given output file.
-    Returns True if the dumps differ, False otherwise. 
- """ +# Note: if renaming this test, don't forget to update a reference to it in a workflow file: +# "Upload compatibility snapshot" step in .github/actions/run-python-test-set/action.yml +@pytest.mark.xdist_group("compatibility") +@pytest.mark.order(before="test_forward_compatibility") +def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_output_dir: Path): + # The test doesn't really test anything + # it creates a new snapshot for releases after we tested the current version against the previous snapshot in `test_backward_compatibility`. + # + # There's no cleanup here, it allows to adjust the data in `test_backward_compatibility` itself without re-collecting it. + neon_env_builder.pg_version = "14" + neon_env_builder.num_safekeepers = 3 + neon_env_builder.enable_local_fs_remote_storage() - with output.open("w") as stdout: - rv = subprocess.run( - [ - "diff", - "--unified", # Make diff output more readable - "--ignore-matching-lines=^--", # Ignore changes in comments - "--ignore-blank-lines", - str(first), - str(second), - ], - stdout=stdout, - ) + env = neon_env_builder.init_start() + pg = env.postgres.create_start("main") + pg_bin.run(["pgbench", "--initialize", "--scale=10", pg.connstr()]) + pg_bin.run(["pgbench", "--time=60", "--progress=2", pg.connstr()]) + pg_bin.run(["pg_dumpall", f"--dbname={pg.connstr()}", f"--file={test_output_dir / 'dump.sql'}"]) - return rv.returncode != 0 + snapshot_config = toml.load(test_output_dir / "repo" / "config") + tenant_id = snapshot_config["default_tenant_id"] + timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id] + + pageserver_http = env.pageserver.http_client() + lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + + wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, lsn) + pageserver_http.timeline_checkpoint(tenant_id, timeline_id) + wait_for_upload(pageserver_http, tenant_id, timeline_id, lsn) + + env.postgres.stop_all() + for sk in 
env.safekeepers: + sk.stop() + env.pageserver.stop() + + shutil.copytree(test_output_dir, test_output_dir / "compatibility_snapshot_pg14") + # Directory `test_output_dir / "compatibility_snapshot_pg14"` is uploaded to S3 in a workflow, keep the name in sync with it -class PortReplacer(object): - """ - Class-helper for replacing ports in config files. - """ - - def __init__(self, port_distributor: PortDistributor): - self.port_distributor = port_distributor - self.port_map: Dict[int, int] = {} - - def replace_port(self, value: Union[int, str]) -> Union[int, str]: - if isinstance(value, int): - if (known_port := self.port_map.get(value)) is not None: - return known_port - - self.port_map[value] = self.port_distributor.get_port() - return self.port_map[value] - - if isinstance(value, str): - # Use regex to find port in a string - # urllib.parse.urlparse produces inconvenient results for cases without scheme like "localhost:5432" - # See https://bugs.python.org/issue27657 - ports = re.findall(r":(\d+)(?:/|$)", value) - assert len(ports) == 1, f"can't find port in {value}" - port_int = int(ports[0]) - - if (known_port := self.port_map.get(port_int)) is not None: - return value.replace(f":{port_int}", f":{known_port}") - - self.port_map[port_int] = self.port_distributor.get_port() - return value.replace(f":{port_int}", f":{self.port_map[port_int]}") - - raise TypeError(f"unsupported type {type(value)} of {value=}") - - -@pytest.mark.order(after="test_prepare_snapshot") +@pytest.mark.xdist_group("compatibility") +@pytest.mark.order(after="test_create_snapshot") def test_backward_compatibility( pg_bin: PgBin, port_distributor: PortDistributor, test_output_dir: Path, - request: FixtureRequest, neon_binpath: Path, pg_distrib_dir: Path, + pg_version: str, + request: FixtureRequest, ): - compatibility_snapshot_dir = Path( - os.environ.get("COMPATIBILITY_SNAPSHOT_DIR", DEFAILT_LOCAL_SNAPSHOT_DIR) - ) - assert compatibility_snapshot_dir.exists(), ( - 
f"{compatibility_snapshot_dir} doesn't exist. Please run `test_prepare_snapshot` test first " "to create the snapshot or set COMPATIBILITY_SNAPSHOT_DIR env variable to the existing snapshot" )
-    compatibility_snapshot_dir = compatibility_snapshot_dir.resolve()
+    compatibility_snapshot_dir_env = os.environ.get("COMPATIBILITY_SNAPSHOT_DIR")
+    assert (
+        compatibility_snapshot_dir_env is not None
+    ), "COMPATIBILITY_SNAPSHOT_DIR is not set. It should be set to `compatibility_snapshot_pg14` path generated by test_create_snapshot (ideally generated by the previous version of Neon)"
+    compatibility_snapshot_dir = Path(compatibility_snapshot_dir_env).resolve()
 
-    # Make compatibility snapshot artifacts pickupable by Allure
-    # by copying the snapshot directory to the curent test output directory.
-    repo_dir = test_output_dir / "compatibility_snapshot" / "repo"
+    # Copy the snapshot to current directory, and prepare for the test
+    prepare_snapshot(
+        from_dir=compatibility_snapshot_dir,
+        to_dir=test_output_dir / "compatibility_snapshot",
+        port_distributor=port_distributor,
+    )
 
-    shutil.copytree(compatibility_snapshot_dir / "repo", repo_dir)
+    breaking_changes_allowed = (
+        os.environ.get("ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE", "false").lower() == "true"
+    )
+    try:
+        check_neon_works(
+            test_output_dir / "compatibility_snapshot" / "repo",
+            neon_binpath,
+            pg_distrib_dir,
+            pg_version,
+            port_distributor,
+            test_output_dir,
+            pg_bin,
+            request,
+        )
+    except Exception:
+        if breaking_changes_allowed:
+            pytest.xfail(
+                "Breaking changes are allowed by ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE env var"
+            )
+        else:
+            raise
+
+    assert (
+        not breaking_changes_allowed
+    ), "Breaking changes are allowed by ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
+
+
+@pytest.mark.xdist_group("compatibility")
+@pytest.mark.order(after="test_create_snapshot")
+def test_forward_compatibility(
+    test_output_dir: Path,
+    port_distributor: PortDistributor, 
+    pg_version: str,
+    request: FixtureRequest,
+):
+    compatibility_neon_bin_env = os.environ.get("COMPATIBILITY_NEON_BIN")
+    assert compatibility_neon_bin_env is not None, (
+        "COMPATIBILITY_NEON_BIN is not set. It should be set to a path with Neon binaries "
+        "(ideally generated by the previous version of Neon)"
+    )
+    compatibility_neon_bin = Path(compatibility_neon_bin_env).resolve()
+
+    compatibility_postgres_distrib_dir_env = os.environ.get("COMPATIBILITY_POSTGRES_DISTRIB_DIR")
+    assert (
+        compatibility_postgres_distrib_dir_env is not None
+    ), "COMPATIBILITY_POSTGRES_DISTRIB_DIR is not set. It should be set to a pg_install directory (ideally generated by the previous version of Neon)"
+    compatibility_postgres_distrib_dir = Path(compatibility_postgres_distrib_dir_env).resolve()
+
+    compatibility_snapshot_dir = (
+        test_output_dir.parent / "test_create_snapshot" / "compatibility_snapshot_pg14"
+    )
+    # Copy the snapshot to current directory, and prepare for the test
+    prepare_snapshot(
+        from_dir=compatibility_snapshot_dir,
+        to_dir=test_output_dir / "compatibility_snapshot",
+        port_distributor=port_distributor,
+    )
+
+    breaking_changes_allowed = (
+        os.environ.get("ALLOW_FORWARD_COMPATIBILITY_BREAKAGE", "false").lower() == "true"
+    )
+    try:
+        check_neon_works(
+            test_output_dir / "compatibility_snapshot" / "repo",
+            compatibility_neon_bin,
+            compatibility_postgres_distrib_dir,
+            pg_version,
+            port_distributor,
+            test_output_dir,
+            PgBin(test_output_dir, compatibility_postgres_distrib_dir, pg_version),
+            request,
+        )
+    except Exception:
+        if breaking_changes_allowed:
+            pytest.xfail(
+                "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE env var"
+            )
+        else:
+            raise
+
+    assert (
+        not breaking_changes_allowed
+    ), "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
+
+
+def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistributor):
+    assert 
from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist" + assert (from_dir / "repo").exists(), f"Snapshot '{from_dir}' doesn't contain a repo directory" + assert (from_dir / "dump.sql").exists(), f"Snapshot '{from_dir}' doesn't contain a dump.sql" + + log.info(f"Copying snapshot from {from_dir} to {to_dir}") + shutil.copytree(from_dir, to_dir) + + repo_dir = to_dir / "repo" # Remove old logs to avoid confusion in test artifacts for logfile in repo_dir.glob("**/*.log"): logfile.unlink() - # Remove tenants data for computes + # Remove tenants data for compute for tenant in (repo_dir / "pgdatadirs" / "tenants").glob("*"): shutil.rmtree(tenant) @@ -115,20 +206,17 @@ def test_backward_compatibility( shutil.rmtree(tenant / "wal-redo-datadir.___temp") # Update paths and ports in config files - pr = PortReplacer(port_distributor) - pageserver_toml = repo_dir / "pageserver.toml" pageserver_config = toml.load(pageserver_toml) - new_local_path = pageserver_config["remote_storage"]["local_path"].replace( - "/test_prepare_snapshot/", - "/test_backward_compatibility/compatibility_snapshot/", + pageserver_config["remote_storage"]["local_path"] = repo_dir / "local_fs_remote_storage" + pageserver_config["listen_http_addr"] = port_distributor.replace_with_new_port( + pageserver_config["listen_http_addr"] + ) + pageserver_config["listen_pg_addr"] = port_distributor.replace_with_new_port( + pageserver_config["listen_pg_addr"] ) - - pageserver_config["remote_storage"]["local_path"] = new_local_path - pageserver_config["listen_http_addr"] = pr.replace_port(pageserver_config["listen_http_addr"]) - pageserver_config["listen_pg_addr"] = pr.replace_port(pageserver_config["listen_pg_addr"]) pageserver_config["broker_endpoints"] = [ - pr.replace_port(ep) for ep in pageserver_config["broker_endpoints"] + port_distributor.replace_with_new_port(ep) for ep in pageserver_config["broker_endpoints"] ] with pageserver_toml.open("w") as f: @@ -137,17 +225,18 @@ def test_backward_compatibility( 
snapshot_config_toml = repo_dir / "config" snapshot_config = toml.load(snapshot_config_toml) snapshot_config["etcd_broker"]["broker_endpoints"] = [ - pr.replace_port(ep) for ep in snapshot_config["etcd_broker"]["broker_endpoints"] + port_distributor.replace_with_new_port(ep) + for ep in snapshot_config["etcd_broker"]["broker_endpoints"] ] - snapshot_config["pageserver"]["listen_http_addr"] = pr.replace_port( + snapshot_config["pageserver"]["listen_http_addr"] = port_distributor.replace_with_new_port( snapshot_config["pageserver"]["listen_http_addr"] ) - snapshot_config["pageserver"]["listen_pg_addr"] = pr.replace_port( + snapshot_config["pageserver"]["listen_pg_addr"] = port_distributor.replace_with_new_port( snapshot_config["pageserver"]["listen_pg_addr"] ) for sk in snapshot_config["safekeepers"]: - sk["http_port"] = pr.replace_port(sk["http_port"]) - sk["pg_port"] = pr.replace_port(sk["pg_port"]) + sk["http_port"] = port_distributor.replace_with_new_port(sk["http_port"]) + sk["pg_port"] = port_distributor.replace_with_new_port(sk["pg_port"]) with (snapshot_config_toml).open("w") as f: toml.dump(snapshot_config, f) @@ -159,7 +248,7 @@ def test_backward_compatibility( "--recursive", "--binary-file=without-match", "--files-with-matches", - "test_prepare_snapshot/repo", + "test_create_snapshot/repo", str(repo_dir), ], capture_output=True, @@ -167,44 +256,47 @@ def test_backward_compatibility( ) assert ( rv.returncode != 0 - ), f"there're files referencing `test_prepare_snapshot/repo`, this path should be replaced with {repo_dir}:\n{rv.stdout}" + ), f"there're files referencing `test_create_snapshot/repo`, this path should be replaced with {repo_dir}:\n{rv.stdout}" - # NeonEnv stub to make NeonCli happy + +def check_neon_works( + repo_dir: Path, + neon_binpath: Path, + pg_distrib_dir: Path, + pg_version: str, + port_distributor: PortDistributor, + test_output_dir: Path, + pg_bin: PgBin, + request: FixtureRequest, +): + snapshot_config_toml = repo_dir / "config" + 
snapshot_config = toml.load(snapshot_config_toml) + snapshot_config["neon_distrib_dir"] = str(neon_binpath) + snapshot_config["postgres_distrib_dir"] = str(pg_distrib_dir) + with (snapshot_config_toml).open("w") as f: + toml.dump(snapshot_config, f) + + # TODO: replace with NeonEnvBuilder / NeonEnv config: Any = type("NeonEnvStub", (object,), {}) config.rust_log_override = None config.repo_dir = repo_dir - config.pg_version = "14" # Note: `pg_dumpall` (from pg_bin) version is set by DEFAULT_PG_VERSION_DEFAULT and can be overriden by DEFAULT_PG_VERSION env var + config.pg_version = pg_version config.initial_tenant = snapshot_config["default_tenant_id"] config.neon_binpath = neon_binpath config.pg_distrib_dir = pg_distrib_dir - # Check that we can start the project cli = NeonCli(config) - try: - cli.raw_cli(["start"]) - request.addfinalizer(lambda: cli.raw_cli(["stop"])) + cli.raw_cli(["start"]) + request.addfinalizer(lambda: cli.raw_cli(["stop"])) - result = cli.pg_start("main", port=port_distributor.get_port()) - request.addfinalizer(lambda: cli.pg_stop("main")) - except Exception: - breaking_changes_allowed = ( - os.environ.get("ALLOW_BREAKING_CHANGES", "false").lower() == "true" - ) - if breaking_changes_allowed: - pytest.xfail("Breaking changes are allowed by ALLOW_BREAKING_CHANGES env var") - else: - raise + pg_port = port_distributor.get_port() + cli.pg_start("main", port=pg_port) + request.addfinalizer(lambda: cli.pg_stop("main")) - connstr_all = re.findall(r"Starting postgres node at '([^']+)'", result.stdout) - assert len(connstr_all) == 1, f"can't parse connstr from {result.stdout}" - connstr = connstr_all[0] - - # Check that the project produces the same dump as the previous version. 
- # The assert itself deferred to the end of the test - # to allow us to perform checks that change data before failing + connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres" pg_bin.run(["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"]) initial_dump_differs = dump_differs( - compatibility_snapshot_dir / "dump.sql", + repo_dir.parent / "dump.sql", test_output_dir / "dump.sql", test_output_dir / "dump.filediff", ) @@ -242,38 +334,23 @@ def test_backward_compatibility( assert not initial_dump_differs, "initial dump differs" -# Note: if renaming this test, don't forget to update a reference to it in a workflow file: -# "Upload compatibility snapshot" step in .github/actions/run-python-test-set/action.yml -def test_prepare_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_output_dir: Path): - # The test doesn't really test anything - # it creates a new snapshot for releases after we tested the current version against the previous snapshot in `test_backward_compatibility`. - # - # There's no cleanup here, it allows to adjust the data in `test_backward_compatibility` itself without re-collecting it. - neon_env_builder.pg_version = "14" - neon_env_builder.num_safekeepers = 3 - neon_env_builder.enable_local_fs_remote_storage() +def dump_differs(first: Path, second: Path, output: Path) -> bool: + """ + Runs diff(1) command on two SQL dumps and write the output to the given output file. + Returns True if the dumps differ, False otherwise. 
+ """ - env = neon_env_builder.init_start() - pg = env.postgres.create_start("main") - pg_bin.run(["pgbench", "--initialize", "--scale=10", pg.connstr()]) - pg_bin.run(["pgbench", "--time=60", "--progress=2", pg.connstr()]) - pg_bin.run(["pg_dumpall", f"--dbname={pg.connstr()}", f"--file={test_output_dir / 'dump.sql'}"]) + with output.open("w") as stdout: + rv = subprocess.run( + [ + "diff", + "--unified", # Make diff output more readable + "--ignore-matching-lines=^--", # Ignore changes in comments + "--ignore-blank-lines", + str(first), + str(second), + ], + stdout=stdout, + ) - snapshot_config = toml.load(test_output_dir / "repo" / "config") - tenant_id = snapshot_config["default_tenant_id"] - timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id] - - pageserver_http = env.pageserver.http_client() - lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) - - wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, lsn) - pageserver_http.timeline_checkpoint(tenant_id, timeline_id) - wait_for_upload(pageserver_http, tenant_id, timeline_id, lsn) - - env.postgres.stop_all() - for sk in env.safekeepers: - sk.stop() - env.pageserver.stop() - - shutil.copytree(test_output_dir, test_output_dir / "compatibility_snapshot_pg14") - # Directory `test_output_dir / "compatibility_snapshot_pg14"` is uploaded to S3 in a workflow, keep the name in sync with it + return rv.returncode != 0