diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 2b88f09b3d..d0836676c9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -186,7 +186,11 @@ jobs: runs-on: [ self-hosted, gen3, large ] container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${{ needs.build-buildtools-image.outputs.build-tools-tag }} - options: --init + # Raise locked memory limit for tokio-epoll-uring. + # On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12), + # io_uring will account the memory of the CQ and SQ as locked. + # More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391 + options: --init --shm-size=512mb --ulimit memlock=67108864:67108864 strategy: fail-fast: false matrix: @@ -341,7 +345,9 @@ jobs: - name: Run rust tests run: | - ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES + for io_engine in std-fs tokio-epoll-uring ; do + NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES + done # Run separate tests for real S3 export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty @@ -419,13 +425,14 @@ jobs: runs-on: [ self-hosted, gen3, large ] container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${{ needs.build-buildtools-image.outputs.build-tools-tag }} - # Default shared memory is 64mb - options: --init --shm-size=512mb + # for changed limits, see comments on `options:` earlier in this file + options: --init --shm-size=512mb --ulimit memlock=67108864:67108864 strategy: fail-fast: false matrix: build_type: [ debug, release ] pg_version: [ v14, v15, v16 ] + pageserver_virtual_file_io_engine: [ std-fs, tokio-epoll-uring ] steps: - name: Checkout uses: actions/checkout@v3 @@ -448,6 +455,7 @@ jobs: TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} CHECK_ONDISK_DATA_COMPATIBILITY: nonempty BUILD_TAG: ${{ needs.tag.outputs.build-tag }} + PAGESERVER_VIRTUAL_FILE_IO_ENGINE: ${{ matrix.pageserver_virtual_file_io_engine }} - name: Merge and upload coverage data if: matrix.build_type == 'debug' && matrix.pg_version == 'v14' @@ -458,14 +466,16 @@ jobs: runs-on: [ self-hosted, gen3, small ] container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${{ needs.build-buildtools-image.outputs.build-tools-tag }} - # Default shared memory is 64mb - options: --init --shm-size=512mb + # for changed limits, see comments on `options:` earlier in this file + options: --init --shm-size=512mb --ulimit memlock=67108864:67108864 if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks') strategy: fail-fast: false matrix: + # the amount of groups (N) should be reflected in `extra_params: --splits N ...` pytest_split_group: [ 1, 2, 3, 4 ] build_type: [ release ] + pageserver_virtual_file_io_engine: [ std-fs, tokio-epoll-uring ] steps: - name: Checkout uses: actions/checkout@v3 @@ -477,11 +487,12 @@ jobs: test_selection: performance run_in_parallel: false save_perf_report: ${{ github.ref_name == 'main' }} - extra_params: --splits ${{ strategy.job-total }} --group ${{ matrix.pytest_split_group }} + extra_params: --splits 4 --group ${{ matrix.pytest_split_group }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}" + PAGESERVER_VIRTUAL_FILE_IO_ENGINE: "${{ matrix.pageserver_virtual_file_io_engine }}" # XXX: no coverage data handling here, since benchmarks are run on release builds, # while coverage is currently collected for the debug ones diff --git a/scripts/flaky_tests.py b/scripts/flaky_tests.py index b07e4bea9b..61a97f520d 100755 --- a/scripts/flaky_tests.py +++ b/scripts/flaky_tests.py @@ -3,6 +3,7 @@ import argparse import json import logging +import os from collections import defaultdict from typing import DefaultDict, Dict @@ -45,6 +46,15 @@ def main(args: argparse.Namespace): logging.error("cannot fetch flaky tests from the DB due to an error", exc) rows = [] + # If a test run has non-default PAGESERVER_VIRTUAL_FILE_IO_ENGINE (i.e. not empty, not std-fs), + # use it to parametrize test name along with build_type and pg_version + # + # See test_runner/fixtures/parametrize.py for details + if (io_engine := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in ("", "std-fs"): + pageserver_virtual_file_io_engine_parameter = f"-{io_engine}" + else: + pageserver_virtual_file_io_engine_parameter = "" + for row in rows: # We don't want to automatically rerun tests in a performance suite if row["parent_suite"] != "test_runner.regress": @@ -53,10 +63,10 @@ def main(args: argparse.Namespace): if row["name"].endswith("]"): parametrized_test = row["name"].replace( "[", - f"[{build_type}-pg{pg_version}-", + f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}-", ) else: - parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}]" + parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}]" res[row["parent_suite"]][row["suite"]][parametrized_test] = True diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 24b42b1802..66dab34147 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1197,6 +1197,7 @@ def _shared_simple_env( neon_binpath: Path, pg_distrib_dir: Path, pg_version: PgVersion, + pageserver_virtual_file_io_engine: str, ) -> Iterator[NeonEnv]: """ # Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES @@ -1226,6 +1227,7 @@ def _shared_simple_env( preserve_database_files=pytestconfig.getoption("--preserve-database-files"), test_name=request.node.name, test_output_dir=test_output_dir, + pageserver_virtual_file_io_engine=pageserver_virtual_file_io_engine, ) as builder: env = builder.init_start() @@ -1264,6 +1266,7 @@ def neon_env_builder( request: FixtureRequest, test_overlay_dir: Path, top_output_dir: Path, + pageserver_virtual_file_io_engine: str, ) -> Iterator[NeonEnvBuilder]: """ Fixture to create a Neon environment for test. @@ -1293,6 +1296,7 @@ def neon_env_builder( broker=default_broker, run_id=run_id, preserve_database_files=pytestconfig.getoption("--preserve-database-files"), + pageserver_virtual_file_io_engine=pageserver_virtual_file_io_engine, test_name=request.node.name, test_output_dir=test_output_dir, test_overlay_dir=test_overlay_dir, diff --git a/test_runner/fixtures/parametrize.py b/test_runner/fixtures/parametrize.py index 53350138dd..d8ac92abb6 100644 --- a/test_runner/fixtures/parametrize.py +++ b/test_runner/fixtures/parametrize.py @@ -8,7 +8,7 @@ from _pytest.python import Metafunc from fixtures.pg_version import PgVersion """ -Dynamically parametrize tests by Postgres version and build type (debug/release/remote) +Dynamically parametrize tests by Postgres version, build type (debug/release/remote), and possibly by other parameters """ @@ -31,11 +31,12 @@ def build_type(request: FixtureRequest) -> Optional[str]: return None -def pytest_generate_tests(metafunc: Metafunc): - # Do not parametrize performance tests yet, we need to prepare grafana charts first - if "test_runner/performance" in metafunc.definition._nodeid: - return +@pytest.fixture(scope="function", autouse=True) +def pageserver_virtual_file_io_engine(request: FixtureRequest) -> Optional[str]: + return None + +def pytest_generate_tests(metafunc: Metafunc): if (v := os.environ.get("DEFAULT_PG_VERSION")) is None: pg_versions = [version for version in PgVersion if version != PgVersion.NOT_SET] else: @@ -46,5 +47,12 @@ def pytest_generate_tests(metafunc: Metafunc): else: build_types = [bt.lower()] - metafunc.parametrize("build_type", build_types) - metafunc.parametrize("pg_version", pg_versions, ids=map(lambda v: f"pg{v}", pg_versions)) + # Do not parametrize performance tests yet by Postgres version or build type, we need to prepare grafana charts first + if "test_runner/performance" not in metafunc.definition._nodeid: + metafunc.parametrize("build_type", build_types) + metafunc.parametrize("pg_version", pg_versions, ids=map(lambda v: f"pg{v}", pg_versions)) + + # A hacky way to parametrize tests only for `pageserver_virtual_file_io_engine=tokio-epoll-uring` + # And do not change test name for default `pageserver_virtual_file_io_engine=std-fs` to keep tests statistics + if (io_engine := os.environ.get("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in ("", "std-fs"): + metafunc.parametrize("pageserver_virtual_file_io_engine", [io_engine])