From efe03d5a1ccce8e0f53e733d61fd0e3d0dd904f8 Mon Sep 17 00:00:00 2001
From: Joonas Koivunen
Date: Thu, 5 Sep 2024 16:29:48 +0300
Subject: [PATCH] build: sync between benchies (#8919)

Sometimes the benchmarks fail to start the pageserver within 10s for no
obvious reason. Benchmarks run sequentially on otherwise idle runners.
Try running `sync(2)` after each benchmark to give the next one a
cleaner slate.

Implement this via:
- an autouse fixture gated by the SYNC_AFTER_EACH_TEST environment variable
- the autouse fixture appears to be the outermost fixture, so its teardown
  runs after the other fixtures have been torn down, as expected
  (illustrated by the sketch after the diff)
- SYNC_AFTER_EACH_TEST=true is set for benchmarks in the build_and_test
  workflow

Evidence:
https://neon-github-public-dev.s3.amazonaws.com/reports/main/10678984691/index.html#suites/5008d72a1ba3c0d618a030a938fc035c/1210266507534c0f/

---------

Co-authored-by: Alexander Bayandin
---
 .github/workflows/build_and_test.yml     |  1 +
 test_runner/fixtures/compare_fixtures.py | 26 ++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 53d33b420f..ee5fd1b0c6 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -286,6 +286,7 @@ jobs:
           PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
           TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
           PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
+          SYNC_AFTER_EACH_TEST: true
         # XXX: no coverage data handling here, since benchmarks are run on release builds,
         # while coverage is currently collected for the debug ones
 
diff --git a/test_runner/fixtures/compare_fixtures.py b/test_runner/fixtures/compare_fixtures.py
index 98a9dd7184..7c4a8db36f 100644
--- a/test_runner/fixtures/compare_fixtures.py
+++ b/test_runner/fixtures/compare_fixtures.py
@@ -1,3 +1,5 @@
+import os
+import time
 from abc import ABC, abstractmethod
 from contextlib import _GeneratorContextManager, contextmanager
 
@@ -8,6 +10,7 @@ import pytest
 from _pytest.fixtures import FixtureRequest
 
 from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
+from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
     NeonEnv,
     PgBin,
@@ -333,3 +336,26 @@ def neon_with_baseline(request: FixtureRequest) -> PgCompare:
     fixture = request.getfixturevalue(request.param)
     assert isinstance(fixture, PgCompare), f"test error: fixture {fixture} is not PgCompare"
     return fixture
+
+
+@pytest.fixture(scope="function", autouse=True)
+def sync_after_each_test():
+    # This fixture calls `sync(2)` after each test if the `SYNC_AFTER_EACH_TEST` env var is `true`.
+    #
+    # In CI, `SYNC_AFTER_EACH_TEST` is set to `true` only for benchmarks (`test_runner/performance`)
+    # that run on self-hosted runners, because some of these tests are quite write-heavy
+    # and can make it hard to start the processes within 10s.
+    key = "SYNC_AFTER_EACH_TEST"
+    enabled = os.environ.get(key) == "true"
+
+    yield
+
+    if not enabled:
+        # regress test, or running locally
+        return
+
+    start = time.time()
+    # we only run benchmarks on Unix; os.sync() might not exist on Windows
+    os.sync()
+    elapsed = time.time() - start
+    log.info(f"called sync after test {elapsed=}")
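
Note on the ordering assumption: the following is a minimal, self-contained
pytest sketch (not part of the patch; the fixture and test names are made up)
illustrating why an autouse function-scoped fixture ends up outermost. pytest
sets up autouse fixtures before the fixtures a test requests explicitly, so
their teardown runs last, after everything else has been cleaned up.

    import pytest

    events = []

    @pytest.fixture(autouse=True)
    def sync_like_fixture():
        # stands in for sync_after_each_test: autouse, function-scoped
        events.append("autouse setup")
        yield
        events.append("autouse teardown")  # expected to run last

    @pytest.fixture
    def env_like_fixture():
        # stands in for a heavier, explicitly requested fixture
        events.append("env setup")
        yield
        events.append("env teardown")

    def test_uses_env(env_like_fixture):
        events.append("test body")

    def test_teardown_order():
        # by now the previous test has fully torn down; the autouse fixture
        # finalized after the test's own fixture, i.e. it was the outermost one
        assert events.index("autouse teardown") > events.index("env teardown")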