From d54624153df1bc6d2f879044a5d66fd09eca19f8 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 2 Oct 2024 17:44:25 +0100
Subject: [PATCH] tests: sync_after_each_test -> sync_between_tests (#9239)

## Problem

We are seeing frequent pageserver startup timeouts while it calls syncfs().
There is an existing fixture that syncs _after_ tests, but not before the
first one. We hypothesize that some failures are happening on the first test
in a job.

## Summary of changes

- Extend the existing sync_after_each_test fixture to sync between all tests,
  including syncing before running the first test. That should remove any
  ambiguity about whether the sync is happening on the correct node.

This is an alternative to https://github.com/neondatabase/neon/pull/8957 -- I
didn't realize until I saw Alexander's comment on that PR that we have an
existing hook that syncs filesystems and can be extended.
---
 .github/workflows/build_and_test.yml     |  2 +-
 test_runner/fixtures/compare_fixtures.py | 30 +++++++++++++++++-------------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 8cb49d5d76..a759efb56c 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -341,7 +341,7 @@ jobs:
           PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
           TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
           PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
-          SYNC_AFTER_EACH_TEST: true
+          SYNC_BETWEEN_TESTS: true
 
       # XXX: no coverage data handling here, since benchmarks are run on release builds,
       # while coverage is currently collected for the debug ones
diff --git a/test_runner/fixtures/compare_fixtures.py b/test_runner/fixtures/compare_fixtures.py
index 770b32b11e..fb9c2d2b86 100644
--- a/test_runner/fixtures/compare_fixtures.py
+++ b/test_runner/fixtures/compare_fixtures.py
@@ -340,23 +340,27 @@ def neon_with_baseline(request: FixtureRequest) -> PgCompare:
 
 
 @pytest.fixture(scope="function", autouse=True)
-def sync_after_each_test():
-    # The fixture calls `sync(2)` after each test if `SYNC_AFTER_EACH_TEST` env var is `true`
+def sync_between_tests():
+    # The fixture calls `sync(2)` after each test if `SYNC_BETWEEN_TESTS` env var is `true`
     #
-    # In CI, `SYNC_AFTER_EACH_TEST` is set to `true` only for benchmarks (`test_runner/performance`)
+    # In CI, `SYNC_BETWEEN_TESTS` is set to `true` only for benchmarks (`test_runner/performance`)
     # that are run on self-hosted runners because some of these tests are pretty write-heavy
     # and create issues to start the processes within 10s
-    key = "SYNC_AFTER_EACH_TEST"
+    key = "SYNC_BETWEEN_TESTS"
     enabled = os.environ.get(key) == "true"
 
+    if enabled:
+        start = time.time()
+        # we only run benches on unices, the method might not exist on windows
+        os.sync()
+        elapsed = time.time() - start
+        log.info(f"called sync before test {elapsed=}")
+
     yield
 
-    if not enabled:
-        # regress test, or running locally
-        return
-
-    start = time.time()
-    # we only run benches on unices, the method might not exist on windows
-    os.sync()
-    elapsed = time.time() - start
-    log.info(f"called sync after test {elapsed=}")
+    if enabled:
+        start = time.time()
+        # we only run benches on unices, the method might not exist on windows
+        os.sync()
+        elapsed = time.time() - start
+        log.info(f"called sync after test {elapsed=}")
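
For readability, here is a minimal, self-contained sketch of how `sync_between_tests` reads once the patch is applied, assembled from the `+` and context lines of the second hunk. The import block and the stdlib `logging` logger are assumptions made for the sketch only; the real `test_runner/fixtures/compare_fixtures.py` already carries its own imports and uses the repo's `log` helper.

```python
# Sketch of the post-patch fixture, assuming stdlib logging stands in for the
# repo's `log` helper; the fixture body mirrors the hunk above.
import logging
import os
import time

import pytest

log = logging.getLogger(__name__)


@pytest.fixture(scope="function", autouse=True)
def sync_between_tests():
    # Call `sync(2)` before and after each test when the `SYNC_BETWEEN_TESTS`
    # env var is `true`; in CI this is enabled only for the write-heavy
    # benchmarks in `test_runner/performance` on self-hosted runners.
    key = "SYNC_BETWEEN_TESTS"
    enabled = os.environ.get(key) == "true"

    if enabled:
        start = time.time()
        # we only run benches on unices, the method might not exist on windows
        os.sync()
        elapsed = time.time() - start
        log.info(f"called sync before test {elapsed=}")

    yield  # the test itself runs here

    if enabled:
        start = time.time()
        # we only run benches on unices, the method might not exist on windows
        os.sync()
        elapsed = time.time() - start
        log.info(f"called sync after test {elapsed=}")
```

Because the fixture is `autouse=True` with function scope, the code before `yield` runs as setup and the code after it as teardown, so the pre-`yield` sync now also covers the first test of a job, which the old after-only fixture missed.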