From d54624153df1bc6d2f879044a5d66fd09eca19f8 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 2 Oct 2024 17:44:25 +0100
Subject: [PATCH] tests: sync_after_each_test -> sync_between_tests (#9239)

## Problem

We are seeing frequent pageserver startup timeouts while it calls syncfs().
There is an existing fixture that syncs _after_ tests, but not before the
first one. We hypothesize that some failures are happening on the first test
in a job.

## Summary of changes

- Extend the existing sync_after_each_test fixture to sync between all tests,
  including syncing before running the first test. That should remove any
  ambiguity about whether the sync is happening on the correct node.

This is an alternative to https://github.com/neondatabase/neon/pull/8957 -- I
didn't realize until I saw Alexander's comment on that PR that we have an
existing hook that syncs filesystems and can be extended.
---
 .github/workflows/build_and_test.yml     |  2 +-
 test_runner/fixtures/compare_fixtures.py | 30 +++++++++++++++++-------------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 8cb49d5d76..a759efb56c 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -341,7 +341,7 @@ jobs:
           PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
           TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
           PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
-          SYNC_AFTER_EACH_TEST: true
+          SYNC_BETWEEN_TESTS: true
 
       # XXX: no coverage data handling here, since benchmarks are run on release builds,
       # while coverage is currently collected for the debug ones
diff --git a/test_runner/fixtures/compare_fixtures.py b/test_runner/fixtures/compare_fixtures.py
index 770b32b11e..fb9c2d2b86 100644
--- a/test_runner/fixtures/compare_fixtures.py
+++ b/test_runner/fixtures/compare_fixtures.py
@@ -340,23 +340,27 @@ def neon_with_baseline(request: FixtureRequest) -> PgCompare:
 
 
 @pytest.fixture(scope="function", autouse=True)
-def sync_after_each_test():
-    # The fixture calls `sync(2)` after each test if `SYNC_AFTER_EACH_TEST` env var is `true`
+def sync_between_tests():
+    # The fixture calls `sync(2)` after each test if `SYNC_BETWEEN_TESTS` env var is `true`
     #
-    # In CI, `SYNC_AFTER_EACH_TEST` is set to `true` only for benchmarks (`test_runner/performance`)
+    # In CI, `SYNC_BETWEEN_TESTS` is set to `true` only for benchmarks (`test_runner/performance`)
     # that are run on self-hosted runners because some of these tests are pretty write-heavy
     # and create issues to start the processes within 10s
-    key = "SYNC_AFTER_EACH_TEST"
+    key = "SYNC_BETWEEN_TESTS"
     enabled = os.environ.get(key) == "true"
 
+    if enabled:
+        start = time.time()
+        # we only run benches on unices, the method might not exist on windows
+        os.sync()
+        elapsed = time.time() - start
+        log.info(f"called sync before test {elapsed=}")
+
     yield
 
-    if not enabled:
-        # regress test, or running locally
-        return
-
-    start = time.time()
-    # we only run benches on unices, the method might not exist on windows
-    os.sync()
-    elapsed = time.time() - start
-    log.info(f"called sync after test {elapsed=}")
+    if enabled:
+        start = time.time()
+        # we only run benches on unices, the method might not exist on windows
+        os.sync()
+        elapsed = time.time() - start
+        log.info(f"called sync after test {elapsed=}")
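
For readability, here is a minimal, self-contained sketch of how `sync_between_tests` reads once the patch is applied, assembled from the `+` and context lines of the second hunk. The import block and the stdlib `logging` logger are assumptions made for the sketch only; the real `test_runner/fixtures/compare_fixtures.py` already carries its own imports and uses the repo's `log` helper.

```python
# Sketch of the post-patch fixture, assuming stdlib logging stands in for the
# repo's `log` helper; the fixture body mirrors the hunk above.
import logging
import os
import time

import pytest

log = logging.getLogger(__name__)


@pytest.fixture(scope="function", autouse=True)
def sync_between_tests():
    # Call `sync(2)` before and after each test when the `SYNC_BETWEEN_TESTS`
    # env var is `true`; in CI this is enabled only for the write-heavy
    # benchmarks in `test_runner/performance` on self-hosted runners.
    key = "SYNC_BETWEEN_TESTS"
    enabled = os.environ.get(key) == "true"

    if enabled:
        start = time.time()
        # we only run benches on unices, the method might not exist on windows
        os.sync()
        elapsed = time.time() - start
        log.info(f"called sync before test {elapsed=}")

    yield  # the test itself runs here

    if enabled:
        start = time.time()
        # we only run benches on unices, the method might not exist on windows
        os.sync()
        elapsed = time.time() - start
        log.info(f"called sync after test {elapsed=}")
```

Because the fixture is `autouse=True` with function scope, the code before `yield` runs as setup and the code after it as teardown, so the pre-`yield` sync now also covers the first test of a job, which the old after-only fixture missed.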