build: sync between benchies (#8919)

Sometimes the benchmarks fail to start up the pageserver within 10s for no
obvious reason. Benchmarks run sequentially on otherwise idle runners.
Try running `sync(2)` after each bench to force a cleaner slate.

Implement this via:
- an autouse fixture gated on the `SYNC_AFTER_EACH_TEST` environment variable
- the autouse fixture appears to be the outermost fixture, so its teardown
  runs last and the sync covers the whole test (see the sketch after this
  list)
- setting `SYNC_AFTER_EACH_TEST=true` for benchmarks in the build_and_test
  workflow
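
To ground the ordering claim above, a minimal sketch (hypothetical fixture
and test names, not part of this commit): pytest requests autouse fixtures
first within their scope, so their teardown runs last, after the test's
other fixtures have been torn down.

    import pytest

    order = []

    @pytest.fixture(autouse=True)
    def sync_like_fixture():
        # Requested first within function scope, so torn down last.
        order.append("autouse setup")
        yield
        order.append("autouse teardown")

    @pytest.fixture
    def env():
        # Stands in for a heavier fixture, e.g. a Neon environment.
        order.append("env setup")
        yield
        order.append("env teardown")

    def test_ordering(env):
        order.append("test body")

    # Resulting order: autouse setup, env setup, test body,
    # env teardown, autouse teardown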

Evidence:
https://neon-github-public-dev.s3.amazonaws.com/reports/main/10678984691/index.html#suites/5008d72a1ba3c0d618a030a938fc035c/1210266507534c0f/

---------

Co-authored-by: Alexander Bayandin <alexander@neon.tech>
Author: Joonas Koivunen
Date: 2024-09-05 16:29:48 +03:00
Committed by: GitHub
Parent: 850421ec06
Commit: efe03d5a1c
2 changed files with 27 additions and 0 deletions

.github/workflows/build_and_test.yml

@@ -286,6 +286,7 @@ jobs:
 PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
 TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
 PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
+SYNC_AFTER_EACH_TEST: true
 # XXX: no coverage data handling here, since benchmarks are run on release builds,
 # while coverage is currently collected for the debug ones
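
The added line sits among the benchmark job's environment variables, so
GitHub Actions exports it to each step's process environment; on the pytest
side a plain environment lookup is all that is needed, as in this minimal
sketch (hypothetical helper name, not part of this commit):

    import os

    def sync_enabled() -> bool:
        # Job-level `env:` entries are exported to step processes,
        # so the fixture can gate on a simple environment lookup.
        return os.environ.get("SYNC_AFTER_EACH_TEST") == "true"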

test_runner/fixtures/compare_fixtures.py

@@ -1,3 +1,5 @@
+import os
+import time
 from abc import ABC, abstractmethod
 from contextlib import _GeneratorContextManager, contextmanager
@@ -8,6 +10,7 @@ import pytest
 from _pytest.fixtures import FixtureRequest
 from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
+from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
     NeonEnv,
     PgBin,
@@ -333,3 +336,26 @@ def neon_with_baseline(request: FixtureRequest) -> PgCompare:
     fixture = request.getfixturevalue(request.param)
     assert isinstance(fixture, PgCompare), f"test error: fixture {fixture} is not PgCompare"
     return fixture
+
+
+@pytest.fixture(scope="function", autouse=True)
+def sync_after_each_test():
+    # Call `sync(2)` after each test if the `SYNC_AFTER_EACH_TEST` env var is `true`.
+    #
+    # In CI, `SYNC_AFTER_EACH_TEST` is set to `true` only for benchmarks
+    # (`test_runner/performance`) that run on self-hosted runners, because some
+    # of these tests are pretty write-heavy and make it hard to start the
+    # processes within 10s.
+    key = "SYNC_AFTER_EACH_TEST"
+    enabled = os.environ.get(key) == "true"
+    yield
+    if not enabled:
+        # regress test, or running locally
+        return
+
+    start = time.time()
+    # we only run benches on unices; os.sync() might not exist on windows
+    os.sync()
+    elapsed = time.time() - start
+    log.info(f"called sync after test {elapsed=}")