From 9ba9f32dfe35ad99335497f7d22c14ba02ebea9f Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Wed, 12 Jun 2024 16:10:57 +0200 Subject: [PATCH] Reactivate page bench test in CI after ignoring CopyFail error in pageserver (#8023) ## Problem Testcase page bench test_pageserver_max_throughput_getpage_at_latest_lsn had been deactivated because it was flaky. We now ignore copy fail error messages like in https://github.com/neondatabase/neon/blob/270d3be507643f068120b52838c497f6c1b45b61/test_runner/regress/test_pageserver_getpage_throttle.py#L17-L20 and want to reactivate it to see if it is still flaky. ## Summary of changes - reactivate the test in CI - ignore CopyFail error message during page bench test cases ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. 
## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --- ...geserver_max_throughput_getpage_at_latest_lsn.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py b/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py index 1a0012397c..772a39fe35 100644 --- a/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py +++ b/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py @@ -1,5 +1,4 @@ import json -import os from pathlib import Path from typing import Any, Dict, Tuple @@ -35,10 +34,6 @@ from performance.pageserver.util import ( @pytest.mark.timeout( 10000 ) # TODO: this value is just "a really high number"; have this per instance type -@pytest.mark.skipif( - os.getenv("CI", "false") == "true", - reason="The test if flaky on CI: https://github.com/neondatabase/neon/issues/6724", -) def test_pageserver_max_throughput_getpage_at_latest_lsn( neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker, @@ -91,6 +86,14 @@ def test_pageserver_max_throughput_getpage_at_latest_lsn( n_tenants, setup_wrapper, ) + + env.pageserver.allowed_errors.append( + # https://github.com/neondatabase/neon/issues/6925 + # https://github.com/neondatabase/neon/issues/6390 + # https://github.com/neondatabase/neon/issues/6724 + r".*query handler for.*pagestream.*failed: unexpected message: CopyFail during COPY.*" + ) + run_benchmark_max_throughput_latest_lsn(env, pg_bin, record, duration)