Always compile failpoints support, add runtime config option to enable it.

It's annoying that many of the tests required a special build with the "testing" feature. I think it's better to have a runtime check. It adds a few CPU instructions to where failpoints are defined, even if they are disabled, but that's a small price to pay for the convenience. Fixes issue 2531, although differently from what was discussed on that issue.
2026-01-14 17:02:56 +00:00 · 2022-12-07 23:36:51 +02:00
parent f5a735ac3b
commit e0c43396bf
23 changed files with 120 additions and 98 deletions
--- a/test_runner/README.md
+++ b/test_runner/README.md
@@ -6,9 +6,6 @@ Prerequisites:
 - Correctly configured Python, see [`/docs/sourcetree.md`](/docs/sourcetree.md#using-python)
 - Neon and Postgres binaries
    - See the root [README.md](/README.md) for build directions
-      If you want to test tests with test-only APIs, you would need to add `--features testing` to Rust code build commands.
-      For convenience, repository cargo config contains `build_testing` alias, that serves as a subcommand, adding the required feature flags.
-      Usage example: `cargo build_testing --release` is equivalent to `cargo build --features testing --release`
    - Tests can be run from the git tree; or see the environment variables
      below to run from other directories.
 - The neon git repo, including the postgres submodule
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -587,6 +587,7 @@ class NeonEnvBuilder:
        auth_enabled: bool = False,
        rust_log_override: Optional[str] = None,
        default_branch_name: str = DEFAULT_BRANCH_NAME,
+        testing_mode: bool = True,
    ):
        self.repo_dir = repo_dir
        self.rust_log_override = rust_log_override
@@ -608,6 +609,7 @@ class NeonEnvBuilder:
        self.neon_binpath = neon_binpath
        self.pg_distrib_dir = pg_distrib_dir
        self.pg_version = pg_version
+        self.testing_mode = testing_mode

    def init(self) -> NeonEnv:
        # Cannot create more than one environment from one builder
@@ -858,6 +860,7 @@ class NeonEnv:
            http=self.port_distributor.get_port(),
        )
        pageserver_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
+        pageserver_testing_mode = "true" if config.testing_mode else "false"

        toml += textwrap.dedent(
            f"""
@@ -866,6 +869,7 @@ class NeonEnv:
            listen_pg_addr = 'localhost:{pageserver_port.pg}'
            listen_http_addr = 'localhost:{pageserver_port.http}'
            auth_type = '{pageserver_auth_type}'
+            testing_mode = {pageserver_testing_mode}
        """
        )

@@ -978,6 +982,10 @@ def _shared_simple_env(
        pg_distrib_dir=pg_distrib_dir,
        pg_version=pg_version,
        run_id=run_id,
+        # Disable failpoint support. Failpoints could have unexpected consequences
+        # when the pageserver is shared by concurrent tests. Also, it might affect
+        # performance, and we use the shared simple env in performance tests.
+        testing_mode=False,
    ) as builder:
        env = builder.init_start()

@@ -1048,11 +1056,10 @@ class PageserverApiException(Exception):


 class PageserverHttpClient(requests.Session):
-    def __init__(self, port: int, is_testing_enabled_or_skip: Fn, auth_token: Optional[str] = None):
+    def __init__(self, port: int, auth_token: Optional[str] = None):
        super().__init__()
        self.port = port
        self.auth_token = auth_token
-        self.is_testing_enabled_or_skip = is_testing_enabled_or_skip

        if auth_token is not None:
            self.headers["Authorization"] = f"Bearer {auth_token}"
@@ -1071,8 +1078,6 @@ class PageserverHttpClient(requests.Session):
        self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()

    def configure_failpoints(self, config_strings: Tuple[str, str] | List[Tuple[str, str]]):
-        self.is_testing_enabled_or_skip()
-
        if isinstance(config_strings, tuple):
            pairs = [config_strings]
        else:
@@ -1212,8 +1217,6 @@ class PageserverHttpClient(requests.Session):
    def timeline_gc(
        self, tenant_id: TenantId, timeline_id: TimelineId, gc_horizon: Optional[int]
    ) -> dict[str, Any]:
-        self.is_testing_enabled_or_skip()
-
        log.info(
            f"Requesting GC: tenant {tenant_id}, timeline {timeline_id}, gc_horizon {repr(gc_horizon)}"
        )
@@ -1229,8 +1232,6 @@ class PageserverHttpClient(requests.Session):
        return res_json

    def timeline_compact(self, tenant_id: TenantId, timeline_id: TimelineId):
-        self.is_testing_enabled_or_skip()
-
        log.info(f"Requesting compact: tenant {tenant_id}, timeline {timeline_id}")
        res = self.put(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact"
@@ -1254,8 +1255,6 @@ class PageserverHttpClient(requests.Session):
        return res_json

    def timeline_checkpoint(self, tenant_id: TenantId, timeline_id: TimelineId):
-        self.is_testing_enabled_or_skip()
-
        log.info(f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}")
        res = self.put(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint"
@@ -1815,10 +1814,6 @@ class NeonPageserver(PgProtocol):
    ):
        self.stop(immediate=True)

-    def is_testing_enabled_or_skip(self):
-        if '"testing"' not in self.version:
-            pytest.skip("pageserver was built without 'testing' feature")
-
    def is_profiling_enabled_or_skip(self):
        if '"profiling"' not in self.version:
            pytest.skip("pageserver was built without 'profiling' feature")
@@ -1827,7 +1822,6 @@ class NeonPageserver(PgProtocol):
        return PageserverHttpClient(
            port=self.service_port.http,
            auth_token=auth_token,
-            is_testing_enabled_or_skip=self.is_testing_enabled_or_skip,
        )

    def assert_no_errors(self):
--- a/test_runner/performance/README.md
+++ b/test_runner/performance/README.md
@@ -3,7 +3,7 @@
 First make a release build. The profiling flag is optional, used only for tests that
 generate flame graphs. The `-s` flag just silences a lot of output, and makes it
 easier to see if you have compile errors without scrolling up.
-`BUILD_TYPE=release CARGO_BUILD_FLAGS="--features=testing,profiling" make -s -j8`
+`BUILD_TYPE=release CARGO_BUILD_FLAGS="--features=profiling" make -s -j8`

 NOTE: the `profiling` flag only works on linux because we use linux-specific
 libc APIs like `libc::timer_t`.
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -327,7 +327,6 @@ def check_neon_works(
    auth_token = snapshot_config["pageserver"]["auth_token"]
    pageserver_http = PageserverHttpClient(
        port=pageserver_port,
-        is_testing_enabled_or_skip=lambda: True,  # TODO: check if testing really enabled
        auth_token=auth_token,
    )

--- a/test_runner/regress/test_recovery.py
+++ b/test_runner/regress/test_recovery.py
@@ -13,7 +13,6 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}"

    env = neon_env_builder.init()
-    env.pageserver.is_testing_enabled_or_skip()

    neon_env_builder.start()

--- a/test_runner/regress/test_tenant_relocation.py
+++ b/test_runner/regress/test_tenant_relocation.py
@@ -58,7 +58,6 @@ def new_pageserver_service(
    pageserver_client = PageserverHttpClient(
        port=http_port,
        auth_token=None,
-        is_testing_enabled_or_skip=lambda: True,  # TODO: check if testing really enabled
    )
    try:
        pageserver_process = start_in_background(
@@ -360,7 +359,6 @@ def test_tenant_relocation(
    new_pageserver_http = PageserverHttpClient(
        port=new_pageserver_http_port,
        auth_token=None,
-        is_testing_enabled_or_skip=env.pageserver.is_testing_enabled_or_skip,
    )

    with new_pageserver_service(