Always compile failpoint support; add a runtime config option to enable it.

It's annoying that many of the tests require a special build with the
"testing" feature. I think it's better to have a runtime check. It adds
a few CPU instructions at each place where a failpoint is defined, even
when failpoints are disabled, but that's a small price to pay for the
convenience.

Fixes issue 2531, although differently from what was discussed on that
issue.
This commit is contained in:
Heikki Linnakangas
2022-12-07 23:36:51 +02:00
parent f5a735ac3b
commit e0c43396bf
23 changed files with 120 additions and 98 deletions

View File

@@ -6,9 +6,6 @@ Prerequisites:
- Correctly configured Python, see [`/docs/sourcetree.md`](/docs/sourcetree.md#using-python)
- Neon and Postgres binaries
- See the root [README.md](/README.md) for build directions
If you want to run tests that use test-only APIs, you need to add `--features testing` to the Rust build commands.
For convenience, the repository cargo config contains a `build_testing` alias that serves as a subcommand and adds the required feature flags.
Usage example: `cargo build_testing --release` is equivalent to `cargo build --features testing --release`
- Tests can be run from the git tree; or see the environment variables
below to run from other directories.
- The neon git repo, including the postgres submodule

View File

@@ -587,6 +587,7 @@ class NeonEnvBuilder:
auth_enabled: bool = False,
rust_log_override: Optional[str] = None,
default_branch_name: str = DEFAULT_BRANCH_NAME,
testing_mode: bool = True,
):
self.repo_dir = repo_dir
self.rust_log_override = rust_log_override
@@ -608,6 +609,7 @@ class NeonEnvBuilder:
self.neon_binpath = neon_binpath
self.pg_distrib_dir = pg_distrib_dir
self.pg_version = pg_version
self.testing_mode = testing_mode
def init(self) -> NeonEnv:
# Cannot create more than one environment from one builder
@@ -858,6 +860,7 @@ class NeonEnv:
http=self.port_distributor.get_port(),
)
pageserver_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
pageserver_testing_mode = "true" if config.testing_mode else "false"
toml += textwrap.dedent(
f"""
@@ -866,6 +869,7 @@ class NeonEnv:
listen_pg_addr = 'localhost:{pageserver_port.pg}'
listen_http_addr = 'localhost:{pageserver_port.http}'
auth_type = '{pageserver_auth_type}'
testing_mode = {pageserver_testing_mode}
"""
)
@@ -978,6 +982,10 @@ def _shared_simple_env(
pg_distrib_dir=pg_distrib_dir,
pg_version=pg_version,
run_id=run_id,
# Disable failpoint support. Failpoints could have unexpected consequences
# when the pageserver is shared by concurrent tests. Also, it might affect
# performance, and we use the shared simple env in performance tests.
testing_mode=False,
) as builder:
env = builder.init_start()
@@ -1048,11 +1056,10 @@ class PageserverApiException(Exception):
class PageserverHttpClient(requests.Session):
def __init__(self, port: int, is_testing_enabled_or_skip: Fn, auth_token: Optional[str] = None):
def __init__(self, port: int, auth_token: Optional[str] = None):
super().__init__()
self.port = port
self.auth_token = auth_token
self.is_testing_enabled_or_skip = is_testing_enabled_or_skip
if auth_token is not None:
self.headers["Authorization"] = f"Bearer {auth_token}"
@@ -1071,8 +1078,6 @@ class PageserverHttpClient(requests.Session):
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
def configure_failpoints(self, config_strings: Tuple[str, str] | List[Tuple[str, str]]):
self.is_testing_enabled_or_skip()
if isinstance(config_strings, tuple):
pairs = [config_strings]
else:
@@ -1212,8 +1217,6 @@ class PageserverHttpClient(requests.Session):
def timeline_gc(
self, tenant_id: TenantId, timeline_id: TimelineId, gc_horizon: Optional[int]
) -> dict[str, Any]:
self.is_testing_enabled_or_skip()
log.info(
f"Requesting GC: tenant {tenant_id}, timeline {timeline_id}, gc_horizon {repr(gc_horizon)}"
)
@@ -1229,8 +1232,6 @@ class PageserverHttpClient(requests.Session):
return res_json
def timeline_compact(self, tenant_id: TenantId, timeline_id: TimelineId):
self.is_testing_enabled_or_skip()
log.info(f"Requesting compact: tenant {tenant_id}, timeline {timeline_id}")
res = self.put(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact"
@@ -1254,8 +1255,6 @@ class PageserverHttpClient(requests.Session):
return res_json
def timeline_checkpoint(self, tenant_id: TenantId, timeline_id: TimelineId):
self.is_testing_enabled_or_skip()
log.info(f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}")
res = self.put(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint"
@@ -1815,10 +1814,6 @@ class NeonPageserver(PgProtocol):
):
self.stop(immediate=True)
def is_testing_enabled_or_skip(self):
if '"testing"' not in self.version:
pytest.skip("pageserver was built without 'testing' feature")
def is_profiling_enabled_or_skip(self):
if '"profiling"' not in self.version:
pytest.skip("pageserver was built without 'profiling' feature")
@@ -1827,7 +1822,6 @@ class NeonPageserver(PgProtocol):
return PageserverHttpClient(
port=self.service_port.http,
auth_token=auth_token,
is_testing_enabled_or_skip=self.is_testing_enabled_or_skip,
)
def assert_no_errors(self):

View File

@@ -3,7 +3,7 @@
First make a release build. The profiling flag is optional, used only for tests that
generate flame graphs. The `-s` flag just silences a lot of output, and makes it
easier to see if you have compile errors without scrolling up.
`BUILD_TYPE=release CARGO_BUILD_FLAGS="--features=testing,profiling" make -s -j8`
`BUILD_TYPE=release CARGO_BUILD_FLAGS="--features=profiling" make -s -j8`
NOTE: the `profiling` flag only works on Linux because we use Linux-specific
libc APIs like `libc::timer_t`.

View File

@@ -327,7 +327,6 @@ def check_neon_works(
auth_token = snapshot_config["pageserver"]["auth_token"]
pageserver_http = PageserverHttpClient(
port=pageserver_port,
is_testing_enabled_or_skip=lambda: True, # TODO: check if testing really enabled
auth_token=auth_token,
)

View File

@@ -13,7 +13,6 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}"
env = neon_env_builder.init()
env.pageserver.is_testing_enabled_or_skip()
neon_env_builder.start()

View File

@@ -58,7 +58,6 @@ def new_pageserver_service(
pageserver_client = PageserverHttpClient(
port=http_port,
auth_token=None,
is_testing_enabled_or_skip=lambda: True, # TODO: check if testing really enabled
)
try:
pageserver_process = start_in_background(
@@ -360,7 +359,6 @@ def test_tenant_relocation(
new_pageserver_http = PageserverHttpClient(
port=new_pageserver_http_port,
auth_token=None,
is_testing_enabled_or_skip=env.pageserver.is_testing_enabled_or_skip,
)
with new_pageserver_service(