SELECT 💣(); (#8270)

## Problem
We want to be able to test how our infrastructure reacts on segfaults in
Postgres (for example, we collect cores, and get some required
logs/metrics, etc)

## Summary of changes
- Add `trigger_segfauls` function to `neon_test_utils` to trigger a
segfault in Postgres
- Add `trigger_panic` function to `neon_test_utils` to trigger SIGABRT
(by using `elog(PANIC, ...))
- Fix cleanup logic in regression tests in endpoint crashed
This commit is contained in:
Alexander Bayandin
2024-07-05 15:12:01 +01:00
committed by Vlad Lazar
parent 54428fec1e
commit 832480dcca
6 changed files with 80 additions and 6 deletions

View File

@@ -943,6 +943,8 @@ class NeonEnvBuilder:
# if the test threw an exception, don't check for errors
# as a failing assertion would cause the cleanup below to fail
ps_assert_metric_no_errors=(exc_type is None),
# do not fail on endpoint errors to allow the rest of cleanup to proceed
fail_on_endpoint_errors=False,
)
cleanup_error = None
@@ -1214,11 +1216,11 @@ class NeonEnv:
for f in futs:
f.result()
def stop(self, immediate=False, ps_assert_metric_no_errors=False):
def stop(self, immediate=False, ps_assert_metric_no_errors=False, fail_on_endpoint_errors=True):
"""
After this method returns, there should be no child processes running.
"""
self.endpoints.stop_all()
self.endpoints.stop_all(fail_on_endpoint_errors)
# Stop storage controller before pageservers: we don't want it to spuriously
# detect a pageserver "failure" during test teardown
@@ -3899,9 +3901,17 @@ class EndpointFactory:
pageserver_id=pageserver_id,
)
def stop_all(self) -> "EndpointFactory":
def stop_all(self, fail_on_error=True) -> "EndpointFactory":
exception = None
for ep in self.endpoints:
ep.stop()
try:
ep.stop()
except Exception as e:
log.error(f"Failed to stop endpoint {ep.endpoint_id}: {e}")
exception = e
if fail_on_error and exception is not None:
raise exception
return self

View File

@@ -0,0 +1,23 @@
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder
@pytest.mark.parametrize(
"sql_func",
[
"trigger_panic",
"trigger_segfault",
"💣", # calls `trigger_segfault` internally
],
)
def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
"""
Test that triggering crash from neon_test_utils crashes the endpoint
"""
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_endpoint_crash")
endpoint = env.endpoints.create_start("test_endpoint_crash")
endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")
with pytest.raises(Exception, match="This probably means the server terminated abnormally"):
endpoint.safe_psql(f"SELECT {sql_func}();")