mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-16 18:02:56 +00:00
SELECT 💣(); (#8270)
## Problem We want to be able to test how our infrastructure reacts on segfaults in Postgres (for example, we collect cores, and get some required logs/metrics, etc) ## Summary of changes - Add `trigger_segfauls` function to `neon_test_utils` to trigger a segfault in Postgres - Add `trigger_panic` function to `neon_test_utils` to trigger SIGABRT (by using `elog(PANIC, ...)) - Fix cleanup logic in regression tests in endpoint crashed
This commit is contained in:
committed by
Vlad Lazar
parent
a020f55a80
commit
94e9811ca4
@@ -7,7 +7,7 @@ OBJS = \
|
||||
neontest.o
|
||||
|
||||
EXTENSION = neon_test_utils
|
||||
DATA = neon_test_utils--1.2.sql
|
||||
DATA = neon_test_utils--1.3.sql
|
||||
PGFILEDESC = "neon_test_utils - helpers for neon testing and debugging"
|
||||
|
||||
PG_CONFIG = pg_config
|
||||
|
||||
@@ -45,3 +45,21 @@ CREATE FUNCTION neon_xlogflush(lsn pg_lsn DEFAULT NULL)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'neon_xlogflush'
|
||||
LANGUAGE C PARALLEL UNSAFE;
|
||||
|
||||
CREATE FUNCTION trigger_panic()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'trigger_panic'
|
||||
LANGUAGE C PARALLEL UNSAFE;
|
||||
|
||||
CREATE FUNCTION trigger_segfault()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'trigger_segfault'
|
||||
LANGUAGE C PARALLEL UNSAFE;
|
||||
|
||||
-- Alias for `trigger_segfault`, just because `SELECT 💣()` looks fun
|
||||
CREATE OR REPLACE FUNCTION 💣() RETURNS void
|
||||
LANGUAGE plpgsql AS $$
|
||||
BEGIN
|
||||
PERFORM trigger_segfault();
|
||||
END;
|
||||
$$;
|
||||
@@ -1,6 +1,6 @@
|
||||
# neon_test_utils extension
|
||||
comment = 'helpers for neon testing and debugging'
|
||||
default_version = '1.2'
|
||||
default_version = '1.3'
|
||||
module_pathname = '$libdir/neon_test_utils'
|
||||
relocatable = true
|
||||
trusted = true
|
||||
|
||||
@@ -42,6 +42,8 @@ PG_FUNCTION_INFO_V1(clear_buffer_cache);
|
||||
PG_FUNCTION_INFO_V1(get_raw_page_at_lsn);
|
||||
PG_FUNCTION_INFO_V1(get_raw_page_at_lsn_ex);
|
||||
PG_FUNCTION_INFO_V1(neon_xlogflush);
|
||||
PG_FUNCTION_INFO_V1(trigger_panic);
|
||||
PG_FUNCTION_INFO_V1(trigger_segfault);
|
||||
|
||||
/*
|
||||
* Linkage to functions in neon module.
|
||||
@@ -489,3 +491,24 @@ neon_xlogflush(PG_FUNCTION_ARGS)
|
||||
XLogFlush(lsn);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/*
|
||||
* Function to trigger panic.
|
||||
*/
|
||||
Datum
|
||||
trigger_panic(PG_FUNCTION_ARGS)
|
||||
{
|
||||
elog(PANIC, "neon_test_utils: panic");
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/*
|
||||
* Function to trigger a segfault.
|
||||
*/
|
||||
Datum
|
||||
trigger_segfault(PG_FUNCTION_ARGS)
|
||||
{
|
||||
int *ptr = NULL;
|
||||
*ptr = 42;
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
@@ -943,6 +943,8 @@ class NeonEnvBuilder:
|
||||
# if the test threw an exception, don't check for errors
|
||||
# as a failing assertion would cause the cleanup below to fail
|
||||
ps_assert_metric_no_errors=(exc_type is None),
|
||||
# do not fail on endpoint errors to allow the rest of cleanup to proceed
|
||||
fail_on_endpoint_errors=False,
|
||||
)
|
||||
cleanup_error = None
|
||||
|
||||
@@ -1214,11 +1216,11 @@ class NeonEnv:
|
||||
for f in futs:
|
||||
f.result()
|
||||
|
||||
def stop(self, immediate=False, ps_assert_metric_no_errors=False):
|
||||
def stop(self, immediate=False, ps_assert_metric_no_errors=False, fail_on_endpoint_errors=True):
|
||||
"""
|
||||
After this method returns, there should be no child processes running.
|
||||
"""
|
||||
self.endpoints.stop_all()
|
||||
self.endpoints.stop_all(fail_on_endpoint_errors)
|
||||
|
||||
# Stop storage controller before pageservers: we don't want it to spuriously
|
||||
# detect a pageserver "failure" during test teardown
|
||||
@@ -3900,9 +3902,17 @@ class EndpointFactory:
|
||||
pageserver_id=pageserver_id,
|
||||
)
|
||||
|
||||
def stop_all(self) -> "EndpointFactory":
|
||||
def stop_all(self, fail_on_error=True) -> "EndpointFactory":
|
||||
exception = None
|
||||
for ep in self.endpoints:
|
||||
ep.stop()
|
||||
try:
|
||||
ep.stop()
|
||||
except Exception as e:
|
||||
log.error(f"Failed to stop endpoint {ep.endpoint_id}: {e}")
|
||||
exception = e
|
||||
|
||||
if fail_on_error and exception is not None:
|
||||
raise exception
|
||||
|
||||
return self
|
||||
|
||||
|
||||
23
test_runner/regress/test_endpoint_crash.py
Normal file
23
test_runner/regress/test_endpoint_crash.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import pytest
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"sql_func",
|
||||
[
|
||||
"trigger_panic",
|
||||
"trigger_segfault",
|
||||
"💣", # calls `trigger_segfault` internally
|
||||
],
|
||||
)
|
||||
def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
|
||||
"""
|
||||
Test that triggering crash from neon_test_utils crashes the endpoint
|
||||
"""
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_endpoint_crash")
|
||||
endpoint = env.endpoints.create_start("test_endpoint_crash")
|
||||
|
||||
endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")
|
||||
with pytest.raises(Exception, match="This probably means the server terminated abnormally"):
|
||||
endpoint.safe_psql(f"SELECT {sql_func}();")
|
||||
Reference in New Issue
Block a user