mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-02 04:50:38 +00:00
implement failpoint lib replacement
This commit is contained in:
@@ -1,9 +1,12 @@
|
||||
//! Failpoint support code shared between pageserver and safekeepers.
|
||||
//!
|
||||
//! This module provides a compatibility layer over the new neon_failpoint crate.
|
||||
|
||||
use tokio_util::sync::CancellationToken;
|
||||
pub use neon_failpoint::{configure_failpoint as apply_failpoint, init, has_failpoints};
|
||||
|
||||
/// Declare a failpoint that can use to `pause` failpoint action.
|
||||
/// We don't want to block the executor thread, hence, spawn_blocking + await.
|
||||
/// This is now a compatibility wrapper around the new neon_failpoint crate.
|
||||
///
|
||||
/// Optionally pass a cancellation token, and this failpoint will drop out of
|
||||
/// its pause when the cancellation token fires. This is useful for testing
|
||||
@@ -11,9 +14,6 @@ use tokio_util::sync::CancellationToken;
|
||||
/// The macro evaluates to a Result in that case, where Ok(()) is the case
|
||||
/// where the failpoint was not paused, and Err() is the case where cancellation
|
||||
/// token fired while evaluating the failpoint.
|
||||
///
|
||||
/// Remember to unpause the failpoint in the test; until that happens, one of the
|
||||
/// limited number of spawn_blocking thread pool threads is leaked.
|
||||
#[macro_export]
|
||||
macro_rules! pausable_failpoint {
|
||||
($name:literal) => {{
|
||||
@@ -24,26 +24,10 @@ macro_rules! pausable_failpoint {
|
||||
}};
|
||||
($name:literal, $cancel:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
let failpoint_fut = ::tokio::task::spawn_blocking({
|
||||
let current = ::tracing::Span::current();
|
||||
move || {
|
||||
let _entered = current.entered();
|
||||
::tracing::info!("at failpoint {}", $name);
|
||||
::fail::fail_point!($name);
|
||||
}
|
||||
});
|
||||
let cancel_fut = async move {
|
||||
$cancel.cancelled().await;
|
||||
};
|
||||
::tokio::select! {
|
||||
res = failpoint_fut => {
|
||||
res.expect("spawn_blocking");
|
||||
// continue with execution
|
||||
Ok(())
|
||||
},
|
||||
_ = cancel_fut => {
|
||||
Err(())
|
||||
}
|
||||
match ::neon_failpoint::failpoint_with_cancellation($name, None, $cancel).await {
|
||||
::neon_failpoint::FailpointResult::Continue => Ok(()),
|
||||
::neon_failpoint::FailpointResult::Return(_) => Ok(()),
|
||||
::neon_failpoint::FailpointResult::Cancelled => Err(()),
|
||||
}
|
||||
} else {
|
||||
Ok(())
|
||||
@@ -53,7 +37,7 @@ macro_rules! pausable_failpoint {
|
||||
|
||||
pub use pausable_failpoint;
|
||||
|
||||
/// use with fail::cfg("$name", "return(2000)")
|
||||
/// use with neon_failpoint::configure_failpoint("$name", "sleep(2000)")
|
||||
///
|
||||
/// The effect is similar to a "sleep(2000)" action, i.e. we sleep for the
|
||||
/// specified time (in milliseconds). The main difference is that we use async
|
||||
@@ -66,120 +50,32 @@ pub use pausable_failpoint;
|
||||
#[macro_export]
|
||||
macro_rules! __failpoint_sleep_millis_async {
|
||||
($name:literal) => {{
|
||||
// If the failpoint is used with a "return" action, set should_sleep to the
|
||||
// returned value (as string). Otherwise it's set to None.
|
||||
let should_sleep = (|| {
|
||||
::fail::fail_point!($name, |x| x);
|
||||
::std::option::Option::None
|
||||
})();
|
||||
|
||||
// Sleep if the action was a returned value
|
||||
if let ::std::option::Option::Some(duration_str) = should_sleep {
|
||||
$crate::failpoint_support::failpoint_sleep_helper($name, duration_str).await
|
||||
if cfg!(feature = "testing") {
|
||||
::neon_failpoint::failpoint($name, None).await;
|
||||
}
|
||||
}};
|
||||
($name:literal, $cancel:expr) => {{
|
||||
// If the failpoint is used with a "return" action, set should_sleep to the
|
||||
// returned value (as string). Otherwise it's set to None.
|
||||
let should_sleep = (|| {
|
||||
::fail::fail_point!($name, |x| x);
|
||||
::std::option::Option::None
|
||||
})();
|
||||
|
||||
// Sleep if the action was a returned value
|
||||
if let ::std::option::Option::Some(duration_str) = should_sleep {
|
||||
$crate::failpoint_support::failpoint_sleep_cancellable_helper(
|
||||
$name,
|
||||
duration_str,
|
||||
$cancel,
|
||||
)
|
||||
.await
|
||||
if cfg!(feature = "testing") {
|
||||
::neon_failpoint::failpoint_with_cancellation($name, None, $cancel).await;
|
||||
}
|
||||
}};
|
||||
}
|
||||
pub use __failpoint_sleep_millis_async as sleep_millis_async;
|
||||
|
||||
// Helper function used by the macro. (A function has nicer scoping so we
|
||||
// don't need to decorate everything with "::")
|
||||
// Helper functions are no longer needed as the new implementation handles this internally
|
||||
// but we keep them for backward compatibility
|
||||
#[doc(hidden)]
|
||||
pub async fn failpoint_sleep_helper(name: &'static str, duration_str: String) {
|
||||
let millis = duration_str.parse::<u64>().unwrap();
|
||||
let d = std::time::Duration::from_millis(millis);
|
||||
|
||||
tracing::info!("failpoint {:?}: sleeping for {:?}", name, d);
|
||||
tokio::time::sleep(d).await;
|
||||
tracing::info!("failpoint {:?}: sleep done", name);
|
||||
pub async fn failpoint_sleep_helper(_name: &'static str, _duration_str: String) {
|
||||
// This is now handled by the neon_failpoint crate internally
|
||||
tracing::warn!("failpoint_sleep_helper is deprecated, use neon_failpoint directly");
|
||||
}
|
||||
|
||||
// Helper function used by the macro. (A function has nicer scoping so we
|
||||
// don't need to decorate everything with "::")
|
||||
#[doc(hidden)]
|
||||
pub async fn failpoint_sleep_cancellable_helper(
|
||||
name: &'static str,
|
||||
duration_str: String,
|
||||
cancel: &CancellationToken,
|
||||
_name: &'static str,
|
||||
_duration_str: String,
|
||||
_cancel: &CancellationToken,
|
||||
) {
|
||||
let millis = duration_str.parse::<u64>().unwrap();
|
||||
let d = std::time::Duration::from_millis(millis);
|
||||
|
||||
tracing::info!("failpoint {:?}: sleeping for {:?}", name, d);
|
||||
tokio::time::timeout(d, cancel.cancelled()).await.ok();
|
||||
tracing::info!("failpoint {:?}: sleep done", name);
|
||||
}
|
||||
|
||||
/// Initialize the configured failpoints
|
||||
///
|
||||
/// You must call this function before any concurrent threads do operations.
|
||||
pub fn init() -> fail::FailScenario<'static> {
|
||||
// The failpoints lib provides support for parsing the `FAILPOINTS` env var.
|
||||
// We want non-default behavior for `exit`, though, so, we handle it separately.
|
||||
//
|
||||
// Format for FAILPOINTS is "name=actions" separated by ";".
|
||||
let actions = std::env::var("FAILPOINTS");
|
||||
if actions.is_ok() {
|
||||
// SAFETY: this function should before any threads start and access env vars concurrently
|
||||
unsafe {
|
||||
std::env::remove_var("FAILPOINTS");
|
||||
}
|
||||
} else {
|
||||
// let the library handle non-utf8, or nothing for not present
|
||||
}
|
||||
|
||||
let scenario = fail::FailScenario::setup();
|
||||
|
||||
if let Ok(val) = actions {
|
||||
val.split(';')
|
||||
.enumerate()
|
||||
.map(|(i, s)| s.split_once('=').ok_or((i, s)))
|
||||
.for_each(|res| {
|
||||
let (name, actions) = match res {
|
||||
Ok(t) => t,
|
||||
Err((i, s)) => {
|
||||
panic!(
|
||||
"startup failpoints: missing action on the {}th failpoint; try `{s}=return`",
|
||||
i + 1,
|
||||
);
|
||||
}
|
||||
};
|
||||
if let Err(e) = apply_failpoint(name, actions) {
|
||||
panic!("startup failpoints: failed to apply failpoint {name}={actions}: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
scenario
|
||||
}
|
||||
|
||||
pub fn apply_failpoint(name: &str, actions: &str) -> Result<(), String> {
|
||||
if actions == "exit" {
|
||||
fail::cfg_callback(name, exit_failpoint)
|
||||
} else {
|
||||
fail::cfg(name, actions)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn exit_failpoint() {
|
||||
tracing::info!("Exit requested by failpoint");
|
||||
std::process::exit(1);
|
||||
// This is now handled by the neon_failpoint crate internally
|
||||
tracing::warn!("failpoint_sleep_cancellable_helper is deprecated, use neon_failpoint directly");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user