From a3e05b5abf05fb10571b83a968bb91b3b0a9b1a4 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 11 Jul 2025 15:39:12 +0200 Subject: [PATCH] everything compiles --- compute_tools/src/bin/compute_ctl.rs | 4 +--- libs/neon_failpoint/src/lib.rs | 6 ++++++ libs/utils/src/failpoint_support.rs | 19 +++---------------- pageserver/src/bin/pageserver.rs | 18 +++++------------- pageserver/src/page_service.rs | 2 +- pageserver/src/tenant/timeline.rs | 2 +- storage_controller/src/service.rs | 3 ++- 7 files changed, 19 insertions(+), 35 deletions(-) diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index db7746b8eb..a4d94af46e 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -154,7 +154,7 @@ impl Cli { fn main() -> Result<()> { let cli = Cli::parse(); - let scenario = failpoint_support::init(); + failpoint_support::init().unwrap(); // For historical reasons, the main thread that processes the config and launches postgres // is synchronous, but we always have this tokio runtime available and we "enter" it so @@ -201,8 +201,6 @@ fn main() -> Result<()> { let exit_code = compute_node.run()?; - scenario.teardown(); - deinit_and_exit(exit_code); } diff --git a/libs/neon_failpoint/src/lib.rs b/libs/neon_failpoint/src/lib.rs index 2ef8951cc5..ed66f09c09 100644 --- a/libs/neon_failpoint/src/lib.rs +++ b/libs/neon_failpoint/src/lib.rs @@ -173,6 +173,12 @@ pub fn has_failpoints() -> bool { cfg!(feature = "testing") || std::env::var("FAILPOINTS").is_ok() } +pub fn list() -> Vec<(impl std::fmt::Display, impl std::fmt::Display)> { + FAILPOINTS.read().iter().map(|(name, config)| { + (name.clone(), format!("{config:?}")) + }).collect::>() +} + /// Execute a failpoint with optional context pub fn failpoint(name: &str, context: Option<&FailpointContext>) -> Either + Send>>> { failpoint_with_cancellation(name, context, &CancellationToken::new()) diff --git a/libs/utils/src/failpoint_support.rs b/libs/utils/src/failpoint_support.rs index c93490a97a..0716445968 100644 --- a/libs/utils/src/failpoint_support.rs +++ b/libs/utils/src/failpoint_support.rs @@ -16,27 +16,14 @@ macro_rules! pausable_failpoint { }; } -/// use with neon_failpoint::configure_failpoint("$name", "sleep(2000)") -/// -/// The effect is similar to a "sleep(2000)" action, i.e. we sleep for the -/// specified time (in milliseconds). The main difference is that we use async -/// tokio sleep function. Another difference is that we print lines to the log, -/// which can be useful in tests to check that the failpoint was hit. -/// -/// Optionally pass a cancellation token, and this failpoint will drop out of -/// its sleep when the cancellation token fires. This is useful for testing -/// cases where we would like to block something, but test its clean shutdown behavior. +/// Mere forward to neon_failpoint::failpoint #[macro_export] macro_rules! __failpoint_sleep_millis_async { ($name:literal) => {{ - if cfg!(feature = "testing") { - ::neon_failpoint::failpoint($name, None).await; - } + let _ = ::neon_failpoint::pausable_failpoint!($name); }}; ($name:literal, $cancel:expr) => {{ - if cfg!(feature = "testing") { - ::neon_failpoint::failpoint_with_cancellation($name, None, $cancel).await; - } + let _ = ::neon_failpoint::pausable_failpoint!($name, $cancel); }}; } pub use __failpoint_sleep_millis_async as sleep_millis_async; diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 299fe7e159..d00202b5ed 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -68,7 +68,7 @@ const FEATURES: &[&str] = &[ fn version() -> String { format!( "{GIT_VERSION} failpoints: {}, features: {:?}", - fail::has_failpoints(), + neon_failpoint::has_failpoints(), FEATURES, ) } @@ -84,7 +84,7 @@ fn main() -> anyhow::Result<()> { } // Initialize up failpoints support - let scenario = failpoint_support::init(); + failpoint_support::init().unwrap(); let workdir = arg_matches .get_one::("workdir") @@ -221,7 +221,6 @@ fn main() -> anyhow::Result<()> { start_pageserver(launch_ts, conf, ignored, otel_guard).context("Failed to start pageserver")?; - scenario.teardown(); Ok(()) } @@ -366,16 +365,9 @@ fn start_pageserver( // If any failpoints were set from FAILPOINTS environment variable, // print them to the log for debugging purposes - let failpoints = fail::list(); - if !failpoints.is_empty() { - info!( - "started with failpoints: {}", - failpoints - .iter() - .map(|(name, actions)| format!("{name}={actions}")) - .collect::>() - .join(";") - ) + let failpoints = neon_failpoint::list(); + for (name, actions) in failpoints { + info!("starting with failpoint: {name} {actions}"); } // Create and lock PID file. This ensures that there cannot be more than one diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index eb01fea095..a59754df7d 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -336,7 +336,7 @@ async fn page_service_conn_main( let default_timeout_ms = 10 * 60 * 1000; // 10 minutes by default let socket_timeout_ms = (|| { - fail::fail_point_sync!("simulated-bad-compute-connection", |avg_timeout_ms| { + fail::fail_point_sync!("simulated-bad-compute-connection", |avg_timeout_ms: Option| { // Exponential distribution for simulating // poor network conditions, expect about avg_timeout_ms to be around 15 // in tests diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 3cd34fc1f1..65b077563c 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -5184,7 +5184,7 @@ impl Timeline { *self.applied_gc_cutoff_lsn.read(), ); - neon_failpoint::fail_point_sync!("checkpoint-before-saving-metadata", |x| bail!( + neon_failpoint::fail_point_sync!("checkpoint-before-saving-metadata", |x: Option| bail!( "{}", x.unwrap() )); diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 9c1b81d261..6dc272c789 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -66,6 +66,7 @@ use utils::lsn::Lsn; use utils::shard::ShardIndex; use utils::sync::gate::{Gate, GateGuard}; use utils::{failpoint_support, pausable_failpoint}; +use neon_failpoint as fail; use crate::background_node_operations::{ Delete, Drain, Fill, MAX_RECONCILES_PER_OPERATION, Operation, OperationError, OperationHandler, @@ -6026,7 +6027,7 @@ impl Service { tenant_id: TenantId, split_req: TenantShardSplitRequest, ) -> Result { - fail::fail_point!("shard-split-validation", |_| Err(ApiError::BadRequest( + fail::fail_point_sync!("shard-split-validation", |_| Err(ApiError::BadRequest( anyhow::anyhow!("failpoint") )));