diff --git a/storage_controller/src/compute_hook.rs b/storage_controller/src/compute_hook.rs index a1d051f150..4d0f8006aa 100644 --- a/storage_controller/src/compute_hook.rs +++ b/storage_controller/src/compute_hook.rs @@ -146,6 +146,9 @@ pub(crate) enum NotifyError { // A response indicates we will never succeed, such as 400 or 404 #[error("Non-retryable error {0}")] Fatal(StatusCode), + + #[error("neon_local error: {0}")] + NeonLocal(anyhow::Error), } enum MaybeSendResult { @@ -278,7 +281,7 @@ impl ComputeHook { async fn do_notify_local( &self, reconfigure_request: &ComputeHookNotifyRequest, - ) -> anyhow::Result<()> { + ) -> Result<(), NotifyError> { // neon_local updates are not safe to call concurrently, use a lock to serialize // all calls to this function let _locked = self.neon_local_lock.lock().await; @@ -321,7 +324,8 @@ impl ComputeHook { tracing::info!("Reconfiguring endpoint {}", endpoint_name,); endpoint .reconfigure(compute_pageservers.clone(), *stripe_size) - .await?; + .await + .map_err(NotifyError::NeonLocal)?; } } @@ -510,7 +514,7 @@ impl ComputeHook { } else { self.do_notify_local(&request).await.map_err(|e| { // This path is for testing only, so munge the error into our prod-style error type. 
- tracing::error!("Local notification hook failed: {e}"); + tracing::error!("neon_local notification hook failed: {e}"); NotifyError::Fatal(StatusCode::INTERNAL_SERVER_ERROR) }) }; diff --git a/test_runner/fixtures/pageserver/allowed_errors.py b/test_runner/fixtures/pageserver/allowed_errors.py index 147d5705d3..c5b09e3608 100755 --- a/test_runner/fixtures/pageserver/allowed_errors.py +++ b/test_runner/fixtures/pageserver/allowed_errors.py @@ -106,6 +106,11 @@ DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS = [ ".*startup_reconcile: Could not scan node.*", # Tests run in dev mode ".*Starting in dev mode.*", + # Tests that stop endpoints & use the storage controller's neon_local notification + # mechanism might fail (neon_local's stopping an endpoint isn't atomic wrt the storage + # controller's attempts to notify the endpoint). + ".*reconciler.*neon_local notification hook failed.*", + ".*reconciler.*neon_local error.*", ]