From 010cd34635dd42b451e85bbf4a3a9148134afdc5 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Tue, 24 Jun 2025 17:00:50 +0100 Subject: [PATCH] simplify error handling --- proxy/src/compute/mod.rs | 12 ++++++++ proxy/src/proxy/connect_compute.rs | 22 ++++---------- proxy/src/proxy/retry.rs | 4 +++ proxy/src/proxy/tests/mod.rs | 47 +++++------------------------- 4 files changed, 29 insertions(+), 56 deletions(-) diff --git a/proxy/src/compute/mod.rs b/proxy/src/compute/mod.rs index 4c03f422e1..0516cfa613 100644 --- a/proxy/src/compute/mod.rs +++ b/proxy/src/compute/mod.rs @@ -86,6 +86,14 @@ pub(crate) enum ConnectionError { #[error("error acquiring resource permit: {0}")] TooManyConnectionAttempts(#[from] ApiLockError), + + #[cfg(test)] + #[error("retryable: {retryable}, wakeable: {wakeable}, kind: {kind:?}")] + TestError { + retryable: bool, + wakeable: bool, + kind: crate::error::ErrorKind, + }, } impl UserFacingError for ConnectionError { @@ -96,6 +104,8 @@ impl UserFacingError for ConnectionError { "Failed to acquire permit to connect to the database. Too many database connection attempts are currently ongoing.".to_owned() } ConnectionError::TlsError(_) => COULD_NOT_CONNECT.to_owned(), + #[cfg(test)] + ConnectionError::TestError { .. } => self.to_string(), } } } @@ -106,6 +116,8 @@ impl ReportableError for ConnectionError { ConnectionError::TlsError(_) => crate::error::ErrorKind::Compute, ConnectionError::WakeComputeError(e) => e.get_error_kind(), ConnectionError::TooManyConnectionAttempts(e) => e.get_error_kind(), + #[cfg(test)] + ConnectionError::TestError { kind, .. } => *kind, } } } diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index 36c174030f..0b944bb7f0 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -5,14 +5,12 @@ use tracing::{debug, info, warn}; use crate::compute::{self, COULD_NOT_CONNECT, ComputeConnection}; use crate::config::{ComputeConfig, RetryConfig}; use crate::context::RequestContext; -use crate::control_plane::errors::WakeComputeError; use crate::control_plane::locks::ApiLocks; use crate::control_plane::{self, NodeInfo}; -use crate::error::ReportableError; use crate::metrics::{ ConnectOutcome, ConnectionFailureKind, Metrics, RetriesMetricGroup, RetryType, }; -use crate::proxy::retry::{CouldRetry, ShouldRetryWakeCompute, retry_after, should_retry}; +use crate::proxy::retry::{ShouldRetryWakeCompute, retry_after, should_retry}; use crate::proxy::wake_compute::{WakeComputeBackend, wake_compute}; use crate::types::Host; @@ -38,14 +36,12 @@ pub(crate) fn invalidate_cache(node_info: control_plane::CachedNodeInfo) -> Node #[async_trait] pub(crate) trait ConnectMechanism { type Connection; - type ConnectError: ReportableError; - type Error: From; async fn connect_once( &self, ctx: &RequestContext, node_info: &control_plane::CachedNodeInfo, config: &ComputeConfig, - ) -> Result; + ) -> Result; } pub(crate) struct TcpMechanism { @@ -58,8 +54,6 @@ pub(crate) struct TcpMechanism { #[async_trait] impl ConnectMechanism for TcpMechanism { type Connection = ComputeConnection; - type ConnectError = compute::ConnectionError; - type Error = compute::ConnectionError; #[tracing::instrument(skip_all, fields( pid = tracing::field::Empty, @@ -70,7 +64,7 @@ impl ConnectMechanism for TcpMechanism { ctx: &RequestContext, node_info: &control_plane::CachedNodeInfo, config: &ComputeConfig, - ) -> Result { + ) -> Result { let permit = self.locks.get_permit(&node_info.conn_info.host).await?; permit.release_result(node_info.connect(ctx, config, self.direct).await) } @@ -84,11 +78,7 @@ pub(crate) async fn connect_to_compute Result -where - M::ConnectError: CouldRetry + ShouldRetryWakeCompute + std::fmt::Debug, - M::Error: From, -{ +) -> Result { let mut num_retries = 0; let node_info = wake_compute(&mut num_retries, ctx, user_info, wake_compute_retry_config).await?; @@ -122,7 +112,7 @@ where }, num_retries.into(), ); - return Err(err.into()); + return Err(err); } node_info } else { @@ -163,7 +153,7 @@ where }, num_retries.into(), ); - return Err(e.into()); + return Err(e); } warn!(error = ?e, num_retries, retriable = true, COULD_NOT_CONNECT); diff --git a/proxy/src/proxy/retry.rs b/proxy/src/proxy/retry.rs index 32868e444c..382b41cd38 100644 --- a/proxy/src/proxy/retry.rs +++ b/proxy/src/proxy/retry.rs @@ -36,6 +36,8 @@ impl CouldRetry for compute::ConnectionError { compute::ConnectionError::TlsError(err) => err.could_retry(), compute::ConnectionError::WakeComputeError(err) => err.could_retry(), compute::ConnectionError::TooManyConnectionAttempts(_) => false, + #[cfg(test)] + compute::ConnectionError::TestError { retryable, .. } => *retryable, } } } @@ -45,6 +47,8 @@ impl ShouldRetryWakeCompute for compute::ConnectionError { match self { // the cache entry was not checked for validity compute::ConnectionError::TooManyConnectionAttempts(_) => false, + #[cfg(test)] + compute::ConnectionError::TestError { wakeable, .. } => *wakeable, _ => true, } } diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index 29a269208a..c3c8e4d431 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -10,7 +10,7 @@ use async_trait::async_trait; use http::StatusCode; use postgres_client::config::SslMode; use postgres_client::tls::{MakeTlsConnect, NoTls}; -use retry::{ShouldRetryWakeCompute, retry_after}; +use retry::retry_after; use rstest::rstest; use rustls::crypto::ring; use rustls::pki_types; @@ -20,6 +20,7 @@ use tracing_test::traced_test; use super::retry::CouldRetry; use super::*; use crate::auth::backend::{ComputeUserInfo, MaybeOwned}; +use crate::compute::ConnectionError; use crate::config::{ComputeConfig, RetryConfig}; use crate::control_plane::client::{ControlPlaneClient, TestControlPlaneClient}; use crate::control_plane::messages::{ControlPlaneErrorMessage, Details, MetricsAuxInfo, Status}; @@ -423,71 +424,37 @@ impl TestConnectMechanism { #[derive(Debug)] struct TestConnection; -#[derive(Debug)] -struct TestConnectError { - retryable: bool, - wakeable: bool, - kind: crate::error::ErrorKind, -} - -impl ReportableError for TestConnectError { - fn get_error_kind(&self) -> crate::error::ErrorKind { - self.kind - } -} - -impl std::fmt::Display for TestConnectError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{self:?}") - } -} - -impl std::error::Error for TestConnectError {} - -impl CouldRetry for TestConnectError { - fn could_retry(&self) -> bool { - self.retryable - } -} -impl ShouldRetryWakeCompute for TestConnectError { - fn should_retry_wake_compute(&self) -> bool { - self.wakeable - } -} - #[async_trait] impl ConnectMechanism for TestConnectMechanism { type Connection = TestConnection; - type ConnectError = TestConnectError; - type Error = anyhow::Error; async fn connect_once( &self, _ctx: &RequestContext, _node_info: &control_plane::CachedNodeInfo, _config: &ComputeConfig, - ) -> Result { + ) -> Result { let mut counter = self.counter.lock().unwrap(); let action = self.sequence[*counter]; *counter += 1; match action { ConnectAction::Connect => Ok(TestConnection), - ConnectAction::Retry => Err(TestConnectError { + ConnectAction::Retry => Err(ConnectionError::TestError { retryable: true, wakeable: true, kind: ErrorKind::Compute, }), - ConnectAction::RetryNoWake => Err(TestConnectError { + ConnectAction::RetryNoWake => Err(ConnectionError::TestError { retryable: true, wakeable: false, kind: ErrorKind::Compute, }), - ConnectAction::Fail => Err(TestConnectError { + ConnectAction::Fail => Err(ConnectionError::TestError { retryable: false, wakeable: true, kind: ErrorKind::Compute, }), - ConnectAction::FailNoWake => Err(TestConnectError { + ConnectAction::FailNoWake => Err(ConnectionError::TestError { retryable: false, wakeable: false, kind: ErrorKind::Compute,