From ca461ce2fd34c3cde0a0ce0082a3a59bf2619784 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Fri, 29 Sep 2023 17:33:13 +0100 Subject: [PATCH] proxy: force no retry some errors --- proxy/src/console/provider.rs | 14 +++++++++++++ proxy/src/proxy.rs | 38 +++++++++++++++++++++++++++++++++++ proxy/src/proxy/tests.rs | 3 +++ 3 files changed, 55 insertions(+) diff --git a/proxy/src/console/provider.rs b/proxy/src/console/provider.rs index 7d587ff1ec..d2d8038675 100644 --- a/proxy/src/console/provider.rs +++ b/proxy/src/console/provider.rs @@ -95,6 +95,20 @@ pub mod errors { _ => false, } } + + fn cannot_retry(&self) -> bool { + match self { + // retry some transport errors + Self::Transport(io) => io.cannot_retry(), + // locked can be returned when the endpoint was in transition + // or when quotas are exceeded. don't retry when quotas are exceeded + Self::Console { + status: http::StatusCode::LOCKED, + ref text, + } => text.contains("quota"), + _ => false, + } + } } impl From for ApiError { diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs index c8f534b2b7..96e299b428 100644 --- a/proxy/src/proxy.rs +++ b/proxy/src/proxy.rs @@ -414,6 +414,9 @@ where Ok(res) => return Ok(res), Err(e) => { error!(error = ?e, "could not connect to compute node"); + if e.cannot_retry() { + return Err(e.into()); + } (invalidate_cache(node_info), e) } }; @@ -501,6 +504,8 @@ pub fn handle_try_wake( pub trait ShouldRetry { fn could_retry(&self) -> bool; + /// return true if retrying definitely won't make a difference - or is harmful + fn cannot_retry(&self) -> bool; fn should_retry(&self, num_retries: u32) -> bool { match self { _ if num_retries >= NUM_RETRIES_CONNECT => false, @@ -517,6 +522,9 @@ impl ShouldRetry for io::Error { ErrorKind::ConnectionRefused | ErrorKind::AddrNotAvailable | ErrorKind::TimedOut ) } + fn cannot_retry(&self) -> bool { + false + } } impl ShouldRetry for tokio_postgres::error::DbError { @@ -530,6 +538,22 @@ impl ShouldRetry for tokio_postgres::error::DbError { | &SqlState::SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION, ) } + fn cannot_retry(&self) -> bool { + use tokio_postgres::error::SqlState; + match self.code() { + &SqlState::TOO_MANY_CONNECTIONS + | &SqlState::INVALID_CATALOG_NAME + | &SqlState::INVALID_PASSWORD => true, + // pgbouncer errors? + &SqlState::PROTOCOL_VIOLATION => matches!( + self.message(), + "no more connections allowed (max_client_conn)" + | "server login has been failing, try again later (server_login_retry)" + | "query_wait_timeout" + ), + _ => false, + } + } } impl ShouldRetry for tokio_postgres::Error { @@ -542,6 +566,13 @@ impl ShouldRetry for tokio_postgres::Error { false } } + fn cannot_retry(&self) -> bool { + if let Some(db_err) = self.source().and_then(|x| x.downcast_ref()) { + tokio_postgres::error::DbError::cannot_retry(db_err) + } else { + false + } + } } impl ShouldRetry for compute::ConnectionError { @@ -552,6 +583,13 @@ impl ShouldRetry for compute::ConnectionError { _ => false, } } + fn cannot_retry(&self) -> bool { + match self { + compute::ConnectionError::Postgres(err) => err.cannot_retry(), + compute::ConnectionError::CouldNotConnect(err) => err.cannot_retry(), + _ => false, + } + } } pub fn retry_after(num_retries: u32) -> time::Duration { diff --git a/proxy/src/proxy/tests.rs b/proxy/src/proxy/tests.rs index 9615017883..61d2717f06 100644 --- a/proxy/src/proxy/tests.rs +++ b/proxy/src/proxy/tests.rs @@ -365,6 +365,9 @@ impl ShouldRetry for TestConnectError { fn could_retry(&self) -> bool { self.retryable } + fn cannot_retry(&self) -> bool { + false + } } #[async_trait]