From 90cadfa986327d6ae29bfef32a6a60d67f19c845 Mon Sep 17 00:00:00 2001 From: Anna Khanova <32508607+khanova@users.noreply.github.com> Date: Mon, 29 Apr 2024 14:26:21 +0200 Subject: [PATCH] proxy: Adjust retry wake compute (#7537) ## Problem Right now we always do retry wake compute. ## Summary of changes Create a list of errors when we could avoid needless retries. --- proxy/src/proxy/connect_compute.rs | 9 +++++++- proxy/src/proxy/retry.rs | 34 ++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index f561085588..da6223209f 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -133,10 +133,17 @@ where error!(error = ?err, "could not connect to compute node"); - let node_info = if !node_info.cached() { + let node_info = if !node_info.cached() || !err.should_retry_database_address() { // If we just recieved this from cplane and dodn't get it from cache, we shouldn't retry. // Do not need to retrieve a new node_info, just return the old one. if !err.should_retry(num_retries, connect_to_compute_retry_config) { + Metrics::get().proxy.retries_metric.observe( + RetriesMetricGroup { + outcome: ConnectOutcome::Failed, + retry_type, + }, + num_retries.into(), + ); return Err(err.into()); } node_info diff --git a/proxy/src/proxy/retry.rs b/proxy/src/proxy/retry.rs index 082e06caa3..36a05ba190 100644 --- a/proxy/src/proxy/retry.rs +++ b/proxy/src/proxy/retry.rs @@ -10,6 +10,9 @@ pub trait ShouldRetry { err => err.could_retry(), } } + fn should_retry_database_address(&self) -> bool { + true + } } impl ShouldRetry for io::Error { @@ -33,6 +36,21 @@ impl ShouldRetry for tokio_postgres::error::DbError { | &SqlState::SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION, ) } + fn should_retry_database_address(&self) -> bool { + use tokio_postgres::error::SqlState; + // Here are errors that happens after the user successfully authenticated to the database. + // TODO: there are pgbouncer errors that should be retried, but they are not listed here. + !matches!( + self.code(), + &SqlState::TOO_MANY_CONNECTIONS + | &SqlState::OUT_OF_MEMORY + | &SqlState::SYNTAX_ERROR + | &SqlState::T_R_SERIALIZATION_FAILURE + | &SqlState::INVALID_CATALOG_NAME + | &SqlState::INVALID_SCHEMA_NAME + | &SqlState::INVALID_PARAMETER_VALUE + ) + } } impl ShouldRetry for tokio_postgres::Error { @@ -45,6 +63,15 @@ impl ShouldRetry for tokio_postgres::Error { false } } + fn should_retry_database_address(&self) -> bool { + if let Some(io_err) = self.source().and_then(|x| x.downcast_ref()) { + io::Error::should_retry_database_address(io_err) + } else if let Some(db_err) = self.source().and_then(|x| x.downcast_ref()) { + tokio_postgres::error::DbError::should_retry_database_address(db_err) + } else { + true + } + } } impl ShouldRetry for compute::ConnectionError { @@ -55,6 +82,13 @@ impl ShouldRetry for compute::ConnectionError { _ => false, } } + fn should_retry_database_address(&self) -> bool { + match self { + compute::ConnectionError::Postgres(err) => err.should_retry_database_address(), + compute::ConnectionError::CouldNotConnect(err) => err.should_retry_database_address(), + _ => true, + } + } } pub fn retry_after(num_retries: u32, config: RetryConfig) -> time::Duration {