reduce proxy timeouts (#4708)

## Problem 10 retries * 10 second timeouts makes for a very long retry window. ## Summary of changes Adds a 2s timeout to sql_over_http connections, and also reduces the 10s timeout in TCP.
2026-01-09 22:42:57 +00:00 · 2023-07-17 20:05:26 +01:00
parent 196943c78f
commit e074ccf170
1 changed files with 4 additions and 22 deletions
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -31,7 +31,8 @@ use utils::measured_stream::MeasuredStream;

 /// Number of times we should retry the `/proxy_wake_compute` http request.
 /// Retry duration is BASE_RETRY_WAIT_DURATION * 1.5^n
-const NUM_RETRIES_WAKE_COMPUTE: u32 = 10;
+const NUM_RETRIES_CONNECT: u32 = 10;
+const CONNECT_TIMEOUT: time::Duration = time::Duration::from_secs(2);
 const BASE_RETRY_WAIT_DURATION: time::Duration = time::Duration::from_millis(100);

 const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
@@ -393,25 +394,6 @@ where
    let mut state = ConnectionState::<M::ConnectError>::Cached(node_info);

    loop {
-        // Set a shorter timeout for the initial connection attempt.
-        //
-        // In case we try to connect to an outdated address that is no longer valid, the
-        // default behavior of Kubernetes is to drop the packets, causing us to wait for
-        // the entire timeout period. We want to fail fast in such cases.
-        //
-        // A specific case to consider is when we have cached compute node information
-        // with a 4-minute TTL (Time To Live), but the user has executed a `/suspend` API
-        // call, resulting in the nonexistence of the compute node.
-        //
-        // We only use caching in case of scram proxy backed by the console, so reduce
-        // the timeout only in that case.
-        let is_scram_proxy = matches!(creds, auth::BackendType::Console(_, _));
-        let timeout = if is_scram_proxy && num_retries == 0 {
-            time::Duration::from_secs(2)
-        } else {
-            time::Duration::from_secs(10)
-        };
-
        match state {
            ConnectionState::Invalid(config, err) => {
                match try_wake(&config, extra, creds).await {
@@ -437,7 +419,7 @@ where
                }
            }
            ConnectionState::Cached(node_info) => {
-                match mechanism.connect_once(&node_info, timeout).await {
+                match mechanism.connect_once(&node_info, CONNECT_TIMEOUT).await {
                    Ok(res) => return Ok(res),
                    Err(e) => {
                        error!(error = ?e, "could not connect to compute node");
@@ -497,7 +479,7 @@ pub trait ShouldRetry {
        match self {
            // retry all errors at least once
            _ if num_retries == 0 => true,
-            _ if num_retries >= NUM_RETRIES_WAKE_COMPUTE => false,
+            _ if num_retries >= NUM_RETRIES_CONNECT => false,
            err => err.could_retry(),
        }
    }