mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-09 22:42:57 +00:00
reduce proxy timeouts (#4708)
## Problem 10 retries * 10 second timeouts makes for a very long retry window. ## Summary of changes Adds a 2s timeout to sql_over_http connections, and also reduces the 10s timeout in TCP.
This commit is contained in:
@@ -31,7 +31,8 @@ use utils::measured_stream::MeasuredStream;
|
||||
|
||||
/// Number of times we should retry the `/proxy_wake_compute` http request.
|
||||
/// Retry duration is BASE_RETRY_WAIT_DURATION * 1.5^n
|
||||
const NUM_RETRIES_WAKE_COMPUTE: u32 = 10;
|
||||
const NUM_RETRIES_CONNECT: u32 = 10;
|
||||
const CONNECT_TIMEOUT: time::Duration = time::Duration::from_secs(2);
|
||||
const BASE_RETRY_WAIT_DURATION: time::Duration = time::Duration::from_millis(100);
|
||||
|
||||
const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
|
||||
@@ -393,25 +394,6 @@ where
|
||||
let mut state = ConnectionState::<M::ConnectError>::Cached(node_info);
|
||||
|
||||
loop {
|
||||
// Set a shorter timeout for the initial connection attempt.
|
||||
//
|
||||
// In case we try to connect to an outdated address that is no longer valid, the
|
||||
// default behavior of Kubernetes is to drop the packets, causing us to wait for
|
||||
// the entire timeout period. We want to fail fast in such cases.
|
||||
//
|
||||
// A specific case to consider is when we have cached compute node information
|
||||
// with a 4-minute TTL (Time To Live), but the user has executed a `/suspend` API
|
||||
// call, resulting in the nonexistence of the compute node.
|
||||
//
|
||||
// We only use caching in case of scram proxy backed by the console, so reduce
|
||||
// the timeout only in that case.
|
||||
let is_scram_proxy = matches!(creds, auth::BackendType::Console(_, _));
|
||||
let timeout = if is_scram_proxy && num_retries == 0 {
|
||||
time::Duration::from_secs(2)
|
||||
} else {
|
||||
time::Duration::from_secs(10)
|
||||
};
|
||||
|
||||
match state {
|
||||
ConnectionState::Invalid(config, err) => {
|
||||
match try_wake(&config, extra, creds).await {
|
||||
@@ -437,7 +419,7 @@ where
|
||||
}
|
||||
}
|
||||
ConnectionState::Cached(node_info) => {
|
||||
match mechanism.connect_once(&node_info, timeout).await {
|
||||
match mechanism.connect_once(&node_info, CONNECT_TIMEOUT).await {
|
||||
Ok(res) => return Ok(res),
|
||||
Err(e) => {
|
||||
error!(error = ?e, "could not connect to compute node");
|
||||
@@ -497,7 +479,7 @@ pub trait ShouldRetry {
|
||||
match self {
|
||||
// retry all errors at least once
|
||||
_ if num_retries == 0 => true,
|
||||
_ if num_retries >= NUM_RETRIES_WAKE_COMPUTE => false,
|
||||
_ if num_retries >= NUM_RETRIES_CONNECT => false,
|
||||
err => err.could_retry(),
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user