proxy: add connect compute concurrency lock (#7607)

## Problem

Too many connect_compute attempts can overwhelm postgres, getting the
connections stuck.

## Summary of changes

Limit number of connection attempts that can happen at a given time.
This commit is contained in:
Conrad Ludgate
2024-05-03 16:45:24 +01:00
committed by GitHub
parent a3fe12b6d8
commit 9b65946566
10 changed files with 112 additions and 35 deletions

View File

@@ -9,11 +9,13 @@ use crate::{
config::{AuthenticationConfig, ProxyConfig},
console::{
errors::{GetAuthInfoError, WakeComputeError},
locks::ApiLocks,
CachedNodeInfo,
},
context::RequestMonitoring,
error::{ErrorKind, ReportableError, UserFacingError},
proxy::connect_compute::ConnectMechanism,
proxy::{connect_compute::ConnectMechanism, retry::ShouldRetry},
Host,
};
use super::conn_pool::{poll_client, Client, ConnInfo, GlobalConnPool};
@@ -105,6 +107,7 @@ impl PoolingBackend {
conn_id,
conn_info,
pool: self.pool.clone(),
locks: &self.config.connect_compute_locks,
},
&backend,
false, // do not allow self signed compute for http flow
@@ -154,16 +157,31 @@ impl UserFacingError for HttpConnError {
}
}
impl ShouldRetry for HttpConnError {
fn could_retry(&self) -> bool {
match self {
HttpConnError::ConnectionError(e) => e.could_retry(),
HttpConnError::ConnectionClosedAbruptly(_) => false,
HttpConnError::GetAuthInfo(_) => false,
HttpConnError::AuthError(_) => false,
HttpConnError::WakeCompute(_) => false,
}
}
}
struct TokioMechanism {
pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
conn_info: ConnInfo,
conn_id: uuid::Uuid,
/// connect_to_compute concurrency lock
locks: &'static ApiLocks<Host>,
}
#[async_trait]
impl ConnectMechanism for TokioMechanism {
type Connection = Client<tokio_postgres::Client>;
type ConnectError = tokio_postgres::Error;
type ConnectError = HttpConnError;
type Error = HttpConnError;
async fn connect_once(
@@ -172,6 +190,9 @@ impl ConnectMechanism for TokioMechanism {
node_info: &CachedNodeInfo,
timeout: Duration,
) -> Result<Self::Connection, Self::ConnectError> {
let host = node_info.config.get_host()?;
let permit = self.locks.get_permit(&host).await?;
let mut config = (*node_info.config).clone();
let config = config
.user(&self.conn_info.user_info.user)
@@ -182,6 +203,7 @@ impl ConnectMechanism for TokioMechanism {
let pause = ctx.latency_timer.pause(crate::metrics::Waiting::Compute);
let (client, connection) = config.connect(tokio_postgres::NoTls).await?;
drop(pause);
drop(permit);
tracing::Span::current().record("pid", &tracing::field::display(client.get_process_id()));
Ok(poll_client(