proxy: add connect compute concurrency lock (#7607)

## Problem

Too many connect_compute attempts can overwhelm postgres, getting the
connections stuck.

## Summary of changes

Limit number of connection attempts that can happen at a given time.
This commit is contained in:
Conrad Ludgate
2024-05-03 16:45:24 +01:00
committed by GitHub
parent a3fe12b6d8
commit 9b65946566
10 changed files with 112 additions and 35 deletions

View File

@@ -17,7 +17,7 @@ use crate::{
scram, EndpointCacheKey,
};
use dashmap::DashMap;
use std::{sync::Arc, time::Duration};
use std::{hash::Hash, sync::Arc, time::Duration};
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
use tokio::time::Instant;
use tracing::info;
@@ -447,16 +447,16 @@ impl ApiCaches {
}
/// Various caches for [`console`](super).
pub struct ApiLocks {
pub struct ApiLocks<K> {
name: &'static str,
node_locks: DashMap<EndpointCacheKey, Arc<Semaphore>>,
node_locks: DashMap<K, Arc<Semaphore>>,
permits: usize,
timeout: Duration,
epoch: std::time::Duration,
metrics: &'static ApiLockMetrics,
}
impl ApiLocks {
impl<K: Hash + Eq + Clone> ApiLocks<K> {
pub fn new(
name: &'static str,
permits: usize,
@@ -475,10 +475,7 @@ impl ApiLocks {
})
}
pub async fn get_wake_compute_permit(
&self,
key: &EndpointCacheKey,
) -> Result<WakeComputePermit, errors::WakeComputeError> {
pub async fn get_permit(&self, key: &K) -> Result<WakeComputePermit, errors::WakeComputeError> {
if self.permits == 0 {
return Ok(WakeComputePermit { permit: None });
}

View File

@@ -13,7 +13,7 @@ use crate::{
http,
metrics::{CacheOutcome, Metrics},
rate_limiter::EndpointRateLimiter,
scram, Normalize,
scram, EndpointCacheKey, Normalize,
};
use crate::{cache::Cached, context::RequestMonitoring};
use futures::TryFutureExt;
@@ -25,7 +25,7 @@ use tracing::{error, info, info_span, warn, Instrument};
pub struct Api {
endpoint: http::Endpoint,
pub caches: &'static ApiCaches,
pub locks: &'static ApiLocks,
pub locks: &'static ApiLocks<EndpointCacheKey>,
pub endpoint_rate_limiter: Arc<EndpointRateLimiter>,
jwt: String,
}
@@ -35,7 +35,7 @@ impl Api {
pub fn new(
endpoint: http::Endpoint,
caches: &'static ApiCaches,
locks: &'static ApiLocks,
locks: &'static ApiLocks<EndpointCacheKey>,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> Self {
let jwt: String = match std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN") {
@@ -289,7 +289,7 @@ impl super::Api for Api {
return Err(WakeComputeError::TooManyConnections);
}
let permit = self.locks.get_wake_compute_permit(&key).await?;
let permit = self.locks.get_permit(&key).await?;
// after getting back a permit - it's possible the cache was filled
// double check