From 00d90ce76a230d7afc9994df9fafd688c76ebd57 Mon Sep 17 00:00:00 2001 From: Anna Khanova <32508607+khanova@users.noreply.github.com> Date: Mon, 18 Dec 2023 16:04:47 +0100 Subject: [PATCH] Added cache for get role secret (#6165) ## Problem Currently if we are getting many consecutive connections to the same user/ep we will send a lot of traffic to the console. ## Summary of changes Cache with ttl=4min proxy_get_role_secret response. Note: this is the temporary hack, notifier listener is WIP. --- proxy/src/auth/backend.rs | 9 ++------- proxy/src/bin/proxy.rs | 16 +++++++++++++-- proxy/src/config.rs | 4 ++-- proxy/src/console/provider.rs | 13 +++++++++---- proxy/src/console/provider/mock.rs | 6 +++--- proxy/src/console/provider/neon.rs | 31 ++++++++++++++++++++++-------- proxy/src/scram/key.rs | 2 +- proxy/src/scram/secret.rs | 1 + proxy/src/serverless/conn_pool.rs | 1 - 9 files changed, 55 insertions(+), 28 deletions(-) diff --git a/proxy/src/auth/backend.rs b/proxy/src/auth/backend.rs index 3b09e05bd2..0c867dfd61 100644 --- a/proxy/src/auth/backend.rs +++ b/proxy/src/auth/backend.rs @@ -9,7 +9,6 @@ use tokio_postgres::config::AuthKeys; use crate::auth::credentials::check_peer_addr_is_in_list; use crate::auth::validate_password_and_exchange; use crate::console::errors::GetAuthInfoError; -use crate::console::provider::AuthInfo; use crate::console::AuthSecret; use crate::proxy::connect_compute::handle_try_wake; use crate::proxy::retry::retry_after; @@ -187,17 +186,13 @@ async fn auth_quirks( }; info!("fetching user's authentication info"); - // TODO(anna): this will slow down both "hacks" below; we probably need a cache. - let AuthInfo { - secret, - allowed_ips, - } = api.get_auth_info(extra, &info).await?; + let allowed_ips = api.get_allowed_ips(extra, &info).await?; // check allowed list if !check_peer_addr_is_in_list(&info.inner.peer_addr, &allowed_ips) { return Err(auth::AuthError::ip_address_not_allowed()); } - let secret = secret.unwrap_or_else(|| { + let secret = api.get_role_secret(extra, &info).await?.unwrap_or_else(|| { // If we don't have an authentication secret, we mock one to // prevent malicious probing (possible due to missing protocol steps). // This mocked secret will never lead to successful authentication. diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index ae4c42bcb1..be3989d387 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -6,6 +6,7 @@ use proxy::config::HttpConfig; use proxy::console; use proxy::console::provider::AllowedIpsCache; use proxy::console::provider::NodeInfoCache; +use proxy::console::provider::RoleSecretCache; use proxy::http; use proxy::rate_limiter::EndpointRateLimiter; use proxy::rate_limiter::RateBucketInfo; @@ -86,7 +87,7 @@ struct ProxyCliArgs { #[clap(long)] metric_collection_interval: Option, /// cache for `wake_compute` api method (use `size=0` to disable) - #[clap(long, default_value = config::CacheOptions::DEFAULT_OPTIONS_NODE_INFO)] + #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)] wake_compute_cache: String, /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable). #[clap(long, default_value = config::WakeComputeLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)] @@ -127,8 +128,11 @@ struct ProxyCliArgs { #[clap(flatten)] aimd_config: proxy::rate_limiter::AimdConfig, /// cache for `allowed_ips` (use `size=0` to disable) - #[clap(long, default_value = config::CacheOptions::DEFAULT_OPTIONS_NODE_INFO)] + #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)] allowed_ips_cache: String, + /// cache for `role_secret` (use `size=0` to disable) + #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)] + role_secret_cache: String, /// disable ip check for http requests. If it is too time consuming, it could be turned off. #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] disable_ip_check_for_http: bool, @@ -266,9 +270,11 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { AuthBackend::Console => { let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?; let allowed_ips_cache_config: CacheOptions = args.allowed_ips_cache.parse()?; + let role_secret_cache_config: CacheOptions = args.role_secret_cache.parse()?; info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}"); info!("Using AllowedIpsCache (wake_compute) with options={allowed_ips_cache_config:?}"); + info!("Using RoleSecretCache (wake_compute) with options={role_secret_cache_config:?}"); let caches = Box::leak(Box::new(console::caches::ApiCaches { node_info: NodeInfoCache::new( "node_info_cache", @@ -282,6 +288,12 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { allowed_ips_cache_config.ttl, false, ), + role_secret: RoleSecretCache::new( + "role_secret_cache", + role_secret_cache_config.size, + role_secret_cache_config.ttl, + false, + ), })); let config::WakeComputeLockOptions { diff --git a/proxy/src/config.rs b/proxy/src/config.rs index f932df4058..2ed248af8d 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -310,10 +310,10 @@ pub struct CacheOptions { impl CacheOptions { /// Default options for [`crate::console::provider::NodeInfoCache`]. - pub const DEFAULT_OPTIONS_NODE_INFO: &'static str = "size=4000,ttl=4m"; + pub const CACHE_DEFAULT_OPTIONS: &'static str = "size=4000,ttl=4m"; /// Parse cache options passed via cmdline. - /// Example: [`Self::DEFAULT_OPTIONS_NODE_INFO`]. + /// Example: [`Self::CACHE_DEFAULT_OPTIONS`]. fn parse(options: &str) -> anyhow::Result { let mut size = None; let mut ttl = None; diff --git a/proxy/src/console/provider.rs b/proxy/src/console/provider.rs index 8d399f26ea..7ef5e950b0 100644 --- a/proxy/src/console/provider.rs +++ b/proxy/src/console/provider.rs @@ -10,6 +10,7 @@ use crate::{ }; use async_trait::async_trait; use dashmap::DashMap; +use smol_str::SmolStr; use std::{sync::Arc, time::Duration}; use tokio::{ sync::{OwnedSemaphorePermit, Semaphore}, @@ -216,6 +217,7 @@ impl ConsoleReqExtra { } /// Auth secret which is managed by the cloud. +#[derive(Clone)] pub enum AuthSecret { #[cfg(feature = "testing")] /// Md5 hash of user's password. @@ -250,18 +252,19 @@ pub struct NodeInfo { pub type NodeInfoCache = TimedLru, NodeInfo>; pub type CachedNodeInfo = timed_lru::Cached<&'static NodeInfoCache>; -pub type AllowedIpsCache = TimedLru, Arc>>; +pub type AllowedIpsCache = TimedLru>>; +pub type RoleSecretCache = TimedLru<(SmolStr, SmolStr), Option>; /// This will allocate per each call, but the http requests alone /// already require a few allocations, so it should be fine. #[async_trait] pub trait Api { /// Get the client's auth secret for authentication. - async fn get_auth_info( + async fn get_role_secret( &self, extra: &ConsoleReqExtra, creds: &ComputeUserInfo, - ) -> Result; + ) -> Result, errors::GetAuthInfoError>; async fn get_allowed_ips( &self, @@ -282,7 +285,9 @@ pub struct ApiCaches { /// Cache for the `wake_compute` API method. pub node_info: NodeInfoCache, /// Cache for the `get_allowed_ips`. TODO(anna): use notifications listener instead. - pub allowed_ips: TimedLru, Arc>>, + pub allowed_ips: AllowedIpsCache, + /// Cache for the `get_role_secret`. TODO(anna): use notifications listener instead. + pub role_secret: RoleSecretCache, } /// Various caches for [`console`](super). diff --git a/proxy/src/console/provider/mock.rs b/proxy/src/console/provider/mock.rs index c464b4daf2..9c4a7447c6 100644 --- a/proxy/src/console/provider/mock.rs +++ b/proxy/src/console/provider/mock.rs @@ -142,12 +142,12 @@ async fn get_execute_postgres_query( #[async_trait] impl super::Api for Api { #[tracing::instrument(skip_all)] - async fn get_auth_info( + async fn get_role_secret( &self, _extra: &ConsoleReqExtra, creds: &ComputeUserInfo, - ) -> Result { - self.do_get_auth_info(creds).await + ) -> Result, GetAuthInfoError> { + Ok(self.do_get_auth_info(creds).await?.secret) } async fn get_allowed_ips( diff --git a/proxy/src/console/provider/neon.rs b/proxy/src/console/provider/neon.rs index f748c9a41f..5bb91313c4 100644 --- a/proxy/src/console/provider/neon.rs +++ b/proxy/src/console/provider/neon.rs @@ -159,12 +159,24 @@ impl Api { #[async_trait] impl super::Api for Api { #[tracing::instrument(skip_all)] - async fn get_auth_info( + async fn get_role_secret( &self, extra: &ConsoleReqExtra, creds: &ComputeUserInfo, - ) -> Result { - self.do_get_auth_info(extra, creds).await + ) -> Result, GetAuthInfoError> { + let ep = creds.endpoint.clone(); + let user = creds.inner.user.clone(); + if let Some(role_secret) = self.caches.role_secret.get(&(ep.clone(), user.clone())) { + return Ok(role_secret.clone()); + } + let auth_info = self.do_get_auth_info(extra, creds).await?; + self.caches + .role_secret + .insert((ep.clone(), user), auth_info.secret.clone()); + self.caches + .allowed_ips + .insert(ep, Arc::new(auth_info.allowed_ips)); + Ok(auth_info.secret) } async fn get_allowed_ips( @@ -172,8 +184,7 @@ impl super::Api for Api { extra: &ConsoleReqExtra, creds: &ComputeUserInfo, ) -> Result>, GetAuthInfoError> { - let key: &str = &creds.endpoint; - if let Some(allowed_ips) = self.caches.allowed_ips.get(key) { + if let Some(allowed_ips) = self.caches.allowed_ips.get(&creds.endpoint) { ALLOWED_IPS_BY_CACHE_OUTCOME .with_label_values(&["hit"]) .inc(); @@ -182,10 +193,14 @@ impl super::Api for Api { ALLOWED_IPS_BY_CACHE_OUTCOME .with_label_values(&["miss"]) .inc(); - let allowed_ips = Arc::new(self.do_get_auth_info(extra, creds).await?.allowed_ips); + let auth_info = self.do_get_auth_info(extra, creds).await?; + let allowed_ips = Arc::new(auth_info.allowed_ips); + let ep = creds.endpoint.clone(); + let user = creds.inner.user.clone(); self.caches - .allowed_ips - .insert(key.into(), allowed_ips.clone()); + .role_secret + .insert((ep.clone(), user), auth_info.secret); + self.caches.allowed_ips.insert(ep, allowed_ips.clone()); Ok(allowed_ips) } diff --git a/proxy/src/scram/key.rs b/proxy/src/scram/key.rs index e9c65fcef3..bd93fb2b70 100644 --- a/proxy/src/scram/key.rs +++ b/proxy/src/scram/key.rs @@ -6,7 +6,7 @@ pub const SCRAM_KEY_LEN: usize = 32; /// One of the keys derived from the [password](super::password::SaltedPassword). /// We use the same structure for all keys, i.e. /// `ClientKey`, `StoredKey`, and `ServerKey`. -#[derive(Default, PartialEq, Eq)] +#[derive(Clone, Default, PartialEq, Eq)] #[repr(transparent)] pub struct ScramKey { bytes: [u8; SCRAM_KEY_LEN], diff --git a/proxy/src/scram/secret.rs b/proxy/src/scram/secret.rs index 424beccec9..9e74e07af1 100644 --- a/proxy/src/scram/secret.rs +++ b/proxy/src/scram/secret.rs @@ -5,6 +5,7 @@ use super::key::ScramKey; /// Server secret is produced from [password](super::password::SaltedPassword) /// and is used throughout the authentication process. +#[derive(Clone)] pub struct ServerSecret { /// Number of iterations for `PBKDF2` function. pub iterations: u32, diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index ab8903418b..df2d1bea32 100644 --- a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -431,7 +431,6 @@ async fn connect_to_compute( application_name: APP_NAME.to_string(), options: console_options, }; - // TODO(anna): this is a bit hacky way, consider using console notification listener. if !config.disable_ip_check_for_http { let allowed_ips = backend.get_allowed_ips(&extra).await?; if !check_peer_addr_is_in_list(&peer_addr, &allowed_ips) {