Added cache for get role secret (#6165)

## Problem

Currently if we are getting many consecutive connections to the same
user/ep we will send a lot of traffic to the console.

## Summary of changes

Cache with ttl=4min proxy_get_role_secret response.

Note: this is the temporary hack, notifier listener is WIP.
This commit is contained in:
Anna Khanova
2023-12-18 16:04:47 +01:00
committed by GitHub
parent 33cb9a68f7
commit 00d90ce76a
9 changed files with 55 additions and 28 deletions

View File

@@ -9,7 +9,6 @@ use tokio_postgres::config::AuthKeys;
use crate::auth::credentials::check_peer_addr_is_in_list;
use crate::auth::validate_password_and_exchange;
use crate::console::errors::GetAuthInfoError;
use crate::console::provider::AuthInfo;
use crate::console::AuthSecret;
use crate::proxy::connect_compute::handle_try_wake;
use crate::proxy::retry::retry_after;
@@ -187,17 +186,13 @@ async fn auth_quirks(
};
info!("fetching user's authentication info");
// TODO(anna): this will slow down both "hacks" below; we probably need a cache.
let AuthInfo {
secret,
allowed_ips,
} = api.get_auth_info(extra, &info).await?;
let allowed_ips = api.get_allowed_ips(extra, &info).await?;
// check allowed list
if !check_peer_addr_is_in_list(&info.inner.peer_addr, &allowed_ips) {
return Err(auth::AuthError::ip_address_not_allowed());
}
let secret = secret.unwrap_or_else(|| {
let secret = api.get_role_secret(extra, &info).await?.unwrap_or_else(|| {
// If we don't have an authentication secret, we mock one to
// prevent malicious probing (possible due to missing protocol steps).
// This mocked secret will never lead to successful authentication.

View File

@@ -6,6 +6,7 @@ use proxy::config::HttpConfig;
use proxy::console;
use proxy::console::provider::AllowedIpsCache;
use proxy::console::provider::NodeInfoCache;
use proxy::console::provider::RoleSecretCache;
use proxy::http;
use proxy::rate_limiter::EndpointRateLimiter;
use proxy::rate_limiter::RateBucketInfo;
@@ -86,7 +87,7 @@ struct ProxyCliArgs {
#[clap(long)]
metric_collection_interval: Option<String>,
/// cache for `wake_compute` api method (use `size=0` to disable)
#[clap(long, default_value = config::CacheOptions::DEFAULT_OPTIONS_NODE_INFO)]
#[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
wake_compute_cache: String,
/// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
#[clap(long, default_value = config::WakeComputeLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
@@ -127,8 +128,11 @@ struct ProxyCliArgs {
#[clap(flatten)]
aimd_config: proxy::rate_limiter::AimdConfig,
/// cache for `allowed_ips` (use `size=0` to disable)
#[clap(long, default_value = config::CacheOptions::DEFAULT_OPTIONS_NODE_INFO)]
#[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
allowed_ips_cache: String,
/// cache for `role_secret` (use `size=0` to disable)
#[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
role_secret_cache: String,
/// disable ip check for http requests. If it is too time consuming, it could be turned off.
#[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
disable_ip_check_for_http: bool,
@@ -266,9 +270,11 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
AuthBackend::Console => {
let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
let allowed_ips_cache_config: CacheOptions = args.allowed_ips_cache.parse()?;
let role_secret_cache_config: CacheOptions = args.role_secret_cache.parse()?;
info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
info!("Using AllowedIpsCache (wake_compute) with options={allowed_ips_cache_config:?}");
info!("Using RoleSecretCache (wake_compute) with options={role_secret_cache_config:?}");
let caches = Box::leak(Box::new(console::caches::ApiCaches {
node_info: NodeInfoCache::new(
"node_info_cache",
@@ -282,6 +288,12 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
allowed_ips_cache_config.ttl,
false,
),
role_secret: RoleSecretCache::new(
"role_secret_cache",
role_secret_cache_config.size,
role_secret_cache_config.ttl,
false,
),
}));
let config::WakeComputeLockOptions {

View File

@@ -310,10 +310,10 @@ pub struct CacheOptions {
impl CacheOptions {
/// Default options for [`crate::console::provider::NodeInfoCache`].
pub const DEFAULT_OPTIONS_NODE_INFO: &'static str = "size=4000,ttl=4m";
pub const CACHE_DEFAULT_OPTIONS: &'static str = "size=4000,ttl=4m";
/// Parse cache options passed via cmdline.
/// Example: [`Self::DEFAULT_OPTIONS_NODE_INFO`].
/// Example: [`Self::CACHE_DEFAULT_OPTIONS`].
fn parse(options: &str) -> anyhow::Result<Self> {
let mut size = None;
let mut ttl = None;

View File

@@ -10,6 +10,7 @@ use crate::{
};
use async_trait::async_trait;
use dashmap::DashMap;
use smol_str::SmolStr;
use std::{sync::Arc, time::Duration};
use tokio::{
sync::{OwnedSemaphorePermit, Semaphore},
@@ -216,6 +217,7 @@ impl ConsoleReqExtra {
}
/// Auth secret which is managed by the cloud.
#[derive(Clone)]
pub enum AuthSecret {
#[cfg(feature = "testing")]
/// Md5 hash of user's password.
@@ -250,18 +252,19 @@ pub struct NodeInfo {
pub type NodeInfoCache = TimedLru<Arc<str>, NodeInfo>;
pub type CachedNodeInfo = timed_lru::Cached<&'static NodeInfoCache>;
pub type AllowedIpsCache = TimedLru<Arc<str>, Arc<Vec<String>>>;
pub type AllowedIpsCache = TimedLru<SmolStr, Arc<Vec<String>>>;
pub type RoleSecretCache = TimedLru<(SmolStr, SmolStr), Option<AuthSecret>>;
/// This will allocate per each call, but the http requests alone
/// already require a few allocations, so it should be fine.
#[async_trait]
pub trait Api {
/// Get the client's auth secret for authentication.
async fn get_auth_info(
async fn get_role_secret(
&self,
extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<AuthInfo, errors::GetAuthInfoError>;
) -> Result<Option<AuthSecret>, errors::GetAuthInfoError>;
async fn get_allowed_ips(
&self,
@@ -282,7 +285,9 @@ pub struct ApiCaches {
/// Cache for the `wake_compute` API method.
pub node_info: NodeInfoCache,
/// Cache for the `get_allowed_ips`. TODO(anna): use notifications listener instead.
pub allowed_ips: TimedLru<Arc<str>, Arc<Vec<String>>>,
pub allowed_ips: AllowedIpsCache,
/// Cache for the `get_role_secret`. TODO(anna): use notifications listener instead.
pub role_secret: RoleSecretCache,
}
/// Various caches for [`console`](super).

View File

@@ -142,12 +142,12 @@ async fn get_execute_postgres_query(
#[async_trait]
impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn get_auth_info(
async fn get_role_secret(
&self,
_extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> {
self.do_get_auth_info(creds).await
) -> Result<Option<AuthSecret>, GetAuthInfoError> {
Ok(self.do_get_auth_info(creds).await?.secret)
}
async fn get_allowed_ips(

View File

@@ -159,12 +159,24 @@ impl Api {
#[async_trait]
impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn get_auth_info(
async fn get_role_secret(
&self,
extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> {
self.do_get_auth_info(extra, creds).await
) -> Result<Option<AuthSecret>, GetAuthInfoError> {
let ep = creds.endpoint.clone();
let user = creds.inner.user.clone();
if let Some(role_secret) = self.caches.role_secret.get(&(ep.clone(), user.clone())) {
return Ok(role_secret.clone());
}
let auth_info = self.do_get_auth_info(extra, creds).await?;
self.caches
.role_secret
.insert((ep.clone(), user), auth_info.secret.clone());
self.caches
.allowed_ips
.insert(ep, Arc::new(auth_info.allowed_ips));
Ok(auth_info.secret)
}
async fn get_allowed_ips(
@@ -172,8 +184,7 @@ impl super::Api for Api {
extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
let key: &str = &creds.endpoint;
if let Some(allowed_ips) = self.caches.allowed_ips.get(key) {
if let Some(allowed_ips) = self.caches.allowed_ips.get(&creds.endpoint) {
ALLOWED_IPS_BY_CACHE_OUTCOME
.with_label_values(&["hit"])
.inc();
@@ -182,10 +193,14 @@ impl super::Api for Api {
ALLOWED_IPS_BY_CACHE_OUTCOME
.with_label_values(&["miss"])
.inc();
let allowed_ips = Arc::new(self.do_get_auth_info(extra, creds).await?.allowed_ips);
let auth_info = self.do_get_auth_info(extra, creds).await?;
let allowed_ips = Arc::new(auth_info.allowed_ips);
let ep = creds.endpoint.clone();
let user = creds.inner.user.clone();
self.caches
.allowed_ips
.insert(key.into(), allowed_ips.clone());
.role_secret
.insert((ep.clone(), user), auth_info.secret);
self.caches.allowed_ips.insert(ep, allowed_ips.clone());
Ok(allowed_ips)
}

View File

@@ -6,7 +6,7 @@ pub const SCRAM_KEY_LEN: usize = 32;
/// One of the keys derived from the [password](super::password::SaltedPassword).
/// We use the same structure for all keys, i.e.
/// `ClientKey`, `StoredKey`, and `ServerKey`.
#[derive(Default, PartialEq, Eq)]
#[derive(Clone, Default, PartialEq, Eq)]
#[repr(transparent)]
pub struct ScramKey {
bytes: [u8; SCRAM_KEY_LEN],

View File

@@ -5,6 +5,7 @@ use super::key::ScramKey;
/// Server secret is produced from [password](super::password::SaltedPassword)
/// and is used throughout the authentication process.
#[derive(Clone)]
pub struct ServerSecret {
/// Number of iterations for `PBKDF2` function.
pub iterations: u32,

View File

@@ -431,7 +431,6 @@ async fn connect_to_compute(
application_name: APP_NAME.to_string(),
options: console_options,
};
// TODO(anna): this is a bit hacky way, consider using console notification listener.
if !config.disable_ip_check_for_http {
let allowed_ips = backend.get_allowed_ips(&extra).await?;
if !check_peer_addr_is_in_list(&peer_addr, &allowed_ips) {