mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-09 14:32:57 +00:00
## Problem

It would be nice to have a bit more info on cold start metrics.

## Summary of changes

* Change connect compute latency to include `cold_start_info`.
* Update `ColdStartInfo` to include `HttpPoolHit` and `WarmCached`.
* Several changes to make more use of interned strings.
557 lines
19 KiB
Rust
557 lines
19 KiB
Rust
use std::{
|
|
collections::HashSet,
|
|
convert::Infallible,
|
|
sync::{atomic::AtomicU64, Arc},
|
|
time::Duration,
|
|
};
|
|
|
|
use dashmap::DashMap;
|
|
use rand::{thread_rng, Rng};
|
|
use smol_str::SmolStr;
|
|
use tokio::time::Instant;
|
|
use tracing::{debug, info};
|
|
|
|
use crate::{
|
|
auth::IpPattern,
|
|
config::ProjectInfoCacheOptions,
|
|
console::AuthSecret,
|
|
intern::{EndpointIdInt, ProjectIdInt, RoleNameInt},
|
|
EndpointId, RoleName,
|
|
};
|
|
|
|
use super::{Cache, Cached};
|
|
|
|
pub trait ProjectInfoCache {
|
|
fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt);
|
|
fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt);
|
|
fn enable_ttl(&self);
|
|
fn disable_ttl(&self);
|
|
}
|
|
|
|
struct Entry<T> {
|
|
created_at: Instant,
|
|
value: T,
|
|
}
|
|
|
|
impl<T> Entry<T> {
|
|
pub fn new(value: T) -> Self {
|
|
Self {
|
|
created_at: Instant::now(),
|
|
value,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<T> From<T> for Entry<T> {
|
|
fn from(value: T) -> Self {
|
|
Self::new(value)
|
|
}
|
|
}
|
|
|
|
#[derive(Default)]
|
|
struct EndpointInfo {
|
|
secret: std::collections::HashMap<RoleNameInt, Entry<Option<AuthSecret>>>,
|
|
allowed_ips: Option<Entry<Arc<Vec<IpPattern>>>>,
|
|
}
|
|
|
|
impl EndpointInfo {
|
|
fn check_ignore_cache(ignore_cache_since: Option<Instant>, created_at: Instant) -> bool {
|
|
match ignore_cache_since {
|
|
None => false,
|
|
Some(t) => t < created_at,
|
|
}
|
|
}
|
|
pub fn get_role_secret(
|
|
&self,
|
|
role_name: RoleNameInt,
|
|
valid_since: Instant,
|
|
ignore_cache_since: Option<Instant>,
|
|
) -> Option<(Option<AuthSecret>, bool)> {
|
|
if let Some(secret) = self.secret.get(&role_name) {
|
|
if valid_since < secret.created_at {
|
|
return Some((
|
|
secret.value.clone(),
|
|
Self::check_ignore_cache(ignore_cache_since, secret.created_at),
|
|
));
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
pub fn get_allowed_ips(
|
|
&self,
|
|
valid_since: Instant,
|
|
ignore_cache_since: Option<Instant>,
|
|
) -> Option<(Arc<Vec<IpPattern>>, bool)> {
|
|
if let Some(allowed_ips) = &self.allowed_ips {
|
|
if valid_since < allowed_ips.created_at {
|
|
return Some((
|
|
allowed_ips.value.clone(),
|
|
Self::check_ignore_cache(ignore_cache_since, allowed_ips.created_at),
|
|
));
|
|
}
|
|
}
|
|
None
|
|
}
|
|
pub fn invalidate_allowed_ips(&mut self) {
|
|
self.allowed_ips = None;
|
|
}
|
|
pub fn invalidate_role_secret(&mut self, role_name: RoleNameInt) {
|
|
self.secret.remove(&role_name);
|
|
}
|
|
}
|
|
|
|
/// Cache for project info.
|
|
/// This is used to cache auth data for endpoints.
|
|
/// Invalidation is done by console notifications or by TTL (if console notifications are disabled).
|
|
///
|
|
/// We also store endpoint-to-project mapping in the cache, to be able to access per-endpoint data.
|
|
/// One may ask, why the data is stored per project, when on the user request there is only data about the endpoint available?
|
|
/// On the cplane side updates are done per project (or per branch), so it's easier to invalidate the whole project cache.
|
|
pub struct ProjectInfoCacheImpl {
|
|
cache: DashMap<EndpointIdInt, EndpointInfo>,
|
|
|
|
project2ep: DashMap<ProjectIdInt, HashSet<EndpointIdInt>>,
|
|
config: ProjectInfoCacheOptions,
|
|
|
|
start_time: Instant,
|
|
ttl_disabled_since_us: AtomicU64,
|
|
}
|
|
|
|
impl ProjectInfoCache for ProjectInfoCacheImpl {
|
|
fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt) {
|
|
info!("invalidating allowed ips for project `{}`", project_id);
|
|
let endpoints = self
|
|
.project2ep
|
|
.get(&project_id)
|
|
.map(|kv| kv.value().clone())
|
|
.unwrap_or_default();
|
|
for endpoint_id in endpoints {
|
|
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
|
|
endpoint_info.invalidate_allowed_ips();
|
|
}
|
|
}
|
|
}
|
|
fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt) {
|
|
info!(
|
|
"invalidating role secret for project_id `{}` and role_name `{}`",
|
|
project_id, role_name,
|
|
);
|
|
let endpoints = self
|
|
.project2ep
|
|
.get(&project_id)
|
|
.map(|kv| kv.value().clone())
|
|
.unwrap_or_default();
|
|
for endpoint_id in endpoints {
|
|
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
|
|
endpoint_info.invalidate_role_secret(role_name);
|
|
}
|
|
}
|
|
}
|
|
fn enable_ttl(&self) {
|
|
self.ttl_disabled_since_us
|
|
.store(u64::MAX, std::sync::atomic::Ordering::Relaxed);
|
|
}
|
|
|
|
fn disable_ttl(&self) {
|
|
let new_ttl = (self.start_time.elapsed() + self.config.ttl).as_micros() as u64;
|
|
self.ttl_disabled_since_us
|
|
.store(new_ttl, std::sync::atomic::Ordering::Relaxed);
|
|
}
|
|
}
|
|
|
|
impl ProjectInfoCacheImpl {
|
|
pub fn new(config: ProjectInfoCacheOptions) -> Self {
|
|
Self {
|
|
cache: DashMap::new(),
|
|
project2ep: DashMap::new(),
|
|
config,
|
|
ttl_disabled_since_us: AtomicU64::new(u64::MAX),
|
|
start_time: Instant::now(),
|
|
}
|
|
}
|
|
|
|
pub fn get_role_secret(
|
|
&self,
|
|
endpoint_id: &EndpointId,
|
|
role_name: &RoleName,
|
|
) -> Option<Cached<&Self, Option<AuthSecret>>> {
|
|
let endpoint_id = EndpointIdInt::get(endpoint_id)?;
|
|
let role_name = RoleNameInt::get(role_name)?;
|
|
let (valid_since, ignore_cache_since) = self.get_cache_times();
|
|
let endpoint_info = self.cache.get(&endpoint_id)?;
|
|
let (value, ignore_cache) =
|
|
endpoint_info.get_role_secret(role_name, valid_since, ignore_cache_since)?;
|
|
if !ignore_cache {
|
|
let cached = Cached {
|
|
token: Some((
|
|
self,
|
|
CachedLookupInfo::new_role_secret(endpoint_id, role_name),
|
|
)),
|
|
value,
|
|
};
|
|
return Some(cached);
|
|
}
|
|
Some(Cached::new_uncached(value))
|
|
}
|
|
pub fn get_allowed_ips(
|
|
&self,
|
|
endpoint_id: &EndpointId,
|
|
) -> Option<Cached<&Self, Arc<Vec<IpPattern>>>> {
|
|
let endpoint_id = EndpointIdInt::get(endpoint_id)?;
|
|
let (valid_since, ignore_cache_since) = self.get_cache_times();
|
|
let endpoint_info = self.cache.get(&endpoint_id)?;
|
|
let value = endpoint_info.get_allowed_ips(valid_since, ignore_cache_since);
|
|
let (value, ignore_cache) = value?;
|
|
if !ignore_cache {
|
|
let cached = Cached {
|
|
token: Some((self, CachedLookupInfo::new_allowed_ips(endpoint_id))),
|
|
value,
|
|
};
|
|
return Some(cached);
|
|
}
|
|
Some(Cached::new_uncached(value))
|
|
}
|
|
pub fn insert_role_secret(
|
|
&self,
|
|
project_id: ProjectIdInt,
|
|
endpoint_id: EndpointIdInt,
|
|
role_name: RoleNameInt,
|
|
secret: Option<AuthSecret>,
|
|
) {
|
|
if self.cache.len() >= self.config.size {
|
|
// If there are too many entries, wait until the next gc cycle.
|
|
return;
|
|
}
|
|
self.insert_project2endpoint(project_id, endpoint_id);
|
|
let mut entry = self.cache.entry(endpoint_id).or_default();
|
|
if entry.secret.len() < self.config.max_roles {
|
|
entry.secret.insert(role_name, secret.into());
|
|
}
|
|
}
|
|
pub fn insert_allowed_ips(
|
|
&self,
|
|
project_id: ProjectIdInt,
|
|
endpoint_id: EndpointIdInt,
|
|
allowed_ips: Arc<Vec<IpPattern>>,
|
|
) {
|
|
if self.cache.len() >= self.config.size {
|
|
// If there are too many entries, wait until the next gc cycle.
|
|
return;
|
|
}
|
|
self.insert_project2endpoint(project_id, endpoint_id);
|
|
self.cache.entry(endpoint_id).or_default().allowed_ips = Some(allowed_ips.into());
|
|
}
|
|
fn insert_project2endpoint(&self, project_id: ProjectIdInt, endpoint_id: EndpointIdInt) {
|
|
if let Some(mut endpoints) = self.project2ep.get_mut(&project_id) {
|
|
endpoints.insert(endpoint_id);
|
|
} else {
|
|
self.project2ep
|
|
.insert(project_id, HashSet::from([endpoint_id]));
|
|
}
|
|
}
|
|
fn get_cache_times(&self) -> (Instant, Option<Instant>) {
|
|
let mut valid_since = Instant::now() - self.config.ttl;
|
|
// Only ignore cache if ttl is disabled.
|
|
let ttl_disabled_since_us = self
|
|
.ttl_disabled_since_us
|
|
.load(std::sync::atomic::Ordering::Relaxed);
|
|
let ignore_cache_since = if ttl_disabled_since_us != u64::MAX {
|
|
let ignore_cache_since = self.start_time + Duration::from_micros(ttl_disabled_since_us);
|
|
// We are fine if entry is not older than ttl or was added before we are getting notifications.
|
|
valid_since = valid_since.min(ignore_cache_since);
|
|
Some(ignore_cache_since)
|
|
} else {
|
|
None
|
|
};
|
|
(valid_since, ignore_cache_since)
|
|
}
|
|
|
|
pub async fn gc_worker(&self) -> anyhow::Result<Infallible> {
|
|
let mut interval =
|
|
tokio::time::interval(self.config.gc_interval / (self.cache.shards().len()) as u32);
|
|
loop {
|
|
interval.tick().await;
|
|
if self.cache.len() < self.config.size {
|
|
// If there are not too many entries, wait until the next gc cycle.
|
|
continue;
|
|
}
|
|
self.gc();
|
|
}
|
|
}
|
|
|
|
fn gc(&self) {
|
|
let shard = thread_rng().gen_range(0..self.project2ep.shards().len());
|
|
debug!(shard, "project_info_cache: performing epoch reclamation");
|
|
|
|
// acquire a random shard lock
|
|
let mut removed = 0;
|
|
let shard = self.project2ep.shards()[shard].write();
|
|
for (_, endpoints) in shard.iter() {
|
|
for endpoint in endpoints.get().iter() {
|
|
self.cache.remove(endpoint);
|
|
removed += 1;
|
|
}
|
|
}
|
|
// We can drop this shard only after making sure that all endpoints are removed.
|
|
drop(shard);
|
|
info!("project_info_cache: removed {removed} endpoints");
|
|
}
|
|
}
|
|
|
|
/// Lookup info for project info cache.
|
|
/// This is used to invalidate cache entries.
|
|
pub struct CachedLookupInfo {
|
|
/// Search by this key.
|
|
endpoint_id: EndpointIdInt,
|
|
lookup_type: LookupType,
|
|
}
|
|
|
|
impl CachedLookupInfo {
|
|
pub(self) fn new_role_secret(endpoint_id: EndpointIdInt, role_name: RoleNameInt) -> Self {
|
|
Self {
|
|
endpoint_id,
|
|
lookup_type: LookupType::RoleSecret(role_name),
|
|
}
|
|
}
|
|
pub(self) fn new_allowed_ips(endpoint_id: EndpointIdInt) -> Self {
|
|
Self {
|
|
endpoint_id,
|
|
lookup_type: LookupType::AllowedIps,
|
|
}
|
|
}
|
|
}
|
|
|
|
enum LookupType {
|
|
RoleSecret(RoleNameInt),
|
|
AllowedIps,
|
|
}
|
|
|
|
impl Cache for ProjectInfoCacheImpl {
|
|
type Key = SmolStr;
|
|
// Value is not really used here, but we need to specify it.
|
|
type Value = SmolStr;
|
|
|
|
type LookupInfo<Key> = CachedLookupInfo;
|
|
|
|
fn invalidate(&self, key: &Self::LookupInfo<SmolStr>) {
|
|
match &key.lookup_type {
|
|
LookupType::RoleSecret(role_name) => {
|
|
if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
|
|
endpoint_info.invalidate_role_secret(*role_name);
|
|
}
|
|
}
|
|
LookupType::AllowedIps => {
|
|
if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
|
|
endpoint_info.invalidate_allowed_ips();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{scram::ServerSecret, ProjectId};

    /// Cache options shared by all tests: room for 2 endpoints and 2 roles,
    /// a 1 second TTL and a 10 minute GC interval.
    fn test_options() -> ProjectInfoCacheOptions {
        ProjectInfoCacheOptions {
            size: 2,
            max_roles: 2,
            ttl: Duration::from_secs(1),
            gc_interval: Duration::from_secs(600),
        }
    }

    /// Allowed-IP fixture shared by all tests.
    fn test_allowed_ips() -> Arc<Vec<IpPattern>> {
        Arc::new(vec![
            "127.0.0.1".parse().unwrap(),
            "127.0.0.2".parse().unwrap(),
        ])
    }

    /// With TTL enabled: entries are served as cached, the role limit is
    /// honoured, and everything expires once the TTL has passed.
    #[tokio::test]
    async fn test_project_info_cache_settings() {
        tokio::time::pause();
        let cache = ProjectInfoCacheImpl::new(test_options());
        let project_id: ProjectId = "project".into();
        let endpoint_id: EndpointId = "endpoint".into();
        let user1: RoleName = "user1".into();
        let user2: RoleName = "user2".into();
        let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
        let secret2 = None;
        let allowed_ips = test_allowed_ips();
        cache.insert_role_secret(
            (&project_id).into(),
            (&endpoint_id).into(),
            (&user1).into(),
            secret1.clone(),
        );
        cache.insert_role_secret(
            (&project_id).into(),
            (&endpoint_id).into(),
            (&user2).into(),
            secret2.clone(),
        );
        cache.insert_allowed_ips(
            (&project_id).into(),
            (&endpoint_id).into(),
            allowed_ips.clone(),
        );

        let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
        assert!(cached.cached());
        assert_eq!(cached.value, secret1);
        let cached = cache.get_role_secret(&endpoint_id, &user2).unwrap();
        assert!(cached.cached());
        assert_eq!(cached.value, secret2);

        // Shouldn't add more than 2 roles.
        let user3: RoleName = "user3".into();
        let secret3 = Some(AuthSecret::Scram(ServerSecret::mock([3; 32])));
        cache.insert_role_secret(
            (&project_id).into(),
            (&endpoint_id).into(),
            (&user3).into(),
            secret3.clone(),
        );
        assert!(cache.get_role_secret(&endpoint_id, &user3).is_none());

        let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
        assert!(cached.cached());
        assert_eq!(cached.value, allowed_ips);

        // Past the TTL nothing may be served any more.
        tokio::time::advance(Duration::from_secs(2)).await;
        let cached = cache.get_role_secret(&endpoint_id, &user1);
        assert!(cached.is_none());
        let cached = cache.get_role_secret(&endpoint_id, &user2);
        assert!(cached.is_none());
        let cached = cache.get_allowed_ips(&endpoint_id);
        assert!(cached.is_none());
    }

    /// With TTL disabled: entries outlive the TTL, come back without an
    /// invalidation token, and only go away through the project-level API.
    #[tokio::test]
    async fn test_project_info_cache_invalidations() {
        tokio::time::pause();
        let cache = Arc::new(ProjectInfoCacheImpl::new(test_options()));
        cache.disable_ttl();
        tokio::time::advance(Duration::from_secs(2)).await;

        let project_id: ProjectId = "project".into();
        let endpoint_id: EndpointId = "endpoint".into();
        let user1: RoleName = "user1".into();
        let user2: RoleName = "user2".into();
        let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
        let secret2 = Some(AuthSecret::Scram(ServerSecret::mock([2; 32])));
        let allowed_ips = test_allowed_ips();
        cache.insert_role_secret(
            (&project_id).into(),
            (&endpoint_id).into(),
            (&user1).into(),
            secret1.clone(),
        );
        cache.insert_role_secret(
            (&project_id).into(),
            (&endpoint_id).into(),
            (&user2).into(),
            secret2.clone(),
        );
        cache.insert_allowed_ips(
            (&project_id).into(),
            (&endpoint_id).into(),
            allowed_ips.clone(),
        );

        tokio::time::advance(Duration::from_secs(2)).await;
        // Nothing should be invalidated.

        let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
        // TTL is disabled, so it should be impossible to invalidate this value.
        assert!(!cached.cached());
        assert_eq!(cached.value, secret1);

        cached.invalidate(); // Shouldn't do anything.
        let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
        assert_eq!(cached.value, secret1);

        let cached = cache.get_role_secret(&endpoint_id, &user2).unwrap();
        assert!(!cached.cached());
        assert_eq!(cached.value, secret2);

        // The only way to invalidate this value is to invalidate via the api.
        cache.invalidate_role_secret_for_project((&project_id).into(), (&user2).into());
        assert!(cache.get_role_secret(&endpoint_id, &user2).is_none());

        let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
        assert!(!cached.cached());
        assert_eq!(cached.value, allowed_ips);
    }

    /// Disabling TTL midway: entries from before the cutoff still expire by
    /// TTL, entries from after it are served without a token and do not expire.
    #[tokio::test]
    async fn test_disable_ttl_invalidate_added_before() {
        tokio::time::pause();
        let cache = Arc::new(ProjectInfoCacheImpl::new(test_options()));

        let project_id: ProjectId = "project".into();
        let endpoint_id: EndpointId = "endpoint".into();
        let user1: RoleName = "user1".into();
        let user2: RoleName = "user2".into();
        let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
        let secret2 = Some(AuthSecret::Scram(ServerSecret::mock([2; 32])));
        let allowed_ips = test_allowed_ips();
        cache.insert_role_secret(
            (&project_id).into(),
            (&endpoint_id).into(),
            (&user1).into(),
            secret1.clone(),
        );
        cache.disable_ttl();
        tokio::time::advance(Duration::from_millis(100)).await;
        cache.insert_role_secret(
            (&project_id).into(),
            (&endpoint_id).into(),
            (&user2).into(),
            secret2.clone(),
        );

        // Added before ttl was disabled + ttl should be still cached.
        let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
        assert!(cached.cached());
        let cached = cache.get_role_secret(&endpoint_id, &user2).unwrap();
        assert!(cached.cached());

        tokio::time::advance(Duration::from_secs(1)).await;
        // Added before ttl was disabled + ttl should expire.
        assert!(cache.get_role_secret(&endpoint_id, &user1).is_none());
        assert!(cache.get_role_secret(&endpoint_id, &user2).is_none());

        // Added after ttl was disabled + ttl should not be cached.
        cache.insert_allowed_ips(
            (&project_id).into(),
            (&endpoint_id).into(),
            allowed_ips.clone(),
        );
        let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
        assert!(!cached.cached());

        tokio::time::advance(Duration::from_secs(1)).await;
        // Added before ttl was disabled + ttl still should expire.
        assert!(cache.get_role_secret(&endpoint_id, &user1).is_none());
        assert!(cache.get_role_secret(&endpoint_id, &user2).is_none());
        // Shouldn't be invalidated.

        let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
        assert!(!cached.cached());
        assert_eq!(cached.value, allowed_ips);
    }
}
|