mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-21 15:10:44 +00:00
[proxy] introduce moka for the project-info cache (#12710)
## Problem LKB-2502 The garbage collection of the project info cache is garbage. What we observed: If we get unlucky, we might throw away a very hot entry if the cache is full. The GC loop is dependent on getting a lucky shard of the projects2ep table that clears a lot of cold entries. The GC does not take into account active use, and the interval it runs at is too sparse to do any good. Can we switch to a proper cache implementation? Complications: 1. We need to invalidate by project/account. 2. We need to expire based on `retry_delay_ms`. ## Summary of changes 1. Replace `retry_delay_ms: Duration` with `retry_at: Instant` when deserializing. 2. Split the EndpointControls from the RoleControls into two different caches. 3. Introduce an expiry policy based on error retry info. 4. Introduce `moka` as a dependency, replacing our `TimedLru`. See the follow up PR for changing all TimedLru instances to use moka: #12726.
This commit is contained in:
@@ -3,7 +3,6 @@
|
||||
use std::net::IpAddr;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use ::http::HeaderName;
|
||||
use ::http::header::AUTHORIZATION;
|
||||
@@ -17,6 +16,7 @@ use tracing::{Instrument, debug, info, info_span, warn};
|
||||
use super::super::messages::{ControlPlaneErrorMessage, GetEndpointAccessControl, WakeCompute};
|
||||
use crate::auth::backend::ComputeUserInfo;
|
||||
use crate::auth::backend::jwt::AuthRule;
|
||||
use crate::cache::common::DEFAULT_ERROR_TTL;
|
||||
use crate::context::RequestContext;
|
||||
use crate::control_plane::caches::ApiCaches;
|
||||
use crate::control_plane::errors::{
|
||||
@@ -118,7 +118,6 @@ impl NeonControlPlaneClient {
|
||||
cache_key.into(),
|
||||
role.into(),
|
||||
msg.clone(),
|
||||
retry_info.map(|r| Duration::from_millis(r.retry_delay_ms)),
|
||||
);
|
||||
|
||||
Err(err)
|
||||
@@ -347,18 +346,11 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
|
||||
) -> Result<RoleAccessControl, GetAuthInfoError> {
|
||||
let key = endpoint.normalize();
|
||||
|
||||
if let Some((role_control, ttl)) = self
|
||||
.caches
|
||||
.project_info
|
||||
.get_role_secret_with_ttl(&key, role)
|
||||
{
|
||||
if let Some(role_control) = self.caches.project_info.get_role_secret(&key, role) {
|
||||
return match role_control {
|
||||
Err(mut msg) => {
|
||||
Err(msg) => {
|
||||
info!(key = &*key, "found cached get_role_access_control error");
|
||||
|
||||
// if retry_delay_ms is set change it to the remaining TTL
|
||||
replace_retry_delay_ms(&mut msg, |_| ttl.as_millis() as u64);
|
||||
|
||||
Err(GetAuthInfoError::ApiError(ControlPlaneError::Message(msg)))
|
||||
}
|
||||
Ok(role_control) => {
|
||||
@@ -383,17 +375,14 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
|
||||
) -> Result<EndpointAccessControl, GetAuthInfoError> {
|
||||
let key = endpoint.normalize();
|
||||
|
||||
if let Some((control, ttl)) = self.caches.project_info.get_endpoint_access_with_ttl(&key) {
|
||||
if let Some(control) = self.caches.project_info.get_endpoint_access(&key) {
|
||||
return match control {
|
||||
Err(mut msg) => {
|
||||
Err(msg) => {
|
||||
info!(
|
||||
key = &*key,
|
||||
"found cached get_endpoint_access_control error"
|
||||
);
|
||||
|
||||
// if retry_delay_ms is set change it to the remaining TTL
|
||||
replace_retry_delay_ms(&mut msg, |_| ttl.as_millis() as u64);
|
||||
|
||||
Err(GetAuthInfoError::ApiError(ControlPlaneError::Message(msg)))
|
||||
}
|
||||
Ok(control) => {
|
||||
@@ -426,17 +415,12 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
|
||||
|
||||
macro_rules! check_cache {
|
||||
() => {
|
||||
if let Some(cached) = self.caches.node_info.get_with_created_at(&key) {
|
||||
let (cached, (info, created_at)) = cached.take_value();
|
||||
if let Some(cached) = self.caches.node_info.get(&key) {
|
||||
let (cached, info) = cached.take_value();
|
||||
return match info {
|
||||
Err(mut msg) => {
|
||||
Err(msg) => {
|
||||
info!(key = &*key, "found cached wake_compute error");
|
||||
|
||||
// if retry_delay_ms is set, reduce it by the amount of time it spent in cache
|
||||
replace_retry_delay_ms(&mut msg, |delay| {
|
||||
delay.saturating_sub(created_at.elapsed().as_millis() as u64)
|
||||
});
|
||||
|
||||
Err(WakeComputeError::ControlPlane(ControlPlaneError::Message(
|
||||
msg,
|
||||
)))
|
||||
@@ -503,9 +487,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
|
||||
"created a cache entry for the wake compute error"
|
||||
);
|
||||
|
||||
let ttl = retry_info.map_or(Duration::from_secs(30), |r| {
|
||||
Duration::from_millis(r.retry_delay_ms)
|
||||
});
|
||||
let ttl = retry_info.map_or(DEFAULT_ERROR_TTL, |r| r.retry_at - Instant::now());
|
||||
|
||||
self.caches.node_info.insert_ttl(key, Err(msg.clone()), ttl);
|
||||
|
||||
@@ -517,14 +499,6 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
|
||||
}
|
||||
}
|
||||
|
||||
fn replace_retry_delay_ms(msg: &mut ControlPlaneErrorMessage, f: impl FnOnce(u64) -> u64) {
|
||||
if let Some(status) = &mut msg.status
|
||||
&& let Some(retry_info) = &mut status.details.retry_info
|
||||
{
|
||||
retry_info.retry_delay_ms = f(retry_info.retry_delay_ms);
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse http response body, taking status code into account.
|
||||
fn parse_body<T: for<'a> serde::Deserialize<'a>>(
|
||||
status: StatusCode,
|
||||
|
||||
@@ -13,7 +13,7 @@ use tracing::{debug, info};
|
||||
use super::{EndpointAccessControl, RoleAccessControl};
|
||||
use crate::auth::backend::ComputeUserInfo;
|
||||
use crate::auth::backend::jwt::{AuthRule, FetchAuthRules, FetchAuthRulesError};
|
||||
use crate::cache::project_info::ProjectInfoCacheImpl;
|
||||
use crate::cache::project_info::ProjectInfoCache;
|
||||
use crate::config::{CacheOptions, ProjectInfoCacheOptions};
|
||||
use crate::context::RequestContext;
|
||||
use crate::control_plane::{CachedNodeInfo, ControlPlaneApi, NodeInfoCache, errors};
|
||||
@@ -119,7 +119,7 @@ pub struct ApiCaches {
|
||||
/// Cache for the `wake_compute` API method.
|
||||
pub(crate) node_info: NodeInfoCache,
|
||||
/// Cache which stores project_id -> endpoint_ids mapping.
|
||||
pub project_info: Arc<ProjectInfoCacheImpl>,
|
||||
pub project_info: Arc<ProjectInfoCache>,
|
||||
}
|
||||
|
||||
impl ApiCaches {
|
||||
@@ -134,7 +134,7 @@ impl ApiCaches {
|
||||
wake_compute_cache_config.ttl,
|
||||
true,
|
||||
),
|
||||
project_info: Arc::new(ProjectInfoCacheImpl::new(project_info_cache_config)),
|
||||
project_info: Arc::new(ProjectInfoCache::new(project_info_cache_config)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
use std::fmt::{self, Display};
|
||||
use std::time::Duration;
|
||||
|
||||
use measured::FixedCardinalityLabel;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smol_str::SmolStr;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::auth::IpPattern;
|
||||
use crate::intern::{AccountIdInt, BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
|
||||
@@ -231,7 +233,13 @@ impl Reason {
|
||||
#[derive(Copy, Clone, Debug, Deserialize)]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) struct RetryInfo {
|
||||
pub(crate) retry_delay_ms: u64,
|
||||
#[serde(rename = "retry_delay_ms", deserialize_with = "milliseconds_from_now")]
|
||||
pub(crate) retry_at: Instant,
|
||||
}
|
||||
|
||||
fn milliseconds_from_now<'de, D: serde::Deserializer<'de>>(d: D) -> Result<Instant, D::Error> {
|
||||
let millis = u64::deserialize(d)?;
|
||||
Ok(Instant::now() + Duration::from_millis(millis))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
|
||||
Reference in New Issue
Block a user