mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-30 19:40:39 +00:00
proxy: report metrics based on cold start info (#7324)
## Problem

It would be nice to have a bit more info on cold start metrics.

## Summary of changes

* Change connect compute latency to include `cold_start_info`.
* Update `ColdStartInfo` to include `HttpPoolHit` and `WarmCached`.
* Several changes to make more use of interned strings.
This commit is contained in:
@@ -3,7 +3,7 @@ use std::fmt;
|
||||
|
||||
use crate::auth::IpPattern;
|
||||
|
||||
use crate::{BranchId, EndpointId, ProjectId};
|
||||
use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt};
|
||||
|
||||
/// Generic error response with human-readable description.
|
||||
/// Note that we can't always present it to user as is.
|
||||
@@ -18,7 +18,7 @@ pub struct ConsoleError {
|
||||
pub struct GetRoleSecret {
|
||||
pub role_secret: Box<str>,
|
||||
pub allowed_ips: Option<Vec<IpPattern>>,
|
||||
pub project_id: Option<ProjectId>,
|
||||
pub project_id: Option<ProjectIdInt>,
|
||||
}
|
||||
|
||||
// Manually implement debug to omit sensitive info.
|
||||
@@ -93,22 +93,47 @@ impl fmt::Debug for DatabaseInfo {
|
||||
|
||||
/// Various labels for prometheus metrics.
|
||||
/// Also known as `ProxyMetricsAuxInfo` in the console.
|
||||
#[derive(Debug, Deserialize, Clone, Default)]
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct MetricsAuxInfo {
|
||||
pub endpoint_id: EndpointId,
|
||||
pub project_id: ProjectId,
|
||||
pub branch_id: BranchId,
|
||||
pub cold_start_info: Option<ColdStartInfo>,
|
||||
pub endpoint_id: EndpointIdInt,
|
||||
pub project_id: ProjectIdInt,
|
||||
pub branch_id: BranchIdInt,
|
||||
#[serde(default)]
|
||||
pub cold_start_info: ColdStartInfo,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
|
||||
#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ColdStartInfo {
|
||||
#[default]
|
||||
Unknown = 0,
|
||||
Warm = 1,
|
||||
PoolHit = 2,
|
||||
PoolMiss = 3,
|
||||
Unknown,
|
||||
/// Compute was already running
|
||||
Warm,
|
||||
#[serde(rename = "pool_hit")]
|
||||
/// Compute was not running but there was an available VM
|
||||
VmPoolHit,
|
||||
#[serde(rename = "pool_miss")]
|
||||
/// Compute was not running and there were no VMs available
|
||||
VmPoolMiss,
|
||||
|
||||
// not provided by control plane
|
||||
/// Connection available from HTTP pool
|
||||
HttpPoolHit,
|
||||
/// Cached connection info
|
||||
WarmCached,
|
||||
}
|
||||
|
||||
impl ColdStartInfo {
|
||||
/// Returns a fixed, lowercase `&'static str` name for this variant.
///
/// The strings are the same as the variants' serde names: the enum is
/// `rename_all = "snake_case"`, and `VmPoolHit` / `VmPoolMiss` are explicitly
/// renamed to `"pool_hit"` / `"pool_miss"`.
pub fn as_str(&self) -> &'static str {
|
||||
// Exhaustive match with no wildcard arm: a newly added variant must be
// given a name here before the code compiles.
match self {
|
||||
ColdStartInfo::Unknown => "unknown",
|
||||
ColdStartInfo::Warm => "warm",
|
||||
ColdStartInfo::VmPoolHit => "pool_hit",
|
||||
ColdStartInfo::VmPoolMiss => "pool_miss",
|
||||
ColdStartInfo::HttpPoolHit => "http_pool_hit",
|
||||
ColdStartInfo::WarmCached => "warm_cached",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -12,7 +12,8 @@ use crate::{
|
||||
compute,
|
||||
config::{CacheOptions, ProjectInfoCacheOptions},
|
||||
context::RequestMonitoring,
|
||||
scram, EndpointCacheKey, ProjectId,
|
||||
intern::ProjectIdInt,
|
||||
scram, EndpointCacheKey,
|
||||
};
|
||||
use dashmap::DashMap;
|
||||
use std::{sync::Arc, time::Duration};
|
||||
@@ -271,7 +272,7 @@ pub struct AuthInfo {
|
||||
/// List of IP addresses allowed for the authorization.
|
||||
pub allowed_ips: Vec<IpPattern>,
|
||||
/// Project ID. This is used for cache invalidation.
|
||||
pub project_id: Option<ProjectId>,
|
||||
pub project_id: Option<ProjectIdInt>,
|
||||
}
|
||||
|
||||
/// Info for establishing a connection to a compute node.
|
||||
|
||||
@@ -4,10 +4,16 @@ use super::{
|
||||
errors::{ApiError, GetAuthInfoError, WakeComputeError},
|
||||
AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo,
|
||||
};
|
||||
use crate::console::provider::{CachedAllowedIps, CachedRoleSecret};
|
||||
use crate::context::RequestMonitoring;
|
||||
use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
|
||||
use crate::{auth::IpPattern, cache::Cached};
|
||||
use crate::{
|
||||
console::{
|
||||
messages::MetricsAuxInfo,
|
||||
provider::{CachedAllowedIps, CachedRoleSecret},
|
||||
},
|
||||
BranchId, EndpointId, ProjectId,
|
||||
};
|
||||
use futures::TryFutureExt;
|
||||
use std::{str::FromStr, sync::Arc};
|
||||
use thiserror::Error;
|
||||
@@ -114,7 +120,12 @@ impl Api {
|
||||
|
||||
let node = NodeInfo {
|
||||
config,
|
||||
aux: Default::default(),
|
||||
aux: MetricsAuxInfo {
|
||||
endpoint_id: (&EndpointId::from("endpoint")).into(),
|
||||
project_id: (&ProjectId::from("project")).into(),
|
||||
branch_id: (&BranchId::from("branch")).into(),
|
||||
cold_start_info: crate::console::messages::ColdStartInfo::Warm,
|
||||
},
|
||||
allow_self_signed_compute: false,
|
||||
};
|
||||
|
||||
|
||||
@@ -181,15 +181,16 @@ impl super::Api for Api {
|
||||
}
|
||||
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
|
||||
if let Some(project_id) = auth_info.project_id {
|
||||
let ep_int = ep.into();
|
||||
self.caches.project_info.insert_role_secret(
|
||||
&project_id,
|
||||
ep,
|
||||
user,
|
||||
project_id,
|
||||
ep_int,
|
||||
user.into(),
|
||||
auth_info.secret.clone(),
|
||||
);
|
||||
self.caches.project_info.insert_allowed_ips(
|
||||
&project_id,
|
||||
ep,
|
||||
project_id,
|
||||
ep_int,
|
||||
Arc::new(auth_info.allowed_ips),
|
||||
);
|
||||
ctx.set_project_id(project_id);
|
||||
@@ -217,15 +218,16 @@ impl super::Api for Api {
|
||||
let allowed_ips = Arc::new(auth_info.allowed_ips);
|
||||
let user = &user_info.user;
|
||||
if let Some(project_id) = auth_info.project_id {
|
||||
let ep_int = ep.into();
|
||||
self.caches.project_info.insert_role_secret(
|
||||
&project_id,
|
||||
ep,
|
||||
user,
|
||||
project_id,
|
||||
ep_int,
|
||||
user.into(),
|
||||
auth_info.secret.clone(),
|
||||
);
|
||||
self.caches
|
||||
.project_info
|
||||
.insert_allowed_ips(&project_id, ep, allowed_ips.clone());
|
||||
.insert_allowed_ips(project_id, ep_int, allowed_ips.clone());
|
||||
ctx.set_project_id(project_id);
|
||||
}
|
||||
Ok((
|
||||
@@ -248,8 +250,7 @@ impl super::Api for Api {
|
||||
// which means that we might cache it to reduce the load and latency.
|
||||
if let Some(cached) = self.caches.node_info.get(&key) {
|
||||
info!(key = &*key, "found cached compute node info");
|
||||
info!("cold_start_info=warm");
|
||||
ctx.set_cold_start_info(ColdStartInfo::Warm);
|
||||
ctx.set_project(cached.aux.clone());
|
||||
return Ok(cached);
|
||||
}
|
||||
|
||||
@@ -260,17 +261,21 @@ impl super::Api for Api {
|
||||
if permit.should_check_cache() {
|
||||
if let Some(cached) = self.caches.node_info.get(&key) {
|
||||
info!(key = &*key, "found cached compute node info");
|
||||
info!("cold_start_info=warm");
|
||||
ctx.set_cold_start_info(ColdStartInfo::Warm);
|
||||
ctx.set_project(cached.aux.clone());
|
||||
return Ok(cached);
|
||||
}
|
||||
}
|
||||
|
||||
let node = self.do_wake_compute(ctx, user_info).await?;
|
||||
let mut node = self.do_wake_compute(ctx, user_info).await?;
|
||||
ctx.set_project(node.aux.clone());
|
||||
let cold_start_info = node.aux.cold_start_info.clone().unwrap_or_default();
|
||||
info!(?cold_start_info, "woken up a compute node");
|
||||
let (_, cached) = self.caches.node_info.insert(key.clone(), node);
|
||||
let cold_start_info = node.aux.cold_start_info;
|
||||
info!("woken up a compute node");
|
||||
|
||||
// store the cached node as `WarmCached`
|
||||
node.aux.cold_start_info = ColdStartInfo::WarmCached;
|
||||
let (_, mut cached) = self.caches.node_info.insert(key.clone(), node);
|
||||
cached.aux.cold_start_info = cold_start_info;
|
||||
|
||||
info!(key = &*key, "created a cache entry for compute node info");
|
||||
|
||||
Ok(cached)
|
||||
|
||||
Reference in New Issue
Block a user