proxy: report metrics based on cold start info (#7324)

## Problem

Would be nice to have a bit more info on cold start metrics.

## Summary of changes

* Change connect compute latency to include `cold_start_info`.
* Update `ColdStartInfo` to include HttpPoolHit and WarmCached.
* Several changes to make more use of interned strings
This commit is contained in:
Conrad Ludgate
2024-04-05 16:14:50 +01:00
committed by GitHub
parent 0fa517eb80
commit 55da8eff4f
17 changed files with 274 additions and 160 deletions

View File

@@ -3,7 +3,7 @@ use std::fmt;
use crate::auth::IpPattern;
use crate::{BranchId, EndpointId, ProjectId};
use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt};
/// Generic error response with human-readable description.
/// Note that we can't always present it to user as is.
@@ -18,7 +18,7 @@ pub struct ConsoleError {
pub struct GetRoleSecret {
pub role_secret: Box<str>,
pub allowed_ips: Option<Vec<IpPattern>>,
pub project_id: Option<ProjectId>,
pub project_id: Option<ProjectIdInt>,
}
// Manually implement debug to omit sensitive info.
@@ -93,22 +93,47 @@ impl fmt::Debug for DatabaseInfo {
/// Various labels for prometheus metrics.
/// Also known as `ProxyMetricsAuxInfo` in the console.
#[derive(Debug, Deserialize, Clone, Default)]
#[derive(Debug, Deserialize, Clone)]
pub struct MetricsAuxInfo {
pub endpoint_id: EndpointId,
pub project_id: ProjectId,
pub branch_id: BranchId,
pub cold_start_info: Option<ColdStartInfo>,
pub endpoint_id: EndpointIdInt,
pub project_id: ProjectIdInt,
pub branch_id: BranchIdInt,
#[serde(default)]
pub cold_start_info: ColdStartInfo,
}
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy)]
#[serde(rename_all = "snake_case")]
pub enum ColdStartInfo {
#[default]
Unknown = 0,
Warm = 1,
PoolHit = 2,
PoolMiss = 3,
Unknown,
/// Compute was already running
Warm,
#[serde(rename = "pool_hit")]
/// Compute was not running but there was an available VM
VmPoolHit,
#[serde(rename = "pool_miss")]
/// Compute was not running and there were no VMs available
VmPoolMiss,
// not provided by control plane
/// Connection available from HTTP pool
HttpPoolHit,
/// Cached connection info
WarmCached,
}
impl ColdStartInfo {
pub fn as_str(&self) -> &'static str {
match self {
ColdStartInfo::Unknown => "unknown",
ColdStartInfo::Warm => "warm",
ColdStartInfo::VmPoolHit => "pool_hit",
ColdStartInfo::VmPoolMiss => "pool_miss",
ColdStartInfo::HttpPoolHit => "http_pool_hit",
ColdStartInfo::WarmCached => "warm_cached",
}
}
}
#[cfg(test)]

View File

@@ -12,7 +12,8 @@ use crate::{
compute,
config::{CacheOptions, ProjectInfoCacheOptions},
context::RequestMonitoring,
scram, EndpointCacheKey, ProjectId,
intern::ProjectIdInt,
scram, EndpointCacheKey,
};
use dashmap::DashMap;
use std::{sync::Arc, time::Duration};
@@ -271,7 +272,7 @@ pub struct AuthInfo {
/// List of IP addresses allowed for the autorization.
pub allowed_ips: Vec<IpPattern>,
/// Project ID. This is used for cache invalidation.
pub project_id: Option<ProjectId>,
pub project_id: Option<ProjectIdInt>,
}
/// Info for establishing a connection to a compute node.

View File

@@ -4,10 +4,16 @@ use super::{
errors::{ApiError, GetAuthInfoError, WakeComputeError},
AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo,
};
use crate::console::provider::{CachedAllowedIps, CachedRoleSecret};
use crate::context::RequestMonitoring;
use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
use crate::{auth::IpPattern, cache::Cached};
use crate::{
console::{
messages::MetricsAuxInfo,
provider::{CachedAllowedIps, CachedRoleSecret},
},
BranchId, EndpointId, ProjectId,
};
use futures::TryFutureExt;
use std::{str::FromStr, sync::Arc};
use thiserror::Error;
@@ -114,7 +120,12 @@ impl Api {
let node = NodeInfo {
config,
aux: Default::default(),
aux: MetricsAuxInfo {
endpoint_id: (&EndpointId::from("endpoint")).into(),
project_id: (&ProjectId::from("project")).into(),
branch_id: (&BranchId::from("branch")).into(),
cold_start_info: crate::console::messages::ColdStartInfo::Warm,
},
allow_self_signed_compute: false,
};

View File

@@ -181,15 +181,16 @@ impl super::Api for Api {
}
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
if let Some(project_id) = auth_info.project_id {
let ep_int = ep.into();
self.caches.project_info.insert_role_secret(
&project_id,
ep,
user,
project_id,
ep_int,
user.into(),
auth_info.secret.clone(),
);
self.caches.project_info.insert_allowed_ips(
&project_id,
ep,
project_id,
ep_int,
Arc::new(auth_info.allowed_ips),
);
ctx.set_project_id(project_id);
@@ -217,15 +218,16 @@ impl super::Api for Api {
let allowed_ips = Arc::new(auth_info.allowed_ips);
let user = &user_info.user;
if let Some(project_id) = auth_info.project_id {
let ep_int = ep.into();
self.caches.project_info.insert_role_secret(
&project_id,
ep,
user,
project_id,
ep_int,
user.into(),
auth_info.secret.clone(),
);
self.caches
.project_info
.insert_allowed_ips(&project_id, ep, allowed_ips.clone());
.insert_allowed_ips(project_id, ep_int, allowed_ips.clone());
ctx.set_project_id(project_id);
}
Ok((
@@ -248,8 +250,7 @@ impl super::Api for Api {
// which means that we might cache it to reduce the load and latency.
if let Some(cached) = self.caches.node_info.get(&key) {
info!(key = &*key, "found cached compute node info");
info!("cold_start_info=warm");
ctx.set_cold_start_info(ColdStartInfo::Warm);
ctx.set_project(cached.aux.clone());
return Ok(cached);
}
@@ -260,17 +261,21 @@ impl super::Api for Api {
if permit.should_check_cache() {
if let Some(cached) = self.caches.node_info.get(&key) {
info!(key = &*key, "found cached compute node info");
info!("cold_start_info=warm");
ctx.set_cold_start_info(ColdStartInfo::Warm);
ctx.set_project(cached.aux.clone());
return Ok(cached);
}
}
let node = self.do_wake_compute(ctx, user_info).await?;
let mut node = self.do_wake_compute(ctx, user_info).await?;
ctx.set_project(node.aux.clone());
let cold_start_info = node.aux.cold_start_info.clone().unwrap_or_default();
info!(?cold_start_info, "woken up a compute node");
let (_, cached) = self.caches.node_info.insert(key.clone(), node);
let cold_start_info = node.aux.cold_start_info;
info!("woken up a compute node");
// store the cached node as 'warm'
node.aux.cold_start_info = ColdStartInfo::WarmCached;
let (_, mut cached) = self.caches.node_info.insert(key.clone(), node);
cached.aux.cold_start_info = cold_start_info;
info!(key = &*key, "created a cache entry for compute node info");
Ok(cached)