From 56033189c10dc93fa0098f5ae77a951b481edc15 Mon Sep 17 00:00:00 2001 From: Ivan Efremov Date: Fri, 28 Feb 2025 19:58:42 +0200 Subject: [PATCH] feat(proxy): Log latency after connect to compute (#11048) ## Problem To measure latency accurate we should associate the testodrome role within a latency data ## Summary of changes Add latency logging to associate different roles within a latency. Relates to the #22486 --- proxy/src/compute.rs | 5 +++-- proxy/src/context/mod.rs | 11 ++++++++++- proxy/src/metrics.rs | 28 +++++++++++++++++++++------- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 2560187608..dfa6015b10 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -287,8 +287,9 @@ impl ConnCfg { // TODO: lots of useful info but maybe we can move it elsewhere (eg traces?) info!( cold_start_info = ctx.cold_start_info().as_str(), - "connected to compute node at {host} ({socket_addr}) sslmode={:?}", - self.0.get_ssl_mode() + "connected to compute node at {host} ({socket_addr}) sslmode={:?}, latency={}", + self.0.get_ssl_mode(), + ctx.get_proxy_latency(), ); // NB: CancelToken is supposed to hold socket_addr, but we use connect_raw. diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index f87f4e9ef8..e10a04b4f1 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -17,7 +17,8 @@ use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::error::ErrorKind; use crate::intern::{BranchIdInt, ProjectIdInt}; use crate::metrics::{ - ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting, + ConnectOutcome, InvalidEndpointsGroup, LatencyAccumulated, LatencyTimer, Metrics, Protocol, + Waiting, }; use crate::protocol2::{ConnectionInfo, ConnectionInfoExtra}; use crate::types::{DbName, EndpointId, RoleName}; @@ -346,6 +347,14 @@ impl RequestContext { } } + pub(crate) fn get_proxy_latency(&self) -> LatencyAccumulated { + self.0 + .try_lock() + .expect("should not deadlock") + .latency_timer + .accumulated() + } + pub(crate) fn success(&self) { self.0 .try_lock() diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index db1f096de1..b6a2a059ea 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -394,21 +394,31 @@ pub enum RedisMsgKind { HDel, } -#[derive(Default)] -struct Accumulated { +#[derive(Default, Clone)] +pub struct LatencyAccumulated { cplane: time::Duration, client: time::Duration, compute: time::Duration, retry: time::Duration, } +impl std::fmt::Display for LatencyAccumulated { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "client: {:?}, cplane: {:?}, compute: {:?}, retry: {:?}", + self.client, self.cplane, self.compute, self.retry + ) + } +} + pub struct LatencyTimer { // time since the stopwatch was started start: time::Instant, // time since the stopwatch was stopped stop: Option, // accumulated time on the stopwatch - accumulated: Accumulated, + accumulated: LatencyAccumulated, // label data protocol: Protocol, cold_start_info: ColdStartInfo, @@ -422,7 +432,7 @@ impl LatencyTimer { Self { start: time::Instant::now(), stop: None, - accumulated: Accumulated::default(), + accumulated: LatencyAccumulated::default(), protocol, cold_start_info: ColdStartInfo::Unknown, // assume failed unless otherwise specified @@ -435,7 +445,7 @@ impl LatencyTimer { Self { start: time::Instant::now(), stop: None, - accumulated: Accumulated::default(), + accumulated: LatencyAccumulated::default(), protocol, cold_start_info: ColdStartInfo::Unknown, // assume failed unless otherwise specified @@ -465,6 +475,10 @@ impl LatencyTimer { // success self.outcome = ConnectOutcome::Success; } + + pub fn accumulated(&self) -> LatencyAccumulated { + self.accumulated.clone() + } } #[derive(FixedCardinalityLabel, Clone, Copy, Debug)] @@ -511,7 +525,7 @@ impl Drop for LatencyTimer { duration.saturating_sub(accumulated_total).as_secs_f64(), ); - // Exclude client cplane, compue communication from the accumulated time. + // Exclude client, cplane, compute communication from the accumulated time. let accumulated_total = self.accumulated.client + self.accumulated.cplane + self.accumulated.compute; metric.observe( @@ -524,7 +538,7 @@ impl Drop for LatencyTimer { duration.saturating_sub(accumulated_total).as_secs_f64(), ); - // Exclude client cplane, compue, retry communication from the accumulated time. + // Exclude client, cplane, compute, retry communication from the accumulated time. let accumulated_total = self.accumulated.client + self.accumulated.cplane + self.accumulated.compute