feat(proxy): Log latency after connect to compute (#11048)

## Problem
To measure latency accurately, we should associate the testodrome role
with the latency data.

## Summary of changes
Add latency logging so that the latency data can be associated with the different roles.

Relates to #22486
Author: Ivan Efremov
Date: 2025-02-28 19:58:42 +02:00
Committed by: GitHub
Parent: d857f63e3b
Commit: 56033189c1
3 changed files with 34 additions and 10 deletions


@@ -287,8 +287,9 @@ impl ConnCfg {
         // TODO: lots of useful info but maybe we can move it elsewhere (eg traces?)
         info!(
             cold_start_info = ctx.cold_start_info().as_str(),
-            "connected to compute node at {host} ({socket_addr}) sslmode={:?}",
-            self.0.get_ssl_mode()
+            "connected to compute node at {host} ({socket_addr}) sslmode={:?}, latency={}",
+            self.0.get_ssl_mode(),
+            ctx.get_proxy_latency(),
         );
 
         // NB: CancelToken is supposed to hold socket_addr, but we use connect_raw.
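
For illustration, with made-up host, address, sslmode, and timing values, the new log line would read roughly like this (the real line also carries the `cold_start_info` field emitted just above):

```
INFO connected to compute node at compute-host.example.com (10.0.0.5:5432) sslmode=Disable, latency=client: 1.2ms, cplane: 35ms, compute: 8.4ms, retry: 0ns
```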


@@ -17,7 +17,8 @@ use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
 use crate::error::ErrorKind;
 use crate::intern::{BranchIdInt, ProjectIdInt};
 use crate::metrics::{
-    ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting,
+    ConnectOutcome, InvalidEndpointsGroup, LatencyAccumulated, LatencyTimer, Metrics, Protocol,
+    Waiting,
 };
 use crate::protocol2::{ConnectionInfo, ConnectionInfoExtra};
 use crate::types::{DbName, EndpointId, RoleName};
@@ -346,6 +347,14 @@ impl RequestContext {
         }
     }
 
+    pub(crate) fn get_proxy_latency(&self) -> LatencyAccumulated {
+        self.0
+            .try_lock()
+            .expect("should not deadlock")
+            .latency_timer
+            .accumulated()
+    }
+
     pub(crate) fn success(&self) {
         self.0
             .try_lock()
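
`get_proxy_latency` follows the lock-and-clone pattern used by the other `RequestContext` accessors: briefly take the mutex, copy a snapshot out, release. A minimal sketch of the same pattern with a plain `std::sync::Mutex` (the type and field names here are illustrative, not the proxy's actual ones):

```rust
use std::sync::Mutex;
use std::time::Duration;

#[derive(Default, Clone)]
struct Snapshot {
    client: Duration,
    cplane: Duration,
}

struct Context(Mutex<Snapshot>);

impl Context {
    // Like get_proxy_latency(): the lock is only held for short,
    // non-async critical sections, so try_lock() is expected to succeed.
    fn snapshot(&self) -> Snapshot {
        self.0.try_lock().expect("should not deadlock").clone()
    }
}

fn main() {
    let ctx = Context(Mutex::new(Snapshot::default()));
    let snap = ctx.snapshot();
    println!("client: {:?}, cplane: {:?}", snap.client, snap.cplane);
}
```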


@@ -394,21 +394,31 @@ pub enum RedisMsgKind {
     HDel,
 }
 
-#[derive(Default)]
-struct Accumulated {
+#[derive(Default, Clone)]
+pub struct LatencyAccumulated {
     cplane: time::Duration,
     client: time::Duration,
     compute: time::Duration,
     retry: time::Duration,
 }
 
+impl std::fmt::Display for LatencyAccumulated {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "client: {:?}, cplane: {:?}, compute: {:?}, retry: {:?}",
+            self.client, self.cplane, self.compute, self.retry
+        )
+    }
+}
+
 pub struct LatencyTimer {
     // time since the stopwatch was started
     start: time::Instant,
     // time since the stopwatch was stopped
     stop: Option<time::Instant>,
     // accumulated time on the stopwatch
-    accumulated: Accumulated,
+    accumulated: LatencyAccumulated,
     // label data
     protocol: Protocol,
     cold_start_info: ColdStartInfo,
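
Since the fields are `std::time::Duration` and the `Display` impl formats them with `{:?}`, the output gets Rust's human-readable duration units for free. A quick standalone check (values arbitrary):

```rust
use std::time::Duration;

fn main() {
    // Duration's Debug output picks a readable unit automatically.
    println!("{:?}", Duration::from_micros(1200)); // prints "1.2ms"
    println!("{:?}", Duration::from_millis(35));   // prints "35ms"
    println!("{:?}", Duration::ZERO);              // prints "0ns"
}
```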
@@ -422,7 +432,7 @@ impl LatencyTimer {
         Self {
             start: time::Instant::now(),
             stop: None,
-            accumulated: Accumulated::default(),
+            accumulated: LatencyAccumulated::default(),
             protocol,
             cold_start_info: ColdStartInfo::Unknown,
             // assume failed unless otherwise specified
@@ -435,7 +445,7 @@ impl LatencyTimer {
         Self {
             start: time::Instant::now(),
             stop: None,
-            accumulated: Accumulated::default(),
+            accumulated: LatencyAccumulated::default(),
             protocol,
             cold_start_info: ColdStartInfo::Unknown,
             // assume failed unless otherwise specified
@@ -465,6 +475,10 @@ impl LatencyTimer {
         // success
         self.outcome = ConnectOutcome::Success;
     }
+
+    pub fn accumulated(&self) -> LatencyAccumulated {
+        self.accumulated.clone()
+    }
 }
 
 #[derive(FixedCardinalityLabel, Clone, Copy, Debug)]
@@ -511,7 +525,7 @@ impl Drop for LatencyTimer {
             duration.saturating_sub(accumulated_total).as_secs_f64(),
         );
 
-        // Exclude client cplane, compue communication from the accumulated time.
+        // Exclude client, cplane, compute communication from the accumulated time.
         let accumulated_total =
             self.accumulated.client + self.accumulated.cplane + self.accumulated.compute;
         metric.observe(
@@ -524,7 +538,7 @@ impl Drop for LatencyTimer {
             duration.saturating_sub(accumulated_total).as_secs_f64(),
         );
 
-        // Exclude client cplane, compue, retry communication from the accumulated time.
+        // Exclude client, cplane, compute, retry communication from the accumulated time.
         let accumulated_total = self.accumulated.client
             + self.accumulated.cplane
             + self.accumulated.compute
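
For reference, the subtraction in `Drop` attributes the remaining wall-clock time to the proxy itself: each histogram observes the total elapsed time minus the components already accounted for, with `saturating_sub` guarding against a negative result. A hedged sketch of that arithmetic (the `observe` stub and metric labels are placeholders, not the proxy's actual metrics):

```rust
use std::time::Duration;

// Stand-in for the real histogram observe() call.
fn observe(label: &str, seconds: f64) {
    println!("{label}: {seconds:.6}s");
}

fn main() {
    let duration = Duration::from_millis(50); // total time on the stopwatch
    let (client, cplane, compute, retry) = (
        Duration::from_millis(5),
        Duration::from_millis(20),
        Duration::from_millis(8),
        Duration::from_millis(3),
    );

    // Exclude client, cplane, compute communication from the accumulated time.
    let accumulated_total = client + cplane + compute;
    observe(
        "latency_excl_compute",
        duration.saturating_sub(accumulated_total).as_secs_f64(),
    );

    // Additionally exclude retry time.
    let accumulated_total = client + cplane + compute + retry;
    observe(
        "latency_excl_compute_and_retry",
        duration.saturating_sub(accumulated_total).as_secs_f64(),
    );
}
```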