diff --git a/proxy/src/http/conn_pool.rs b/proxy/src/http/conn_pool.rs index ed84b0f2bf..7a5f00d627 100644 --- a/proxy/src/http/conn_pool.rs +++ b/proxy/src/http/conn_pool.rs @@ -194,9 +194,10 @@ impl GlobalConnPool { info!("pool: cached connection '{conn_info}' is closed, opening a new one"); connect_to_compute(self.proxy_config, conn_info, session_id, latency_timer).await } else { - latency_timer.pool_hit(); info!("pool: reusing connection '{conn_info}'"); client.session.send(session_id)?; + latency_timer.pool_hit(); + latency_timer.success(); return Ok(Client { inner: Some(client), span: Span::current(), diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs index 12bf1b6a69..ec878b696e 100644 --- a/proxy/src/proxy.rs +++ b/proxy/src/proxy.rs @@ -96,7 +96,9 @@ static COMPUTE_CONNECTION_LATENCY: Lazy = Lazy::new(|| { register_histogram_vec!( "proxy_compute_connection_latency_seconds", "Time it took for proxy to establish a connection to the compute endpoint", - &["protocol", "cache_miss", "pool_miss"], + // http/ws/tcp, true/false, true/false, success/failure + // 3 * 2 * 2 * 2 = 24 counters + &["protocol", "cache_miss", "pool_miss", "outcome"], // largest bucket = 2^16 * 0.5ms = 32s exponential_buckets(0.0005, 2.0, 16).unwrap(), ) @@ -105,19 +107,22 @@ static COMPUTE_CONNECTION_LATENCY: Lazy = Lazy::new(|| { pub struct LatencyTimer { start: Instant, - pool_miss: bool, - cache_miss: bool, protocol: &'static str, + cache_miss: bool, + pool_miss: bool, + outcome: &'static str, } impl LatencyTimer { pub fn new(protocol: &'static str) -> Self { Self { start: Instant::now(), + protocol, cache_miss: false, // by default we don't do pooling pool_miss: true, - protocol, + // assume failed unless otherwise specified + outcome: "failed", } } @@ -128,6 +133,10 @@ impl LatencyTimer { pub fn pool_hit(&mut self) { self.pool_miss = false; } + + pub fn success(mut self) { + self.outcome = "success"; + } } impl Drop for LatencyTimer { @@ -138,6 +147,7 @@ impl Drop for LatencyTimer { self.protocol, bool_to_str(self.cache_miss), bool_to_str(self.pool_miss), + self.outcome, ]) .observe(duration) } @@ -547,7 +557,10 @@ where // try once let (config, err) = match mechanism.connect_once(&node_info, CONNECT_TIMEOUT).await { - Ok(res) => return Ok(res), + Ok(res) => { + latency_timer.success(); + return Ok(res); + } Err(e) => { error!(error = ?e, "could not connect to compute node"); (invalidate_cache(node_info), e) @@ -601,7 +614,10 @@ where info!("wake_compute success. attempting to connect"); loop { match mechanism.connect_once(&node_info, CONNECT_TIMEOUT).await { - Ok(res) => return Ok(res), + Ok(res) => { + latency_timer.success(); + return Ok(res); + } Err(e) => { let retriable = e.should_retry(num_retries); if !retriable {