mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 13:32:57 +00:00
proxy: Exclude compute and retries (#7529)
## Problem Alerts fire if the connection to the compute is slow. ## Summary of changes Exclude compute and retries from latencies.
This commit is contained in:
@@ -260,7 +260,9 @@ impl ConnCfg {
|
||||
aux: MetricsAuxInfo,
|
||||
timeout: Duration,
|
||||
) -> Result<PostgresConnection, ConnectionError> {
|
||||
let pause = ctx.latency_timer.pause(crate::metrics::Waiting::Compute);
|
||||
let (socket_addr, stream, host) = self.connect_raw(timeout).await?;
|
||||
drop(pause);
|
||||
|
||||
let tls_connector = native_tls::TlsConnector::builder()
|
||||
.danger_accept_invalid_certs(allow_self_signed_compute)
|
||||
@@ -270,7 +272,9 @@ impl ConnCfg {
|
||||
let tls = MakeTlsConnect::<tokio::net::TcpStream>::make_tls_connect(&mut mk_tls, host)?;
|
||||
|
||||
// connect_raw() will not use TLS if sslmode is "disable"
|
||||
let pause = ctx.latency_timer.pause(crate::metrics::Waiting::Compute);
|
||||
let (client, connection) = self.0.connect_raw(stream, tls).await?;
|
||||
drop(pause);
|
||||
tracing::Span::current().record("pid", &tracing::field::display(client.get_process_id()));
|
||||
let stream = connection.stream.into_inner();
|
||||
|
||||
|
||||
@@ -284,6 +284,8 @@ pub struct ComputeConnectionLatencyGroup {
|
||||
pub enum LatencyExclusions {
|
||||
Client,
|
||||
ClientAndCplane,
|
||||
ClientCplaneCompute,
|
||||
ClientCplaneComputeRetry,
|
||||
}
|
||||
|
||||
#[derive(FixedCardinalityLabel, Copy, Clone)]
|
||||
@@ -352,6 +354,7 @@ pub enum Waiting {
|
||||
Cplane,
|
||||
Client,
|
||||
Compute,
|
||||
RetryTimeout,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
@@ -359,6 +362,7 @@ struct Accumulated {
|
||||
cplane: time::Duration,
|
||||
client: time::Duration,
|
||||
compute: time::Duration,
|
||||
retry: time::Duration,
|
||||
}
|
||||
|
||||
pub struct LatencyTimer {
|
||||
@@ -421,6 +425,7 @@ impl Drop for LatencyTimerPause<'_> {
|
||||
Waiting::Cplane => self.timer.accumulated.cplane += dur,
|
||||
Waiting::Client => self.timer.accumulated.client += dur,
|
||||
Waiting::Compute => self.timer.accumulated.compute += dur,
|
||||
Waiting::RetryTimeout => self.timer.accumulated.retry += dur,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -464,6 +469,34 @@ impl Drop for LatencyTimer {
|
||||
},
|
||||
duration.saturating_sub(accumulated_total).as_secs_f64(),
|
||||
);
|
||||
|
||||
// Exclude time spent waiting on the client, cplane, and compute from the measured duration.
|
||||
let accumulated_total =
|
||||
self.accumulated.client + self.accumulated.cplane + self.accumulated.compute;
|
||||
metric.observe(
|
||||
ComputeConnectionLatencyGroup {
|
||||
protocol: self.protocol,
|
||||
cold_start_info: self.cold_start_info,
|
||||
outcome: self.outcome,
|
||||
excluded: LatencyExclusions::ClientCplaneCompute,
|
||||
},
|
||||
duration.saturating_sub(accumulated_total).as_secs_f64(),
|
||||
);
|
||||
|
||||
// Exclude time spent waiting on the client, cplane, compute, and retry back-off from the measured duration.
|
||||
let accumulated_total = self.accumulated.client
|
||||
+ self.accumulated.cplane
|
||||
+ self.accumulated.compute
|
||||
+ self.accumulated.retry;
|
||||
metric.observe(
|
||||
ComputeConnectionLatencyGroup {
|
||||
protocol: self.protocol,
|
||||
cold_start_info: self.cold_start_info,
|
||||
outcome: self.outcome,
|
||||
excluded: LatencyExclusions::ClientCplaneComputeRetry,
|
||||
},
|
||||
duration.saturating_sub(accumulated_total).as_secs_f64(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -194,6 +194,10 @@ where
|
||||
let wait_duration = retry_after(num_retries, connect_to_compute_retry_config);
|
||||
num_retries += 1;
|
||||
|
||||
let pause = ctx
|
||||
.latency_timer
|
||||
.pause(crate::metrics::Waiting::RetryTimeout);
|
||||
time::sleep(wait_duration).await;
|
||||
drop(pause);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -54,7 +54,11 @@ pub async fn wake_compute<B: ComputeConnectBackend>(
|
||||
|
||||
let wait_duration = retry_after(*num_retries, config);
|
||||
*num_retries += 1;
|
||||
let pause = ctx
|
||||
.latency_timer
|
||||
.pause(crate::metrics::Waiting::RetryTimeout);
|
||||
tokio::time::sleep(wait_duration).await;
|
||||
drop(pause);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -179,7 +179,9 @@ impl ConnectMechanism for TokioMechanism {
|
||||
.dbname(&self.conn_info.dbname)
|
||||
.connect_timeout(timeout);
|
||||
|
||||
let pause = ctx.latency_timer.pause(crate::metrics::Waiting::Compute);
|
||||
let (client, connection) = config.connect(tokio_postgres::NoTls).await?;
|
||||
drop(pause);
|
||||
|
||||
tracing::Span::current().record("pid", &tracing::field::display(client.get_process_id()));
|
||||
Ok(poll_client(
|
||||
|
||||
Reference in New Issue
Block a user