From e01c8f238c2ce776b9d5eff8f62f65b0ba1fa19a Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Thu, 3 Jul 2025 08:46:48 +0100 Subject: [PATCH] [proxy] update noisy error logging (#12438) Health checks for pg-sni-router open a TCP connection and immediately close it again. This is noisy. We will filter out any EOF errors on the first message. "acquired permit" debug log is incorrect since it logs when we timed out as well. This fixes the debug log. --- proxy/src/binary/pg_sni_router.rs | 19 +++++++++++++++++-- proxy/src/control_plane/client/mod.rs | 7 ++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/proxy/src/binary/pg_sni_router.rs b/proxy/src/binary/pg_sni_router.rs index b877aaddef..4ac8b6a995 100644 --- a/proxy/src/binary/pg_sni_router.rs +++ b/proxy/src/binary/pg_sni_router.rs @@ -4,6 +4,7 @@ //! This allows connecting to pods/services running in the same Kubernetes cluster from //! the outside. Similar to an ingress controller for HTTPS. +use std::io; use std::net::SocketAddr; use std::path::Path; use std::sync::Arc; @@ -229,7 +230,6 @@ pub(super) async fn task_main( .set_nodelay(true) .context("failed to set socket option")?; - info!(%peer_addr, "serving"); let ctx = RequestContext::new( session_id, ConnectionInfo { @@ -241,6 +241,14 @@ pub(super) async fn task_main( handle_client(ctx, dest_suffix, tls_config, compute_tls_config, socket).await } .unwrap_or_else(|e| { + if let Some(FirstMessage(io_error)) = e.downcast_ref() { + // this is noisy. if we get EOF on the very first message that's likely + // just NLB doing a healthcheck. + if io_error.kind() == io::ErrorKind::UnexpectedEof { + return; + } + } + // Acknowledge that the task has finished with an error. 
error!("per-client task finished with an error: {e:#}"); }) @@ -257,12 +265,19 @@ pub(super) async fn task_main( Ok(()) } +#[derive(Debug, thiserror::Error)] +#[error(transparent)] +struct FirstMessage(io::Error); + async fn ssl_handshake( ctx: &RequestContext, raw_stream: S, tls_config: Arc, ) -> anyhow::Result> { - let (mut stream, msg) = PqStream::parse_startup(Stream::from_raw(raw_stream)).await?; + let (mut stream, msg) = PqStream::parse_startup(Stream::from_raw(raw_stream)) + .await + .map_err(FirstMessage)?; + match msg { FeStartupPacket::SslRequest { direct: None } => { let raw = stream.accept_tls().await?; diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index 4e5f5c7899..2ffc589df6 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -213,7 +213,12 @@ impl ApiLocks { self.metrics .semaphore_acquire_seconds .observe(now.elapsed().as_secs_f64()); - debug!("acquired permit {:?}", now.elapsed().as_secs_f64()); + + if permit.is_ok() { + debug!(elapsed = ?now.elapsed(), "acquired permit"); + } else { + debug!(elapsed = ?now.elapsed(), "timed out acquiring permit"); + } Ok(WakeComputePermit { permit: permit? }) }