From 34c1295594c70bf124c0a6a85fa21d51dff7967b Mon Sep 17 00:00:00 2001 From: Ivan Efremov Date: Tue, 10 Dec 2024 12:14:28 +0200 Subject: [PATCH] [proxy] impr: Additional logging for cancellation queries (#10039) ## Problem Since cancellation tasks spawned in the background sometimes logs missing context. https://neondb.slack.com/archives/C060N3SEF9D/p1733427801527419?thread_ts=1733419882.560159&cid=C060N3SEF9D ## Summary of changes Add `session_id` and change loglevel for cancellation queries --- proxy/src/cancellation.rs | 3 +- proxy/src/console_redirect_proxy.rs | 54 ++++++++++++++++------------- proxy/src/proxy/mod.rs | 54 ++++++++++++++++------------- proxy/src/redis/notifications.rs | 4 +++ 4 files changed, 64 insertions(+), 51 deletions(-) diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index 7bc5587a25..ed717507ee 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -115,7 +115,8 @@ impl CancellationHandler

{ IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()), }; if !self.limiter.lock().unwrap().check(subnet_key, 1) { - tracing::debug!("Rate limit exceeded. Skipping cancellation message"); + // log only the subnet part of the IP address to know which subnet is rate limited + tracing::warn!("Rate limit exceeded. Skipping cancellation message, {subnet_key}"); Metrics::get() .proxy .cancellation_requests_total diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index 7db1179eea..65702e0e4c 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -163,32 +163,36 @@ pub(crate) async fn handle_client( let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client); let do_handshake = handshake(ctx, stream, tls, record_handshake_error); - let (mut stream, params) = - match tokio::time::timeout(config.handshake_timeout, do_handshake).await?? { - HandshakeData::Startup(stream, params) => (stream, params), - HandshakeData::Cancel(cancel_key_data) => { - // spawn a task to cancel the session, but don't wait for it - cancellations.spawn({ - let cancellation_handler_clone = Arc::clone(&cancellation_handler); - let session_id = ctx.session_id(); - let peer_ip = ctx.peer_addr(); - async move { - drop( - cancellation_handler_clone - .cancel_session( - cancel_key_data, - session_id, - peer_ip, - config.authentication_config.ip_allowlist_check_enabled, - ) - .await, - ); - } - }); + let (mut stream, params) = match tokio::time::timeout(config.handshake_timeout, do_handshake) + .await?? + { + HandshakeData::Startup(stream, params) => (stream, params), + HandshakeData::Cancel(cancel_key_data) => { + // spawn a task to cancel the session, but don't wait for it + cancellations.spawn({ + let cancellation_handler_clone = Arc::clone(&cancellation_handler); + let session_id = ctx.session_id(); + let peer_ip = ctx.peer_addr(); + let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?session_id); + cancel_span.follows_from(tracing::Span::current()); + async move { + drop( + cancellation_handler_clone + .cancel_session( + cancel_key_data, + session_id, + peer_ip, + config.authentication_config.ip_allowlist_check_enabled, + ) + .instrument(cancel_span) + .await, + ); + } + }); - return Ok(None); - } - }; + return Ok(None); + } + }; drop(pause); ctx.set_db_options(params.clone()); diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index f74eb5940f..cc04bc5e5c 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -272,32 +272,36 @@ pub(crate) async fn handle_client( let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client); let do_handshake = handshake(ctx, stream, mode.handshake_tls(tls), record_handshake_error); - let (mut stream, params) = - match tokio::time::timeout(config.handshake_timeout, do_handshake).await?? { - HandshakeData::Startup(stream, params) => (stream, params), - HandshakeData::Cancel(cancel_key_data) => { - // spawn a task to cancel the session, but don't wait for it - cancellations.spawn({ - let cancellation_handler_clone = Arc::clone(&cancellation_handler); - let session_id = ctx.session_id(); - let peer_ip = ctx.peer_addr(); - async move { - drop( - cancellation_handler_clone - .cancel_session( - cancel_key_data, - session_id, - peer_ip, - config.authentication_config.ip_allowlist_check_enabled, - ) - .await, - ); - } - }); + let (mut stream, params) = match tokio::time::timeout(config.handshake_timeout, do_handshake) + .await?? + { + HandshakeData::Startup(stream, params) => (stream, params), + HandshakeData::Cancel(cancel_key_data) => { + // spawn a task to cancel the session, but don't wait for it + cancellations.spawn({ + let cancellation_handler_clone = Arc::clone(&cancellation_handler); + let session_id = ctx.session_id(); + let peer_ip = ctx.peer_addr(); + let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?session_id); + cancel_span.follows_from(tracing::Span::current()); + async move { + drop( + cancellation_handler_clone + .cancel_session( + cancel_key_data, + session_id, + peer_ip, + config.authentication_config.ip_allowlist_check_enabled, + ) + .instrument(cancel_span) + .await, + ); + } + }); - return Ok(None); - } - }; + return Ok(None); + } + }; drop(pause); ctx.set_db_options(params.clone()); diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs index 9ac07b7e90..f3aa97c032 100644 --- a/proxy/src/redis/notifications.rs +++ b/proxy/src/redis/notifications.rs @@ -13,6 +13,7 @@ use crate::cache::project_info::ProjectInfoCache; use crate::cancellation::{CancelMap, CancellationHandler}; use crate::intern::{ProjectIdInt, RoleNameInt}; use crate::metrics::{Metrics, RedisErrors, RedisEventsCount}; +use tracing::Instrument; const CPLANE_CHANNEL_NAME: &str = "neondb-proxy-ws-updates"; pub(crate) const PROXY_CHANNEL_NAME: &str = "neondb-proxy-to-proxy-updates"; @@ -143,6 +144,8 @@ impl MessageHandler { let peer_addr = cancel_session .peer_addr .unwrap_or(std::net::IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED)); + let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?cancel_session.session_id); + cancel_span.follows_from(tracing::Span::current()); // This instance of cancellation_handler doesn't have a RedisPublisherClient so it can't publish the message. match self .cancellation_handler @@ -152,6 +155,7 @@ impl MessageHandler { peer_addr, cancel_session.peer_addr.is_some(), ) + .instrument(cancel_span) .await { Ok(()) => {}