From 3ae0b2149e1a80975e098a4156b424e636cc550f Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Wed, 20 Nov 2024 10:14:28 +0000 Subject: [PATCH] chore(proxy): demote a ton of logs for successful connection attempts (#9803) See https://github.com/neondatabase/cloud/issues/14378 In collaboration with @cloneable and @awarus, we sifted through logs and simply demoted some logs to debug. This is not at all finished and there are more logs to review, but we ran out of time in the session we organised. In any slightly more nuanced cases, we didn't touch the log, instead leaving a TODO comment. --- proxy/src/auth/backend/classic.rs | 8 +++++--- proxy/src/auth/backend/hacks.rs | 2 +- proxy/src/auth/backend/mod.rs | 7 ++++--- proxy/src/auth/credentials.rs | 10 +++++----- proxy/src/auth/flow.rs | 2 ++ proxy/src/bin/local_proxy.rs | 1 + proxy/src/bin/proxy.rs | 1 + proxy/src/cancellation.rs | 10 +++++----- proxy/src/compute.rs | 5 +++-- proxy/src/console_redirect_proxy.rs | 2 +- proxy/src/context/mod.rs | 9 +++++++-- proxy/src/control_plane/client/mod.rs | 4 ++-- proxy/src/control_plane/client/neon.rs | 13 +++++++++---- proxy/src/stream.rs | 1 + 14 files changed, 47 insertions(+), 28 deletions(-) diff --git a/proxy/src/auth/backend/classic.rs b/proxy/src/auth/backend/classic.rs index 6d26c99832..87a02133c8 100644 --- a/proxy/src/auth/backend/classic.rs +++ b/proxy/src/auth/backend/classic.rs @@ -1,5 +1,5 @@ use tokio::io::{AsyncRead, AsyncWrite}; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; use super::{ComputeCredentials, ComputeUserInfo}; use crate::auth::backend::ComputeCredentialKeys; @@ -21,11 +21,11 @@ pub(super) async fn authenticate( let scram_keys = match secret { #[cfg(any(test, feature = "testing"))] AuthSecret::Md5(_) => { - info!("auth endpoint chooses MD5"); + debug!("auth endpoint chooses MD5"); return Err(auth::AuthError::bad_auth_method("MD5")); } AuthSecret::Scram(secret) => { - info!("auth endpoint chooses SCRAM"); + debug!("auth endpoint chooses SCRAM"); let scram = auth::Scram(&secret, ctx); let auth_outcome = tokio::time::timeout( @@ -50,6 +50,8 @@ pub(super) async fn authenticate( let client_key = match auth_outcome { sasl::Outcome::Success(key) => key, sasl::Outcome::Failure(reason) => { + // TODO: warnings? + // TODO: should we get rid of this because double logging? info!("auth backend failed with an error: {reason}"); return Err(auth::AuthError::password_failed(&*creds.user)); } diff --git a/proxy/src/auth/backend/hacks.rs b/proxy/src/auth/backend/hacks.rs index 1411d908a5..e651df1d34 100644 --- a/proxy/src/auth/backend/hacks.rs +++ b/proxy/src/auth/backend/hacks.rs @@ -73,7 +73,7 @@ pub(crate) async fn password_hack_no_authentication( .get_password() .await?; - info!(project = &*payload.endpoint, "received missing parameter"); + debug!(project = &*payload.endpoint, "received missing parameter"); // Report tentative success; compute node will check the password anyway. Ok(( diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index 242fe99de2..83c72e7be0 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -14,7 +14,7 @@ use ipnet::{Ipv4Net, Ipv6Net}; use local::LocalBackend; use tokio::io::{AsyncRead, AsyncWrite}; use tokio_postgres::config::AuthKeys; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; use crate::auth::credentials::check_peer_addr_is_in_list; use crate::auth::{self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint}; @@ -286,7 +286,7 @@ async fn auth_quirks( Ok(info) => (info, None), }; - info!("fetching user's authentication info"); + debug!("fetching user's authentication info"); let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?; // check allowed list @@ -404,7 +404,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { ) -> auth::Result> { let res = match self { Self::ControlPlane(api, user_info) => { - info!( + debug!( user = &*user_info.user, project = user_info.endpoint(), "performing authentication using the console" @@ -427,6 +427,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { } }; + // TODO: replace with some metric info!("user successfully authenticated"); Ok(res) } diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index ddecae6af5..dab9007400 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -7,7 +7,7 @@ use std::str::FromStr; use itertools::Itertools; use pq_proto::StartupMessageParams; use thiserror::Error; -use tracing::{info, warn}; +use tracing::{debug, warn}; use crate::auth::password_hack::parse_endpoint_param; use crate::context::RequestMonitoring; @@ -147,22 +147,22 @@ impl ComputeUserInfoMaybeEndpoint { } let metrics = Metrics::get(); - info!(%user, "credentials"); + debug!(%user, "credentials"); if sni.is_some() { - info!("Connection with sni"); + debug!("Connection with sni"); metrics.proxy.accepted_connections_by_sni.inc(SniKind::Sni); } else if endpoint.is_some() { metrics .proxy .accepted_connections_by_sni .inc(SniKind::NoSni); - info!("Connection without sni"); + debug!("Connection without sni"); } else { metrics .proxy .accepted_connections_by_sni .inc(SniKind::PasswordHack); - info!("Connection with password hack"); + debug!("Connection with password hack"); } let options = NeonOptions::parse_params(params); diff --git a/proxy/src/auth/flow.rs b/proxy/src/auth/flow.rs index 6294549ff6..1740b59b14 100644 --- a/proxy/src/auth/flow.rs +++ b/proxy/src/auth/flow.rs @@ -178,6 +178,8 @@ impl AuthFlow<'_, S, Scram<'_>> { SCRAM_SHA_256_PLUS => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256Plus), _ => {} } + + // TODO: make this a metric instead info!("client chooses {}", sasl.method); let outcome = sasl::SaslStream::new(self.stream, sasl.message) diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs index 41b0e11e85..c4ec1300f2 100644 --- a/proxy/src/bin/local_proxy.rs +++ b/proxy/src/bin/local_proxy.rs @@ -125,6 +125,7 @@ async fn main() -> anyhow::Result<()> { Metrics::install(Arc::new(ThreadPoolMetrics::new(0))); + // TODO: refactor these to use labels debug!("Version: {GIT_VERSION}"); debug!("Build_tag: {BUILD_TAG}"); let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index fda5b25961..232721338d 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -288,6 +288,7 @@ async fn main() -> anyhow::Result<()> { let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); + // TODO: refactor these to use labels info!("Version: {GIT_VERSION}"); info!("Build_tag: {BUILD_TAG}"); let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index db0970adcb..3ad2d55b53 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -7,7 +7,7 @@ use thiserror::Error; use tokio::net::TcpStream; use tokio::sync::Mutex; use tokio_postgres::{CancelToken, NoTls}; -use tracing::info; +use tracing::{debug, info}; use uuid::Uuid; use crate::error::ReportableError; @@ -73,7 +73,7 @@ impl CancellationHandler

{ break key; }; - info!("registered new query cancellation key {key}"); + debug!("registered new query cancellation key {key}"); Session { key, cancellation_handler: self, @@ -165,7 +165,7 @@ impl CancelClosure { pub(crate) async fn try_cancel_query(self) -> Result<(), CancelError> { let socket = TcpStream::connect(self.socket_addr).await?; self.cancel_token.cancel_query_raw(socket, NoTls).await?; - info!("query was cancelled"); + debug!("query was cancelled"); Ok(()) } } @@ -182,7 +182,7 @@ impl

Session

{ /// Store the cancel token for the given session. /// This enables query cancellation in `crate::proxy::prepare_client_connection`. pub(crate) fn enable_query_cancellation(&self, cancel_closure: CancelClosure) -> CancelKeyData { - info!("enabling query cancellation for this session"); + debug!("enabling query cancellation for this session"); self.cancellation_handler .map .insert(self.key, Some(cancel_closure)); @@ -194,7 +194,7 @@ impl

Session

{ impl

Drop for Session

{ fn drop(&mut self) { self.cancellation_handler.map.remove(&self.key); - info!("dropped query cancellation key {}", &self.key); + debug!("dropped query cancellation key {}", &self.key); } } diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index ca4a348ed8..b8876b44eb 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -14,7 +14,7 @@ use thiserror::Error; use tokio::net::TcpStream; use tokio_postgres::tls::MakeTlsConnect; use tokio_postgres_rustls::MakeRustlsConnect; -use tracing::{error, info, warn}; +use tracing::{debug, error, info, warn}; use crate::auth::parse_endpoint_param; use crate::cancellation::CancelClosure; @@ -213,7 +213,7 @@ impl ConnCfg { }; let connect_once = |host, port| { - info!("trying to connect to compute node at {host}:{port}"); + debug!("trying to connect to compute node at {host}:{port}"); connect_with_timeout(host, port).and_then(|socket| async { let socket_addr = socket.peer_addr()?; // This prevents load balancer from severing the connection. @@ -328,6 +328,7 @@ impl ConnCfg { tracing::Span::current().record("pid", tracing::field::display(client.get_process_id())); let stream = connection.stream.into_inner(); + // TODO: lots of useful info but maybe we can move it elsewhere (eg traces?) info!( cold_start_info = ctx.cold_start_info().as_str(), "connected to compute node at {host} ({socket_addr}) sslmode={:?}", diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index cc456f3667..8e71f552a5 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -146,7 +146,7 @@ pub(crate) async fn handle_client( stream: S, conn_gauge: NumClientConnectionsGuard<'static>, ) -> Result>, ClientRequestError> { - info!( + debug!( protocol = %ctx.protocol(), "handling interactive connection from client" ); diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index 6cf99c0c97..d057ee0bfd 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -8,7 +8,7 @@ use pq_proto::StartupMessageParams; use smol_str::SmolStr; use tokio::sync::mpsc; use tracing::field::display; -use tracing::{debug, info, info_span, Span}; +use tracing::{debug, info_span, Span}; use try_lock::TryLock; use uuid::Uuid; @@ -122,6 +122,7 @@ impl RequestMonitoring { protocol: Protocol, region: &'static str, ) -> Self { + // TODO: be careful with long lived spans let span = info_span!( "connect_request", %protocol, @@ -384,6 +385,10 @@ impl RequestMonitoringInner { } else { ConnectOutcome::Failed }; + + // TODO: get rid of entirely/refactor + // check for false positives + // AND false negatives if let Some(rejected) = self.rejected { let ep = self .endpoint_id @@ -391,7 +396,7 @@ impl RequestMonitoringInner { .map(|x| x.as_str()) .unwrap_or_default(); // This makes sense only if cache is disabled - info!( + debug!( ?outcome, ?rejected, ?ep, diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index e388d8a538..50903e2f1e 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -8,7 +8,7 @@ use std::time::Duration; use dashmap::DashMap; use tokio::time::Instant; -use tracing::info; +use tracing::{debug, info}; use crate::auth::backend::jwt::{AuthRule, FetchAuthRules, FetchAuthRulesError}; use crate::auth::backend::ComputeUserInfo; @@ -214,7 +214,7 @@ impl ApiLocks { self.metrics .semaphore_acquire_seconds .observe(now.elapsed().as_secs_f64()); - info!("acquired permit {:?}", now.elapsed().as_secs_f64()); + debug!("acquired permit {:?}", now.elapsed().as_secs_f64()); Ok(WakeComputePermit { permit: permit? }) } diff --git a/proxy/src/control_plane/client/neon.rs b/proxy/src/control_plane/client/neon.rs index 26ff4e1402..8f4ae13f33 100644 --- a/proxy/src/control_plane/client/neon.rs +++ b/proxy/src/control_plane/client/neon.rs @@ -73,6 +73,8 @@ impl NeonControlPlaneClient { .endpoints_cache .is_valid(ctx, &user_info.endpoint.normalize()) { + // TODO: refactor this because it's weird + // this is a failure to authenticate but we return Ok. info!("endpoint is not valid, skipping the request"); return Ok(AuthInfo::default()); } @@ -92,7 +94,7 @@ impl NeonControlPlaneClient { ]) .build()?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self.endpoint.execute(request).await?; @@ -104,10 +106,12 @@ impl NeonControlPlaneClient { // TODO(anna): retry Err(e) => { return if e.get_reason().is_not_found() { + // TODO: refactor this because it's weird + // this is a failure to authenticate but we return Ok. Ok(AuthInfo::default()) } else { Err(e.into()) - } + }; } }; @@ -163,7 +167,7 @@ impl NeonControlPlaneClient { .build() .map_err(GetEndpointJwksError::RequestBuild)?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self @@ -220,7 +224,7 @@ impl NeonControlPlaneClient { let request = request_builder.build()?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self.endpoint.execute(request).await?; @@ -249,6 +253,7 @@ impl NeonControlPlaneClient { Ok(node) } .map_err(crate::error::log_error) + // TODO: redo this span stuff .instrument(info_span!("http", id = request_id)) .await } diff --git a/proxy/src/stream.rs b/proxy/src/stream.rs index 89df48c5d3..11f426819d 100644 --- a/proxy/src/stream.rs +++ b/proxy/src/stream.rs @@ -133,6 +133,7 @@ impl PqStream { msg: &'static str, error_kind: ErrorKind, ) -> Result { + // TODO: only log this for actually interesting errors tracing::info!( kind = error_kind.to_metric_label(), msg,