chore(proxy): demote a ton of logs for successful connection attempts (#9803)

See https://github.com/neondatabase/cloud/issues/14378

In collaboration with @cloneable and @awarus, we sifted through logs and
simply demoted some logs to debug. This is not at all finished and there
are more logs to review, but we ran out of time in the session we
organised. In any slightly more nuanced cases, we didn't touch the log,
instead leaving a TODO comment.
This commit is contained in:
Conrad Ludgate
2024-11-20 10:14:28 +00:00
committed by GitHub
parent 0a499a3176
commit 3ae0b2149e
14 changed files with 47 additions and 28 deletions

View File

@@ -1,5 +1,5 @@
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{info, warn};
use tracing::{debug, info, warn};
use super::{ComputeCredentials, ComputeUserInfo};
use crate::auth::backend::ComputeCredentialKeys;
@@ -21,11 +21,11 @@ pub(super) async fn authenticate(
let scram_keys = match secret {
#[cfg(any(test, feature = "testing"))]
AuthSecret::Md5(_) => {
info!("auth endpoint chooses MD5");
debug!("auth endpoint chooses MD5");
return Err(auth::AuthError::bad_auth_method("MD5"));
}
AuthSecret::Scram(secret) => {
info!("auth endpoint chooses SCRAM");
debug!("auth endpoint chooses SCRAM");
let scram = auth::Scram(&secret, ctx);
let auth_outcome = tokio::time::timeout(
@@ -50,6 +50,8 @@ pub(super) async fn authenticate(
let client_key = match auth_outcome {
sasl::Outcome::Success(key) => key,
sasl::Outcome::Failure(reason) => {
// TODO: warnings?
// TODO: should we get rid of this because double logging?
info!("auth backend failed with an error: {reason}");
return Err(auth::AuthError::password_failed(&*creds.user));
}

View File

@@ -73,7 +73,7 @@ pub(crate) async fn password_hack_no_authentication(
.get_password()
.await?;
info!(project = &*payload.endpoint, "received missing parameter");
debug!(project = &*payload.endpoint, "received missing parameter");
// Report tentative success; compute node will check the password anyway.
Ok((

View File

@@ -14,7 +14,7 @@ use ipnet::{Ipv4Net, Ipv6Net};
use local::LocalBackend;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio_postgres::config::AuthKeys;
use tracing::{info, warn};
use tracing::{debug, info, warn};
use crate::auth::credentials::check_peer_addr_is_in_list;
use crate::auth::{self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint};
@@ -286,7 +286,7 @@ async fn auth_quirks(
Ok(info) => (info, None),
};
info!("fetching user's authentication info");
debug!("fetching user's authentication info");
let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?;
// check allowed list
@@ -404,7 +404,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
) -> auth::Result<Backend<'a, ComputeCredentials>> {
let res = match self {
Self::ControlPlane(api, user_info) => {
info!(
debug!(
user = &*user_info.user,
project = user_info.endpoint(),
"performing authentication using the console"
@@ -427,6 +427,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
}
};
// TODO: replace with some metric
info!("user successfully authenticated");
Ok(res)
}

View File

@@ -7,7 +7,7 @@ use std::str::FromStr;
use itertools::Itertools;
use pq_proto::StartupMessageParams;
use thiserror::Error;
use tracing::{info, warn};
use tracing::{debug, warn};
use crate::auth::password_hack::parse_endpoint_param;
use crate::context::RequestMonitoring;
@@ -147,22 +147,22 @@ impl ComputeUserInfoMaybeEndpoint {
}
let metrics = Metrics::get();
info!(%user, "credentials");
debug!(%user, "credentials");
if sni.is_some() {
info!("Connection with sni");
debug!("Connection with sni");
metrics.proxy.accepted_connections_by_sni.inc(SniKind::Sni);
} else if endpoint.is_some() {
metrics
.proxy
.accepted_connections_by_sni
.inc(SniKind::NoSni);
info!("Connection without sni");
debug!("Connection without sni");
} else {
metrics
.proxy
.accepted_connections_by_sni
.inc(SniKind::PasswordHack);
info!("Connection with password hack");
debug!("Connection with password hack");
}
let options = NeonOptions::parse_params(params);

View File

@@ -178,6 +178,8 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {
SCRAM_SHA_256_PLUS => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256Plus),
_ => {}
}
// TODO: make this a metric instead
info!("client chooses {}", sasl.method);
let outcome = sasl::SaslStream::new(self.stream, sasl.message)

View File

@@ -125,6 +125,7 @@ async fn main() -> anyhow::Result<()> {
Metrics::install(Arc::new(ThreadPoolMetrics::new(0)));
// TODO: refactor these to use labels
debug!("Version: {GIT_VERSION}");
debug!("Build_tag: {BUILD_TAG}");
let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo {

View File

@@ -288,6 +288,7 @@ async fn main() -> anyhow::Result<()> {
let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
// TODO: refactor these to use labels
info!("Version: {GIT_VERSION}");
info!("Build_tag: {BUILD_TAG}");
let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo {

View File

@@ -7,7 +7,7 @@ use thiserror::Error;
use tokio::net::TcpStream;
use tokio::sync::Mutex;
use tokio_postgres::{CancelToken, NoTls};
use tracing::info;
use tracing::{debug, info};
use uuid::Uuid;
use crate::error::ReportableError;
@@ -73,7 +73,7 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
break key;
};
info!("registered new query cancellation key {key}");
debug!("registered new query cancellation key {key}");
Session {
key,
cancellation_handler: self,
@@ -165,7 +165,7 @@ impl CancelClosure {
pub(crate) async fn try_cancel_query(self) -> Result<(), CancelError> {
let socket = TcpStream::connect(self.socket_addr).await?;
self.cancel_token.cancel_query_raw(socket, NoTls).await?;
info!("query was cancelled");
debug!("query was cancelled");
Ok(())
}
}
@@ -182,7 +182,7 @@ impl<P> Session<P> {
/// Store the cancel token for the given session.
/// This enables query cancellation in `crate::proxy::prepare_client_connection`.
pub(crate) fn enable_query_cancellation(&self, cancel_closure: CancelClosure) -> CancelKeyData {
info!("enabling query cancellation for this session");
debug!("enabling query cancellation for this session");
self.cancellation_handler
.map
.insert(self.key, Some(cancel_closure));
@@ -194,7 +194,7 @@ impl<P> Session<P> {
impl<P> Drop for Session<P> {
fn drop(&mut self) {
self.cancellation_handler.map.remove(&self.key);
info!("dropped query cancellation key {}", &self.key);
debug!("dropped query cancellation key {}", &self.key);
}
}

View File

@@ -14,7 +14,7 @@ use thiserror::Error;
use tokio::net::TcpStream;
use tokio_postgres::tls::MakeTlsConnect;
use tokio_postgres_rustls::MakeRustlsConnect;
use tracing::{error, info, warn};
use tracing::{debug, error, info, warn};
use crate::auth::parse_endpoint_param;
use crate::cancellation::CancelClosure;
@@ -213,7 +213,7 @@ impl ConnCfg {
};
let connect_once = |host, port| {
info!("trying to connect to compute node at {host}:{port}");
debug!("trying to connect to compute node at {host}:{port}");
connect_with_timeout(host, port).and_then(|socket| async {
let socket_addr = socket.peer_addr()?;
// This prevents load balancer from severing the connection.
@@ -328,6 +328,7 @@ impl ConnCfg {
tracing::Span::current().record("pid", tracing::field::display(client.get_process_id()));
let stream = connection.stream.into_inner();
// TODO: lots of useful info but maybe we can move it elsewhere (eg traces?)
info!(
cold_start_info = ctx.cold_start_info().as_str(),
"connected to compute node at {host} ({socket_addr}) sslmode={:?}",

View File

@@ -146,7 +146,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
stream: S,
conn_gauge: NumClientConnectionsGuard<'static>,
) -> Result<Option<ProxyPassthrough<CancellationHandlerMainInternal, S>>, ClientRequestError> {
info!(
debug!(
protocol = %ctx.protocol(),
"handling interactive connection from client"
);

View File

@@ -8,7 +8,7 @@ use pq_proto::StartupMessageParams;
use smol_str::SmolStr;
use tokio::sync::mpsc;
use tracing::field::display;
use tracing::{debug, info, info_span, Span};
use tracing::{debug, info_span, Span};
use try_lock::TryLock;
use uuid::Uuid;
@@ -122,6 +122,7 @@ impl RequestMonitoring {
protocol: Protocol,
region: &'static str,
) -> Self {
// TODO: be careful with long lived spans
let span = info_span!(
"connect_request",
%protocol,
@@ -384,6 +385,10 @@ impl RequestMonitoringInner {
} else {
ConnectOutcome::Failed
};
// TODO: get rid of entirely/refactor
// check for false positives
// AND false negatives
if let Some(rejected) = self.rejected {
let ep = self
.endpoint_id
@@ -391,7 +396,7 @@ impl RequestMonitoringInner {
.map(|x| x.as_str())
.unwrap_or_default();
// This makes sense only if cache is disabled
info!(
debug!(
?outcome,
?rejected,
?ep,

View File

@@ -8,7 +8,7 @@ use std::time::Duration;
use dashmap::DashMap;
use tokio::time::Instant;
use tracing::info;
use tracing::{debug, info};
use crate::auth::backend::jwt::{AuthRule, FetchAuthRules, FetchAuthRulesError};
use crate::auth::backend::ComputeUserInfo;
@@ -214,7 +214,7 @@ impl<K: Hash + Eq + Clone> ApiLocks<K> {
self.metrics
.semaphore_acquire_seconds
.observe(now.elapsed().as_secs_f64());
info!("acquired permit {:?}", now.elapsed().as_secs_f64());
debug!("acquired permit {:?}", now.elapsed().as_secs_f64());
Ok(WakeComputePermit { permit: permit? })
}

View File

@@ -73,6 +73,8 @@ impl NeonControlPlaneClient {
.endpoints_cache
.is_valid(ctx, &user_info.endpoint.normalize())
{
// TODO: refactor this because it's weird
// this is a failure to authenticate but we return Ok.
info!("endpoint is not valid, skipping the request");
return Ok(AuthInfo::default());
}
@@ -92,7 +94,7 @@ impl NeonControlPlaneClient {
])
.build()?;
info!(url = request.url().as_str(), "sending http request");
debug!(url = request.url().as_str(), "sending http request");
let start = Instant::now();
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
let response = self.endpoint.execute(request).await?;
@@ -104,10 +106,12 @@ impl NeonControlPlaneClient {
// TODO(anna): retry
Err(e) => {
return if e.get_reason().is_not_found() {
// TODO: refactor this because it's weird
// this is a failure to authenticate but we return Ok.
Ok(AuthInfo::default())
} else {
Err(e.into())
}
};
}
};
@@ -163,7 +167,7 @@ impl NeonControlPlaneClient {
.build()
.map_err(GetEndpointJwksError::RequestBuild)?;
info!(url = request.url().as_str(), "sending http request");
debug!(url = request.url().as_str(), "sending http request");
let start = Instant::now();
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
let response = self
@@ -220,7 +224,7 @@ impl NeonControlPlaneClient {
let request = request_builder.build()?;
info!(url = request.url().as_str(), "sending http request");
debug!(url = request.url().as_str(), "sending http request");
let start = Instant::now();
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
let response = self.endpoint.execute(request).await?;
@@ -249,6 +253,7 @@ impl NeonControlPlaneClient {
Ok(node)
}
.map_err(crate::error::log_error)
// TODO: redo this span stuff
.instrument(info_span!("http", id = request_id))
.await
}

View File

@@ -133,6 +133,7 @@ impl<S: AsyncWrite + Unpin> PqStream<S> {
msg: &'static str,
error_kind: ErrorKind,
) -> Result<T, ReportedError> {
// TODO: only log this for actually interesting errors
tracing::info!(
kind = error_kind.to_metric_label(),
msg,