diff --git a/proxy/src/auth/backend/classic.rs b/proxy/src/auth/backend/classic.rs index 5ed0f747c2..d8ee1e8b64 100644 --- a/proxy/src/auth/backend/classic.rs +++ b/proxy/src/auth/backend/classic.rs @@ -10,7 +10,7 @@ use crate::{ stream::PqStream, }; use tokio::io::{AsyncRead, AsyncWrite}; -use tracing::info; +use tracing::{error, info, warn}; pub(super) async fn authenticate( api: &impl console::Api, @@ -55,11 +55,17 @@ pub(super) async fn authenticate( let mut num_retries = 0; let mut node = loop { let wake_res = api.wake_compute(extra, creds).await; - match handle_try_wake(wake_res, num_retries)? { - ControlFlow::Continue(_) => num_retries += 1, - ControlFlow::Break(n) => break n, + match handle_try_wake(wake_res, num_retries) { + Err(e) => { + error!(error = ?e, num_retries, retriable = false, "couldn't wake compute node"); + return Err(e.into()); + } + Ok(ControlFlow::Continue(e)) => { + warn!(error = ?e, num_retries, retriable = true, "couldn't wake compute node"); + num_retries += 1; + } + Ok(ControlFlow::Break(n)) => break n, } - info!(num_retries, "retrying wake compute"); }; if let Some(keys) = scram_keys { use tokio_postgres::config::AuthKeys; diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index b1cf2a8559..e96b79ed92 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -230,7 +230,8 @@ pub struct PostgresConnection { } impl ConnCfg { - async fn do_connect( + /// Connect to a corresponding compute node. + pub async fn connect( &self, allow_self_signed_compute: bool, timeout: Duration, @@ -270,20 +271,6 @@ impl ConnCfg { Ok(connection) } - - /// Connect to a corresponding compute node. - pub async fn connect( - &self, - allow_self_signed_compute: bool, - timeout: Duration, - ) -> Result { - self.do_connect(allow_self_signed_compute, timeout) - .inspect_err(|err| { - // Immediately log the error we have at our disposal. - error!("couldn't connect to compute node: {err}"); - }) - .await - } } /// Retrieve `options` from a startup message, dropping all proxy-secific flags. diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs index 5f59957b2d..e6fcf901d9 100644 --- a/proxy/src/proxy.rs +++ b/proxy/src/proxy.rs @@ -23,7 +23,7 @@ use tokio::{ time, }; use tokio_util::sync::CancellationToken; -use tracing::{error, info, warn}; +use tracing::{error, info, info_span, warn, Instrument}; use utils::measured_stream::MeasuredStream; /// Number of times we should retry the `/proxy_wake_compute` http request. @@ -101,21 +101,20 @@ pub async fn task_main( tokio::select! { accept_result = listener.accept() => { let (socket, peer_addr) = accept_result?; - info!("accepted postgres client connection from {peer_addr}"); let session_id = uuid::Uuid::new_v4(); let cancel_map = Arc::clone(&cancel_map); connections.spawn( async move { - info!("spawned a task for {peer_addr}"); + info!("accepted postgres client connection"); socket .set_nodelay(true) .context("failed to set socket option")?; - handle_client(config, &cancel_map, session_id, socket, ClientMode::Tcp) - .await + handle_client(config, &cancel_map, session_id, socket, ClientMode::Tcp).await } + .instrument(info_span!("handle_client", ?session_id, %peer_addr)) .unwrap_or_else(move |e| { // Acknowledge that the task has finished with an error. error!(?session_id, "per-client task finished with an error: {e:#}"); @@ -183,7 +182,6 @@ impl ClientMode { } } -#[tracing::instrument(fields(session_id = ?session_id), skip_all)] pub async fn handle_client( config: &'static ProxyConfig, cancel_map: &CancelMap, @@ -425,11 +423,17 @@ where auth::BackendType::Test(x) => x.wake_compute(), }; - match handle_try_wake(wake_res, num_retries)? { + match handle_try_wake(wake_res, num_retries) { + Err(e) => { + error!(error = ?e, num_retries, retriable = false, "couldn't wake compute node"); + return Err(e.into()); + } // failed to wake up but we can continue to retry - ControlFlow::Continue(_) => {} + Ok(ControlFlow::Continue(e)) => { + warn!(error = ?e, num_retries, retriable = true, "couldn't wake compute node"); + } // successfully woke up a compute node and can break the wakeup loop - ControlFlow::Break(mut node_info) => { + Ok(ControlFlow::Break(mut node_info)) => { node_info.config.reuse_password(&config); mechanism.update_connect_config(&mut node_info.config); break node_info; @@ -440,7 +444,6 @@ where num_retries += 1; time::sleep(wait_duration).await; - info!(num_retries, "retrying wake compute"); }; // now that we have a new node, try connect to it repeatedly. @@ -451,10 +454,12 @@ where match mechanism.connect_once(&node_info, CONNECT_TIMEOUT).await { Ok(res) => return Ok(res), Err(e) => { - error!(error = ?e, "could not connect to compute node"); - if !e.should_retry(num_retries) { + let retriable = e.should_retry(num_retries); + if !retriable { + error!(error = ?e, num_retries, retriable, "couldn't connect to compute node"); return Err(e.into()); } + warn!(error = ?e, num_retries, retriable, "couldn't connect to compute node"); } } @@ -462,7 +467,6 @@ where num_retries += 1; time::sleep(wait_duration).await; - info!(num_retries, "retrying connect_once"); } }