From 68e7e0f0f6580552bbea48d2b85dab761d2134fd Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Mon, 20 Jan 2025 18:26:33 +0100 Subject: [PATCH] pageserver: log on potentially stuck connection manager loop --- .../tenant/timeline/walreceiver/connection_manager.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs index 583d6309ab..2b99a29a8d 100644 --- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs +++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs @@ -107,6 +107,8 @@ pub(super) async fn connection_manager_loop_step( let mut broker_subscription = subscribe_for_timeline_updates(broker_client, id, cancel).await?; debug!("Subscribed for broker timeline updates"); + const WARN_ON_INACTIVE_AFTER: Duration = Duration::from_secs(60); + loop { let time_until_next_retry = connection_manager_state.time_until_next_retry(); let any_activity = connection_manager_state.wal_connection.is_some() @@ -287,6 +289,15 @@ pub(super) async fn connection_manager_loop_step( let _ = broker_client.publish_one(msg).await; debug!("Discovery request sent to the broker"); None + } => {}, + // Observability arm: if there's no active connection and we've received no inputs + // for a long while, then the loop might be stuck. + Some(()) = async { + if !any_activity { + tokio::time::sleep(WARN_ON_INACTIVE_AFTER).await; + tracing::warn!("Connection is inactive and received no inputs for a long time"); + } + None } => {} }