From 706977fb7714aa87516ac478db183d0304aa2287 Mon Sep 17 00:00:00 2001 From: duguorong009 <80258679+duguorong009@users.noreply.github.com> Date: Thu, 7 Sep 2023 19:17:18 +0800 Subject: [PATCH] fix(pageserver): add the walreceiver state to tenant timeline GET api endpoint (#5196) Add a `walreceiver_status` field to `TimelineInfo` (response of `GET /v1/tenant/:tenant_id/timeline/:timeline_id`) and while doing that, refactor out a common `Timeline::walreceiver_status(..)`. No OpenAPI changes, because this is an internal debugging addition. Fixes #3115. Co-authored-by: Joonas Koivunen --- libs/pageserver_api/src/models.rs | 2 ++ pageserver/src/http/routes.rs | 4 ++++ pageserver/src/tenant/timeline.rs | 20 ++++++++++--------- pageserver/src/tenant/timeline/walreceiver.rs | 2 +- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 340463afa7..f354296be2 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -381,6 +381,8 @@ pub struct TimelineInfo { pub pg_version: u32, pub state: TimelineState, + + pub walreceiver_status: String, } #[derive(Debug, Clone, Serialize)] diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index c661fe9e3f..a8e914ba08 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -285,6 +285,8 @@ async fn build_timeline_info_common( let state = timeline.current_state(); let remote_consistent_lsn = timeline.get_remote_consistent_lsn().unwrap_or(Lsn(0)); + let walreceiver_status = timeline.walreceiver_status(); + let info = TimelineInfo { tenant_id: timeline.tenant_id, timeline_id: timeline.timeline_id, @@ -305,6 +307,8 @@ async fn build_timeline_info_common( pg_version: timeline.pg_version, state, + + walreceiver_status, }; Ok(info) } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b616c3c572..cb8b842cf6 100644 --- 
a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -585,15 +585,7 @@ impl Timeline { Err(e) => { // don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo drop(_timer); - let walreceiver_status = { - match &*self.walreceiver.lock().unwrap() { - None => "stopping or stopped".to_string(), - Some(walreceiver) => match walreceiver.status() { - Some(status) => status.to_human_readable_string(), - None => "Not active".to_string(), - }, - } - }; + let walreceiver_status = self.walreceiver_status(); Err(anyhow::Error::new(e).context({ format!( "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}", @@ -607,6 +599,16 @@ impl Timeline { } } + pub(crate) fn walreceiver_status(&self) -> String { + match &*self.walreceiver.lock().unwrap() { + None => "stopping or stopped".to_string(), + Some(walreceiver) => match walreceiver.status() { + Some(status) => status.to_human_readable_string(), + None => "Not active".to_string(), + }, + } + } + /// Check that it is valid to request operations with that lsn. pub fn check_lsn_is_in_scope( &self, diff --git a/pageserver/src/tenant/timeline/walreceiver.rs b/pageserver/src/tenant/timeline/walreceiver.rs index ccff735c3c..842bc3675c 100644 --- a/pageserver/src/tenant/timeline/walreceiver.rs +++ b/pageserver/src/tenant/timeline/walreceiver.rs @@ -135,7 +135,7 @@ impl WalReceiver { .await; } - pub(super) fn status(&self) -> Option { + pub(crate) fn status(&self) -> Option { self.manager_status.read().unwrap().clone() } }