fix(pageserver): add the walreceiver state to tenant timeline GET api endpoint (#5196)

Add a `walreceiver_state` field to `TimelineInfo` (response of `GET /v1/tenant/:tenant_id/timeline/:timeline_id`) and while doing that, refactor out a common `Timeline::walreceiver_state(..)`. No OpenAPI changes, because this is an internal debugging addition.

Fixes #3115.

Co-authored-by: Joonas Koivunen <joonas.koivunen@gmail.com>
This commit is contained in:
duguorong009
2023-09-07 19:17:18 +08:00
committed by GitHub
parent 7ba0f5c08d
commit 706977fb77
4 changed files with 18 additions and 10 deletions

View File

@@ -381,6 +381,8 @@ pub struct TimelineInfo {
pub pg_version: u32,
pub state: TimelineState,
pub walreceiver_status: String,
}
#[derive(Debug, Clone, Serialize)]

View File

@@ -285,6 +285,8 @@ async fn build_timeline_info_common(
let state = timeline.current_state();
let remote_consistent_lsn = timeline.get_remote_consistent_lsn().unwrap_or(Lsn(0));
let walreceiver_status = timeline.walreceiver_status();
let info = TimelineInfo {
tenant_id: timeline.tenant_id,
timeline_id: timeline.timeline_id,
@@ -305,6 +307,8 @@ async fn build_timeline_info_common(
pg_version: timeline.pg_version,
state,
walreceiver_status,
};
Ok(info)
}

View File

@@ -585,15 +585,7 @@ impl Timeline {
Err(e) => {
// don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo
drop(_timer);
let walreceiver_status = {
match &*self.walreceiver.lock().unwrap() {
None => "stopping or stopped".to_string(),
Some(walreceiver) => match walreceiver.status() {
Some(status) => status.to_human_readable_string(),
None => "Not active".to_string(),
},
}
};
let walreceiver_status = self.walreceiver_status();
Err(anyhow::Error::new(e).context({
format!(
"Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}",
@@ -607,6 +599,16 @@ impl Timeline {
}
}
pub(crate) fn walreceiver_status(&self) -> String {
match &*self.walreceiver.lock().unwrap() {
None => "stopping or stopped".to_string(),
Some(walreceiver) => match walreceiver.status() {
Some(status) => status.to_human_readable_string(),
None => "Not active".to_string(),
},
}
}
/// Check that it is valid to request operations with that lsn.
pub fn check_lsn_is_in_scope(
&self,

View File

@@ -135,7 +135,7 @@ impl WalReceiver {
.await;
}
pub(super) fn status(&self) -> Option<ConnectionManagerStatus> {
pub(crate) fn status(&self) -> Option<ConnectionManagerStatus> {
self.manager_status.read().unwrap().clone()
}
}