diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 35150b210e..ae389826d5 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -373,7 +373,7 @@ impl From for PageStreamError { match value { e @ WaitLsnError::Timeout(_) => Self::LsnTimeout(e), WaitLsnError::Shutdown => Self::Shutdown, - WaitLsnError::BadState => Self::Reconnect("Timeline is not active".into()), + e @ WaitLsnError::BadState { .. } => Self::Reconnect(format!("{e}").into()), } } } @@ -383,7 +383,7 @@ impl From for QueryError { match value { e @ WaitLsnError::Timeout(_) => Self::Other(anyhow::Error::new(e)), WaitLsnError::Shutdown => Self::Shutdown, - WaitLsnError::BadState => Self::Reconnect, + WaitLsnError::BadState { .. } => Self::Reconnect, } } } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 311338554c..0a9637884f 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1507,7 +1507,7 @@ impl Tenant { .wait_lsn(*lsn, timeline::WaitLsnWaiter::Tenant, ctx) .await .map_err(|e| match e { - e @ (WaitLsnError::Timeout(_) | WaitLsnError::BadState) => { + e @ (WaitLsnError::Timeout(_) | WaitLsnError::BadState { .. }) => { CreateTimelineError::AncestorLsn(anyhow::anyhow!(e)) } WaitLsnError::Shutdown => CreateTimelineError::ShuttingDown, @@ -4308,9 +4308,10 @@ mod tests { // This needs to traverse to the parent, and fails. let err = newtline.get(*TEST_KEY, Lsn(0x50), &ctx).await.unwrap_err(); - assert!(err - .to_string() - .contains("will not become active. Current state: Broken")); + assert!(err.to_string().starts_with(&format!( + "Bad state on timeline {}: Broken", + tline.timeline_id + ))); Ok(()) } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index d07e2352fa..b498876465 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -496,7 +496,7 @@ pub(crate) enum PageReconstructError { Other(#[from] anyhow::Error), #[error("Ancestor LSN wait error: {0}")] - AncestorLsnTimeout(#[from] WaitLsnError), + AncestorLsnTimeout(WaitLsnError), #[error("timeline shutting down")] Cancelled, @@ -651,11 +651,14 @@ pub(crate) enum GetReadyAncestorError { #[error("Ancestor LSN wait error: {0}")] AncestorLsnTimeout(#[from] WaitLsnError), + #[error("Bad state on timeline {timeline_id}: {state:?}")] + BadState { + timeline_id: TimelineId, + state: TimelineState, + }, + #[error("Cancelled")] Cancelled, - - #[error(transparent)] - Other(#[from] anyhow::Error), } #[derive(Clone, Copy)] @@ -690,8 +693,8 @@ pub(crate) enum WaitLsnError { Shutdown, // Called on an timeline not in active state or shutting down - #[error("Bad state (not active)")] - BadState, + #[error("Bad timeline state: {0:?}")] + BadState(TimelineState), // Timeout expired while waiting for LSN to catch up with goal. #[error("{0}")] @@ -756,8 +759,8 @@ impl From for PageReconstructError { match e { AncestorStopping(tid) => PageReconstructError::AncestorStopping(tid), AncestorLsnTimeout(wait_err) => PageReconstructError::AncestorLsnTimeout(wait_err), + bad_state @ BadState { .. } => PageReconstructError::Other(anyhow::anyhow!(bad_state)), Cancelled => PageReconstructError::Cancelled, - Other(other) => PageReconstructError::Other(other), } } } @@ -1466,10 +1469,11 @@ impl Timeline { who_is_waiting: WaitLsnWaiter<'_>, ctx: &RequestContext, /* Prepare for use by cancellation */ ) -> Result<(), WaitLsnError> { - if self.cancel.is_cancelled() { + let state = self.current_state(); + if self.cancel.is_cancelled() || matches!(state, TimelineState::Stopping) { return Err(WaitLsnError::Shutdown); - } else if !self.is_active() { - return Err(WaitLsnError::BadState); + } else if !matches!(state, TimelineState::Active) { + return Err(WaitLsnError::BadState(state)); } if cfg!(debug_assertions) { @@ -3193,17 +3197,21 @@ impl Timeline { } // Recurse into ancestor if needed - if is_inherited_key(key) && Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn { - trace!( - "going into ancestor {}, cont_lsn is {}", - timeline.ancestor_lsn, - cont_lsn - ); + if let Some(ancestor_timeline) = timeline.ancestor_timeline.as_ref() { + if is_inherited_key(key) && Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn { + trace!( + "going into ancestor {}, cont_lsn is {}", + timeline.ancestor_lsn, + cont_lsn + ); - timeline_owned = timeline.get_ready_ancestor_timeline(ctx).await?; - timeline = &*timeline_owned; - prev_lsn = None; - continue 'outer; + timeline_owned = timeline + .get_ready_ancestor_timeline(ancestor_timeline, ctx) + .await?; + timeline = &*timeline_owned; + prev_lsn = None; + continue 'outer; + } } let guard = timeline.layers.read().await; @@ -3352,10 +3360,10 @@ impl Timeline { break None; } - // Not fully retrieved but no ancestor timeline. - if timeline.ancestor_timeline.is_none() { + let Some(ancestor_timeline) = timeline.ancestor_timeline.as_ref() else { + // Not fully retrieved but no ancestor timeline. break Some(keyspace); - } + }; // Now we see if there are keys covered by the image layer but does not exist in the // image layer, which means that the key does not exist. @@ -3375,7 +3383,7 @@ impl Timeline { // Take the min to avoid reconstructing a page with data newer than request Lsn. cont_lsn = std::cmp::min(Lsn(request_lsn.0 + 1), Lsn(timeline.ancestor_lsn.0 + 1)); timeline_owned = timeline - .get_ready_ancestor_timeline(ctx) + .get_ready_ancestor_timeline(ancestor_timeline, ctx) .await .map_err(GetVectoredError::GetReadyAncestorError)?; timeline = &*timeline_owned; @@ -3547,13 +3555,9 @@ impl Timeline { async fn get_ready_ancestor_timeline( &self, + ancestor: &Arc, ctx: &RequestContext, ) -> Result, GetReadyAncestorError> { - let ancestor = match self.get_ancestor_timeline() { - Ok(timeline) => timeline, - Err(e) => return Err(GetReadyAncestorError::from(e)), - }; - // It's possible that the ancestor timeline isn't active yet, or // is active but hasn't yet caught up to the branch point. Wait // for it. @@ -3586,11 +3590,10 @@ impl Timeline { )); } Err(state) => { - return Err(GetReadyAncestorError::Other(anyhow::anyhow!( - "Timeline {} will not become active. Current state: {:?}", - ancestor.timeline_id, - &state, - ))); + return Err(GetReadyAncestorError::BadState { + timeline_id: ancestor.timeline_id, + state, + }); } } ancestor @@ -3599,21 +3602,17 @@ impl Timeline { .map_err(|e| match e { e @ WaitLsnError::Timeout(_) => GetReadyAncestorError::AncestorLsnTimeout(e), WaitLsnError::Shutdown => GetReadyAncestorError::Cancelled, - e @ WaitLsnError::BadState => GetReadyAncestorError::Other(anyhow::anyhow!(e)), + WaitLsnError::BadState(state) => GetReadyAncestorError::BadState { + timeline_id: ancestor.timeline_id, + state, + }, })?; - Ok(ancestor) + Ok(ancestor.clone()) } - pub(crate) fn get_ancestor_timeline(&self) -> anyhow::Result> { - let ancestor = self.ancestor_timeline.as_ref().with_context(|| { - format!( - "Ancestor is missing. Timeline id: {} Ancestor id {:?}", - self.timeline_id, - self.get_ancestor_timeline_id(), - ) - })?; - Ok(Arc::clone(ancestor)) + pub(crate) fn get_ancestor_timeline(&self) -> Option> { + self.ancestor_timeline.clone() } pub(crate) fn get_shard_identity(&self) -> &ShardIdentity {