diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 76dc52fa16..203451baa5 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -88,6 +88,7 @@ use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart; use crate::tenant::remote_timeline_client::INITDB_PATH; use crate::tenant::storage_layer::DeltaLayer; use crate::tenant::storage_layer::ImageLayer; +use crate::walredo::Error; use crate::InitializationOrder; use std::collections::hash_map::Entry; use std::collections::BTreeSet; @@ -343,7 +344,7 @@ impl WalRedoManager { base_img: Option<(Lsn, bytes::Bytes)>, records: Vec<(Lsn, crate::walrecord::NeonWalRecord)>, pg_version: u32, - ) -> anyhow::Result { + ) -> Result { match self { Self::Prod(mgr) => { mgr.request_redo(key, lsn, base_img, records, pg_version) @@ -3953,7 +3954,7 @@ pub(crate) mod harness { base_img: Option<(Lsn, Bytes)>, records: Vec<(Lsn, NeonWalRecord)>, _pg_version: u32, - ) -> anyhow::Result { + ) -> Result { let records_neon = records.iter().all(|r| apply_neon::can_apply_in_neon(&r.1)); if records_neon { // For Neon wal records, we can decode without spawning postgres, so do so. diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 5398ad399c..20a781ae5b 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -5235,10 +5235,14 @@ impl Timeline { .map_err(PageReconstructError::WalRedo)? .request_redo(key, request_lsn, data.img, data.records, self.pg_version) .await - .context("reconstruct a page image") { Ok(img) => img, - Err(e) => return Err(PageReconstructError::WalRedo(e)), + Err(e) => { + return Err(match e { + crate::walredo::Error::Cancelled => PageReconstructError::Cancelled, + crate::walredo::Error::Other(e) => PageReconstructError::WalRedo(e), + }) + } }; Ok(img) diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index d562540bde..5b43098380 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -68,6 +68,20 @@ pub struct PostgresRedoManager { redo_process: heavier_once_cell::OnceCell>, } +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("cancelled")] + Cancelled, + #[error(transparent)] + Other(#[from] anyhow::Error), +} + +macro_rules! bail { + ($($arg:tt)*) => { + return Err(Error::Other(anyhow::anyhow!($($arg)*))); + } +} + /// /// Public interface of WAL redo manager /// @@ -88,9 +102,9 @@ impl PostgresRedoManager { base_img: Option<(Lsn, Bytes)>, records: Vec<(Lsn, NeonWalRecord)>, pg_version: u32, - ) -> anyhow::Result { + ) -> Result { if records.is_empty() { - anyhow::bail!("invalid WAL redo request with no records"); + bail!("invalid WAL redo request with no records"); } let base_img_lsn = base_img.as_ref().map(|p| p.0).unwrap_or(Lsn::INVALID); @@ -203,7 +217,7 @@ impl PostgresRedoManager { records: &[(Lsn, NeonWalRecord)], wal_redo_timeout: Duration, pg_version: u32, - ) -> anyhow::Result { + ) -> Result { *(self.last_redo_at.lock().unwrap()) = Some(Instant::now()); let (rel, blknum) = key.to_rel_block().context("invalid record")?; @@ -315,7 +329,7 @@ impl PostgresRedoManager { } n_attempts += 1; if n_attempts > MAX_RETRY_ATTEMPTS || result.is_ok() { - return result; + return result.map_err(Error::Other); } } } @@ -329,7 +343,7 @@ impl PostgresRedoManager { lsn: Lsn, base_img: Option, records: &[(Lsn, NeonWalRecord)], - ) -> anyhow::Result { + ) -> Result { let start_time = Instant::now(); let mut page = BytesMut::new(); @@ -338,7 +352,7 @@ impl PostgresRedoManager { page.extend_from_slice(&fpi[..]); } else { // All the current WAL record types that we can handle require a base image. - anyhow::bail!("invalid neon WAL redo request with no base image"); + bail!("invalid neon WAL redo request with no base image"); } // Apply all the WAL records in the batch