diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index c4bc23af4b..5dae1902c1 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -1940,22 +1940,21 @@ impl LayeredTimeline { // for redo. let rel = seg.rel; let rel_blknum = seg.segno * RELISH_SEG_SIZE + seg_blknum; - let (cached_lsn_opt, cached_page_opt) = match self.lookup_cached_page(&rel, rel_blknum, lsn) - { + let cached_page_img = match self.lookup_cached_page(&rel, rel_blknum, lsn) { Some((cached_lsn, cached_img)) => { match cached_lsn.cmp(&lsn) { cmp::Ordering::Less => {} // there might be WAL between cached_lsn and lsn, we need to check cmp::Ordering::Equal => return Ok(cached_img), // exact LSN match, return the image cmp::Ordering::Greater => panic!(), // the returned lsn should never be after the requested lsn } - (Some(cached_lsn), Some((cached_lsn, cached_img))) + Some((cached_lsn, cached_img)) } - None => (None, None), + None => None, }; let mut data = PageReconstructData { records: Vec::new(), - page_img: None, + page_img: cached_page_img, }; // Holds an Arc reference to 'layer_ref' when iterating in the loop below. @@ -1968,15 +1967,14 @@ impl LayeredTimeline { let mut curr_lsn = lsn; loop { let result = layer_ref - .get_page_reconstruct_data(seg_blknum, curr_lsn, cached_lsn_opt, &mut data) + .get_page_reconstruct_data(seg_blknum, curr_lsn, &mut data) .with_context(|| { format!( - "Failed to get reconstruct data {} {:?} {} {} {:?}", + "Failed to get reconstruct data {} {:?} {} {}", layer_ref.get_seg_tag(), layer_ref.filename(), seg_blknum, curr_lsn, - cached_lsn_opt, ) })?; match result { @@ -2023,16 +2021,6 @@ impl LayeredTimeline { lsn, ); } - PageReconstructResult::Cached => { - let (cached_lsn, cached_img) = cached_page_opt.unwrap(); - assert!(data.page_img.is_none()); - if let Some((first_rec_lsn, first_rec)) = data.records.first() { - assert!(&cached_lsn < first_rec_lsn); - assert!(!first_rec.will_init()); - } - data.page_img = Some(cached_img); - break; - } } } @@ -2054,12 +2042,12 @@ impl LayeredTimeline { // If we have a page image, and no WAL, we're all set if data.records.is_empty() { - if let Some(img) = &data.page_img { + if let Some((img_lsn, img)) = &data.page_img { trace!( "found page image for blk {} in {} at {}, no WAL redo required", rel_blknum, rel, - request_lsn + img_lsn ); Ok(img.clone()) } else { @@ -2086,11 +2074,13 @@ impl LayeredTimeline { ); Ok(ZERO_PAGE.clone()) } else { - if data.page_img.is_some() { + let base_img = if let Some((_lsn, img)) = data.page_img { trace!("found {} WAL records and a base image for blk {} in {} at {}, performing WAL redo", data.records.len(), rel_blknum, rel, request_lsn); + Some(img) } else { trace!("found {} WAL records that will init the page for blk {} in {} at {}, performing WAL redo", data.records.len(), rel_blknum, rel, request_lsn); - } + None + }; let last_rec_lsn = data.records.last().unwrap().0; @@ -2098,7 +2088,7 @@ impl LayeredTimeline { rel, rel_blknum, request_lsn, - data.page_img.clone(), + base_img, data.records, )?; diff --git a/pageserver/src/layered_repository/delta_layer.rs b/pageserver/src/layered_repository/delta_layer.rs index 17d5eef0a5..7434b8de11 100644 --- a/pageserver/src/layered_repository/delta_layer.rs +++ b/pageserver/src/layered_repository/delta_layer.rs @@ -208,16 +208,15 @@ impl Layer for DeltaLayer { &self, blknum: SegmentBlk, lsn: Lsn, - cached_img_lsn: Option, reconstruct_data: &mut PageReconstructData, ) -> Result { let mut need_image = true; assert!((0..RELISH_SEG_SIZE).contains(&blknum)); - match &cached_img_lsn { - Some(cached_lsn) if &self.end_lsn <= cached_lsn => { - return Ok(PageReconstructResult::Cached) + match &reconstruct_data.page_img { + Some((cached_lsn, _)) if &self.end_lsn <= cached_lsn => { + return Ok(PageReconstructResult::Complete) } _ => {} } @@ -240,9 +239,9 @@ impl Layer for DeltaLayer { .iter() .rev(); for ((_blknum, pv_lsn), blob_range) in iter { - match &cached_img_lsn { - Some(cached_lsn) if pv_lsn <= cached_lsn => { - return Ok(PageReconstructResult::Cached) + match &reconstruct_data.page_img { + Some((cached_lsn, _)) if pv_lsn <= cached_lsn => { + return Ok(PageReconstructResult::Complete) } _ => {} } @@ -252,7 +251,7 @@ impl Layer for DeltaLayer { match pv { PageVersion::Page(img) => { // Found a page image, return it - reconstruct_data.page_img = Some(img); + reconstruct_data.page_img = Some((*pv_lsn, img)); need_image = false; break; } diff --git a/pageserver/src/layered_repository/image_layer.rs b/pageserver/src/layered_repository/image_layer.rs index 33311f896b..24445ff7e9 100644 --- a/pageserver/src/layered_repository/image_layer.rs +++ b/pageserver/src/layered_repository/image_layer.rs @@ -145,14 +145,15 @@ impl Layer for ImageLayer { &self, blknum: SegmentBlk, lsn: Lsn, - cached_img_lsn: Option, reconstruct_data: &mut PageReconstructData, ) -> Result { assert!((0..RELISH_SEG_SIZE).contains(&blknum)); assert!(lsn >= self.lsn); - match cached_img_lsn { - Some(cached_lsn) if self.lsn <= cached_lsn => return Ok(PageReconstructResult::Cached), + match reconstruct_data.page_img { + Some((cached_lsn, _)) if self.lsn <= cached_lsn => { + return Ok(PageReconstructResult::Complete) + } _ => {} } @@ -195,7 +196,7 @@ impl Layer for ImageLayer { } }; - reconstruct_data.page_img = Some(Bytes::from(buf)); + reconstruct_data.page_img = Some((self.lsn, Bytes::from(buf))); Ok(PageReconstructResult::Complete) } diff --git a/pageserver/src/layered_repository/inmemory_layer.rs b/pageserver/src/layered_repository/inmemory_layer.rs index fe4a06d0a5..17b061b20e 100644 --- a/pageserver/src/layered_repository/inmemory_layer.rs +++ b/pageserver/src/layered_repository/inmemory_layer.rs @@ -224,7 +224,6 @@ impl Layer for InMemoryLayer { &self, blknum: SegmentBlk, lsn: Lsn, - cached_img_lsn: Option, reconstruct_data: &mut PageReconstructData, ) -> Result { let mut need_image = true; @@ -238,9 +237,9 @@ impl Layer for InMemoryLayer { if let Some(vec_map) = inner.page_versions.get(&blknum) { let slice = vec_map.slice_range(..=lsn); for (entry_lsn, pos) in slice.iter().rev() { - match &cached_img_lsn { - Some(cached_lsn) if entry_lsn <= cached_lsn => { - return Ok(PageReconstructResult::Cached) + match &reconstruct_data.page_img { + Some((cached_lsn, _)) if entry_lsn <= cached_lsn => { + return Ok(PageReconstructResult::Complete) } _ => {} } @@ -248,7 +247,7 @@ impl Layer for InMemoryLayer { let pv = inner.read_pv(*pos)?; match pv { PageVersion::Page(img) => { - reconstruct_data.page_img = Some(img); + reconstruct_data.page_img = Some((*entry_lsn, img)); need_image = false; break; } diff --git a/pageserver/src/layered_repository/storage_layer.rs b/pageserver/src/layered_repository/storage_layer.rs index 99fdaa6845..8976491fc0 100644 --- a/pageserver/src/layered_repository/storage_layer.rs +++ b/pageserver/src/layered_repository/storage_layer.rs @@ -71,15 +71,26 @@ pub enum PageVersion { } /// -/// Data needed to reconstruct a page version +/// Struct used to communicate across calls to 'get_page_reconstruct_data'. /// -/// 'page_img' is the old base image of the page to start the WAL replay with. -/// It can be None, if the first WAL record initializes the page (will_init) -/// 'records' contains the records to apply over the base image. +/// Before first call to get_page_reconstruct_data, you can fill in 'page_img' +/// if you have an older cached version of the page available. That can save +/// work in 'get_page_reconstruct_data', as it can stop searching for page +/// versions when all the WAL records going back to the cached image have been +/// collected. +/// +/// When get_page_reconstruct_data returns Complete, 'page_img' is set to an +/// image of the page, or the oldest WAL record in 'records' is a will_init-type +/// record that initializes the page without requiring a previous image. +/// +/// If 'get_page_reconstruct_data' returns Continue, some 'records' may have +/// been collected, but there are more records outside the current layer. Pass +/// the same PageReconstructData struct in the next 'get_page_reconstruct_data' +/// call, to collect more records. /// pub struct PageReconstructData { pub records: Vec<(Lsn, ZenithWalRecord)>, - pub page_img: Option, + pub page_img: Option<(Lsn, Bytes)>, } /// Return value from Layer::get_page_reconstruct_data @@ -93,8 +104,6 @@ pub enum PageReconstructResult { /// the returned LSN. This is usually considered an error, but might be OK /// in some circumstances. Missing(Lsn), - /// Use the cached image at `cached_img_lsn` as the base image - Cached, } /// @@ -138,19 +147,16 @@ pub trait Layer: Send + Sync { /// It is up to the caller to collect more data from previous layer and /// perform WAL redo, if necessary. /// - /// `cached_img_lsn` should be set to a cached page image's lsn < `lsn`. - /// This function will only return data after `cached_img_lsn`. - /// /// See PageReconstructResult for possible return values. The collected data /// is appended to reconstruct_data; the caller should pass an empty struct - /// on first call. If this returns PageReconstructResult::Continue, look up - /// the predecessor layer and call again with the same 'reconstruct_data' - /// to collect more data. + /// on first call, or a struct with a cached older image of the page if one + /// is available. If this returns PageReconstructResult::Continue, look up + /// the predecessor layer and call again with the same 'reconstruct_data' to + /// collect more data. fn get_page_reconstruct_data( &self, blknum: SegmentBlk, lsn: Lsn, - cached_img_lsn: Option, reconstruct_data: &mut PageReconstructData, ) -> Result;