diff --git a/pageserver/src/layered_repository/delta_layer.rs b/pageserver/src/layered_repository/delta_layer.rs index 182e3e6c5c..9f975cbd91 100644 --- a/pageserver/src/layered_repository/delta_layer.rs +++ b/pageserver/src/layered_repository/delta_layer.rs @@ -205,22 +205,22 @@ impl Layer for DeltaLayer { for ((_blknum, pv_lsn), blob_range) in iter { let pv = PageVersion::des(&read_blob(&page_version_reader, blob_range)?)?; - if let Some(img) = pv.page_image { - // Found a page image, return it - reconstruct_data.page_img = Some(img); - need_image = false; - break; - } else if let Some(rec) = pv.record { - let will_init = rec.will_init; - reconstruct_data.records.push((*pv_lsn, rec)); - if will_init { - // This WAL record initializes the page, so no need to go further back + match pv { + PageVersion::Page(img) => { + // Found a page image, return it + reconstruct_data.page_img = Some(img); need_image = false; break; } - } else { - // No base image, and no WAL record. Huh? - bail!("no page image or WAL record for requested page"); + PageVersion::Wal(rec) => { + let will_init = rec.will_init; + reconstruct_data.records.push((*pv_lsn, rec)); + if will_init { + // This WAL record initializes the page, so no need to go further back + need_image = false; + break; + } + } } } @@ -311,19 +311,22 @@ impl Layer for DeltaLayer { let buf = read_blob(&chapter, blob_range)?; let pv = PageVersion::des(&buf)?; - if let Some(img) = pv.page_image.as_ref() { - write!(&mut desc, " img {} bytes", img.len())?; - } - if let Some(rec) = pv.record.as_ref() { - let wal_desc = waldecoder::describe_wal_record(&rec.rec); - write!( - &mut desc, - " rec {} bytes will_init: {} {}", - rec.rec.len(), - rec.will_init, - wal_desc - )?; + match pv { + PageVersion::Page(img) => { + write!(&mut desc, " img {} bytes", img.len())?; + } + PageVersion::Wal(rec) => { + let wal_desc = waldecoder::describe_wal_record(&rec.rec); + write!( + &mut desc, + " rec {} bytes will_init: {} {}", + rec.rec.len(), + rec.will_init, + wal_desc + )?; + } } + println!(" blk {} at {}: {}", blk, lsn, desc); } diff --git a/pageserver/src/layered_repository/inmemory_layer.rs b/pageserver/src/layered_repository/inmemory_layer.rs index 028e72267c..1722f2ad60 100644 --- a/pageserver/src/layered_repository/inmemory_layer.rs +++ b/pageserver/src/layered_repository/inmemory_layer.rs @@ -13,7 +13,7 @@ use crate::layered_repository::{DeltaLayer, ImageLayer}; use crate::repository::WALRecord; use crate::PageServerConf; use crate::{ZTenantId, ZTimelineId}; -use anyhow::{bail, ensure, Result}; +use anyhow::{ensure, Result}; use bytes::Bytes; use log::*; use std::path::PathBuf; @@ -184,21 +184,21 @@ impl Layer for InMemoryLayer { .get_block_lsn_range(blknum, ..=lsn) .iter() .rev(); - for (entry_lsn, entry) in iter { - if let Some(img) = &entry.page_image { - reconstruct_data.page_img = Some(img.clone()); - need_image = false; - break; - } else if let Some(rec) = &entry.record { - reconstruct_data.records.push((*entry_lsn, rec.clone())); - if rec.will_init { - // This WAL record initializes the page, so no need to go further back + for (entry_lsn, pv) in iter { + match pv { + PageVersion::Page(img) => { + reconstruct_data.page_img = Some(img.clone()); need_image = false; break; } - } else { - // No base image, and no WAL record. Huh? - bail!("no page image or WAL record for requested page"); + PageVersion::Wal(rec) => { + reconstruct_data.records.push((*entry_lsn, rec.clone())); + if rec.will_init { + // This WAL record initializes the page, so no need to go further back + need_image = false; + break; + } + } } } // release lock on 'inner' @@ -286,13 +286,12 @@ impl Layer for InMemoryLayer { } for (blknum, lsn, pv) in inner.page_versions.ordered_page_version_iter(None) { - println!( - "blk {} at {}: {}/{}\n", - blknum, - lsn, - pv.page_image.is_some(), - pv.record.is_some() - ); + let pv_description = match pv { + PageVersion::Page(_img) => "page", + PageVersion::Wal(_rec) => "wal", + }; + + println!("blk {} at {}: {}\n", blknum, lsn, pv_description); } Ok(()) @@ -357,26 +356,12 @@ impl InMemoryLayer { /// Remember new page version, as a WAL record over previous version pub fn put_wal_record(&self, lsn: Lsn, blknum: u32, rec: WALRecord) -> u32 { - self.put_page_version( - blknum, - lsn, - PageVersion { - page_image: None, - record: Some(rec), - }, - ) + self.put_page_version(blknum, lsn, PageVersion::Wal(rec)) } /// Remember new page version, as a full page image pub fn put_page_image(&self, blknum: u32, lsn: Lsn, img: Bytes) -> u32 { - self.put_page_version( - blknum, - lsn, - PageVersion { - page_image: Some(img), - record: None, - }, - ) + self.put_page_version(blknum, lsn, PageVersion::Page(img)) } /// Common subroutine of the public put_wal_record() and put_page_image() functions. @@ -396,11 +381,10 @@ impl InMemoryLayer { inner.assert_writeable(); let mut mem_usage = 0; - if let Some(img) = &pv.page_image { - mem_usage += img.len(); - } else if let Some(rec) = &pv.record { - mem_usage += rec.rec.len(); - } + mem_usage += match &pv { + PageVersion::Page(img) => img.len(), + PageVersion::Wal(rec) => rec.rec.len(), + }; let old = inner.page_versions.append_or_update_last(blknum, lsn, pv); @@ -441,10 +425,7 @@ impl InMemoryLayer { // subsequent call to initialize the gap page. let gapstart = self.seg.segno * RELISH_SEG_SIZE + oldsize; for gapblknum in gapstart..blknum { - let zeropv = PageVersion { - page_image: Some(ZERO_PAGE.clone()), - record: None, - }; + let zeropv = PageVersion::Page(ZERO_PAGE.clone()); trace!( "filling gap blk {} with zeros for write of {}", gapblknum, diff --git a/pageserver/src/layered_repository/page_versions.rs b/pageserver/src/layered_repository/page_versions.rs index 90321f96cd..5a7741dac4 100644 --- a/pageserver/src/layered_repository/page_versions.rs +++ b/pageserver/src/layered_repository/page_versions.rs @@ -104,12 +104,9 @@ impl<'a> Iterator for OrderedPageVersionIter<'a> { #[cfg(test)] mod tests { - use super::*; + use bytes::Bytes; - const EMPTY_PAGE_VERSION: PageVersion = PageVersion { - page_image: None, - record: None, - }; + use super::*; #[test] fn test_ordered_iter() { @@ -117,9 +114,16 @@ mod tests { const BLOCKS: u32 = 1000; const LSNS: u64 = 50; + let empty_page = Bytes::from_static(&[0u8; 8192]); + let empty_page_version = PageVersion::Page(empty_page); + for blknum in 0..BLOCKS { for lsn in 0..LSNS { - let old = page_versions.append_or_update_last(blknum, Lsn(lsn), EMPTY_PAGE_VERSION); + let old = page_versions.append_or_update_last( + blknum, + Lsn(lsn), + empty_page_version.clone(), + ); assert!(old.is_none()); } } diff --git a/pageserver/src/layered_repository/storage_layer.rs b/pageserver/src/layered_repository/storage_layer.rs index 9bbe9cd76d..bd800c72a4 100644 --- a/pageserver/src/layered_repository/storage_layer.rs +++ b/pageserver/src/layered_repository/storage_layer.rs @@ -51,23 +51,10 @@ impl SegmentTag { /// /// A page version can be stored as a full page image, or as WAL record that needs /// to be applied over the previous page version to reconstruct this version. -/// -/// It's also possible to have both a WAL record and a page image in the same -/// PageVersion. That happens if page version is originally stored as a WAL record -/// but it is later reconstructed by a GetPage@LSN request by performing WAL -/// redo. The get_page_at_lsn() code will store the reconstructed pag image next to -/// the WAL record in that case. TODO: That's pretty accidental, not the result -/// of any grand design. If we want to keep reconstructed page versions around, we -/// probably should have a separate buffer cache so that we could control the -/// replacement policy globally. Or if we keep a reconstructed page image, we -/// could throw away the WAL record. -/// #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PageVersion { - /// an 8kb page image - pub page_image: Option, - /// WAL record to get from previous page version to this one. - pub record: Option, +pub enum PageVersion { + Page(Bytes), + Wal(WALRecord), } ///