diff --git a/pageserver/src/layered_repository/inmemory_layer.rs b/pageserver/src/layered_repository/inmemory_layer.rs index 7f7492f4a5..7266879a43 100644 --- a/pageserver/src/layered_repository/inmemory_layer.rs +++ b/pageserver/src/layered_repository/inmemory_layer.rs @@ -43,6 +43,8 @@ pub struct InMemoryLayerInner { /// If this relation was dropped, remember when that happened. drop_lsn: Option, + base_images: Vec, + /// /// All versions of all pages in the layer are are kept here. /// Indexed by block number and LSN. @@ -127,7 +129,18 @@ impl Layer for InMemoryLayer { } } - // release lock on 'page_versions' + // Use the base image, if needed + if need_base_image_lsn.is_some() { + let base_blknum: usize = (blknum % RELISH_SEG_SIZE) as usize; + if let Some(img) = inner.base_images.get(base_blknum) { + reconstruct_data.page_img = Some(img.clone()); + need_base_image_lsn = None; + } else { + bail!("inmem: no base img found for {} at blk {} at LSN {}", self.seg, base_blknum, lsn); + } + } + + // release lock on 'inner' } Ok(need_base_image_lsn) @@ -135,18 +148,20 @@ impl Layer for InMemoryLayer { /// Get size of the relation at given LSN fn get_seg_size(&self, lsn: Lsn) -> Result { + assert!(lsn >= self.start_lsn); + // Scan the BTreeMap backwards, starting from the given entry. let inner = self.inner.lock().unwrap(); let mut iter = inner.segsizes.range((Included(&Lsn(0)), Included(&lsn))); + let result; if let Some((_entry_lsn, entry)) = iter.next_back() { - let result = *entry; - drop(inner); - trace!("get_seg_size: {} at {} -> {}", self.seg, lsn, result); - Ok(result) + result = *entry; } else { - bail!("No size found for {} at {} in memory", self.seg, lsn); + result = inner.base_images.len() as u32; } + trace!("get_seg_size: {} at {} -> {}", self.seg, lsn, result); + Ok(result) } /// Does this segment exist at given LSN? @@ -198,6 +213,7 @@ impl InMemoryLayer { oldest_pending_lsn, inner: Mutex::new(InMemoryLayerInner { drop_lsn: None, + base_images: Vec::new(), page_versions: BTreeMap::new(), segsizes: BTreeMap::new(), mem_used: 0, @@ -270,7 +286,7 @@ impl InMemoryLayer { if let Some((_entry_lsn, entry)) = iter.next_back() { oldsize = *entry; } else { - oldsize = 0; + oldsize = inner.base_images.len() as u32; //bail!("No old size found for {} at {}", self.tag, lsn); } if newsize > oldsize { @@ -326,14 +342,6 @@ impl InMemoryLayer { start_lsn: Lsn, oldest_pending_lsn: Lsn, ) -> Result { - trace!( - "initializing new InMemoryLayer for writing {} on timeline {} at {}", - src.get_seg_tag(), - timelineid, - start_lsn - ); - let mut page_versions = BTreeMap::new(); - let mut segsizes = BTreeMap::new(); let mut mem_used = 0; let seg = src.get_seg_tag(); @@ -342,21 +350,27 @@ impl InMemoryLayer { let size; if seg.rel.is_blocky() { size = src.get_seg_size(start_lsn)?; - segsizes.insert(start_lsn, size); startblk = seg.segno * RELISH_SEG_SIZE; } else { size = 1; startblk = 0; } - for blknum in startblk..(startblk + size) { + trace!( + "initializing new InMemoryLayer for writing {} on timeline {} at {}, size {}", + src.get_seg_tag(), + timelineid, + start_lsn, + size, + ); + + let mut base_images: Vec = Vec::new(); + for blknum in startblk..(startblk+size) { let img = timeline.materialize_page(seg, blknum, start_lsn, src)?; - let pv = PageVersion { - page_image: Some(img), - record: None, - }; - mem_used += pv.get_mem_size(); - page_versions.insert((blknum, start_lsn), pv); + + mem_used += img.len(); + + base_images.push(img); } Ok(InMemoryLayer { @@ -368,8 +382,9 @@ impl InMemoryLayer { oldest_pending_lsn, inner: Mutex::new(InMemoryLayerInner { drop_lsn: None, - page_versions: page_versions, - segsizes: segsizes, + base_images: base_images, + page_versions: BTreeMap::new(), + segsizes: BTreeMap::new(), mem_used: mem_used, }), }) @@ -413,6 +428,7 @@ impl InMemoryLayer { }; // Divide all the page versions into old and new at the 'end_lsn' cutoff point. + let before_base_images = inner.base_images.clone(); let mut before_page_versions; let mut before_segsizes; let mut after_page_versions; @@ -456,6 +472,7 @@ impl InMemoryLayer { self.start_lsn, end_lsn, dropped, + before_base_images, before_page_versions, before_segsizes, )?; diff --git a/pageserver/src/layered_repository/snapshot_layer.rs b/pageserver/src/layered_repository/snapshot_layer.rs index 084d023cbe..34f69fb93c 100644 --- a/pageserver/src/layered_repository/snapshot_layer.rs +++ b/pageserver/src/layered_repository/snapshot_layer.rs @@ -36,14 +36,17 @@ //! //! A snapshot file is constructed using the 'bookfile' crate. Each file consists of two //! parts: the page versions and the relation sizes. They are stored as separate chapters. +//! FIXME //! use crate::layered_repository::storage_layer::{ Layer, PageReconstructData, PageVersion, SegmentTag, }; use crate::layered_repository::filename::{SnapshotFileName}; +use crate::layered_repository::RELISH_SEG_SIZE; use crate::PageServerConf; use crate::{ZTenantId, ZTimelineId}; use anyhow::{bail, Result}; +use bytes::Bytes; use log::*; use std::collections::BTreeMap; use std::fs; @@ -61,8 +64,9 @@ use zenith_utils::lsn::Lsn; // Magic constant to identify a Zenith snapshot file static SNAPSHOT_FILE_MAGIC: u32 = 0x5A616E01; -static PAGE_VERSIONS_CHAPTER: u64 = 1; -static REL_SIZES_CHAPTER: u64 = 2; +static BASE_IMAGES_CHAPTER: u64 = 1; +static PAGE_VERSIONS_CHAPTER: u64 = 2; +static REL_SIZES_CHAPTER: u64 = 3; /// /// SnapshotLayer is the in-memory data structure associated with an @@ -94,6 +98,9 @@ pub struct SnapshotLayerInner { /// loaded into memory yet. loaded: bool, + // indexed by block number (within segment) + base_images: Vec, + /// All versions of all pages in the file are are kept here. /// Indexed by block number and LSN. page_versions: BTreeMap<(u32, Lsn), PageVersion>, @@ -159,6 +166,17 @@ impl Layer for SnapshotLayer { } } + // Use the base image, if needed + if need_base_image_lsn.is_some() { + let base_blknum: usize = (blknum % RELISH_SEG_SIZE) as usize; + if let Some(img) = inner.base_images.get(base_blknum) { + reconstruct_data.page_img = Some(img.clone()); + need_base_image_lsn = None; + } else { + bail!("no base img found for {} at blk {} at LSN {}", self.seg, base_blknum, lsn); + } + } + // release lock on 'inner' } @@ -167,26 +185,21 @@ impl Layer for SnapshotLayer { /// Get size of the relation at given LSN fn get_seg_size(&self, lsn: Lsn) -> Result { + + assert!(lsn >= self.start_lsn); + // Scan the BTreeMap backwards, starting from the given entry. let inner = self.load()?; let mut iter = inner.relsizes.range((Included(&Lsn(0)), Included(&lsn))); + let result; if let Some((_entry_lsn, entry)) = iter.next_back() { - let result = *entry; - drop(inner); - trace!("get_seg_size: {} at {} -> {}", self.seg, lsn, result); - Ok(result) + result = *entry; } else { - error!( - "No size found for {} at {} in snapshot layer {} {}-{}", - self.seg, lsn, self.seg, self.start_lsn, self.end_lsn - ); - bail!( - "No size found for {} at {} in snapshot layer", - self.seg, - lsn - ); + result = inner.base_images.len() as u32; } + info!("get_seg_size: {} at {} -> {}", self.seg, lsn, result); + Ok(result) } /// Does this segment exist at given LSN? @@ -240,9 +253,11 @@ impl SnapshotLayer { start_lsn: Lsn, end_lsn: Lsn, dropped: bool, + base_images: Vec, page_versions: BTreeMap<(u32, Lsn), PageVersion>, relsizes: BTreeMap, ) -> Result { + let snapfile = SnapshotLayer { conf: conf, timelineid: timelineid, @@ -253,6 +268,7 @@ impl SnapshotLayer { dropped, inner: Mutex::new(SnapshotLayerInner { loaded: true, + base_images: base_images, page_versions: page_versions, relsizes: relsizes, }), @@ -267,7 +283,14 @@ impl SnapshotLayer { let file = File::create(&path)?; let book = BookWriter::new(file, SNAPSHOT_FILE_MAGIC)?; - // Write out page versions + // Write out the base images + let mut chapter = book.new_chapter(BASE_IMAGES_CHAPTER); + let buf = Vec::ser(&inner.base_images)?; + + chapter.write_all(&buf)?; + let book = chapter.close()?; + + // Write out the other page versions let mut chapter = book.new_chapter(PAGE_VERSIONS_CHAPTER); let buf = BTreeMap::ser(&inner.page_versions)?; chapter.write_all(&buf)?; @@ -314,6 +337,9 @@ impl SnapshotLayer { let file = File::open(&path)?; let book = Book::new(file)?; + let chapter = book.read_chapter(BASE_IMAGES_CHAPTER)?; + let base_images = Vec::des(&chapter)?; + let chapter = book.read_chapter(PAGE_VERSIONS_CHAPTER)?; let page_versions = BTreeMap::des(&chapter)?; @@ -324,6 +350,7 @@ impl SnapshotLayer { *inner = SnapshotLayerInner { loaded: true, + base_images, page_versions, relsizes, }; @@ -350,6 +377,7 @@ impl SnapshotLayer { dropped: filename.dropped, inner: Mutex::new(SnapshotLayerInner { loaded: false, + base_images: Vec::new(), page_versions: BTreeMap::new(), relsizes: BTreeMap::new(), }), @@ -370,6 +398,7 @@ impl SnapshotLayer { /// pub fn unload(&self) -> Result<()> { let mut inner = self.inner.lock().unwrap(); + inner.base_images = Vec::new(); inner.page_versions = BTreeMap::new(); inner.relsizes = BTreeMap::new(); inner.loaded = false;