WIP: store base images separately

This commit is contained in:
Heikki Linnakangas
2021-08-16 22:04:41 +03:00
parent 8d2b61f4d1
commit 882f549236
2 changed files with 87 additions and 41 deletions

View File

@@ -43,6 +43,8 @@ pub struct InMemoryLayerInner {
/// If this relation was dropped, remember when that happened.
drop_lsn: Option<Lsn>,
base_images: Vec<Bytes>,
///
/// All versions of all pages in the layer are kept here.
/// Indexed by block number and LSN.
@@ -127,7 +129,18 @@ impl Layer for InMemoryLayer {
}
}
// release lock on 'page_versions'
// Use the base image, if needed
if need_base_image_lsn.is_some() {
let base_blknum: usize = (blknum % RELISH_SEG_SIZE) as usize;
if let Some(img) = inner.base_images.get(base_blknum) {
reconstruct_data.page_img = Some(img.clone());
need_base_image_lsn = None;
} else {
bail!("inmem: no base img found for {} at blk {} at LSN {}", self.seg, base_blknum, lsn);
}
}
// release lock on 'inner'
}
Ok(need_base_image_lsn)
@@ -135,18 +148,20 @@ impl Layer for InMemoryLayer {
/// Get size of the relation at given LSN
fn get_seg_size(&self, lsn: Lsn) -> Result<u32> {
assert!(lsn >= self.start_lsn);
// Scan the BTreeMap backwards, starting from the given entry.
let inner = self.inner.lock().unwrap();
let mut iter = inner.segsizes.range((Included(&Lsn(0)), Included(&lsn)));
let result;
if let Some((_entry_lsn, entry)) = iter.next_back() {
let result = *entry;
drop(inner);
trace!("get_seg_size: {} at {} -> {}", self.seg, lsn, result);
Ok(result)
result = *entry;
} else {
bail!("No size found for {} at {} in memory", self.seg, lsn);
result = inner.base_images.len() as u32;
}
trace!("get_seg_size: {} at {} -> {}", self.seg, lsn, result);
Ok(result)
}
/// Does this segment exist at given LSN?
@@ -198,6 +213,7 @@ impl InMemoryLayer {
oldest_pending_lsn,
inner: Mutex::new(InMemoryLayerInner {
drop_lsn: None,
base_images: Vec::new(),
page_versions: BTreeMap::new(),
segsizes: BTreeMap::new(),
mem_used: 0,
@@ -270,7 +286,7 @@ impl InMemoryLayer {
if let Some((_entry_lsn, entry)) = iter.next_back() {
oldsize = *entry;
} else {
oldsize = 0;
oldsize = inner.base_images.len() as u32;
//bail!("No old size found for {} at {}", self.tag, lsn);
}
if newsize > oldsize {
@@ -326,14 +342,6 @@ impl InMemoryLayer {
start_lsn: Lsn,
oldest_pending_lsn: Lsn,
) -> Result<InMemoryLayer> {
trace!(
"initializing new InMemoryLayer for writing {} on timeline {} at {}",
src.get_seg_tag(),
timelineid,
start_lsn
);
let mut page_versions = BTreeMap::new();
let mut segsizes = BTreeMap::new();
let mut mem_used = 0;
let seg = src.get_seg_tag();
@@ -342,21 +350,27 @@ impl InMemoryLayer {
let size;
if seg.rel.is_blocky() {
size = src.get_seg_size(start_lsn)?;
segsizes.insert(start_lsn, size);
startblk = seg.segno * RELISH_SEG_SIZE;
} else {
size = 1;
startblk = 0;
}
for blknum in startblk..(startblk + size) {
trace!(
"initializing new InMemoryLayer for writing {} on timeline {} at {}, size {}",
src.get_seg_tag(),
timelineid,
start_lsn,
size,
);
let mut base_images: Vec<Bytes> = Vec::new();
for blknum in startblk..(startblk+size) {
let img = timeline.materialize_page(seg, blknum, start_lsn, src)?;
let pv = PageVersion {
page_image: Some(img),
record: None,
};
mem_used += pv.get_mem_size();
page_versions.insert((blknum, start_lsn), pv);
mem_used += img.len();
base_images.push(img);
}
Ok(InMemoryLayer {
@@ -368,8 +382,9 @@ impl InMemoryLayer {
oldest_pending_lsn,
inner: Mutex::new(InMemoryLayerInner {
drop_lsn: None,
page_versions: page_versions,
segsizes: segsizes,
base_images: base_images,
page_versions: BTreeMap::new(),
segsizes: BTreeMap::new(),
mem_used: mem_used,
}),
})
@@ -413,6 +428,7 @@ impl InMemoryLayer {
};
// Divide all the page versions into old and new at the 'end_lsn' cutoff point.
let before_base_images = inner.base_images.clone();
let mut before_page_versions;
let mut before_segsizes;
let mut after_page_versions;
@@ -456,6 +472,7 @@ impl InMemoryLayer {
self.start_lsn,
end_lsn,
dropped,
before_base_images,
before_page_versions,
before_segsizes,
)?;

View File

@@ -36,14 +36,17 @@
//!
//! A snapshot file is constructed using the 'bookfile' crate. Each file consists of three
//! parts: the base images, the page versions and the relation sizes. They are stored as
//! separate chapters.
//! FIXME
//!
use crate::layered_repository::storage_layer::{
Layer, PageReconstructData, PageVersion, SegmentTag,
};
use crate::layered_repository::filename::{SnapshotFileName};
use crate::layered_repository::RELISH_SEG_SIZE;
use crate::PageServerConf;
use crate::{ZTenantId, ZTimelineId};
use anyhow::{bail, Result};
use bytes::Bytes;
use log::*;
use std::collections::BTreeMap;
use std::fs;
@@ -61,8 +64,9 @@ use zenith_utils::lsn::Lsn;
// Magic constant to identify a Zenith snapshot file
static SNAPSHOT_FILE_MAGIC: u32 = 0x5A616E01;
static PAGE_VERSIONS_CHAPTER: u64 = 1;
static REL_SIZES_CHAPTER: u64 = 2;
static BASE_IMAGES_CHAPTER: u64 = 1;
static PAGE_VERSIONS_CHAPTER: u64 = 2;
static REL_SIZES_CHAPTER: u64 = 3;
///
/// SnapshotLayer is the in-memory data structure associated with an
@@ -94,6 +98,9 @@ pub struct SnapshotLayerInner {
/// loaded into memory yet.
loaded: bool,
// indexed by block number (within segment)
base_images: Vec<Bytes>,
/// All versions of all pages in the file are kept here.
/// Indexed by block number and LSN.
page_versions: BTreeMap<(u32, Lsn), PageVersion>,
@@ -159,6 +166,17 @@ impl Layer for SnapshotLayer {
}
}
// Use the base image, if needed
if need_base_image_lsn.is_some() {
let base_blknum: usize = (blknum % RELISH_SEG_SIZE) as usize;
if let Some(img) = inner.base_images.get(base_blknum) {
reconstruct_data.page_img = Some(img.clone());
need_base_image_lsn = None;
} else {
bail!("no base img found for {} at blk {} at LSN {}", self.seg, base_blknum, lsn);
}
}
// release lock on 'inner'
}
@@ -167,26 +185,21 @@ impl Layer for SnapshotLayer {
/// Get size of the relation at given LSN
fn get_seg_size(&self, lsn: Lsn) -> Result<u32> {
assert!(lsn >= self.start_lsn);
// Scan the BTreeMap backwards, starting from the given entry.
let inner = self.load()?;
let mut iter = inner.relsizes.range((Included(&Lsn(0)), Included(&lsn)));
let result;
if let Some((_entry_lsn, entry)) = iter.next_back() {
let result = *entry;
drop(inner);
trace!("get_seg_size: {} at {} -> {}", self.seg, lsn, result);
Ok(result)
result = *entry;
} else {
error!(
"No size found for {} at {} in snapshot layer {} {}-{}",
self.seg, lsn, self.seg, self.start_lsn, self.end_lsn
);
bail!(
"No size found for {} at {} in snapshot layer",
self.seg,
lsn
);
result = inner.base_images.len() as u32;
}
info!("get_seg_size: {} at {} -> {}", self.seg, lsn, result);
Ok(result)
}
/// Does this segment exist at given LSN?
@@ -240,9 +253,11 @@ impl SnapshotLayer {
start_lsn: Lsn,
end_lsn: Lsn,
dropped: bool,
base_images: Vec<Bytes>,
page_versions: BTreeMap<(u32, Lsn), PageVersion>,
relsizes: BTreeMap<Lsn, u32>,
) -> Result<SnapshotLayer> {
let snapfile = SnapshotLayer {
conf: conf,
timelineid: timelineid,
@@ -253,6 +268,7 @@ impl SnapshotLayer {
dropped,
inner: Mutex::new(SnapshotLayerInner {
loaded: true,
base_images: base_images,
page_versions: page_versions,
relsizes: relsizes,
}),
@@ -267,7 +283,14 @@ impl SnapshotLayer {
let file = File::create(&path)?;
let book = BookWriter::new(file, SNAPSHOT_FILE_MAGIC)?;
// Write out page versions
// Write out the base images
let mut chapter = book.new_chapter(BASE_IMAGES_CHAPTER);
let buf = Vec::ser(&inner.base_images)?;
chapter.write_all(&buf)?;
let book = chapter.close()?;
// Write out the other page versions
let mut chapter = book.new_chapter(PAGE_VERSIONS_CHAPTER);
let buf = BTreeMap::ser(&inner.page_versions)?;
chapter.write_all(&buf)?;
@@ -314,6 +337,9 @@ impl SnapshotLayer {
let file = File::open(&path)?;
let book = Book::new(file)?;
let chapter = book.read_chapter(BASE_IMAGES_CHAPTER)?;
let base_images = Vec::des(&chapter)?;
let chapter = book.read_chapter(PAGE_VERSIONS_CHAPTER)?;
let page_versions = BTreeMap::des(&chapter)?;
@@ -324,6 +350,7 @@ impl SnapshotLayer {
*inner = SnapshotLayerInner {
loaded: true,
base_images,
page_versions,
relsizes,
};
@@ -350,6 +377,7 @@ impl SnapshotLayer {
dropped: filename.dropped,
inner: Mutex::new(SnapshotLayerInner {
loaded: false,
base_images: Vec::new(),
page_versions: BTreeMap::new(),
relsizes: BTreeMap::new(),
}),
@@ -370,6 +398,7 @@ impl SnapshotLayer {
///
pub fn unload(&self) -> Result<()> {
let mut inner = self.inner.lock().unwrap();
inner.base_images = Vec::new();
inner.page_versions = BTreeMap::new();
inner.relsizes = BTreeMap::new();
inner.loaded = false;