mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-05 20:42:54 +00:00
WIP: store base images separately
This commit is contained in:
@@ -43,6 +43,8 @@ pub struct InMemoryLayerInner {
|
||||
/// If this relation was dropped, remember when that happened.
|
||||
drop_lsn: Option<Lsn>,
|
||||
|
||||
base_images: Vec<Bytes>,
|
||||
|
||||
///
|
||||
/// All versions of all pages in the layer are kept here.
|
||||
/// Indexed by block number and LSN.
|
||||
@@ -127,7 +129,18 @@ impl Layer for InMemoryLayer {
|
||||
}
|
||||
}
|
||||
|
||||
// release lock on 'page_versions'
|
||||
// Use the base image, if needed
|
||||
if need_base_image_lsn.is_some() {
|
||||
let base_blknum: usize = (blknum % RELISH_SEG_SIZE) as usize;
|
||||
if let Some(img) = inner.base_images.get(base_blknum) {
|
||||
reconstruct_data.page_img = Some(img.clone());
|
||||
need_base_image_lsn = None;
|
||||
} else {
|
||||
bail!("inmem: no base img found for {} at blk {} at LSN {}", self.seg, base_blknum, lsn);
|
||||
}
|
||||
}
|
||||
|
||||
// release lock on 'inner'
|
||||
}
|
||||
|
||||
Ok(need_base_image_lsn)
|
||||
@@ -135,18 +148,20 @@ impl Layer for InMemoryLayer {
|
||||
|
||||
/// Get size of the relation at given LSN
|
||||
fn get_seg_size(&self, lsn: Lsn) -> Result<u32> {
|
||||
assert!(lsn >= self.start_lsn);
|
||||
|
||||
// Scan the BTreeMap backwards, starting from the given entry.
|
||||
let inner = self.inner.lock().unwrap();
|
||||
let mut iter = inner.segsizes.range((Included(&Lsn(0)), Included(&lsn)));
|
||||
|
||||
let result;
|
||||
if let Some((_entry_lsn, entry)) = iter.next_back() {
|
||||
let result = *entry;
|
||||
drop(inner);
|
||||
trace!("get_seg_size: {} at {} -> {}", self.seg, lsn, result);
|
||||
Ok(result)
|
||||
result = *entry;
|
||||
} else {
|
||||
bail!("No size found for {} at {} in memory", self.seg, lsn);
|
||||
result = inner.base_images.len() as u32;
|
||||
}
|
||||
trace!("get_seg_size: {} at {} -> {}", self.seg, lsn, result);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Does this segment exist at given LSN?
|
||||
@@ -198,6 +213,7 @@ impl InMemoryLayer {
|
||||
oldest_pending_lsn,
|
||||
inner: Mutex::new(InMemoryLayerInner {
|
||||
drop_lsn: None,
|
||||
base_images: Vec::new(),
|
||||
page_versions: BTreeMap::new(),
|
||||
segsizes: BTreeMap::new(),
|
||||
mem_used: 0,
|
||||
@@ -270,7 +286,7 @@ impl InMemoryLayer {
|
||||
if let Some((_entry_lsn, entry)) = iter.next_back() {
|
||||
oldsize = *entry;
|
||||
} else {
|
||||
oldsize = 0;
|
||||
oldsize = inner.base_images.len() as u32;
|
||||
//bail!("No old size found for {} at {}", self.tag, lsn);
|
||||
}
|
||||
if newsize > oldsize {
|
||||
@@ -326,14 +342,6 @@ impl InMemoryLayer {
|
||||
start_lsn: Lsn,
|
||||
oldest_pending_lsn: Lsn,
|
||||
) -> Result<InMemoryLayer> {
|
||||
trace!(
|
||||
"initializing new InMemoryLayer for writing {} on timeline {} at {}",
|
||||
src.get_seg_tag(),
|
||||
timelineid,
|
||||
start_lsn
|
||||
);
|
||||
let mut page_versions = BTreeMap::new();
|
||||
let mut segsizes = BTreeMap::new();
|
||||
let mut mem_used = 0;
|
||||
|
||||
let seg = src.get_seg_tag();
|
||||
@@ -342,21 +350,27 @@ impl InMemoryLayer {
|
||||
let size;
|
||||
if seg.rel.is_blocky() {
|
||||
size = src.get_seg_size(start_lsn)?;
|
||||
segsizes.insert(start_lsn, size);
|
||||
startblk = seg.segno * RELISH_SEG_SIZE;
|
||||
} else {
|
||||
size = 1;
|
||||
startblk = 0;
|
||||
}
|
||||
|
||||
for blknum in startblk..(startblk + size) {
|
||||
trace!(
|
||||
"initializing new InMemoryLayer for writing {} on timeline {} at {}, size {}",
|
||||
src.get_seg_tag(),
|
||||
timelineid,
|
||||
start_lsn,
|
||||
size,
|
||||
);
|
||||
|
||||
let mut base_images: Vec<Bytes> = Vec::new();
|
||||
for blknum in startblk..(startblk+size) {
|
||||
let img = timeline.materialize_page(seg, blknum, start_lsn, src)?;
|
||||
let pv = PageVersion {
|
||||
page_image: Some(img),
|
||||
record: None,
|
||||
};
|
||||
mem_used += pv.get_mem_size();
|
||||
page_versions.insert((blknum, start_lsn), pv);
|
||||
|
||||
mem_used += img.len();
|
||||
|
||||
base_images.push(img);
|
||||
}
|
||||
|
||||
Ok(InMemoryLayer {
|
||||
@@ -368,8 +382,9 @@ impl InMemoryLayer {
|
||||
oldest_pending_lsn,
|
||||
inner: Mutex::new(InMemoryLayerInner {
|
||||
drop_lsn: None,
|
||||
page_versions: page_versions,
|
||||
segsizes: segsizes,
|
||||
base_images: base_images,
|
||||
page_versions: BTreeMap::new(),
|
||||
segsizes: BTreeMap::new(),
|
||||
mem_used: mem_used,
|
||||
}),
|
||||
})
|
||||
@@ -413,6 +428,7 @@ impl InMemoryLayer {
|
||||
};
|
||||
|
||||
// Divide all the page versions into old and new at the 'end_lsn' cutoff point.
|
||||
let before_base_images = inner.base_images.clone();
|
||||
let mut before_page_versions;
|
||||
let mut before_segsizes;
|
||||
let mut after_page_versions;
|
||||
@@ -456,6 +472,7 @@ impl InMemoryLayer {
|
||||
self.start_lsn,
|
||||
end_lsn,
|
||||
dropped,
|
||||
before_base_images,
|
||||
before_page_versions,
|
||||
before_segsizes,
|
||||
)?;
|
||||
|
||||
@@ -36,14 +36,17 @@
|
||||
//!
|
||||
//! A snapshot file is constructed using the 'bookfile' crate. Each file consists of two
|
||||
//! parts: the page versions and the relation sizes. They are stored as separate chapters.
|
||||
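//! FIXME: the description above is out of date. A snapshot file now has three
//! chapters: the base images, the page versions, and the relation sizes.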
|
||||
//!
|
||||
use crate::layered_repository::storage_layer::{
|
||||
Layer, PageReconstructData, PageVersion, SegmentTag,
|
||||
};
|
||||
use crate::layered_repository::filename::{SnapshotFileName};
|
||||
use crate::layered_repository::RELISH_SEG_SIZE;
|
||||
use crate::PageServerConf;
|
||||
use crate::{ZTenantId, ZTimelineId};
|
||||
use anyhow::{bail, Result};
|
||||
use bytes::Bytes;
|
||||
use log::*;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs;
|
||||
@@ -61,8 +64,9 @@ use zenith_utils::lsn::Lsn;
|
||||
// Magic constant to identify a Zenith snapshot file
|
||||
static SNAPSHOT_FILE_MAGIC: u32 = 0x5A616E01;
|
||||
|
||||
static PAGE_VERSIONS_CHAPTER: u64 = 1;
|
||||
static REL_SIZES_CHAPTER: u64 = 2;
|
||||
static BASE_IMAGES_CHAPTER: u64 = 1;
|
||||
static PAGE_VERSIONS_CHAPTER: u64 = 2;
|
||||
static REL_SIZES_CHAPTER: u64 = 3;
|
||||
|
||||
///
|
||||
/// SnapshotLayer is the in-memory data structure associated with an
|
||||
@@ -94,6 +98,9 @@ pub struct SnapshotLayerInner {
|
||||
/// loaded into memory yet.
|
||||
loaded: bool,
|
||||
|
||||
// indexed by block number (within segment)
|
||||
base_images: Vec<Bytes>,
|
||||
|
||||
/// All versions of all pages in the file are kept here.
|
||||
/// Indexed by block number and LSN.
|
||||
page_versions: BTreeMap<(u32, Lsn), PageVersion>,
|
||||
@@ -159,6 +166,17 @@ impl Layer for SnapshotLayer {
|
||||
}
|
||||
}
|
||||
|
||||
// Use the base image, if needed
|
||||
if need_base_image_lsn.is_some() {
|
||||
let base_blknum: usize = (blknum % RELISH_SEG_SIZE) as usize;
|
||||
if let Some(img) = inner.base_images.get(base_blknum) {
|
||||
reconstruct_data.page_img = Some(img.clone());
|
||||
need_base_image_lsn = None;
|
||||
} else {
|
||||
bail!("no base img found for {} at blk {} at LSN {}", self.seg, base_blknum, lsn);
|
||||
}
|
||||
}
|
||||
|
||||
// release lock on 'inner'
|
||||
}
|
||||
|
||||
@@ -167,26 +185,21 @@ impl Layer for SnapshotLayer {
|
||||
|
||||
/// Get size of the relation at given LSN
|
||||
fn get_seg_size(&self, lsn: Lsn) -> Result<u32> {
|
||||
|
||||
assert!(lsn >= self.start_lsn);
|
||||
|
||||
// Scan the BTreeMap backwards, starting from the given entry.
|
||||
let inner = self.load()?;
|
||||
let mut iter = inner.relsizes.range((Included(&Lsn(0)), Included(&lsn)));
|
||||
|
||||
let result;
|
||||
if let Some((_entry_lsn, entry)) = iter.next_back() {
|
||||
let result = *entry;
|
||||
drop(inner);
|
||||
trace!("get_seg_size: {} at {} -> {}", self.seg, lsn, result);
|
||||
Ok(result)
|
||||
result = *entry;
|
||||
} else {
|
||||
error!(
|
||||
"No size found for {} at {} in snapshot layer {} {}-{}",
|
||||
self.seg, lsn, self.seg, self.start_lsn, self.end_lsn
|
||||
);
|
||||
bail!(
|
||||
"No size found for {} at {} in snapshot layer",
|
||||
self.seg,
|
||||
lsn
|
||||
);
|
||||
result = inner.base_images.len() as u32;
|
||||
}
|
||||
info!("get_seg_size: {} at {} -> {}", self.seg, lsn, result);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Does this segment exist at given LSN?
|
||||
@@ -240,9 +253,11 @@ impl SnapshotLayer {
|
||||
start_lsn: Lsn,
|
||||
end_lsn: Lsn,
|
||||
dropped: bool,
|
||||
base_images: Vec<Bytes>,
|
||||
page_versions: BTreeMap<(u32, Lsn), PageVersion>,
|
||||
relsizes: BTreeMap<Lsn, u32>,
|
||||
) -> Result<SnapshotLayer> {
|
||||
|
||||
let snapfile = SnapshotLayer {
|
||||
conf: conf,
|
||||
timelineid: timelineid,
|
||||
@@ -253,6 +268,7 @@ impl SnapshotLayer {
|
||||
dropped,
|
||||
inner: Mutex::new(SnapshotLayerInner {
|
||||
loaded: true,
|
||||
base_images: base_images,
|
||||
page_versions: page_versions,
|
||||
relsizes: relsizes,
|
||||
}),
|
||||
@@ -267,7 +283,14 @@ impl SnapshotLayer {
|
||||
let file = File::create(&path)?;
|
||||
let book = BookWriter::new(file, SNAPSHOT_FILE_MAGIC)?;
|
||||
|
||||
// Write out page versions
|
||||
// Write out the base images
|
||||
let mut chapter = book.new_chapter(BASE_IMAGES_CHAPTER);
|
||||
let buf = Vec::ser(&inner.base_images)?;
|
||||
|
||||
chapter.write_all(&buf)?;
|
||||
let book = chapter.close()?;
|
||||
|
||||
// Write out the other page versions
|
||||
let mut chapter = book.new_chapter(PAGE_VERSIONS_CHAPTER);
|
||||
let buf = BTreeMap::ser(&inner.page_versions)?;
|
||||
chapter.write_all(&buf)?;
|
||||
@@ -314,6 +337,9 @@ impl SnapshotLayer {
|
||||
let file = File::open(&path)?;
|
||||
let book = Book::new(file)?;
|
||||
|
||||
let chapter = book.read_chapter(BASE_IMAGES_CHAPTER)?;
|
||||
let base_images = Vec::des(&chapter)?;
|
||||
|
||||
let chapter = book.read_chapter(PAGE_VERSIONS_CHAPTER)?;
|
||||
let page_versions = BTreeMap::des(&chapter)?;
|
||||
|
||||
@@ -324,6 +350,7 @@ impl SnapshotLayer {
|
||||
|
||||
*inner = SnapshotLayerInner {
|
||||
loaded: true,
|
||||
base_images,
|
||||
page_versions,
|
||||
relsizes,
|
||||
};
|
||||
@@ -350,6 +377,7 @@ impl SnapshotLayer {
|
||||
dropped: filename.dropped,
|
||||
inner: Mutex::new(SnapshotLayerInner {
|
||||
loaded: false,
|
||||
base_images: Vec::new(),
|
||||
page_versions: BTreeMap::new(),
|
||||
relsizes: BTreeMap::new(),
|
||||
}),
|
||||
@@ -370,6 +398,7 @@ impl SnapshotLayer {
|
||||
///
|
||||
pub fn unload(&self) -> Result<()> {
|
||||
let mut inner = self.inner.lock().unwrap();
|
||||
inner.base_images = Vec::new();
|
||||
inner.page_versions = BTreeMap::new();
|
||||
inner.relsizes = BTreeMap::new();
|
||||
inner.loaded = false;
|
||||
|
||||
Reference in New Issue
Block a user