Compare commits

...

3 Commits

Author SHA1 Message Date
Christian Schwarz
07808124f6 separate rwlock for second level of immutable_page_map to avoid contention 2023-08-28 19:28:49 +02:00
Christian Schwarz
dc37d9e8ac page_cache: drop_buffers_for_immutable: don't scan the entire 'slots'
Before this patch, `drop_buffers_for_immutable` would scan the entire
`slots` to proactively evict pages.

Some background on `drop_buffers_for_immutable`: it's really just a nice
courtesy to give back the slots that it used. Why is it just
nice-to-have? We're dropping the EphemeralFile, so, these slots are
never going to get accessed again.
The `find_victim` will eventually evict them.
2023-08-28 19:26:05 +02:00
Christian Schwarz
8c933a9686 page_cache: two-level immutable_page_map
This change alone makes things strictly worse, but on top of this, we'll
be able to rewrite drop_buffers_for_immutable to remove pages based on
what's in the `drop_file_id`'s immutable_page_map second-level hash map.
That's better than what we do currently, i.e., iterating over all the
slots.
2023-08-28 19:12:41 +02:00
5 changed files with 91 additions and 20 deletions

View File

@@ -99,6 +99,7 @@ async fn get_holes(path: &Path, max_holes: usize) -> Result<Vec<Hole>> {
let file = FileBlockReader::new(VirtualFile::open(path)?);
let summary_blk = file.read_blk(0)?;
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
drop(summary_blk); // so we don't borrow `file` for too long
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
actual_summary.index_start_blk,
actual_summary.index_root_blk,

View File

@@ -77,7 +77,7 @@ use std::{
convert::TryInto,
sync::{
atomic::{AtomicU64, AtomicU8, AtomicUsize, Ordering},
RwLock, RwLockReadGuard, RwLockWriteGuard, TryLockError,
RwLock, RwLockReadGuard, RwLockWriteGuard, TryLockError, Arc, TryLockResult,
},
};
@@ -205,6 +205,8 @@ impl Slot {
}
}
/// Two-level map for immutable-file pages: outer key is the file, inner map goes
/// block number -> slot index. The inner map sits behind its own `Arc<RwLock<..>>`
/// so per-file operations don't contend on the outer `immutable_page_map` lock
/// (see the commit message: "separate rwlock for second level of immutable_page_map
/// to avoid contention").
type ImmutablePageMap = HashMap<FileId, Arc<RwLock<HashMap<u32, usize>>>>;
pub struct PageCache {
/// This contains the mapping from the cache key to buffer slot that currently
/// contains the page, if any.
@@ -217,7 +219,7 @@ pub struct PageCache {
/// can have a separate mapping map, next to this field.
materialized_page_map: RwLock<HashMap<MaterializedPageHashKey, Vec<Version>>>,
immutable_page_map: RwLock<HashMap<(FileId, u32), usize>>,
immutable_page_map: RwLock<ImmutablePageMap>,
/// The actual buffers with their metadata.
slots: Box<[Slot]>,
@@ -427,22 +429,55 @@ impl PageCache {
/// Immediately drop all buffers belonging to given file
pub fn drop_buffers_for_immutable(&self, drop_file_id: FileId) {
for slot_idx in 0..self.slots.len() {
let slot = &self.slots[slot_idx];
let map = self.immutable_page_map.read().unwrap();
let Some(block_nos_arc_rwl) = map.get(&drop_file_id).map(Arc::clone) else {
return
};
drop(map); // avoid contention on immutable_page_map
let mut inner = slot.inner.write().unwrap();
if let Some(key) = &inner.key {
match key {
CacheKey::ImmutableFilePage { file_id, blkno: _ }
if *file_id == drop_file_id =>
{
// remove mapping for old buffer
self.remove_mapping(key);
let mut block_nos_guard = block_nos_arc_rwl.write().unwrap();
block_nos_guard.retain(|block_no, slot_idx| {
let expect_cache_key = CacheKey::ImmutableFilePage {
file_id: drop_file_id,
blkno: *block_no,
};
let slot = &self.slots[*slot_idx];
match slot.inner.try_write() {
TryLockResult::Ok(mut inner) => {
// check again, could have been taken since added to immutable_page_map
if inner.key == Some(expect_cache_key) {
// immutable_page_map was still in sync with reality
// TODO: find a way to share code with `remove_mapping`
self.size_metrics.current_bytes_immutable.sub_page_sz(1);
inner.key = None;
}
_ => {}
false
}
TryLockResult::Err(e @ TryLockError::Poisoned(_)) => {
panic!("slot lock {slot_idx} poisoned: {e:?}")
}
TryLockResult::Err(TryLockError::WouldBlock) => {
// This function is only called from the `Drop` impl of EphemeralFile.
// So, there shouldn't be any page cache users that are reading from
// that EphemeralFile anymore, because,
// 1. EphemeralFile doesn't hand out page cache read guards that outlive EphemeralFile and
// 2. there can't be any concurrent `EphemeralFile::read`s because it's being dropped.
//
// So, the only reason why the page is locked right now is `find_victim`
// trying to claim this page.
// So, let `find_victim` do the work.
// Also, it will call `remove_mapping()` to remove the (file_id, block_no)
// from the immutable_page_map; so, keep the entry in the map here, otherwise
// the `remove_mapping()` call will panic.
true
}
}
});
if block_nos_guard.is_empty() {
drop(block_nos_guard); // remove_mapping_immutable_file_common re-acquires the lock
drop(block_nos_arc_rwl);
let mut map = self.immutable_page_map.write().unwrap();
self.remove_mapping_immutable_file_common(&mut map, drop_file_id)
}
}
@@ -657,7 +692,9 @@ impl PageCache {
}
CacheKey::ImmutableFilePage { file_id, blkno } => {
let map = self.immutable_page_map.read().unwrap();
Some(*map.get(&(*file_id, *blkno))?)
let block_nos = map.get(file_id)?;
let block_nos = block_nos.read().unwrap();
block_nos.get(blkno).copied()
}
}
}
@@ -680,7 +717,9 @@ impl PageCache {
}
CacheKey::ImmutableFilePage { file_id, blkno } => {
let map = self.immutable_page_map.read().unwrap();
Some(*map.get(&(*file_id, *blkno))?)
let block_nos = map.get(file_id)?;
let block_nos = block_nos.read().unwrap();
block_nos.get(blkno).copied()
}
}
}
@@ -712,14 +751,39 @@ impl PageCache {
}
}
CacheKey::ImmutableFilePage { file_id, blkno } => {
let mut map = self.immutable_page_map.write().unwrap();
map.remove(&(*file_id, *blkno))
.expect("could not find old key in mapping");
let map = self.immutable_page_map.read().unwrap();
let block_nos = map.get(file_id).expect("could not find file_id in mapping");
let mut block_nos = block_nos.write().unwrap();
block_nos
.remove(blkno)
.expect("could not find blkno in mapping");
self.size_metrics.current_bytes_immutable.sub_page_sz(1);
if block_nos.is_empty() {
drop(block_nos);
// re-lock map in write mode
drop(map);
let mut map = self.immutable_page_map.write().unwrap();
self.remove_mapping_immutable_file_common(&mut map, *file_id)
}
}
}
}
/// Removes `file_id`'s entry from the outer `immutable_page_map` if its
/// second-level block map is empty.
///
/// Callers observe emptiness under the *inner* lock, drop their guards, then
/// re-acquire the outer map in write mode and call this. Another thread may
/// have inserted a block in between, so emptiness is re-checked here under
/// the locks before removing; if the entry is gone or non-empty again, this
/// is a no-op.
fn remove_mapping_immutable_file_common(
&self,
map: &mut RwLockWriteGuard<ImmutablePageMap>,
file_id: FileId,
) {
// Entry may already have been removed by a racing caller.
let Some(block_nos_rwl) = map.get(&file_id) else {
return;
};
// A read lock suffices: we only inspect emptiness, and holding the outer
// map in write mode prevents the Arc from being replaced concurrently.
let block_nos = block_nos_rwl.read().unwrap();
if block_nos.is_empty() {
// The guard borrows from `map`'s entry; release it before mutating `map`.
drop(block_nos);
map.remove(&file_id);
}
}
///
/// Insert mapping for given key.
///
@@ -753,7 +817,9 @@ impl PageCache {
CacheKey::ImmutableFilePage { file_id, blkno } => {
let mut map = self.immutable_page_map.write().unwrap();
match map.entry((*file_id, *blkno)) {
let block_nos = map.entry(*file_id).or_default();
let mut block_nos = block_nos.write().unwrap();
match block_nos.entry(*blkno) {
Entry::Occupied(entry) => Some(*entry.get()),
Entry::Vacant(entry) => {
entry.insert(slot_idx);

View File

@@ -36,7 +36,9 @@ where
/// Reference to an in-memory copy of an immutable on-disk block.
pub enum BlockLease<'a> {
PageReadGuard(PageReadGuard<'static>),
/// [crate::page_cache::PageCache::drop_buffers_for_immutable] relies on the read guard
/// not outliving the EphemeralFile. See the comment in there for details.
PageReadGuard(PageReadGuard<'a>),
EphemeralFileMutableTail(&'a [u8; PAGE_SZ]),
#[cfg(test)]
Rc(std::rc::Rc<[u8; PAGE_SZ]>),

View File

@@ -848,6 +848,7 @@ impl DeltaLayerInner {
let summary_blk = file.read_blk(0)?;
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
drop(summary_blk); // so we don't borrow `file` for too long
if let Some(mut expected_summary) = summary {
// production code path

View File

@@ -442,6 +442,7 @@ impl ImageLayerInner {
let file = FileBlockReader::new(file);
let summary_blk = file.read_blk(0)?;
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
drop(summary_blk); // so we don't borrow `file` for too long
if let Some(mut expected_summary) = summary {
// production code path