From dc37d9e8acbc9adf0b280b1f33e532e2d53a96d7 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Mon, 28 Aug 2023 19:12:28 +0200 Subject: [PATCH] page_cache: drop_buffers_for_immutable: don't scan the entire 'slots' Before this patch, `drop_buffers_for_immutable` would scan the entire `slots` to proactively evict pages. Some background on `drop_buffers_for_immutable`: it's really just a nice courtesy to give back the slots that it used. Why is it just nice-to-have? We're dropping the EphemeralFile, so, these slots are never going to get accessed again. The `find_victim` will eventually evict them. --- pageserver/ctl/src/layer_map_analyzer.rs | 1 + pageserver/src/page_cache.rs | 86 +++++++++++-------- pageserver/src/tenant/block_io.rs | 4 +- .../src/tenant/storage_layer/delta_layer.rs | 1 + .../src/tenant/storage_layer/image_layer.rs | 1 + 5 files changed, 58 insertions(+), 35 deletions(-) diff --git a/pageserver/ctl/src/layer_map_analyzer.rs b/pageserver/ctl/src/layer_map_analyzer.rs index 883762a273..e29033157b 100644 --- a/pageserver/ctl/src/layer_map_analyzer.rs +++ b/pageserver/ctl/src/layer_map_analyzer.rs @@ -99,6 +99,7 @@ async fn get_holes(path: &Path, max_holes: usize) -> Result> { let file = FileBlockReader::new(VirtualFile::open(path)?); let summary_blk = file.read_blk(0)?; let actual_summary = Summary::des_prefix(summary_blk.as_ref())?; + drop(summary_blk); // so we don't borrow `file` for too long let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new( actual_summary.index_start_blk, actual_summary.index_root_blk, diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs index c9873fbd45..dd82e4d8fe 100644 --- a/pageserver/src/page_cache.rs +++ b/pageserver/src/page_cache.rs @@ -77,7 +77,7 @@ use std::{ convert::TryInto, sync::{ atomic::{AtomicU64, AtomicU8, AtomicUsize, Ordering}, - RwLock, RwLockReadGuard, RwLockWriteGuard, TryLockError, + RwLock, RwLockReadGuard, RwLockWriteGuard, TryLockError, TryLockResult, }, }; @@ -205,7 +205,7 @@ impl Slot { } } -type ImmutablePageMap = HashMap>>; +type ImmutablePageMap = HashMap>; pub struct PageCache { /// This contains the mapping from the cache key to buffer slot that currently @@ -429,22 +429,52 @@ impl PageCache { /// Immediately drop all buffers belonging to given file pub fn drop_buffers_for_immutable(&self, drop_file_id: FileId) { - for slot_idx in 0..self.slots.len() { - let slot = &self.slots[slot_idx]; - - let mut inner = slot.inner.write().unwrap(); - if let Some(key) = &inner.key { - match key { - CacheKey::ImmutableFilePage { file_id, blkno: _ } - if *file_id == drop_file_id => - { - // remove mapping for old buffer - self.remove_mapping(key); + let mut map = self.immutable_page_map.write().unwrap(); + let Entry::Occupied(mut block_nos_entry) = map.entry(drop_file_id) else { + // `find_victim` could have been faster + return + }; + block_nos_entry.get_mut().retain(|block_no, slot_idx| { + let expect_cache_key = CacheKey::ImmutableFilePage { + file_id: drop_file_id, + blkno: *block_no, + }; + let slot = &self.slots[*slot_idx]; + match slot.inner.try_write() { + TryLockResult::Ok(mut inner) => { + // check again, could have been taken since added to immutable_page_map + if inner.key == Some(expect_cache_key) { + // immutable_page_map was still in sync with reality + // TODO: find a way to share code with `remove_mapping` + self.size_metrics.current_bytes_immutable.sub_page_sz(1); inner.key = None; } - _ => {} + false + } + TryLockResult::Err(e @ TryLockError::Poisoned(_)) => { + panic!("slot lock {slot_idx} poisoned: {e:?}") + } + TryLockResult::Err(TryLockError::WouldBlock) => { + // This function is only called from the `Drop` impl of EphemeralFile. + // So, there shouldn't be any page cache users that are reading from + // that EphemeralFile anymore, because, + // 1. EphemeralFile doesn't hand out page cache read guards that outlive EphemeralFile and + // 2. there can't be any concurrent `EphemeralFile::reads`s because it's being dropped. + // + // So, the only reason why the page is locked right now is `find_victim` + // trying to claim this page. + // So, let `find_victim` do the work. + // Also, it will call `remove_mapping()` to remove the (file_id, block_no) + // from the immutable_page_map; so, keep the entry in the map here, otherwise + // the `remove_mapping()` call will panic. + true } } + }); + if block_nos_entry.get().is_empty() { + block_nos_entry.remove(); + } else { + // `find_victim` will do it } } @@ -660,7 +690,6 @@ impl PageCache { CacheKey::ImmutableFilePage { file_id, blkno } => { let map = self.immutable_page_map.read().unwrap(); let block_nos = map.get(file_id)?; - let block_nos = block_nos.read().unwrap(); block_nos.get(blkno).copied() } } @@ -685,7 +714,6 @@ impl PageCache { CacheKey::ImmutableFilePage { file_id, blkno } => { let map = self.immutable_page_map.read().unwrap(); let block_nos = map.get(file_id)?; - let block_nos = block_nos.read().unwrap(); block_nos.get(blkno).copied() } } @@ -718,26 +746,17 @@ impl PageCache { } } CacheKey::ImmutableFilePage { file_id, blkno } => { - let map = self.immutable_page_map.read().unwrap(); - let block_nos = map.get(file_id).expect("could not find file_id in mapping"); - let mut block_nos = block_nos.write().unwrap(); - block_nos + let mut map = self.immutable_page_map.write().unwrap(); + let Entry::Occupied(mut block_nos_entry) = map.entry(*file_id) else { + panic!("could not find file_id in mapping") + }; + block_nos_entry + .get_mut() .remove(blkno) .expect("could not find blkno in mapping"); self.size_metrics.current_bytes_immutable.sub_page_sz(1); - if block_nos.is_empty() { - drop(block_nos); - // re-lock map in write mode - drop(map); - let mut map = self.immutable_page_map.write().unwrap(); - let Some(block_nos_rwl) = map.get(file_id) else { - return; - }; - let block_nos = block_nos_rwl.read().unwrap(); - if block_nos.is_empty() { - drop(block_nos); - map.remove(file_id); - } + if block_nos_entry.get().is_empty() { + block_nos_entry.remove(); } } } @@ -777,7 +796,6 @@ impl PageCache { CacheKey::ImmutableFilePage { file_id, blkno } => { let mut map = self.immutable_page_map.write().unwrap(); let block_nos = map.entry(*file_id).or_default(); - let mut block_nos = block_nos.write().unwrap(); match block_nos.entry(*blkno) { Entry::Occupied(entry) => Some(*entry.get()), Entry::Vacant(entry) => { diff --git a/pageserver/src/tenant/block_io.rs b/pageserver/src/tenant/block_io.rs index 3a6806357b..93a1c9b34f 100644 --- a/pageserver/src/tenant/block_io.rs +++ b/pageserver/src/tenant/block_io.rs @@ -36,7 +36,9 @@ where /// Reference to an in-memory copy of an immutable on-disk block. pub enum BlockLease<'a> { - PageReadGuard(PageReadGuard<'static>), + /// [crate::page_cache::PageCache::drop_buffers_for_immutable] relies on the read guard + /// not outiving the EphemeralFile. See the comment in there for details. + PageReadGuard(PageReadGuard<'a>), EphemeralFileMutableTail(&'a [u8; PAGE_SZ]), #[cfg(test)] Rc(std::rc::Rc<[u8; PAGE_SZ]>), diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 7a482706e5..39045e588b 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -848,6 +848,7 @@ impl DeltaLayerInner { let summary_blk = file.read_blk(0)?; let actual_summary = Summary::des_prefix(summary_blk.as_ref())?; + drop(summary_blk); // so we don't borrow `file` for too long if let Some(mut expected_summary) = summary { // production code path diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index cf4f40008a..19ac8eeebb 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -442,6 +442,7 @@ impl ImageLayerInner { let file = FileBlockReader::new(file); let summary_blk = file.read_blk(0)?; let actual_summary = Summary::des_prefix(summary_blk.as_ref())?; + drop(summary_blk); // so we don't borrow `file` for too long if let Some(mut expected_summary) = summary { // production code path