diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs index 299575f792..c485e46f47 100644 --- a/pageserver/src/page_cache.rs +++ b/pageserver/src/page_cache.rs @@ -41,7 +41,7 @@ use std::{ convert::TryInto, sync::{ atomic::{AtomicU8, AtomicUsize, Ordering}, - RwLock, RwLockReadGuard, RwLockWriteGuard, + RwLock, RwLockReadGuard, RwLockWriteGuard, TryLockError, }, }; @@ -683,16 +683,33 @@ impl PageCache { /// /// On return, the slot is empty and write-locked. fn find_victim(&self) -> (usize, RwLockWriteGuard) { - let iter_limit = self.slots.len() * 2; + let iter_limit = self.slots.len() * 10; let mut iters = 0; loop { + iters += 1; let slot_idx = self.next_evict_slot.fetch_add(1, Ordering::Relaxed) % self.slots.len(); let slot = &self.slots[slot_idx]; - if slot.dec_usage_count() == 0 || iters >= iter_limit { - let mut inner = slot.inner.write().unwrap(); - + if slot.dec_usage_count() == 0 { + let mut inner = match slot.inner.try_write() { + Ok(inner) => inner, + Err(TryLockError::Poisoned(err)) => { + panic!("buffer lock was poisoned: {:?}", err) + } + Err(TryLockError::WouldBlock) => { + // If we have looped through the whole buffer pool 10 times + // and still haven't found a victim buffer, something's wrong. + // Maybe all the buffers were in locked. That could happen in + // theory, if you have more threads holding buffers locked than + // there are buffers in the pool. In practice, with a reasonably + // large buffer pool it really shouldn't happen. + if iters > iter_limit { + panic!("could not find a victim buffer to evict"); + } + continue; + } + }; if let Some(old_key) = &inner.key { if inner.dirty { if let Err(err) = Self::writeback(old_key, inner.buf) { @@ -717,8 +734,6 @@ impl PageCache { } return (slot_idx, inner); } - - iters += 1; } }