From f0573f5991d99214e45f55308bf6b873394ae8e4 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Mon, 13 Mar 2023 14:07:35 +0200 Subject: [PATCH] Remove block cursor cache (#3740) ## Describe your changes Do not pin current block in BlockCursor ## Issue ticket number and link See #3712 There are places (see get_reconstruct_data) in our code when thread is holding read layers lock and then try to read file and so lock page cache slot. So we have edge in dependency graph layers->page cache slot. At the same time (as Christian noticed) we can lock page cache slot in BlockCursor and then try obtain shard lock on layers. So there is backward edge in dependency graph page cache slot>layers which forms loop and may cause deadlock. There are three possible fixes of the problem: 1. Perform compaction under `layers` shared lock. See PR #3732. It fixes the problem but make it not possible to append any data to pageserver until compaction is completed. 2. Do not hold `layers` lock while accessing layers (not sure if it is possible to do because it definitely introduce some new race conditions). 3. Do not pin current pages in BockCursor (this PR). My experiments shows that this cache in BlockCursor is not so useful: the number of hits/misses for cursor cache on pgbench workload (-i -s 10/-c 10 -T 100/-c 10 -S -T 100): ``` hits: 163011 misses: 1023602 ``` So number of cache misses is 10x times larger. And results for read-only pgbench are mostly the same: ``` with cache: 14581 w/out cache: 14429 ``` ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. --- pageserver/src/tenant/block_io.rs | 38 ++----------------------- pageserver/src/tenant/ephemeral_file.rs | 5 +--- 2 files changed, 4 insertions(+), 39 deletions(-) diff --git a/pageserver/src/tenant/block_io.rs b/pageserver/src/tenant/block_io.rs index e3cc800447..10de34e3f6 100644 --- a/pageserver/src/tenant/block_io.rs +++ b/pageserver/src/tenant/block_io.rs @@ -51,9 +51,6 @@ where /// /// A "cursor" for efficiently reading multiple pages from a BlockReader /// -/// A cursor caches the last accessed page, allowing for faster access if the -/// same block is accessed repeatedly. -/// /// You can access the last page with `*cursor`. 'read_blk' returns 'self', so /// that in many cases you can use a BlockCursor as a drop-in replacement for /// the underlying BlockReader. For example: @@ -73,8 +70,6 @@ where R: BlockReader, { reader: R, - /// last accessed page - cache: Option<(u32, R::BlockLease)>, } impl BlockCursor @@ -82,40 +77,13 @@ where R: BlockReader, { pub fn new(reader: R) -> Self { - BlockCursor { - reader, - cache: None, - } + BlockCursor { reader } } - pub fn read_blk(&mut self, blknum: u32) -> Result<&Self, std::io::Error> { - // Fast return if this is the same block as before - if let Some((cached_blk, _buf)) = &self.cache { - if *cached_blk == blknum { - return Ok(self); - } - } - - // Read the block from the underlying reader, and cache it - self.cache = None; - let buf = self.reader.read_blk(blknum)?; - self.cache = Some((blknum, buf)); - - Ok(self) + pub fn read_blk(&mut self, blknum: u32) -> Result { + self.reader.read_blk(blknum) } } - -impl Deref for BlockCursor -where - R: BlockReader, -{ - type Target = [u8; PAGE_SZ]; - - fn deref(&self) -> &::Target { - &self.cache.as_ref().unwrap().1 - } -} - static NEXT_ID: AtomicU64 = AtomicU64::new(1); /// An adapter for reading a (virtual) file using the page cache. diff --git a/pageserver/src/tenant/ephemeral_file.rs b/pageserver/src/tenant/ephemeral_file.rs index c433e65ad2..4379438896 100644 --- a/pageserver/src/tenant/ephemeral_file.rs +++ b/pageserver/src/tenant/ephemeral_file.rs @@ -2,9 +2,7 @@ //! used to keep in-memory layers spilled on disk. use crate::config::PageServerConf; -use crate::page_cache; -use crate::page_cache::PAGE_SZ; -use crate::page_cache::{ReadBufResult, WriteBufResult}; +use crate::page_cache::{self, ReadBufResult, WriteBufResult, PAGE_SZ}; use crate::tenant::blob_io::BlobWriter; use crate::tenant::block_io::BlockReader; use crate::virtual_file::VirtualFile; @@ -427,7 +425,6 @@ mod tests { let actual = cursor.read_blob(pos)?; assert_eq!(actual, expected); } - drop(cursor); // Test a large blob that spans multiple pages let mut large_data = Vec::new();