From 4acd292717939363639ce840341b581997cafcf1 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Wed, 17 Nov 2021 12:50:25 +0300 Subject: [PATCH] Use BRIN to optimize GC --- pageserver/src/buffered_repository.rs | 44 ++++++++++++++++++++++++--- pageserver/src/lib.rs | 4 +-- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/pageserver/src/buffered_repository.rs b/pageserver/src/buffered_repository.rs index 642b59bce0..fdfc9f30a6 100644 --- a/pageserver/src/buffered_repository.rs +++ b/pageserver/src/buffered_repository.rs @@ -23,7 +23,7 @@ use std::convert::TryInto; use std::fs; use std::fs::{File, OpenOptions}; use std::io::Write; -use std::ops::{Bound::Included, Deref}; +use std::ops::{Bound::{*}, Deref}; use std::path::PathBuf; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex, MutexGuard, RwLock}; @@ -186,6 +186,7 @@ struct RelishStore { meta: Option>, brin: BTreeMap, last_checkpoint: Lsn, + last_gc: Lsn, } /// Public interface @@ -785,7 +786,7 @@ impl Timeline for BufferedTimeline { .ser()? .to_vec(); let till = StoreKey::Data(DataKey { rel, blknum, lsn }).ser()?.to_vec(); - let mut reconstruct_key: Option = None; + //let mut reconstruct_key: Option = None; let result = { let store = self.store.read().unwrap(); let mut iter = store.data.range(&from..=&till); @@ -804,7 +805,7 @@ impl Timeline for BufferedTimeline { records: Vec::new(), page_img: None, }; - reconstruct_key = Some(dk); + //reconstruct_key = Some(dk); data.records.push((dk.lsn, rec)); // loop until we locate full page image or initialization WAL record // FIXME-KK: cross-timelines histories are not handled now @@ -845,14 +846,16 @@ impl Timeline for BufferedTimeline { Ok(ZERO_PAGE.clone()) } }; + /* if let Some(key) = reconstruct_key { if let Ok(img) = &result { let mut store = self.store.write().unwrap(); store .data - .put(&StoreKey::Data(key).ser()?, &PageVersion::Image(img.clone()).ser()?)?; + .put(&StoreKey::Data(key).ser()?, &PageVersion::Page(img.clone()).ser()?)?; } } + */ result } @@ -1249,6 +1252,7 @@ impl BufferedTimeline { meta: None, brin: BTreeMap::new(), last_checkpoint: Lsn(0), + last_gc: Lsn(0), }), walredo_mgr, @@ -1417,7 +1421,7 @@ impl BufferedTimeline { rel: dk.rel, seg: dk.blknum / BRIN_SEGMENT_SIZE, }; - // At first iteratiob we need to scan the whole storage, because BRIN does't have enough information + // At first iteration we need to scan the whole storage, because BRIN does't have enough information if last_checkpoint != Lsn(0) && store .brin @@ -1575,6 +1579,13 @@ impl BufferedTimeline { // Keep tracked dropped relish let mut dropped: HashSet = HashSet::new(); + let last_gc; + { + let mut store = self.store.write().unwrap(); + last_gc = store.last_gc; + store.last_gc = self.get_last_record_lsn(); + } + 'meta: loop { let store = self.store.read().unwrap(); let mut iter = store.data.range(&from.ser()?..); @@ -1663,6 +1674,29 @@ impl BufferedTimeline { let raw_key = pair.0; let key = StoreKey::des(&raw_key)?; if let StoreKey::Data(dk) = key { + let seg_tag = BrinTag { + rel: dk.rel, + seg: dk.blknum / BRIN_SEGMENT_SIZE, + }; + // At first iteration we need to scan the whole storage, + // because BRIN does't have enough information + if last_gc != Lsn(0) + && store.brin.get(&seg_tag).map_or(true, |lsn| *lsn <= last_gc) + { + // This segment was not update since last GC: jump to next one + let mut iter = store.brin.range((Excluded(seg_tag), Unbounded)); + while let Some((next_seg, lsn)) = iter.next_back() { + if *lsn > last_gc { + from = StoreKey::Data(DataKey { + rel: next_seg.rel, + blknum: next_seg.seg * BRIN_SEGMENT_SIZE, + lsn: Lsn(0), + }); + continue 'pages; + } + } + break; + } let same_page = from_rel == dk.rel && from_blknum == dk.blknum; if !same_page { result.pages_total += 1; diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index db3c820009..c5200c909d 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -36,8 +36,8 @@ pub mod defaults { pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 0; pub const DEFAULT_CHECKPOINT_PERIOD: Duration = Duration::from_secs(1); - pub const DEFAULT_GC_HORIZON: u64 = 1024 * 1024; - pub const DEFAULT_GC_PERIOD: Duration = Duration::from_secs(10); + pub const DEFAULT_GC_HORIZON: u64 = 1024; + pub const DEFAULT_GC_PERIOD: Duration = Duration::from_secs(1); pub const DEFAULT_SUPERUSER: &str = "zenith_admin"; pub const DEFAULT_RELISH_STORAGE_MAX_CONCURRENT_SYNC_LIMITS: usize = 100;