mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-28 02:20:42 +00:00
Cargo fmt pass over a codebase
This commit is contained in:
@@ -7,24 +7,24 @@
|
||||
//
|
||||
|
||||
use core::ops::Bound::Included;
|
||||
use std::{convert::TryInto, ops::AddAssign};
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::error::Error;
|
||||
use std::sync::{Arc,Condvar, Mutex};
|
||||
use std::sync::atomic::AtomicU64;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::Duration;
|
||||
use std::sync::{Arc, Condvar, Mutex};
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
use std::{convert::TryInto, ops::AddAssign};
|
||||
// use tokio::sync::RwLock;
|
||||
use bytes::Bytes;
|
||||
use lazy_static::lazy_static;
|
||||
use rand::Rng;
|
||||
use log::*;
|
||||
use rand::Rng;
|
||||
|
||||
use crate::{PageServerConf, walredo};
|
||||
use crate::{walredo, PageServerConf};
|
||||
|
||||
use crossbeam_channel::unbounded;
|
||||
use crossbeam_channel::{Sender, Receiver};
|
||||
use crossbeam_channel::{Receiver, Sender};
|
||||
|
||||
// Timeout when waiting or WAL receiver to catch up to an LSN given in a GetPage@LSN call.
|
||||
static TIMEOUT: Duration = Duration::from_secs(60);
|
||||
@@ -63,7 +63,6 @@ pub struct PageCacheStats {
|
||||
}
|
||||
|
||||
impl AddAssign for PageCacheStats {
|
||||
|
||||
fn add_assign(&mut self, other: Self) {
|
||||
*self = Self {
|
||||
num_entries: self.num_entries + other.num_entries,
|
||||
@@ -81,7 +80,6 @@ impl AddAssign for PageCacheStats {
|
||||
// Shared data structure, holding page cache and related auxiliary information
|
||||
//
|
||||
struct PageCacheShared {
|
||||
|
||||
// The actual page cache
|
||||
pagecache: BTreeMap<CacheKey, Arc<CacheEntry>>,
|
||||
|
||||
@@ -112,10 +110,10 @@ struct PageCacheShared {
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
pub static ref PAGECACHES : Mutex<HashMap<u64, Arc<PageCache>>> = Mutex::new(HashMap::new());
|
||||
pub static ref PAGECACHES: Mutex<HashMap<u64, Arc<PageCache>>> = Mutex::new(HashMap::new());
|
||||
}
|
||||
|
||||
pub fn get_pagecahe(conf: PageServerConf, sys_id : u64) -> Arc<PageCache> {
|
||||
pub fn get_pagecahe(conf: PageServerConf, sys_id: u64) -> Arc<PageCache> {
|
||||
let mut pcaches = PAGECACHES.lock().unwrap();
|
||||
|
||||
if !pcaches.contains_key(&sys_id) {
|
||||
@@ -137,20 +135,18 @@ pub fn get_pagecahe(conf: PageServerConf, sys_id : u64) -> Arc<PageCache> {
|
||||
pcaches.get(&sys_id).unwrap().clone()
|
||||
}
|
||||
|
||||
fn init_page_cache() -> PageCache
|
||||
{
|
||||
fn init_page_cache() -> PageCache {
|
||||
// Initialize the channel between the page cache and the WAL applicator
|
||||
let (s, r) = unbounded();
|
||||
|
||||
PageCache {
|
||||
shared: Mutex::new(
|
||||
PageCacheShared {
|
||||
pagecache: BTreeMap::new(),
|
||||
relsize_cache: HashMap::new(),
|
||||
first_valid_lsn: 0,
|
||||
last_valid_lsn: 0,
|
||||
last_record_lsn: 0,
|
||||
}),
|
||||
shared: Mutex::new(PageCacheShared {
|
||||
pagecache: BTreeMap::new(),
|
||||
relsize_cache: HashMap::new(),
|
||||
first_valid_lsn: 0,
|
||||
last_valid_lsn: 0,
|
||||
last_record_lsn: 0,
|
||||
}),
|
||||
valid_lsn_condvar: Condvar::new(),
|
||||
|
||||
walredo_sender: s,
|
||||
@@ -165,10 +161,8 @@ fn init_page_cache() -> PageCache
|
||||
last_valid_lsn: AtomicU64::new(0),
|
||||
last_record_lsn: AtomicU64::new(0),
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// We store two kinds of entries in the page cache:
|
||||
//
|
||||
@@ -185,7 +179,7 @@ fn init_page_cache() -> PageCache
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
pub struct CacheKey {
|
||||
pub tag: BufferTag,
|
||||
pub lsn: u64
|
||||
pub lsn: u64,
|
||||
}
|
||||
|
||||
pub struct CacheEntry {
|
||||
@@ -198,7 +192,7 @@ pub struct CacheEntry {
|
||||
//
|
||||
// FIXME: this takes quite a lot of space. Consider using parking_lot::Condvar
|
||||
// or something else.
|
||||
pub walredo_condvar: Condvar
|
||||
pub walredo_condvar: Condvar,
|
||||
}
|
||||
|
||||
pub struct CacheEntryContent {
|
||||
@@ -221,7 +215,6 @@ impl CacheEntry {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Eq, PartialEq, Hash, Clone, Copy)]
|
||||
pub struct RelTag {
|
||||
pub spcnode: u32,
|
||||
@@ -241,415 +234,409 @@ pub struct BufferTag {
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct WALRecord {
|
||||
pub lsn: u64, // LSN at the *end* of the record
|
||||
pub lsn: u64, // LSN at the *end* of the record
|
||||
pub will_init: bool,
|
||||
pub rec: Bytes
|
||||
pub rec: Bytes,
|
||||
}
|
||||
|
||||
|
||||
// Public interface functions
|
||||
|
||||
impl PageCache {
|
||||
|
||||
//
|
||||
// GetPage@LSN
|
||||
//
|
||||
// Returns an 8k page image
|
||||
//
|
||||
pub fn get_page_at_lsn(&self, tag: BufferTag, lsn: u64) -> Result<Bytes, Box<dyn Error>>
|
||||
{
|
||||
self.num_getpage_requests.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
// Look up cache entry. If it's a page image, return that. If it's a WAL record,
|
||||
// ask the WAL redo service to reconstruct the page image from the WAL records.
|
||||
let minkey = CacheKey { tag: tag, lsn: 0 };
|
||||
let maxkey = CacheKey { tag: tag, lsn: lsn };
|
||||
|
||||
let entry_rc: Arc<CacheEntry>;
|
||||
{
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
let mut waited = false;
|
||||
|
||||
while lsn > shared.last_valid_lsn {
|
||||
// TODO: Wait for the WAL receiver to catch up
|
||||
waited = true;
|
||||
trace!("not caught up yet: {}, requested {}", shared.last_valid_lsn, lsn);
|
||||
let wait_result = self.valid_lsn_condvar.wait_timeout(shared, TIMEOUT).unwrap();
|
||||
|
||||
shared = wait_result.0;
|
||||
if wait_result.1.timed_out() {
|
||||
return Err(format!("Timed out while waiting for WAL record at LSN {} to arrive", lsn))?;
|
||||
}
|
||||
}
|
||||
if waited {
|
||||
trace!("caught up now, continuing");
|
||||
}
|
||||
|
||||
if lsn < shared.first_valid_lsn {
|
||||
return Err(format!("LSN {} has already been removed", lsn))?;
|
||||
}
|
||||
|
||||
let pagecache = &shared.pagecache;
|
||||
|
||||
let mut entries = pagecache.range((Included(&minkey), Included(&maxkey)));
|
||||
|
||||
let entry_opt = entries.next_back();
|
||||
|
||||
if entry_opt.is_none() {
|
||||
static ZERO_PAGE:[u8; 8192] = [0 as u8; 8192];
|
||||
return Ok(Bytes::from_static(&ZERO_PAGE));
|
||||
/* return Err("could not find page image")?; */
|
||||
}
|
||||
let (_key, entry) = entry_opt.unwrap();
|
||||
entry_rc = entry.clone();
|
||||
|
||||
// Now that we have a reference to the cache entry, drop the lock on the map.
|
||||
// It's important to do this before waiting on the condition variable below,
|
||||
// and better to do it as soon as possible to maximize concurrency.
|
||||
}
|
||||
|
||||
// Lock the cache entry and dig the page image out of it.
|
||||
let page_img: Bytes;
|
||||
{
|
||||
let mut entry_content = entry_rc.content.lock().unwrap();
|
||||
|
||||
if let Some(img) = &entry_content.page_image {
|
||||
assert!(!entry_content.apply_pending);
|
||||
page_img = img.clone();
|
||||
} else if entry_content.wal_record.is_some() {
|
||||
|
||||
//
|
||||
// If this page needs to be reconstructed by applying some WAL,
|
||||
// send a request to the WAL redo thread.
|
||||
//
|
||||
if !entry_content.apply_pending {
|
||||
assert!(!entry_content.apply_pending);
|
||||
entry_content.apply_pending = true;
|
||||
|
||||
let s = &self.walredo_sender;
|
||||
s.send(entry_rc.clone())?;
|
||||
}
|
||||
|
||||
while entry_content.apply_pending {
|
||||
entry_content = entry_rc.walredo_condvar.wait(entry_content).unwrap();
|
||||
}
|
||||
|
||||
// We should now have a page image. If we don't, it means that WAL redo
|
||||
// failed to reconstruct it. WAL redo should've logged that error already.
|
||||
page_img = match &entry_content.page_image {
|
||||
Some(p) => p.clone(),
|
||||
None => {
|
||||
error!("could not apply WAL to reconstruct page image for GetPage@LSN request");
|
||||
return Err("could not apply WAL to reconstruct page image".into());
|
||||
}
|
||||
};
|
||||
|
||||
} else {
|
||||
// No base image, and no WAL record. Huh?
|
||||
return Err(format!("no page image or WAL record for requested page"))?;
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: assumes little-endian. Only used for the debugging log though
|
||||
let page_lsn_hi = u32::from_le_bytes(page_img.get(0..4).unwrap().try_into().unwrap());
|
||||
let page_lsn_lo = u32::from_le_bytes(page_img.get(4..8).unwrap().try_into().unwrap());
|
||||
trace!("Returning page with LSN {:X}/{:X} for {}/{}/{}.{} blk {}", page_lsn_hi, page_lsn_lo,
|
||||
tag.spcnode, tag.dbnode, tag.relnode, tag.forknum, tag.blknum);
|
||||
|
||||
return Ok(page_img);
|
||||
}
|
||||
|
||||
//
|
||||
// Collect all the WAL records that are needed to reconstruct a page
|
||||
// image for the given cache entry.
|
||||
//
|
||||
// Returns an old page image (if any), and a vector of WAL records to apply
|
||||
// over it.
|
||||
//
|
||||
pub fn collect_records_for_apply(&self, entry: &CacheEntry) -> (Option<Bytes>, Vec<WALRecord>)
|
||||
{
|
||||
// Scan the BTreeMap backwards, starting from the given entry.
|
||||
let shared = self.shared.lock().unwrap();
|
||||
let pagecache = &shared.pagecache;
|
||||
|
||||
let minkey = CacheKey {
|
||||
tag: entry.key.tag,
|
||||
lsn: 0
|
||||
};
|
||||
let maxkey = CacheKey {
|
||||
tag: entry.key.tag,
|
||||
lsn: entry.key.lsn
|
||||
};
|
||||
let entries = pagecache.range((Included(&minkey), Included(&maxkey)));
|
||||
|
||||
// the last entry in the range should be the CacheEntry we were given
|
||||
//let _last_entry = entries.next_back();
|
||||
//assert!(last_entry == entry);
|
||||
|
||||
let mut base_img: Option<Bytes> = None;
|
||||
let mut records: Vec<WALRecord> = Vec::new();
|
||||
|
||||
// Scan backwards, collecting the WAL records, until we hit an
|
||||
// old page image.
|
||||
for (_key, e) in entries.rev() {
|
||||
let e = e.content.lock().unwrap();
|
||||
|
||||
if let Some(img) = &e.page_image {
|
||||
// We have a base image. No need to dig deeper into the list of
|
||||
// records
|
||||
base_img = Some(img.clone());
|
||||
break;
|
||||
} else if let Some(rec) = &e.wal_record {
|
||||
|
||||
records.push(rec.clone());
|
||||
|
||||
// If this WAL record initializes the page, no need to dig deeper.
|
||||
if rec.will_init {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
panic!("no base image and no WAL record on cache entry");
|
||||
}
|
||||
}
|
||||
|
||||
records.reverse();
|
||||
return (base_img, records);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Adds a WAL record to the page cache
|
||||
//
|
||||
pub fn put_wal_record(&self, tag: BufferTag, rec: WALRecord)
|
||||
{
|
||||
let key = CacheKey {
|
||||
tag: tag,
|
||||
lsn: rec.lsn
|
||||
};
|
||||
|
||||
let entry = CacheEntry::new(key.clone());
|
||||
entry.content.lock().unwrap().wal_record = Some(rec);
|
||||
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
|
||||
let rel_tag = RelTag {
|
||||
spcnode: tag.spcnode,
|
||||
dbnode: tag.dbnode,
|
||||
relnode: tag.relnode,
|
||||
forknum: tag.forknum,
|
||||
};
|
||||
let rel_entry = shared.relsize_cache.entry(rel_tag).or_insert(0);
|
||||
if tag.blknum >= *rel_entry {
|
||||
*rel_entry = tag.blknum + 1;
|
||||
}
|
||||
|
||||
trace!("put_wal_record lsn: {}", key.lsn);
|
||||
|
||||
let oldentry = shared.pagecache.insert(key, Arc::new(entry));
|
||||
self.num_entries.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
if !oldentry.is_none() {
|
||||
error!("overwriting WAL record in page cache");
|
||||
}
|
||||
|
||||
self.num_wal_records.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
//
|
||||
// Memorize a full image of a page version
|
||||
//
|
||||
pub fn put_page_image(&self, tag: BufferTag, lsn: u64, img: Bytes)
|
||||
{
|
||||
let key = CacheKey {
|
||||
tag: tag,
|
||||
lsn: lsn
|
||||
};
|
||||
|
||||
let entry = CacheEntry::new(key.clone());
|
||||
entry.content.lock().unwrap().page_image = Some(img);
|
||||
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
let pagecache = &mut shared.pagecache;
|
||||
|
||||
let oldentry = pagecache.insert(key, Arc::new(entry));
|
||||
self.num_entries.fetch_add(1, Ordering::Relaxed);
|
||||
assert!(oldentry.is_none());
|
||||
|
||||
//debug!("inserted page image for {}/{}/{}_{} blk {} at {}",
|
||||
// tag.spcnode, tag.dbnode, tag.relnode, tag.forknum, tag.blknum, lsn);
|
||||
|
||||
self.num_page_images.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
//
|
||||
pub fn advance_last_valid_lsn(&self, lsn: u64)
|
||||
{
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
|
||||
// Can't move backwards.
|
||||
assert!(lsn >= shared.last_valid_lsn);
|
||||
|
||||
shared.last_valid_lsn = lsn;
|
||||
self.valid_lsn_condvar.notify_all();
|
||||
|
||||
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
//
|
||||
// NOTE: this updates last_valid_lsn as well.
|
||||
//
|
||||
pub fn advance_last_record_lsn(&self, lsn: u64)
|
||||
{
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
|
||||
// Can't move backwards.
|
||||
assert!(lsn >= shared.last_valid_lsn);
|
||||
assert!(lsn >= shared.last_record_lsn);
|
||||
|
||||
shared.last_valid_lsn = lsn;
|
||||
shared.last_record_lsn = lsn;
|
||||
self.valid_lsn_condvar.notify_all();
|
||||
|
||||
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
//
|
||||
pub fn _advance_first_valid_lsn(&self, lsn: u64)
|
||||
{
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
|
||||
// Can't move backwards.
|
||||
assert!(lsn >= shared.first_valid_lsn);
|
||||
|
||||
// Can't overtake last_valid_lsn (except when we're
|
||||
// initializing the system and last_valid_lsn hasn't been set yet.
|
||||
assert!(shared.last_valid_lsn == 0 || lsn < shared.last_valid_lsn);
|
||||
|
||||
shared.first_valid_lsn = lsn;
|
||||
self.first_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn init_valid_lsn(&self, lsn: u64)
|
||||
{
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
|
||||
assert!(shared.first_valid_lsn == 0);
|
||||
assert!(shared.last_valid_lsn == 0);
|
||||
assert!(shared.last_record_lsn == 0);
|
||||
|
||||
shared.first_valid_lsn = lsn;
|
||||
shared.last_valid_lsn = lsn;
|
||||
shared.last_record_lsn = lsn;
|
||||
|
||||
self.first_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
self.last_record_lsn.store(lsn, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn get_last_valid_lsn(&self) -> u64
|
||||
{
|
||||
let shared = self.shared.lock().unwrap();
|
||||
|
||||
return shared.last_record_lsn;
|
||||
}
|
||||
|
||||
//
|
||||
// Simple test function for the WAL redo code:
|
||||
//
|
||||
// 1. Pick a page from the page cache at random.
|
||||
// 2. Request that page with GetPage@LSN, using Max LSN (i.e. get the latest page version)
|
||||
//
|
||||
//
|
||||
pub fn _test_get_page_at_lsn(&self)
|
||||
{
|
||||
// for quick testing of the get_page_at_lsn() funcion.
|
||||
//
|
||||
// Get a random page from the page cache. Apply all its WAL, by requesting
|
||||
// that page at the highest lsn.
|
||||
// GetPage@LSN
|
||||
//
|
||||
// Returns an 8k page image
|
||||
//
|
||||
pub fn get_page_at_lsn(&self, tag: BufferTag, lsn: u64) -> Result<Bytes, Box<dyn Error>> {
|
||||
self.num_getpage_requests.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
let mut tag: Option<BufferTag> = None;
|
||||
// Look up cache entry. If it's a page image, return that. If it's a WAL record,
|
||||
// ask the WAL redo service to reconstruct the page image from the WAL records.
|
||||
let minkey = CacheKey { tag: tag, lsn: 0 };
|
||||
let maxkey = CacheKey { tag: tag, lsn: lsn };
|
||||
|
||||
{
|
||||
let entry_rc: Arc<CacheEntry>;
|
||||
{
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
let mut waited = false;
|
||||
|
||||
while lsn > shared.last_valid_lsn {
|
||||
// TODO: Wait for the WAL receiver to catch up
|
||||
waited = true;
|
||||
trace!(
|
||||
"not caught up yet: {}, requested {}",
|
||||
shared.last_valid_lsn,
|
||||
lsn
|
||||
);
|
||||
let wait_result = self
|
||||
.valid_lsn_condvar
|
||||
.wait_timeout(shared, TIMEOUT)
|
||||
.unwrap();
|
||||
|
||||
shared = wait_result.0;
|
||||
if wait_result.1.timed_out() {
|
||||
return Err(format!(
|
||||
"Timed out while waiting for WAL record at LSN {} to arrive",
|
||||
lsn
|
||||
))?;
|
||||
}
|
||||
}
|
||||
if waited {
|
||||
trace!("caught up now, continuing");
|
||||
}
|
||||
|
||||
if lsn < shared.first_valid_lsn {
|
||||
return Err(format!("LSN {} has already been removed", lsn))?;
|
||||
}
|
||||
|
||||
let pagecache = &shared.pagecache;
|
||||
|
||||
let mut entries = pagecache.range((Included(&minkey), Included(&maxkey)));
|
||||
|
||||
let entry_opt = entries.next_back();
|
||||
|
||||
if entry_opt.is_none() {
|
||||
static ZERO_PAGE: [u8; 8192] = [0 as u8; 8192];
|
||||
return Ok(Bytes::from_static(&ZERO_PAGE));
|
||||
/* return Err("could not find page image")?; */
|
||||
}
|
||||
let (_key, entry) = entry_opt.unwrap();
|
||||
entry_rc = entry.clone();
|
||||
|
||||
// Now that we have a reference to the cache entry, drop the lock on the map.
|
||||
// It's important to do this before waiting on the condition variable below,
|
||||
// and better to do it as soon as possible to maximize concurrency.
|
||||
}
|
||||
|
||||
// Lock the cache entry and dig the page image out of it.
|
||||
let page_img: Bytes;
|
||||
{
|
||||
let mut entry_content = entry_rc.content.lock().unwrap();
|
||||
|
||||
if let Some(img) = &entry_content.page_image {
|
||||
assert!(!entry_content.apply_pending);
|
||||
page_img = img.clone();
|
||||
} else if entry_content.wal_record.is_some() {
|
||||
//
|
||||
// If this page needs to be reconstructed by applying some WAL,
|
||||
// send a request to the WAL redo thread.
|
||||
//
|
||||
if !entry_content.apply_pending {
|
||||
assert!(!entry_content.apply_pending);
|
||||
entry_content.apply_pending = true;
|
||||
|
||||
let s = &self.walredo_sender;
|
||||
s.send(entry_rc.clone())?;
|
||||
}
|
||||
|
||||
while entry_content.apply_pending {
|
||||
entry_content = entry_rc.walredo_condvar.wait(entry_content).unwrap();
|
||||
}
|
||||
|
||||
// We should now have a page image. If we don't, it means that WAL redo
|
||||
// failed to reconstruct it. WAL redo should've logged that error already.
|
||||
page_img = match &entry_content.page_image {
|
||||
Some(p) => p.clone(),
|
||||
None => {
|
||||
error!(
|
||||
"could not apply WAL to reconstruct page image for GetPage@LSN request"
|
||||
);
|
||||
return Err("could not apply WAL to reconstruct page image".into());
|
||||
}
|
||||
};
|
||||
} else {
|
||||
// No base image, and no WAL record. Huh?
|
||||
return Err(format!("no page image or WAL record for requested page"))?;
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: assumes little-endian. Only used for the debugging log though
|
||||
let page_lsn_hi = u32::from_le_bytes(page_img.get(0..4).unwrap().try_into().unwrap());
|
||||
let page_lsn_lo = u32::from_le_bytes(page_img.get(4..8).unwrap().try_into().unwrap());
|
||||
trace!(
|
||||
"Returning page with LSN {:X}/{:X} for {}/{}/{}.{} blk {}",
|
||||
page_lsn_hi,
|
||||
page_lsn_lo,
|
||||
tag.spcnode,
|
||||
tag.dbnode,
|
||||
tag.relnode,
|
||||
tag.forknum,
|
||||
tag.blknum
|
||||
);
|
||||
|
||||
return Ok(page_img);
|
||||
}
|
||||
|
||||
//
|
||||
// Collect all the WAL records that are needed to reconstruct a page
|
||||
// image for the given cache entry.
|
||||
//
|
||||
// Returns an old page image (if any), and a vector of WAL records to apply
|
||||
// over it.
|
||||
//
|
||||
pub fn collect_records_for_apply(&self, entry: &CacheEntry) -> (Option<Bytes>, Vec<WALRecord>) {
|
||||
// Scan the BTreeMap backwards, starting from the given entry.
|
||||
let shared = self.shared.lock().unwrap();
|
||||
let pagecache = &shared.pagecache;
|
||||
|
||||
if pagecache.is_empty() {
|
||||
info!("page cache is empty");
|
||||
return;
|
||||
}
|
||||
let minkey = CacheKey {
|
||||
tag: entry.key.tag,
|
||||
lsn: 0,
|
||||
};
|
||||
let maxkey = CacheKey {
|
||||
tag: entry.key.tag,
|
||||
lsn: entry.key.lsn,
|
||||
};
|
||||
let entries = pagecache.range((Included(&minkey), Included(&maxkey)));
|
||||
|
||||
// Find nth entry in the map, where n is picked at random
|
||||
let n = rand::thread_rng().gen_range(0..pagecache.len());
|
||||
let mut i = 0;
|
||||
for (key, _e) in pagecache.iter() {
|
||||
if i == n {
|
||||
tag = Some(key.tag);
|
||||
// the last entry in the range should be the CacheEntry we were given
|
||||
//let _last_entry = entries.next_back();
|
||||
//assert!(last_entry == entry);
|
||||
|
||||
let mut base_img: Option<Bytes> = None;
|
||||
let mut records: Vec<WALRecord> = Vec::new();
|
||||
|
||||
// Scan backwards, collecting the WAL records, until we hit an
|
||||
// old page image.
|
||||
for (_key, e) in entries.rev() {
|
||||
let e = e.content.lock().unwrap();
|
||||
|
||||
if let Some(img) = &e.page_image {
|
||||
// We have a base image. No need to dig deeper into the list of
|
||||
// records
|
||||
base_img = Some(img.clone());
|
||||
break;
|
||||
} else if let Some(rec) = &e.wal_record {
|
||||
records.push(rec.clone());
|
||||
|
||||
// If this WAL record initializes the page, no need to dig deeper.
|
||||
if rec.will_init {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
panic!("no base image and no WAL record on cache entry");
|
||||
}
|
||||
}
|
||||
|
||||
records.reverse();
|
||||
return (base_img, records);
|
||||
}
|
||||
|
||||
//
|
||||
// Adds a WAL record to the page cache
|
||||
//
|
||||
pub fn put_wal_record(&self, tag: BufferTag, rec: WALRecord) {
|
||||
let key = CacheKey {
|
||||
tag: tag,
|
||||
lsn: rec.lsn,
|
||||
};
|
||||
|
||||
let entry = CacheEntry::new(key.clone());
|
||||
entry.content.lock().unwrap().wal_record = Some(rec);
|
||||
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
|
||||
let rel_tag = RelTag {
|
||||
spcnode: tag.spcnode,
|
||||
dbnode: tag.dbnode,
|
||||
relnode: tag.relnode,
|
||||
forknum: tag.forknum,
|
||||
};
|
||||
let rel_entry = shared.relsize_cache.entry(rel_tag).or_insert(0);
|
||||
if tag.blknum >= *rel_entry {
|
||||
*rel_entry = tag.blknum + 1;
|
||||
}
|
||||
|
||||
trace!("put_wal_record lsn: {}", key.lsn);
|
||||
|
||||
let oldentry = shared.pagecache.insert(key, Arc::new(entry));
|
||||
self.num_entries.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
if !oldentry.is_none() {
|
||||
error!("overwriting WAL record in page cache");
|
||||
}
|
||||
|
||||
self.num_wal_records.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
//
|
||||
// Memorize a full image of a page version
|
||||
//
|
||||
pub fn put_page_image(&self, tag: BufferTag, lsn: u64, img: Bytes) {
|
||||
let key = CacheKey { tag: tag, lsn: lsn };
|
||||
|
||||
let entry = CacheEntry::new(key.clone());
|
||||
entry.content.lock().unwrap().page_image = Some(img);
|
||||
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
let pagecache = &mut shared.pagecache;
|
||||
|
||||
let oldentry = pagecache.insert(key, Arc::new(entry));
|
||||
self.num_entries.fetch_add(1, Ordering::Relaxed);
|
||||
assert!(oldentry.is_none());
|
||||
|
||||
//debug!("inserted page image for {}/{}/{}_{} blk {} at {}",
|
||||
// tag.spcnode, tag.dbnode, tag.relnode, tag.forknum, tag.blknum, lsn);
|
||||
|
||||
self.num_page_images.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
//
|
||||
pub fn advance_last_valid_lsn(&self, lsn: u64) {
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
|
||||
// Can't move backwards.
|
||||
assert!(lsn >= shared.last_valid_lsn);
|
||||
|
||||
shared.last_valid_lsn = lsn;
|
||||
self.valid_lsn_condvar.notify_all();
|
||||
|
||||
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
//
|
||||
// NOTE: this updates last_valid_lsn as well.
|
||||
//
|
||||
pub fn advance_last_record_lsn(&self, lsn: u64) {
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
|
||||
// Can't move backwards.
|
||||
assert!(lsn >= shared.last_valid_lsn);
|
||||
assert!(lsn >= shared.last_record_lsn);
|
||||
|
||||
shared.last_valid_lsn = lsn;
|
||||
shared.last_record_lsn = lsn;
|
||||
self.valid_lsn_condvar.notify_all();
|
||||
|
||||
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
//
|
||||
pub fn _advance_first_valid_lsn(&self, lsn: u64) {
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
|
||||
// Can't move backwards.
|
||||
assert!(lsn >= shared.first_valid_lsn);
|
||||
|
||||
// Can't overtake last_valid_lsn (except when we're
|
||||
// initializing the system and last_valid_lsn hasn't been set yet.
|
||||
assert!(shared.last_valid_lsn == 0 || lsn < shared.last_valid_lsn);
|
||||
|
||||
shared.first_valid_lsn = lsn;
|
||||
self.first_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn init_valid_lsn(&self, lsn: u64) {
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
|
||||
assert!(shared.first_valid_lsn == 0);
|
||||
assert!(shared.last_valid_lsn == 0);
|
||||
assert!(shared.last_record_lsn == 0);
|
||||
|
||||
shared.first_valid_lsn = lsn;
|
||||
shared.last_valid_lsn = lsn;
|
||||
shared.last_record_lsn = lsn;
|
||||
|
||||
self.first_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||
self.last_record_lsn.store(lsn, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn get_last_valid_lsn(&self) -> u64 {
|
||||
let shared = self.shared.lock().unwrap();
|
||||
|
||||
return shared.last_record_lsn;
|
||||
}
|
||||
|
||||
//
|
||||
// Simple test function for the WAL redo code:
|
||||
//
|
||||
// 1. Pick a page from the page cache at random.
|
||||
// 2. Request that page with GetPage@LSN, using Max LSN (i.e. get the latest page version)
|
||||
//
|
||||
//
|
||||
pub fn _test_get_page_at_lsn(&self) {
|
||||
// for quick testing of the get_page_at_lsn() funcion.
|
||||
//
|
||||
// Get a random page from the page cache. Apply all its WAL, by requesting
|
||||
// that page at the highest lsn.
|
||||
|
||||
let mut tag: Option<BufferTag> = None;
|
||||
|
||||
{
|
||||
let shared = self.shared.lock().unwrap();
|
||||
let pagecache = &shared.pagecache;
|
||||
|
||||
if pagecache.is_empty() {
|
||||
info!("page cache is empty");
|
||||
return;
|
||||
}
|
||||
|
||||
// Find nth entry in the map, where n is picked at random
|
||||
let n = rand::thread_rng().gen_range(0..pagecache.len());
|
||||
let mut i = 0;
|
||||
for (key, _e) in pagecache.iter() {
|
||||
if i == n {
|
||||
tag = Some(key.tag);
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
info!("testing GetPage@LSN for block {}", tag.unwrap().blknum);
|
||||
match self.get_page_at_lsn(tag.unwrap(), 0xffff_ffff_ffff_eeee) {
|
||||
Ok(_img) => {
|
||||
// This prints out the whole page image.
|
||||
//println!("{:X?}", img);
|
||||
}
|
||||
Err(error) => {
|
||||
error!("GetPage@LSN failed: {}", error);
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
info!("testing GetPage@LSN for block {}", tag.unwrap().blknum);
|
||||
match self.get_page_at_lsn(tag.unwrap(), 0xffff_ffff_ffff_eeee) {
|
||||
Ok(_img) => {
|
||||
// This prints out the whole page image.
|
||||
//println!("{:X?}", img);
|
||||
},
|
||||
Err(error) => {
|
||||
error!("GetPage@LSN failed: {}", error);
|
||||
// FIXME: Shouldn't relation size also be tracked with an LSN?
|
||||
// If a replica is lagging behind, it needs to get the size as it was on
|
||||
// the replica's current replay LSN.
|
||||
pub fn relsize_inc(&self, rel: &RelTag, to: Option<u32>) {
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
let entry = shared.relsize_cache.entry(*rel).or_insert(0);
|
||||
|
||||
if let Some(to) = to {
|
||||
if to >= *entry {
|
||||
*entry = to + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn relsize_get(&self, rel: &RelTag) -> u32 {
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
let entry = shared.relsize_cache.entry(*rel).or_insert(0);
|
||||
*entry
|
||||
}
|
||||
|
||||
pub fn relsize_exist(&self, rel: &RelTag) -> bool {
|
||||
let shared = self.shared.lock().unwrap();
|
||||
let relsize_cache = &shared.relsize_cache;
|
||||
relsize_cache.contains_key(rel)
|
||||
}
|
||||
|
||||
pub fn get_stats(&self) -> PageCacheStats {
|
||||
PageCacheStats {
|
||||
num_entries: self.num_entries.load(Ordering::Relaxed),
|
||||
num_page_images: self.num_page_images.load(Ordering::Relaxed),
|
||||
num_wal_records: self.num_wal_records.load(Ordering::Relaxed),
|
||||
num_getpage_requests: self.num_getpage_requests.load(Ordering::Relaxed),
|
||||
first_valid_lsn: self.first_valid_lsn.load(Ordering::Relaxed),
|
||||
last_valid_lsn: self.last_valid_lsn.load(Ordering::Relaxed),
|
||||
last_record_lsn: self.last_record_lsn.load(Ordering::Relaxed),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// FIXME: Shouldn't relation size also be tracked with an LSN?
|
||||
// If a replica is lagging behind, it needs to get the size as it was on
|
||||
// the replica's current replay LSN.
|
||||
pub fn relsize_inc(&self, rel: &RelTag, to: Option<u32>)
|
||||
{
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
let entry = shared.relsize_cache.entry(*rel).or_insert(0);
|
||||
|
||||
if let Some(to) = to {
|
||||
if to >= *entry {
|
||||
*entry = to + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn relsize_get(&self, rel: &RelTag) -> u32
|
||||
{
|
||||
let mut shared = self.shared.lock().unwrap();
|
||||
let entry = shared.relsize_cache.entry(*rel).or_insert(0);
|
||||
*entry
|
||||
}
|
||||
|
||||
pub fn relsize_exist(&self, rel: &RelTag) -> bool
|
||||
{
|
||||
let shared = self.shared.lock().unwrap();
|
||||
let relsize_cache = &shared.relsize_cache;
|
||||
relsize_cache.contains_key(rel)
|
||||
}
|
||||
|
||||
pub fn get_stats(&self) -> PageCacheStats
|
||||
{
|
||||
PageCacheStats {
|
||||
num_entries: self.num_entries.load(Ordering::Relaxed),
|
||||
num_page_images: self.num_page_images.load(Ordering::Relaxed),
|
||||
num_wal_records: self.num_wal_records.load(Ordering::Relaxed),
|
||||
num_getpage_requests: self.num_getpage_requests.load(Ordering::Relaxed),
|
||||
first_valid_lsn: self.first_valid_lsn.load(Ordering::Relaxed),
|
||||
last_valid_lsn: self.last_valid_lsn.load(Ordering::Relaxed),
|
||||
last_record_lsn: self.last_record_lsn.load(Ordering::Relaxed),
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
pub fn get_stats() -> PageCacheStats
|
||||
{
|
||||
pub fn get_stats() -> PageCacheStats {
|
||||
let pcaches = PAGECACHES.lock().unwrap();
|
||||
|
||||
let mut stats = PageCacheStats {
|
||||
|
||||
Reference in New Issue
Block a user