mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 14:02:55 +00:00
265 lines
6.6 KiB
Rust
265 lines
6.6 KiB
Rust
//
|
|
// Page Cache holds all the different page versions and WAL records
|
|
//
|
|
//
|
|
//
|
|
|
|
use std::collections::BTreeMap;
|
|
use std::error::Error;
|
|
use std::sync::Mutex;
|
|
use bytes::Bytes;
|
|
use lazy_static::lazy_static;
|
|
use rand::Rng;
|
|
|
|
use crate::walredo;
|
|
|
|
/// Identifies one block ("page") of a relation fork.
///
/// The fields mirror PostgreSQL's BufferTag: tablespace, database and
/// relation node ids, fork number, and block number within the fork.
/// The derived `Ord` compares fields in declaration order, so all blocks
/// of the same relation fork sort together.
//
// `Debug` is derived so tags can be printed in diagnostics and asserts.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub struct BufferTag {
    pub spcnode: u32,
    pub dbnode: u32,
    pub relnode: u32,
    pub forknum: u32,
    pub blknum: u32,
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct WALRecord {
|
|
pub lsn: u64,
|
|
pub will_init: bool,
|
|
pub rec: Bytes
|
|
}
|
|
|
|
//
// Shared data structure, holding page cache and related auxiliary information
//
// Always accessed through the global PAGECACHE mutex.
//
struct PageCacheShared {

    // The actual page cache: maps (block, LSN) to either a materialized
    // page image or a WAL record for that page version.
    pagecache: BTreeMap<CacheKey, CacheEntry>,

    // What page versions do we hold in the cache? If we get GetPage with
    // LSN < first_valid_lsn, that's an error because we (no longer) hold that
    // page version. If we get a request > last_valid_lsn, we need to wait until
    // we receive all the WAL up to the request.
    //
    first_valid_lsn: u64,
    last_valid_lsn: u64
}
|
|
|
|
// Process-wide singleton holding all shared page cache state. The Mutex
// serializes every reader and writer; both LSN watermarks start at 0
// until the WAL receiver advances them.
lazy_static! {
    static ref PAGECACHE: Mutex<PageCacheShared> = Mutex::new(
        PageCacheShared {
            pagecache: BTreeMap::new(),
            first_valid_lsn: 0,
            last_valid_lsn: 0,
        });
}
|
|
|
|
//
|
|
// We store two kinds of entries in the page cache:
|
|
//
|
|
// 1. Ready-made images of the block
|
|
// 2. WAL records, to be applied on top of the "previous" entry
|
|
//
|
|
// Some WAL records will initialize the page from scratch. For such records,
|
|
// the 'will_init' flag is set. They don't need the previous page image before
|
|
// applying. The 'will_init' flag is set for records containing a full-page image,
|
|
// and for records with the BKPBLOCK_WILL_INIT flag. These differ from PageImages
|
|
// stored directly in the cache entry in that you still need to run the WAL redo
|
|
// routine to generate the page image.
|
|
//
|
|
#[derive(PartialEq, Eq, PartialOrd, Ord)]
|
|
pub struct CacheKey {
|
|
pub tag: BufferTag,
|
|
pub lsn: u64
|
|
}
|
|
|
|
#[derive(Clone)]
enum CacheEntry {
    // A ready-made, materialized image of the page.
    PageImage(Bytes),

    // A WAL record, to be applied on top of the previous entry for the
    // same block (or from scratch, if the record's will_init flag is set).
    WALRecord(WALRecord)
}
|
|
|
|
// Public interface functions
|
|
|
|
//
|
|
// GetPage@LSN
|
|
//
|
|
// Returns an 8k page image
|
|
//
|
|
pub fn get_page_at_lsn(tag: BufferTag, lsn: u64) -> Result<Bytes, Box<dyn Error>>
|
|
{
|
|
// TODO:
|
|
//
|
|
// Look up cache entry
|
|
// If it's a page image, return that. If it's a WAL record, walk backwards
|
|
// to the latest page image. Then apply all the WAL records up until the
|
|
// given LSN.
|
|
//
|
|
let minkey = CacheKey {
|
|
tag: tag,
|
|
lsn: 0
|
|
};
|
|
let maxkey = CacheKey {
|
|
tag: tag,
|
|
lsn: lsn + 1
|
|
};
|
|
|
|
let shared = PAGECACHE.lock().unwrap();
|
|
|
|
if lsn > shared.last_valid_lsn {
|
|
// TODO: Wait for the WAL receiver to catch up
|
|
}
|
|
if lsn < shared.first_valid_lsn {
|
|
return Err(format!("LSN {} has already been removed", lsn))?;
|
|
}
|
|
|
|
let pagecache = &shared.pagecache;
|
|
let entries = pagecache.range(&minkey .. &maxkey);
|
|
|
|
let mut records: Vec<WALRecord> = Vec::new();
|
|
|
|
let mut base_img: Option<Bytes> = None;
|
|
|
|
for (_key, e) in entries.rev() {
|
|
match e {
|
|
CacheEntry::PageImage(img) => {
|
|
// We have a base image. No need to dig deeper into the list of
|
|
// records
|
|
base_img = Some(img.clone());
|
|
break;
|
|
}
|
|
CacheEntry::WALRecord(rec) => {
|
|
records.push(rec.clone());
|
|
|
|
if rec.will_init {
|
|
println!("WAL record at LSN {} initializes the page", rec.lsn);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
let page_img: Bytes;
|
|
|
|
if !records.is_empty() {
|
|
records.reverse();
|
|
|
|
page_img = walredo::apply_wal_records(tag, base_img, &records)?;
|
|
|
|
println!("applied {} WAL records to produce page image at LSN {}", records.len(), lsn);
|
|
|
|
// Here, we could put the new page image back to the page cache, to save effort if the
|
|
// same (or later) page version is requested again. It's a tradeoff though, as each
|
|
// page image consumes some memory
|
|
} else if base_img.is_some() {
|
|
page_img = base_img.unwrap();
|
|
} else {
|
|
return Err("could not find page image")?;
|
|
}
|
|
|
|
return Ok(page_img);
|
|
}
|
|
|
|
//
|
|
// Adds a WAL record to the page cache
|
|
//
|
|
pub fn put_wal_record(tag: BufferTag, rec: WALRecord)
|
|
{
|
|
let key = CacheKey {
|
|
tag: tag,
|
|
lsn: rec.lsn
|
|
};
|
|
|
|
let entry = CacheEntry::WALRecord(rec);
|
|
|
|
let mut shared = PAGECACHE.lock().unwrap();
|
|
let pagecache = &mut shared.pagecache;
|
|
|
|
let oldentry = pagecache.insert(key, entry);
|
|
assert!(oldentry.is_none());
|
|
}
|
|
|
|
//
|
|
pub fn advance_last_valid_lsn(lsn: u64)
|
|
{
|
|
let mut shared = PAGECACHE.lock().unwrap();
|
|
|
|
// Can't move backwards.
|
|
assert!(lsn >= shared.last_valid_lsn);
|
|
|
|
shared.last_valid_lsn = lsn;
|
|
}
|
|
|
|
//
|
|
pub fn advance_first_valid_lsn(lsn: u64)
|
|
{
|
|
let mut shared = PAGECACHE.lock().unwrap();
|
|
|
|
// Can't move backwards.
|
|
assert!(lsn >= shared.first_valid_lsn);
|
|
|
|
// Can't overtake last_valid_lsn (except when we're
|
|
// initializing the system and last_valid_lsn hasn't been set yet.
|
|
assert!(shared.last_valid_lsn == 0 || lsn < shared.last_valid_lsn);
|
|
|
|
shared.first_valid_lsn = lsn;
|
|
}
|
|
|
|
pub fn get_last_valid_lsn() -> u64
|
|
{
|
|
let shared = PAGECACHE.lock().unwrap();
|
|
|
|
return shared.last_valid_lsn;
|
|
}
|
|
|
|
|
|
//
|
|
// Simple test function for the WAL redo code:
|
|
//
|
|
// 1. Pick a page from the page cache at random.
|
|
// 2. Request that page with GetPage@LSN, using Max LSN (i.e. get the latest page version)
|
|
//
|
|
//
|
|
pub fn test_get_page_at_lsn()
|
|
{
|
|
// for quick testing of the get_page_at_lsn() funcion.
|
|
//
|
|
// Get a random page from the page cache. Apply all its WAL, by requesting
|
|
// that page at the highest lsn.
|
|
|
|
let mut tag: Option<BufferTag> = None;
|
|
|
|
{
|
|
let shared = PAGECACHE.lock().unwrap();
|
|
let pagecache = &shared.pagecache;
|
|
|
|
if pagecache.is_empty() {
|
|
println!("page cache is empty");
|
|
return;
|
|
}
|
|
|
|
// Find nth entry in the map, where n is picked at random
|
|
let n = rand::thread_rng().gen_range(0..pagecache.len());
|
|
let mut i = 0;
|
|
for (key, _e) in pagecache.iter() {
|
|
if i == n {
|
|
tag = Some(key.tag);
|
|
break;
|
|
}
|
|
i += 1;
|
|
}
|
|
}
|
|
|
|
println!("testing GetPage@LSN for block {}", tag.unwrap().blknum);
|
|
match get_page_at_lsn(tag.unwrap(), 0xffff_ffff_ffff_eeee) {
|
|
Ok(_img) => {
|
|
// This prints out the whole page image.
|
|
//println!("{:X?}", img);
|
|
},
|
|
Err(error) => {
|
|
println!("GetPage@LSN failed: {}", error);
|
|
}
|
|
}
|
|
}
|