mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-10 06:52:55 +00:00
Set hint bits in pageserver
This commit is contained in:
@@ -1446,6 +1446,7 @@ impl LayeredTimeline {
|
||||
trace!("found {} WAL records that will init the page for blk {} in {} at {}/{}, performing WAL redo", data.records.len(), blknum, rel, self.timelineid, request_lsn);
|
||||
}
|
||||
let img = self.walredo_mgr.request_redo(
|
||||
self,
|
||||
rel,
|
||||
blknum,
|
||||
request_lsn,
|
||||
|
||||
@@ -507,6 +507,7 @@ mod tests {
|
||||
impl WalRedoManager for TestRedoManager {
|
||||
fn request_redo(
|
||||
&self,
|
||||
timeline: &dyn Timeline,
|
||||
rel: RelishTag,
|
||||
blknum: u32,
|
||||
lsn: Lsn,
|
||||
|
||||
@@ -43,7 +43,7 @@ use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::zid::ZTenantId;
|
||||
|
||||
use crate::relish::*;
|
||||
use crate::repository::WALRecord;
|
||||
use crate::repository::{Timeline, WALRecord};
|
||||
use crate::waldecoder::XlMultiXactCreate;
|
||||
use crate::waldecoder::XlXactParsedRecord;
|
||||
use crate::PageServerConf;
|
||||
@@ -79,6 +79,7 @@ pub trait WalRedoManager: Send + Sync {
|
||||
/// the records.
|
||||
fn request_redo(
|
||||
&self,
|
||||
timeline: &dyn Timeline,
|
||||
rel: RelishTag,
|
||||
blknum: u32,
|
||||
lsn: Lsn,
|
||||
@@ -96,6 +97,7 @@ pub struct DummyRedoManager {}
|
||||
impl crate::walredo::WalRedoManager for DummyRedoManager {
|
||||
fn request_redo(
|
||||
&self,
|
||||
_timeline: &dyn Timeline,
|
||||
_rel: RelishTag,
|
||||
_blknum: u32,
|
||||
_lsn: Lsn,
|
||||
@@ -176,6 +178,7 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
///
|
||||
fn request_redo(
|
||||
&self,
|
||||
timeline: &dyn Timeline,
|
||||
rel: RelishTag,
|
||||
blknum: u32,
|
||||
lsn: Lsn,
|
||||
@@ -216,6 +219,13 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
WAL_REDO_WAIT_TIME.observe(lock_time.duration_since(start_time).as_secs_f64());
|
||||
WAL_REDO_TIME.observe(end_time.duration_since(lock_time).as_secs_f64());
|
||||
|
||||
if let Ok(page) = result {
|
||||
let mut buf = BytesMut::new();
|
||||
buf.extend_from_slice(&page);
|
||||
self.set_hint_bits(timeline, &mut buf, lsn, &request.records);
|
||||
return Ok(buf.freeze());
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
@@ -242,6 +252,117 @@ impl PostgresRedoManager {
|
||||
}
|
||||
}
|
||||
|
||||
fn xid_status(&self, timeline: &dyn Timeline, xid: u32, lsn: Lsn) -> u8 {
|
||||
let pageno = xid / pg_constants::CLOG_XACTS_PER_PAGE;
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
if let Ok(clog_page) = timeline.get_page_at_lsn_nowait(
|
||||
RelishTag::Slru {
|
||||
slru: SlruKind::Clog,
|
||||
segno,
|
||||
},
|
||||
rpageno,
|
||||
lsn,
|
||||
) {
|
||||
postgres_ffi::nonrelfile_utils::transaction_id_get_status(xid, &clog_page[..])
|
||||
} else {
|
||||
pg_constants::TRANSACTION_STATUS_IN_PROGRESS
|
||||
}
|
||||
}
|
||||
|
||||
fn set_hint_bits(
|
||||
&self,
|
||||
timeline: &dyn Timeline,
|
||||
page: &mut BytesMut,
|
||||
lsn: Lsn,
|
||||
records: &Vec<WALRecord>,
|
||||
) {
|
||||
let mut flags = LittleEndian::read_u16(
|
||||
&page[pg_constants::PD_FLAGS_OFFSET..pg_constants::PD_FLAGS_OFFSET + 2],
|
||||
);
|
||||
if (flags & (pg_constants::PD_HEAP_RELATION | pg_constants::PD_NONHEAP_RELATION)) == 0 {
|
||||
// If type of relation was not determined yet,
|
||||
// then do it now
|
||||
for r in records {
|
||||
let xl_rmid = r.rec[pg_constants::XL_RMID_OFFS];
|
||||
if xl_rmid == pg_constants::RM_HEAP_ID || xl_rmid == pg_constants::RM_HEAP2_ID {
|
||||
flags |= pg_constants::PD_HEAP_RELATION;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (flags & pg_constants::PD_HEAP_RELATION) == 0 {
|
||||
flags |= pg_constants::PD_NONHEAP_RELATION;
|
||||
}
|
||||
LittleEndian::write_u16(
|
||||
&mut page[pg_constants::PD_FLAGS_OFFSET..pg_constants::PD_FLAGS_OFFSET + 2],
|
||||
flags,
|
||||
);
|
||||
}
|
||||
if (flags & pg_constants::PD_HEAP_RELATION) != 0 {
|
||||
// Set hint bits for heap relation page
|
||||
let pd_lower = LittleEndian::read_u16(
|
||||
&page[pg_constants::PD_LOWER_OFFSET..pg_constants::PD_LOWER_OFFSET + 2],
|
||||
) as usize;
|
||||
let mut tid_offs = pg_constants::SIZE_OF_PAGE_HEADER_DATA;
|
||||
while tid_offs < pd_lower {
|
||||
let tid = LittleEndian::read_u32(&page[tid_offs..tid_offs + 4]);
|
||||
let lp_off = (tid & 0x7FFF) as usize;
|
||||
if ((tid >> 15) & 3) == pg_constants::LP_NORMAL {
|
||||
// normal item pointer
|
||||
let t_xmin = LittleEndian::read_u32(
|
||||
&page[lp_off + pg_constants::T_XMIN_OFFS
|
||||
..lp_off + pg_constants::T_XMIN_OFFS + 4],
|
||||
);
|
||||
let t_xmax = LittleEndian::read_u32(
|
||||
&page[lp_off + pg_constants::T_XMAX_OFFS
|
||||
..lp_off + pg_constants::T_XMAX_OFFS + 4],
|
||||
);
|
||||
let mut t_infomask = LittleEndian::read_u16(
|
||||
&page[lp_off + pg_constants::T_INFOMASK_OFFS
|
||||
..lp_off + pg_constants::T_INFOMASK_OFFS + 2],
|
||||
);
|
||||
if (t_infomask
|
||||
& (pg_constants::HEAP_XMIN_COMMITTED | pg_constants::HEAP_XMIN_INVALID))
|
||||
== 0
|
||||
&& t_xmin != 0
|
||||
{
|
||||
let status = self.xid_status(timeline, t_xmin, lsn);
|
||||
if status == pg_constants::TRANSACTION_STATUS_COMMITTED {
|
||||
t_infomask |= pg_constants::HEAP_XMIN_COMMITTED;
|
||||
} else if status == pg_constants::TRANSACTION_STATUS_ABORTED {
|
||||
t_infomask |= pg_constants::HEAP_XMIN_INVALID;
|
||||
}
|
||||
LittleEndian::write_u16(
|
||||
&mut page[lp_off + pg_constants::T_INFOMASK_OFFS
|
||||
..lp_off + pg_constants::T_INFOMASK_OFFS + 2],
|
||||
t_infomask,
|
||||
);
|
||||
}
|
||||
if (t_infomask
|
||||
& (pg_constants::HEAP_XMAX_COMMITTED
|
||||
| pg_constants::HEAP_XMAX_INVALID
|
||||
| pg_constants::HEAP_XMAX_IS_MULTI))
|
||||
== 0
|
||||
&& t_xmax != 0
|
||||
{
|
||||
let status = self.xid_status(timeline, t_xmax, lsn);
|
||||
if status == pg_constants::TRANSACTION_STATUS_COMMITTED {
|
||||
t_infomask |= pg_constants::HEAP_XMAX_COMMITTED;
|
||||
} else if status == pg_constants::TRANSACTION_STATUS_ABORTED {
|
||||
t_infomask |= pg_constants::HEAP_XMAX_INVALID;
|
||||
}
|
||||
LittleEndian::write_u16(
|
||||
&mut page[lp_off + pg_constants::T_INFOMASK_OFFS
|
||||
..lp_off + pg_constants::T_INFOMASK_OFFS + 2],
|
||||
t_infomask,
|
||||
);
|
||||
}
|
||||
}
|
||||
tid_offs += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Process one request for WAL redo.
|
||||
///
|
||||
|
||||
@@ -46,6 +46,7 @@ pub const SIZE_OF_PAGE_HEADER: u16 = 24;
|
||||
pub const BITS_PER_HEAPBLOCK: u16 = 2;
|
||||
pub const HEAPBLOCKS_PER_PAGE: u16 = (BLCKSZ - SIZE_OF_PAGE_HEADER) * 8 / BITS_PER_HEAPBLOCK;
|
||||
|
||||
// Transaction status codes, as compared against the value returned by the
// CLOG lookup in the WAL redo manager's xid_status().
//
// IN_PROGRESS is also what xid_status() reports when the CLOG page cannot be
// fetched, so it doubles as "status unknown".
pub const TRANSACTION_STATUS_IN_PROGRESS: u8 = 0x00;
|
||||
// set_hint_bits() turns this status into HEAP_XMIN_COMMITTED / HEAP_XMAX_COMMITTED.
pub const TRANSACTION_STATUS_COMMITTED: u8 = 0x01;
|
||||
// set_hint_bits() turns this status into HEAP_XMIN_INVALID / HEAP_XMAX_INVALID.
pub const TRANSACTION_STATUS_ABORTED: u8 = 0x02;
|
||||
// NOTE(review): presumably a committed subtransaction whose parent outcome is
// still pending; set_hint_bits() sets no hint bit for it. Confirm against clog.h.
pub const TRANSACTION_STATUS_SUB_COMMITTED: u8 = 0x03;
|
||||
@@ -191,6 +192,31 @@ pub const XLP_LONG_HEADER: u16 = 0x0002;
|
||||
|
||||
pub const PG_MAJORVERSION: &str = "14";
|
||||
|
||||
// Zenith specific page flags used to distinguish heap and non-heap relations
// Both are bits in the 16-bit flags word read at PD_FLAGS_OFFSET; set_hint_bits()
// sets exactly one of them the first time it sees a page with neither set.
|
||||
pub const PD_HEAP_RELATION: u16 = 0x10;
|
||||
pub const PD_NONHEAP_RELATION: u16 = 0x20;
|
||||
|
||||
// bufpage.h
// Byte offsets of 2-byte little-endian fields inside the page header.
|
||||
pub const PD_FLAGS_OFFSET: usize = 10; // PageHeaderData.pd_flags
|
||||
pub const PD_LOWER_OFFSET: usize = 12; // PageHeaderData.pd_lower
|
||||
|
||||
// itemid.h
// Line pointer state value; set_hint_bits() compares it with the 2-bit field
// found at bits 15..16 of each 4-byte line pointer.
|
||||
pub const LP_NORMAL: u32 = 1;
|
||||
|
||||
// htup_details.h
// Byte offsets of tuple header fields, relative to the start of the tuple.
|
||||
pub const T_XMIN_OFFS: usize = 0;
|
||||
pub const T_XMAX_OFFS: usize = 4;
|
||||
// Three 4-byte fields, then three 2-byte fields, then one more 2-byte field
// precede t_infomask (= 20). NOTE(review): presumably t_xmin/t_xmax/t_field3,
// t_ctid and t_infomask2 -- verify against HeapTupleHeaderData.
pub const T_INFOMASK_OFFS: usize = 4 * 3 + 2 * 3 + 2;
|
||||
// Bits of the 16-bit t_infomask word.
pub const HEAP_XMIN_COMMITTED: u16 = 0x0100; /* t_xmin committed */
|
||||
pub const HEAP_XMIN_INVALID: u16 = 0x0200; /* t_xmin invalid/aborted */
|
||||
pub const HEAP_XMAX_COMMITTED: u16 = 0x0400; /* t_xmax committed */
|
||||
pub const HEAP_XMAX_INVALID: u16 = 0x0800; /* t_xmax invalid/aborted */
|
||||
pub const HEAP_XMAX_IS_MULTI: u16 = 0x1000; /* t_xmax is a MultiXactId */
|
||||
// Offset at which set_hint_bits() starts scanning the line pointer array.
pub const SIZE_OF_PAGE_HEADER_DATA: usize = 24;
|
||||
|
||||
// xlogrecord.h
// Byte index within a WAL record at which set_hint_bits() reads the resource
// manager id (compared against RM_HEAP_ID / RM_HEAP2_ID).
|
||||
pub const XL_RMID_OFFS: usize = 17;
|
||||
|
||||
// List of subdirectories inside pgdata.
|
||||
// Copied from src/bin/initdb/initdb.c
|
||||
pub const PGDATA_SUBDIRS: [&str; 22] = [
|
||||
|
||||
Reference in New Issue
Block a user