mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 05:52:55 +00:00
Replay non-relational WAL records on page server
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
||||
|
||||
pub mod controlfile_utils;
|
||||
pub mod nonrelfile_utils;
|
||||
pub mod pg_constants;
|
||||
pub mod relfile_utils;
|
||||
pub mod xlog_utils;
|
||||
|
||||
@@ -26,6 +26,19 @@ pub const SMGR_TRUNCATE_HEAP: u32 = 0x0001;
|
||||
pub const SMGR_TRUNCATE_VM: u32 = 0x0002;
|
||||
pub const SMGR_TRUNCATE_FSM: u32 = 0x0004;
|
||||
|
||||
// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
|
||||
// --with-segsize=SEGSIZE, but assume the defaults for now.
|
||||
pub const BLCKSZ: u16 = 8192;
|
||||
pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
|
||||
|
||||
//
|
||||
// constants from clog.h
|
||||
//
|
||||
pub const CLOG_XACTS_PER_BYTE: u32 = 4;
|
||||
pub const CLOG_XACTS_PER_PAGE: u32 = BLCKSZ as u32 * CLOG_XACTS_PER_BYTE;
|
||||
pub const CLOG_BITS_PER_XACT: u8 = 2;
|
||||
pub const CLOG_XACT_BITMASK: u8 = (1 << CLOG_BITS_PER_XACT) - 1;
|
||||
|
||||
//
|
||||
// Constants from visbilitymap.h
|
||||
//
|
||||
@@ -33,6 +46,14 @@ pub const SIZE_OF_PAGE_HEADER: u16 = 24;
|
||||
pub const BITS_PER_HEAPBLOCK: u16 = 2;
|
||||
pub const HEAPBLOCKS_PER_PAGE: u16 = (BLCKSZ - SIZE_OF_PAGE_HEADER) * 8 / BITS_PER_HEAPBLOCK;
|
||||
|
||||
pub const TRANSACTION_STATUS_IN_PROGRESS: u8 = 0x00;
|
||||
pub const TRANSACTION_STATUS_COMMITTED: u8 = 0x01;
|
||||
pub const TRANSACTION_STATUS_ABORTED: u8 = 0x02;
|
||||
pub const TRANSACTION_STATUS_SUB_COMMITTED: u8 = 0x03;
|
||||
|
||||
pub const CLOG_ZEROPAGE: u8 = 0x00;
|
||||
pub const CLOG_TRUNCATE: u8 = 0x10;
|
||||
|
||||
// From xact.h
|
||||
pub const XLOG_XACT_COMMIT: u8 = 0x00;
|
||||
pub const XLOG_XACT_PREPARE: u8 = 0x10;
|
||||
@@ -40,6 +61,10 @@ pub const XLOG_XACT_ABORT: u8 = 0x20;
|
||||
pub const XLOG_XACT_COMMIT_PREPARED: u8 = 0x30;
|
||||
pub const XLOG_XACT_ABORT_PREPARED: u8 = 0x40;
|
||||
|
||||
// From srlu.h
|
||||
pub const SLRU_PAGES_PER_SEGMENT: u32 = 32;
|
||||
pub const SLRU_SEG_SIZE: usize = BLCKSZ as usize * SLRU_PAGES_PER_SEGMENT as usize;
|
||||
|
||||
/* mask for filtering opcodes out of xl_info */
|
||||
pub const XLOG_XACT_OPMASK: u8 = 0x70;
|
||||
pub const XLOG_HEAP_OPMASK: u8 = 0x70;
|
||||
@@ -60,8 +85,32 @@ pub const XACT_XINFO_HAS_TWOPHASE: u32 = 1u32 << 4;
|
||||
// pub const XACT_XINFO_HAS_GID: u32 = 1u32 << 7;
|
||||
|
||||
// From pg_control.h and rmgrlist.h
|
||||
pub const XLOG_NEXTOID: u8 = 0x30;
|
||||
pub const XLOG_SWITCH: u8 = 0x40;
|
||||
pub const XLOG_SMGR_TRUNCATE: u8 = 0x20;
|
||||
pub const DB_SHUTDOWNED: u32 = 1;
|
||||
|
||||
// From multixact.h
|
||||
pub const FIRST_MULTIXACT_ID: u32 = 1;
|
||||
pub const MAX_MULTIXACT_ID: u32 = 0xFFFFFFFF;
|
||||
|
||||
pub const XLOG_MULTIXACT_ZERO_OFF_PAGE: u8 = 0x00;
|
||||
pub const XLOG_MULTIXACT_ZERO_MEM_PAGE: u8 = 0x10;
|
||||
pub const XLOG_MULTIXACT_CREATE_ID: u8 = 0x20;
|
||||
pub const XLOG_MULTIXACT_TRUNCATE_ID: u8 = 0x30;
|
||||
|
||||
pub const MULTIXACT_OFFSETS_PER_PAGE: u16 = BLCKSZ / 4;
|
||||
pub const MXACT_MEMBER_BITS_PER_XACT: u16 = 8;
|
||||
pub const MXACT_MEMBER_FLAGS_PER_BYTE: u16 = 1;
|
||||
pub const MULTIXACT_FLAGBYTES_PER_GROUP: u16 = 4;
|
||||
pub const MULTIXACT_MEMBERS_PER_MEMBERGROUP: u16 =
|
||||
MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE;
|
||||
/* size in bytes of a complete group */
|
||||
pub const MULTIXACT_MEMBERGROUP_SIZE: u16 =
|
||||
4 * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP;
|
||||
pub const MULTIXACT_MEMBERGROUPS_PER_PAGE: u16 = BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE;
|
||||
pub const MULTIXACT_MEMBERS_PER_PAGE: u16 =
|
||||
MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP;
|
||||
|
||||
// From heapam_xlog.h
|
||||
pub const XLOG_HEAP_INSERT: u8 = 0x00;
|
||||
@@ -101,11 +150,6 @@ pub const XLOG_TBLSPC_DROP: u8 = 0x10;
|
||||
|
||||
pub const SIZEOF_XLOGRECORD: u32 = 24;
|
||||
|
||||
// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
|
||||
// --with-segsize=SEGSIZE, but assume the defaults for now.
|
||||
pub const BLCKSZ: u16 = 8192;
|
||||
pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
|
||||
|
||||
//
|
||||
// from xlogrecord.h
|
||||
//
|
||||
@@ -136,3 +180,8 @@ pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;
|
||||
|
||||
/* FIXME: pageserver should request wal_seg_size from compute node */
|
||||
pub const WAL_SEGMENT_SIZE: usize = 16 * 1024 * 1024;
|
||||
|
||||
pub const XLOG_BLCKSZ: usize = 8192;
|
||||
pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
|
||||
pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
|
||||
pub const XLP_LONG_HEADER: u16 = 0x0002;
|
||||
|
||||
@@ -9,14 +9,16 @@
|
||||
|
||||
use crate::pg_constants;
|
||||
use crate::CheckPoint;
|
||||
use crate::ControlFileData;
|
||||
use crate::FullTransactionId;
|
||||
use crate::XLogLongPageHeaderData;
|
||||
use crate::XLogPageHeaderData;
|
||||
use crate::XLogRecord;
|
||||
|
||||
use crate::XLOG_PAGE_MAGIC;
|
||||
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
use bytes::{Buf, Bytes};
|
||||
use bytes::{BufMut, BytesMut};
|
||||
use crc32c::*;
|
||||
use log::*;
|
||||
use std::cmp::min;
|
||||
@@ -35,12 +37,15 @@ pub const MAX_SEND_SIZE: usize = XLOG_BLCKSZ * 16;
|
||||
pub const XLOG_SIZE_OF_XLOG_SHORT_PHD: usize = std::mem::size_of::<XLogPageHeaderData>();
|
||||
pub const XLOG_SIZE_OF_XLOG_LONG_PHD: usize = std::mem::size_of::<XLogLongPageHeaderData>();
|
||||
pub const XLOG_SIZE_OF_XLOG_RECORD: usize = std::mem::size_of::<XLogRecord>();
|
||||
pub const SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT: usize = 1 * 2;
|
||||
|
||||
pub type XLogRecPtr = u64;
|
||||
pub type TimeLineID = u32;
|
||||
pub type TimestampTz = i64;
|
||||
pub type XLogSegNo = u64;
|
||||
|
||||
const XID_CHECKPOINT_INTERVAL: u32 = 1024;
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn XLogSegmentsPerXLogId(wal_segsz_bytes: usize) -> XLogSegNo {
|
||||
(0x100000000u64 / wal_segsz_bytes as u64) as XLogSegNo
|
||||
@@ -368,6 +373,7 @@ impl CheckPoint {
|
||||
// Next XID should be greater than new_xid.
|
||||
// Also take in account 32-bit wrap-around.
|
||||
pub fn update_next_xid(&mut self, xid: u32) {
|
||||
let xid = xid.wrapping_add(XID_CHECKPOINT_INTERVAL - 1) & !(XID_CHECKPOINT_INTERVAL - 1);
|
||||
let full_xid = self.nextXid.value;
|
||||
let new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
|
||||
let old_xid = full_xid as u32;
|
||||
@@ -383,3 +389,58 @@ impl CheckPoint {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_wal_segment(pg_control: &ControlFileData) -> Bytes {
|
||||
let mut seg_buf = BytesMut::with_capacity(pg_constants::WAL_SEGMENT_SIZE as usize);
|
||||
|
||||
let hdr = XLogLongPageHeaderData {
|
||||
std: {
|
||||
XLogPageHeaderData {
|
||||
xlp_magic: XLOG_PAGE_MAGIC as u16,
|
||||
xlp_info: pg_constants::XLP_LONG_HEADER,
|
||||
xlp_tli: 1, // FIXME: always use Postgres timeline 1
|
||||
xlp_pageaddr: pg_control.checkPoint - XLOG_SIZE_OF_XLOG_LONG_PHD as u64,
|
||||
xlp_rem_len: 0,
|
||||
}
|
||||
},
|
||||
xlp_sysid: pg_control.system_identifier,
|
||||
xlp_seg_size: pg_constants::WAL_SEGMENT_SIZE as u32,
|
||||
xlp_xlog_blcksz: XLOG_BLCKSZ as u32,
|
||||
};
|
||||
|
||||
let hdr_bytes = hdr.encode();
|
||||
seg_buf.extend_from_slice(&hdr_bytes);
|
||||
|
||||
let rec_hdr = XLogRecord {
|
||||
xl_tot_len: (XLOG_SIZE_OF_XLOG_RECORD
|
||||
+ SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT
|
||||
+ SIZEOF_CHECKPOINT) as u32,
|
||||
xl_xid: 0, //0 is for InvalidTransactionId
|
||||
xl_prev: 0,
|
||||
xl_info: pg_constants::XLOG_CHECKPOINT_SHUTDOWN,
|
||||
xl_rmid: pg_constants::RM_XLOG_ID,
|
||||
xl_crc: 0,
|
||||
};
|
||||
|
||||
let mut rec_shord_hdr_bytes = BytesMut::new();
|
||||
rec_shord_hdr_bytes.put_u8(pg_constants::XLR_BLOCK_ID_DATA_SHORT);
|
||||
rec_shord_hdr_bytes.put_u8(SIZEOF_CHECKPOINT as u8);
|
||||
|
||||
let rec_bytes = rec_hdr.encode();
|
||||
let checkpoint_bytes = pg_control.checkPointCopy.encode();
|
||||
|
||||
//calculate record checksum
|
||||
let mut crc = 0;
|
||||
crc = crc32c_append(crc, &rec_shord_hdr_bytes[..]);
|
||||
crc = crc32c_append(crc, &checkpoint_bytes[..]);
|
||||
crc = crc32c_append(crc, &rec_bytes[0..XLOG_RECORD_CRC_OFFS]);
|
||||
|
||||
seg_buf.extend_from_slice(&rec_bytes[0..XLOG_RECORD_CRC_OFFS]);
|
||||
seg_buf.put_u32_le(crc);
|
||||
seg_buf.extend_from_slice(&rec_shord_hdr_bytes);
|
||||
seg_buf.extend_from_slice(&checkpoint_bytes);
|
||||
|
||||
//zero out remainig file
|
||||
seg_buf.resize(pg_constants::WAL_SEGMENT_SIZE, 0);
|
||||
seg_buf.freeze()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user