Handle VM and FSM truncation WAL records in the page server.

Fixes issue #190.

Original patch by Konstantin Knizhnik.
This commit is contained in:
Heikki Linnakangas
2021-05-31 23:36:17 +03:00
parent 1aceea1bdd
commit ac60b68d50
5 changed files with 129 additions and 28 deletions

View File

@@ -281,27 +281,12 @@ pub fn save_decoded_record(
&& (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) == pg_constants::XLOG_SMGR_TRUNCATE
{
let truncate = XlSmgrTruncate::decode(&decoded);
if (truncate.flags & pg_constants::SMGR_TRUNCATE_HEAP) != 0 {
let rel = RelTag {
spcnode: truncate.rnode.spcnode,
dbnode: truncate.rnode.dbnode,
relnode: truncate.rnode.relnode,
forknum: pg_constants::MAIN_FORKNUM,
};
timeline.put_truncation(rel, lsn, truncate.blkno)?;
}
save_xlog_smgr_truncate(timeline, lsn, &truncate)?;
} else if decoded.xl_rmid == pg_constants::RM_DBASE_ID
&& (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) == pg_constants::XLOG_DBASE_CREATE
{
let createdb = XlCreateDatabase::decode(&decoded);
save_create_database(
timeline,
lsn,
createdb.db_id,
createdb.tablespace_id,
createdb.src_db_id,
createdb.src_tablespace_id,
)?;
save_xlog_dbase_create(timeline, lsn, &createdb)?;
}
// Now that this record has been handled, let the repository know that
@@ -311,14 +296,12 @@ pub fn save_decoded_record(
}
/// Subroutine of save_decoded_record(), to handle an XLOG_DBASE_CREATE record.
fn save_create_database(
timeline: &dyn Timeline,
lsn: Lsn,
db_id: Oid,
tablespace_id: Oid,
src_db_id: Oid,
src_tablespace_id: Oid,
) -> Result<()> {
fn save_xlog_dbase_create(timeline: &dyn Timeline, lsn: Lsn, rec: &XlCreateDatabase) -> Result<()> {
let db_id = rec.db_id;
let tablespace_id = rec.tablespace_id;
let src_db_id = rec.src_db_id;
let src_tablespace_id = rec.src_tablespace_id;
// Creating a database is implemented by copying the template (aka. source) database.
// To copy all the relations, we need to ask for the state as of the same LSN, but we
// cannot pass 'lsn' to the Timeline.get_* functions, or they will block waiting for
@@ -328,7 +311,7 @@ fn save_create_database(
let rels = timeline.list_rels(src_tablespace_id, src_db_id, req_lsn)?;
info!("creatdb: {} rels", rels.len());
trace!("save_create_database: {} rels", rels.len());
let mut num_rels_copied = 0;
let mut num_blocks_copied = 0;
@@ -376,3 +359,65 @@ fn save_create_database(
);
Ok(())
}
/// Applies an XLOG_SMGR_TRUNCATE record to the timeline. Called from
/// save_decoded_record().
///
/// Follows the same logic as PostgreSQL's smgr_redo() function: for every fork
/// selected by the record's flags, a truncation is recorded at `lsn`.
fn save_xlog_smgr_truncate(timeline: &dyn Timeline, lsn: Lsn, rec: &XlSmgrTruncate) -> Result<()> {
    // Every fork shares the relation's identity; only the fork number varies.
    let tag_for_fork = |forknum| RelTag {
        spcnode: rec.rnode.spcnode,
        dbnode: rec.rnode.dbnode,
        relnode: rec.rnode.relnode,
        forknum,
    };

    // Main fork: the record carries the new heap size directly.
    if (rec.flags & pg_constants::SMGR_TRUNCATE_HEAP) != 0 {
        timeline.put_truncation(tag_for_fork(pg_constants::MAIN_FORKNUM), lsn, rec.blkno)?;
    }

    // Free space map.
    //
    // FIXME: the 'blkno' in the WAL record is the new size of the heap, not
    // of the FSM. Deriving the new FSM size from it is fairly involved (see
    // FreeSpaceMapPrepareTruncateRel() in PostgreSQL), and a faithful
    // implementation would additionally have to clear bits in the tail FSM
    // block and update the upper-level FSM pages. None of that exists here.
    // Instead the FSM is always truncated to zero blocks, which costs
    // performance but is safe. (Correctness does not depend on the FSM, so
    // leaving garbage behind would also be acceptable; removing it is simply
    // tidier.)
    if (rec.flags & pg_constants::SMGR_TRUNCATE_FSM) != 0 {
        let fsm_rel = tag_for_fork(pg_constants::FSM_FORKNUM);
        if rec.blkno != 0 {
            info!("Partial truncation of FSM is not supported");
        }
        timeline.put_truncation(fsm_rel, lsn, 0)?;
    }

    // Visibility map.
    //
    // FIXME: as with the FSM, proper partial truncation of the VM has not
    // been implemented, so the whole fork is always dropped to zero blocks.
    // In contrast to the FSM, the VM must never contain bits that are set
    // incorrectly; clearing bits, or removing the map entirely, is always
    // fine from a correctness standpoint.
    if (rec.flags & pg_constants::SMGR_TRUNCATE_VM) != 0 {
        let vm_rel = tag_for_fork(pg_constants::VISIBILITYMAP_FORKNUM);
        if rec.blkno != 0 {
            info!("Partial truncation of VM is not supported");
        }
        timeline.put_truncation(vm_rel, lsn, 0)?;
    }

    Ok(())
}

View File

@@ -264,7 +264,7 @@ pub struct DecodedBkpBlock {
/* Information on full-page image, if any */
has_image: bool, /* has image, even for consistency checking */
pub apply_image: bool, /* has image that should be restored */
pub will_init: bool,
pub will_init: bool, /* record doesn't need previous page version to apply */
//char *bkp_image;
hole_offset: u16,
hole_length: u16,
@@ -850,7 +850,7 @@ pub fn decode_wal_record(record: Bytes) -> DecodedWALRecord {
let spcnode = buf.get_u32_le();
let dbnode = buf.get_u32_le();
let relnode = buf.get_u32_le();
//TODO save these too
//TODO handle this too?
trace!(
"XLOG_XACT_ABORT relfilenode {}/{}/{}",
spcnode,

View File

@@ -20,6 +20,8 @@ pub const ROCKSDB_SPECIAL_FORKNUM: u8 = 50;
// From storage_xlog.h
//
// Bit flags tested against the `flags` field of a decoded XLOG_SMGR_TRUNCATE
// record. Each bit selects one fork of the relation to truncate; several bits
// may be set in the same record.
// Truncate the main (heap) fork.
pub const SMGR_TRUNCATE_HEAP: u32 = 0x0001;
// Truncate the visibility map fork.
pub const SMGR_TRUNCATE_VM: u32 = 0x0002;
// Truncate the free space map fork.
pub const SMGR_TRUNCATE_FSM: u32 = 0x0004;
//
// Constants from visibilitymap.h

View File

@@ -17,3 +17,29 @@ SELECT * FROM truncatetest;
(0 rows)
DROP TABLE truncatetest;
--
-- Test that the FSM is truncated along with the table.
--
-- Create a test table and delete and vacuum away most of the rows.
-- This leaves the FSM full of pages with plenty of space
create table tt(i int);
insert into tt select g from generate_series(1, 100000) g;
delete from tt where i%100 != 0 and i > 10000;
vacuum freeze tt;
-- Delete the rest of the rows, and vacuum again. This truncates the
-- heap to 0 blocks, and should also truncate the FSM.
delete from tt;
vacuum tt;
-- This can be used to look at the FSM directly, if the 'pg_freespace' contrib module
-- is installed
--SELECT blkno, avail from generate_series(1, 450) blkno, pg_freespace('tt'::regclass, blkno) AS avail;
-- Insert a row again. It should go on block #0. If the FSM was not truncated,
-- the insertion would find a higher-numbered block in the FSM and use that instead.
insert into tt values (0);
select ctid, * from tt;
ctid | i
-------+---
(0,1) | 0
(1 row)
drop table tt;

View File

@@ -16,3 +16,31 @@ VACUUM truncatetest;
SELECT * FROM truncatetest;
DROP TABLE truncatetest;
--
-- Test that the FSM is truncated along with the table.
--
-- Create a test table and delete and vacuum away most of the rows.
-- This leaves the FSM full of pages with plenty of space
create table tt(i int);
insert into tt select g from generate_series(1, 100000) g;
delete from tt where i%100 != 0 and i > 10000;
vacuum freeze tt;
-- Delete the rest of the rows, and vacuum again. This truncates the
-- heap to 0 blocks, and should also truncate the FSM.
delete from tt;
vacuum tt;
-- This can be used to look at the FSM directly, if the 'pg_freespace' contrib module
-- is installed
--SELECT blkno, avail from generate_series(1, 450) blkno, pg_freespace('tt'::regclass, blkno) AS avail;
-- Insert a row again. It should go on block #0. If the FSM was not truncated,
-- the insertion would find a higher-numbered block in the FSM and use that instead.
insert into tt values (0);
select ctid, * from tt;
drop table tt;