mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 14:02:55 +00:00
Handle VM and FSM truncation WAL records in the page server.
Fixes issue #190. Original patch by Konstantin Knizhnik.
This commit is contained in:
@@ -281,27 +281,12 @@ pub fn save_decoded_record(
|
||||
&& (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) == pg_constants::XLOG_SMGR_TRUNCATE
|
||||
{
|
||||
let truncate = XlSmgrTruncate::decode(&decoded);
|
||||
if (truncate.flags & pg_constants::SMGR_TRUNCATE_HEAP) != 0 {
|
||||
let rel = RelTag {
|
||||
spcnode: truncate.rnode.spcnode,
|
||||
dbnode: truncate.rnode.dbnode,
|
||||
relnode: truncate.rnode.relnode,
|
||||
forknum: pg_constants::MAIN_FORKNUM,
|
||||
};
|
||||
timeline.put_truncation(rel, lsn, truncate.blkno)?;
|
||||
}
|
||||
save_xlog_smgr_truncate(timeline, lsn, &truncate)?;
|
||||
} else if decoded.xl_rmid == pg_constants::RM_DBASE_ID
|
||||
&& (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK) == pg_constants::XLOG_DBASE_CREATE
|
||||
{
|
||||
let createdb = XlCreateDatabase::decode(&decoded);
|
||||
save_create_database(
|
||||
timeline,
|
||||
lsn,
|
||||
createdb.db_id,
|
||||
createdb.tablespace_id,
|
||||
createdb.src_db_id,
|
||||
createdb.src_tablespace_id,
|
||||
)?;
|
||||
save_xlog_dbase_create(timeline, lsn, &createdb)?;
|
||||
}
|
||||
|
||||
// Now that this record has been handled, let the repository know that
|
||||
@@ -311,14 +296,12 @@ pub fn save_decoded_record(
|
||||
}
|
||||
|
||||
/// Subroutine of save_decoded_record(), to handle an XLOG_DBASE_CREATE record.
|
||||
fn save_create_database(
|
||||
timeline: &dyn Timeline,
|
||||
lsn: Lsn,
|
||||
db_id: Oid,
|
||||
tablespace_id: Oid,
|
||||
src_db_id: Oid,
|
||||
src_tablespace_id: Oid,
|
||||
) -> Result<()> {
|
||||
fn save_xlog_dbase_create(timeline: &dyn Timeline, lsn: Lsn, rec: &XlCreateDatabase) -> Result<()> {
|
||||
let db_id = rec.db_id;
|
||||
let tablespace_id = rec.tablespace_id;
|
||||
let src_db_id = rec.src_db_id;
|
||||
let src_tablespace_id = rec.src_tablespace_id;
|
||||
|
||||
// Creating a database is implemented by copying the template (aka. source) database.
|
||||
// To copy all the relations, we need to ask for the state as of the same LSN, but we
|
||||
// cannot pass 'lsn' to the Timeline.get_* functions, or they will block waiting for
|
||||
@@ -328,7 +311,7 @@ fn save_create_database(
|
||||
|
||||
let rels = timeline.list_rels(src_tablespace_id, src_db_id, req_lsn)?;
|
||||
|
||||
info!("creatdb: {} rels", rels.len());
|
||||
trace!("save_create_database: {} rels", rels.len());
|
||||
|
||||
let mut num_rels_copied = 0;
|
||||
let mut num_blocks_copied = 0;
|
||||
@@ -376,3 +359,65 @@ fn save_create_database(
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Subroutine of save_decoded_record(), to handle an XLOG_SMGR_TRUNCATE record.
|
||||
///
|
||||
/// This is the same logic as in PostgreSQL's smgr_redo() function.
|
||||
fn save_xlog_smgr_truncate(timeline: &dyn Timeline, lsn: Lsn, rec: &XlSmgrTruncate) -> Result<()> {
|
||||
let spcnode = rec.rnode.spcnode;
|
||||
let dbnode = rec.rnode.dbnode;
|
||||
let relnode = rec.rnode.relnode;
|
||||
|
||||
if (rec.flags & pg_constants::SMGR_TRUNCATE_HEAP) != 0 {
|
||||
let rel = RelTag {
|
||||
spcnode,
|
||||
dbnode,
|
||||
relnode,
|
||||
forknum: pg_constants::MAIN_FORKNUM,
|
||||
};
|
||||
timeline.put_truncation(rel, lsn, rec.blkno)?;
|
||||
}
|
||||
if (rec.flags & pg_constants::SMGR_TRUNCATE_FSM) != 0 {
|
||||
let rel = RelTag {
|
||||
spcnode,
|
||||
dbnode,
|
||||
relnode,
|
||||
forknum: pg_constants::FSM_FORKNUM,
|
||||
};
|
||||
|
||||
// FIXME: 'blkno' stored in the WAL record is the new size of the
|
||||
// heap. The formula for calculating the new size of the FSM is
|
||||
// pretty complicated (see FreeSpaceMapPrepareTruncateRel() in
|
||||
// PostgreSQL), and we should also clear bits in the tail FSM block,
|
||||
// and update the upper level FSM pages. None of that has been
|
||||
// implemented. What we do instead, is always just truncate the FSM
|
||||
// to zero blocks. That's bad for performance, but safe. (The FSM
|
||||
// isn't needed for correctness, so we could also leave garbage in
|
||||
// it. Seems more tidy to zap it away.)
|
||||
if rec.blkno != 0 {
|
||||
info!("Partial truncation of FSM is not supported");
|
||||
}
|
||||
let num_fsm_blocks = 0;
|
||||
timeline.put_truncation(rel, lsn, num_fsm_blocks)?;
|
||||
}
|
||||
if (rec.flags & pg_constants::SMGR_TRUNCATE_VM) != 0 {
|
||||
let rel = RelTag {
|
||||
spcnode,
|
||||
dbnode,
|
||||
relnode,
|
||||
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
|
||||
};
|
||||
|
||||
// FIXME: Like with the FSM above, the logic to truncate the VM
|
||||
// correctly has not been implemented. Just zap it away completely,
|
||||
// always. Unlike the FSM, the VM must never have bits incorrectly
|
||||
// set. From a correctness point of view, it's always OK to clear
|
||||
// bits or remove it altogether, though.
|
||||
if rec.blkno != 0 {
|
||||
info!("Partial truncation of VM is not supported");
|
||||
}
|
||||
let num_vm_blocks = 0;
|
||||
timeline.put_truncation(rel, lsn, num_vm_blocks)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -264,7 +264,7 @@ pub struct DecodedBkpBlock {
|
||||
/* Information on full-page image, if any */
|
||||
has_image: bool, /* has image, even for consistency checking */
|
||||
pub apply_image: bool, /* has image that should be restored */
|
||||
pub will_init: bool,
|
||||
pub will_init: bool, /* record doesn't need previous page version to apply */
|
||||
//char *bkp_image;
|
||||
hole_offset: u16,
|
||||
hole_length: u16,
|
||||
@@ -850,7 +850,7 @@ pub fn decode_wal_record(record: Bytes) -> DecodedWALRecord {
|
||||
let spcnode = buf.get_u32_le();
|
||||
let dbnode = buf.get_u32_le();
|
||||
let relnode = buf.get_u32_le();
|
||||
//TODO save these too
|
||||
//TODO handle this too?
|
||||
trace!(
|
||||
"XLOG_XACT_ABORT relfilenode {}/{}/{}",
|
||||
spcnode,
|
||||
|
||||
@@ -20,6 +20,8 @@ pub const ROCKSDB_SPECIAL_FORKNUM: u8 = 50;
|
||||
|
||||
// From storage_xlog.h
|
||||
pub const SMGR_TRUNCATE_HEAP: u32 = 0x0001;
|
||||
pub const SMGR_TRUNCATE_VM: u32 = 0x0002;
|
||||
pub const SMGR_TRUNCATE_FSM: u32 = 0x0004;
|
||||
|
||||
//
|
||||
// Constants from visibilitymap.h
|
||||
|
||||
@@ -17,3 +17,29 @@ SELECT * FROM truncatetest;
|
||||
(0 rows)
|
||||
|
||||
DROP TABLE truncatetest;
|
||||
--
|
||||
-- Test that the FSM is truncated along with the table.
|
||||
--
|
||||
-- Create a test table and delete and vacuum away most of the rows.
|
||||
-- This leaves the FSM full of pages with plenty of space
|
||||
create table tt(i int);
|
||||
insert into tt select g from generate_series(1, 100000) g;
|
||||
delete from tt where i%100 != 0 and i > 10000;
|
||||
vacuum freeze tt;
|
||||
-- Delete the rest of the rows, and vacuum again. This truncates the
|
||||
-- heap to 0 blocks, and should also truncate the FSM.
|
||||
delete from tt;
|
||||
vacuum tt;
|
||||
-- This can be used to look at the FSM directly, if the 'pg_freespace' contrib module
|
||||
-- is installed
|
||||
--SELECT blkno, avail from generate_series(1, 450) blkno, pg_freespace('tt'::regclass, blkno) AS avail;
|
||||
-- Insert a row again. It should go on block #0. If the FSM was not truncated,
|
||||
-- the insertion would find a higher-numbered block in the FSM and use that instead.
|
||||
insert into tt values (0);
|
||||
select ctid, * from tt;
|
||||
ctid | i
|
||||
-------+---
|
||||
(0,1) | 0
|
||||
(1 row)
|
||||
|
||||
drop table tt;
|
||||
|
||||
@@ -16,3 +16,31 @@ VACUUM truncatetest;
|
||||
SELECT * FROM truncatetest;
|
||||
|
||||
DROP TABLE truncatetest;
|
||||
|
||||
|
||||
--
|
||||
-- Test that the FSM is truncated along with the table.
|
||||
--
|
||||
|
||||
-- Create a test table and delete and vacuum away most of the rows.
|
||||
-- This leaves the FSM full of pages with plenty of space
|
||||
create table tt(i int);
|
||||
insert into tt select g from generate_series(1, 100000) g;
|
||||
delete from tt where i%100 != 0 and i > 10000;
|
||||
vacuum freeze tt;
|
||||
|
||||
-- Delete the rest of the rows, and vacuum again. This truncates the
|
||||
-- heap to 0 blocks, and should also truncate the FSM.
|
||||
delete from tt;
|
||||
vacuum tt;
|
||||
|
||||
-- This can be used to look at the FSM directly, if the 'pg_freespace' contrib module
|
||||
-- is installed
|
||||
--SELECT blkno, avail from generate_series(1, 450) blkno, pg_freespace('tt'::regclass, blkno) AS avail;
|
||||
|
||||
-- Insert a row again. It should go on block #0. If the FSM was not truncated,
|
||||
-- the insertion would find a higher-numbered block in the FSM and use that instead.
|
||||
insert into tt values (0);
|
||||
select ctid, * from tt;
|
||||
|
||||
drop table tt;
|
||||
|
||||
Reference in New Issue
Block a user