mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
Correctly truncate VM (#9342)
## Problem https://github.com/neondatabase/neon/issues/9240 ## Summary of changes Correctly truncate VM page instead just replacing it with zero page. ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. ## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --------- Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech> Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
This commit is contained in:
committed by
GitHub
parent
21282aa113
commit
f70611c8df
@@ -41,6 +41,11 @@ pub enum NeonWalRecord {
|
||||
file_path: String,
|
||||
content: Option<Bytes>,
|
||||
},
|
||||
// Truncate visibility map page
|
||||
TruncateVisibilityMap {
|
||||
trunc_byte: usize,
|
||||
trunc_offs: usize,
|
||||
},
|
||||
|
||||
/// A testing record for unit testing purposes. It supports append data to an existing image, or clear it.
|
||||
#[cfg(feature = "testing")]
|
||||
|
||||
@@ -243,8 +243,11 @@ const FSM_LEAF_NODES_PER_PAGE: usize = FSM_NODES_PER_PAGE - FSM_NON_LEAF_NODES_P
|
||||
pub const SLOTS_PER_FSM_PAGE: u32 = FSM_LEAF_NODES_PER_PAGE as u32;
|
||||
|
||||
/* From visibilitymap.c */
|
||||
pub const VM_HEAPBLOCKS_PER_PAGE: u32 =
|
||||
(BLCKSZ as usize - SIZEOF_PAGE_HEADER_DATA) as u32 * (8 / 2); // MAPSIZE * (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
|
||||
|
||||
pub const VM_MAPSIZE: usize = BLCKSZ as usize - MAXALIGN_SIZE_OF_PAGE_HEADER_DATA;
|
||||
pub const VM_BITS_PER_HEAPBLOCK: usize = 2;
|
||||
pub const VM_HEAPBLOCKS_PER_BYTE: usize = 8 / VM_BITS_PER_HEAPBLOCK;
|
||||
pub const VM_HEAPBLOCKS_PER_PAGE: usize = VM_MAPSIZE * VM_HEAPBLOCKS_PER_BYTE;
|
||||
|
||||
/* From origin.c */
|
||||
pub const REPLICATION_STATE_MAGIC: u32 = 0x1257DADE;
|
||||
|
||||
@@ -587,11 +587,29 @@ impl WalIngest {
|
||||
forknum: VISIBILITYMAP_FORKNUM,
|
||||
};
|
||||
|
||||
let mut vm_page_no = blkno / pg_constants::VM_HEAPBLOCKS_PER_PAGE;
|
||||
if blkno % pg_constants::VM_HEAPBLOCKS_PER_PAGE != 0 {
|
||||
// Tail of last remaining vm page has to be zeroed.
|
||||
// We are not precise here and instead of digging in VM bitmap format just clear the whole page.
|
||||
modification.put_rel_page_image_zero(rel, vm_page_no)?;
|
||||
// last remaining block, byte, and bit
|
||||
let mut vm_page_no = blkno / (pg_constants::VM_HEAPBLOCKS_PER_PAGE as u32);
|
||||
let trunc_byte = blkno as usize % pg_constants::VM_HEAPBLOCKS_PER_PAGE
|
||||
/ pg_constants::VM_HEAPBLOCKS_PER_BYTE;
|
||||
let trunc_offs = blkno as usize % pg_constants::VM_HEAPBLOCKS_PER_BYTE
|
||||
* pg_constants::VM_BITS_PER_HEAPBLOCK;
|
||||
|
||||
// Unless the new size is exactly at a visibility map page boundary, the
|
||||
// tail bits in the last remaining map page, representing truncated heap
|
||||
// blocks, need to be cleared. This is not only tidy, but also necessary
|
||||
// because we don't get a chance to clear the bits if the heap is extended
|
||||
// again.
|
||||
if (trunc_byte != 0 || trunc_offs != 0)
|
||||
&& self.shard.is_key_local(&rel_block_to_key(rel, vm_page_no))
|
||||
{
|
||||
modification.put_rel_wal_record(
|
||||
rel,
|
||||
vm_page_no,
|
||||
NeonWalRecord::TruncateVisibilityMap {
|
||||
trunc_byte,
|
||||
trunc_offs,
|
||||
},
|
||||
)?;
|
||||
vm_page_no += 1;
|
||||
}
|
||||
let nblocks = get_relsize(modification, rel, ctx).await?;
|
||||
|
||||
@@ -42,6 +42,34 @@ pub(crate) fn apply_in_neon(
|
||||
} => {
|
||||
anyhow::bail!("tried to pass postgres wal record to neon WAL redo");
|
||||
}
|
||||
//
|
||||
// Code copied from PostgreSQL `visibilitymap_prepare_truncate` function in `visibilitymap.c`
|
||||
//
|
||||
NeonWalRecord::TruncateVisibilityMap {
|
||||
trunc_byte,
|
||||
trunc_offs,
|
||||
} => {
|
||||
// sanity check that this is modifying the correct relation
|
||||
let (rel, _) = key.to_rel_block().context("invalid record")?;
|
||||
assert!(
|
||||
rel.forknum == VISIBILITYMAP_FORKNUM,
|
||||
"TruncateVisibilityMap record on unexpected rel {}",
|
||||
rel
|
||||
);
|
||||
let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
|
||||
map[*trunc_byte + 1..].fill(0u8);
|
||||
/*----
|
||||
* Mask out the unwanted bits of the last remaining byte.
|
||||
*
|
||||
* ((1 << 0) - 1) = 00000000
|
||||
* ((1 << 1) - 1) = 00000001
|
||||
* ...
|
||||
* ((1 << 6) - 1) = 00111111
|
||||
* ((1 << 7) - 1) = 01111111
|
||||
*----
|
||||
*/
|
||||
map[*trunc_byte] &= (1 << *trunc_offs) - 1;
|
||||
}
|
||||
NeonWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno,
|
||||
old_heap_blkno,
|
||||
|
||||
33
test_runner/regress/test_vm_truncate.py
Normal file
33
test_runner/regress/test_vm_truncate.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
|
||||
|
||||
#
|
||||
# Test that VM is properly truncated
|
||||
#
|
||||
def test_vm_truncate(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
con = endpoint.connect()
|
||||
cur = con.cursor()
|
||||
cur.execute("CREATE EXTENSION neon_test_utils")
|
||||
cur.execute("CREATE EXTENSION pageinspect")
|
||||
|
||||
cur.execute(
|
||||
"create table t(pk integer primary key, counter integer default 0, filler text default repeat('?', 200))"
|
||||
)
|
||||
cur.execute("insert into t (pk) values (generate_series(1,1000))")
|
||||
cur.execute("delete from t where pk>10")
|
||||
cur.execute("vacuum t") # truncates the relation, including its VM and FSM
|
||||
# get image of the first block of the VM excluding the page header. It's expected
|
||||
# to still be in the buffer cache.
|
||||
# ignore page header (24 bytes, 48 - it's hex representation)
|
||||
cur.execute("select substr(encode(get_raw_page('t', 'vm', 0), 'hex'), 48)")
|
||||
pg_bitmap = cur.fetchall()[0][0]
|
||||
# flush shared buffers
|
||||
cur.execute("SELECT clear_buffer_cache()")
|
||||
# now download the first block of the VM from the pageserver ...
|
||||
cur.execute("select substr(encode(get_raw_page('t', 'vm', 0), 'hex'), 48)")
|
||||
ps_bitmap = cur.fetchall()[0][0]
|
||||
# and check that content of bitmaps are equal, i.e. PS is producing the same VM page as Postgres
|
||||
assert pg_bitmap == ps_bitmap
|
||||
Reference in New Issue
Block a user