Compare commits

...

17 Commits

Author SHA1 Message Date
Christian Schwarz
70f993331c clippy 2024-01-04 13:07:22 +00:00
Christian Schwarz
c87c19a646 move the logic of emitting the clear visibility wal records into a common function 2024-01-04 13:03:39 +00:00
Christian Schwarz
92280727df turns on ingest_neonrmgr_record is just copy-pasta, re-do copy-pasta 2024-01-04 12:52:34 +00:00
Christian Schwarz
31fc069482 fixup 2024-01-04 12:48:49 +00:00
Christian Schwarz
16090c876d and now it's obvious that new_heap_blkno and old_heap_blkno really are independent 2024-01-04 12:47:42 +00:00
Christian Schwarz
02dc0db633 comments 2024-01-04 12:36:45 +00:00
Christian Schwarz
8e04de6ef9 fixup 'restructure match block to make the special case clear' 2024-01-04 12:36:35 +00:00
Christian Schwarz
0713f367d4 restructure match block to make the special case clear 2024-01-04 12:23:23 +00:00
Christian Schwarz
93d0f5e93d lift up the vm_size checking logic 2024-01-04 12:14:31 +00:00
Christian Schwarz
20957d6c4e lift up HEAPBLK_TO_MAPBLOCK call 2024-01-04 11:54:08 +00:00
Christian Schwarz
f4de9adb1d same for the Some,Some case 2024-01-04 11:10:12 +00:00
Christian Schwarz
98ee0d9012 propagate Some()-ness 2024-01-04 11:05:26 +00:00
Christian Schwarz
6933f5d089 transform the nested if into a flattened match 2024-01-04 10:57:19 +00:00
Christian Schwarz
853f77eb11 some constant propagation 2024-01-04 10:52:21 +00:00
Christian Schwarz
ccfc9741f6 move vm_rel out of match 2024-01-04 10:48:53 +00:00
Christian Schwarz
c6d09f8942 transform outermost if to a match 2024-01-04 10:47:05 +00:00
Christian Schwarz
c8d36dab59 walredo: DRY ClearVisibilityMapFlags record handling 2024-01-04 10:41:36 +00:00
3 changed files with 103 additions and 168 deletions

View File

@@ -709,89 +709,99 @@ impl WalIngest {
_ => {}
}
// Clear the VM bits if required.
if new_heap_blkno.is_some() || old_heap_blkno.is_some() {
let vm_rel = RelTag {
forknum: VISIBILITYMAP_FORKNUM,
spcnode: decoded.blocks[0].rnode_spcnode,
dbnode: decoded.blocks[0].rnode_dbnode,
relnode: decoded.blocks[0].rnode_relnode,
let vm_rel = RelTag {
forknum: VISIBILITYMAP_FORKNUM,
spcnode: decoded.blocks[0].rnode_spcnode,
dbnode: decoded.blocks[0].rnode_dbnode,
relnode: decoded.blocks[0].rnode_relnode,
};
self.clear_visibility_map_bits_if_required(
modification,
vm_rel,
new_heap_blkno,
old_heap_blkno,
flags,
ctx,
)
.await?;
Ok(())
}
async fn clear_visibility_map_bits_if_required(
&mut self,
modification: &mut DatadirModification<'_>,
vm_rel: RelTag,
new_heap_blkno: Option<u32>,
old_heap_blkno: Option<u32>,
flags: u8,
ctx: &RequestContext,
) -> anyhow::Result<()> {
let new = new_heap_blkno.map(|x| (x, pg_constants::HEAPBLK_TO_MAPBLOCK(x)));
let old = old_heap_blkno.map(|x| (x, pg_constants::HEAPBLK_TO_MAPBLOCK(x)));
// Sometimes, Postgres seems to create heap WAL records with the
// ALL_VISIBLE_CLEARED flag set, even though the bit in the VM page is
// not set. In fact, it's possible that the VM page does not exist at all.
// In that case, we don't want to store a record to clear the VM bit;
// replaying it would fail to find the previous image of the page, because
// it doesn't exist. So check if the VM page(s) exist, and skip the WAL
// record if it doesn't.
let (new, old) = {
let vm_size = if new.or(old).is_some() {
Some(get_relsize(modification, vm_rel, ctx).await?)
} else {
None
};
let mut new_vm_blk = new_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
let mut old_vm_blk = old_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
// Sometimes, Postgres seems to create heap WAL records with the
// ALL_VISIBLE_CLEARED flag set, even though the bit in the VM page is
// not set. In fact, it's possible that the VM page does not exist at all.
// In that case, we don't want to store a record to clear the VM bit;
// replaying it would fail to find the previous image of the page, because
// it doesn't exist. So check if the VM page(s) exist, and skip the WAL
// record if it doesn't.
let vm_size = get_relsize(modification, vm_rel, ctx).await?;
if let Some(blknum) = new_vm_blk {
if blknum >= vm_size {
new_vm_blk = None;
let filter = |(heap_blk, vm_blk)| {
let vm_size = vm_size.expect("we set it to Some() if new or old is Some()");
if vm_blk >= vm_size {
None
} else {
Some((heap_blk, vm_blk))
}
}
if let Some(blknum) = old_vm_blk {
if blknum >= vm_size {
old_vm_blk = None;
}
}
};
(new.and_then(filter), old.and_then(filter))
};
if new_vm_blk.is_some() || old_vm_blk.is_some() {
if new_vm_blk == old_vm_blk {
// An UPDATE record that needs to clear the bits for both old and the
// new page, both of which reside on the same VM page.
match (new, old) {
(Some((new_heap_blkno, new_vm_blk)), Some((old_heap_blkno, old_vm_blk)))
if new_vm_blk == old_vm_blk =>
{
// An UPDATE record that needs to clear the bits for both old and the
// new page, both of which reside on the same VM page.
self.put_rel_wal_record(
modification,
vm_rel,
new_vm_blk, // could also be old_vm_blk, they're the same
NeonWalRecord::ClearVisibilityMapFlags {
heap_blkno_1: Some(new_heap_blkno),
heap_blkno_2: Some(old_heap_blkno),
flags,
},
ctx,
)
.await?;
}
(new, old) => {
// Emit one record per VM block that needs updating.
for (heap_blkno, vm_blk) in [new, old].into_iter().flatten() {
self.put_rel_wal_record(
modification,
vm_rel,
new_vm_blk.unwrap(),
vm_blk,
NeonWalRecord::ClearVisibilityMapFlags {
new_heap_blkno,
old_heap_blkno,
heap_blkno_1: Some(heap_blkno),
heap_blkno_2: None,
flags,
},
ctx,
)
.await?;
} else {
// Clear VM bits for one heap page, or for two pages that reside on
// different VM pages.
if let Some(new_vm_blk) = new_vm_blk {
self.put_rel_wal_record(
modification,
vm_rel,
new_vm_blk,
NeonWalRecord::ClearVisibilityMapFlags {
new_heap_blkno,
old_heap_blkno: None,
flags,
},
ctx,
)
.await?;
}
if let Some(old_vm_blk) = old_vm_blk {
self.put_rel_wal_record(
modification,
vm_rel,
old_vm_blk,
NeonWalRecord::ClearVisibilityMapFlags {
new_heap_blkno: None,
old_heap_blkno,
flags,
},
ctx,
)
.await?;
}
}
}
}
Ok(())
anyhow::Ok(())
}
async fn ingest_neonrmgr_record(
@@ -880,87 +890,21 @@ impl WalIngest {
),
}
// Clear the VM bits if required.
if new_heap_blkno.is_some() || old_heap_blkno.is_some() {
let vm_rel = RelTag {
forknum: VISIBILITYMAP_FORKNUM,
spcnode: decoded.blocks[0].rnode_spcnode,
dbnode: decoded.blocks[0].rnode_dbnode,
relnode: decoded.blocks[0].rnode_relnode,
};
let mut new_vm_blk = new_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
let mut old_vm_blk = old_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
// Sometimes, Postgres seems to create heap WAL records with the
// ALL_VISIBLE_CLEARED flag set, even though the bit in the VM page is
// not set. In fact, it's possible that the VM page does not exist at all.
// In that case, we don't want to store a record to clear the VM bit;
// replaying it would fail to find the previous image of the page, because
// it doesn't exist. So check if the VM page(s) exist, and skip the WAL
// record if it doesn't.
let vm_size = get_relsize(modification, vm_rel, ctx).await?;
if let Some(blknum) = new_vm_blk {
if blknum >= vm_size {
new_vm_blk = None;
}
}
if let Some(blknum) = old_vm_blk {
if blknum >= vm_size {
old_vm_blk = None;
}
}
if new_vm_blk.is_some() || old_vm_blk.is_some() {
if new_vm_blk == old_vm_blk {
// An UPDATE record that needs to clear the bits for both old and the
// new page, both of which reside on the same VM page.
self.put_rel_wal_record(
modification,
vm_rel,
new_vm_blk.unwrap(),
NeonWalRecord::ClearVisibilityMapFlags {
new_heap_blkno,
old_heap_blkno,
flags,
},
ctx,
)
.await?;
} else {
// Clear VM bits for one heap page, or for two pages that reside on
// different VM pages.
if let Some(new_vm_blk) = new_vm_blk {
self.put_rel_wal_record(
modification,
vm_rel,
new_vm_blk,
NeonWalRecord::ClearVisibilityMapFlags {
new_heap_blkno,
old_heap_blkno: None,
flags,
},
ctx,
)
.await?;
}
if let Some(old_vm_blk) = old_vm_blk {
self.put_rel_wal_record(
modification,
vm_rel,
old_vm_blk,
NeonWalRecord::ClearVisibilityMapFlags {
new_heap_blkno: None,
old_heap_blkno,
flags,
},
ctx,
)
.await?;
}
}
}
}
let vm_rel = RelTag {
forknum: VISIBILITYMAP_FORKNUM,
spcnode: decoded.blocks[0].rnode_spcnode,
dbnode: decoded.blocks[0].rnode_dbnode,
relnode: decoded.blocks[0].rnode_relnode,
};
self.clear_visibility_map_bits_if_required(
modification,
vm_rel,
new_heap_blkno,
old_heap_blkno,
flags,
ctx,
)
.await?;
Ok(())
}

View File

@@ -21,10 +21,13 @@ pub enum NeonWalRecord {
/// Native PostgreSQL WAL record
Postgres { will_init: bool, rec: Bytes },
/// Clear bits in heap visibility map. ('flags' is bitmap of bits to clear)
/// Clear the bits specified in `flags` in the heap visibility map for the given heap blocks.
///
/// For example, for `{ heap_blkno_1: None, heap_blkno_2: Some(23), flags: 0b0010_0000}`
/// redo will clear those flag bits for heap block 23, i.e. apply `&= !(0b0010_0000 << map_offset)`
/// to the visibility map byte that covers heap block 23.
ClearVisibilityMapFlags {
new_heap_blkno: Option<u32>,
old_heap_blkno: Option<u32>,
heap_blkno_1: Option<u32>,
heap_blkno_2: Option<u32>,
flags: u8,
},
/// Mark transaction IDs as committed on a CLOG page

View File

@@ -403,8 +403,8 @@ impl PostgresRedoManager {
anyhow::bail!("tried to pass postgres wal record to neon WAL redo");
}
NeonWalRecord::ClearVisibilityMapFlags {
new_heap_blkno,
old_heap_blkno,
heap_blkno_1,
heap_blkno_2,
flags,
} => {
// sanity check that this is modifying the correct relation
@@ -414,7 +414,8 @@ impl PostgresRedoManager {
"ClearVisibilityMapFlags record on unexpected rel {}",
rel
);
if let Some(heap_blkno) = *new_heap_blkno {
for heap_blkno in [heap_blkno_1, heap_blkno_2].into_iter().flatten() {
let heap_blkno = *heap_blkno;
// Calculate the VM block and offset that corresponds to the heap block.
let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blkno);
let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(heap_blkno);
@@ -428,19 +429,6 @@ impl PostgresRedoManager {
map[map_byte as usize] &= !(flags << map_offset);
}
// Repeat for 'old_heap_blkno', if any
if let Some(heap_blkno) = *old_heap_blkno {
let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blkno);
let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(heap_blkno);
let map_offset = pg_constants::HEAPBLK_TO_OFFSET(heap_blkno);
assert!(map_block == blknum);
let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
map[map_byte as usize] &= !(flags << map_offset);
}
}
// Non-relational WAL records are handled here, with custom code that has the
// same effects as the corresponding Postgres WAL redo function.