Experiment in decoding postgres tuples

This commit is contained in:
John Spray
2025-05-19 17:33:10 +02:00
parent 53286d8880
commit 52e98de0bc
4 changed files with 137 additions and 3 deletions

View File

@@ -29,7 +29,11 @@ impl ParseCallbacks for PostgresFfiCallbacks {
"CheckPoint",
"FullTransactionId",
"ControlFileData",
"PageXLogRecPtr",
"HeapTupleHeaderData",
"ItemPointerData",
"BlockIdData",
"OffsetNumber",
];
if serde_list.contains(&derive_info.name) {
@@ -127,6 +131,9 @@ fn main() -> anyhow::Result<()> {
.allowlist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
.allowlist_type("PageHeaderData")
.allowlist_type("HeapTupleHeaderData")
.allowlist_type("ItemPointerData")
.allowlist_type("TransactionId")
.allowlist_type("LocationIndex")
.allowlist_type("Datum")
.allowlist_type("DBState")
.allowlist_type("RelMapFile")

View File

@@ -223,9 +223,10 @@ pub mod walrecord;
// Export some widely used datatypes that are unlikely to change across Postgres versions
pub use v14::bindings::{
BlockNumber, CheckPoint, ControlFileData, HeapTupleHeaderData, MultiXactId, OffsetNumber, Oid,
PageHeaderData, RepOriginId, TimeLineID, TimestampTz, TransactionId, XLogRecPtr, XLogRecord,
XLogSegNo, uint32, uint64,
BlockNumber, CheckPoint, ControlFileData, HeapTupleHeaderData, ItemIdData, ItemPointerData,
LocationIndex, MultiXactId, OffsetNumber, Oid, PageHeaderData, PageXLogRecPtr, RepOriginId,
TimeLineID, TimestampTz, TransactionId, XLogRecPtr, XLogRecord, XLogSegNo, uint8, uint16,
uint32, uint64,
};
// Likewise for these, although the assumption that these don't change is a little more iffy.
pub use v14::bindings::{MultiXactOffset, MultiXactStatus};

View File

@@ -7520,11 +7520,15 @@ fn is_send() {
#[cfg(test)]
mod tests {
use std::io::Cursor;
use std::path::PathBuf;
use std::sync::Arc;
use pageserver_api::key::Key;
use pageserver_api::value::Value;
use postgres_ffi::v17::bindings::PageHeaderData;
use std::iter::Iterator;
use tokio_epoll_uring::BoundedBuf;
use tracing::Instrument;
use utils::id::TimelineId;
use utils::lsn::Lsn;
@@ -7836,4 +7840,126 @@ mod tests {
layers.get_from_desc(&desc)
}
/// Resolves `relative` against this crate's manifest directory
/// (`CARGO_MANIFEST_DIR`, captured at compile time).
fn fixture_path(relative: &str) -> PathBuf {
    let mut resolved = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    resolved.push(relative);
    resolved
}
use postgres_ffi::ItemIdData;
use postgres_ffi::LocationIndex;
use postgres_ffi::PageXLogRecPtr;
use postgres_ffi::TransactionId;
use serde::{Deserialize, Serialize};
/// Serializable stand-in for the fixed-size part of a page header.
///
/// We already have a PageHeaderData via bindgen, but it's not serializable and
/// contains a non-serializable __IncompleteArrayField (the trailing `pd_linp` array),
/// so this struct repeats the header fields without that array.
///
/// FIXME: we should be able to exclude pd_linp from the bindgen, but blocklist_item()
/// wasn't working for some reason.
#[repr(C)]
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct SerPageHeaderData {
pub pd_lsn: PageXLogRecPtr,
pub pd_checksum: postgres_ffi::uint16,
pub pd_flags: postgres_ffi::uint16,
// `decode_tuples` derives the number of line pointers from this offset:
// (pd_lower - size of this header) / size of one ItemIdData.
pub pd_lower: LocationIndex,
pub pd_upper: LocationIndex,
pub pd_special: LocationIndex,
pub pd_pagesize_version: postgres_ffi::uint16,
pub pd_prune_xid: TransactionId,
}
/// Raw bytes of a single 4-byte line pointer entry, exactly as read from the page.
///
/// Only `Deserialize` is derived: `decode_tuples` reads one of these per line pointer
/// and then transmutes `raw` into the bindgen `ItemIdData` so that its `lp_off()` /
/// `lp_len()` accessors can be used.
#[repr(C)]
#[repr(align(4))]
#[derive(Debug, Copy, Clone, Deserialize)]
pub struct SerItemIdData {
// The undecoded line pointer bytes.
pub raw: [u8; 4],
}
/// Postgres HeapTupleHeaderData type is actually some kind of placeholder,
/// so we have a hand-rolled struct to describe the actual serialized header of a tuple.
///
/// `decode_tuples` deserializes this from the first 23 bytes of each tuple. The field
/// descriptions below follow PostgreSQL's `htup_details.h` (NOTE(review): confirm
/// against the Postgres version in use).
#[repr(C)]
#[derive(Debug, Default, Serialize, Deserialize)]
struct SerMinimalTupleData {
t_xmin: postgres_ffi::uint32, // inserting transaction ID
t_xmax: postgres_ffi::uint32, // deleting or locking transaction ID
// Union in the C header: command ID(s), or the xact ID of an old-style VACUUM FULL.
t_cid_or_t_xvac: postgres_ffi::uint32,
// Current TID of this tuple or of a newer version of it.
t_ctid: postgres_ffi::ItemPointerData,
t_infomask2: postgres_ffi::uint16, // number of attributes + various flags
t_infomask: postgres_ffi::uint16, // various flag bits
// Size of the header incl. bitmap and padding; `decode_tuples` uses it as the
// offset from the start of the tuple to the start of the tuple's data.
t_hoff: postgres_ffi::uint8,
// ^ - the fields above occupy 23 bytes in serialized form - ^
}
/// Walks the line pointers of a captured heap page and decodes each tuple header.
///
/// Reads the fixture `test_data/example_page.bin`, deserializes the page header,
/// derives the number of line pointers from `pd_lower`, and for every line pointer
/// decodes the tuple header it points at and slices out the tuple's data bytes.
///
/// Panics (failing the test) if the fixture is missing, if deserialization fails, or
/// if any offset read from the page falls outside the buffer.
#[test]
fn decode_tuples() {
    use utils::bin_ser::LeSer;

    // Number of bytes occupied by the serialized fields of `SerMinimalTupleData`.
    // This is deliberately a literal rather than `size_of::<SerMinimalTupleData>()`:
    // the in-memory `repr(C)` struct may carry trailing padding that is not part of
    // the serialized form.
    const TUPLE_HEADER_LEN: usize = 23;

    let page_header_len = std::mem::size_of::<SerPageHeaderData>();
    let item_id_len = std::mem::size_of::<ItemIdData>();

    // A simple example with three valid tuples, each tuple is (id, string, string, timestamp)
    let input_raw = std::fs::read(fixture_path("test_data/example_page.bin")).unwrap();
    // 00000000 00 00 00 00 a0 3d 53 01 00 00 00 00 24 00 70 1f |.....=S.....$.p.|
    // 00000010 00 20 04 20 00 00 00 00 d0 9f 60 00 a0 9f 60 00 |. . ......`...`.|
    // 00000020 70 9f 60 00 00 00 00 00 00 00 00 00 00 00 00 00 |p.`.............|
    // 00000030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
    // *
    // 00001f70 e7 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
    // 00001f80 03 00 04 00 02 08 18 00 03 00 00 00 0b 6b 65 79 |.............key|
    // 00001f90 31 0f 76 61 6c 75 65 31 9f 2a e0 69 79 d8 02 00 |1.value1.*.iy...|
    // 00001fa0 e6 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
    // 00001fb0 02 00 04 00 02 08 18 00 02 00 00 00 0b 6b 65 79 |.............key|
    // 00001fc0 33 0f 76 61 6c 75 65 33 54 b9 a3 69 79 d8 02 00 |3.value3T..iy...|
    // 00001fd0 e5 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
    // 00001fe0 01 00 04 00 02 08 18 00 01 00 00 00 0b 6b 65 79 |.............key|
    // 00001ff0 32 0f 76 61 6c 75 65 32 dd 0e 6b 68 79 d8 02 00 |2.value2..khy...|
    // 00002000
    // The attributes for this example:
    // attrelid | attname | atttypid | attlen | attnum | attcacheoff | atttypmod | attndims | attbyval | attalign | attstorage | attcompression | attnotnull | atthasdef | atthasmissing | attidentity | attgenerated | attisdropped | attislocal | attinhcount | attcollation | attstattarget | attacl | attoptions | attfdwoptions | attmissingval
    // ----------+-------------+----------+--------+--------+-------------+-----------+----------+----------+----------+------------+----------------+------------+-----------+---------------+-------------+--------------+--------------+------------+-------------+--------------+---------------+--------+------------+---------------+---------------
    // 16385 | id | 23 | 4 | 1 | -1 | -1 | 0 | t | i | p | | t | t | f | | | f | t | 0 | 0 | | | | |
    // 16385 | key | 1043 | -1 | 2 | -1 | 259 | 0 | f | i | x | | f | f | f | | | f | t | 0 | 100 | | | | |
    // 16385 | value | 25 | -1 | 3 | -1 | -1 | 0 | f | i | x | | f | f | f | | | f | t | 0 | 100 | | | | |
    // 16385 | modified_at | 1184 | 8 | 4 | -1 | -1 | 0 | t | d | p | | f | t | f | | | f | t | 0 | 0 | | | | |
    // TODO: test cases for schema changes:
    // 1. baseline: data that exactly matches attributes
    // 2. add one column, write a row, read all rows
    // 3. drop one column, write a row, read all rows.
    // 4. vacuum full, write a row, read all rows.

    let header = SerPageHeaderData::des(&input_raw[..page_header_len]).unwrap();

    // The line pointer array sits between the fixed page header and `pd_lower`.
    // Use a checked subtraction so a corrupt `pd_lower` produces a clear message
    // instead of an arithmetic-overflow panic (or a wrapped value in release builds).
    let item_count = (header.pd_lower as usize)
        .checked_sub(page_header_len)
        .expect("pd_lower must not point inside the page header")
        / item_id_len;
    assert_eq!(item_count, 3);

    for item_index in 0..item_count {
        let item_base = page_header_len + item_index * item_id_len;
        let item =
            SerItemIdData::des(&input_raw[item_base..item_base + item_id_len]).unwrap();
        // SAFETY: `transmute` only compiles if `ItemIdData` is exactly 4 bytes, so the
        // sizes are guaranteed to match. NOTE(review): this additionally assumes that
        // the bindgen-generated `ItemIdData` is plain bitfield storage for which every
        // bit pattern is valid, and that the host byte order matches the byte order
        // the page was written in -- confirm against the generated bindings.
        let item = unsafe { std::mem::transmute::<[u8; 4], ItemIdData>(item.raw) };
        println!("item: {} {}", item.lp_off(), item.lp_len());

        let tuple_start = item.lp_off() as usize;
        let tuple_end = tuple_start + item.lp_len() as usize;

        let tuple_header =
            SerMinimalTupleData::des(&input_raw[tuple_start..tuple_start + TUPLE_HEADER_LEN])
                .unwrap();
        println!("tuple_header: insert offset {}", tuple_header.t_hoff);

        // The tuple's data begins `t_hoff` bytes after the start of the tuple and runs
        // to the end of the span described by the line pointer.
        let tuple_raw = &input_raw[tuple_start + tuple_header.t_hoff as usize..tuple_end];
        println!("tuple data: {} bytes", tuple_raw.len());
    }
}
}

Binary file not shown.