mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-27 08:09:58 +00:00
Replace transmute with serde
Upgrade to bindgen 0.59, which has two new abilities: (1) specifying arbitrary #[derive] attributes to attach to generated structs, and (2) requesting explicit padding fields. These two features are enough to replace transmute with serde/bincode.
This commit is contained in:
committed by
Heikki Linnakangas
parent
81dd4bc41e
commit
41fa02f82b
@@ -19,8 +19,9 @@ lazy_static = "1.4"
|
||||
log = "0.4.14"
|
||||
memoffset = "0.6.2"
|
||||
thiserror = "1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
workspace_hack = { path = "../workspace_hack" }
|
||||
zenith_utils = { path = "../zenith_utils" }
|
||||
|
||||
[build-dependencies]
|
||||
bindgen = "0.57"
|
||||
bindgen = "0.59.1"
|
||||
|
||||
@@ -13,11 +13,6 @@ in each major PostgreSQL version. Currently, this module is based on
|
||||
PostgreSQL v14, but in the future we will probably need a separate
|
||||
copy for each PostgreSQL version.
|
||||
|
||||
To interact with the C structs, there is some unsafe code in this
|
||||
module. Do not copy-paste that to the rest of the codebase! Keep the
|
||||
amount of unsafe code to a minimum, and limited to this module only,
|
||||
and only where it's truly needed.
|
||||
|
||||
TODO: Currently, there is also some code that deals with WAL records
|
||||
in pageserver/src/waldecoder.rs. That should be moved into this
|
||||
module. The rest of the codebase should not have intimate knowledge of
|
||||
|
||||
@@ -3,6 +3,44 @@ extern crate bindgen;
|
||||
use std::env;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use bindgen::callbacks::ParseCallbacks;
|
||||
|
||||
// Unit struct that carries this build script's implementation of
// bindgen's `ParseCallbacks` trait. It holds no state.
#[derive(Debug)]
|
||||
struct PostgresFfiCallbacks;
|
||||
|
||||
// Custom bindgen parse callbacks for this build script. Two hooks are
// overridden:
//   * `include_file` is forwarded unchanged to `bindgen::CargoCallbacks`.
//   * `add_derives` attaches extra #[derive] attributes to the structs that
//     are listed in `serde_list` below.
impl ParseCallbacks for PostgresFfiCallbacks {
|
||||
fn include_file(&self, filename: &str) {
|
||||
// This does the equivalent of passing bindgen::CargoCallbacks
|
||||
// to the builder .parse_callbacks() method.
|
||||
let cargo_callbacks = bindgen::CargoCallbacks;
|
||||
cargo_callbacks.include_file(filename)
|
||||
}
|
||||
|
||||
// Add any custom #[derive] attributes to the data structures that bindgen
|
||||
// creates.
|
||||
//
|
||||
// `name` is the type name handed in by bindgen. The return value is the
|
||||
// list of derive names to add for that type: "Default", "Serialize" and
|
||||
// "Deserialize" when `name` is in `serde_list`, and an empty list for
|
||||
// every other type.
|
||||
fn add_derives(&self, name: &str) -> Vec<String> {
|
||||
// This is the list of data structures that we want to serialize/deserialize.
|
||||
let serde_list = [
|
||||
"XLogRecord",
|
||||
"XLogPageHeaderData",
|
||||
"XLogLongPageHeaderData",
|
||||
"CheckPoint",
|
||||
"FullTransactionId",
|
||||
"ControlFileData",
|
||||
];
|
||||
|
||||
// Exact, case-sensitive match against the names listed above.
if serde_list.contains(&name) {
|
||||
vec![
|
||||
"Default".into(), // Default allows us to easily fill the padding fields with 0.
|
||||
"Serialize".into(),
|
||||
"Deserialize".into(),
|
||||
]
|
||||
} else {
|
||||
// Types outside the list get no additional derives.
vec![]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// Tell cargo to invalidate the built crate whenever the wrapper changes
|
||||
println!("cargo:rerun-if-changed=pg_control_ffi.h");
|
||||
@@ -19,20 +57,23 @@ fn main() {
|
||||
// Tell cargo to invalidate the built crate whenever any of the
|
||||
// included header files changed.
|
||||
//
|
||||
.parse_callbacks(Box::new(bindgen::CargoCallbacks))
|
||||
.parse_callbacks(Box::new(PostgresFfiCallbacks))
|
||||
//
|
||||
// These are the types and constants that we want to generate bindings for
|
||||
//
|
||||
.whitelist_type("ControlFileData")
|
||||
.whitelist_type("CheckPoint")
|
||||
.whitelist_type("FullTransactionId")
|
||||
.whitelist_type("XLogRecord")
|
||||
.whitelist_type("XLogPageHeaderData")
|
||||
.whitelist_type("XLogLongPageHeaderData")
|
||||
.whitelist_var("XLOG_PAGE_MAGIC")
|
||||
.whitelist_var("PG_CONTROL_FILE_SIZE")
|
||||
.whitelist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
|
||||
.whitelist_type("DBState")
|
||||
.allowlist_type("ControlFileData")
|
||||
.allowlist_type("CheckPoint")
|
||||
.allowlist_type("FullTransactionId")
|
||||
.allowlist_type("XLogRecord")
|
||||
.allowlist_type("XLogPageHeaderData")
|
||||
.allowlist_type("XLogLongPageHeaderData")
|
||||
.allowlist_var("XLOG_PAGE_MAGIC")
|
||||
.allowlist_var("PG_CONTROL_FILE_SIZE")
|
||||
.allowlist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
|
||||
.allowlist_type("DBState")
|
||||
// Because structs are used for serialization, tell bindgen to emit
|
||||
// explicit padding fields.
|
||||
.explicit_padding(true)
|
||||
//
|
||||
// Path to the server include dir. It is in tmp_install/include/server, if you did
|
||||
// "configure --prefix=<path to tmp_install>". But if you used "configure --prefix=/",
|
||||
|
||||
@@ -43,6 +43,8 @@ impl ControlFileData {
|
||||
/// Interpret a slice of bytes as a Postgres control file.
|
||||
///
|
||||
pub fn decode(buf: &[u8]) -> Result<ControlFileData> {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
|
||||
// Check that the slice has the expected size. The control file is
|
||||
// padded with zeros up to a 512 byte sector size, so accept a
|
||||
// larger size too, so that the caller can just pass the whole file
|
||||
@@ -55,26 +57,8 @@ impl ControlFileData {
|
||||
let OFFSETOF_CRC = Self::pg_control_crc_offset();
|
||||
let expectedcrc = crc32c::crc32c(&buf[0..OFFSETOF_CRC]);
|
||||
|
||||
// Convert the slice into an array of the right size, and use `transmute` to
|
||||
// reinterpret the raw bytes as a ControlFileData struct.
|
||||
//
|
||||
// NB: Ideally we would use 'zerocopy::FromBytes' for this, but bindgen doesn't
|
||||
// derive FromBytes for us. The safety of this depends on the same constraints
|
||||
// as for FromBytes, namely, all of its fields must implement FromBytes. That
|
||||
// includes the primitive integer types, like `u8`, `u16`, `u32`, `u64` and their
|
||||
// signed variants. But `bool` is not safe, because the contents of the high bits
|
||||
// in a rust bool are undefined. In practice, PostgreSQL uses 1 to represent
|
||||
// true and 0 for false, which is compatible with Rust bool, but let's try not to
|
||||
// depend on it.
|
||||
//
|
||||
// FIXME: ControlFileData does contain 'bool's at the moment.
|
||||
//
|
||||
// See https://github.com/zenithdb/zenith/issues/207 for discussion on the safety
|
||||
// of this.
|
||||
let mut b: [u8; SIZEOF_CONTROLDATA] = [0u8; SIZEOF_CONTROLDATA];
|
||||
b.copy_from_slice(&buf[0..SIZEOF_CONTROLDATA]);
|
||||
let controlfile: ControlFileData =
|
||||
unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) };
|
||||
// Use serde to deserialize the input as a ControlFileData struct.
|
||||
let controlfile = ControlFileData::des(buf)?;
|
||||
|
||||
// Check the CRC
|
||||
if expectedcrc != controlfile.crc {
|
||||
@@ -93,21 +77,10 @@ impl ControlFileData {
|
||||
///
|
||||
/// The CRC is recomputed to match the contents of the fields.
|
||||
pub fn encode(&self) -> Bytes {
|
||||
//
|
||||
// Use `transmute` to reinterpret struct as raw bytes.
|
||||
//
|
||||
// FIXME: This triggers undefined behavior, because the contents
|
||||
// of the padding bytes are undefined, and this leaks those
|
||||
// undefined bytes into the resulting array. The Rust code won't
|
||||
// care what's in those bytes, and PostgreSQL doesn't care
|
||||
// either. HOWEVER, it is a potential security issue, because the
|
||||
// bytes can contain arbitrary pieces of memory from the page
|
||||
// server. In the worst case, that could be private keys or
|
||||
// another tenant's data.
|
||||
//
|
||||
// See https://github.com/zenithdb/zenith/issues/207 for discussion.
|
||||
let b: [u8; SIZEOF_CONTROLDATA] =
|
||||
unsafe { std::mem::transmute::<ControlFileData, [u8; SIZEOF_CONTROLDATA]>(*self) };
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
|
||||
// Serialize into a new buffer.
|
||||
let b = self.ser().unwrap();
|
||||
|
||||
// Recompute the CRC
|
||||
let OFFSETOF_CRC = Self::pg_control_crc_offset();
|
||||
|
||||
@@ -4,6 +4,9 @@
|
||||
// suppress warnings on rust 1.53 due to bindgen unit tests.
|
||||
// https://github.com/rust-lang/rust-bindgen/issues/1651
|
||||
#![allow(deref_nullptr)]
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
||||
|
||||
pub mod controlfile_utils;
|
||||
|
||||
@@ -271,23 +271,13 @@ pub fn main() {
|
||||
|
||||
impl XLogRecord {
|
||||
pub fn from_bytes(buf: &mut Bytes) -> XLogRecord {
|
||||
XLogRecord {
|
||||
xl_tot_len: buf.get_u32_le(),
|
||||
xl_xid: buf.get_u32_le(),
|
||||
xl_prev: buf.get_u64_le(),
|
||||
xl_info: buf.get_u8(),
|
||||
xl_rmid: buf.get_u8(),
|
||||
xl_crc: {
|
||||
buf.advance(2);
|
||||
buf.get_u32_le()
|
||||
},
|
||||
}
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
XLogRecord::des_from(&mut buf.reader()).unwrap()
|
||||
}
|
||||
|
||||
pub fn encode(&self) -> Bytes {
|
||||
let b: [u8; XLOG_SIZE_OF_XLOG_RECORD];
|
||||
b = unsafe { std::mem::transmute::<XLogRecord, [u8; XLOG_SIZE_OF_XLOG_RECORD]>(*self) };
|
||||
Bytes::copy_from_slice(&b[..])
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
self.ser().unwrap().into()
|
||||
}
|
||||
|
||||
// Is this record an XLOG_SWITCH record? They need some special processing,
|
||||
@@ -298,34 +288,20 @@ impl XLogRecord {
|
||||
|
||||
impl XLogPageHeaderData {
|
||||
pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogPageHeaderData {
|
||||
let hdr: XLogPageHeaderData = XLogPageHeaderData {
|
||||
xlp_magic: buf.get_u16_le(),
|
||||
xlp_info: buf.get_u16_le(),
|
||||
xlp_tli: buf.get_u32_le(),
|
||||
xlp_pageaddr: buf.get_u64_le(),
|
||||
xlp_rem_len: buf.get_u32_le(),
|
||||
};
|
||||
buf.get_u32_le(); //padding
|
||||
hdr
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
XLogPageHeaderData::des_from(&mut buf.reader()).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl XLogLongPageHeaderData {
|
||||
pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogLongPageHeaderData {
|
||||
XLogLongPageHeaderData {
|
||||
std: XLogPageHeaderData::from_bytes(buf),
|
||||
xlp_sysid: buf.get_u64_le(),
|
||||
xlp_seg_size: buf.get_u32_le(),
|
||||
xlp_xlog_blcksz: buf.get_u32_le(),
|
||||
}
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
XLogLongPageHeaderData::des_from(&mut buf.reader()).unwrap()
|
||||
}
|
||||
|
||||
pub fn encode(&self) -> Bytes {
|
||||
let b: [u8; XLOG_SIZE_OF_XLOG_LONG_PHD];
|
||||
b = unsafe {
|
||||
std::mem::transmute::<XLogLongPageHeaderData, [u8; XLOG_SIZE_OF_XLOG_LONG_PHD]>(*self)
|
||||
};
|
||||
Bytes::copy_from_slice(&b[..])
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
self.ser().unwrap().into()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -333,17 +309,13 @@ pub const SIZEOF_CHECKPOINT: usize = std::mem::size_of::<CheckPoint>();
|
||||
|
||||
impl CheckPoint {
|
||||
pub fn encode(&self) -> Bytes {
|
||||
let b: [u8; SIZEOF_CHECKPOINT];
|
||||
b = unsafe { std::mem::transmute::<CheckPoint, [u8; SIZEOF_CHECKPOINT]>(*self) };
|
||||
Bytes::copy_from_slice(&b[..])
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
self.ser().unwrap().into()
|
||||
}
|
||||
|
||||
pub fn decode(buf: &[u8]) -> Result<CheckPoint, anyhow::Error> {
|
||||
let mut b = [0u8; SIZEOF_CHECKPOINT];
|
||||
b.copy_from_slice(&buf[0..SIZEOF_CHECKPOINT]);
|
||||
let checkpoint: CheckPoint;
|
||||
checkpoint = unsafe { std::mem::transmute::<[u8; SIZEOF_CHECKPOINT], CheckPoint>(b) };
|
||||
Ok(checkpoint)
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
Ok(CheckPoint::des(buf)?)
|
||||
}
|
||||
|
||||
// Update next XID based on provided new_xid and stored epoch.
|
||||
@@ -385,6 +357,7 @@ pub fn generate_wal_segment(pg_control: &ControlFileData) -> Bytes {
|
||||
xlp_tli: 1, // FIXME: always use Postgres timeline 1
|
||||
xlp_pageaddr: pg_control.checkPoint - XLOG_SIZE_OF_XLOG_LONG_PHD as u64,
|
||||
xlp_rem_len: 0,
|
||||
..Default::default() // Put 0 in padding fields.
|
||||
}
|
||||
},
|
||||
xlp_sysid: pg_control.system_identifier,
|
||||
@@ -404,6 +377,7 @@ pub fn generate_wal_segment(pg_control: &ControlFileData) -> Bytes {
|
||||
xl_info: pg_constants::XLOG_CHECKPOINT_SHUTDOWN,
|
||||
xl_rmid: pg_constants::RM_XLOG_ID,
|
||||
xl_crc: 0,
|
||||
..Default::default() // Put 0 in padding fields.
|
||||
};
|
||||
|
||||
let mut rec_shord_hdr_bytes = BytesMut::new();
|
||||
|
||||
Reference in New Issue
Block a user