Add comments on the unsafe use of transmute in encode/decode_pg_control

Note the unsafety of the unsafe block, with a link to the ongoing
discussion. This doesn't try to solve the problem, but let's at least
document the status quo.
This commit is contained in:
Heikki Linnakangas
2021-06-04 23:05:26 +03:00
parent 8147aa7e93
commit 8b5a061c8e

View File

@@ -45,14 +45,32 @@ pub fn decode_pg_control(buf: &[u8]) -> Result<ControlFileData> {
if buf.len() < SIZEOF_CONTROLDATA {
bail!("control file is too short");
}
let controlfile: ControlFileData;
// Compute the expected CRC of the content.
let expectedcrc = crc32c::crc32c(&buf[0..OFFSETOF_CRC]);
// Convert the slice into an array of the right size, and use `transmute` to
// reinterpret the raw bytes as a ControlFileData struct.
//
// NB: Ideally we would use 'zerocopy::FromBytes' for this, but bindgen doesn't
// derive FromBytes for us. The safety of this depends on the same constraints
// as for FromBytes, namely, all of its fields must implement FromBytes. That
// includes the primitive integer types, like `u8`, `u16`, `u32`, `u64` and their
// signed variants. But `bool` is not safe, because the contents of the high bits
// in a rust bool are undefined. In practice, PostgreSQL uses 1 to represent
// true and 0 for false, which is compatible with Rust bool, but let's try not to
// depend on it.
//
// FIXME: ControlFileData does contain 'bool's at the moment.
//
// See https://github.com/zenithdb/zenith/issues/207 for discussion on the safety
// of this.
let mut b: [u8; SIZEOF_CONTROLDATA] = [0u8; SIZEOF_CONTROLDATA];
b.copy_from_slice(&buf[0..SIZEOF_CONTROLDATA]);
let expectedcrc = crc32c::crc32c(&b[0..OFFSETOF_CRC]);
controlfile = unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) };
let controlfile: ControlFileData =
unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) };
// Check the CRC
if expectedcrc != controlfile.crc {
bail!(
"invalid CRC in control file: expected {:08X}, was {:08X}",
@@ -69,9 +87,21 @@ pub fn decode_pg_control(buf: &[u8]) -> Result<ControlFileData> {
///
/// The CRC is recomputed to match the contents of the fields.
pub fn encode_pg_control(controlfile: &ControlFileData) -> Bytes {
let b: [u8; SIZEOF_CONTROLDATA];
b = unsafe { std::mem::transmute::<ControlFileData, [u8; SIZEOF_CONTROLDATA]>(*controlfile) };
//
// Use `transmute` to reinterpret struct as raw bytes.
//
// FIXME: This triggers undefined behavior, because the contents
// of the padding bytes are undefined, and this leaks those
// undefined bytes into the resulting array. The Rust code won't
// care what's in those bytes, and PostgreSQL doesn't care
// either. HOWEVER, it is a potential security issue, because the
// bytes can contain arbitrary pieces of memory from the page
// server. In the worst case, that could be private keys or
// another tenant's data.
//
// See https://github.com/zenithdb/zenith/issues/207 for discussion.
let b: [u8; SIZEOF_CONTROLDATA] =
unsafe { std::mem::transmute::<ControlFileData, [u8; SIZEOF_CONTROLDATA]>(*controlfile) };
// Recompute the CRC
let mut data_without_crc: [u8; OFFSETOF_CRC] = [0u8; OFFSETOF_CRC];
@@ -79,7 +109,6 @@ pub fn encode_pg_control(controlfile: &ControlFileData) -> Bytes {
let newcrc = crc32c::crc32c(&data_without_crc);
let mut buf = BytesMut::with_capacity(PG_CONTROL_FILE_SIZE as usize);
buf.extend_from_slice(&b[0..OFFSETOF_CRC]);
buf.extend_from_slice(&newcrc.to_ne_bytes());
// Fill the rest of the control file with zeros.