From 8b5a061c8e1d856658ec8dd0f3ab8d85e3056263 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 4 Jun 2021 23:05:26 +0300 Subject: [PATCH] Add comments on the unsafe use of transmute in encode/decode_pg_control Note the unsafety of the unsafe block, with a link to the ongoing discussion. This doesn't try to solve the problem, but let's at least document the status quo. --- postgres_ffi/src/controlfile_utils.rs | 45 ++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/postgres_ffi/src/controlfile_utils.rs b/postgres_ffi/src/controlfile_utils.rs index dab9410992..d6c625c296 100644 --- a/postgres_ffi/src/controlfile_utils.rs +++ b/postgres_ffi/src/controlfile_utils.rs @@ -45,14 +45,32 @@ pub fn decode_pg_control(buf: &[u8]) -> Result { if buf.len() < SIZEOF_CONTROLDATA { bail!("control file is too short"); } - let controlfile: ControlFileData; + // Compute the expected CRC of the content. + let expectedcrc = crc32c::crc32c(&buf[0..OFFSETOF_CRC]); + + // Convert the slice into an array of the right size, and use `transmute` to + // reinterpret the raw bytes as a ControlFileData struct. + // + // NB: Ideally we would use 'zerocopy::FromBytes' for this, but bindgen doesn't + // derive FromBytes for us. The safety of this depends on the same constraints + // as for FromBytes, namely, all of its fields must implement FromBytes. That + // includes the primitive integer types, like `u8`, `u16`, `u32`, `u64` and their + // signed variants. But `bool` is not safe, because the contents of the high bits + // in a rust bool are undefined. In practice, PostgreSQL uses 1 to represent + // true and 0 for false, which is compatible with Rust bool, but let's try not to + // depend on it. + // + // FIXME: ControlFileData does contain 'bool's at the moment. + // + // See https://github.com/zenithdb/zenith/issues/207 for discussion on the safety + // of this. let mut b: [u8; SIZEOF_CONTROLDATA] = [0u8; SIZEOF_CONTROLDATA]; b.copy_from_slice(&buf[0..SIZEOF_CONTROLDATA]); - let expectedcrc = crc32c::crc32c(&b[0..OFFSETOF_CRC]); - - controlfile = unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) }; + let controlfile: ControlFileData = + unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) }; + // Check the CRC if expectedcrc != controlfile.crc { bail!( "invalid CRC in control file: expected {:08X}, was {:08X}", @@ -69,9 +87,21 @@ pub fn decode_pg_control(buf: &[u8]) -> Result { /// /// The CRC is recomputed to match the contents of the fields. pub fn encode_pg_control(controlfile: &ControlFileData) -> Bytes { - let b: [u8; SIZEOF_CONTROLDATA]; - - b = unsafe { std::mem::transmute::(*controlfile) }; + // + // Use `transmute` to reinterpret struct as raw bytes. + // + // FIXME: This triggers undefined behavior, because the contents + // of the padding bytes are undefined, and this leaks those + // undefined bytes into the resulting array. The Rust code won't + // care what's in those bytes, and PostgreSQL doesn't care + // either. HOWEVER, it is a potential security issue, because the + // bytes can contain arbitrary pieces of memory from the page + // server. In the worst case, that could be private keys or + // another tenant's data. + // + // See https://github.com/zenithdb/zenith/issues/207 for discussion. + let b: [u8; SIZEOF_CONTROLDATA] = + unsafe { std::mem::transmute::(*controlfile) }; // Recompute the CRC let mut data_without_crc: [u8; OFFSETOF_CRC] = [0u8; OFFSETOF_CRC]; @@ -79,7 +109,6 @@ pub fn encode_pg_control(controlfile: &ControlFileData) -> Bytes { let newcrc = crc32c::crc32c(&data_without_crc); let mut buf = BytesMut::with_capacity(PG_CONTROL_FILE_SIZE as usize); - buf.extend_from_slice(&b[0..OFFSETOF_CRC]); buf.extend_from_slice(&newcrc.to_ne_bytes()); // Fill the rest of the control file with zeros.