From 434374ebb46613f3aabbf6cdf0cc874703371ec5 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 4 Jun 2021 23:05:30 +0300 Subject: [PATCH] Turn encode/decode into methods Like in PR #208 --- pageserver/src/branches.rs | 14 +-- postgres_ffi/src/controlfile_utils.rs | 167 +++++++++++++------------- 2 files changed, 89 insertions(+), 92 deletions(-) diff --git a/pageserver/src/branches.rs b/pageserver/src/branches.rs index ac73f0db1d..e304ae3b63 100644 --- a/pageserver/src/branches.rs +++ b/pageserver/src/branches.rs @@ -6,7 +6,7 @@ use anyhow::{anyhow, bail, Context, Result}; use fs::File; -use postgres_ffi::{controlfile_utils, pg_constants, xlog_utils}; +use postgres_ffi::{pg_constants, xlog_utils, ControlFileData}; use rand::Rng; use serde::{Deserialize, Serialize}; use std::env; @@ -79,7 +79,7 @@ pub fn init_repo(conf: &'static PageServerConf, repo_dir: &Path) -> Result<()> { // Read control file to extract the LSN and system id let controlfile_path = tmppath.join("global").join("pg_control"); - let controlfile = controlfile_utils::decode_pg_control(&fs::read(controlfile_path)?)?; + let controlfile = ControlFileData::decode(&fs::read(controlfile_path)?)?; // let systemid = controlfile.system_identifier; let lsn = controlfile.checkPoint; let lsnstr = format!("{:016X}", lsn); @@ -211,7 +211,7 @@ pub(crate) fn get_system_id(conf: &PageServerConf) -> Result { let (_, main_snap_dir) = find_latest_snapshot(conf, *main_tli)?; let controlfile_path = main_snap_dir.join("global").join("pg_control"); - let controlfile = controlfile_utils::decode_pg_control(&fs::read(controlfile_path)?)?; + let controlfile = ControlFileData::decode(&fs::read(controlfile_path)?)?; Ok(controlfile.system_identifier) } @@ -354,15 +354,11 @@ fn parse_point_in_time(conf: &PageServerConf, s: &str) -> Result { fn force_crash_recovery(datadir: &Path) -> Result<()> { // Read in the control file let controlfilepath = datadir.to_path_buf().join("global").join("pg_control"); - let mut controlfile = - controlfile_utils::decode_pg_control(&fs::read(controlfilepath.as_path())?)?; + let mut controlfile = ControlFileData::decode(&fs::read(controlfilepath.as_path())?)?; controlfile.state = postgres_ffi::DBState_DB_IN_PRODUCTION; - fs::write( - controlfilepath.as_path(), - controlfile_utils::encode_pg_control(&controlfile), - )?; + fs::write(controlfilepath.as_path(), controlfile.encode())?; Ok(()) } diff --git a/postgres_ffi/src/controlfile_utils.rs b/postgres_ffi/src/controlfile_utils.rs index 2e0db3dab8..58c8a374c0 100644 --- a/postgres_ffi/src/controlfile_utils.rs +++ b/postgres_ffi/src/controlfile_utils.rs @@ -31,93 +31,94 @@ use bytes::{Bytes, BytesMut}; /// Equivalent to sizeof(ControlFileData) in C const SIZEOF_CONTROLDATA: usize = std::mem::size_of::(); -/// Compute the offset of the `crc` field within the `ControlFileData` struct. -/// Equivalent to offsetof(ControlFileData, crc) in C. -// Someday this can be const when the right compiler features land. -fn pg_control_crc_offset() -> usize { - memoffset::offset_of!(ControlFileData, crc) -} - - -/// -/// Interpret a slice of bytes as a Postgres control file. -/// -pub fn decode_pg_control(buf: &[u8]) -> Result { - // Check that the slice has the expected size. The control file is - // padded with zeros up to a 512 byte sector size, so accept a - // larger size too, so that the caller can just the whole file - // contents without knowing the exact size of the struct. - if buf.len() < SIZEOF_CONTROLDATA { - bail!("control file is too short"); +impl ControlFileData { + /// Compute the offset of the `crc` field within the `ControlFileData` struct. + /// Equivalent to offsetof(ControlFileData, crc) in C. + // Someday this can be const when the right compiler features land. + fn pg_control_crc_offset() -> usize { + memoffset::offset_of!(ControlFileData, crc) } - // Compute the expected CRC of the content. - let OFFSETOF_CRC = pg_control_crc_offset(); - let expectedcrc = crc32c::crc32c(&buf[0..OFFSETOF_CRC]); + /// + /// Interpret a slice of bytes as a Postgres control file. + /// + pub fn decode(buf: &[u8]) -> Result { + // Check that the slice has the expected size. The control file is + // padded with zeros up to a 512 byte sector size, so accept a + // larger size too, so that the caller can just the whole file + // contents without knowing the exact size of the struct. + if buf.len() < SIZEOF_CONTROLDATA { + bail!("control file is too short"); + } - // Convert the slice into an array of the right size, and use `transmute` to - // reinterpret the raw bytes as a ControlFileData struct. - // - // NB: Ideally we would use 'zerocopy::FromBytes' for this, but bindgen doesn't - // derive FromBytes for us. The safety of this depends on the same constraints - // as for FromBytes, namely, all of its fields must implement FromBytes. That - // includes the primitive integer types, like `u8`, `u16`, `u32`, `u64` and their - // signed variants. But `bool` is not safe, because the contents of the high bits - // in a rust bool are undefined. In practice, PostgreSQL uses 1 to represent - // true and 0 for false, which is compatible with Rust bool, but let's try not to - // depend on it. - // - // FIXME: ControlFileData does contain 'bool's at the moment. - // - // See https://github.com/zenithdb/zenith/issues/207 for discussion on the safety - // of this. - let mut b: [u8; SIZEOF_CONTROLDATA] = [0u8; SIZEOF_CONTROLDATA]; - b.copy_from_slice(&buf[0..SIZEOF_CONTROLDATA]); - let controlfile: ControlFileData = - unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) }; + // Compute the expected CRC of the content. + let OFFSETOF_CRC = Self::pg_control_crc_offset(); + let expectedcrc = crc32c::crc32c(&buf[0..OFFSETOF_CRC]); - // Check the CRC - if expectedcrc != controlfile.crc { - bail!( - "invalid CRC in control file: expected {:08X}, was {:08X}", - expectedcrc, - controlfile.crc - ); + // Convert the slice into an array of the right size, and use `transmute` to + // reinterpret the raw bytes as a ControlFileData struct. + // + // NB: Ideally we would use 'zerocopy::FromBytes' for this, but bindgen doesn't + // derive FromBytes for us. The safety of this depends on the same constraints + // as for FromBytes, namely, all of its fields must implement FromBytes. That + // includes the primitive integer types, like `u8`, `u16`, `u32`, `u64` and their + // signed variants. But `bool` is not safe, because the contents of the high bits + // in a rust bool are undefined. In practice, PostgreSQL uses 1 to represent + // true and 0 for false, which is compatible with Rust bool, but let's try not to + // depend on it. + // + // FIXME: ControlFileData does contain 'bool's at the moment. + // + // See https://github.com/zenithdb/zenith/issues/207 for discussion on the safety + // of this. + let mut b: [u8; SIZEOF_CONTROLDATA] = [0u8; SIZEOF_CONTROLDATA]; + b.copy_from_slice(&buf[0..SIZEOF_CONTROLDATA]); + let controlfile: ControlFileData = + unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) }; + + // Check the CRC + if expectedcrc != controlfile.crc { + bail!( + "invalid CRC in control file: expected {:08X}, was {:08X}", + expectedcrc, + controlfile.crc + ); + } + + Ok(controlfile) } - Ok(controlfile) -} - -/// -/// Convert a struct representing a Postgres control file into raw bytes. -/// -/// The CRC is recomputed to match the contents of the fields. -pub fn encode_pg_control(controlfile: &ControlFileData) -> Bytes { - // - // Use `transmute` to reinterpret struct as raw bytes. - // - // FIXME: This triggers undefined behavior, because the contents - // of the padding bytes are undefined, and this leaks those - // undefined bytes into the resulting array. The Rust code won't - // care what's in those bytes, and PostgreSQL doesn't care - // either. HOWEVER, it is a potential security issue, because the - // bytes can contain arbitrary pieces of memory from the page - // server. In the worst case, that could be private keys or - // another tenant's data. - // - // See https://github.com/zenithdb/zenith/issues/207 for discussion. - let b: [u8; SIZEOF_CONTROLDATA] = - unsafe { std::mem::transmute::(*controlfile) }; - - // Recompute the CRC - let OFFSETOF_CRC = pg_control_crc_offset(); - let newcrc = crc32c::crc32c(&b[0..OFFSETOF_CRC]); - - let mut buf = BytesMut::with_capacity(PG_CONTROL_FILE_SIZE as usize); - buf.extend_from_slice(&b[0..OFFSETOF_CRC]); - buf.extend_from_slice(&newcrc.to_ne_bytes()); - // Fill the rest of the control file with zeros. - buf.resize(PG_CONTROL_FILE_SIZE as usize, 0); - - buf.into() + /// + /// Convert a struct representing a Postgres control file into raw bytes. + /// + /// The CRC is recomputed to match the contents of the fields. + pub fn encode(&self) -> Bytes { + // + // Use `transmute` to reinterpret struct as raw bytes. + // + // FIXME: This triggers undefined behavior, because the contents + // of the padding bytes are undefined, and this leaks those + // undefined bytes into the resulting array. The Rust code won't + // care what's in those bytes, and PostgreSQL doesn't care + // either. HOWEVER, it is a potential security issue, because the + // bytes can contain arbitrary pieces of memory from the page + // server. In the worst case, that could be private keys or + // another tenant's data. + // + // See https://github.com/zenithdb/zenith/issues/207 for discussion. + let b: [u8; SIZEOF_CONTROLDATA] = + unsafe { std::mem::transmute::(*self) }; + + // Recompute the CRC + let OFFSETOF_CRC = Self::pg_control_crc_offset(); + let newcrc = crc32c::crc32c(&b[0..OFFSETOF_CRC]); + + let mut buf = BytesMut::with_capacity(PG_CONTROL_FILE_SIZE as usize); + buf.extend_from_slice(&b[0..OFFSETOF_CRC]); + buf.extend_from_slice(&newcrc.to_ne_bytes()); + // Fill the rest of the control file with zeros. + buf.resize(PG_CONTROL_FILE_SIZE as usize, 0); + + buf.into() + } }