From 36c12247b92f961478d3952a210828056f8ec1ce Mon Sep 17 00:00:00 2001
From: Eric Seppanen
Date: Fri, 7 May 2021 00:43:40 -0700
Subject: [PATCH] add bin_ser module

This module adds two traits that implement bincode-based serialization.
BeSer implements methods for big-endian encoding/decoding.
LeSer implements methods for little-endian encoding/decoding.

Right now, the BeSer and LeSer methods have the same names, meaning you
can't `use` them both at the same time. This is intended to be a safety
mechanism: mixing big-endian and little-endian encoding in the same file
is error-prone. There are ways around this, but the easiest fix is to
put the big-endian code and little-endian code in different files or
submodules.
---
 Cargo.lock                         |  19 +++
 zenith_utils/Cargo.toml            |   9 +-
 zenith_utils/src/bin_ser.rs        | 211 +++++++++++++++++++++++++++++
 zenith_utils/src/lib.rs            |   2 +
 zenith_utils/tests/bin_ser_test.rs |  42 ++++++
 5 files changed, 282 insertions(+), 1 deletion(-)
 create mode 100644 zenith_utils/src/bin_ser.rs
 create mode 100644 zenith_utils/tests/bin_ser_test.rs

diff --git a/Cargo.lock b/Cargo.lock
index 19edcd2c0f..eb191e681c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -111,6 +111,15 @@ version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
 
+[[package]]
+name = "bincode"
+version = "1.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "bindgen"
 version = "0.57.0"
@@ -651,6 +660,12 @@ version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
 
+[[package]]
+name = "hex-literal"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5af1f635ef1bc545d78392b136bfe1c9809e029023c84a3638a864a10b8819c8"
+
 [[package]]
 name = "hmac"
 version = "0.10.1"
@@ -2438,5 +2453,9 @@ dependencies = [
 name = "zenith_utils"
 version = "0.1.0"
 dependencies = [
+ "bincode",
+ "bytes",
+ "hex-literal",
+ "serde",
  "thiserror",
 ]
diff --git a/zenith_utils/Cargo.toml b/zenith_utils/Cargo.toml
index ee549ab2f9..2bf7aada94 100644
--- a/zenith_utils/Cargo.toml
+++ b/zenith_utils/Cargo.toml
@@ -5,4 +5,11 @@ authors = ["Eric Seppanen "]
 edition = "2018"
 
 [dependencies]
-thiserror = "1"
+serde = "1.0"
+bincode = "1.3"
+thiserror = "1.0"
+
+[dev-dependencies]
+serde = { version = "1.0", features = ["derive"] }
+hex-literal = "0.3"
+bytes = "1.0"
diff --git a/zenith_utils/src/bin_ser.rs b/zenith_utils/src/bin_ser.rs
new file mode 100644
index 0000000000..24c775f237
--- /dev/null
+++ b/zenith_utils/src/bin_ser.rs
@@ -0,0 +1,211 @@
+//! Utilities for binary serialization/deserialization.
+//!
+//! The [`BeSer`] trait allows us to define data structures
+//! that can match data structures that are sent over the wire
+//! in big-endian form with no packing.
+//!
+//! The [`LeSer`] trait does the same thing, in little-endian form.
+//!
+//! Note: you will get a compile error if you try to `use` both traits
+//! in the same module or scope. This is intended to be a safety
+//! mechanism: mixing big-endian and little-endian encoding in the same file
+//! is error-prone.
+
+#![warn(missing_docs)]
+
+use bincode::Options;
+use serde::{de::DeserializeOwned, Serialize};
+use std::io::{Read, Write};
+use thiserror::Error;
+
+/// An error that occurred during a deserialize operation
+///
+/// This could happen because the input data was too short,
+/// or because an invalid value was encountered.
+#[derive(Debug, Error)]
+#[error("deserialize error")]
+pub struct DeserializeError;
+
+/// An error that occurred during a serialize operation
+///
+/// This probably means our [`Write`] failed, e.g. we tried
+/// to write beyond the end of a buffer.
+#[derive(Debug, Error)]
+#[error("serialize error")]
+pub struct SerializeError;
+
+/// A shortcut that configures big-endian binary serialization
+///
+/// Properties:
+/// - Big endian
+/// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)
+/// - Allow trailing bytes: this means we don't throw an error
+///   if the deserializer is passed a buffer with more data
+///   past the end.
+pub fn be_coder() -> impl Options {
+    bincode::DefaultOptions::new()
+        .with_big_endian()
+        .with_fixint_encoding()
+        .allow_trailing_bytes()
+}
+
+/// A shortcut that configures little-endian binary serialization
+///
+/// Properties:
+/// - Little endian
+/// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)
+/// - Allow trailing bytes: this means we don't throw an error
+///   if the deserializer is passed a buffer with more data
+///   past the end.
+pub fn le_coder() -> impl Options {
+    bincode::DefaultOptions::new()
+        .with_little_endian()
+        .with_fixint_encoding()
+        .allow_trailing_bytes()
+}
+
+/// Binary serialize/deserialize helper functions (Big Endian)
+///
+pub trait BeSer: Serialize + DeserializeOwned {
+    /// Serialize into a byte slice
+    fn ser_into_slice(&self, b: &mut [u8]) -> Result<(), SerializeError> {
+        // This is slightly awkward; we need a mutable reference to a mutable reference.
+        let mut w = b;
+        self.ser_into(&mut w)
+    }
+
+    /// Serialize into a borrowed writer
+    ///
+    /// This is useful for most `Write` types except `&mut [u8]`, which
+    /// can more easily use [`ser_into_slice`](Self::ser_into_slice).
+    fn ser_into<W: Write>(&self, w: &mut W) -> Result<(), SerializeError> {
+        be_coder().serialize_into(w, &self).or(Err(SerializeError))
+    }
+
+    /// Serialize into a new heap-allocated buffer
+    fn ser(&self) -> Result<Vec<u8>, SerializeError> {
+        be_coder().serialize(&self).or(Err(SerializeError))
+    }
+
+    /// Deserialize from a byte slice
+    fn des(buf: &[u8]) -> Result<Self, DeserializeError> {
+        be_coder().deserialize(buf).or(Err(DeserializeError))
+    }
+
+    /// Deserialize from a reader
+    ///
+    /// tip: `&[u8]` implements `Read`
+    fn des_from<R: Read>(r: R) -> Result<Self, DeserializeError> {
+        be_coder().deserialize_from(r).or(Err(DeserializeError))
+    }
+}
+
+/// Binary serialize/deserialize helper functions (Little Endian)
+///
+pub trait LeSer: Serialize + DeserializeOwned {
+    /// Serialize into a byte slice
+    fn ser_into_slice(&self, b: &mut [u8]) -> Result<(), SerializeError> {
+        // This is slightly awkward; we need a mutable reference to a mutable reference.
+        let mut w = b;
+        self.ser_into(&mut w)
+    }
+
+    /// Serialize into a borrowed writer
+    ///
+    /// This is useful for most `Write` types except `&mut [u8]`, which
+    /// can more easily use [`ser_into_slice`](Self::ser_into_slice).
+    fn ser_into<W: Write>(&self, w: &mut W) -> Result<(), SerializeError> {
+        le_coder().serialize_into(w, &self).or(Err(SerializeError))
+    }
+
+    /// Serialize into a new heap-allocated buffer
+    fn ser(&self) -> Result<Vec<u8>, SerializeError> {
+        le_coder().serialize(&self).or(Err(SerializeError))
+    }
+
+    /// Deserialize from a byte slice
+    fn des(buf: &[u8]) -> Result<Self, DeserializeError> {
+        le_coder().deserialize(buf).or(Err(DeserializeError))
+    }
+
+    /// Deserialize from a reader
+    ///
+    /// tip: `&[u8]` implements `Read`
+    fn des_from<R: Read>(r: R) -> Result<Self, DeserializeError> {
+        le_coder().deserialize_from(r).or(Err(DeserializeError))
+    }
+}
+
+impl<T> BeSer for T where T: Serialize + DeserializeOwned {}
+
+impl<T> LeSer for T where T: Serialize + DeserializeOwned {}
+
+#[cfg(test)]
+mod tests {
+    use serde::{Deserialize, Serialize};
+
+    #[derive(Debug, PartialEq, Serialize, Deserialize)]
+    pub struct ShortStruct {
+        a: u8,
+        b: u32,
+    }
+
+    #[test]
+    fn be_short() {
+        use super::BeSer;
+
+        let x = ShortStruct { a: 7, b: 65536 };
+
+        let encoded = x.ser().unwrap();
+
+        assert_eq!(encoded, vec![7, 0, 1, 0, 0]);
+
+        let raw = [8u8, 7, 3, 0, 0];
+        let decoded = ShortStruct::des(&raw).unwrap();
+
+        assert_eq!(
+            decoded,
+            ShortStruct {
+                a: 8,
+                b: 0x07030000
+            }
+        );
+
+        // has trailing data
+        let raw = [8u8, 7, 3, 0, 0, 0xFF, 0xFF, 0xFF];
+        let _ = ShortStruct::des(&raw).unwrap();
+    }
+
+    #[derive(Debug, PartialEq, Serialize, Deserialize)]
+    pub struct BigMsg {
+        pub tag: u8,
+        pub blockpos: u64,
+        pub last_flush_position: u64,
+        pub apply: u64,
+        pub timestamp: u64,
+        pub reply_requested: u8,
+    }
+
+    #[test]
+    fn be_big() {
+        use super::BeSer;
+
+        let msg = BigMsg {
+            tag: 42,
+            blockpos: 0x1000_2000_3000_4000,
+            last_flush_position: 0x1234_2345_3456_4567,
+            apply: 0x9876_5432_10FE_DCBA,
+            timestamp: 0xABBA_CDDC_EFFE_0110,
+            reply_requested: 1,
+        };
+
+        let encoded = msg.ser().unwrap();
+        let expected = hex_literal::hex!(
+            "2A 1000 2000 3000 4000 1234 2345 3456 4567 9876 5432 10FE DCBA ABBA CDDC EFFE 0110 01"
+        );
+        assert_eq!(encoded, expected);
+
+        let msg2 = BigMsg::des(&encoded).unwrap();
+        assert_eq!(msg, msg2);
+    }
+}
diff --git a/zenith_utils/src/lib.rs b/zenith_utils/src/lib.rs
index 21bfc73930..8388c5f9ed 100644
--- a/zenith_utils/src/lib.rs
+++ b/zenith_utils/src/lib.rs
@@ -8,3 +8,5 @@ pub mod seqwait;
 
 // Async version of SeqWait. Currently unused.
 // pub mod seqwait_async;
+
+pub mod bin_ser;
diff --git a/zenith_utils/tests/bin_ser_test.rs b/zenith_utils/tests/bin_ser_test.rs
new file mode 100644
index 0000000000..b0f3a63418
--- /dev/null
+++ b/zenith_utils/tests/bin_ser_test.rs
@@ -0,0 +1,42 @@
+use bytes::{Buf, BytesMut};
+use hex_literal::hex;
+use serde::{Deserialize, Serialize};
+use std::io::Read;
+use zenith_utils::bin_ser::LeSer;
+
+#[derive(Debug, PartialEq, Serialize, Deserialize)]
+pub struct HeaderData {
+    magic: u16,
+    info: u16,
+    tli: u32,
+    pageaddr: u64,
+    len: u32,
+}
+
+// A manual implementation using BytesMut, just so we can
+// verify that we decode the same way.
+pub fn decode_header_data(buf: &mut BytesMut) -> HeaderData {
+    HeaderData {
+        magic: buf.get_u16_le(),
+        info: buf.get_u16_le(),
+        tli: buf.get_u32_le(),
+        pageaddr: buf.get_u64_le(),
+        len: buf.get_u32_le(),
+    }
+}
+
+pub fn decode2<R: Read>(reader: &mut R) -> HeaderData {
+    HeaderData::des_from(reader).unwrap()
+}
+
+#[test]
+fn test1() {
+    let raw1 = hex!("8940 7890 5534 7890 1289 5379 8378 7893 4207 8923 4712 3218");
+    let mut buf1 = BytesMut::from(&raw1[..]);
+    let mut buf2 = &raw1[..];
+    let dec1 = decode_header_data(&mut buf1);
+    let dec2 = decode2(&mut buf2);
+    assert_eq!(dec1, dec2);
+    eprintln!("{} {}", buf1.len(), buf2.len());
+    assert_eq!(buf1, buf2);
+}