diff --git a/src/common/bitpacker.rs b/src/common/bitpacker.rs new file mode 100644 index 000000000..ca9e5e893 --- /dev/null +++ b/src/common/bitpacker.rs @@ -0,0 +1,157 @@ +use std::io::Write; +use std::io; +use common::serialize::BinarySerializable; +use std::mem; + + +pub fn compute_num_bits(amplitude: u32) -> u8 { + (32u32 - amplitude.leading_zeros()) as u8 +} + +pub struct BitPacker { + output: TWrite, + mini_buffer: u64, + mini_buffer_written: usize, + num_bits: usize, + written_size: usize, +} + +impl BitPacker { + + pub fn new(output: TWrite, num_bits: usize) -> BitPacker { + BitPacker { + output: output, + mini_buffer: 0u64, + mini_buffer_written: 0, + num_bits: num_bits, + written_size: 0, + } + } + + pub fn write(&mut self, val: u32) -> io::Result<()> { + let val_u64 = val as u64; + if self.mini_buffer_written + self.num_bits > 64 { + self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32); + self.written_size += self.mini_buffer.serialize(&mut self.output)?; + self.mini_buffer = val_u64.wrapping_shr((64 - self.mini_buffer_written) as u32); + self.mini_buffer_written = self.mini_buffer_written + (self.num_bits as usize) - 64; + } + else { + self.mini_buffer |= val_u64 << self.mini_buffer_written; + self.mini_buffer_written += self.num_bits; + if self.mini_buffer_written == 64 { + self.written_size += self.mini_buffer.serialize(&mut self.output)?; + self.mini_buffer_written = 0; + self.mini_buffer = 0u64; + } + } + Ok(()) + } + + fn flush(&mut self) -> io::Result<()>{ + if self.mini_buffer_written > 0 { + let num_bytes = (self.mini_buffer_written + 7) / 8; + let arr: [u8; 8] = unsafe { mem::transmute::(self.mini_buffer) }; + self.output.write_all(&arr[..num_bytes])?; + self.written_size += num_bytes; + self.mini_buffer_written = 0; + } + Ok(()) + } + + pub fn close(mut self) -> io::Result<(TWrite, usize)> { + self.flush()?; + Ok((self.output, self.written_size)) + } +} + + + +pub struct BitUnpacker<'a> { + data: &'a [u8], + num_bits: usize, + mask: u32, +} + +impl<'a> BitUnpacker<'a> { + pub fn new(data: &'a [u8], num_bits: usize) -> BitUnpacker<'a> { + BitUnpacker { + data: data, + num_bits: num_bits, + mask: (1u32 << num_bits) - 1u32, + } + } + + pub fn get(&self, idx: usize) -> u32 { + if self.num_bits == 0 { + return 0; + } + let addr = (idx * self.num_bits) / 8; + let bit_shift = (idx * self.num_bits) - addr * 8; + let val_unshifted_unmasked: u64; + if addr + 8 <= self.data.len() { + val_unshifted_unmasked = unsafe { * (self.data.as_ptr().offset(addr as isize) as *const u64) }; + } + else { + let mut arr = [0u8; 8]; + for i in 0..self.data.len() - addr { + arr[i] = self.data[addr + i]; + } + val_unshifted_unmasked = unsafe { mem::transmute::<[u8; 8], u64>(arr) }; + } + let val_shifted = (val_unshifted_unmasked >> bit_shift) as u32; + (val_shifted & self.mask) + } + +} + + + + +#[cfg(test)] +mod test { + use super::{BitPacker, BitUnpacker, compute_num_bits}; + + #[test] + fn test_compute_num_bits() { + assert_eq!(compute_num_bits(1), 1u8); + assert_eq!(compute_num_bits(0), 0u8); + assert_eq!(compute_num_bits(2), 2u8); + assert_eq!(compute_num_bits(3), 2u8); + assert_eq!(compute_num_bits(4), 3u8); + assert_eq!(compute_num_bits(255), 8u8); + assert_eq!(compute_num_bits(256), 9u8); + } + + fn test_bitpacker_util(len: usize, num_bits: usize) { + let mut bitpacker = BitPacker::new(Vec::new(), num_bits); + let max_val: u32 = (1 << num_bits) - 1; + let vals: Vec = (0u32..len as u32).map(|i| { + if max_val == 0 { + 0 + } + else { + i % max_val + } + }).collect(); + for &val in &vals { + bitpacker.write(val).unwrap(); + } + let (data, num_bytes) = bitpacker.close().unwrap(); + assert_eq!(num_bytes, (num_bits * len + 7) / 8); + assert_eq!(data.len(), num_bytes); + let bitunpacker = BitUnpacker::new(&data, num_bits); + for (i, val) in vals.iter().enumerate() { + assert_eq!(bitunpacker.get(i), *val); + } + } + + #[test] + fn test_bitpacker() { + test_bitpacker_util(10, 3); + test_bitpacker_util(10, 0); + test_bitpacker_util(10, 1); + test_bitpacker_util(6, 14); + test_bitpacker_util(1000, 14); + } +} \ No newline at end of file diff --git a/src/common/mod.rs b/src/common/mod.rs index 0c64d5f9c..50549ccac 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,12 +1,17 @@ mod serialize; mod timer; mod vint; +mod bitpacker; + pub use self::serialize::BinarySerializable; pub use self::timer::Timing; pub use self::timer::TimerTree; pub use self::timer::OpenTimer; pub use self::vint::VInt; +pub use self::bitpacker::compute_num_bits; + + use std::io; pub fn make_io_err(msg: String) -> io::Error { @@ -26,36 +31,3 @@ pub trait HasLen { } -fn count_leading_zeros(mut val: u32) -> u8 { - if val == 0 { - return 32; - } - let mut result = 0u8; - while (val & (1u32 << 31)) == 0 { - val <<= 1; - result += 1; - } - result -} - - -pub fn compute_num_bits(amplitude: u32) -> u8 { - 32u8 - count_leading_zeros(amplitude) -} - - - -#[cfg(test)] -mod test { - use super::compute_num_bits; - - fn test_compute_num_bits() { - assert_eq!(compute_num_bits(1), 1u8); - assert_eq!(compute_num_bits(0), 0u8); - assert_eq!(compute_num_bits(2), 2u8); - assert_eq!(compute_num_bits(3), 2u8); - assert_eq!(compute_num_bits(4), 3u8); - assert_eq!(compute_num_bits(255), 8u8); - assert_eq!(compute_num_bits(256), 9u8); - } -} \ No newline at end of file diff --git a/src/common/serialize.rs b/src/common/serialize.rs index 90752992c..29c7b2986 100644 --- a/src/common/serialize.rs +++ b/src/common/serialize.rs @@ -1,5 +1,6 @@ -use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; +use byteorder::{ReadBytesExt, WriteBytesExt}; +use byteorder::LittleEndian as Endianness; use std::fmt; use std::io::Write; use std::io::Read; @@ -59,13 +60,13 @@ impl BinarySerializable for impl BinarySerializable for u32 { fn serialize(&self, writer: &mut Write) -> io::Result { - writer.write_u32::(*self) + writer.write_u32::(*self) .map(|_| 4) .map_err(convert_byte_order_error) } fn deserialize(reader: &mut Read) -> io::Result { - reader.read_u32::() + reader.read_u32::() .map_err(convert_byte_order_error) } } @@ -73,12 +74,12 @@ impl BinarySerializable for u32 { impl BinarySerializable for u64 { fn serialize(&self, writer: &mut Write) -> io::Result { - writer.write_u64::(*self) + writer.write_u64::(*self) .map(|_| 8) .map_err(convert_byte_order_error) } fn deserialize(reader: &mut Read) -> io::Result { - reader.read_u64::() + reader.read_u64::() .map_err(convert_byte_order_error) } } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index a79819347..eedadae65 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -22,8 +22,6 @@ pub use self::serializer::FastFieldSerializer; #[cfg(test)] mod tests { - - use common::compute_num_bits; use super::*; use schema::Field; use std::path::Path; diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs index b7e1b092c..13e71b239 100644 --- a/src/fastfield/serializer.rs +++ b/src/fastfield/serializer.rs @@ -2,6 +2,8 @@ use common::BinarySerializable; use directory::WritePtr; use schema::Field; use common::compute_num_bits; +use std::io; +use std::io::{Write, Seek, SeekFrom}; /// `FastFieldSerializer` is in charge of serializing