issue/55 externalizing the bitpacker

This commit is contained in:
Paul Masurel
2016-11-24 00:48:20 +09:00
parent b875b18bd6
commit a87c7a6400
5 changed files with 170 additions and 40 deletions

157
src/common/bitpacker.rs Normal file
View File

@@ -0,0 +1,157 @@
use std::io::Write;
use std::io;
use common::serialize::BinarySerializable;
use std::mem;
/// Returns the number of bits needed to represent `amplitude`.
///
/// This is the one-based position of the highest set bit, so an amplitude
/// of `0` needs `0` bits and `u32` values never need more than `32`.
pub fn compute_num_bits(amplitude: u32) -> u8 {
    let unused_high_bits = amplitude.leading_zeros();
    (32u32 - unused_high_bits) as u8
}
pub struct BitPacker<TWrite: Write> {
output: TWrite,
mini_buffer: u64,
mini_buffer_written: usize,
num_bits: usize,
written_size: usize,
}
impl<TWrite: Write> BitPacker<TWrite> {
pub fn new(output: TWrite, num_bits: usize) -> BitPacker<TWrite> {
BitPacker {
output: output,
mini_buffer: 0u64,
mini_buffer_written: 0,
num_bits: num_bits,
written_size: 0,
}
}
pub fn write(&mut self, val: u32) -> io::Result<()> {
let val_u64 = val as u64;
if self.mini_buffer_written + self.num_bits > 64 {
self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32);
self.written_size += self.mini_buffer.serialize(&mut self.output)?;
self.mini_buffer = val_u64.wrapping_shr((64 - self.mini_buffer_written) as u32);
self.mini_buffer_written = self.mini_buffer_written + (self.num_bits as usize) - 64;
}
else {
self.mini_buffer |= val_u64 << self.mini_buffer_written;
self.mini_buffer_written += self.num_bits;
if self.mini_buffer_written == 64 {
self.written_size += self.mini_buffer.serialize(&mut self.output)?;
self.mini_buffer_written = 0;
self.mini_buffer = 0u64;
}
}
Ok(())
}
fn flush(&mut self) -> io::Result<()>{
if self.mini_buffer_written > 0 {
let num_bytes = (self.mini_buffer_written + 7) / 8;
let arr: [u8; 8] = unsafe { mem::transmute::<u64, [u8; 8]>(self.mini_buffer) };
self.output.write_all(&arr[..num_bytes])?;
self.written_size += num_bytes;
self.mini_buffer_written = 0;
}
Ok(())
}
pub fn close(mut self) -> io::Result<(TWrite, usize)> {
self.flush()?;
Ok((self.output, self.written_size))
}
}
/// Random-access reader over a buffer of fixed-width, bit-packed `u32` values.
///
/// The buffer is interpreted as a little-endian bit stream in which value
/// number `idx` occupies the `num_bits` bits starting at bit `idx * num_bits`,
/// least-significant bit first.
pub struct BitUnpacker<'a> {
    /// Packed bytes.
    data: &'a [u8],
    /// Width, in bits, of every packed value.
    num_bits: usize,
    /// Mask keeping the low `num_bits` bits of a `u32`.
    mask: u32,
}

impl<'a> BitUnpacker<'a> {
    /// Creates a reader over `data` for values stored on `num_bits` bits.
    ///
    /// Widths from `0` to `32` are supported.
    pub fn new(data: &'a [u8], num_bits: usize) -> BitUnpacker<'a> {
        // `1u32 << 32` overflows, so the full-width mask is spelled out.
        let mask = if num_bits >= 32 {
            !0u32
        } else {
            (1u32 << num_bits) - 1u32
        };
        BitUnpacker {
            data: data,
            num_bits: num_bits,
            mask: mask,
        }
    }

    /// Returns the value stored at position `idx`.
    ///
    /// Always returns `0` when the width is `0`. Bits past the end of the
    /// buffer read as zero.
    ///
    /// # Panics
    ///
    /// Panics if the value starts beyond the end of the buffer.
    pub fn get(&self, idx: usize) -> u32 {
        if self.num_bits == 0 {
            return 0;
        }
        let bit_offset = idx * self.num_bits;
        let addr = bit_offset / 8;
        let bit_shift = bit_offset % 8;

        // Copy up to eight bytes starting at `addr` into a zero-padded window.
        // Going through a byte array avoids dereferencing a misaligned `u64`
        // pointer and covers the end of the buffer with the same code.
        let window = &self.data[addr..];
        let mut bytes = [0u8; 8];
        if window.len() >= 8 {
            bytes.copy_from_slice(&window[..8]);
        } else {
            bytes[..window.len()].copy_from_slice(window);
        }

        // Assemble the window as a little-endian word, whatever the host is.
        let mut val_unshifted_unmasked = 0u64;
        for (i, &byte) in bytes.iter().enumerate() {
            val_unshifted_unmasked |= (byte as u64) << (8 * i);
        }

        // `bit_shift` is at most 7 and the width at most 32, so the value
        // always sits within the 64-bit window.
        let val_shifted = (val_unshifted_unmasked >> bit_shift) as u32;
        val_shifted & self.mask
    }
}
#[cfg(test)]
mod test {
    use super::{BitPacker, BitUnpacker, compute_num_bits};

    /// Checks the bit width reported for a handful of known amplitudes.
    #[test]
    fn test_compute_num_bits() {
        let expectations = [
            (1u32, 1u8),
            (0u32, 0u8),
            (2u32, 2u8),
            (3u32, 2u8),
            (4u32, 3u8),
            (255u32, 8u8),
            (256u32, 9u8),
        ];
        for &(amplitude, expected) in &expectations {
            assert_eq!(compute_num_bits(amplitude), expected);
        }
    }

    /// Packs `len` values on `num_bits` bits, then checks the reported size
    /// and that every value can be read back.
    fn test_bitpacker_util(len: usize, num_bits: usize) {
        let max_val: u32 = (1 << num_bits) - 1;
        // Values cycle through `0..max_val`; a zero-width packer only gets zeros.
        let vals: Vec<u32> = (0u32..len as u32)
            .map(|i| if max_val == 0 { 0 } else { i % max_val })
            .collect();

        let mut bitpacker = BitPacker::new(Vec::new(), num_bits);
        for &val in &vals {
            bitpacker.write(val).unwrap();
        }
        let (data, num_bytes) = bitpacker.close().unwrap();

        let expected_num_bytes = (num_bits * len + 7) / 8;
        assert_eq!(num_bytes, expected_num_bytes);
        assert_eq!(data.len(), num_bytes);

        let bitunpacker = BitUnpacker::new(&data, num_bits);
        for (i, &expected) in vals.iter().enumerate() {
            assert_eq!(bitunpacker.get(i), expected);
        }
    }

    /// Round-trips several `(len, num_bits)` combinations.
    #[test]
    fn test_bitpacker() {
        let cases = [
            (10usize, 3usize),
            (10usize, 0usize),
            (10usize, 1usize),
            (6usize, 14usize),
            (1000usize, 14usize),
        ];
        for &(len, num_bits) in &cases {
            test_bitpacker_util(len, num_bits);
        }
    }
}

View File

@@ -1,12 +1,17 @@
mod serialize;
mod timer;
mod vint;
mod bitpacker;
pub use self::serialize::BinarySerializable;
pub use self::timer::Timing;
pub use self::timer::TimerTree;
pub use self::timer::OpenTimer;
pub use self::vint::VInt;
pub use self::bitpacker::compute_num_bits;
use std::io;
pub fn make_io_err(msg: String) -> io::Error {
@@ -26,36 +31,3 @@ pub trait HasLen {
}
/// Counts the zero bits above the highest set bit of `val`.
///
/// Returns `32` when `val` is `0`.
fn count_leading_zeros(val: u32) -> u8 {
    // The count is at most 32, so narrowing to `u8` never loses information.
    val.leading_zeros() as u8
}
/// Returns the number of bits needed to represent `amplitude`.
///
/// This is `32` minus the number of leading zero bits, so `0` needs `0` bits.
pub fn compute_num_bits(amplitude: u32) -> u8 {
    let unused_high_bits = count_leading_zeros(amplitude);
    32u8 - unused_high_bits
}
#[cfg(test)]
mod test {
    use super::compute_num_bits;

    /// Checks the bit width reported for a handful of known amplitudes.
    // Without `#[test]` the harness never collects this function, so the
    // assertions below would silently never run.
    #[test]
    fn test_compute_num_bits() {
        assert_eq!(compute_num_bits(1), 1u8);
        assert_eq!(compute_num_bits(0), 0u8);
        assert_eq!(compute_num_bits(2), 2u8);
        assert_eq!(compute_num_bits(3), 2u8);
        assert_eq!(compute_num_bits(4), 3u8);
        assert_eq!(compute_num_bits(255), 8u8);
        assert_eq!(compute_num_bits(256), 9u8);
    }
}

View File

@@ -1,5 +1,6 @@
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use byteorder::{ReadBytesExt, WriteBytesExt};
use byteorder::LittleEndian as Endianness;
use std::fmt;
use std::io::Write;
use std::io::Read;
@@ -59,13 +60,13 @@ impl<Left: BinarySerializable, Right: BinarySerializable> BinarySerializable for
// Fixed-width binary encoding of `u32` values (4 bytes each).
// NOTE(review): this span looks like diff output whose +/- markers were
// stripped: each `NativeEndian` line appears to be the pre-change version of
// the `Endianness` line right after it. TODO confirm which side to keep.
impl BinarySerializable for u32 {
// Writes the value through `byteorder` and reports 4 bytes written on success;
// byte-order errors go through `convert_byte_order_error`.
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
writer.write_u32::<NativeEndian>(*self)
writer.write_u32::<Endianness>(*self)
.map(|_| 4)
.map_err(convert_byte_order_error)
}
// Reads one `u32` from `reader` through `byteorder`, with the same error
// conversion as `serialize`.
fn deserialize(reader: &mut Read) -> io::Result<u32> {
reader.read_u32::<NativeEndian>()
reader.read_u32::<Endianness>()
.map_err(convert_byte_order_error)
}
}
@@ -73,12 +74,12 @@ impl BinarySerializable for u32 {
// Fixed-width binary encoding of `u64` values (8 bytes each).
// NOTE(review): this span looks like diff output whose +/- markers were
// stripped: each `NativeEndian` line appears to be the pre-change version of
// the `Endianness` line right after it. TODO confirm which side to keep.
impl BinarySerializable for u64 {
// Writes the value through `byteorder` and reports 8 bytes written on success;
// byte-order errors go through `convert_byte_order_error`.
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
writer.write_u64::<NativeEndian>(*self)
writer.write_u64::<Endianness>(*self)
.map(|_| 8)
.map_err(convert_byte_order_error)
}
// Reads one `u64` from `reader` through `byteorder`, with the same error
// conversion as `serialize`.
fn deserialize(reader: &mut Read) -> io::Result<u64> {
reader.read_u64::<NativeEndian>()
reader.read_u64::<Endianness>()
.map_err(convert_byte_order_error)
}
}

View File

@@ -22,8 +22,6 @@ pub use self::serializer::FastFieldSerializer;
#[cfg(test)]
mod tests {
use common::compute_num_bits;
use super::*;
use schema::Field;
use std::path::Path;

View File

@@ -2,6 +2,8 @@ use common::BinarySerializable;
use directory::WritePtr;
use schema::Field;
use common::compute_num_bits;
use std::io;
use std::io::{Write, Seek, SeekFrom};
/// `FastFieldSerializer` is in charge of serializing