Removed dependencies on SIMDCompressionAndIntersection

This commit is contained in:
Paul Masurel
2016-05-07 17:35:53 +09:00
parent 1a71d4095a
commit 4c244c99ae
12 changed files with 278 additions and 568 deletions

View File

@@ -1,177 +0,0 @@
use libc::size_t;
use std::ptr;
// Declarations of the native codec entry points used by this module. Only the
// signatures are visible here; the implementations are linked in from outside
// this file.
// NOTE(review): the two block decoders are declared as returning `usize`
// while the other bindings return `size_t` — confirm both match the native
// signatures.
extern {
// Block encoders: callers in this module treat the returned value as the
// number of bytes written to `output`.
fn encode_sorted_block128_native(data: *mut u32, output: *mut u8, output_capacity: size_t) -> size_t;
// Block decoders: callers in this module treat the returned value as the
// number of bytes consumed from `compressed_data`.
fn decode_sorted_block128_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32) -> usize;
fn encode_block128_native(data: *mut u32, output: *mut u8, output_capacity: size_t) -> size_t;
fn decode_block128_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32) -> usize;
// Variable-length variants: both take explicit element / capacity counts.
fn encode_sorted_vint_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
// The caller in this module treats the returned value as the number of `u32`
// values written to `uncompressed`.
fn decode_sorted_vint_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
}
//-------------------------
// Block128
/// Encoder for fixed-size blocks of exactly 128 `u32` values, delegating the
/// actual compression to the native block routines.
///
/// The encoder owns both scratch buffers, so the slice handed back by an
/// encode call borrows the encoder and is overwritten by the next call.
pub struct Block128Encoder {
    // Private copy of the caller's block; the native routines take it as a
    // mutable pointer.
    input_buffer: [u32; 128],
    // Destination of the compressed bytes.
    output_buffer: [u8; 256 * 4],
}

impl Block128Encoder {
    /// Creates an encoder with zero-initialised buffers.
    pub fn new() -> Block128Encoder {
        Block128Encoder {
            input_buffer: [0u32; 128],
            output_buffer: [0u8; 256 * 4],
        }
    }

    /// Compresses a block of 128 values with `encode_block128_native`.
    ///
    /// Returns the first `n` bytes of the internal output buffer, where `n`
    /// is the size reported by the native routine.
    ///
    /// # Panics
    ///
    /// Panics if `input.len() != 128`.
    pub fn encode(&mut self, input: &[u32]) -> &[u8] {
        assert_eq!(input.len(), 128);
        // TODO use clone_from when available
        let num_bytes: usize = unsafe {
            ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), 128);
            encode_block128_native(
                self.input_buffer.as_mut_ptr(),
                self.output_buffer.as_mut_ptr(),
                256 * 4,
            )
        };
        &self.output_buffer[..num_bytes]
    }

    /// Compresses a block of 128 values with `encode_sorted_block128_native`.
    ///
    /// Returns the first `n` bytes of the internal output buffer, where `n`
    /// is the size reported by the native routine.
    ///
    /// # Panics
    ///
    /// Panics if `input.len() != 128`.
    pub fn encode_sorted(&mut self, input: &[u32]) -> &[u8] {
        assert_eq!(input.len(), 128);
        // TODO use clone_from when available
        let num_bytes: usize = unsafe {
            ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), 128);
            encode_sorted_block128_native(
                self.input_buffer.as_mut_ptr(),
                self.output_buffer.as_mut_ptr(),
                256 * 4,
            )
        };
        &self.output_buffer[..num_bytes]
    }
}
/// Decoder counterpart of the 128-value block encoder. Decoded values land in
/// an internal buffer that is read back through `output()` or returned
/// directly as a slice.
pub struct Block128Decoder {
    // Receives the values written by the native decode routines.
    output: [u32; 128],
}

impl Block128Decoder {
    /// Creates a decoder whose output buffer is zeroed.
    pub fn new() -> Block128Decoder {
        Block128Decoder { output: [0u32; 128] }
    }

    /// Decodes one block from the front of `compressed_data` using
    /// `decode_block128_native`.
    ///
    /// Returns the tail of `compressed_data` that follows the bytes the
    /// native routine reports as consumed. Panics if that count exceeds
    /// `compressed_data.len()`.
    pub fn decode<'a, 'b>(&'b mut self, compressed_data: &'a [u8]) -> &'a [u8] {
        let consumed_num_bytes: usize = unsafe {
            decode_block128_native(
                compressed_data.as_ptr(),
                compressed_data.len() as size_t,
                self.output.as_mut_ptr(),
            )
        };
        &compressed_data[consumed_num_bytes..]
    }

    /// Decodes one block from the front of `compressed_data` using
    /// `decode_sorted_block128_native`.
    ///
    /// Returns the tail of `compressed_data` that follows the bytes the
    /// native routine reports as consumed. Panics if that count exceeds
    /// `compressed_data.len()`.
    pub fn decode_sorted<'a, 'b>(&'b mut self, compressed_data: &'a [u8]) -> &'a [u8] {
        let consumed_num_bytes: usize = unsafe {
            decode_sorted_block128_native(
                compressed_data.as_ptr(),
                compressed_data.len() as size_t,
                self.output.as_mut_ptr(),
            )
        };
        &compressed_data[consumed_num_bytes..]
    }

    /// Decodes `compressed_data` with `decode_sorted_vint_native`, passing 128
    /// as the output capacity.
    ///
    /// Returns the leading part of the internal buffer, whose length is the
    /// count reported by the native routine.
    pub fn decode_sorted_remaining(&mut self, compressed_data: &[u8]) -> &[u32] {
        let num_uncompressed = unsafe {
            decode_sorted_vint_native(
                compressed_data.as_ptr(),
                compressed_data.len() as size_t,
                self.output.as_mut_ptr(),
                128,
            )
        };
        &self.output[..num_uncompressed]
    }

    /// Gives read access to the full 128-slot output buffer.
    pub fn output(&self) -> &[u32; 128] {
        &self.output
    }
}
#[cfg(test)]
mod tests {

    use super::*;

    // Round-trips a sorted block, optionally followed by trailing bytes that
    // the decoder must leave untouched and hand back as the remaining input.
    #[test]
    fn test_encode_sorted_block() {
        for &num_extra_values in [0usize, 2, 11].iter() {
            let mut encoder = Block128Encoder::new();
            let mut input = [0u32; 128];
            for (i, val) in input.iter_mut().enumerate() {
                *val = (i as u32) * 7 / 2;
            }
            let mut encoded_vec: Vec<u8> = encoder.encode_sorted(&input).to_vec();
            assert_eq!(encoded_vec.len(), 84);
            encoded_vec.extend(0u8..num_extra_values as u8);
            let mut decoder = Block128Decoder::new();
            let remaining_input = decoder.decode_sorted(&encoded_vec[..]);
            let uncompressed_values = decoder.output();
            assert_eq!(remaining_input.len(), num_extra_values);
            for (decoded, expected) in uncompressed_values.iter().zip(input.iter()) {
                assert_eq!(*decoded, *expected);
            }
            for (i, &byte) in remaining_input.iter().enumerate() {
                assert_eq!(byte, i as u8);
            }
        }
    }

    // Same round-trip as above, for the unsorted block codec.
    #[test]
    fn test_encode_block() {
        for &num_extra_values in [0usize, 2, 11].iter() {
            let mut encoder = Block128Encoder::new();
            let mut input = [0u32; 128];
            for (i, val) in input.iter_mut().enumerate() {
                *val = (i as u32) * 7 % 31;
            }
            let mut encoded_vec: Vec<u8> = encoder.encode(&input).to_vec();
            assert_eq!(encoded_vec.len(), 100);
            encoded_vec.extend(0u8..num_extra_values as u8);
            let mut decoder = Block128Decoder::new();
            let remaining_input: &[u8] = decoder.decode(&encoded_vec[..]);
            let uncompressed_values = decoder.output();
            assert_eq!(remaining_input.len(), num_extra_values);
            for (decoded, expected) in uncompressed_values.iter().zip(input.iter()) {
                assert_eq!(*decoded, *expected);
            }
            for (i, &byte) in remaining_input.iter().enumerate() {
                assert_eq!(byte, i as u8);
            }
        }
    }
}

View File

@@ -0,0 +1,88 @@
use compression::SIMDBlockEncoder;
use compression::SIMDBlockDecoder;
use super::NUM_DOCS_PER_BLOCK;
/// Compresses an arbitrary-length list of `u32` values by cutting it into
/// full blocks of `NUM_DOCS_PER_BLOCK` values, each handed to the block codec,
/// and encoding whatever is left over with the vint codec.
pub struct CompositeEncoder {
    block_encoder: SIMDBlockEncoder,
    // Accumulates the compressed bytes of the current call; reused across calls.
    output: Vec<u8>,
}

impl CompositeEncoder {
    /// Creates an encoder with an empty output buffer.
    pub fn new() -> CompositeEncoder {
        CompositeEncoder {
            block_encoder: SIMDBlockEncoder::new(),
            output: Vec::new(),
        }
    }

    /// Compresses `vals` with the sorted codecs.
    ///
    /// The first block is encoded relative to offset 0; every later block,
    /// and the vint-encoded remainder, is encoded relative to the last value
    /// of the block before it. The returned slice borrows the internal buffer
    /// and is replaced by the next compress call.
    pub fn compress_sorted(&mut self, vals: &[u32]) -> &[u8] {
        self.output.clear();
        let num_full = (vals.len() / NUM_DOCS_PER_BLOCK) * NUM_DOCS_PER_BLOCK;
        let (full_blocks, remainder) = vals.split_at(num_full);
        let mut offset = 0u32;
        for block in full_blocks.chunks(NUM_DOCS_PER_BLOCK) {
            let block_compressed = self.block_encoder.compress_block_sorted(block, offset);
            self.output.extend_from_slice(block_compressed);
            offset = block[NUM_DOCS_PER_BLOCK - 1];
        }
        let vint_compressed = self.block_encoder.compress_vint_sorted(remainder, offset);
        self.output.extend_from_slice(vint_compressed);
        &self.output
    }

    /// Compresses `vals` with the unsorted codecs: full blocks first, then
    /// the vint-encoded remainder. The returned slice borrows the internal
    /// buffer and is replaced by the next compress call.
    pub fn compress_unsorted(&mut self, vals: &[u32]) -> &[u8] {
        self.output.clear();
        let num_full = (vals.len() / NUM_DOCS_PER_BLOCK) * NUM_DOCS_PER_BLOCK;
        let (full_blocks, remainder) = vals.split_at(num_full);
        for block in full_blocks.chunks(NUM_DOCS_PER_BLOCK) {
            let block_compressed = self.block_encoder.compress_block_unsorted(block);
            self.output.extend_from_slice(block_compressed);
        }
        let vint_compressed = self.block_encoder.compress_vint_unsorted(remainder);
        self.output.extend_from_slice(vint_compressed);
        &self.output
    }
}
/// Decoder for streams made of full blocks followed by a vint-encoded
/// remainder. The caller supplies the total number of values, which
/// determines how many full blocks to read.
pub struct CompositeDecoder {
    block_decoder: SIMDBlockDecoder,
    // Accumulates the decoded values of the current call; reused across calls.
    vals: Vec<u32>,
}

impl CompositeDecoder {
    /// Creates a decoder with an empty value buffer.
    pub fn new() -> CompositeDecoder {
        CompositeDecoder {
            block_decoder: SIMDBlockDecoder::new(),
            vals: Vec::new(),
        }
    }

    /// Decodes `doc_freq` values with the sorted codecs.
    ///
    /// `doc_freq / NUM_DOCS_PER_BLOCK` full blocks are read first, each one
    /// relative to the last value of the previous block (0 for the first),
    /// then `doc_freq % NUM_DOCS_PER_BLOCK` vint-encoded values. The returned
    /// slice borrows the internal buffer and is replaced by the next call.
    pub fn uncompress_sorted(&mut self, mut compressed_data: &[u8], doc_freq: usize) -> &[u32] {
        self.vals.clear();
        let num_remaining = doc_freq % NUM_DOCS_PER_BLOCK;
        let mut offset = 0u32;
        for _ in 0..doc_freq / NUM_DOCS_PER_BLOCK {
            compressed_data = self.block_decoder.uncompress_block_sorted(compressed_data, offset);
            let block = self.block_decoder.output();
            offset = block[NUM_DOCS_PER_BLOCK - 1];
            self.vals.extend_from_slice(block);
        }
        self.block_decoder.uncompress_vint_sorted(compressed_data, offset, num_remaining);
        self.vals.extend_from_slice(self.block_decoder.output());
        &self.vals
    }

    /// Decodes `doc_freq` values with the unsorted codecs: full blocks first,
    /// then the vint-encoded remainder. The returned slice borrows the
    /// internal buffer and is replaced by the next call.
    pub fn uncompress_unsorted(&mut self, mut compressed_data: &[u8], doc_freq: usize) -> &[u32] {
        self.vals.clear();
        let num_remaining = doc_freq % NUM_DOCS_PER_BLOCK;
        for _ in 0..doc_freq / NUM_DOCS_PER_BLOCK {
            compressed_data = self.block_decoder.uncompress_block_unsorted(compressed_data);
            self.vals.extend_from_slice(self.block_decoder.output());
        }
        self.block_decoder.uncompress_vint_unsorted(compressed_data, num_remaining);
        self.vals.extend_from_slice(self.block_decoder.output());
        &self.vals
    }
}

View File

@@ -1,14 +0,0 @@
use libc::size_t;
// Native intersection routine; its implementation is linked in from outside
// this file.
// NOTE(review): no capacity is passed for `output`, so the callee cannot bound
// its own writes — confirm how much space it expects the caller to provide.
extern {
fn intersection_native(left_data: *const u32, left_size: size_t, right_data: *const u32, right_size: size_t, output: *mut u32) -> size_t;
}
/// Intersects `left` and `right` by delegating to `intersection_native`,
/// which writes its result into `output`.
///
/// Returns the value reported by the native routine (presumably the number
/// of elements written to `output` — confirm against the native
/// implementation).
///
/// # Panics
///
/// Panics if `output` is shorter than the smaller of the two inputs. The
/// native routine receives no capacity for `output`, so this safe wrapper has
/// to rule out an undersized buffer before handing over the raw pointer.
/// NOTE(review): this assumes the native routine never writes more than
/// `min(left.len(), right.len())` elements — confirm.
pub fn intersection(left: &[u32], right: &[u32], output: &mut [u32]) -> usize {
    let max_result_len = ::std::cmp::min(left.len(), right.len());
    assert!(
        output.len() >= max_result_len,
        "intersection: output buffer too small ({} < {})",
        output.len(),
        max_result_len
    );
    // SAFETY: the input pointers are passed together with the lengths of the
    // slices they come from, and `output` was checked above to hold the
    // assumed worst-case result size.
    unsafe {
        intersection_native(
            left.as_ptr(),
            left.len(),
            right.as_ptr(),
            right.len(),
            output.as_mut_ptr(),
        )
    }
}

View File

@@ -1,18 +1,10 @@
#![allow(dead_code)]
mod intersection;
pub use self::intersection::intersection;
mod s4bp128;
pub use self::s4bp128::{S4BP128Encoder, S4BP128Decoder};
mod block128;
pub use self::block128::{Block128Encoder, Block128Decoder};
mod vints;
pub use self::vints::{VIntsEncoder, VIntsDecoder};
mod simdcomp;
pub use self::simdcomp::{SIMDBlockEncoder, SIMDBlockDecoder};
mod composite;
pub use self::composite::CompositeEncoder;
pub const NUM_DOCS_PER_BLOCK: usize = 128;

View File

@@ -1,164 +0,0 @@
use libc::size_t;
use std::ptr;
// Declarations of the native whole-array codec entry points; the
// implementations are linked in from outside this file. Every routine takes
// an explicit element or byte count for its input and a capacity for its
// output, and returns a `size_t`.
extern {
// complete s4-bp128-dm
// Used by `encode_sorted` / `decode_sorted` below.
fn encode_s4_bp128_dm_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
fn decode_s4_bp128_dm_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
// Used by `encode` / `decode` below.
fn encode_composite_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
fn decode_composite_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
}
//-------------------------
// s4-bp128-dm
/// Encoder for whole arrays of `u32` values, backed by the native
/// `encode_composite_native` / `encode_s4_bp128_dm_native` routines.
///
/// Both scratch buffers are owned by the encoder and reused between calls, so
/// a returned slice is only valid until the next encode call.
pub struct S4BP128Encoder {
    // Private, padded copy of the caller's input; the native routines take it
    // as a mutable pointer.
    input_buffer: Vec<u32>,
    // Destination of the compressed bytes.
    output_buffer: Vec<u8>,
}

impl S4BP128Encoder {
    /// Creates an encoder with empty buffers; they are sized on each call.
    pub fn new() -> S4BP128Encoder {
        S4BP128Encoder {
            input_buffer: Vec::new(),
            output_buffer: Vec::new(),
        }
    }

    // Copies `input` into the scratch buffer, zero-pads it to
    // `input.len() + 1024` elements and sizes the output buffer to four bytes
    // per padded element. These are the sizes the previous code always ended
    // up with: its growth check ran right after `clear()`, so it was always
    // true.
    // NOTE(review): the 1024-element padding is kept as-is; presumably slack
    // for the native routines — confirm.
    fn load_input(&mut self, input: &[u32]) {
        let padded_len = input.len() + 1024;
        self.input_buffer.clear();
        self.input_buffer.extend_from_slice(input);
        self.input_buffer.resize(padded_len, 0);
        self.output_buffer.resize(padded_len * 4, 0);
    }

    /// Compresses `input` with `encode_composite_native`.
    ///
    /// Returns the first `n` bytes of the internal output buffer, where `n`
    /// is the size reported by the native routine. Panics if that size
    /// exceeds the output buffer length.
    pub fn encode(&mut self, input: &[u32]) -> &[u8] {
        self.load_input(input);
        // SAFETY: both pointers come from live vectors owned by `self`; the
        // element count passed is no larger than `input_buffer.len()` and the
        // capacity passed is exactly `output_buffer.len()`.
        let written_size = unsafe {
            encode_composite_native(
                self.input_buffer.as_mut_ptr(),
                input.len() as size_t,
                self.output_buffer.as_mut_ptr(),
                self.output_buffer.len() as size_t,
            )
        };
        &self.output_buffer[..written_size]
    }

    /// Compresses `input` with `encode_s4_bp128_dm_native`.
    ///
    /// Returns the first `n` bytes of the internal output buffer, where `n`
    /// is the size reported by the native routine. Panics if that size
    /// exceeds the output buffer length.
    pub fn encode_sorted(&mut self, input: &[u32]) -> &[u8] {
        self.load_input(input);
        // SAFETY: both pointers come from live vectors owned by `self`; the
        // element count passed is no larger than `input_buffer.len()` and the
        // capacity passed is exactly `output_buffer.len()`.
        let written_size = unsafe {
            encode_s4_bp128_dm_native(
                self.input_buffer.as_mut_ptr(),
                input.len() as size_t,
                self.output_buffer.as_mut_ptr(),
                self.output_buffer.len() as size_t,
            )
        };
        &self.output_buffer[..written_size]
    }
}
/// Stateless decoder matching `S4BP128Encoder`; values are written straight
/// into a caller-provided slice.
pub struct S4BP128Decoder;

impl S4BP128Decoder {
    /// Creates a decoder.
    pub fn new() -> S4BP128Decoder {
        S4BP128Decoder
    }

    /// Decodes `compressed_data` with `decode_s4_bp128_dm_native` into
    /// `uncompressed_values`, whose length is passed as the output capacity.
    ///
    /// Returns the value reported by the native routine (the tests compare it
    /// with the number of encoded integers).
    pub fn decode_sorted(&self, compressed_data: &[u8], uncompressed_values: &mut [u32]) -> size_t {
        let capacity = uncompressed_values.len() as size_t;
        unsafe {
            decode_s4_bp128_dm_native(
                compressed_data.as_ptr(),
                compressed_data.len() as size_t,
                uncompressed_values.as_mut_ptr(),
                capacity,
            )
        }
    }

    /// Decodes `compressed_data` with `decode_composite_native` into
    /// `uncompressed_values`, whose length is passed as the output capacity.
    ///
    /// Returns the value reported by the native routine (the tests compare it
    /// with the number of encoded integers).
    pub fn decode(&self, compressed_data: &[u8], uncompressed_values: &mut [u32]) -> size_t {
        let capacity = uncompressed_values.len() as size_t;
        unsafe {
            decode_composite_native(
                compressed_data.as_ptr(),
                compressed_data.len() as size_t,
                uncompressed_values.as_mut_ptr(),
                capacity,
            )
        }
    }
}
#[cfg(test)]
mod tests {

    use super::*;
    use test::Bencher;
    use compression::tests::generate_array;

    // Round-trips 10 000 increasing values through the sorted codec and pins
    // the compressed size.
    #[test]
    fn test_encode_sorted_big() {
        let num_ints: usize = 10_000;
        let expected_length = 5_096;
        let input: Vec<u32> = (0..num_ints as u32).map(|i| i * 7 / 2).collect();
        let mut encoder = S4BP128Encoder::new();
        let encoded_data = encoder.encode_sorted(&input);
        assert_eq!(encoded_data.len(), expected_length);
        // The destination starts out filled with unrelated values so that a
        // successful decode has to overwrite all of them.
        let mut decoded_data: Vec<u32> = (0..num_ints as u32).collect();
        let decoder = S4BP128Decoder::new();
        let num_decoded = decoder.decode_sorted(&encoded_data[..], &mut decoded_data);
        assert_eq!(num_ints, num_decoded);
        assert_eq!(decoded_data, input);
    }

    // Round-trips 10 000 non-monotonic values through the unsorted codec and
    // pins the compressed size.
    #[test]
    fn test_encode_unsorted_big() {
        let num_ints: usize = 10_000;
        let expected_length = 7_588;
        let input: Vec<u32> = (0..num_ints as u32).map(|i| i * 7 % 37).collect();
        let mut encoder = S4BP128Encoder::new();
        let encoded_data = encoder.encode(&input);
        assert_eq!(encoded_data.len(), expected_length);
        let mut decoded_data: Vec<u32> = (0..num_ints as u32).collect();
        let decoder = S4BP128Decoder::new();
        let num_decoded = decoder.decode(&encoded_data[..], &mut decoded_data);
        assert_eq!(num_ints, num_decoded);
        assert_eq!(decoded_data, input);
    }

    // Measures sorted decoding of a one-million-element generated array.
    #[bench]
    fn bench_decode(b: &mut Bencher) {
        const TEST_SIZE: usize = 1_000_000;
        let arr = generate_array(TEST_SIZE, 0.1);
        let mut encoder = S4BP128Encoder::new();
        let encoded = encoder.encode_sorted(&arr);
        let mut uncompressed: Vec<u32> = (0..TEST_SIZE as u32).collect();
        let decoder = S4BP128Decoder;
        b.iter(|| {
            decoder.decode_sorted(&encoded, &mut uncompressed);
        });
    }
}

View File

@@ -1,4 +1,7 @@
use libc::size_t;
use super::NUM_DOCS_PER_BLOCK;
const COMPRESSED_BLOCK_MAX_SIZE: usize = NUM_DOCS_PER_BLOCK * 4 + 1;
extern {
// complete s4-bp128-dm
@@ -21,56 +24,151 @@ extern {
output: *mut u32) -> size_t;
}
const BLOCK_SIZE: usize = 128;
const COMPRESSED_BLOCK_MAX_SIZE: usize = BLOCK_SIZE * 4 + 1;
pub struct SIMDBlockEncoder {
output_buffer: [u8; COMPRESSED_BLOCK_MAX_SIZE],
output: [u8; COMPRESSED_BLOCK_MAX_SIZE],
output_len: usize,
}
impl SIMDBlockEncoder {
pub fn new() -> SIMDBlockEncoder {
SIMDBlockEncoder {
output_buffer: [0u8; COMPRESSED_BLOCK_MAX_SIZE]
output: [0u8; COMPRESSED_BLOCK_MAX_SIZE],
output_len: 0,
}
}
pub fn compress_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
let compressed_size = unsafe { compress_sorted_cpp(vals.as_ptr(), self.output_buffer.as_mut_ptr(), offset) };
&self.output_buffer[..compressed_size]
pub fn compress_block_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
let compressed_size = unsafe { compress_sorted_cpp(vals.as_ptr(), self.output.as_mut_ptr(), offset) };
&self.output[..compressed_size]
}
pub fn compress_unsorted(&mut self, vals: &[u32]) -> &[u8] {
let compressed_size = unsafe { compress_unsorted_cpp(vals.as_ptr(), self.output_buffer.as_mut_ptr()) };
&self.output_buffer[..compressed_size]
pub fn compress_block_unsorted(&mut self, vals: &[u32]) -> &[u8] {
let compressed_size = unsafe { compress_unsorted_cpp(vals.as_ptr(), self.output.as_mut_ptr()) };
&self.output[..compressed_size]
}
/// Delta-encodes `input` as variable-length integers into the internal
/// output buffer and returns the bytes written.
///
/// Each value is stored as its difference from the previous value (from
/// `offset` for the first one), split into 7-bit groups, lowest group first.
/// The high bit is set on the final byte of each value and clear on the
/// bytes before it.
///
/// # Panics
///
/// Indexing panics if the encoding does not fit in the output buffer. With
/// overflow checks enabled, the subtraction panics if a value is smaller than
/// its predecessor (or the first value is smaller than `offset`).
pub fn compress_vint_sorted(&mut self, input: &[u32], mut offset: u32) -> &[u8] {
    let mut num_bytes = 0;
    for &val in input {
        let mut delta: u32 = val - offset;
        offset = val;
        // Emit every 7-bit group except the last without the stop bit.
        while delta >= 128u32 {
            self.output[num_bytes] = (delta & 127u32) as u8;
            num_bytes += 1;
            delta >>= 7;
        }
        // Last group: flag it with the high bit.
        self.output[num_bytes] = (delta as u8) | 128u8;
        num_bytes += 1;
    }
    &self.output[..num_bytes]
}
/// Encodes each value of `input` as a variable-length integer into the
/// internal output buffer and returns the bytes written.
///
/// Values are split into 7-bit groups, lowest group first. The high bit is
/// set on the final byte of each value and clear on the bytes before it.
///
/// # Panics
///
/// Indexing panics if the encoding does not fit in the output buffer.
pub fn compress_vint_unsorted(&mut self, input: &[u32]) -> &[u8] {
    let mut num_bytes = 0;
    for &val in input {
        let mut remaining: u32 = val;
        // Emit every 7-bit group except the last without the stop bit.
        while remaining >= 128u32 {
            self.output[num_bytes] = (remaining & 127u32) as u8;
            num_bytes += 1;
            remaining >>= 7;
        }
        // Last group: flag it with the high bit.
        self.output[num_bytes] = (remaining as u8) | 128u8;
        num_bytes += 1;
    }
    &self.output[..num_bytes]
}
}
pub struct SIMDBlockDecoder {
output_buffer: [u32; COMPRESSED_BLOCK_MAX_SIZE],
output: [u32; COMPRESSED_BLOCK_MAX_SIZE],
output_len: usize,
}
impl SIMDBlockDecoder {
pub fn new() -> SIMDBlockDecoder {
SIMDBlockDecoder {
output_buffer: [0u32; COMPRESSED_BLOCK_MAX_SIZE]
output: [0u32; COMPRESSED_BLOCK_MAX_SIZE],
output_len: 0,
}
}
pub fn uncompress_sorted<'a>(&mut self, compressed_data: &'a [u8], offset: u32) -> &'a[u8] {
let consumed_size = unsafe { uncompress_sorted_cpp(compressed_data.as_ptr(), self.output_buffer.as_mut_ptr(), offset) };
pub fn uncompress_block_sorted<'a>(&mut self, compressed_data: &'a [u8], offset: u32) -> &'a[u8] {
let consumed_size = unsafe { uncompress_sorted_cpp(compressed_data.as_ptr(), self.output.as_mut_ptr(), offset) };
self.output_len = NUM_DOCS_PER_BLOCK;
&compressed_data[consumed_size..]
}
pub fn uncompress_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> &'a[u8] {
let consumed_size = unsafe { uncompress_unsorted_cpp(compressed_data.as_ptr(), self.output_buffer.as_mut_ptr()) };
pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> &'a[u8] {
let consumed_size = unsafe { uncompress_unsorted_cpp(compressed_data.as_ptr(), self.output.as_mut_ptr()) };
self.output_len = NUM_DOCS_PER_BLOCK;
&compressed_data[consumed_size..]
}
/// Decodes `num_els` delta-encoded variable-length integers from the front
/// of `compressed_data` into the internal output buffer.
///
/// A running total starts at `offset`; the 7-bit groups of each encoded
/// delta are added to it and the total after each value is stored. A byte
/// with its high bit set ends the current value. Afterwards the output
/// length is set to `num_els`.
///
/// Returns the part of `compressed_data` that follows the bytes read.
///
/// # Panics
///
/// Indexing panics if `compressed_data` ends before `num_els` values have
/// been read, or if `num_els` exceeds the output buffer size.
pub fn uncompress_vint_sorted<'a>(
    &mut self,
    compressed_data: &'a [u8],
    offset: u32,
    num_els: usize) -> &'a [u8] {
    let mut cursor = 0;
    let mut running = offset;
    for i in 0..num_els {
        let mut shift = 0u32;
        loop {
            let byte = compressed_data[cursor];
            cursor += 1;
            running += ((byte & 127u8) as u32) << shift;
            // High bit set: this was the last byte of the value.
            if byte >= 128u8 {
                break;
            }
            shift += 7;
        }
        self.output[i] = running;
    }
    self.output_len = num_els;
    &compressed_data[cursor..]
}
/// Decodes `num_els` variable-length integers from the front of
/// `compressed_data` into the internal output buffer.
///
/// Each value is rebuilt from 7-bit groups, lowest group first; a byte with
/// its high bit set ends the current value. Afterwards the output length is
/// set to `num_els`.
///
/// Returns the part of `compressed_data` that follows the bytes read.
///
/// # Panics
///
/// Indexing panics if `compressed_data` ends before `num_els` values have
/// been read, or if `num_els` exceeds the output buffer size.
pub fn uncompress_vint_unsorted<'a>(
    &mut self,
    compressed_data: &'a [u8],
    num_els: usize) -> &'a [u8] {
    let mut cursor = 0;
    for i in 0..num_els {
        let mut value = 0u32;
        let mut shift = 0u32;
        loop {
            let byte = compressed_data[cursor];
            cursor += 1;
            value += ((byte & 127u8) as u32) << shift;
            // High bit set: this was the last byte of the value.
            if byte >= 128u8 {
                break;
            }
            shift += 7;
        }
        self.output[i] = value;
    }
    self.output_len = num_els;
    &compressed_data[cursor..]
}
pub fn output(&self,) -> &[u32] {
&self.output_buffer
&self.output[..self.output_len]
}
}
@@ -84,10 +182,10 @@ mod tests {
fn test_encode_sorted_block() {
let vals: Vec<u32> = (0u32..128u32).map(|i| i*7).collect();
let mut encoder = SIMDBlockEncoder::new();
let compressed_data = encoder.compress_sorted(&vals, 0);
let compressed_data = encoder.compress_block_sorted(&vals, 0);
let mut decoder = SIMDBlockDecoder::new();
{
let remaining_data = decoder.uncompress_sorted(compressed_data, 0);
let remaining_data = decoder.uncompress_block_sorted(compressed_data, 0);
assert_eq!(remaining_data.len(), 0);
}
for i in 0..128 {
@@ -99,10 +197,10 @@ mod tests {
fn test_encode_sorted_block_with_offset() {
let vals: Vec<u32> = (0u32..128u32).map(|i| 11 + i*7).collect();
let mut encoder = SIMDBlockEncoder::new();
let compressed_data = encoder.compress_sorted(&vals, 10);
let compressed_data = encoder.compress_block_sorted(&vals, 10);
let mut decoder = SIMDBlockDecoder::new();
{
let remaining_data = decoder.uncompress_sorted(compressed_data, 10);
let remaining_data = decoder.uncompress_block_sorted(compressed_data, 10);
assert_eq!(remaining_data.len(), 0);
}
for i in 0..128 {
@@ -116,12 +214,12 @@ mod tests {
let n = 128;
let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32)*7u32).collect();
let mut encoder = SIMDBlockEncoder::new();
let compressed_data = encoder.compress_sorted(&vals, 10);
let compressed_data = encoder.compress_block_sorted(&vals, 10);
compressed.extend_from_slice(compressed_data);
compressed.push(173u8);
let mut decoder = SIMDBlockDecoder::new();
{
let remaining_data = decoder.uncompress_sorted(&compressed, 10);
let remaining_data = decoder.uncompress_block_sorted(&compressed, 10);
assert_eq!(remaining_data.len(), 1);
assert_eq!(remaining_data[0], 173u8);
}
@@ -136,12 +234,12 @@ mod tests {
let n = 128;
let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32)*7u32 % 12).collect();
let mut encoder = SIMDBlockEncoder::new();
let compressed_data = encoder.compress_sorted(&vals, 10);
let compressed_data = encoder.compress_block_sorted(&vals, 10);
compressed.extend_from_slice(compressed_data);
compressed.push(173u8);
let mut decoder = SIMDBlockDecoder::new();
{
let remaining_data = decoder.uncompress_sorted(&compressed, 10);
let remaining_data = decoder.uncompress_block_sorted(&compressed, 10);
assert_eq!(remaining_data.len(), 1);
assert_eq!(remaining_data[0], 173u8);
}
@@ -149,4 +247,38 @@ mod tests {
assert_eq!(vals[i], decoder.output()[i]);
}
}
// Checks the sorted vint codec: a round-trip of 123 values under three
// different starting offsets, then the exact bytes produced for a small input.
#[test]
fn test_encode_vint() {
    {
        let expected_length = 123;
        let input: Vec<u32> = (0u32..123u32).map(|i| 4 + i * 7 / 2).collect();
        let mut encoder = SIMDBlockEncoder::new();
        for &offset in [0u32, 1u32, 2u32].iter() {
            let encoded_data = encoder.compress_vint_sorted(&input, offset);
            assert_eq!(encoded_data.len(), expected_length);
            let mut decoder = SIMDBlockDecoder::new();
            let remaining_data = decoder.uncompress_vint_sorted(encoded_data, offset, input.len());
            assert_eq!(0, remaining_data.len());
            for (decoded, expected) in decoder.output().iter().zip(input.iter()) {
                assert_eq!(*decoded, *expected);
            }
        }
    }
    {
        let input = vec![3u32, 17u32, 187u32];
        let mut encoder = SIMDBlockEncoder::new();
        let encoded_data = encoder.compress_vint_sorted(&input, 0);
        assert_eq!(encoded_data.len(), 4);
        // 3 and 17 - 3 each fit in one byte, flagged as final.
        assert_eq!(encoded_data[0], 3u8 + 128u8);
        assert_eq!(encoded_data[1], (17u8 - 3u8) + 128u8);
        // 187 - 17 = 170 needs two bytes: low seven bits, then 1 with the flag.
        assert_eq!(encoded_data[2], (187u8 - 17u8 - 128u8));
        assert_eq!(encoded_data[3], (1u8 + 128u8));
    }
}
}

View File

@@ -1,112 +0,0 @@
use libc::size_t;
use std::ptr;
use std::iter;
// Declarations of the native sorted-vint codec entry points; the
// implementations are linked in from outside this file.
extern {
// The caller below treats the returned value as the number of bytes written
// to `output`.
fn encode_sorted_vint_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
// The caller below treats the returned value as the number of `u32` values
// written to `uncompressed`.
fn decode_sorted_vint_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
}
/// Encoder for short lists (fewer than 128 values) of `u32`, backed by
/// `encode_sorted_vint_native`.
///
/// Both scratch buffers are owned by the encoder, so a returned slice is only
/// valid until the next encode call.
pub struct VIntsEncoder {
    // Scratch space for a private copy of the input. Only its capacity is
    // used; the vector's length is never changed.
    input_buffer: Vec<u32>,
    // Destination of the compressed bytes.
    output_buffer: Vec<u8>,
}

impl VIntsEncoder {
    /// Creates an encoder with room for 128 input values and a zero-filled
    /// output buffer of `256 * 4` bytes.
    pub fn new() -> VIntsEncoder {
        let zeroed_output: Vec<u8> = iter::repeat(0u8).take(256 * 4).collect();
        VIntsEncoder {
            input_buffer: Vec::with_capacity(128),
            output_buffer: zeroed_output,
        }
    }

    /// Compresses `input` with `encode_sorted_vint_native`.
    ///
    /// Returns the first `n` bytes of the internal output buffer, where `n`
    /// is the size reported by the native routine.
    ///
    /// # Panics
    ///
    /// Panics if `input.len() >= 128`.
    pub fn encode_sorted(&mut self, input: &[u32]) -> &[u8] {
        assert!(input.len() < 128);
        let num_els = input.len();
        // TODO use clone_from when available
        let num_bytes: usize = unsafe {
            ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), num_els);
            encode_sorted_vint_native(
                self.input_buffer.as_mut_ptr(),
                num_els as size_t,
                self.output_buffer.as_mut_ptr(),
                256 * 4,
            )
        };
        &self.output_buffer[..num_bytes]
    }
}
/// Decoder matching `VIntsEncoder`; decoded values are written to an internal
/// 128-slot buffer.
pub struct VIntsDecoder {
    // Receives the values written by the native decode routine.
    output: [u32; 128],
}

impl VIntsDecoder {
    /// Creates a decoder whose output buffer is zeroed.
    pub fn new() -> VIntsDecoder {
        VIntsDecoder { output: [0u32; 128] }
    }

    /// Decodes `compressed_data` with `decode_sorted_vint_native`, passing 128
    /// as the output capacity.
    ///
    /// Returns the leading part of the internal buffer, whose length is the
    /// count reported by the native routine.
    pub fn decode_sorted(&mut self, compressed_data: &[u8]) -> &[u32] {
        let num_uncompressed = unsafe {
            decode_sorted_vint_native(
                compressed_data.as_ptr(),
                compressed_data.len() as size_t,
                self.output.as_mut_ptr(),
                128,
            )
        };
        &self.output[..num_uncompressed]
    }
}
#[cfg(test)]
mod tests {

    use super::*;

    // Exercises the sorted vint codec: a 123-value round-trip with a pinned
    // compressed size, the exact bytes for a small input, and a round-trip
    // that starts at zero.
    #[test]
    fn test_encode_vint() {
        {
            let expected_length = 124;
            let input: Vec<u32> = (0u32..123u32).map(|i| i * 7 / 2).collect();
            let mut encoder = VIntsEncoder::new();
            let encoded_data = encoder.encode_sorted(&input);
            assert_eq!(encoded_data.len(), expected_length);
            let mut decoder = VIntsDecoder::new();
            let decoded_data = decoder.decode_sorted(&encoded_data[..]);
            assert_eq!(123, decoded_data.len());
            assert_eq!(&decoded_data[0..123], &input[..]);
        }
        {
            let input = vec![3u32, 17u32, 187u32];
            let mut encoder = VIntsEncoder::new();
            let encoded_data = encoder.encode_sorted(&input);
            assert_eq!(encoded_data.len(), 4);
            assert_eq!(encoded_data[0], 3u8 + 128u8);
            assert_eq!(encoded_data[1], (17u8 - 3u8) + 128u8);
            assert_eq!(encoded_data[2], (187u8 - 17u8 - 128u8));
            assert_eq!(encoded_data[3], (1u8 + 128u8));
        }
        {
            let input = vec![0u32, 1u32, 2u32];
            let mut encoder = VIntsEncoder::new();
            let encoded_data = encoder.encode_sorted(&input);
            let mut decoder = VIntsDecoder::new();
            let decoded_data = decoder.decode_sorted(&encoded_data[..]);
            assert_eq!(3, decoded_data.len());
            assert_eq!(&decoded_data[..], &input[..]);
        }
    }
}