diff --git a/cpp/encode.cpp b/cpp/encode.cpp index d4ce8e7ab..91427a8ea 100644 --- a/cpp/encode.cpp +++ b/cpp/encode.cpp @@ -17,12 +17,39 @@ static VariableByte codec_unsorted = VariableByte(); static SIMDBinaryPacking> simd_pack_sorted = SIMDBinaryPacking>(); +static SIMDBinaryPacking> simd_pack = SIMDBinaryPacking>(); + + static VariableByte vint_codec = VariableByte(); -// SIMDBinaryPacking +// SIMDBinaryPacking extern "C" { + // encode 128 u32 at a time. + size_t encode_block128_native( + uint32_t* begin, + uint32_t* output, + const size_t output_capacity) { + size_t output_length = output_capacity; + simd_pack.encodeArray(begin, + 128, + output, + output_length); + return output_length; + } + + // returns the number of u32 words (not bytes) that have been read. + size_t decode_block128_native( + const uint32_t* compressed_data, + const size_t compressed_size, + uint32_t* uncompressed) { + size_t output_capacity = 128; + const uint32_t* pointer_end = simd_pack.decodeArray(compressed_data, compressed_size, uncompressed, output_capacity); + return static_cast(pointer_end - compressed_data); + + } + // encode 128 u32 at a time. 
size_t encode_sorted_block128_native( uint32_t* begin, @@ -128,5 +155,4 @@ extern "C" { uint32_t* output) { return IntersectionFactory::getFromName("simd")(left, left_size, right, right_size, output); } - } diff --git a/src/compression/block128.rs b/src/compression/block128.rs index 8945173ac..0959d38ce 100644 --- a/src/compression/block128.rs +++ b/src/compression/block128.rs @@ -5,6 +5,8 @@ use std::ptr; extern { fn encode_sorted_block128_native(data: *mut u32, output: *mut u32, output_capacity: size_t) -> size_t; fn decode_sorted_block128_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32) -> usize; + fn encode_block128_native(data: *mut u32, output: *mut u32, output_capacity: size_t) -> size_t; + fn decode_block128_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32) -> usize; } //------------------------- @@ -12,7 +14,7 @@ extern { pub struct Block128Encoder { input_buffer: [u32; 128], - output_buffer: [u32; 129], + output_buffer: [u32; 256], } impl Block128Encoder { @@ -20,10 +22,25 @@ impl Block128Encoder { pub fn new() -> Block128Encoder { Block128Encoder { input_buffer: [0u32; 128], - output_buffer: [0u32; 129], + output_buffer: [0u32; 256], } } + pub fn encode(&mut self, input: &[u32]) -> &[u32] { + assert_eq!(input.len(), 128); + // TODO use clone_from when available + let written_size: usize; + unsafe { + ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), 128); + written_size = encode_block128_native( + self.input_buffer.as_mut_ptr(), + self.output_buffer.as_mut_ptr(), + 256, + ); + } + return &self.output_buffer[0..written_size]; + } + pub fn encode_sorted(&mut self, input: &[u32]) -> &[u32] { assert_eq!(input.len(), 128); // TODO use clone_from when available @@ -52,6 +69,18 @@ impl Block128Decoder { } } + pub fn decode<'a, 'b>( + &'b mut self, + compressed_data: &'a [u32]) -> (&'a[u32], &'b[u32; 128]) { + unsafe { + let consumed_num_bytes: usize = 
decode_block128_native( + compressed_data.as_ptr(), + compressed_data.len() as size_t, + self.output.as_mut_ptr()); + (&compressed_data[consumed_num_bytes..], &self.output) + } + } + pub fn decode_sorted<'a, 'b>( &'b mut self, compressed_data: &'a [u32]) -> (&'a[u32], &'b[u32; 128]) { @@ -72,7 +101,7 @@ mod tests { use super::*; #[test] - fn test_encode_block() { + fn test_encode_sorted_block() { for num_extra_values in [0, 2, 11].into_iter() { let mut encoder = Block128Encoder::new(); let mut input = [0u32; 128]; @@ -96,6 +125,31 @@ mod tests { } } + #[test] + fn test_encode_block() { + for num_extra_values in [0, 2, 11].into_iter() { + let mut encoder = Block128Encoder::new(); + let mut input = [0u32; 128]; + for i in 0u32..128u32 { + input[i as usize] = i * 7 % 31; + } + let mut encoded_vec: Vec = encoder.encode(&input).to_vec(); + assert_eq!(encoded_vec.len(), 25); + for i in 0u32..*num_extra_values as u32 { + encoded_vec.push(i); + } + let mut decoder = Block128Decoder::new(); + let (remaining_input, uncompressed_values) = decoder.decode(&encoded_vec[..]); + assert_eq!(remaining_input.len(), *num_extra_values); + for i in 0..128 { + assert_eq!(uncompressed_values[i], input[i]); + } + for i in 0..*num_extra_values { + assert_eq!(remaining_input[i], i as u32); + } + } + } + // // #[test] // fn test_partial_decode_block() { diff --git a/src/compression/mod.rs b/src/compression/mod.rs index 45c07156f..621e90a69 100644 --- a/src/compression/mod.rs +++ b/src/compression/mod.rs @@ -8,7 +8,7 @@ mod block128; pub use self::block128::{Block128Encoder, Block128Decoder}; mod vints; -pub use self::vints::{SortedVIntsEncoder, SortedVIntsDecoder}; +pub use self::vints::{VIntsEncoder, VIntsDecoder}; pub const NUM_DOCS_PER_BLOCK: usize = 128; diff --git a/src/compression/vints.rs b/src/compression/vints.rs index ae0cdbf8c..d92795e5a 100644 --- a/src/compression/vints.rs +++ b/src/compression/vints.rs @@ -7,15 +7,15 @@ extern { fn decode_sorted_vint_native(compressed_data: 
*const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t; } -pub struct SortedVIntsEncoder { +pub struct VIntsEncoder { input_buffer: Vec, output_buffer: Vec, } -impl SortedVIntsEncoder { +impl VIntsEncoder { - pub fn new() -> SortedVIntsEncoder { - SortedVIntsEncoder { + pub fn new() -> VIntsEncoder { + VIntsEncoder { input_buffer: Vec::with_capacity(128), output_buffer: iter::repeat(0u32).take(256).collect(), } @@ -41,12 +41,12 @@ impl SortedVIntsEncoder { -pub struct SortedVIntsDecoder; +pub struct VIntsDecoder; -impl SortedVIntsDecoder { +impl VIntsDecoder { - pub fn new() -> SortedVIntsDecoder { - SortedVIntsDecoder + pub fn new() -> VIntsDecoder { + VIntsDecoder } pub fn decode_sorted(&self, @@ -72,7 +72,7 @@ mod tests { #[test] fn test_encode_vint() { { - let mut encoder = SortedVIntsEncoder::new(); + let mut encoder = VIntsEncoder::new(); let expected_length = 31; let input: Vec = (0u32..123u32) .map(|i| i * 7 / 2) @@ -80,13 +80,13 @@ mod tests { .collect(); let encoded_data = encoder.encode_sorted(&input); assert_eq!(encoded_data.len(), expected_length); - let decoder = SortedVIntsDecoder::new(); + let decoder = VIntsDecoder::new(); let mut decoded_data: Vec = iter::repeat(0u32).take(128).collect(); assert_eq!(123, decoder.decode_sorted(&encoded_data[..], &mut decoded_data)); assert_eq!(&decoded_data[0..123], &input[..]); } { - let mut encoder = SortedVIntsEncoder::new(); + let mut encoder = VIntsEncoder::new(); let input = vec!(3, 17u32, 187); let encoded_data = encoder.encode_sorted(&input); assert_eq!(encoded_data.len(), 1);