From c8196aceb6fa5d34c1cc3bfc28ba98011d28b0b5 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sun, 28 Feb 2016 20:35:54 +0900 Subject: [PATCH] werwer --- cpp/encode.cpp | 36 +++++++++++++++++++++--- src/core/simdcompression.rs | 55 ++++++++++++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 5 deletions(-) diff --git a/cpp/encode.cpp b/cpp/encode.cpp index bde232e66..47da680cd 100644 --- a/cpp/encode.cpp +++ b/cpp/encode.cpp @@ -3,38 +3,66 @@ #include "codecfactory.h" #include "intersection.h" +#include "variablebyte.h" using namespace SIMDCompressionLib; -static shared_ptr codec = CODECFactory::getFromName("s4-bp128-dm"); +// sorted +static shared_ptr codec_sorted = CODECFactory::getFromName("s4-bp128-dm"); +// variable byte +static VariableByte codec_unsorted = VariableByte(); + +static SIMDBinaryPacking> codec_packed_sorted = SIMDBinaryPacking>(); extern "C" { - size_t encode_sorted_native( uint32_t* begin, const size_t num_els, uint32_t* output, const size_t output_capacity) { size_t output_length = output_capacity; - codec -> encodeArray(begin, + codec_sorted -> encodeArray(begin, num_els, output, output_length); return output_length; } + size_t encode_unsorted_native( + uint32_t* begin, + const size_t num_els, + uint32_t* output, + const size_t output_capacity) { + size_t output_length = output_capacity; + codec_unsorted.encodeArray(begin, + num_els, + output, + output_length); + return output_length; + } + size_t decode_sorted_native( const uint32_t* compressed_data, const size_t compressed_size, uint32_t* uncompressed, const size_t uncompressed_capacity) { size_t num_ints = uncompressed_capacity; - codec -> decodeArray(compressed_data, compressed_size, uncompressed, num_ints); + codec_sorted -> decodeArray(compressed_data, compressed_size, uncompressed, num_ints); return num_ints; } + size_t decode_unsorted_native( + const uint32_t* compressed_data, + const size_t compressed_size, + uint32_t* uncompressed, + const size_t uncompressed_capacity) { + size_t num_ints = uncompressed_capacity; + codec_unsorted.decodeArray(compressed_data, compressed_size, uncompressed, num_ints); + return num_ints; + } + size_t intersection_native( const uint32_t* left, const size_t left_size, diff --git a/src/core/simdcompression.rs b/src/core/simdcompression.rs index 411abb661..8e745b715 100644 --- a/src/core/simdcompression.rs +++ b/src/core/simdcompression.rs @@ -4,6 +4,9 @@ use std::cmp::min; use std::iter; extern { + fn encode_unsorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t; + fn decode_unsorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t; + fn encode_sorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t; fn decode_sorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t; fn intersection_native(left_data: *const u32, left_size: size_t, right_data: *const u32, right_size: size_t, output: *mut u32) -> size_t; @@ -43,6 +46,28 @@ impl Encoder { return &self.output_buffer[0..written_size]; } } + + + pub fn encode_unsorted(&mut self, input: &[u32]) -> &[u32] { + self.input_buffer.clear(); + let input_len = input.len(); + if input_len + 10000 >= self.input_buffer.len() { + let target_length = input_len + 1024; + self.input_buffer.resize(target_length, 0); + self.output_buffer.resize(target_length, 0); + } + // TODO use clone_from when available + unsafe { + ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len); + let written_size = encode_unsorted_native( + self.input_buffer.as_mut_ptr(), + input_len as size_t, + self.output_buffer.as_mut_ptr(), + self.output_buffer.len() as size_t, + ); + return &self.output_buffer[0..written_size]; + } + } } @@ -66,6 +91,18 @@ impl Decoder { uncompressed_values.len() as size_t); } } + + pub fn decode_unsorted(&self, + compressed_data: &[u32], + uncompressed_values: &mut [u32]) -> size_t { + unsafe { + return decode_unsorted_native( + compressed_data.as_ptr(), + compressed_data.len() as size_t, + uncompressed_values.as_mut_ptr(), + uncompressed_values.len() as size_t); + } + } } pub struct Intersector { @@ -125,7 +162,7 @@ mod tests { let num_ints = 10000 as usize; let expected_length = 1274; let input: Vec = (0..num_ints as u32) - .map(|i| i * 7 / 2) + .map(|i| i % 7 / 2) .into_iter().collect(); let encoded_data = encoder.encode_sorted(&input); assert_eq!(encoded_data.len(), expected_length); @@ -135,6 +172,22 @@ mod tests { assert_eq!(decoded_data, input); } + #[test] + fn test_encode_unsorted() { + let mut encoder = Encoder::new(); + let num_ints = 10_000 as usize; + let expected_length = 4361; + let input: Vec = (0..num_ints as u32) + .map(|i| i * 213_127 % 501) + .into_iter().collect(); + assert_eq!(input.len(), 10_000); + let encoded_data = encoder.encode_unsorted(&input); + assert_eq!(encoded_data.len(), expected_length); + let decoder = Decoder::new(); + let mut decoded_data: Vec = (0..num_ints as u32).collect(); + assert_eq!(num_ints, decoder.decode_unsorted(&encoded_data[..], &mut decoded_data)); + assert_eq!(decoded_data, input); + } #[test] fn test_simd_intersection() {