This commit is contained in:
Paul Masurel
2016-02-28 20:35:54 +09:00
parent 186e448a15
commit c8196aceb6
2 changed files with 86 additions and 5 deletions

View File

@@ -3,38 +3,66 @@
#include "codecfactory.h"
#include "intersection.h"
#include "variablebyte.h"
using namespace SIMDCompressionLib;
// NOTE(review): `codec` and `codec_sorted` below are built from the same factory
// name; `codec` looks like the earlier spelling of `codec_sorted` -- confirm that
// only one of the two declarations is meant to remain.
static shared_ptr<IntegerCODEC> codec = CODECFactory::getFromName("s4-bp128-dm");
// Codec used by encode_sorted_native / decode_sorted_native, looked up by name
// in CODECFactory.
static shared_ptr<IntegerCODEC> codec_sorted = CODECFactory::getFromName("s4-bp128-dm");
// Variable-byte codec used by encode_unsorted_native / decode_unsorted_native.
static VariableByte<false> codec_unsorted = VariableByte<false>();
// NOTE(review): not referenced by any function visible in this chunk -- confirm
// it is used further down in the file.
static SIMDBinaryPacking<SIMDIntegratedBlockPacker<Max4DeltaSIMD, true>> codec_packed_sorted = SIMDBinaryPacking<SIMDIntegratedBlockPacker<Max4DeltaSIMD, true>>();
extern "C" {
// Encodes `num_els` integers starting at `begin` into `output` using the
// sorted-list codec and returns the value left in `output_length` afterwards.
//
// `output_capacity` seeds `output_length` before the call; encodeArray is
// expected to overwrite it with the produced length (assumes the last argument
// is taken by reference -- confirm against IntegerCODEC).
size_t encode_sorted_native(
uint32_t* begin,
const size_t num_els,
uint32_t* output,
const size_t output_capacity) {
size_t output_length = output_capacity;
// NOTE(review): the next two lines both open an encodeArray call. The first
// one goes through `codec`, the second through `codec_sorted`; this looks like
// an old and a new version of the same line -- confirm which one is live.
codec -> encodeArray(begin,
codec_sorted -> encodeArray(begin,
num_els,
output,
output_length);
return output_length;
}
// Encodes `num_els` integers starting at `begin` into `output` with the
// variable-byte codec (`codec_unsorted`).
//
// `output_capacity` is the starting value of the length handed to encodeArray;
// whatever the codec leaves in that variable is returned to the caller.
size_t encode_unsorted_native(
    uint32_t* begin,
    const size_t num_els,
    uint32_t* output,
    const size_t output_capacity) {
    // Starts out as the capacity, read back after the call as the result.
    size_t written = output_capacity;
    codec_unsorted.encodeArray(begin, num_els, output, written);
    return written;
}
// Decodes `compressed_size` words from `compressed_data` into `uncompressed`
// using the sorted-list codec and returns the value left in `num_ints`.
//
// `uncompressed_capacity` seeds `num_ints` before the call; decodeArray is
// expected to overwrite it with the number of decoded integers (assumes the
// last argument is taken by reference -- confirm against IntegerCODEC).
size_t decode_sorted_native(
const uint32_t* compressed_data,
const size_t compressed_size,
uint32_t* uncompressed,
const size_t uncompressed_capacity) {
size_t num_ints = uncompressed_capacity;
// NOTE(review): the same decodeArray call appears twice in a row, once through
// `codec` and once through `codec_sorted`; this looks like an old and a new
// version of the same line -- confirm that only one of them should remain.
codec -> decodeArray(compressed_data, compressed_size, uncompressed, num_ints);
codec_sorted -> decodeArray(compressed_data, compressed_size, uncompressed, num_ints);
return num_ints;
}
// Decodes `compressed_size` words from `compressed_data` into `uncompressed`
// with the variable-byte codec (`codec_unsorted`).
//
// `uncompressed_capacity` is the starting value of the count handed to
// decodeArray; whatever the codec leaves in that variable is returned.
size_t decode_unsorted_native(
    const uint32_t* compressed_data,
    const size_t compressed_size,
    uint32_t* uncompressed,
    const size_t uncompressed_capacity) {
    // Starts out as the capacity, read back after the call as the result.
    size_t decoded_count = uncompressed_capacity;
    codec_unsorted.decodeArray(
        compressed_data,
        compressed_size,
        uncompressed,
        decoded_count);
    return decoded_count;
}
size_t intersection_native(
const uint32_t* left,
const size_t left_size,

View File

@@ -4,6 +4,9 @@ use std::cmp::min;
use std::iter;
extern {
fn encode_unsorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t;
fn decode_unsorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
fn encode_sorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t;
fn decode_sorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
fn intersection_native(left_data: *const u32, left_size: size_t, right_data: *const u32, right_size: size_t, output: *mut u32) -> size_t;
@@ -43,6 +46,28 @@ impl Encoder {
return &self.output_buffer[0..written_size];
}
}
/// Compresses `input` with the native variable-byte ("unsorted") codec.
///
/// The values are copied into the encoder's scratch input buffer first,
/// because the native entry point takes a mutable pointer. The returned
/// slice borrows the encoder's output buffer and holds exactly the words
/// the native call reported as written.
pub fn encode_unsorted(&mut self, input: &[u32]) -> &[u32] {
    let input_len = input.len();

    // Rebuild the scratch input buffer: the values followed by 1024 zeroed
    // words of slack, the same layout this method has always produced.
    self.input_buffer.clear();
    self.input_buffer.resize(input_len + 1024, 0);
    self.input_buffer[..input_len].copy_from_slice(input);

    // A variable-byte code can spend up to 5 bytes on a single u32, i.e.
    // 1.25 output words per input value, so `input_len + 1024` words is not
    // enough for long inputs of large values. Size for the worst case and
    // keep the historical 1024 words of slack on top.
    let output_capacity = input_len + input_len / 4 + 1024;
    self.output_buffer.resize(output_capacity, 0);

    // SAFETY: both pointers come from live `Vec<u32>` buffers; the input
    // buffer holds at least `input_len` initialised words and the output
    // buffer holds `output_capacity` words, which covers the variable-byte
    // worst case computed above.
    // NOTE(review): assumes the native codec never writes more than
    // 5 bytes per value plus padding to a whole word -- confirm.
    let written_size = unsafe {
        encode_unsorted_native(
            self.input_buffer.as_mut_ptr(),
            input_len as size_t,
            self.output_buffer.as_mut_ptr(),
            self.output_buffer.len() as size_t,
        )
    };
    &self.output_buffer[..written_size]
}
}
@@ -66,6 +91,18 @@ impl Decoder {
uncompressed_values.len() as size_t);
}
}
/// Decodes a block produced by the variable-byte ("unsorted") codec.
///
/// `compressed_data` is handed to the native decoder together with
/// `uncompressed_values` as the destination; the length of the destination
/// slice is passed as its capacity. Returns whatever count the native call
/// reports.
pub fn decode_unsorted(&self,
                       compressed_data: &[u32],
                       uncompressed_values: &mut [u32]) -> size_t {
    let src = compressed_data.as_ptr();
    let src_len = compressed_data.len() as size_t;
    let dst_capacity = uncompressed_values.len() as size_t;
    let dst = uncompressed_values.as_mut_ptr();
    // SAFETY: both pointers are derived from live slices and are passed
    // along with those slices' own lengths.
    // NOTE(review): assumes the native decoder stays within `dst_capacity`
    // words -- confirm.
    unsafe { decode_unsorted_native(src, src_len, dst, dst_capacity) }
}
}
pub struct Intersector {
@@ -125,7 +162,7 @@ mod tests {
let num_ints = 10000 as usize;
let expected_length = 1274;
let input: Vec<u32> = (0..num_ints as u32)
.map(|i| i * 7 / 2)
.map(|i| i % 7 / 2)
.into_iter().collect();
let encoded_data = encoder.encode_sorted(&input);
assert_eq!(encoded_data.len(), expected_length);
@@ -135,6 +172,22 @@ mod tests {
assert_eq!(decoded_data, input);
}
/// Round-trips 10 000 non-monotonic values through the unsorted codec and
/// pins the size of the encoded form.
#[test]
fn test_encode_unsorted() {
    const NUM_INTS: usize = 10_000;
    const EXPECTED_LENGTH: usize = 4361;

    let mut encoder = Encoder::new();
    // Values jump around inside 0..501, so the sequence is not sorted.
    let input: Vec<u32> = (0..NUM_INTS as u32)
        .map(|i| i * 213_127 % 501)
        .collect();
    assert_eq!(input.len(), 10_000);

    let encoded_data = encoder.encode_unsorted(&input);
    assert_eq!(encoded_data.len(), EXPECTED_LENGTH);

    let decoder = Decoder::new();
    // Pre-filled with values that differ from `input`, so the final
    // comparison only passes if the decoder overwrote the buffer.
    let mut decoded_data: Vec<u32> = (0..NUM_INTS as u32).collect();
    let decoded_len = decoder.decode_unsorted(encoded_data, &mut decoded_data);
    assert_eq!(NUM_INTS, decoded_len);
    assert_eq!(decoded_data, input);
}
#[test]
fn test_simd_intersection() {