This commit is contained in:
Paul Masurel
2016-05-07 10:50:28 +09:00
parent 45746e4175
commit 1a71d4095a
7 changed files with 222 additions and 1 deletions

3
.gitmodules vendored
View File

@@ -1,3 +1,6 @@
[submodule "cpp/SIMDCompressionAndIntersection"]
path = cpp/SIMDCompressionAndIntersection
url = git@github.com:lemire/SIMDCompressionAndIntersection.git
[submodule "cpp/simdcomp"]
path = cpp/simdcomp
url = git@github.com:lemire/simdcomp.git

View File

@@ -4,16 +4,25 @@ extern crate gcc;
use std::process::Command;
fn main() {
Command::new("make")
.current_dir("cpp/SIMDCompressionAndIntersection")
.output()
.unwrap_or_else(|e| { panic!("Failed to make SIMDCompressionAndIntersection: {}", e) });
Command::new("make")
.current_dir("cpp/simdcomp")
.output()
.unwrap_or_else(|e| { panic!("Failed to make simdcomp: {}", e) });
gcc::Config::new()
.cpp(true)
.flag("-std=c++11")
.flag("-O3")
.flag("-mssse3")
.include("./cpp/SIMDCompressionAndIntersection/include")
.include("./cpp/simdcomp/include")
.object("cpp/SIMDCompressionAndIntersection/bitpacking.o")
.object("cpp/SIMDCompressionAndIntersection/integratedbitpacking.o")
.object("cpp/SIMDCompressionAndIntersection/simdbitpacking.o")
@@ -26,7 +35,15 @@ fn main() {
.object("cpp/SIMDCompressionAndIntersection/simdpackedselect.o")
.object("cpp/SIMDCompressionAndIntersection/frameofreference.o")
.object("cpp/SIMDCompressionAndIntersection/for.o")
.object("cpp/simdcomp/avxbitpacking.o")
.object("cpp/simdcomp/simdintegratedbitpacking.o")
.object("cpp/simdcomp/simdbitpacking.o")
.object("cpp/simdcomp/simdpackedsearch.o")
.object("cpp/simdcomp/simdcomputil.o")
.object("cpp/simdcomp/simdpackedselect.o")
.object("cpp/simdcomp/simdfor.o")
.file("cpp/encode.cpp")
.file("cpp/simdcomp_wrapper.cpp")
.compile("libsimdcompression.a");
println!("cargo:rustc-flags=-l dylib=stdc++");
}

View File

@@ -1,4 +1,3 @@
#include <iostream>
#include <stdint.h>
#include "codecfactory.h"

1
cpp/simdcomp Submodule

Submodule cpp/simdcomp added at 0dca28668f

48
cpp/simdcomp_wrapper.cpp Normal file
View File

@@ -0,0 +1,48 @@
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include "simdcomp.h"
#include "simdcomputil.h"
extern "C" {
// assumes datain has a size of 128 uint32
// and that buffer is large enough to host the data.
size_t compress_sorted_cpp(
const uint32_t* datain,
uint8_t* output,
const uint32_t offset) {
const uint32_t b = simdmaxbitsd1(offset, datain);
*output++ = b;
simdpackwithoutmaskd1(offset, datain, (__m128i *) output, b);
return 1 + b * sizeof(__m128i);;
}
// assumes datain has a size of 128 uint32
// and that buffer is large enough to host the data.
size_t uncompress_sorted_cpp(
const uint8_t* compressed_data,
uint32_t* output,
uint32_t offset) {
const uint32_t b = *compressed_data++;
simdunpackd1(offset, (__m128i *)compressed_data, output, b);
return 1 + b * sizeof(__m128i);
}
size_t compress_unsorted_cpp(
const uint32_t* datain,
uint8_t* output) {
const uint32_t b = maxbits(datain);
*output++ = b;
simdpackwithoutmask(datain, (__m128i *) output, b);
return 1 + b * sizeof(__m128i);;
}
size_t uncompress_unsorted_cpp(
const uint8_t* compressed_data,
uint32_t* output) {
const uint32_t b = *compressed_data++;
simdunpack((__m128i *)compressed_data, output, b);
return 1 + b * sizeof(__m128i);
}
}

View File

@@ -12,6 +12,7 @@ pub use self::block128::{Block128Encoder, Block128Decoder};
mod vints;
pub use self::vints::{VIntsEncoder, VIntsDecoder};
mod simdcomp;
pub const NUM_DOCS_PER_BLOCK: usize = 128;

152
src/compression/simdcomp.rs Normal file
View File

@@ -0,0 +1,152 @@
use libc::size_t;
extern {
// complete s4-bp128-dm
fn compress_sorted_cpp(
data: *const u32,
output: *mut u8,
offset: u32) -> size_t;
fn uncompress_sorted_cpp(
compressed_data: *const u8,
output: *mut u32,
offset: u32) -> size_t;
fn compress_unsorted_cpp(
data: *const u32,
output: *mut u8) -> size_t;
fn uncompress_unsorted_cpp(
compressed_data: *const u8,
output: *mut u32) -> size_t;
}
/// Number of integers held by one block.
const BLOCK_SIZE: usize = 128;

/// Upper bound on the size in bytes of one compressed block:
/// one leading byte, plus four bytes for each integer of the block.
const COMPRESSED_BLOCK_MAX_SIZE: usize = 1 + 4 * BLOCK_SIZE;
pub struct SIMDBlockEncoder {
output_buffer: [u8; COMPRESSED_BLOCK_MAX_SIZE],
}
impl SIMDBlockEncoder {
pub fn new() -> SIMDBlockEncoder {
SIMDBlockEncoder {
output_buffer: [0u8; COMPRESSED_BLOCK_MAX_SIZE]
}
}
pub fn compress_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
let compressed_size = unsafe { compress_sorted_cpp(vals.as_ptr(), self.output_buffer.as_mut_ptr(), offset) };
&self.output_buffer[..compressed_size]
}
pub fn compress_unsorted(&mut self, vals: &[u32]) -> &[u8] {
let compressed_size = unsafe { compress_unsorted_cpp(vals.as_ptr(), self.output_buffer.as_mut_ptr()) };
&self.output_buffer[..compressed_size]
}
}
pub struct SIMDBlockDecoder {
output_buffer: [u32; COMPRESSED_BLOCK_MAX_SIZE],
}
impl SIMDBlockDecoder {
pub fn new() -> SIMDBlockDecoder {
SIMDBlockDecoder {
output_buffer: [0u32; COMPRESSED_BLOCK_MAX_SIZE]
}
}
pub fn uncompress_sorted<'a>(&mut self, compressed_data: &'a [u8], offset: u32) -> &'a[u8] {
let consumed_size = unsafe { uncompress_sorted_cpp(compressed_data.as_ptr(), self.output_buffer.as_mut_ptr(), offset) };
&compressed_data[consumed_size..]
}
pub fn uncompress_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> &'a[u8] {
let consumed_size = unsafe { uncompress_unsorted_cpp(compressed_data.as_ptr(), self.output_buffer.as_mut_ptr()) };
&compressed_data[consumed_size..]
}
pub fn output(&self,) -> &[u32] {
&self.output_buffer
}
}
#[cfg(test)]
mod tests {

    use super::*;

    /// Round-trips one sorted block with a zero offset.
    #[test]
    fn test_encode_sorted_block() {
        let vals: Vec<u32> = (0u32..128u32).map(|i| i*7).collect();
        let mut encoder = SIMDBlockEncoder::new();
        let compressed_data = encoder.compress_sorted(&vals, 0);
        let mut decoder = SIMDBlockDecoder::new();
        let remaining_data = decoder.uncompress_sorted(compressed_data, 0);
        assert_eq!(remaining_data.len(), 0);
        assert_eq!(&decoder.output()[..vals.len()], &vals[..]);
    }

    /// Round-trips one sorted block with a non-zero offset.
    #[test]
    fn test_encode_sorted_block_with_offset() {
        let vals: Vec<u32> = (0u32..128u32).map(|i| 11 + i*7).collect();
        let mut encoder = SIMDBlockEncoder::new();
        let compressed_data = encoder.compress_sorted(&vals, 10);
        let mut decoder = SIMDBlockDecoder::new();
        let remaining_data = decoder.uncompress_sorted(compressed_data, 10);
        assert_eq!(remaining_data.len(), 0);
        assert_eq!(&decoder.output()[..vals.len()], &vals[..]);
    }

    /// Bytes following a sorted block must be handed back untouched.
    #[test]
    fn test_encode_sorted_block_with_junk() {
        let vals: Vec<u32> = (0u32..128u32).map(|i| 11u32 + i*7u32).collect();
        let mut encoder = SIMDBlockEncoder::new();
        let mut compressed: Vec<u8> = Vec::new();
        compressed.extend_from_slice(encoder.compress_sorted(&vals, 10));
        compressed.push(173u8);
        let mut decoder = SIMDBlockDecoder::new();
        let remaining_data = decoder.uncompress_sorted(&compressed, 10);
        assert_eq!(remaining_data.len(), 1);
        assert_eq!(remaining_data[0], 173u8);
        assert_eq!(&decoder.output()[..vals.len()], &vals[..]);
    }

    /// Bytes following an unsorted block must be handed back untouched.
    ///
    /// This test goes through `compress_unsorted` / `uncompress_unsorted`,
    /// so the unsorted codec is exercised as the test name promises.
    #[test]
    fn test_encode_unsorted_block_with_junk() {
        // Non-monotonic values: 11 + ((i * 7) % 12).
        let vals: Vec<u32> = (0u32..128u32).map(|i| 11u32 + (i*7u32) % 12).collect();
        let mut encoder = SIMDBlockEncoder::new();
        let mut compressed: Vec<u8> = Vec::new();
        compressed.extend_from_slice(encoder.compress_unsorted(&vals));
        compressed.push(173u8);
        let mut decoder = SIMDBlockDecoder::new();
        let remaining_data = decoder.uncompress_unsorted(&compressed);
        assert_eq!(remaining_data.len(), 1);
        assert_eq!(remaining_data[0], 173u8);
        assert_eq!(&decoder.output()[..vals.len()], &vals[..]);
    }
}