mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-27 21:50:41 +00:00
blop
This commit is contained in:
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -1,3 +1,6 @@
|
||||
[submodule "cpp/SIMDCompressionAndIntersection"]
|
||||
path = cpp/SIMDCompressionAndIntersection
|
||||
url = git@github.com:lemire/SIMDCompressionAndIntersection.git
|
||||
[submodule "cpp/simdcomp"]
|
||||
path = cpp/simdcomp
|
||||
url = git@github.com:lemire/simdcomp.git
|
||||
|
||||
17
build.rs
17
build.rs
@@ -4,16 +4,25 @@ extern crate gcc;
|
||||
use std::process::Command;
|
||||
|
||||
fn main() {
|
||||
|
||||
Command::new("make")
|
||||
.current_dir("cpp/SIMDCompressionAndIntersection")
|
||||
.output()
|
||||
.unwrap_or_else(|e| { panic!("Failed to make SIMDCompressionAndIntersection: {}", e) });
|
||||
|
||||
Command::new("make")
|
||||
.current_dir("cpp/simdcomp")
|
||||
.output()
|
||||
.unwrap_or_else(|e| { panic!("Failed to make simdcomp: {}", e) });
|
||||
|
||||
|
||||
gcc::Config::new()
|
||||
.cpp(true)
|
||||
.flag("-std=c++11")
|
||||
.flag("-O3")
|
||||
.flag("-mssse3")
|
||||
.include("./cpp/SIMDCompressionAndIntersection/include")
|
||||
.include("./cpp/simdcomp/include")
|
||||
.object("cpp/SIMDCompressionAndIntersection/bitpacking.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/integratedbitpacking.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/simdbitpacking.o")
|
||||
@@ -26,7 +35,15 @@ fn main() {
|
||||
.object("cpp/SIMDCompressionAndIntersection/simdpackedselect.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/frameofreference.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/for.o")
|
||||
.object("cpp/simdcomp/avxbitpacking.o")
|
||||
.object("cpp/simdcomp/simdintegratedbitpacking.o")
|
||||
.object("cpp/simdcomp/simdbitpacking.o")
|
||||
.object("cpp/simdcomp/simdpackedsearch.o")
|
||||
.object("cpp/simdcomp/simdcomputil.o")
|
||||
.object("cpp/simdcomp/simdpackedselect.o")
|
||||
.object("cpp/simdcomp/simdfor.o")
|
||||
.file("cpp/encode.cpp")
|
||||
.file("cpp/simdcomp_wrapper.cpp")
|
||||
.compile("libsimdcompression.a");
|
||||
println!("cargo:rustc-flags=-l dylib=stdc++");
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
#include <iostream>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "codecfactory.h"
|
||||
|
||||
1
cpp/simdcomp
Submodule
1
cpp/simdcomp
Submodule
Submodule cpp/simdcomp added at 0dca28668f
48
cpp/simdcomp_wrapper.cpp
Normal file
48
cpp/simdcomp_wrapper.cpp
Normal file
@@ -0,0 +1,48 @@
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include "simdcomp.h"
|
||||
#include "simdcomputil.h"
|
||||
|
||||
extern "C" {
|
||||
|
||||
// assumes datain has a size of 128 uint32
|
||||
// and that buffer is large enough to host the data.
|
||||
size_t compress_sorted_cpp(
|
||||
const uint32_t* datain,
|
||||
uint8_t* output,
|
||||
const uint32_t offset) {
|
||||
const uint32_t b = simdmaxbitsd1(offset, datain);
|
||||
*output++ = b;
|
||||
simdpackwithoutmaskd1(offset, datain, (__m128i *) output, b);
|
||||
return 1 + b * sizeof(__m128i);;
|
||||
}
|
||||
|
||||
// assumes datain has a size of 128 uint32
|
||||
// and that buffer is large enough to host the data.
|
||||
size_t uncompress_sorted_cpp(
|
||||
const uint8_t* compressed_data,
|
||||
uint32_t* output,
|
||||
uint32_t offset) {
|
||||
const uint32_t b = *compressed_data++;
|
||||
simdunpackd1(offset, (__m128i *)compressed_data, output, b);
|
||||
return 1 + b * sizeof(__m128i);
|
||||
}
|
||||
|
||||
size_t compress_unsorted_cpp(
|
||||
const uint32_t* datain,
|
||||
uint8_t* output) {
|
||||
const uint32_t b = maxbits(datain);
|
||||
*output++ = b;
|
||||
simdpackwithoutmask(datain, (__m128i *) output, b);
|
||||
return 1 + b * sizeof(__m128i);;
|
||||
}
|
||||
|
||||
size_t uncompress_unsorted_cpp(
|
||||
const uint8_t* compressed_data,
|
||||
uint32_t* output) {
|
||||
const uint32_t b = *compressed_data++;
|
||||
simdunpack((__m128i *)compressed_data, output, b);
|
||||
return 1 + b * sizeof(__m128i);
|
||||
}
|
||||
}
|
||||
@@ -12,6 +12,7 @@ pub use self::block128::{Block128Encoder, Block128Decoder};
|
||||
mod vints;
|
||||
pub use self::vints::{VIntsEncoder, VIntsDecoder};
|
||||
|
||||
mod simdcomp;
|
||||
|
||||
pub const NUM_DOCS_PER_BLOCK: usize = 128;
|
||||
|
||||
|
||||
152
src/compression/simdcomp.rs
Normal file
152
src/compression/simdcomp.rs
Normal file
@@ -0,0 +1,152 @@
|
||||
use libc::size_t;
|
||||
|
||||
extern {
|
||||
// complete s4-bp128-dm
|
||||
fn compress_sorted_cpp(
|
||||
data: *const u32,
|
||||
output: *mut u8,
|
||||
offset: u32) -> size_t;
|
||||
|
||||
fn uncompress_sorted_cpp(
|
||||
compressed_data: *const u8,
|
||||
output: *mut u32,
|
||||
offset: u32) -> size_t;
|
||||
|
||||
fn compress_unsorted_cpp(
|
||||
data: *const u32,
|
||||
output: *mut u8) -> size_t;
|
||||
|
||||
fn uncompress_unsorted_cpp(
|
||||
compressed_data: *const u8,
|
||||
output: *mut u32) -> size_t;
|
||||
}
|
||||
|
||||
/// Number of integers in one block handed to the SIMD wrappers.
const BLOCK_SIZE: usize = 128;
|
||||
/// Size in bytes of the encoder's output buffer: four bytes per integer of a block, plus one byte.
const COMPRESSED_BLOCK_MAX_SIZE: usize = BLOCK_SIZE * 4 + 1;
|
||||
|
||||
pub struct SIMDBlockEncoder {
|
||||
output_buffer: [u8; COMPRESSED_BLOCK_MAX_SIZE],
|
||||
}
|
||||
|
||||
impl SIMDBlockEncoder {
|
||||
|
||||
pub fn new() -> SIMDBlockEncoder {
|
||||
SIMDBlockEncoder {
|
||||
output_buffer: [0u8; COMPRESSED_BLOCK_MAX_SIZE]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compress_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
|
||||
let compressed_size = unsafe { compress_sorted_cpp(vals.as_ptr(), self.output_buffer.as_mut_ptr(), offset) };
|
||||
&self.output_buffer[..compressed_size]
|
||||
}
|
||||
|
||||
pub fn compress_unsorted(&mut self, vals: &[u32]) -> &[u8] {
|
||||
let compressed_size = unsafe { compress_unsorted_cpp(vals.as_ptr(), self.output_buffer.as_mut_ptr()) };
|
||||
&self.output_buffer[..compressed_size]
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SIMDBlockDecoder {
|
||||
output_buffer: [u32; COMPRESSED_BLOCK_MAX_SIZE],
|
||||
}
|
||||
|
||||
|
||||
impl SIMDBlockDecoder {
|
||||
pub fn new() -> SIMDBlockDecoder {
|
||||
SIMDBlockDecoder {
|
||||
output_buffer: [0u32; COMPRESSED_BLOCK_MAX_SIZE]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn uncompress_sorted<'a>(&mut self, compressed_data: &'a [u8], offset: u32) -> &'a[u8] {
|
||||
let consumed_size = unsafe { uncompress_sorted_cpp(compressed_data.as_ptr(), self.output_buffer.as_mut_ptr(), offset) };
|
||||
&compressed_data[consumed_size..]
|
||||
}
|
||||
|
||||
pub fn uncompress_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> &'a[u8] {
|
||||
let consumed_size = unsafe { uncompress_unsorted_cpp(compressed_data.as_ptr(), self.output_buffer.as_mut_ptr()) };
|
||||
&compressed_data[consumed_size..]
|
||||
}
|
||||
|
||||
pub fn output(&self,) -> &[u32] {
|
||||
&self.output_buffer
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {

    use super::*;

    /// Round-trips a sorted block with a zero offset.
    #[test]
    fn test_encode_sorted_block() {
        let vals: Vec<u32> = (0u32..128u32).map(|i| i * 7).collect();
        let mut encoder = SIMDBlockEncoder::new();
        let compressed_data = encoder.compress_sorted(&vals, 0);
        let mut decoder = SIMDBlockDecoder::new();
        let remaining_data = decoder.uncompress_sorted(compressed_data, 0);
        assert_eq!(remaining_data.len(), 0);
        assert_eq!(&vals[..], &decoder.output()[..vals.len()]);
    }

    /// Round-trips a sorted block with a non-zero offset.
    #[test]
    fn test_encode_sorted_block_with_offset() {
        let vals: Vec<u32> = (0u32..128u32).map(|i| 11 + i * 7).collect();
        let mut encoder = SIMDBlockEncoder::new();
        let compressed_data = encoder.compress_sorted(&vals, 10);
        let mut decoder = SIMDBlockDecoder::new();
        let remaining_data = decoder.uncompress_sorted(compressed_data, 10);
        assert_eq!(remaining_data.len(), 0);
        assert_eq!(&vals[..], &decoder.output()[..vals.len()]);
    }

    /// Bytes that follow a sorted block must be handed back untouched.
    #[test]
    fn test_encode_sorted_block_with_junk() {
        let vals: Vec<u32> = (0u32..128u32).map(|i| 11 + i * 7).collect();
        let mut encoder = SIMDBlockEncoder::new();
        let mut compressed: Vec<u8> = Vec::new();
        compressed.extend_from_slice(encoder.compress_sorted(&vals, 10));
        compressed.push(173u8);
        let mut decoder = SIMDBlockDecoder::new();
        let remaining_data = decoder.uncompress_sorted(&compressed, 10);
        assert_eq!(remaining_data.len(), 1);
        assert_eq!(remaining_data[0], 173u8);
        assert_eq!(&vals[..], &decoder.output()[..vals.len()]);
    }

    /// Bytes that follow an unsorted block must be handed back untouched.
    ///
    /// This test used to call the sorted codec, which left
    /// `compress_unsorted` / `uncompress_unsorted` without any coverage.
    #[test]
    fn test_encode_unsorted_block_with_junk() {
        // 11 + (i * 7) % 12 is not monotonic, so the block really is unsorted.
        let vals: Vec<u32> = (0u32..128u32).map(|i| 11 + (i * 7) % 12).collect();
        let mut encoder = SIMDBlockEncoder::new();
        let mut compressed: Vec<u8> = Vec::new();
        compressed.extend_from_slice(encoder.compress_unsorted(&vals));
        compressed.push(173u8);
        let mut decoder = SIMDBlockDecoder::new();
        let remaining_data = decoder.uncompress_unsorted(&compressed);
        assert_eq!(remaining_data.len(), 1);
        assert_eq!(remaining_data[0], 173u8);
        assert_eq!(&vals[..], &decoder.output()[..vals.len()]);
    }
}
|
||||
Reference in New Issue
Block a user