mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 01:32:53 +00:00
werwer
This commit is contained in:
@@ -3,38 +3,66 @@
|
||||
|
||||
#include "codecfactory.h"
|
||||
#include "intersection.h"
|
||||
#include "variablebyte.h"
|
||||
|
||||
using namespace SIMDCompressionLib;
|
||||
|
||||
// NOTE(review): `codec` and `codec_sorted` are both built from the codec name
// "s4-bp128-dm". This looks like the before/after of a rename; confirm that
// only one of the two declarations is meant to remain.
static shared_ptr<IntegerCODEC> codec = CODECFactory::getFromName("s4-bp128-dm");
|
||||
// Codec used by encode_sorted_native / decode_sorted_native.
|
||||
static shared_ptr<IntegerCODEC> codec_sorted = CODECFactory::getFromName("s4-bp128-dm");
|
||||
// Variable-byte codec used by encode_unsorted_native / decode_unsorted_native.
|
||||
static VariableByte<false> codec_unsorted = VariableByte<false>();
|
||||
|
||||
// SIMD binary-packing codec instance. No function visible in this chunk
// references it.
static SIMDBinaryPacking<SIMDIntegratedBlockPacker<Max4DeltaSIMD, true>> codec_packed_sorted = SIMDBinaryPacking<SIMDIntegratedBlockPacker<Max4DeltaSIMD, true>>();
|
||||
|
||||
extern "C" {
|
||||
|
||||
|
||||
|
||||
size_t encode_sorted_native(
|
||||
uint32_t* begin,
|
||||
const size_t num_els,
|
||||
uint32_t* output,
|
||||
const size_t output_capacity) {
|
||||
size_t output_length = output_capacity;
|
||||
codec -> encodeArray(begin,
|
||||
codec_sorted -> encodeArray(begin,
|
||||
num_els,
|
||||
output,
|
||||
output_length);
|
||||
return output_length;
|
||||
}
|
||||
|
||||
size_t encode_unsorted_native(
|
||||
uint32_t* begin,
|
||||
const size_t num_els,
|
||||
uint32_t* output,
|
||||
const size_t output_capacity) {
|
||||
size_t output_length = output_capacity;
|
||||
codec_unsorted.encodeArray(begin,
|
||||
num_els,
|
||||
output,
|
||||
output_length);
|
||||
return output_length;
|
||||
}
|
||||
|
||||
size_t decode_sorted_native(
|
||||
const uint32_t* compressed_data,
|
||||
const size_t compressed_size,
|
||||
uint32_t* uncompressed,
|
||||
const size_t uncompressed_capacity) {
|
||||
size_t num_ints = uncompressed_capacity;
|
||||
codec -> decodeArray(compressed_data, compressed_size, uncompressed, num_ints);
|
||||
codec_sorted -> decodeArray(compressed_data, compressed_size, uncompressed, num_ints);
|
||||
return num_ints;
|
||||
}
|
||||
|
||||
size_t decode_unsorted_native(
|
||||
const uint32_t* compressed_data,
|
||||
const size_t compressed_size,
|
||||
uint32_t* uncompressed,
|
||||
const size_t uncompressed_capacity) {
|
||||
size_t num_ints = uncompressed_capacity;
|
||||
codec_unsorted.decodeArray(compressed_data, compressed_size, uncompressed, num_ints);
|
||||
return num_ints;
|
||||
}
|
||||
|
||||
size_t intersection_native(
|
||||
const uint32_t* left,
|
||||
const size_t left_size,
|
||||
|
||||
@@ -4,6 +4,9 @@ use std::cmp::min;
|
||||
use std::iter;
|
||||
|
||||
extern {
|
||||
fn encode_unsorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t;
|
||||
fn decode_unsorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
|
||||
|
||||
fn encode_sorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t;
|
||||
fn decode_sorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
|
||||
fn intersection_native(left_data: *const u32, left_size: size_t, right_data: *const u32, right_size: size_t, output: *mut u32) -> size_t;
|
||||
@@ -43,6 +46,28 @@ impl Encoder {
|
||||
return &self.output_buffer[0..written_size];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn encode_unsorted(&mut self, input: &[u32]) -> &[u32] {
|
||||
self.input_buffer.clear();
|
||||
let input_len = input.len();
|
||||
if input_len + 10000 >= self.input_buffer.len() {
|
||||
let target_length = input_len + 1024;
|
||||
self.input_buffer.resize(target_length, 0);
|
||||
self.output_buffer.resize(target_length, 0);
|
||||
}
|
||||
// TODO use clone_from when available
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len);
|
||||
let written_size = encode_unsorted_native(
|
||||
self.input_buffer.as_mut_ptr(),
|
||||
input_len as size_t,
|
||||
self.output_buffer.as_mut_ptr(),
|
||||
self.output_buffer.len() as size_t,
|
||||
);
|
||||
return &self.output_buffer[0..written_size];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -66,6 +91,18 @@ impl Decoder {
|
||||
uncompressed_values.len() as size_t);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_unsorted(&self,
|
||||
compressed_data: &[u32],
|
||||
uncompressed_values: &mut [u32]) -> size_t {
|
||||
unsafe {
|
||||
return decode_unsorted_native(
|
||||
compressed_data.as_ptr(),
|
||||
compressed_data.len() as size_t,
|
||||
uncompressed_values.as_mut_ptr(),
|
||||
uncompressed_values.len() as size_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Intersector {
|
||||
@@ -125,7 +162,7 @@ mod tests {
|
||||
let num_ints = 10000 as usize;
|
||||
let expected_length = 1274;
|
||||
let input: Vec<u32> = (0..num_ints as u32)
|
||||
.map(|i| i * 7 / 2)
|
||||
.map(|i| i % 7 / 2)
|
||||
.into_iter().collect();
|
||||
let encoded_data = encoder.encode_sorted(&input);
|
||||
assert_eq!(encoded_data.len(), expected_length);
|
||||
@@ -135,6 +172,22 @@ mod tests {
|
||||
assert_eq!(decoded_data, input);
|
||||
}
|
||||
|
||||
/// Round-trips 10 000 non-monotonic values through the unsorted encoder and
/// decoder, pinning the encoded length along the way.
#[test]
fn test_encode_unsorted() {
    const NUM_INTS: usize = 10_000;
    const EXPECTED_LENGTH: usize = 4361;

    // Values jump around within 0..501, so the sequence is not sorted.
    let input: Vec<u32> = (0..NUM_INTS as u32)
        .map(|i| i * 213_127 % 501)
        .collect();
    assert_eq!(input.len(), 10_000);

    let mut encoder = Encoder::new();
    let encoded_data = encoder.encode_unsorted(&input);
    assert_eq!(encoded_data.len(), EXPECTED_LENGTH);

    let decoder = Decoder::new();
    // Pre-filled with values that differ from `input`, so a decode that writes
    // nothing cannot pass the final comparison.
    let mut decoded_data: Vec<u32> = (0..NUM_INTS as u32).collect();
    let decoded_len = decoder.decode_unsorted(&encoded_data[..], &mut decoded_data);
    assert_eq!(NUM_INTS, decoded_len);
    assert_eq!(decoded_data, input);
}
|
||||
|
||||
#[test]
|
||||
fn test_simd_intersection() {
|
||||
|
||||
Reference in New Issue
Block a user