mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-14 13:02:55 +00:00
removed deps to SIMDCompressionAndIntersection
This commit is contained in:
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -1,6 +1,3 @@
|
||||
[submodule "cpp/SIMDCompressionAndIntersection"]
|
||||
path = cpp/SIMDCompressionAndIntersection
|
||||
url = git@github.com:lemire/SIMDCompressionAndIntersection.git
|
||||
[submodule "cpp/simdcomp"]
|
||||
path = cpp/simdcomp
|
||||
url = git@github.com:lemire/simdcomp.git
|
||||
|
||||
23
build.rs
23
build.rs
@@ -4,37 +4,17 @@ extern crate gcc;
|
||||
use std::process::Command;
|
||||
|
||||
fn main() {
|
||||
|
||||
Command::new("make")
|
||||
.current_dir("cpp/SIMDCompressionAndIntersection")
|
||||
.output()
|
||||
.unwrap_or_else(|e| { panic!("Failed to make SIMDCompressionAndIntersection: {}", e) });
|
||||
|
||||
Command::new("make")
|
||||
.current_dir("cpp/simdcomp")
|
||||
.output()
|
||||
.unwrap_or_else(|e| { panic!("Failed to make simdcomp: {}", e) });
|
||||
|
||||
|
||||
gcc::Config::new()
|
||||
.cpp(true)
|
||||
.flag("-std=c++11")
|
||||
.flag("-O3")
|
||||
.flag("-mssse3")
|
||||
.include("./cpp/SIMDCompressionAndIntersection/include")
|
||||
.include("./cpp/simdcomp/include")
|
||||
.object("cpp/SIMDCompressionAndIntersection/bitpacking.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/integratedbitpacking.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/simdbitpacking.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/usimdbitpacking.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/simdintegratedbitpacking.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/intersection.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/varintdecode.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/streamvbyte.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/simdpackedsearch.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/simdpackedselect.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/frameofreference.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/for.o")
|
||||
.object("cpp/simdcomp/avxbitpacking.o")
|
||||
.object("cpp/simdcomp/simdintegratedbitpacking.o")
|
||||
.object("cpp/simdcomp/simdbitpacking.o")
|
||||
@@ -42,8 +22,7 @@ fn main() {
|
||||
.object("cpp/simdcomp/simdcomputil.o")
|
||||
.object("cpp/simdcomp/simdpackedselect.o")
|
||||
.object("cpp/simdcomp/simdfor.o")
|
||||
.file("cpp/encode.cpp")
|
||||
.file("cpp/simdcomp_wrapper.cpp")
|
||||
.compile("libsimdcompression.a");
|
||||
.compile("libsimdcomp.a");
|
||||
println!("cargo:rustc-flags=-l dylib=stdc++");
|
||||
}
|
||||
|
||||
Submodule cpp/SIMDCompressionAndIntersection deleted from 1f8e12aebd
@@ -1,177 +0,0 @@
|
||||
|
||||
use libc::size_t;
|
||||
use std::ptr;
|
||||
|
||||
extern {
|
||||
fn encode_sorted_block128_native(data: *mut u32, output: *mut u8, output_capacity: size_t) -> size_t;
|
||||
fn decode_sorted_block128_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32) -> usize;
|
||||
|
||||
fn encode_block128_native(data: *mut u32, output: *mut u8, output_capacity: size_t) -> size_t;
|
||||
fn decode_block128_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32) -> usize;
|
||||
|
||||
fn encode_sorted_vint_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
|
||||
fn decode_sorted_vint_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
|
||||
|
||||
}
|
||||
|
||||
//-------------------------
|
||||
// Block128
|
||||
|
||||
pub struct Block128Encoder {
|
||||
input_buffer: [u32; 128],
|
||||
output_buffer: [u8; 256 * 4],
|
||||
}
|
||||
|
||||
impl Block128Encoder {
|
||||
|
||||
pub fn new() -> Block128Encoder {
|
||||
Block128Encoder {
|
||||
input_buffer: [0u32; 128],
|
||||
output_buffer: [0u8; 256 * 4],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encode(&mut self, input: &[u32]) -> &[u8] {
|
||||
assert_eq!(input.len(), 128);
|
||||
// TODO use clone_from when available
|
||||
let written_size: usize;
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), 128);
|
||||
written_size = encode_block128_native(
|
||||
self.input_buffer.as_mut_ptr(),
|
||||
self.output_buffer.as_mut_ptr(),
|
||||
256 * 4,
|
||||
);
|
||||
}
|
||||
return &self.output_buffer[0..written_size];
|
||||
}
|
||||
|
||||
pub fn encode_sorted(&mut self, input: &[u32]) -> &[u8] {
|
||||
assert_eq!(input.len(), 128);
|
||||
// TODO use clone_from when available
|
||||
let written_size: usize;
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), 128);
|
||||
written_size = encode_sorted_block128_native(
|
||||
self.input_buffer.as_mut_ptr(),
|
||||
self.output_buffer.as_mut_ptr(),
|
||||
256 * 4,
|
||||
);
|
||||
}
|
||||
return &self.output_buffer[0..written_size];
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Block128Decoder {
|
||||
output: [u32; 128],
|
||||
}
|
||||
|
||||
impl Block128Decoder {
|
||||
|
||||
pub fn new() -> Block128Decoder {
|
||||
Block128Decoder {
|
||||
output: [0u32; 128]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode<'a, 'b>(
|
||||
&'b mut self,
|
||||
compressed_data: &'a [u8]) -> &'a[u8] {
|
||||
unsafe {
|
||||
let consumed_num_bytes: usize = decode_block128_native(
|
||||
compressed_data.as_ptr(),
|
||||
compressed_data.len() as size_t,
|
||||
self.output.as_mut_ptr());
|
||||
&compressed_data[consumed_num_bytes..]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_sorted<'a, 'b>(
|
||||
&'b mut self,
|
||||
compressed_data: &'a [u8]) -> &'a [u8] {
|
||||
unsafe {
|
||||
let consumed_num_bytes: usize = decode_sorted_block128_native(
|
||||
compressed_data.as_ptr(),
|
||||
compressed_data.len() as size_t,
|
||||
self.output.as_mut_ptr());
|
||||
&compressed_data[consumed_num_bytes..]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_sorted_remaining(&mut self,
|
||||
compressed_data: &[u8]) -> &[u32] {
|
||||
unsafe {
|
||||
let num_uncompressed = decode_sorted_vint_native(
|
||||
compressed_data.as_ptr(),
|
||||
compressed_data.len() as size_t,
|
||||
self.output.as_mut_ptr(),
|
||||
128);
|
||||
&self.output[..num_uncompressed]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn output(&self,) -> &[u32; 128] {
|
||||
&self.output
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {

    use super::*;

    /// Checks that `remaining` is exactly the bytes `0, 1, 2, …` that the
    /// tests append after an encoded block.
    fn assert_trailing_bytes(remaining: &[u8], num_extra_values: usize) {
        assert_eq!(remaining.len(), num_extra_values);
        for (i, &byte) in remaining.iter().enumerate() {
            assert_eq!(byte, i as u8);
        }
    }

    #[test]
    fn test_encode_sorted_block() {
        // `.iter()` rather than `.into_iter()`: on an array the latter yields
        // references or values depending on the edition.
        for &num_extra_values in [0usize, 2, 11].iter() {
            let mut input = [0u32; 128];
            for (i, slot) in input.iter_mut().enumerate() {
                *slot = (i as u32) * 7 / 2;
            }
            let mut encoder = Block128Encoder::new();
            let mut encoded_vec: Vec<u8> = encoder.encode_sorted(&input).to_vec();
            assert_eq!(encoded_vec.len(), 84);
            // Trailing bytes that the decoder must leave untouched.
            encoded_vec.extend((0..num_extra_values).map(|i| i as u8));
            let mut decoder = Block128Decoder::new();
            let remaining_input = decoder.decode_sorted(&encoded_vec[..]);
            assert_eq!(&decoder.output()[..], &input[..]);
            assert_trailing_bytes(remaining_input, num_extra_values);
        }
    }

    #[test]
    fn test_encode_block() {
        for &num_extra_values in [0usize, 2, 11].iter() {
            let mut input = [0u32; 128];
            for (i, slot) in input.iter_mut().enumerate() {
                *slot = (i as u32) * 7 % 31;
            }
            let mut encoder = Block128Encoder::new();
            let mut encoded_vec: Vec<u8> = encoder.encode(&input).to_vec();
            assert_eq!(encoded_vec.len(), 100);
            // Trailing bytes that the decoder must leave untouched.
            encoded_vec.extend((0..num_extra_values).map(|i| i as u8));
            let mut decoder = Block128Decoder::new();
            let remaining_input: &[u8] = decoder.decode(&encoded_vec[..]);
            assert_eq!(&decoder.output()[..], &input[..]);
            assert_trailing_bytes(remaining_input, num_extra_values);
        }
    }

}
|
||||
88
src/compression/composite.rs
Normal file
88
src/compression/composite.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
use compression::SIMDBlockEncoder;
|
||||
use compression::SIMDBlockDecoder;
|
||||
use super::NUM_DOCS_PER_BLOCK;
|
||||
|
||||
pub struct CompositeEncoder {
|
||||
block_encoder: SIMDBlockEncoder,
|
||||
output: Vec<u8>,
|
||||
}
|
||||
|
||||
impl CompositeEncoder {
|
||||
|
||||
pub fn new() -> CompositeEncoder {
|
||||
CompositeEncoder {
|
||||
block_encoder: SIMDBlockEncoder::new(),
|
||||
output: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compress_sorted(&mut self, vals: &[u32]) -> &[u8] {
|
||||
self.output.clear();
|
||||
let num_blocks = vals.len() / NUM_DOCS_PER_BLOCK;
|
||||
let mut offset = 0u32;
|
||||
for i in 0..num_blocks {
|
||||
let vals_slice = &vals[i * NUM_DOCS_PER_BLOCK .. (i + 1) * NUM_DOCS_PER_BLOCK];
|
||||
let block_compressed = self.block_encoder.compress_block_sorted(&vals_slice, offset);
|
||||
offset = vals_slice[NUM_DOCS_PER_BLOCK - 1];
|
||||
self.output.extend_from_slice(block_compressed);
|
||||
}
|
||||
let vint_compressed = self.block_encoder.compress_vint_sorted(&vals[num_blocks * NUM_DOCS_PER_BLOCK..], offset);
|
||||
self.output.extend_from_slice(vint_compressed);
|
||||
&self.output
|
||||
}
|
||||
|
||||
pub fn compress_unsorted(&mut self, vals: &[u32]) -> &[u8] {
|
||||
self.output.clear();
|
||||
let num_blocks = vals.len() / NUM_DOCS_PER_BLOCK;
|
||||
for i in 0..num_blocks {
|
||||
let vals_slice = &vals[i * NUM_DOCS_PER_BLOCK .. (i + 1) * NUM_DOCS_PER_BLOCK];
|
||||
let block_compressed = self.block_encoder.compress_block_unsorted(&vals_slice);
|
||||
self.output.extend_from_slice(block_compressed);
|
||||
}
|
||||
let vint_compressed = self.block_encoder.compress_vint_unsorted(&vals[num_blocks * NUM_DOCS_PER_BLOCK..]);
|
||||
self.output.extend_from_slice(vint_compressed);
|
||||
&self.output
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub struct CompositeDecoder {
|
||||
block_decoder: SIMDBlockDecoder,
|
||||
vals: Vec<u32>,
|
||||
}
|
||||
|
||||
|
||||
impl CompositeDecoder {
|
||||
pub fn new() -> CompositeDecoder {
|
||||
CompositeDecoder {
|
||||
block_decoder: SIMDBlockDecoder::new(),
|
||||
vals: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn uncompress_sorted(&mut self, mut compressed_data: &[u8], doc_freq: usize) -> &[u32] {
|
||||
let mut offset = 0u32;
|
||||
self.vals.clear();
|
||||
let num_blocks = doc_freq / NUM_DOCS_PER_BLOCK;
|
||||
for _ in 0..num_blocks {
|
||||
compressed_data = self.block_decoder.uncompress_block_sorted(compressed_data, offset);
|
||||
offset = self.block_decoder.output()[NUM_DOCS_PER_BLOCK - 1];
|
||||
self.vals.extend_from_slice(self.block_decoder.output());
|
||||
}
|
||||
self.block_decoder.uncompress_vint_sorted(compressed_data, offset, doc_freq % NUM_DOCS_PER_BLOCK);
|
||||
self.vals.extend_from_slice(self.block_decoder.output());
|
||||
&self.vals
|
||||
}
|
||||
|
||||
pub fn uncompress_unsorted(&mut self, mut compressed_data: &[u8], doc_freq: usize) -> &[u32] {
|
||||
self.vals.clear();
|
||||
let num_blocks = doc_freq / NUM_DOCS_PER_BLOCK;
|
||||
for _ in 0..num_blocks {
|
||||
compressed_data = self.block_decoder.uncompress_block_unsorted(compressed_data);
|
||||
self.vals.extend_from_slice(self.block_decoder.output());
|
||||
}
|
||||
self.block_decoder.uncompress_vint_unsorted(compressed_data, doc_freq % NUM_DOCS_PER_BLOCK);
|
||||
self.vals.extend_from_slice(self.block_decoder.output());
|
||||
&self.vals
|
||||
}
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
use libc::size_t;
|
||||
|
||||
extern {
|
||||
fn intersection_native(left_data: *const u32, left_size: size_t, right_data: *const u32, right_size: size_t, output: *mut u32) -> size_t;
|
||||
}
|
||||
|
||||
pub fn intersection(left: &[u32], right: &[u32], output: &mut [u32]) -> usize {
|
||||
unsafe {
|
||||
intersection_native(
|
||||
left.as_ptr(), left.len(),
|
||||
right.as_ptr(), right.len(),
|
||||
output.as_mut_ptr())
|
||||
}
|
||||
}
|
||||
@@ -1,18 +1,10 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
mod intersection;
|
||||
pub use self::intersection::intersection;
|
||||
|
||||
mod s4bp128;
|
||||
pub use self::s4bp128::{S4BP128Encoder, S4BP128Decoder};
|
||||
|
||||
mod block128;
|
||||
pub use self::block128::{Block128Encoder, Block128Decoder};
|
||||
|
||||
mod vints;
|
||||
pub use self::vints::{VIntsEncoder, VIntsDecoder};
|
||||
|
||||
mod simdcomp;
|
||||
pub use self::simdcomp::{SIMDBlockEncoder, SIMDBlockDecoder};
|
||||
|
||||
mod composite;
|
||||
pub use self::composite::CompositeEncoder;
|
||||
|
||||
pub const NUM_DOCS_PER_BLOCK: usize = 128;
|
||||
|
||||
|
||||
@@ -1,164 +0,0 @@
|
||||
|
||||
use libc::size_t;
|
||||
use std::ptr;
|
||||
|
||||
|
||||
|
||||
extern {
|
||||
// complete s4-bp128-dm
|
||||
fn encode_s4_bp128_dm_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
|
||||
fn decode_s4_bp128_dm_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
|
||||
|
||||
fn encode_composite_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
|
||||
fn decode_composite_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
|
||||
|
||||
}
|
||||
|
||||
//-------------------------
|
||||
// s4-bp128-dm
|
||||
|
||||
pub struct S4BP128Encoder {
|
||||
input_buffer: Vec<u32>,
|
||||
output_buffer: Vec<u8>,
|
||||
}
|
||||
|
||||
impl S4BP128Encoder {
|
||||
pub fn new() -> S4BP128Encoder {
|
||||
S4BP128Encoder {
|
||||
input_buffer: Vec::new(),
|
||||
output_buffer: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encode(&mut self, input: &[u32]) -> &[u8] {
|
||||
self.input_buffer.clear();
|
||||
let input_len = input.len();
|
||||
if input_len + 10000 >= self.input_buffer.len() {
|
||||
let target_length = input_len + 1024;
|
||||
self.input_buffer.resize(target_length, 0);
|
||||
self.output_buffer.resize(target_length * 4, 0);
|
||||
}
|
||||
// TODO use clone_from when available
|
||||
let written_size;
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len);
|
||||
written_size = encode_composite_native(
|
||||
self.input_buffer.as_mut_ptr(),
|
||||
input_len as size_t,
|
||||
self.output_buffer.as_mut_ptr(),
|
||||
self.output_buffer.len() as size_t,
|
||||
);
|
||||
}
|
||||
&self.output_buffer[0..written_size]
|
||||
}
|
||||
|
||||
pub fn encode_sorted(&mut self, input: &[u32]) -> &[u8] {
|
||||
self.input_buffer.clear();
|
||||
let input_len = input.len();
|
||||
if input_len + 10000 >= self.input_buffer.len() {
|
||||
let target_length = input_len + 1024;
|
||||
self.input_buffer.resize(target_length, 0);
|
||||
self.output_buffer.resize(target_length * 4, 0);
|
||||
}
|
||||
// TODO use clone_from when available
|
||||
let written_size;
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len);
|
||||
written_size = encode_s4_bp128_dm_native(
|
||||
self.input_buffer.as_mut_ptr(),
|
||||
input_len as size_t,
|
||||
self.output_buffer.as_mut_ptr(),
|
||||
self.output_buffer.len() as size_t,
|
||||
);
|
||||
}
|
||||
return &self.output_buffer[0..written_size];
|
||||
}
|
||||
}
|
||||
|
||||
pub struct S4BP128Decoder;
|
||||
|
||||
impl S4BP128Decoder {
|
||||
|
||||
pub fn new() -> S4BP128Decoder {
|
||||
S4BP128Decoder
|
||||
}
|
||||
|
||||
pub fn decode_sorted(&self,
|
||||
compressed_data: &[u8],
|
||||
uncompressed_values: &mut [u32]) -> size_t {
|
||||
unsafe {
|
||||
return decode_s4_bp128_dm_native(
|
||||
compressed_data.as_ptr(),
|
||||
compressed_data.len() as size_t,
|
||||
uncompressed_values.as_mut_ptr(),
|
||||
uncompressed_values.len() as size_t);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode(&self,
|
||||
compressed_data: &[u8],
|
||||
uncompressed_values: &mut [u32]) -> size_t {
|
||||
unsafe {
|
||||
return decode_composite_native(
|
||||
compressed_data.as_ptr(),
|
||||
compressed_data.len() as size_t,
|
||||
uncompressed_values.as_mut_ptr(),
|
||||
uncompressed_values.len() as size_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {

    use super::*;
    use test::Bencher;
    use compression::tests::generate_array;

    /// Round-trips 10 000 increasing values through the sorted codec and
    /// pins the compressed size.
    #[test]
    fn test_encode_sorted_big() {
        let num_ints: usize = 10_000;
        let input: Vec<u32> = (0..num_ints as u32).map(|i| i * 7 / 2).collect();
        let mut encoder = S4BP128Encoder::new();
        let encoded_data = encoder.encode_sorted(&input);
        assert_eq!(encoded_data.len(), 5_096);
        // Pre-filled with different values so a no-op decode cannot pass.
        let mut decoded_data: Vec<u32> = (0..num_ints as u32).collect();
        let num_decoded = S4BP128Decoder::new().decode_sorted(encoded_data, &mut decoded_data);
        assert_eq!(num_ints, num_decoded);
        assert_eq!(decoded_data, input);
    }

    /// Round-trips 10 000 non-monotonic values through the unsorted codec
    /// and pins the compressed size.
    #[test]
    fn test_encode_unsorted_big() {
        let num_ints: usize = 10_000;
        let input: Vec<u32> = (0..num_ints as u32).map(|i| i * 7 % 37).collect();
        let mut encoder = S4BP128Encoder::new();
        let encoded_data = encoder.encode(&input);
        assert_eq!(encoded_data.len(), 7_588);
        // Pre-filled with different values so a no-op decode cannot pass.
        let mut decoded_data: Vec<u32> = (0..num_ints as u32).collect();
        let num_decoded = S4BP128Decoder::new().decode(encoded_data, &mut decoded_data);
        assert_eq!(num_ints, num_decoded);
        assert_eq!(decoded_data, input);
    }

    /// Measures sorted decoding of a one-million-value generated array.
    #[bench]
    fn bench_decode(b: &mut Bencher) {
        const TEST_SIZE: usize = 1_000_000;
        let arr = generate_array(TEST_SIZE, 0.1);
        let mut encoder = S4BP128Encoder::new();
        let encoded = encoder.encode_sorted(&arr);
        let decoder = S4BP128Decoder;
        let mut uncompressed: Vec<u32> = (0..TEST_SIZE as u32).collect();
        b.iter(|| {
            decoder.decode_sorted(encoded, &mut uncompressed);
        });
    }
}
|
||||
@@ -1,4 +1,7 @@
|
||||
use libc::size_t;
|
||||
use super::NUM_DOCS_PER_BLOCK;
|
||||
|
||||
const COMPRESSED_BLOCK_MAX_SIZE: usize = NUM_DOCS_PER_BLOCK * 4 + 1;
|
||||
|
||||
extern {
|
||||
// complete s4-bp128-dm
|
||||
@@ -21,56 +24,151 @@ extern {
|
||||
output: *mut u32) -> size_t;
|
||||
}
|
||||
|
||||
const BLOCK_SIZE: usize = 128;
|
||||
const COMPRESSED_BLOCK_MAX_SIZE: usize = BLOCK_SIZE * 4 + 1;
|
||||
|
||||
pub struct SIMDBlockEncoder {
|
||||
output_buffer: [u8; COMPRESSED_BLOCK_MAX_SIZE],
|
||||
output: [u8; COMPRESSED_BLOCK_MAX_SIZE],
|
||||
output_len: usize,
|
||||
}
|
||||
|
||||
impl SIMDBlockEncoder {
|
||||
|
||||
pub fn new() -> SIMDBlockEncoder {
|
||||
SIMDBlockEncoder {
|
||||
output_buffer: [0u8; COMPRESSED_BLOCK_MAX_SIZE]
|
||||
output: [0u8; COMPRESSED_BLOCK_MAX_SIZE],
|
||||
output_len: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compress_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
|
||||
let compressed_size = unsafe { compress_sorted_cpp(vals.as_ptr(), self.output_buffer.as_mut_ptr(), offset) };
|
||||
&self.output_buffer[..compressed_size]
|
||||
pub fn compress_block_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
|
||||
let compressed_size = unsafe { compress_sorted_cpp(vals.as_ptr(), self.output.as_mut_ptr(), offset) };
|
||||
&self.output[..compressed_size]
|
||||
}
|
||||
|
||||
pub fn compress_unsorted(&mut self, vals: &[u32]) -> &[u8] {
|
||||
let compressed_size = unsafe { compress_unsorted_cpp(vals.as_ptr(), self.output_buffer.as_mut_ptr()) };
|
||||
&self.output_buffer[..compressed_size]
|
||||
pub fn compress_block_unsorted(&mut self, vals: &[u32]) -> &[u8] {
|
||||
let compressed_size = unsafe { compress_unsorted_cpp(vals.as_ptr(), self.output.as_mut_ptr()) };
|
||||
&self.output[..compressed_size]
|
||||
}
|
||||
|
||||
pub fn compress_vint_sorted(&mut self, input: &[u32], mut offset: u32) -> &[u8] {
|
||||
let mut byte_written = 0;
|
||||
for v in input.iter() {
|
||||
let mut to_encode: u32 = *v - offset;
|
||||
offset = *v;
|
||||
loop {
|
||||
let next_byte: u8 = (to_encode % 128u32) as u8;
|
||||
to_encode /= 128u32;
|
||||
if to_encode == 0u32 {
|
||||
self.output[byte_written] = next_byte | 128u8;
|
||||
byte_written += 1;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
self.output[byte_written] = next_byte;
|
||||
byte_written += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return &self.output[..byte_written];
|
||||
}
|
||||
|
||||
pub fn compress_vint_unsorted(&mut self, input: &[u32]) -> &[u8] {
|
||||
let mut byte_written = 0;
|
||||
for &i in input.iter() {
|
||||
let mut to_encode: u32 = i;
|
||||
loop {
|
||||
let next_byte: u8 = (to_encode % 128u32) as u8;
|
||||
to_encode /= 128u32;
|
||||
if to_encode == 0u32 {
|
||||
self.output[byte_written] = next_byte | 128u8;
|
||||
byte_written += 1;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
self.output[byte_written] = next_byte;
|
||||
byte_written += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return &self.output[..byte_written];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
pub struct SIMDBlockDecoder {
|
||||
output_buffer: [u32; COMPRESSED_BLOCK_MAX_SIZE],
|
||||
output: [u32; COMPRESSED_BLOCK_MAX_SIZE],
|
||||
output_len: usize,
|
||||
}
|
||||
|
||||
|
||||
impl SIMDBlockDecoder {
|
||||
pub fn new() -> SIMDBlockDecoder {
|
||||
SIMDBlockDecoder {
|
||||
output_buffer: [0u32; COMPRESSED_BLOCK_MAX_SIZE]
|
||||
output: [0u32; COMPRESSED_BLOCK_MAX_SIZE],
|
||||
output_len: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn uncompress_sorted<'a>(&mut self, compressed_data: &'a [u8], offset: u32) -> &'a[u8] {
|
||||
let consumed_size = unsafe { uncompress_sorted_cpp(compressed_data.as_ptr(), self.output_buffer.as_mut_ptr(), offset) };
|
||||
pub fn uncompress_block_sorted<'a>(&mut self, compressed_data: &'a [u8], offset: u32) -> &'a[u8] {
|
||||
let consumed_size = unsafe { uncompress_sorted_cpp(compressed_data.as_ptr(), self.output.as_mut_ptr(), offset) };
|
||||
self.output_len = NUM_DOCS_PER_BLOCK;
|
||||
&compressed_data[consumed_size..]
|
||||
}
|
||||
|
||||
pub fn uncompress_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> &'a[u8] {
|
||||
let consumed_size = unsafe { uncompress_unsorted_cpp(compressed_data.as_ptr(), self.output_buffer.as_mut_ptr()) };
|
||||
pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> &'a[u8] {
|
||||
let consumed_size = unsafe { uncompress_unsorted_cpp(compressed_data.as_ptr(), self.output.as_mut_ptr()) };
|
||||
self.output_len = NUM_DOCS_PER_BLOCK;
|
||||
&compressed_data[consumed_size..]
|
||||
}
|
||||
|
||||
pub fn uncompress_vint_sorted<'a>(
|
||||
&mut self,
|
||||
compressed_data: &'a [u8],
|
||||
offset: u32,
|
||||
num_els: usize) -> &'a [u8] {
|
||||
let mut read_byte = 0;
|
||||
let mut result = offset;
|
||||
for i in 0..num_els {
|
||||
let mut shift = 0u32;
|
||||
loop {
|
||||
let cur_byte = compressed_data[read_byte];
|
||||
read_byte += 1;
|
||||
result += ((cur_byte % 128u8) as u32) << shift;
|
||||
if cur_byte & 128u8 != 0u8 {
|
||||
break;
|
||||
}
|
||||
shift += 7;
|
||||
}
|
||||
self.output[i] = result;
|
||||
}
|
||||
self.output_len = num_els;
|
||||
&compressed_data[read_byte..]
|
||||
}
|
||||
|
||||
pub fn uncompress_vint_unsorted<'a>(
|
||||
&mut self,
|
||||
compressed_data: &'a [u8],
|
||||
num_els: usize) -> &'a [u8] {
|
||||
let mut read_byte = 0;
|
||||
for i in 0..num_els {
|
||||
let mut result = 0u32;
|
||||
let mut shift = 0u32;
|
||||
loop {
|
||||
let cur_byte = compressed_data[read_byte];
|
||||
read_byte += 1;
|
||||
result += ((cur_byte % 128u8) as u32) << shift;
|
||||
if cur_byte & 128u8 != 0u8 {
|
||||
break;
|
||||
}
|
||||
shift += 7;
|
||||
}
|
||||
self.output[i] = result;
|
||||
}
|
||||
self.output_len = num_els;
|
||||
&compressed_data[read_byte..]
|
||||
}
|
||||
|
||||
pub fn output(&self,) -> &[u32] {
|
||||
&self.output_buffer
|
||||
&self.output[..self.output_len]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -84,10 +182,10 @@ mod tests {
|
||||
fn test_encode_sorted_block() {
|
||||
let vals: Vec<u32> = (0u32..128u32).map(|i| i*7).collect();
|
||||
let mut encoder = SIMDBlockEncoder::new();
|
||||
let compressed_data = encoder.compress_sorted(&vals, 0);
|
||||
let compressed_data = encoder.compress_block_sorted(&vals, 0);
|
||||
let mut decoder = SIMDBlockDecoder::new();
|
||||
{
|
||||
let remaining_data = decoder.uncompress_sorted(compressed_data, 0);
|
||||
let remaining_data = decoder.uncompress_block_sorted(compressed_data, 0);
|
||||
assert_eq!(remaining_data.len(), 0);
|
||||
}
|
||||
for i in 0..128 {
|
||||
@@ -99,10 +197,10 @@ mod tests {
|
||||
fn test_encode_sorted_block_with_offset() {
|
||||
let vals: Vec<u32> = (0u32..128u32).map(|i| 11 + i*7).collect();
|
||||
let mut encoder = SIMDBlockEncoder::new();
|
||||
let compressed_data = encoder.compress_sorted(&vals, 10);
|
||||
let compressed_data = encoder.compress_block_sorted(&vals, 10);
|
||||
let mut decoder = SIMDBlockDecoder::new();
|
||||
{
|
||||
let remaining_data = decoder.uncompress_sorted(compressed_data, 10);
|
||||
let remaining_data = decoder.uncompress_block_sorted(compressed_data, 10);
|
||||
assert_eq!(remaining_data.len(), 0);
|
||||
}
|
||||
for i in 0..128 {
|
||||
@@ -116,12 +214,12 @@ mod tests {
|
||||
let n = 128;
|
||||
let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32)*7u32).collect();
|
||||
let mut encoder = SIMDBlockEncoder::new();
|
||||
let compressed_data = encoder.compress_sorted(&vals, 10);
|
||||
let compressed_data = encoder.compress_block_sorted(&vals, 10);
|
||||
compressed.extend_from_slice(compressed_data);
|
||||
compressed.push(173u8);
|
||||
let mut decoder = SIMDBlockDecoder::new();
|
||||
{
|
||||
let remaining_data = decoder.uncompress_sorted(&compressed, 10);
|
||||
let remaining_data = decoder.uncompress_block_sorted(&compressed, 10);
|
||||
assert_eq!(remaining_data.len(), 1);
|
||||
assert_eq!(remaining_data[0], 173u8);
|
||||
}
|
||||
@@ -136,12 +234,12 @@ mod tests {
|
||||
let n = 128;
|
||||
let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32)*7u32 % 12).collect();
|
||||
let mut encoder = SIMDBlockEncoder::new();
|
||||
let compressed_data = encoder.compress_sorted(&vals, 10);
|
||||
let compressed_data = encoder.compress_block_sorted(&vals, 10);
|
||||
compressed.extend_from_slice(compressed_data);
|
||||
compressed.push(173u8);
|
||||
let mut decoder = SIMDBlockDecoder::new();
|
||||
{
|
||||
let remaining_data = decoder.uncompress_sorted(&compressed, 10);
|
||||
let remaining_data = decoder.uncompress_block_sorted(&compressed, 10);
|
||||
assert_eq!(remaining_data.len(), 1);
|
||||
assert_eq!(remaining_data[0], 173u8);
|
||||
}
|
||||
@@ -149,4 +247,38 @@ mod tests {
|
||||
assert_eq!(vals[i], decoder.output()[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Round-trips sorted vint encoding for several starting offsets, then pins
/// the exact bytes produced for a small hand-checked input.
#[test]
fn test_encode_vint() {
    {
        let input: Vec<u32> = (0u32..123u32).map(|i| 4 + i * 7 / 2).collect();
        let expected_length = 123;
        let mut encoder = SIMDBlockEncoder::new();
        for &offset in [0u32, 1u32, 2u32].iter() {
            let encoded_data = encoder.compress_vint_sorted(&input, offset);
            assert_eq!(encoded_data.len(), expected_length);
            let mut decoder = SIMDBlockDecoder::new();
            let remaining_data = decoder.uncompress_vint_sorted(encoded_data, offset, input.len());
            assert_eq!(0, remaining_data.len());
            for (&decoded, &expected) in decoder.output().iter().zip(input.iter()) {
                assert_eq!(decoded, expected);
            }
        }
    }
    {
        let input = vec![3u32, 17u32, 187u32];
        let mut encoder = SIMDBlockEncoder::new();
        let encoded_data = encoder.compress_vint_sorted(&input, 0);
        assert_eq!(encoded_data.len(), 4);
        // 3 and 14 fit in one byte each; 170 needs two (42, then 1 + terminator bit).
        assert_eq!(encoded_data[0], 3u8 + 128u8);
        assert_eq!(encoded_data[1], (17u8 - 3u8) + 128u8);
        assert_eq!(encoded_data[2], (187u8 - 17u8 - 128u8));
        assert_eq!(encoded_data[3], (1u8 + 128u8));
    }
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,112 +0,0 @@
|
||||
use libc::size_t;
|
||||
use std::ptr;
|
||||
use std::iter;
|
||||
|
||||
extern {
|
||||
fn encode_sorted_vint_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
|
||||
fn decode_sorted_vint_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
|
||||
}
|
||||
|
||||
pub struct VIntsEncoder {
|
||||
input_buffer: Vec<u32>,
|
||||
output_buffer: Vec<u8>,
|
||||
}
|
||||
|
||||
impl VIntsEncoder {
|
||||
|
||||
pub fn new() -> VIntsEncoder {
|
||||
VIntsEncoder {
|
||||
input_buffer: Vec::with_capacity(128),
|
||||
output_buffer: iter::repeat(0u8).take(256 * 4).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encode_sorted(&mut self, input: &[u32]) -> &[u8] {
|
||||
assert!(input.len() < 128);
|
||||
let input_len = input.len();
|
||||
let written_size: usize;
|
||||
// TODO use clone_from when available
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len);
|
||||
written_size = encode_sorted_vint_native(
|
||||
self.input_buffer.as_mut_ptr(),
|
||||
input_len as size_t,
|
||||
self.output_buffer.as_mut_ptr(),
|
||||
256 * 4,
|
||||
);
|
||||
}
|
||||
return &self.output_buffer[0..written_size];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
pub struct VIntsDecoder {
|
||||
output: [u32; 128],
|
||||
}
|
||||
|
||||
impl VIntsDecoder {
|
||||
|
||||
pub fn new() -> VIntsDecoder {
|
||||
VIntsDecoder {
|
||||
output: [0u32; 128]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_sorted(&mut self,
|
||||
compressed_data: &[u8]) -> &[u32] {
|
||||
unsafe {
|
||||
let num_uncompressed = decode_sorted_vint_native(
|
||||
compressed_data.as_ptr(),
|
||||
compressed_data.len() as size_t,
|
||||
self.output.as_mut_ptr(),
|
||||
128);
|
||||
&self.output[..num_uncompressed]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {

    use super::*;

    /// Covers a 123-value round trip, the exact bytes of a small input, and
    /// a round trip starting at zero.
    #[test]
    fn test_encode_vint() {
        {
            let input: Vec<u32> = (0u32..123u32).map(|i| i * 7 / 2).collect();
            let expected_length = 124;
            let mut encoder = VIntsEncoder::new();
            let encoded_data = encoder.encode_sorted(&input);
            assert_eq!(encoded_data.len(), expected_length);
            let mut decoder = VIntsDecoder::new();
            let decoded_data = decoder.decode_sorted(encoded_data);
            assert_eq!(123, decoded_data.len());
            assert_eq!(&decoded_data[0..123], &input[..]);
        }
        {
            let input = vec![3u32, 17u32, 187u32];
            let mut encoder = VIntsEncoder::new();
            let encoded_data = encoder.encode_sorted(&input);
            assert_eq!(encoded_data.len(), 4);
            assert_eq!(encoded_data[0], 3u8 + 128u8);
            assert_eq!(encoded_data[1], (17u8 - 3u8) + 128u8);
            assert_eq!(encoded_data[2], (187u8 - 17u8 - 128u8));
            assert_eq!(encoded_data[3], (1u8 + 128u8));
        }
        {
            let input = vec![0u32, 1u32, 2u32];
            let mut encoder = VIntsEncoder::new();
            let encoded_data = encoder.encode_sorted(&input);
            let mut decoder = VIntsDecoder::new();
            let decoded_data = decoder.decode_sorted(encoded_data);
            assert_eq!(3, decoded_data.len());
            assert_eq!(decoded_data, &input[..]);
        }
    }

}
|
||||
@@ -1,17 +1,15 @@
|
||||
use postings::Postings;
|
||||
use compression::{NUM_DOCS_PER_BLOCK, Block128Decoder};
|
||||
use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockDecoder};
|
||||
use DocId;
|
||||
use std::cmp::Ordering;
|
||||
use postings::SkipResult;
|
||||
use std::io::Cursor;
|
||||
use common::VInt;
|
||||
use std::num::Wrapping;
|
||||
use common::BinarySerializable;
|
||||
|
||||
// No Term Frequency, no postings.
|
||||
pub struct SegmentPostings<'a> {
|
||||
doc_freq: usize,
|
||||
block_decoder: Block128Decoder,
|
||||
doc_offset: u32,
|
||||
block_decoder: SIMDBlockDecoder,
|
||||
remaining_data: &'a [u8],
|
||||
cur: Wrapping<usize>,
|
||||
}
|
||||
@@ -23,29 +21,29 @@ impl<'a> SegmentPostings<'a> {
|
||||
pub fn empty() -> SegmentPostings<'a> {
|
||||
SegmentPostings {
|
||||
doc_freq: 0,
|
||||
block_decoder: Block128Decoder::new(),
|
||||
doc_offset: 0,
|
||||
block_decoder: SIMDBlockDecoder::new(),
|
||||
remaining_data: &EMPTY_ARRAY,
|
||||
cur: Wrapping(usize::max_value()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_next_block(&mut self,) {
|
||||
if self.doc_freq - self.cur.0 >= NUM_DOCS_PER_BLOCK {
|
||||
self.remaining_data = self.block_decoder.decode_sorted(self.remaining_data);
|
||||
let num_remaining_docs = self.doc_freq - self.cur.0;
|
||||
if num_remaining_docs >= NUM_DOCS_PER_BLOCK {
|
||||
self.remaining_data = self.block_decoder.uncompress_block_sorted(self.remaining_data, self.doc_offset);
|
||||
self.doc_offset = self.block_decoder.output()[NUM_DOCS_PER_BLOCK - 1];
|
||||
}
|
||||
else {
|
||||
let mut cursor = Cursor::new(self.remaining_data);
|
||||
let remaining_len: usize = VInt::deserialize(&mut cursor).unwrap().0 as usize;
|
||||
let position = cursor.position() as usize;
|
||||
self.remaining_data = &self.remaining_data[position..position+remaining_len];
|
||||
self.block_decoder.decode_sorted_remaining(self.remaining_data);
|
||||
self.remaining_data = self.block_decoder.uncompress_vint_sorted(self.remaining_data, self.doc_offset, num_remaining_docs);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_data(doc_freq: u32, data: &'a [u8]) -> SegmentPostings<'a> {
|
||||
SegmentPostings {
|
||||
doc_freq: doc_freq as usize,
|
||||
block_decoder: Block128Decoder::new(),
|
||||
doc_offset: 0,
|
||||
block_decoder: SIMDBlockDecoder::new(),
|
||||
remaining_data: data,
|
||||
cur: Wrapping(usize::max_value()),
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ use datastruct::FstMapBuilder;
|
||||
use super::TermInfo;
|
||||
use schema::Term;
|
||||
use directory::WritePtr;
|
||||
use compression::{NUM_DOCS_PER_BLOCK, Block128Encoder, VIntsEncoder, S4BP128Encoder};
|
||||
use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockEncoder, CompositeEncoder};
|
||||
use DocId;
|
||||
use core::index::Segment;
|
||||
use std::io;
|
||||
@@ -17,9 +17,9 @@ pub struct PostingsSerializer {
|
||||
positions_write: WritePtr,
|
||||
written_bytes_postings: usize,
|
||||
written_bytes_positions: usize,
|
||||
positions_encoder: S4BP128Encoder,
|
||||
block_encoder: Block128Encoder,
|
||||
vints_encoder: VIntsEncoder,
|
||||
last_doc_id_encoded: u32,
|
||||
positions_encoder: CompositeEncoder,
|
||||
block_encoder: SIMDBlockEncoder,
|
||||
doc_ids: Vec<DocId>,
|
||||
term_freqs: Vec<u32>,
|
||||
position_deltas: Vec<u32>,
|
||||
@@ -40,9 +40,9 @@ impl PostingsSerializer {
|
||||
positions_write: positions_write,
|
||||
written_bytes_postings: 0,
|
||||
written_bytes_positions: 0,
|
||||
positions_encoder: S4BP128Encoder::new(),
|
||||
block_encoder: Block128Encoder::new(),
|
||||
vints_encoder: VIntsEncoder::new(),
|
||||
last_doc_id_encoded: 0u32,
|
||||
positions_encoder: CompositeEncoder::new(),
|
||||
block_encoder: SIMDBlockEncoder::new(),
|
||||
doc_ids: Vec::new(),
|
||||
term_freqs: Vec::new(),
|
||||
position_deltas: Vec::new(),
|
||||
@@ -54,6 +54,7 @@ impl PostingsSerializer {
|
||||
pub fn new_term(&mut self, term: &Term, doc_freq: DocId) -> io::Result<()> {
|
||||
try!(self.close_term());
|
||||
self.doc_ids.clear();
|
||||
self.last_doc_id_encoded = 0;
|
||||
self.term_freqs.clear();
|
||||
self.position_deltas.clear();
|
||||
let term_info = TermInfo {
|
||||
@@ -67,16 +68,13 @@ impl PostingsSerializer {
|
||||
pub fn close_term(&mut self,) -> io::Result<()> {
|
||||
if !self.doc_ids.is_empty() {
|
||||
{
|
||||
let block_encoded = self.vints_encoder.encode_sorted(&self.doc_ids[..]);
|
||||
self.written_bytes_postings += try!(VInt(block_encoded.len() as u64).serialize(&mut self.postings_write));
|
||||
|
||||
for num in block_encoded {
|
||||
self.written_bytes_postings += try!(num.serialize(&mut self.postings_write));
|
||||
}
|
||||
let block_encoded = self.block_encoder.compress_vint_sorted(&self.doc_ids, self.last_doc_id_encoded);
|
||||
self.written_bytes_postings += block_encoded.len();
|
||||
try!(self.postings_write.write_all(block_encoded));
|
||||
}
|
||||
if self.is_termfreq_enabled {
|
||||
{
|
||||
let block_encoded = self.vints_encoder.encode_sorted(&self.term_freqs[..]);
|
||||
let block_encoded = self.block_encoder.compress_vint_unsorted(&self.term_freqs[..]);
|
||||
self.written_bytes_postings += try!(VInt(block_encoded.len() as u64).serialize(&mut self.postings_write));
|
||||
for num in block_encoded {
|
||||
self.written_bytes_postings += try!(num.serialize(&mut self.postings_write));
|
||||
@@ -84,8 +82,7 @@ impl PostingsSerializer {
|
||||
self.term_freqs.clear();
|
||||
}
|
||||
if self.is_positions_enabled {
|
||||
let positions_encoded: &[u8] = self.positions_encoder.encode(&self.position_deltas[..]);
|
||||
self.written_bytes_positions += try!(VInt(positions_encoded.len() as u64).serialize(&mut self.positions_write));
|
||||
let positions_encoded: &[u8] = self.positions_encoder.compress_unsorted(&self.position_deltas[..]);
|
||||
try!(self.positions_write.write_all(positions_encoded));
|
||||
self.written_bytes_positions += positions_encoded.len();
|
||||
self.position_deltas.clear();
|
||||
@@ -107,21 +104,16 @@ impl PostingsSerializer {
|
||||
if self.doc_ids.len() == NUM_DOCS_PER_BLOCK {
|
||||
{
|
||||
// encode the positions
|
||||
let block_encoded: &[u8] = self.block_encoder.encode_sorted(&self.doc_ids);
|
||||
let block_encoded: &[u8] = self.block_encoder.compress_block_sorted(&self.doc_ids, self.last_doc_id_encoded);
|
||||
self.last_doc_id_encoded = self.doc_ids[self.doc_ids.len() - 1];
|
||||
try!(self.postings_write.write_all(block_encoded));
|
||||
self.written_bytes_postings += block_encoded.len();
|
||||
}
|
||||
if self.is_termfreq_enabled {
|
||||
// encode the term_freqs
|
||||
let block_encoded: &[u8] = self.block_encoder.encode_sorted(&self.term_freqs);
|
||||
let block_encoded: &[u8] = self.block_encoder.compress_block_unsorted(&self.term_freqs);
|
||||
try!(self.postings_write.write_all(block_encoded));
|
||||
self.written_bytes_postings += block_encoded.len();
|
||||
if self.is_positions_enabled {
|
||||
let positions_encoded: &[u8] = self.positions_encoder.encode(&self.position_deltas[..]);
|
||||
try!(self.positions_write.write_all(positions_encoded));
|
||||
self.written_bytes_positions += positions_encoded.len();
|
||||
self.position_deltas.clear();
|
||||
}
|
||||
self.term_freqs.clear();
|
||||
}
|
||||
self.doc_ids.clear();
|
||||
|
||||
Reference in New Issue
Block a user