mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-22 03:00:42 +00:00
Merge remote-tracking branch 'origin/master' into bug8/experimental
This commit is contained in:
@@ -1,124 +0,0 @@
|
||||
use std::num::Wrapping;
|
||||
|
||||
|
||||
|
||||
// ported from libdivide.h by ridiculous_fish
|
||||
|
||||
|
||||
|
||||
// Mask applied to `DividerU32::more` to extract the shift amount
// (the low five bits) before shifting.
const LIBDIVIDE_32_SHIFT_MASK: u8 = 0x1F;
|
||||
// Flag OR-ed into `more` by `divide_by` when `divide` must take the
// `((n - q) >> 1) + q` correction path before shifting.
const LIBDIVIDE_ADD_MARKER: u8 = 0x40;
|
||||
// Flag OR-ed into `more` by `divide_by` for power-of-two divisors:
// `divide` then reduces to a plain right shift and ignores `magic`.
const LIBDIVIDE_U32_SHIFT_PATH: u8 = 0x80;
|
||||
|
||||
/// Returns the number of leading zero bits in `val`.
///
/// The result lies in `0..=32`; in particular `count_leading_zeros(0)`
/// is `32`.
pub fn count_leading_zeros(val: u32) -> u8 {
    // `u32::leading_zeros` never exceeds 32, so narrowing to `u8` is lossless.
    val.leading_zeros() as u8
}
|
||||
|
||||
/// Returns the number of trailing zero bits in `val`.
///
/// The result lies in `0..=32`; `count_trailing_zeros(0)` is `32`.
pub fn count_trailing_zeros(val: u32) -> u8 {
    // The intrinsic is defined for every input. A `val ^ (val - 1)` bit trick
    // would underflow on `val == 0` (a panic in debug builds), so it is
    // deliberately not used here.
    // `u32::trailing_zeros` never exceeds 32, so narrowing to `u8` is lossless.
    val.trailing_zeros() as u8
}
|
||||
|
||||
/// Precomputed state for dividing `u32` values by one fixed divisor.
///
/// Instances are built by `DividerU32::divide_by` and used through
/// `DividerU32::divide`. The type is two plain integers, so it is cheap to
/// copy and can be compared for equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DividerU32 {
    /// Multiplier for the multiply-and-shift paths; `0` on the pure
    /// shift path used for power-of-two divisors.
    magic: u32,
    /// Shift amount in the low five bits, optionally combined with the
    /// `LIBDIVIDE_ADD_MARKER` / `LIBDIVIDE_U32_SHIFT_PATH` flag bits.
    more: u8,
}
|
||||
|
||||
/// Divides the 64-bit `n` by the 32-bit `d` and returns
/// `(quotient, remainder)`.
///
/// The quotient is truncated to its low 32 bits, so callers must make sure
/// `n / d` fits in a `u32` if they need the exact value. The remainder is
/// always strictly smaller than `d` and therefore never truncated.
///
/// # Panics
///
/// Panics if `d` is zero.
fn divide_64_div_32_to_32(n: u64, d: u32) -> (u32, u32) {
    let d64 = u64::from(d);
    let quotient = n / d64;
    // `n % d64` equals `n - quotient * d64` exactly, so no wrapping
    // arithmetic is needed to recover the remainder.
    let remainder = n % d64;
    (quotient as u32, remainder as u32)
}
|
||||
|
||||
impl DividerU32 {
|
||||
pub fn divide_by(d: u32) -> DividerU32 {
|
||||
if d == 0 {
|
||||
DividerU32::divide_by(u32::max_value())
|
||||
}
|
||||
else if (d & (d - 1)) == 0 {
|
||||
DividerU32 {
|
||||
magic: 0,
|
||||
more: count_trailing_zeros(d) | LIBDIVIDE_U32_SHIFT_PATH,
|
||||
}
|
||||
}
|
||||
else {
|
||||
let floor_log_2_d: u8 = 31 - count_leading_zeros(d);
|
||||
let more: u8;
|
||||
let (mut proposed_m, rem) = divide_64_div_32_to_32((1u64 << floor_log_2_d) << 32, d);
|
||||
debug_assert!(rem > 0 && rem < d);
|
||||
let e = d - rem;
|
||||
if e < (1u32 << floor_log_2_d) {
|
||||
more = floor_log_2_d;
|
||||
}
|
||||
else {
|
||||
proposed_m = proposed_m << 1;
|
||||
let twice_rem: u32 = rem * 2;
|
||||
if twice_rem >= d || twice_rem < rem {
|
||||
proposed_m += 1;
|
||||
}
|
||||
more = floor_log_2_d | LIBDIVIDE_ADD_MARKER;
|
||||
}
|
||||
DividerU32 {
|
||||
magic: 1 + proposed_m,
|
||||
more: more,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn divide(&self, n: u32) -> u32 {
|
||||
if self.more & LIBDIVIDE_U32_SHIFT_PATH != 0 {
|
||||
n >> (self.more & LIBDIVIDE_32_SHIFT_MASK)
|
||||
}
|
||||
else {
|
||||
let q_shifted = (self.magic as u64) * (n as u64);
|
||||
let q = (q_shifted >> 32) as u32;
|
||||
if self.more & LIBDIVIDE_ADD_MARKER != 0 {
|
||||
let t = ((n - q) >> 1) + q;
|
||||
t >> (self.more & LIBDIVIDE_32_SHIFT_MASK)
|
||||
}
|
||||
else {
|
||||
q >> self.more
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::DividerU32;

    // Dividends around the 16-, 31- and 32-bit boundaries, so that the high
    // bits of the 64-bit product in `divide` are exercised as well.
    const BOUNDARY_DIVIDENDS: [u32; 7] = [
        0xFFFF,
        0x1_0000,
        0x7FFF_FFFF,
        0x8000_0000,
        0x8000_0001,
        0xFFFF_FFFE,
        0xFFFF_FFFF,
    ];

    // Compares `DividerU32` against the native `/` operator for the non-zero
    // divisor `d`, over a dense low range plus the boundary dividends.
    fn assert_matches_native(d: u32) {
        let divider = DividerU32::divide_by(d);
        for n in 0..100_000u32 {
            assert_eq!(divider.divide(n), n / d);
        }
        for &n in BOUNDARY_DIVIDENDS.iter() {
            assert_eq!(divider.divide(n), n / d);
        }
    }

    #[test]
    fn test_libdivide() {
        for d in 1..32 {
            assert_matches_native(d);
        }
    }

    #[test]
    fn test_libdivide_large_divisors() {
        let divisors: [u32; 8] = [
            0x1_0000,
            0x1_0001,
            1_000_000_007,
            0x8000_0000,
            0x8000_0001,
            3_000_000_000,
            0xFFFF_FFFE,
            0xFFFF_FFFF,
        ];
        for &d in divisors.iter() {
            assert_matches_native(d);
        }
    }

    #[test]
    fn test_libdivide_by0() {
        // A zero divisor is accepted by `divide_by`; small dividends map to 0.
        let divider = DividerU32::divide_by(0);
        for i in 0..100_000 {
            assert_eq!(divider.divide(i), 0);
        }
    }
}
|
||||
@@ -1,15 +1,23 @@
|
||||
mod fastdivide;
|
||||
mod reader;
|
||||
mod writer;
|
||||
mod serializer;
|
||||
|
||||
|
||||
pub use self::fastdivide::DividerU32;
|
||||
pub use self::writer::{U32FastFieldsWriter, U32FastFieldWriter};
|
||||
pub use self::reader::{U32FastFieldsReader, U32FastFieldReader};
|
||||
pub use self::serializer::FastFieldSerializer;
|
||||
|
||||
use self::fastdivide::count_leading_zeros;
|
||||
/// Returns the number of leading zero bits in `val`.
///
/// The result lies in `0..=32`; in particular `count_leading_zeros(0)`
/// is `32`.
fn count_leading_zeros(val: u32) -> u8 {
    // `u32::leading_zeros` never exceeds 32, so narrowing to `u8` is lossless.
    val.leading_zeros() as u8
}
|
||||
|
||||
|
||||
fn compute_num_bits(amplitude: u32) -> u8 {
|
||||
32u8 - count_leading_zeros(amplitude)
|
||||
@@ -111,7 +119,7 @@ mod tests {
|
||||
}
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
assert_eq!(source.len(), 58 as usize);
|
||||
assert_eq!(source.len(), 50 as usize);
|
||||
}
|
||||
{
|
||||
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
|
||||
|
||||
@@ -4,7 +4,6 @@ use std::collections::HashMap;
|
||||
use std::ops::Deref;
|
||||
|
||||
use directory::ReadOnlySource;
|
||||
use fastfield::DividerU32;
|
||||
use common::BinarySerializable;
|
||||
use DocId;
|
||||
use schema::Field;
|
||||
@@ -13,13 +12,11 @@ use super::compute_num_bits;
|
||||
|
||||
pub struct U32FastFieldReader {
|
||||
_data: ReadOnlySource,
|
||||
data_ptr: *const u64,
|
||||
data_ptr: *const u8,
|
||||
min_val: u32,
|
||||
max_val: u32,
|
||||
num_bits: u8,
|
||||
num_bits: u32,
|
||||
mask: u32,
|
||||
num_in_pack: u32,
|
||||
divider: DividerU32,
|
||||
}
|
||||
|
||||
impl U32FastFieldReader {
|
||||
@@ -42,34 +39,24 @@ impl U32FastFieldReader {
|
||||
}
|
||||
let num_bits = compute_num_bits(amplitude);
|
||||
let mask = (1 << num_bits) - 1;
|
||||
let num_in_pack;
|
||||
if num_bits == 0u8 {
|
||||
num_in_pack = 0u32;
|
||||
}
|
||||
else {
|
||||
num_in_pack = 64u32 / (num_bits as u32);
|
||||
}
|
||||
let ptr: *const u8 = &(data.deref()[8 as usize]);
|
||||
Ok(U32FastFieldReader {
|
||||
_data: data,
|
||||
data_ptr: ptr as *const u64,
|
||||
data_ptr: ptr,
|
||||
min_val: min_val,
|
||||
max_val: min_val + amplitude,
|
||||
num_bits: num_bits,
|
||||
num_bits: num_bits as u32,
|
||||
mask: mask,
|
||||
num_in_pack: num_in_pack,
|
||||
divider: DividerU32::divide_by(num_in_pack),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get(&self, doc: DocId) -> u32 {
|
||||
if self.num_in_pack == 0u32 {
|
||||
if self.num_bits == 0u32 {
|
||||
return self.min_val;
|
||||
}
|
||||
let long_addr = self.divider.divide(doc);
|
||||
let ord_within_long = doc - long_addr * self.num_in_pack;
|
||||
let bit_shift = (self.num_bits as u32) * ord_within_long;
|
||||
let val_unshifted_unmasked: u64 = unsafe { *self.data_ptr.offset(long_addr as isize) };
|
||||
let addr = (doc * self.num_bits) / 8;
|
||||
let bit_shift = (doc * self.num_bits) - addr * 8; //doc - long_addr * self.num_in_pack;
|
||||
let val_unshifted_unmasked: u64 = unsafe { * (self.data_ptr.offset(addr as isize) as *const u64) };
|
||||
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u32;
|
||||
return self.min_val + (val_shifted & self.mask);
|
||||
}
|
||||
|
||||
@@ -10,12 +10,12 @@ pub struct FastFieldSerializer {
|
||||
written_size: usize,
|
||||
fields: Vec<(Field, u32)>,
|
||||
num_bits: u8,
|
||||
|
||||
min_value: u32,
|
||||
|
||||
field_open: bool,
|
||||
|
||||
|
||||
mini_buffer_written: usize,
|
||||
mini_buffer: u64,
|
||||
mini_buffer: u32,
|
||||
}
|
||||
|
||||
impl FastFieldSerializer {
|
||||
@@ -27,10 +27,11 @@ impl FastFieldSerializer {
|
||||
written_size: written_size,
|
||||
fields: Vec::new(),
|
||||
num_bits: 0u8,
|
||||
field_open: false,
|
||||
mini_buffer_written: 0,
|
||||
mini_buffer: 0,
|
||||
min_value: 0,
|
||||
field_open: false,
|
||||
|
||||
mini_buffer_written: 0,
|
||||
mini_buffer: 0u32,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -57,13 +58,23 @@ impl FastFieldSerializer {
|
||||
|
||||
pub fn add_val(&mut self, val: u32) -> io::Result<()> {
|
||||
let write: &mut Write = &mut self.write;
|
||||
if self.mini_buffer_written + (self.num_bits as usize) > 64 {
|
||||
let val_to_write: u32 = val - self.min_value;
|
||||
if self.mini_buffer_written + self.num_bits as usize > 32 {
|
||||
self.mini_buffer |= val_to_write.wrapping_shl(self.mini_buffer_written as u32);
|
||||
self.written_size += try!(self.mini_buffer.serialize(write));
|
||||
self.mini_buffer = 0;
|
||||
self.mini_buffer_written = 0;
|
||||
// overflow of the shift operand is guarded here by the if case.
|
||||
self.mini_buffer = val_to_write.wrapping_shr(32u32 - self.mini_buffer_written as u32);
|
||||
self.mini_buffer_written = self.mini_buffer_written + (self.num_bits as usize) - 32 ;
|
||||
}
|
||||
else {
|
||||
self.mini_buffer |= val_to_write << self.mini_buffer_written;
|
||||
self.mini_buffer_written += self.num_bits as usize;
|
||||
if self.mini_buffer_written == 32 {
|
||||
self.written_size += try!(self.mini_buffer.serialize(write));
|
||||
self.mini_buffer_written = 0;
|
||||
self.mini_buffer = 0u32;
|
||||
}
|
||||
}
|
||||
self.mini_buffer |= ((val - self.min_value) as u64) << self.mini_buffer_written;
|
||||
self.mini_buffer_written += self.num_bits as usize;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -76,6 +87,10 @@ impl FastFieldSerializer {
|
||||
self.mini_buffer_written = 0;
|
||||
self.written_size += try!(self.mini_buffer.serialize(&mut self.write));
|
||||
}
|
||||
// adding some padding to make sure we
|
||||
// can read the last elements with our u64
|
||||
// cursor
|
||||
self.written_size += try!(0u32.serialize(&mut self.write));
|
||||
self.mini_buffer = 0;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user