Merge remote-tracking branch 'origin/master' into bug8/experimental

This commit is contained in:
Paul Masurel
2016-09-15 00:01:53 +09:00
4 changed files with 47 additions and 161 deletions

View File

@@ -1,124 +0,0 @@
use std::num::Wrapping;
// ported from libdivide.h by ridiculous_fish
const LIBDIVIDE_32_SHIFT_MASK: u8 = 0x1F;
const LIBDIVIDE_ADD_MARKER: u8 = 0x40;
const LIBDIVIDE_U32_SHIFT_PATH: u8 = 0x80;
/// Returns the number of leading zero bits in `val`.
///
/// A zero input has no set bit at all, so the result is `32`.
pub fn count_leading_zeros(val: u32) -> u8 {
    // `u32::leading_zeros` never exceeds 32, so narrowing to `u8` is lossless.
    val.leading_zeros() as u8
}
/// Returns the number of trailing zero bits in `val`.
///
/// A zero input has no set bit at all, so the result is `32`. The previous
/// hand-rolled version computed `val - 1` first, which underflowed (and
/// panicked in overflow-checked builds) when `val` was zero.
pub fn count_trailing_zeros(val: u32) -> u8 {
    // `u32::trailing_zeros` never exceeds 32, so narrowing to `u8` is lossless.
    val.trailing_zeros() as u8
}
/// Precomputed state for dividing `u32` values by one fixed divisor.
///
/// Built once with `DividerU32::divide_by` and then applied to many
/// numerators with `DividerU32::divide`.
#[derive(Debug)]
pub struct DividerU32 {
// Multiplier applied to the numerator in `divide`; set to 0 when the
// divisor is a power of two, where only a shift is performed.
magic: u32,
// Packed byte: the shift amount (read through `LIBDIVIDE_32_SHIFT_MASK`
// when a flag is set) combined with the `LIBDIVIDE_ADD_MARKER` and
// `LIBDIVIDE_U32_SHIFT_PATH` flag bits.
more: u8,
}
/// Divides the 64-bit numerator `n` by the 32-bit divisor `d`.
///
/// Returns `(quotient, remainder)`. The quotient is narrowed to its low
/// 32 bits; the remainder is always smaller than `d`, so it fits as is.
/// As with any integer division, a zero `d` panics.
fn divide_64_div_32_to_32(n: u64, d: u32) -> (u32, u32) {
    let divisor = u64::from(d);
    let quotient = (n / divisor) as u32;
    // `n % divisor` equals `n - (n / divisor) * divisor`.
    let remainder = (n % divisor) as u32;
    (quotient, remainder)
}
impl DividerU32 {
/// Precomputes the multiplier and shift needed to divide by `d`.
///
/// Three cases are handled:
/// * `d == 0` is treated as a division by `u32::max_value()` instead of
///   panicking.
/// * a power of two only records a shift (flagged with
///   `LIBDIVIDE_U32_SHIFT_PATH`).
/// * any other divisor gets a multiplier (`magic`) plus a shift, with
///   `LIBDIVIDE_ADD_MARKER` set when `divide` must apply its extra
///   add-and-halve step.
pub fn divide_by(d: u32) -> DividerU32 {
if d == 0 {
// Division by zero is mapped onto the largest possible divisor.
DividerU32::divide_by(u32::max_value())
}
else if (d & (d - 1)) == 0 {
// Exactly one bit set: `d` is a power of two, so dividing is a plain
// right shift by the position of that bit.
DividerU32 {
magic: 0,
more: count_trailing_zeros(d) | LIBDIVIDE_U32_SHIFT_PATH,
}
}
else {
// Index of the highest set bit of `d`, i.e. floor(log2(d)).
let floor_log_2_d: u8 = 31 - count_leading_zeros(d);
let more: u8;
// Quotient and remainder of 2^(32 + floor_log_2_d) divided by `d`.
let (mut proposed_m, rem) = divide_64_div_32_to_32((1u64 << floor_log_2_d) << 32, d);
// `d` is not a power of two here, so it cannot divide a power of two evenly.
debug_assert!(rem > 0 && rem < d);
let e = d - rem;
if e < (1u32 << floor_log_2_d) {
// The proposed multiplier is used as is, with a shift of floor_log_2_d.
more = floor_log_2_d;
}
else {
// Otherwise the multiplier is doubled (the shift drops its top bit) and
// the add marker tells `divide` to apply the `((n - q) >> 1) + q` step.
proposed_m = proposed_m << 1;
let twice_rem: u32 = rem * 2;
// Doubling the remainder may carry one more unit into the quotient.
// NOTE(review): `twice_rem < rem` looks like a wrap-around check presumably
// carried over from libdivide.h; in Rust `rem * 2` would panic on overflow
// in overflow-checked builds instead of wrapping -- confirm it cannot
// overflow on this path.
if twice_rem >= d || twice_rem < rem {
proposed_m += 1;
}
more = floor_log_2_d | LIBDIVIDE_ADD_MARKER;
}
DividerU32 {
magic: 1 + proposed_m,
more: more,
}
}
}
/// Divides `n` by the divisor this `DividerU32` was built for.
pub fn divide(&self, n: u32) -> u32 {
if self.more & LIBDIVIDE_U32_SHIFT_PATH != 0 {
// Power-of-two divisor: the low bits of `more` hold the shift amount.
n >> (self.more & LIBDIVIDE_32_SHIFT_MASK)
}
else {
// Keep the high 32 bits of the 64-bit product `magic * n`.
let q_shifted = (self.magic as u64) * (n as u64);
let q = (q_shifted >> 32) as u32;
if self.more & LIBDIVIDE_ADD_MARKER != 0 {
// Add-marker path: average `n` and `q` before the final shift.
let t = ((n - q) >> 1) + q;
t >> (self.more & LIBDIVIDE_32_SHIFT_MASK)
}
else {
// Neither flag bit is set on this path, so `more` is the bare shift amount.
q >> self.more
}
}
}
}
#[cfg(test)]
mod tests {
    use super::DividerU32;

    /// The precomputed divider must agree with the native `/` operator
    /// for every divisor in `1..32` and every numerator below 100 000.
    #[test]
    fn test_libdivide() {
        for divisor in 1..32 {
            let fast = DividerU32::divide_by(divisor);
            for numerator in 0..100_000 {
                let expected = numerator / divisor;
                assert_eq!(fast.divide(numerator), expected);
            }
        }
    }

    /// A divider built for zero does not panic and yields 0 for every
    /// numerator below 100 000.
    #[test]
    fn test_libdivide_by0() {
        let fast = DividerU32::divide_by(0);
        for numerator in 0..100_000 {
            assert_eq!(fast.divide(numerator), 0);
        }
    }
}

View File

@@ -1,15 +1,23 @@
mod fastdivide;
mod reader;
mod writer;
mod serializer;
pub use self::fastdivide::DividerU32;
pub use self::writer::{U32FastFieldsWriter, U32FastFieldWriter};
pub use self::reader::{U32FastFieldsReader, U32FastFieldReader};
pub use self::serializer::FastFieldSerializer;
use self::fastdivide::count_leading_zeros;
/// Returns the number of leading zero bits in `val`.
///
/// A zero input has no set bit at all, so the result is `32`.
fn count_leading_zeros(val: u32) -> u8 {
    // `u32::leading_zeros` never exceeds 32, so narrowing to `u8` is lossless.
    val.leading_zeros() as u8
}
fn compute_num_bits(amplitude: u32) -> u8 {
32u8 - count_leading_zeros(amplitude)
@@ -111,7 +119,7 @@ mod tests {
}
let source = directory.open_read(&path).unwrap();
{
assert_eq!(source.len(), 58 as usize);
assert_eq!(source.len(), 50 as usize);
}
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();

View File

@@ -4,7 +4,6 @@ use std::collections::HashMap;
use std::ops::Deref;
use directory::ReadOnlySource;
use fastfield::DividerU32;
use common::BinarySerializable;
use DocId;
use schema::Field;
@@ -13,13 +12,11 @@ use super::compute_num_bits;
pub struct U32FastFieldReader {
_data: ReadOnlySource,
data_ptr: *const u64,
data_ptr: *const u8,
min_val: u32,
max_val: u32,
num_bits: u8,
num_bits: u32,
mask: u32,
num_in_pack: u32,
divider: DividerU32,
}
impl U32FastFieldReader {
@@ -42,34 +39,24 @@ impl U32FastFieldReader {
}
let num_bits = compute_num_bits(amplitude);
let mask = (1 << num_bits) - 1;
let num_in_pack;
if num_bits == 0u8 {
num_in_pack = 0u32;
}
else {
num_in_pack = 64u32 / (num_bits as u32);
}
let ptr: *const u8 = &(data.deref()[8 as usize]);
Ok(U32FastFieldReader {
_data: data,
data_ptr: ptr as *const u64,
data_ptr: ptr,
min_val: min_val,
max_val: min_val + amplitude,
num_bits: num_bits,
num_bits: num_bits as u32,
mask: mask,
num_in_pack: num_in_pack,
divider: DividerU32::divide_by(num_in_pack),
})
}
pub fn get(&self, doc: DocId) -> u32 {
if self.num_in_pack == 0u32 {
if self.num_bits == 0u32 {
return self.min_val;
}
let long_addr = self.divider.divide(doc);
let ord_within_long = doc - long_addr * self.num_in_pack;
let bit_shift = (self.num_bits as u32) * ord_within_long;
let val_unshifted_unmasked: u64 = unsafe { *self.data_ptr.offset(long_addr as isize) };
let addr = (doc * self.num_bits) / 8;
let bit_shift = (doc * self.num_bits) - addr * 8; //doc - long_addr * self.num_in_pack;
let val_unshifted_unmasked: u64 = unsafe { * (self.data_ptr.offset(addr as isize) as *const u64) };
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u32;
return self.min_val + (val_shifted & self.mask);
}

View File

@@ -10,12 +10,12 @@ pub struct FastFieldSerializer {
written_size: usize,
fields: Vec<(Field, u32)>,
num_bits: u8,
min_value: u32,
field_open: bool,
mini_buffer_written: usize,
mini_buffer: u64,
mini_buffer: u32,
}
impl FastFieldSerializer {
@@ -27,10 +27,11 @@ impl FastFieldSerializer {
written_size: written_size,
fields: Vec::new(),
num_bits: 0u8,
field_open: false,
mini_buffer_written: 0,
mini_buffer: 0,
min_value: 0,
field_open: false,
mini_buffer_written: 0,
mini_buffer: 0u32,
})
}
@@ -57,13 +58,23 @@ impl FastFieldSerializer {
pub fn add_val(&mut self, val: u32) -> io::Result<()> {
let write: &mut Write = &mut self.write;
if self.mini_buffer_written + (self.num_bits as usize) > 64 {
let val_to_write: u32 = val - self.min_value;
if self.mini_buffer_written + self.num_bits as usize > 32 {
self.mini_buffer |= val_to_write.wrapping_shl(self.mini_buffer_written as u32);
self.written_size += try!(self.mini_buffer.serialize(write));
self.mini_buffer = 0;
self.mini_buffer_written = 0;
// overflow of the shift operand is guarded here by the if case.
self.mini_buffer = val_to_write.wrapping_shr(32u32 - self.mini_buffer_written as u32);
self.mini_buffer_written = self.mini_buffer_written + (self.num_bits as usize) - 32 ;
}
else {
self.mini_buffer |= val_to_write << self.mini_buffer_written;
self.mini_buffer_written += self.num_bits as usize;
if self.mini_buffer_written == 32 {
self.written_size += try!(self.mini_buffer.serialize(write));
self.mini_buffer_written = 0;
self.mini_buffer = 0u32;
}
}
self.mini_buffer |= ((val - self.min_value) as u64) << self.mini_buffer_written;
self.mini_buffer_written += self.num_bits as usize;
Ok(())
}
@@ -76,6 +87,10 @@ impl FastFieldSerializer {
self.mini_buffer_written = 0;
self.written_size += try!(self.mini_buffer.serialize(&mut self.write));
}
// adding some padding to make sure we
// can read the last elements with our u64
// cursor
self.written_size += try!(0u32.serialize(&mut self.write));
self.mini_buffer = 0;
Ok(())
}