diff --git a/src/fastfield/fastdivide.rs b/src/fastfield/fastdivide.rs deleted file mode 100644 index b2215ccba..000000000 --- a/src/fastfield/fastdivide.rs +++ /dev/null @@ -1,124 +0,0 @@ -use std::num::Wrapping; - - - -// ported from libdivide.h by ridiculous_fish - - - -const LIBDIVIDE_32_SHIFT_MASK: u8 = 0x1F; -const LIBDIVIDE_ADD_MARKER: u8 = 0x40; -const LIBDIVIDE_U32_SHIFT_PATH: u8 = 0x80; - -pub fn count_leading_zeros(mut val: u32) -> u8 { - if val == 0 { - return 32; - } - let mut result = 0u8; - while (val & (1u32 << 31)) == 0 { - val <<= 1; - result += 1; - } - return result; -} - -pub fn count_trailing_zeros(mut val: u32) -> u8 { - let mut result = 0u8; - val = (val ^ (val - 1)) >> 1; - while val != 0 { - val >>= 1; - result += 1; - } - result -} - -#[derive(Debug)] -pub struct DividerU32 { - magic: u32, - more: u8, -} - -fn divide_64_div_32_to_32(n: u64, d: u32) -> (u32, u32) { - let d64: u64 = d as u64; - let q: u64 = n / d64; - let r: u32 = (Wrapping(n) - (Wrapping(q) * Wrapping(d64))).0 as u32; - (q as u32, r) -} - -impl DividerU32 { - pub fn divide_by(d: u32) -> DividerU32 { - if d == 0 { - DividerU32::divide_by(u32::max_value()) - } - else if (d & (d - 1)) == 0 { - DividerU32 { - magic: 0, - more: count_trailing_zeros(d) | LIBDIVIDE_U32_SHIFT_PATH, - } - } - else { - let floor_log_2_d: u8 = 31 - count_leading_zeros(d); - let more: u8; - let (mut proposed_m, rem) = divide_64_div_32_to_32((1u64 << floor_log_2_d) << 32, d); - debug_assert!(rem > 0 && rem < d); - let e = d - rem; - if e < (1u32 << floor_log_2_d) { - more = floor_log_2_d; - } - else { - proposed_m = proposed_m << 1; - let twice_rem: u32 = rem * 2; - if twice_rem >= d || twice_rem < rem { - proposed_m += 1; - } - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - DividerU32 { - magic: 1 + proposed_m, - more: more, - } - } - } - - pub fn divide(&self, n: u32) -> u32 { - if self.more & LIBDIVIDE_U32_SHIFT_PATH != 0 { - n >> (self.more & LIBDIVIDE_32_SHIFT_MASK) - } - else { - let q_shifted = (self.magic as u64) * (n as u64); - let q = (q_shifted >> 32) as u32; - if self.more & LIBDIVIDE_ADD_MARKER != 0 { - let t = ((n - q) >> 1) + q; - t >> (self.more & LIBDIVIDE_32_SHIFT_MASK) - } - else { - q >> self.more - } - } - } -} - - - -#[cfg(test)] -mod tests { - use super::DividerU32; - - #[test] - fn test_libdivide() { - for d in 1..32 { - let divider = DividerU32::divide_by(d); - for i in 0..100_000 { - assert_eq!(divider.divide(i), i / d); - } - } - } - - #[test] - fn test_libdivide_by0() { - let divider = DividerU32::divide_by(0); - for i in 0..100_000 { - assert_eq!(divider.divide(i), 0); - } - } -} diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 331ec9692..8137d7fd5 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -1,15 +1,23 @@ -mod fastdivide; mod reader; mod writer; mod serializer; - -pub use self::fastdivide::DividerU32; pub use self::writer::{U32FastFieldsWriter, U32FastFieldWriter}; pub use self::reader::{U32FastFieldsReader, U32FastFieldReader}; pub use self::serializer::FastFieldSerializer; -use self::fastdivide::count_leading_zeros; +fn count_leading_zeros(mut val: u32) -> u8 { + if val == 0 { + return 32; + } + let mut result = 0u8; + while (val & (1u32 << 31)) == 0 { + val <<= 1; + result += 1; + } + return result; +} + fn compute_num_bits(amplitude: u32) -> u8 { 32u8 - count_leading_zeros(amplitude) @@ -111,7 +119,7 @@ mod tests { } let source = directory.open_read(&path).unwrap(); { - assert_eq!(source.len(), 58 as usize); + assert_eq!(source.len(), 50 as usize); } { let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 92d75b18a..7731fd140 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -4,7 +4,6 @@ use std::collections::HashMap; use std::ops::Deref; use directory::ReadOnlySource; -use fastfield::DividerU32; use common::BinarySerializable; use DocId; use schema::Field; @@ -13,13 +12,11 @@ use super::compute_num_bits; pub struct U32FastFieldReader { _data: ReadOnlySource, - data_ptr: *const u64, + data_ptr: *const u8, min_val: u32, max_val: u32, - num_bits: u8, + num_bits: u32, mask: u32, - num_in_pack: u32, - divider: DividerU32, } impl U32FastFieldReader { @@ -42,34 +39,24 @@ impl U32FastFieldReader { } let num_bits = compute_num_bits(amplitude); let mask = (1 << num_bits) - 1; - let num_in_pack; - if num_bits == 0u8 { - num_in_pack = 0u32; - } - else { - num_in_pack = 64u32 / (num_bits as u32); - } let ptr: *const u8 = &(data.deref()[8 as usize]); Ok(U32FastFieldReader { _data: data, - data_ptr: ptr as *const u64, + data_ptr: ptr, min_val: min_val, max_val: min_val + amplitude, - num_bits: num_bits, + num_bits: num_bits as u32, mask: mask, - num_in_pack: num_in_pack, - divider: DividerU32::divide_by(num_in_pack), }) } pub fn get(&self, doc: DocId) -> u32 { - if self.num_in_pack == 0u32 { + if self.num_bits == 0u32 { return self.min_val; } - let long_addr = self.divider.divide(doc); - let ord_within_long = doc - long_addr * self.num_in_pack; - let bit_shift = (self.num_bits as u32) * ord_within_long; - let val_unshifted_unmasked: u64 = unsafe { *self.data_ptr.offset(long_addr as isize) }; + let addr = (doc * self.num_bits) / 8; + let bit_shift = (doc * self.num_bits) - addr * 8; //doc - long_addr * self.num_in_pack; + let val_unshifted_unmasked: u64 = unsafe { * (self.data_ptr.offset(addr as isize) as *const u64) }; let val_shifted = (val_unshifted_unmasked >> bit_shift) as u32; return self.min_val + (val_shifted & self.mask); } diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs index e23614af5..756119596 100644 --- a/src/fastfield/serializer.rs +++ b/src/fastfield/serializer.rs @@ -10,12 +10,12 @@ pub struct FastFieldSerializer { written_size: usize, fields: Vec<(Field, u32)>, num_bits: u8, - min_value: u32, - field_open: bool, + + mini_buffer_written: usize, - mini_buffer: u64, + mini_buffer: u32, } impl FastFieldSerializer { @@ -27,10 +27,11 @@ impl FastFieldSerializer { written_size: written_size, fields: Vec::new(), num_bits: 0u8, - field_open: false, - mini_buffer_written: 0, - mini_buffer: 0, min_value: 0, + field_open: false, + + mini_buffer_written: 0, + mini_buffer: 0u32, }) } @@ -57,13 +58,23 @@ impl FastFieldSerializer { pub fn add_val(&mut self, val: u32) -> io::Result<()> { let write: &mut Write = &mut self.write; - if self.mini_buffer_written + (self.num_bits as usize) > 64 { + let val_to_write: u32 = val - self.min_value; + if self.mini_buffer_written + self.num_bits as usize > 32 { + self.mini_buffer |= val_to_write.wrapping_shl(self.mini_buffer_written as u32); self.written_size += try!(self.mini_buffer.serialize(write)); - self.mini_buffer = 0; - self.mini_buffer_written = 0; + // overflow of the shift operand is guarded here by the if case. + self.mini_buffer = val_to_write.wrapping_shr(32u32 - self.mini_buffer_written as u32); + self.mini_buffer_written = self.mini_buffer_written + (self.num_bits as usize) - 32 ; + } + else { + self.mini_buffer |= val_to_write << self.mini_buffer_written; + self.mini_buffer_written += self.num_bits as usize; + if self.mini_buffer_written == 32 { + self.written_size += try!(self.mini_buffer.serialize(write)); + self.mini_buffer_written = 0; + self.mini_buffer = 0u32; + } } - self.mini_buffer |= ((val - self.min_value) as u64) << self.mini_buffer_written; - self.mini_buffer_written += self.num_bits as usize; Ok(()) } @@ -76,6 +87,10 @@ impl FastFieldSerializer { self.mini_buffer_written = 0; self.written_size += try!(self.mini_buffer.serialize(&mut self.write)); } + // adding some padding to make sure we + // can read the last elements with our u64 + // cursor + self.written_size += try!(0u32.serialize(&mut self.write)); self.mini_buffer = 0; Ok(()) }