fastfield working.

This commit is contained in:
Paul Masurel
2016-03-15 16:37:48 +09:00
parent c26cd0e238
commit 522a3dc1e1
2 changed files with 206 additions and 26 deletions

View File

@@ -30,6 +30,11 @@ impl Deref for ReadOnlySource {
}
impl ReadOnlySource {
pub fn len(&self,) -> usize {
self.as_slice().len()
}
pub fn as_slice(&self,) -> &[u8] {
match *self {
ReadOnlySource::Mmap(ref mmap_read_only) => unsafe { mmap_read_only.as_slice() },

View File

@@ -1,9 +1,44 @@
use std::io::Write;
use std::io;
use std::io::Cursor;
use std::io::Seek;
use std::io::SeekFrom;
use core::serialize::BinarySerializable;
use core::directory::ReadOnlySource;
use core::schema::DocId;
struct IntFastFieldWriter {
vals: Vec<u64>,
vals: Vec<u32>,
}
/// Returns how many bits are needed to represent `amplitude`.
///
/// The result is the number of times `amplitude` can be halved before it
/// reaches zero, so `0` needs 0 bits, `1` needs 1 bit and `255` needs 8 bits.
pub fn compute_num_bits(amplitude: u32) -> u8 {
    let mut remaining = amplitude;
    let mut bit_count = 0u8;
    // Drop the lowest bit until nothing is left, counting each step.
    while remaining != 0 {
        remaining /= 2;
        bit_count += 1;
    }
    bit_count
}
/// Packs the values of `vals_it` into 64-bit words and serializes each word
/// to `write`.
///
/// Every value occupies `num_bits` bits. Values are placed starting at the
/// lowest bits of the current word, and a value never straddles two words:
/// when the next value would not fit, the current word is emitted and a fresh
/// one is started. A partially filled last word is emitted as well.
///
/// Original author's note: "only works for big-endian".
/// NOTE(review): the byte order actually written is whatever the
/// `BinarySerializable` impl for `u64` produces -- confirm against that impl.
fn serialize_packed_ints<I: Iterator<Item=u32>>(vals_it: I, num_bits: u8, write: &mut Write) -> io::Result<()> {
    let mut pending_word: u64 = 0;
    let mut bits_used = 0;
    for val in vals_it {
        let fits_in_word = bits_used + num_bits <= 64;
        if !fits_in_word {
            // The word is as full as it can get: emit it and start over.
            try!(pending_word.serialize(write));
            pending_word = 0u64;
            bits_used = 0;
        }
        let positioned = (val as u64) << bits_used;
        pending_word = pending_word | positioned;
        bits_used = bits_used + num_bits;
    }
    // Emit the trailing word only if at least one bit was stored in it.
    if bits_used != 0 {
        try!(pending_word.serialize(write));
    }
    Ok(())
}
impl IntFastFieldWriter {
@@ -14,49 +49,189 @@ impl IntFastFieldWriter {
}
}
pub fn add(&mut self, val: u64) {
pub fn add(&mut self, val: u32) {
self.vals.push(val);
}
pub fn compute_num_bits(&self, amplitude: u64) -> u8 {
if amplitude == 0 {
0
}
else {
1 + self.compute_num_bits(amplitude / 2)
}
}
pub fn close(&self, write: &mut Write) -> io::Result<()> {
try!((self.vals.len() as u32).serialize(write));
if self.vals.is_empty() {
return Ok(())
}
let min = self.vals.iter().min().unwrap();
let max = self.vals.iter().max().unwrap();
let amplitude: u64 = max - min;
let num_bits = self.compute_num_bits(amplitude);
for val in self.vals.iter() {
try!(val.serialize(write));
}
Ok(())
try!(min.serialize(write));
let amplitude: u32 = max - min;
let num_bits: u8 = compute_num_bits(amplitude);
try!(num_bits.serialize(write));
let vals_it = self.vals.iter().map(|i| i-min);
serialize_packed_ints(vals_it, num_bits, write)
}
}
/// Reader over a serialized, bit-packed `u32` fast field.
///
/// Instances are created by `IntFastFieldReader::open`, which fills every
/// field from the header of the source it is given; values are then fetched
/// one at a time with `get`.
pub struct IntFastFieldReader {
/// Bytes that follow the 5-byte header of the source passed to `open`.
data: ReadOnlySource,
/// Offset read from the header and added back to every decoded value.
min_val: u32,
/// Bit width of each packed value (stored in the header as a `u8`).
num_bits: u32,
/// Bit mask keeping only the low `num_bits` bits of a shifted word.
mask: u32,
/// Number of values stored in each 64-bit word.
num_in_pack: u32,
}
impl IntFastFieldReader {
    /// Opens a bit-packed fast field stored in `data`.
    ///
    /// The first five bytes of `data` are the header: the minimum value
    /// (`u32`) followed by the bit width of every packed value (`u8`). The
    /// remaining bytes are kept as the packed payload.
    ///
    /// # Errors
    ///
    /// Returns any error raised while deserializing the header, and an
    /// `InvalidData` error when the stored bit width is larger than 32
    /// (values are `u32`, so a larger width can only come from corrupt data).
    pub fn open(data: &ReadOnlySource) -> io::Result<IntFastFieldReader> {
        let mut cursor: Cursor<&[u8]> = Cursor::new(&*data);
        let min_val = try!(u32::deserialize(&mut cursor));
        let num_bits = try!(u8::deserialize(&mut cursor));
        let num_bits = num_bits as u32;
        if num_bits > 32 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "fast field bit width exceeds 32",
            ));
        }
        // `1u32 << 32` overflows, so the full-width mask is spelled out.
        let mask = if num_bits == 32 {
            !0u32
        } else {
            (1u32 << num_bits) - 1
        };
        // `compute_num_bits(0)` is 0, so a field whose values are all equal
        // has a width of 0. Nothing is packed in that case, and `64 / 0`
        // must not be evaluated.
        let num_in_pack = if num_bits == 0 {
            0
        } else {
            64u32 / num_bits
        };
        Ok(IntFastFieldReader {
            min_val: min_val,
            num_bits: num_bits,
            data: data.slice(5, data.len()),
            mask: mask,
            num_in_pack: num_in_pack,
        })
    }

    /// Returns the value stored for `doc`.
    ///
    /// The value is located by computing which 64-bit word holds it and its
    /// position inside that word, then shifted, masked and offset by the
    /// minimum value from the header.
    ///
    /// # Panics
    ///
    /// Panics if the word holding `doc` cannot be read from the payload,
    /// for instance when `doc` lies past the last stored value.
    pub fn get(&self, doc: DocId) -> u32 {
        if self.num_bits == 0 {
            // Zero-width field: every value equals the minimum and the
            // payload holds no words to read.
            return self.min_val;
        }
        let long_addr = doc / self.num_in_pack;
        let ord_within_long = doc - long_addr * self.num_in_pack;
        let bit_shift = self.num_bits * ord_within_long;
        let mut cursor = Cursor::new(&*self.data);
        // Each packed word is 8 bytes long.
        cursor.seek(SeekFrom::Start((long_addr as u64) * 8u64)).unwrap();
        let val_unshifted_unmasked = u64::deserialize(&mut cursor).unwrap();
        let val_shifted = (val_unshifted_unmasked >> bit_shift) as u32;
        self.min_val + (val_shifted & self.mask)
    }
}
#[cfg(test)]
mod tests {
use super::compute_num_bits;
use super::IntFastFieldWriter;
use super::IntFastFieldReader;
use core::directory::ReadOnlySource;
use test::Bencher;
use test;
use rand::Rng;
use rand::SeedableRng;
use rand::XorShiftRng;
#[test]
fn test_intfastfieldwriter() {
let mut write: Vec<u8> = Vec::new();
let mut int_fast_field_writer = IntFastFieldWriter::new();
int_fast_field_writer.add(4u64);
int_fast_field_writer.add(14u64);
int_fast_field_writer.add(2u64);
int_fast_field_writer.close(&mut write).unwrap();
assert_eq!(write.len(), 8 * 3 + 4);
fn test_compute_num_bits() {
assert_eq!(compute_num_bits(1), 1u8);
assert_eq!(compute_num_bits(0), 0u8);
assert_eq!(compute_num_bits(2), 2u8);
assert_eq!(compute_num_bits(3), 2u8);
assert_eq!(compute_num_bits(4), 3u8);
assert_eq!(compute_num_bits(255), 8u8);
assert_eq!(compute_num_bits(256), 9u8);
}
/// Round-trips three small values through the writer and the reader.
#[test]
fn test_intfastfield_small() {
    let values = [4u32, 14u32, 2u32];
    let mut serialized: Vec<u8> = Vec::new();
    {
        let mut writer = IntFastFieldWriter::new();
        for &val in values.iter() {
            writer.add(val);
        }
        writer.close(&mut serialized).unwrap();
        // 4 bytes + 1 byte + a single 8-byte packed word.
        let expected_len: usize = 4 + 1 + 8;
        assert_eq!(serialized.len(), expected_len);
    }
    {
        let source = ReadOnlySource::Anonymous(serialized);
        let reader = IntFastFieldReader::open(&source).unwrap();
        for (doc, &expected) in values.iter().enumerate() {
            assert_eq!(reader.get(doc as u32), expected);
        }
    }
}
/// Round-trips three values with a wide spread through the writer and the
/// reader.
#[test]
fn test_intfastfield_large() {
    let values = [4u32, 14_082_001u32, 3_052u32];
    let mut serialized: Vec<u8> = Vec::new();
    {
        let mut writer = IntFastFieldWriter::new();
        for &val in values.iter() {
            writer.add(val);
        }
        writer.close(&mut serialized).unwrap();
        let expected_len: usize = 21;
        assert_eq!(serialized.len(), expected_len);
    }
    {
        let source = ReadOnlySource::Anonymous(serialized);
        let reader = IntFastFieldReader::open(&source).unwrap();
        for (doc, &expected) in values.iter().enumerate() {
            assert_eq!(reader.get(doc as u32), expected);
        }
    }
}
/// Builds a shuffled copy of `0..1_000_000`.
///
/// The generator is seeded with a fixed value so every call uses the same
/// seed.
fn generate_permutation() -> Vec<u32> {
    let fixed_seed: [u32; 4] = [1, 2, 3, 4];
    let mut rng = XorShiftRng::from_seed(fixed_seed);
    let mut shuffled = (0u32..1_000_000u32).collect::<Vec<u32>>();
    rng.shuffle(&mut shuffled);
    shuffled
}
/// Serializes a large permutation and checks that following it through the
/// reader yields the same values as following it through the `Vec`.
#[test]
fn test_intfastfield_permutation() {
    let permutation = generate_permutation();
    let mut serialized: Vec<u8> = Vec::new();
    {
        let mut writer = IntFastFieldWriter::new();
        for &val in permutation.iter() {
            writer.add(val);
        }
        writer.close(&mut serialized).unwrap();
    }
    let source = ReadOnlySource::Anonymous(serialized);
    let reader = IntFastFieldReader::open(&source).unwrap();
    let num_hops = test::black_box(100);
    let mut current = 0u32;
    for _ in 0..num_hops {
        // Each value read becomes the next index to look up.
        let fetched = reader.get(current);
        assert_eq!(fetched, permutation[current as usize]);
        current = fetched;
    }
}
/// Baseline benchmark: follows the permutation for 100 hops through a plain
/// `Vec` lookup.
#[bench]
fn bench_intfastfield_veclookup(b: &mut Bencher) {
    let permutation = generate_permutation();
    b.iter(|| {
        let n = test::black_box(100);
        let mut a = 0u32;
        for _ in 0..n {
            a = permutation[a as usize];
        }
        // Hand the final index back to the bencher so the chain of lookups
        // has an observable result and cannot be discarded by the optimizer.
        a
    });
}
/// Benchmark: follows the permutation for 100 hops through
/// `IntFastFieldReader::get`, for comparison with the plain `Vec` baseline.
#[bench]
fn bench_intfastfield_fflookup(b: &mut Bencher) {
    let mut buffer: Vec<u8> = Vec::new();
    {
        let permutation = generate_permutation();
        let mut int_fast_field_writer = IntFastFieldWriter::new();
        for x in permutation.iter() {
            int_fast_field_writer.add(*x);
        }
        int_fast_field_writer.close(&mut buffer).unwrap();
    }
    let source = ReadOnlySource::Anonymous(buffer);
    let int_fast_field_reader = IntFastFieldReader::open(&source).unwrap();
    b.iter(|| {
        let n = test::black_box(100);
        let mut a = 0u32;
        for _ in 0..n {
            a = int_fast_field_reader.get(a);
        }
        // Hand the final value back to the bencher so the chain of lookups
        // has an observable result and cannot be discarded by the optimizer.
        a
    });
}
}