FastField with different types

This commit is contained in:
Paul Masurel
2017-05-01 17:14:51 +08:00
parent afdfb1a69b
commit 26cfe2909f
11 changed files with 197 additions and 77 deletions

View File

@@ -75,6 +75,7 @@ pub mod tests {
use core::SegmentReader;
use SegmentLocalId;
use fastfield::U64FastFieldReader;
use fastfield::FastFieldReader;
use schema::Field;
/// Stores all of the doc ids.

View File

@@ -18,7 +18,7 @@ use std::fmt;
use schema::Field;
use postings::SegmentPostingsOption;
use postings::SegmentPostings;
use fastfield::{U64FastFieldsReader, U64FastFieldReader};
use fastfield::{FastFieldsReader, FastFieldReader, U64FastFieldReader};
use schema::Schema;
use schema::FieldType;
use postings::FreqHandler;
@@ -42,8 +42,8 @@ pub struct SegmentReader {
term_infos: Arc<FstMap<TermInfo>>,
postings_data: ReadOnlySource,
store_reader: StoreReader,
fast_fields_reader: Arc<U64FastFieldsReader>,
fieldnorms_reader: Arc<U64FastFieldsReader>,
fast_fields_reader: Arc<FastFieldsReader>,
fieldnorms_reader: Arc<FastFieldsReader>,
delete_bitset: DeleteBitSet,
positions_data: ReadOnlySource,
schema: Schema,
@@ -74,41 +74,48 @@ impl SegmentReader {
}
/// Accessor to a segment's fast field reader given a field.
pub fn get_fast_field_reader(&self, field: Field) -> Option<U64FastFieldReader> {
/// Returns the u64 fast value reader if the field
/// is a u64 field indexed as "fast".
///
/// Return None if the field is not a u64 field
/// indexed with the fast option.
///
/// # Panics
/// May panic if the index is corrupted.
///
/// Returns the u64 fast value reader if the field
/// is a u64 field indexed as "fast".
///
/// Return None if the field is not a u64 field
/// indexed with the fast option.
///
/// # Panics
/// May panic if the index is corrupted.
/// TODO return Err
pub fn get_fast_field_reader<TFastFieldReader: FastFieldReader>(&self, field: Field) -> Option<TFastFieldReader> {
let field_entry = self.schema.get_field_entry(field);
match field_entry.field_type() {
&FieldType::Str(_) => {
warn!("Field <{}> is not a fast field. It is a text field, and fast text fields are not supported yet.", field_entry.name());
None
},
&FieldType::U64(ref integer_options) => {
if integer_options.is_fast() {
self.fast_fields_reader.get_field(field)
}
else {
warn!("Field <{}> is not defined as a fast field.", field_entry.name());
None
}
},
&FieldType::I64(ref integer_options) => {
panic!("not implemented");
// if integer_options.is_fast() {
// self.fast_fields_reader.get_field(field)
// }
// else {
// warn!("Field <{}> is not defined as a fast field.", field_entry.name());
// None
// }
},
if !TFastFieldReader::is_enabled(field_entry.field_type()) {
None
}
else {
self.fast_fields_reader.open_reader(field)
}
// match field_entry.field_type() {
// &FieldType::Str(_) => {
// warn!("Field <{}> is not a fast field. It is a text field, and fast text fields are not supported yet.", field_entry.name());
// None
// },
// &FieldType::U64(ref integer_options) => {
// if integer_options.is_fast() {
// self.fast_fields_reader.open_reader(field)
// }
// else {
// warn!("Field <{}> is not defined as a fast field.", field_entry.name());
// None
// }
// },
// &FieldType::I64(ref integer_options) => {
// if integer_options.is_fast() {
// self.fast_fields_reader.open_reader(field)
// }
// else {
// warn!("Field <{}> is not defined as a fast field.", field_entry.name());
// None
// }
// },
// }
}
/// Accessor to the segment's `Field norms`'s reader.
@@ -119,7 +126,7 @@ impl SegmentReader {
/// They are simply stored as a fast field, serialized in
/// the `.fieldnorm` file of the segment.
pub fn get_fieldnorms_reader(&self, field: Field) -> Option<U64FastFieldReader> {
self.fieldnorms_reader.get_field(field)
self.fieldnorms_reader.open_reader(field)
}
/// Returns the number of documents containing the term.
@@ -144,10 +151,10 @@ impl SegmentReader {
let postings_shared_mmap = try!(segment.open_read(SegmentComponent::POSTINGS));
let fast_field_data = try!(segment.open_read(SegmentComponent::FASTFIELDS));
let fast_fields_reader = try!(U64FastFieldsReader::open(fast_field_data));
let fast_fields_reader = try!(FastFieldsReader::open(fast_field_data));
let fieldnorms_data = try!(segment.open_read(SegmentComponent::FIELDNORMS));
let fieldnorms_reader = try!(U64FastFieldsReader::open(fieldnorms_data));
let fieldnorms_reader = try!(FastFieldsReader::open(fieldnorms_data));
let positions_data = segment
.open_read(SegmentComponent::POSITIONS)

View File

@@ -16,7 +16,8 @@ mod serializer;
pub mod delete;
pub use self::writer::{U64FastFieldsWriter, U64FastFieldWriter};
pub use self::reader::{U64FastFieldsReader, U64FastFieldReader};
pub use self::reader::{FastFieldsReader, U64FastFieldReader};
pub use self::reader::FastFieldReader;
pub use self::serializer::FastFieldSerializer;
#[cfg(test)]
@@ -30,6 +31,7 @@ mod tests {
use schema::FAST;
use test::Bencher;
use test;
use fastfield::FastFieldReader;
use rand::Rng;
use rand::SeedableRng;
use rand::XorShiftRng;
@@ -78,8 +80,8 @@ mod tests {
assert_eq!(source.len(), 31 as usize);
}
{
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
let fast_field_readers = FastFieldsReader::open(source).unwrap();
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
assert_eq!(fast_field_reader.get(0), 13u64);
assert_eq!(fast_field_reader.get(1), 14u64);
assert_eq!(fast_field_reader.get(2), 2u64);
@@ -111,8 +113,8 @@ mod tests {
assert_eq!(source.len(), 56 as usize);
}
{
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
let fast_field_readers = FastFieldsReader::open(source).unwrap();
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
assert_eq!(fast_field_reader.get(0), 4u64);
assert_eq!(fast_field_reader.get(1), 14_082_001u64);
assert_eq!(fast_field_reader.get(2), 3_052u64);
@@ -146,8 +148,8 @@ mod tests {
assert_eq!(source.len(), 29 as usize);
}
{
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
let fast_field_readers = FastFieldsReader::open(source).unwrap();
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
for doc in 0..10_000 {
assert_eq!(fast_field_reader.get(doc), 100_000u64);
}
@@ -177,8 +179,8 @@ mod tests {
assert_eq!(source.len(), 80037 as usize);
}
{
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
let fast_field_readers = FastFieldsReader::open(source).unwrap();
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
assert_eq!(fast_field_reader.get(0), 0u64);
for doc in 1..10_001 {
assert_eq!(fast_field_reader.get(doc), 5_000_000_000_000_000_000u64 + doc as u64 - 1u64);
@@ -212,8 +214,8 @@ mod tests {
}
let source = directory.open_read(&path).unwrap();
{
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
let fast_field_readers = FastFieldsReader::open(source).unwrap();
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
let mut a = 0u64;
for _ in 0..n {
println!("i {}=> {} {}", a, fast_field_reader.get(a as u32), permutation[a as usize]);
@@ -266,8 +268,8 @@ mod tests {
}
let source = directory.open_read(&path).unwrap();
{
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
let fast_field_readers = FastFieldsReader::open(source).unwrap();
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
b.iter(|| {
let n = test::black_box(7000u32);
let mut a = 0u64;
@@ -296,8 +298,8 @@ mod tests {
}
let source = directory.open_read(&path).unwrap();
{
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
let fast_field_readers = FastFieldsReader::open(source).unwrap();
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
b.iter(|| {
let n = test::black_box(1000u32);
let mut a = 0u32;

View File

@@ -11,7 +11,7 @@ use fastfield::FastFieldSerializer;
use fastfield::U64FastFieldsWriter;
use common::bitpacker::compute_num_bits;
use common::bitpacker::BitUnpacker;
use schema::FieldType;
lazy_static! {
static ref U64_FAST_FIELD_EMPTY: ReadOnlySource = {
@@ -21,8 +21,14 @@ lazy_static! {
}
pub trait FastFieldReader<T> {
fn get(&self, doc: DocId) -> T;
/// Common interface of the typed readers giving access to the values of a fast field.
///
/// Callers pick the concrete reader (and therefore the value type) through
/// the type parameter of the functions that are generic over this trait.
pub trait FastFieldReader: Sized {
/// Type of the values returned by `get`.
type ValueType;
/// Returns the value associated with the document `doc`.
fn get(&self, doc: DocId) -> Self::ValueType;
/// Builds a reader on top of `source`.
fn open(source: ReadOnlySource) -> Self;
/// Returns true iff a field of type `field_type` can be read with this reader.
fn is_enabled(field_type: &FieldType) -> bool;
}
pub struct U64FastFieldReader {
@@ -45,12 +51,34 @@ impl U64FastFieldReader {
pub fn max_val(&self,) -> u64 {
self.max_val
}
}
impl FastFieldReader for U64FastFieldReader {
type ValueType = u64;
fn get(&self, doc: DocId) -> u64 {
self.min_val + self.bit_unpacker.get(doc as usize)
}
/// Returns true iff `field_type` is a `u64` field
/// indexed with the fast option.
///
/// Any other field type yields `false`.
fn is_enabled(field_type: &FieldType) -> bool {
    match *field_type {
        // `is_fast()` already is the answer: no need to branch on it.
        FieldType::U64(ref integer_options) => integer_options.is_fast(),
        _ => false,
    }
}
/// Opens a new fast field reader given a read only source.
///
/// # Panics
/// Panics if the data is corrupted.
pub fn open(data: ReadOnlySource) -> U64FastFieldReader {
fn open(data: ReadOnlySource) -> U64FastFieldReader {
let min_val: u64;
let max_val: u64;
let bit_unpacker: BitUnpacker;
@@ -72,9 +100,6 @@ impl U64FastFieldReader {
}
}
pub fn get(&self, doc: DocId) -> u64 {
self.min_val + self.bit_unpacker.get(doc as usize)
}
}
@@ -97,18 +122,98 @@ impl From<Vec<u64>> for U64FastFieldReader {
serializer.close().unwrap();
}
let source = directory.open_read(&path).unwrap();
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
fast_field_readers.get_field(field).unwrap()
let fast_field_readers = FastFieldsReader::open(source).unwrap();
fast_field_readers.open_reader(field).unwrap()
}
}
pub struct U64FastFieldsReader {
/// Reader giving access to the values of a fast field holding `i64` values.
///
/// A value is decoded as `min_val` plus the bit-packed delta stored for the document.
pub struct I64FastFieldReader {
// Source the reader was opened on. It is not read in the code visible here;
// presumably it is kept so that the data handed to `bit_unpacker` stays alive — TODO confirm.
_data: ReadOnlySource,
// Decodes the per-document bit-packed deltas.
bit_unpacker: BitUnpacker,
// Minimum value, as read from the header of the field data.
min_val: i64,
// `min_val` plus the amplitude read from the header of the field data.
max_val: i64,
}
impl I64FastFieldReader {
    /// Returns a reader over an empty fast field.
    ///
    /// # Panics
    /// Always panics for the moment: the empty `i64` reader is not implemented yet.
    pub fn empty() -> I64FastFieldReader {
        // TODO implement. The original sketch was to open a static empty source
        // (an `I64_FAST_FIELD_EMPTY` counterpart of `U64_FAST_FIELD_EMPTY`).
        panic!("I64FastFieldReader::empty() is not implemented yet")
    }

    /// Returns the minimum value of the field, as read from its header.
    pub fn min_val(&self) -> i64 {
        self.min_val
    }

    /// Returns the maximum value of the field, i.e. the minimum value
    /// plus the amplitude read from its header.
    pub fn max_val(&self) -> i64 {
        self.max_val
    }
}
impl FastFieldReader for I64FastFieldReader {
    type ValueType = i64;

    /// Returns the value associated with the document `doc`.
    ///
    /// The stored delta is unsigned and may exceed `i64::MAX` when the field
    /// spans more than half of the `i64` range. The addition is therefore done
    /// with wrapping arithmetic, which yields the exact two's complement result
    /// instead of panicking on overflow in debug builds.
    fn get(&self, doc: DocId) -> i64 {
        let delta = self.bit_unpacker.get(doc as usize);
        self.min_val.wrapping_add(delta as i64)
    }

    /// Opens a new fast field reader given a read only source.
    ///
    /// The source is expected to start with the minimum value (`i64`),
    /// followed by the amplitude (`u64`), followed by the bit-packed values.
    ///
    /// # Panics
    /// Panics if the data is corrupted.
    fn open(data: ReadOnlySource) -> I64FastFieldReader {
        let min_val: i64;
        let max_val: i64;
        let bit_unpacker: BitUnpacker;
        {
            // The borrow of `data` is scoped so that `data` can be moved into the reader below.
            let mut cursor: &[u8] = data.as_slice();
            min_val = i64::deserialize(&mut cursor).expect("Failed to read the min_val of fast field.");
            let amplitude = u64::deserialize(&mut cursor).expect("Failed to read the amplitude of fast field.");
            // Same reasoning as in `get`: the amplitude may not fit in an `i64`.
            max_val = min_val.wrapping_add(amplitude as i64);
            let num_bits = compute_num_bits(amplitude);
            bit_unpacker = BitUnpacker::new(cursor, num_bits as usize)
        }
        I64FastFieldReader {
            _data: data,
            bit_unpacker: bit_unpacker,
            min_val: min_val,
            max_val: max_val,
        }
    }

    /// Returns true iff `field_type` is an `i64` field
    /// indexed with the fast option.
    fn is_enabled(field_type: &FieldType) -> bool {
        match *field_type {
            FieldType::I64(ref integer_options) => integer_options.is_fast(),
            _ => false,
        }
    }
}
pub struct FastFieldsReader {
source: ReadOnlySource,
field_offsets: HashMap<Field, (u32, u32)>,
}
impl U64FastFieldsReader {
pub fn open(source: ReadOnlySource) -> io::Result<U64FastFieldsReader> {
impl FastFieldsReader {
pub fn open(source: ReadOnlySource) -> io::Result<FastFieldsReader> {
let header_offset;
let field_offsets: Vec<(Field, u32)>;
{
@@ -132,12 +237,12 @@ impl U64FastFieldsReader {
let (field, start_offset) = *field_start_offsets;
field_offsets_map.insert(field, (start_offset, *stop_offset));
}
Ok(U64FastFieldsReader {
Ok(FastFieldsReader {
field_offsets: field_offsets_map,
source: source,
})
}
/// Returns the u64 fast value reader if the field
/// is a u64 field indexed as "fast".
///
@@ -146,12 +251,12 @@ impl U64FastFieldsReader {
///
/// # Panics
/// May panic if the index is corrupted.
pub fn get_field(&self, field: Field) -> Option<U64FastFieldReader> {
pub fn open_reader<FFReader: FastFieldReader>(&self, field: Field) -> Option<FFReader> {
self.field_offsets
.get(&field)
.map(|&(start, stop)| {
let field_source = self.source.slice(start as usize, stop as usize);
U64FastFieldReader::open(field_source)
FFReader::open(field_source)
})
}
}

View File

@@ -14,6 +14,7 @@ use core::TermIterator;
use fastfield::delete::DeleteBitSet;
use schema::{Schema, Field};
use fastfield::FastFieldSerializer;
use fastfield::FastFieldReader;
use store::StoreWriter;
use std::cmp::{min, max};
use common::allocate_vec;

View File

@@ -7,8 +7,10 @@
#![feature(conservative_impl_trait)]
#![feature(integer_atomics)]
#![cfg_attr(test, feature(rand))]
#![cfg_attr(test, feature(test))]
#![cfg_attr(test, feature(step_by))]
#![doc(test(attr(allow(unused_variables), deny(warnings))))]
#![warn(missing_docs)]
@@ -200,6 +202,7 @@ mod tests {
use schema::*;
use DocSet;
use IndexWriter;
use fastfield::FastFieldReader;
use Postings;
#[test]

View File

@@ -43,6 +43,7 @@ mod tests {
use core::Index;
use std::iter;
use datastruct::stacker::Heap;
use fastfield::FastFieldReader;
use query::TermQuery;
use schema::Field;
use test::Bencher;

View File

@@ -19,6 +19,7 @@ mod tests {
use Index;
use schema::*;
use postings::SegmentPostingsOption;
use fastfield::FastFieldReader;
fn abs_diff(left: f32, right: f32) -> f32 {
(right - left).abs()

View File

@@ -4,6 +4,7 @@ use fastfield::U64FastFieldReader;
use postings::DocSet;
use query::Scorer;
use postings::Postings;
use fastfield::FastFieldReader;
pub struct TermScorer<TPostings> where TPostings: Postings {
pub idf: Score,

View File

@@ -59,7 +59,7 @@ impl FieldEntry {
}
}
// Returns true iff the field is a u64 fast field
/// Returns true iff the field is a u64 fast field
pub fn is_u64_fast(&self,) -> bool {
match self.field_type {
FieldType::U64(ref options) => options.is_fast(),

View File

@@ -36,15 +36,13 @@ impl FieldValue {
impl BinarySerializable for FieldValue {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
let mut written_size = 0;
written_size += try!(self.field.serialize(writer));
written_size += try!(self.value.serialize(writer));
Ok(written_size)
Ok(self.field.serialize(writer)? +
self.value.serialize(writer)?)
}
fn deserialize(reader: &mut Read) -> io::Result<Self> {
let field = try!(Field::deserialize(reader));
let value = try!(Value::deserialize(reader));
let field = Field::deserialize(reader)?;
let value = Value::deserialize(reader)?;
Ok(FieldValue::new(field, value))
}
}