mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 09:12:55 +00:00
FastField with different types
This commit is contained in:
@@ -75,6 +75,7 @@ pub mod tests {
|
||||
use core::SegmentReader;
|
||||
use SegmentLocalId;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use fastfield::FastFieldReader;
|
||||
use schema::Field;
|
||||
|
||||
/// Stores all of the doc ids.
|
||||
|
||||
@@ -18,7 +18,7 @@ use std::fmt;
|
||||
use schema::Field;
|
||||
use postings::SegmentPostingsOption;
|
||||
use postings::SegmentPostings;
|
||||
use fastfield::{U64FastFieldsReader, U64FastFieldReader};
|
||||
use fastfield::{FastFieldsReader, FastFieldReader, U64FastFieldReader};
|
||||
use schema::Schema;
|
||||
use schema::FieldType;
|
||||
use postings::FreqHandler;
|
||||
@@ -42,8 +42,8 @@ pub struct SegmentReader {
|
||||
term_infos: Arc<FstMap<TermInfo>>,
|
||||
postings_data: ReadOnlySource,
|
||||
store_reader: StoreReader,
|
||||
fast_fields_reader: Arc<U64FastFieldsReader>,
|
||||
fieldnorms_reader: Arc<U64FastFieldsReader>,
|
||||
fast_fields_reader: Arc<FastFieldsReader>,
|
||||
fieldnorms_reader: Arc<FastFieldsReader>,
|
||||
delete_bitset: DeleteBitSet,
|
||||
positions_data: ReadOnlySource,
|
||||
schema: Schema,
|
||||
@@ -74,41 +74,48 @@ impl SegmentReader {
|
||||
}
|
||||
|
||||
/// Accessor to a segment's fast field reader given a field.
|
||||
pub fn get_fast_field_reader(&self, field: Field) -> Option<U64FastFieldReader> {
|
||||
/// Returns the u64 fast value reader if the field
|
||||
/// is a u64 field indexed as "fast".
|
||||
///
|
||||
/// Return None if the field is not a u64 field
|
||||
/// indexed with the fast option.
|
||||
///
|
||||
/// # Panics
|
||||
/// May panic if the index is corrupted.
|
||||
///
|
||||
/// Returns the u64 fast value reader if the field
|
||||
/// is a u64 field indexed as "fast".
|
||||
///
|
||||
/// Return None if the field is not a u64 field
|
||||
/// indexed with the fast option.
|
||||
///
|
||||
/// # Panics
|
||||
/// May panic if the index is corrupted.
|
||||
/// TODO return Err
|
||||
pub fn get_fast_field_reader<TFastFieldReader: FastFieldReader>(&self, field: Field) -> Option<TFastFieldReader> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
match field_entry.field_type() {
|
||||
&FieldType::Str(_) => {
|
||||
warn!("Field <{}> is not a fast field. It is a text field, and fast text fields are not supported yet.", field_entry.name());
|
||||
None
|
||||
},
|
||||
&FieldType::U64(ref integer_options) => {
|
||||
if integer_options.is_fast() {
|
||||
self.fast_fields_reader.get_field(field)
|
||||
}
|
||||
else {
|
||||
warn!("Field <{}> is not defined as a fast field.", field_entry.name());
|
||||
None
|
||||
}
|
||||
},
|
||||
&FieldType::I64(ref integer_options) => {
|
||||
panic!("not implemented");
|
||||
// if integer_options.is_fast() {
|
||||
// self.fast_fields_reader.get_field(field)
|
||||
// }
|
||||
// else {
|
||||
// warn!("Field <{}> is not defined as a fast field.", field_entry.name());
|
||||
// None
|
||||
// }
|
||||
},
|
||||
if !TFastFieldReader::is_enabled(field_entry.field_type()) {
|
||||
None
|
||||
}
|
||||
else {
|
||||
self.fast_fields_reader.open_reader(field)
|
||||
}
|
||||
// match field_entry.field_type() {
|
||||
// &FieldType::Str(_) => {
|
||||
// warn!("Field <{}> is not a fast field. It is a text field, and fast text fields are not supported yet.", field_entry.name());
|
||||
// None
|
||||
// },
|
||||
// &FieldType::U64(ref integer_options) => {
|
||||
// if integer_options.is_fast() {
|
||||
// self.fast_fields_reader.open_reader(field)
|
||||
// }
|
||||
// else {
|
||||
// warn!("Field <{}> is not defined as a fast field.", field_entry.name());
|
||||
// None
|
||||
// }
|
||||
// },
|
||||
// &FieldType::I64(ref integer_options) => {
|
||||
// if integer_options.is_fast() {
|
||||
// self.fast_fields_reader.open_reader(field)
|
||||
// }
|
||||
// else {
|
||||
// warn!("Field <{}> is not defined as a fast field.", field_entry.name());
|
||||
// None
|
||||
// }
|
||||
// },
|
||||
// }
|
||||
}
|
||||
|
||||
/// Accessor to the segment's `Field norms`'s reader.
|
||||
@@ -119,7 +126,7 @@ impl SegmentReader {
|
||||
/// They are simply stored as a fast field, serialized in
|
||||
/// the `.fieldnorm` file of the segment.
|
||||
pub fn get_fieldnorms_reader(&self, field: Field) -> Option<U64FastFieldReader> {
|
||||
self.fieldnorms_reader.get_field(field)
|
||||
self.fieldnorms_reader.open_reader(field)
|
||||
}
|
||||
|
||||
/// Returns the number of documents containing the term.
|
||||
@@ -144,10 +151,10 @@ impl SegmentReader {
|
||||
let postings_shared_mmap = try!(segment.open_read(SegmentComponent::POSTINGS));
|
||||
|
||||
let fast_field_data = try!(segment.open_read(SegmentComponent::FASTFIELDS));
|
||||
let fast_fields_reader = try!(U64FastFieldsReader::open(fast_field_data));
|
||||
let fast_fields_reader = try!(FastFieldsReader::open(fast_field_data));
|
||||
|
||||
let fieldnorms_data = try!(segment.open_read(SegmentComponent::FIELDNORMS));
|
||||
let fieldnorms_reader = try!(U64FastFieldsReader::open(fieldnorms_data));
|
||||
let fieldnorms_reader = try!(FastFieldsReader::open(fieldnorms_data));
|
||||
|
||||
let positions_data = segment
|
||||
.open_read(SegmentComponent::POSITIONS)
|
||||
|
||||
@@ -16,7 +16,8 @@ mod serializer;
|
||||
pub mod delete;
|
||||
|
||||
pub use self::writer::{U64FastFieldsWriter, U64FastFieldWriter};
|
||||
pub use self::reader::{U64FastFieldsReader, U64FastFieldReader};
|
||||
pub use self::reader::{FastFieldsReader, U64FastFieldReader};
|
||||
pub use self::reader::FastFieldReader;
|
||||
pub use self::serializer::FastFieldSerializer;
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -30,6 +31,7 @@ mod tests {
|
||||
use schema::FAST;
|
||||
use test::Bencher;
|
||||
use test;
|
||||
use fastfield::FastFieldReader;
|
||||
use rand::Rng;
|
||||
use rand::SeedableRng;
|
||||
use rand::XorShiftRng;
|
||||
@@ -78,8 +80,8 @@ mod tests {
|
||||
assert_eq!(source.len(), 31 as usize);
|
||||
}
|
||||
{
|
||||
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
||||
let fast_field_readers = FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 13u64);
|
||||
assert_eq!(fast_field_reader.get(1), 14u64);
|
||||
assert_eq!(fast_field_reader.get(2), 2u64);
|
||||
@@ -111,8 +113,8 @@ mod tests {
|
||||
assert_eq!(source.len(), 56 as usize);
|
||||
}
|
||||
{
|
||||
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
||||
let fast_field_readers = FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 4u64);
|
||||
assert_eq!(fast_field_reader.get(1), 14_082_001u64);
|
||||
assert_eq!(fast_field_reader.get(2), 3_052u64);
|
||||
@@ -146,8 +148,8 @@ mod tests {
|
||||
assert_eq!(source.len(), 29 as usize);
|
||||
}
|
||||
{
|
||||
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
||||
let fast_field_readers = FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
|
||||
for doc in 0..10_000 {
|
||||
assert_eq!(fast_field_reader.get(doc), 100_000u64);
|
||||
}
|
||||
@@ -177,8 +179,8 @@ mod tests {
|
||||
assert_eq!(source.len(), 80037 as usize);
|
||||
}
|
||||
{
|
||||
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
||||
let fast_field_readers = FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 0u64);
|
||||
for doc in 1..10_001 {
|
||||
assert_eq!(fast_field_reader.get(doc), 5_000_000_000_000_000_000u64 + doc as u64 - 1u64);
|
||||
@@ -212,8 +214,8 @@ mod tests {
|
||||
}
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
||||
let fast_field_readers = FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
|
||||
let mut a = 0u64;
|
||||
for _ in 0..n {
|
||||
println!("i {}=> {} {}", a, fast_field_reader.get(a as u32), permutation[a as usize]);
|
||||
@@ -266,8 +268,8 @@ mod tests {
|
||||
}
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
||||
let fast_field_readers = FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
|
||||
b.iter(|| {
|
||||
let n = test::black_box(7000u32);
|
||||
let mut a = 0u64;
|
||||
@@ -296,8 +298,8 @@ mod tests {
|
||||
}
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
||||
let fast_field_readers = FastFieldsReader::open(source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader = fast_field_readers.open_reader(*FIELD).unwrap();
|
||||
b.iter(|| {
|
||||
let n = test::black_box(1000u32);
|
||||
let mut a = 0u32;
|
||||
|
||||
@@ -11,7 +11,7 @@ use fastfield::FastFieldSerializer;
|
||||
use fastfield::U64FastFieldsWriter;
|
||||
use common::bitpacker::compute_num_bits;
|
||||
use common::bitpacker::BitUnpacker;
|
||||
|
||||
use schema::FieldType;
|
||||
|
||||
lazy_static! {
|
||||
static ref U64_FAST_FIELD_EMPTY: ReadOnlySource = {
|
||||
@@ -21,8 +21,14 @@ lazy_static! {
|
||||
}
|
||||
|
||||
|
||||
pub trait FastFieldReader<T> {
|
||||
fn get(&self, doc: DocId) -> T;
|
||||
/// Common interface of the typed fast field readers.
///
/// Each implementation fixes, through `ValueType`, the type of the value
/// it returns for a document.
pub trait FastFieldReader: Sized {
|
||||
/// Type of the value returned by `get`.
type ValueType;
|
||||
|
||||
/// Returns the fast field value associated with the document `doc`.
fn get(&self, doc: DocId) -> Self::ValueType;
|
||||
|
||||
/// Builds a reader from the source holding the data of a single field.
///
/// The signature is infallible: it has no way to report an error to the caller.
fn open(source: ReadOnlySource) -> Self;
|
||||
|
||||
/// Returns true if a field of type `field_type` can be read with this reader.
fn is_enabled(field_type: &FieldType) -> bool;
|
||||
}
|
||||
|
||||
pub struct U64FastFieldReader {
|
||||
@@ -45,12 +51,34 @@ impl U64FastFieldReader {
|
||||
pub fn max_val(&self,) -> u64 {
|
||||
self.max_val
|
||||
}
|
||||
}
|
||||
|
||||
impl FastFieldReader for U64FastFieldReader {
|
||||
type ValueType = u64;
|
||||
|
||||
fn get(&self, doc: DocId) -> u64 {
|
||||
self.min_val + self.bit_unpacker.get(doc as usize)
|
||||
}
|
||||
|
||||
fn is_enabled(field_type: &FieldType) -> bool {
|
||||
match field_type {
|
||||
&FieldType::U64(ref integer_options) => {
|
||||
if integer_options.is_fast() {
|
||||
true
|
||||
}
|
||||
else {
|
||||
false
|
||||
}
|
||||
},
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Opens a new fast field reader given a read only source.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the data is corrupted.
|
||||
pub fn open(data: ReadOnlySource) -> U64FastFieldReader {
|
||||
fn open(data: ReadOnlySource) -> U64FastFieldReader {
|
||||
let min_val: u64;
|
||||
let max_val: u64;
|
||||
let bit_unpacker: BitUnpacker;
|
||||
@@ -72,9 +100,6 @@ impl U64FastFieldReader {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get(&self, doc: DocId) -> u64 {
|
||||
self.min_val + self.bit_unpacker.get(doc as usize)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -97,18 +122,98 @@ impl From<Vec<u64>> for U64FastFieldReader {
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
let fast_field_readers = U64FastFieldsReader::open(source).unwrap();
|
||||
fast_field_readers.get_field(field).unwrap()
|
||||
let fast_field_readers = FastFieldsReader::open(source).unwrap();
|
||||
fast_field_readers.open_reader(field).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct U64FastFieldsReader {
|
||||
|
||||
|
||||
/// Fast field reader returning one `i64` value per document.
pub struct I64FastFieldReader {
|
||||
// Source the reader was opened from. It is stored but never read directly;
// presumably it keeps the bytes used by `bit_unpacker` alive — TODO confirm.
_data: ReadOnlySource,
|
||||
// Unpacks the per-document offsets that get added to `min_val`.
bit_unpacker: BitUnpacker,
|
||||
// Value read first from the header; base added to every unpacked offset.
min_val: i64,
|
||||
// `min_val` plus the amplitude read from the header.
max_val: i64,
|
||||
}
|
||||
|
||||
impl I64FastFieldReader {
    /// Returns a reader over an empty `i64` fast field.
    ///
    /// # Panics
    /// Always panics for now: the empty `i64` reader is not implemented yet.
    pub fn empty() -> I64FastFieldReader {
        // TODO implement, e.g. `I64FastFieldReader::open(I64_FAST_FIELD_EMPTY.clone())`
        // once such an empty source exists.
        panic!("I64FastFieldReader::empty() is not implemented yet");
    }

    /// Returns the `min_val` read from the field's header when the reader was opened.
    pub fn min_val(&self) -> i64 {
        self.min_val
    }

    /// Returns the `max_val` computed from the field's header when the reader was opened.
    pub fn max_val(&self) -> i64 {
        self.max_val
    }
}
|
||||
|
||||
impl FastFieldReader for I64FastFieldReader {
|
||||
type ValueType = i64;
|
||||
|
||||
fn get(&self, doc: DocId) -> i64 {
|
||||
self.min_val + (self.bit_unpacker.get(doc as usize) as i64)
|
||||
}
|
||||
|
||||
/// Opens a new fast field reader given a read only source.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the data is corrupted.
|
||||
fn open(data: ReadOnlySource) -> I64FastFieldReader {
|
||||
let min_val: i64;
|
||||
let max_val: i64;
|
||||
let bit_unpacker: BitUnpacker;
|
||||
|
||||
{
|
||||
let mut cursor: &[u8] = data.as_slice();
|
||||
min_val = i64::deserialize(&mut cursor).expect("Failed to read the min_val of fast field.");
|
||||
let amplitude = u64::deserialize(&mut cursor).expect("Failed to read the amplitude of fast field.");
|
||||
max_val = min_val + (amplitude as i64);
|
||||
let num_bits = compute_num_bits(amplitude);
|
||||
bit_unpacker = BitUnpacker::new(cursor, num_bits as usize)
|
||||
}
|
||||
|
||||
I64FastFieldReader {
|
||||
_data: data,
|
||||
bit_unpacker: bit_unpacker,
|
||||
min_val: min_val,
|
||||
max_val: max_val,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn is_enabled(field_type: &FieldType) -> bool {
|
||||
match field_type {
|
||||
&FieldType::I64(ref integer_options) => {
|
||||
if integer_options.is_fast() {
|
||||
true
|
||||
}
|
||||
else {
|
||||
false
|
||||
}
|
||||
},
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
pub struct FastFieldsReader {
|
||||
source: ReadOnlySource,
|
||||
field_offsets: HashMap<Field, (u32, u32)>,
|
||||
}
|
||||
|
||||
impl U64FastFieldsReader {
|
||||
pub fn open(source: ReadOnlySource) -> io::Result<U64FastFieldsReader> {
|
||||
impl FastFieldsReader {
|
||||
|
||||
pub fn open(source: ReadOnlySource) -> io::Result<FastFieldsReader> {
|
||||
let header_offset;
|
||||
let field_offsets: Vec<(Field, u32)>;
|
||||
{
|
||||
@@ -132,12 +237,12 @@ impl U64FastFieldsReader {
|
||||
let (field, start_offset) = *field_start_offsets;
|
||||
field_offsets_map.insert(field, (start_offset, *stop_offset));
|
||||
}
|
||||
Ok(U64FastFieldsReader {
|
||||
Ok(FastFieldsReader {
|
||||
field_offsets: field_offsets_map,
|
||||
source: source,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
/// Returns the u64 fast value reader if the field
|
||||
/// is a u64 field indexed as "fast".
|
||||
///
|
||||
@@ -146,12 +251,12 @@ impl U64FastFieldsReader {
|
||||
///
|
||||
/// # Panics
|
||||
/// May panic if the index is corrupted.
|
||||
pub fn get_field(&self, field: Field) -> Option<U64FastFieldReader> {
|
||||
pub fn open_reader<FFReader: FastFieldReader>(&self, field: Field) -> Option<FFReader> {
|
||||
self.field_offsets
|
||||
.get(&field)
|
||||
.map(|&(start, stop)| {
|
||||
let field_source = self.source.slice(start as usize, stop as usize);
|
||||
U64FastFieldReader::open(field_source)
|
||||
FFReader::open(field_source)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ use core::TermIterator;
|
||||
use fastfield::delete::DeleteBitSet;
|
||||
use schema::{Schema, Field};
|
||||
use fastfield::FastFieldSerializer;
|
||||
use fastfield::FastFieldReader;
|
||||
use store::StoreWriter;
|
||||
use std::cmp::{min, max};
|
||||
use common::allocate_vec;
|
||||
|
||||
@@ -7,8 +7,10 @@
|
||||
#![feature(conservative_impl_trait)]
|
||||
#![feature(integer_atomics)]
|
||||
|
||||
#![cfg_attr(test, feature(rand))]
|
||||
#![cfg_attr(test, feature(test))]
|
||||
#![cfg_attr(test, feature(step_by))]
|
||||
|
||||
#![doc(test(attr(allow(unused_variables), deny(warnings))))]
|
||||
|
||||
#![warn(missing_docs)]
|
||||
@@ -200,6 +202,7 @@ mod tests {
|
||||
use schema::*;
|
||||
use DocSet;
|
||||
use IndexWriter;
|
||||
use fastfield::FastFieldReader;
|
||||
use Postings;
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -43,6 +43,7 @@ mod tests {
|
||||
use core::Index;
|
||||
use std::iter;
|
||||
use datastruct::stacker::Heap;
|
||||
use fastfield::FastFieldReader;
|
||||
use query::TermQuery;
|
||||
use schema::Field;
|
||||
use test::Bencher;
|
||||
|
||||
@@ -19,6 +19,7 @@ mod tests {
|
||||
use Index;
|
||||
use schema::*;
|
||||
use postings::SegmentPostingsOption;
|
||||
use fastfield::FastFieldReader;
|
||||
|
||||
fn abs_diff(left: f32, right: f32) -> f32 {
|
||||
(right - left).abs()
|
||||
|
||||
@@ -4,6 +4,7 @@ use fastfield::U64FastFieldReader;
|
||||
use postings::DocSet;
|
||||
use query::Scorer;
|
||||
use postings::Postings;
|
||||
use fastfield::FastFieldReader;
|
||||
|
||||
pub struct TermScorer<TPostings> where TPostings: Postings {
|
||||
pub idf: Score,
|
||||
|
||||
@@ -59,7 +59,7 @@ impl FieldEntry {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true iff the field is a u64 fast field
|
||||
/// Returns true iff the field is a u64 fast field
|
||||
pub fn is_u64_fast(&self,) -> bool {
|
||||
match self.field_type {
|
||||
FieldType::U64(ref options) => options.is_fast(),
|
||||
|
||||
@@ -36,15 +36,13 @@ impl FieldValue {
|
||||
|
||||
impl BinarySerializable for FieldValue {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
let mut written_size = 0;
|
||||
written_size += try!(self.field.serialize(writer));
|
||||
written_size += try!(self.value.serialize(writer));
|
||||
Ok(written_size)
|
||||
Ok(self.field.serialize(writer)? +
|
||||
self.value.serialize(writer)?)
|
||||
}
|
||||
|
||||
fn deserialize(reader: &mut Read) -> io::Result<Self> {
|
||||
let field = try!(Field::deserialize(reader));
|
||||
let value = try!(Value::deserialize(reader));
|
||||
let field = Field::deserialize(reader)?;
|
||||
let value = Value::deserialize(reader)?;
|
||||
Ok(FieldValue::new(field, value))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user