mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-05 16:52:55 +00:00
Refactoring of fastfields
This commit is contained in:
@@ -94,7 +94,6 @@ pub mod tests {
|
||||
use Score;
|
||||
use core::SegmentReader;
|
||||
use SegmentLocalId;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use fastfield::FastFieldReader;
|
||||
use schema::Field;
|
||||
|
||||
@@ -148,7 +147,7 @@ pub mod tests {
|
||||
pub struct FastFieldTestCollector {
|
||||
vals: Vec<u64>,
|
||||
field: Field,
|
||||
ff_reader: Option<U64FastFieldReader>,
|
||||
ff_reader: Option<FastFieldReader<u64>>,
|
||||
}
|
||||
|
||||
impl FastFieldTestCollector {
|
||||
|
||||
@@ -89,7 +89,7 @@ where
|
||||
|
||||
pub fn get(&self, idx: usize) -> u64 {
|
||||
if self.num_bits == 0 {
|
||||
return 0;
|
||||
return 0u64;
|
||||
}
|
||||
let data: &[u8] = &*self.data;
|
||||
let num_bits = self.num_bits;
|
||||
@@ -107,7 +107,7 @@ where
|
||||
);
|
||||
let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) };
|
||||
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
|
||||
(val_shifted & mask)
|
||||
val_shifted & mask
|
||||
} else {
|
||||
let val_unshifted_unmasked: u64 = if addr + 8 <= data.len() {
|
||||
unsafe { *(data[addr..].as_ptr() as *const u64) }
|
||||
@@ -119,14 +119,18 @@ where
|
||||
unsafe { *(buffer[..].as_ptr() as *const u64) }
|
||||
};
|
||||
let val_shifted = val_unshifted_unmasked >> (bit_shift as u64);
|
||||
(val_shifted & mask)
|
||||
val_shifted & mask
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads a range of values from the fast field.
|
||||
///
|
||||
/// The range of values read is from
|
||||
/// `[start..start + output.len()[`
|
||||
pub fn get_range(&self, start: u32, output: &mut [u64]) {
|
||||
if self.num_bits == 0 {
|
||||
for val in output.iter_mut() {
|
||||
*val = 0;
|
||||
*val = 0u64;
|
||||
}
|
||||
} else {
|
||||
let data: &[u8] = &*self.data;
|
||||
|
||||
@@ -21,10 +21,11 @@ use schema::FieldType;
|
||||
use error::ErrorKind;
|
||||
use termdict::TermDictionaryImpl;
|
||||
use fastfield::FacetReader;
|
||||
use fastfield::{FastFieldReader, U64FastFieldReader};
|
||||
use fastfield::FastFieldReader;
|
||||
use schema::Schema;
|
||||
use termdict::TermDictionary;
|
||||
use fastfield::MultiValueIntFastFieldReader;
|
||||
use fastfield::{FastValue, MultiValueIntFastFieldReader};
|
||||
use schema::Cardinality;
|
||||
|
||||
/// Entry point to access all of the datastructures of the `Segment`
|
||||
///
|
||||
@@ -91,18 +92,37 @@ impl SegmentReader {
|
||||
///
|
||||
/// # Panics
|
||||
/// May panic if the index is corrupted.
|
||||
pub fn fast_field_reader<TFastFieldReader: FastFieldReader>(
|
||||
pub fn fast_field_reader<Item: FastValue>(
|
||||
&self,
|
||||
field: Field,
|
||||
) -> fastfield::Result<TFastFieldReader> {
|
||||
) -> fastfield::Result<FastFieldReader<Item>> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
if !TFastFieldReader::is_enabled(field_entry.field_type()) {
|
||||
Err(FastFieldNotAvailableError::new(field_entry))
|
||||
} else {
|
||||
if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue) {
|
||||
self.fast_fields_composite
|
||||
.open_read(field)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(TFastFieldReader::open)
|
||||
.map(FastFieldReader::open)
|
||||
} else {
|
||||
Err(FastFieldNotAvailableError::new(field_entry))
|
||||
}
|
||||
}
|
||||
|
||||
/// Accessor to the `MultiValueIntFastFieldReader` associated to a given `Field`.
|
||||
/// May panick if the field is not a multivalued fastfield of the type `Item`.
|
||||
pub fn multi_fast_field_reader<Item: FastValue>(&self, field: Field) -> fastfield::Result<MultiValueIntFastFieldReader<Item>> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues) {
|
||||
let idx_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 0)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(FastFieldReader::open)?;
|
||||
let vals_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 1)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(FastFieldReader::open)?;
|
||||
Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
|
||||
} else {
|
||||
Err(FastFieldNotAvailableError::new(field_entry))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,25 +158,10 @@ impl SegmentReader {
|
||||
///
|
||||
/// They are simply stored as a fast field, serialized in
|
||||
/// the `.fieldnorm` file of the segment.
|
||||
pub fn get_fieldnorms_reader(&self, field: Field) -> Option<U64FastFieldReader> {
|
||||
pub fn get_fieldnorms_reader(&self, field: Field) -> Option<FastFieldReader<u64>> {
|
||||
self.fieldnorms_composite
|
||||
.open_read(field)
|
||||
.map(U64FastFieldReader::open)
|
||||
}
|
||||
|
||||
/// Accessor to the `MultiValueIntFastFieldReader` associated to a given `Field`.
|
||||
/// <TFastFieldReader: FastFieldReader>
|
||||
pub fn multi_fast_field_reader<Item>(&self, field: Field) -> Result<MultiValueIntFastFieldReader<Item>> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
let idx_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 0)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(U64FastFieldReader::open)?;
|
||||
let vals_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 1)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(U64FastFieldReader::open)?;
|
||||
Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
|
||||
.map(FastFieldReader::open)
|
||||
}
|
||||
|
||||
/// Accessor to the segment's `StoreReader`.
|
||||
|
||||
@@ -23,6 +23,19 @@ values stored.
|
||||
Read access performance is comparable to that of an array lookup.
|
||||
*/
|
||||
|
||||
use common;
|
||||
use schema::Cardinality;
|
||||
use schema::FieldType;
|
||||
use schema::Value;
|
||||
pub use self::delete::DeleteBitSet;
|
||||
pub use self::delete::write_delete_bitset;
|
||||
pub use self::error::{FastFieldNotAvailableError, Result};
|
||||
pub use self::facet_reader::FacetReader;
|
||||
pub use self::multivalued::MultiValueIntFastFieldReader;
|
||||
pub use self::reader::FastFieldReader;
|
||||
pub use self::serializer::FastFieldSerializer;
|
||||
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
|
||||
|
||||
mod reader;
|
||||
mod writer;
|
||||
mod serializer;
|
||||
@@ -31,18 +44,76 @@ mod delete;
|
||||
mod facet_reader;
|
||||
mod multivalued;
|
||||
|
||||
pub use self::delete::write_delete_bitset;
|
||||
pub use self::delete::DeleteBitSet;
|
||||
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
|
||||
pub use self::reader::{I64FastFieldReader, U64FastFieldReader};
|
||||
pub use self::reader::FastFieldReader;
|
||||
pub use self::serializer::FastFieldSerializer;
|
||||
pub use self::error::{FastFieldNotAvailableError, Result};
|
||||
pub use self::facet_reader::FacetReader;
|
||||
pub use self::multivalued::MultiValueIntFastFieldReader;
|
||||
/// Trait for types that are allowed for fast fields: (u64 or i64).
|
||||
pub trait FastValue: Default + Clone + Copy {
|
||||
/// Converts a value from u64
|
||||
///
|
||||
/// Internally all fast field values are encoded as u64.
|
||||
fn from_u64(val: u64) -> Self;
|
||||
|
||||
use common;
|
||||
use schema::Value;
|
||||
/// Converts a value to u64.
|
||||
///
|
||||
/// Internally all fast field values are encoded as u64.
|
||||
fn to_u64(&self) -> u64;
|
||||
|
||||
/// Returns the fast field cardinality that can be extracted from the given
|
||||
/// `FieldType`.
|
||||
///
|
||||
/// If the type is not a fast field, `None` is returned.
|
||||
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality>;
|
||||
|
||||
/// Cast value to `u64`.
|
||||
/// The value is just reinterpreted in memory.
|
||||
fn as_u64(&self) -> u64;
|
||||
}
|
||||
|
||||
|
||||
impl FastValue for u64 {
|
||||
fn from_u64(val: u64) -> Self {
|
||||
val
|
||||
}
|
||||
|
||||
fn to_u64(&self) -> u64 {
|
||||
*self
|
||||
}
|
||||
|
||||
fn as_u64(&self) -> u64 {
|
||||
*self
|
||||
}
|
||||
|
||||
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
|
||||
match *field_type {
|
||||
FieldType::U64(ref integer_options) =>
|
||||
integer_options.get_fastfield_cardinality(),
|
||||
FieldType::HierarchicalFacet =>
|
||||
Some(Cardinality::MultiValues),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FastValue for i64 {
|
||||
fn from_u64(val: u64) -> Self {
|
||||
common::u64_to_i64(val)
|
||||
}
|
||||
|
||||
fn to_u64(&self) -> u64 {
|
||||
common::i64_to_u64(*self)
|
||||
}
|
||||
|
||||
|
||||
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
|
||||
match *field_type {
|
||||
FieldType::I64(ref integer_options) =>
|
||||
integer_options.get_fastfield_cardinality(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn as_u64(&self) -> u64 {
|
||||
*self as u64
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_u64(value: &Value) -> u64 {
|
||||
match *value {
|
||||
@@ -55,21 +126,22 @@ fn value_to_u64(value: &Value) -> u64 {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use schema::Field;
|
||||
use std::path::Path;
|
||||
|
||||
use common::CompositeFile;
|
||||
use directory::{Directory, RAMDirectory, WritePtr};
|
||||
use schema::Document;
|
||||
use schema::{Schema, SchemaBuilder};
|
||||
use schema::FAST;
|
||||
use std::collections::HashMap;
|
||||
use test::Bencher;
|
||||
use test;
|
||||
use fastfield::FastFieldReader;
|
||||
use rand::Rng;
|
||||
use rand::SeedableRng;
|
||||
use common::CompositeFile;
|
||||
use rand::XorShiftRng;
|
||||
use schema::{Schema, SchemaBuilder};
|
||||
use schema::Document;
|
||||
use schema::FAST;
|
||||
use schema::Field;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use super::*;
|
||||
use test;
|
||||
use test::Bencher;
|
||||
|
||||
lazy_static! {
|
||||
static ref SCHEMA: Schema = {
|
||||
@@ -84,7 +156,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
pub fn test_fastfield() {
|
||||
let test_fastfield = U64FastFieldReader::from(vec![100, 200, 300]);
|
||||
let test_fastfield = FastFieldReader::<u64>::from(vec![100, 200, 300]);
|
||||
assert_eq!(test_fastfield.get(0), 100);
|
||||
assert_eq!(test_fastfield.get(1), 200);
|
||||
assert_eq!(test_fastfield.get(2), 300);
|
||||
@@ -113,7 +185,7 @@ mod tests {
|
||||
{
|
||||
let composite_file = CompositeFile::open(&source).unwrap();
|
||||
let field_source = composite_file.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader = U64FastFieldReader::open(field_source);
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(field_source);
|
||||
assert_eq!(fast_field_reader.get(0), 13u64);
|
||||
assert_eq!(fast_field_reader.get(1), 14u64);
|
||||
assert_eq!(fast_field_reader.get(2), 2u64);
|
||||
@@ -148,8 +220,8 @@ mod tests {
|
||||
}
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
assert_eq!(fast_field_reader.get(0), 4u64);
|
||||
assert_eq!(fast_field_reader.get(1), 14_082_001u64);
|
||||
assert_eq!(fast_field_reader.get(2), 3_052u64);
|
||||
@@ -185,8 +257,8 @@ mod tests {
|
||||
}
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
for doc in 0..10_000 {
|
||||
assert_eq!(fast_field_reader.get(doc), 100_000u64);
|
||||
}
|
||||
@@ -218,9 +290,8 @@ mod tests {
|
||||
}
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
assert_eq!(fast_field_reader.get(0), 0u64);
|
||||
for doc in 1..10_001 {
|
||||
assert_eq!(
|
||||
@@ -259,8 +330,8 @@ mod tests {
|
||||
}
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: I64FastFieldReader =
|
||||
I64FastFieldReader::open(fast_fields_composite.open_read(i64_field).unwrap());
|
||||
let data = fast_fields_composite.open_read(i64_field).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<i64>::open(data);
|
||||
|
||||
assert_eq!(fast_field_reader.min_value(), -100i64);
|
||||
assert_eq!(fast_field_reader.max_value(), 9_999i64);
|
||||
@@ -298,8 +369,8 @@ mod tests {
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: I64FastFieldReader =
|
||||
I64FastFieldReader::open(fast_fields_composite.open_read(i64_field).unwrap());
|
||||
let data = fast_fields_composite.open_read(i64_field).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<i64>::open(data);
|
||||
assert_eq!(fast_field_reader.get(0u32), 0i64);
|
||||
}
|
||||
}
|
||||
@@ -333,8 +404,8 @@ mod tests {
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
|
||||
let mut a = 0u64;
|
||||
for _ in 0..n {
|
||||
@@ -390,8 +461,8 @@ mod tests {
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
|
||||
b.iter(|| {
|
||||
let n = test::black_box(7000u32);
|
||||
@@ -424,8 +495,8 @@ mod tests {
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
|
||||
b.iter(|| {
|
||||
let n = test::black_box(1000u32);
|
||||
|
||||
@@ -4,8 +4,6 @@ mod reader;
|
||||
pub use self::writer::MultiValueIntFastFieldWriter;
|
||||
pub use self::reader::MultiValueIntFastFieldReader;
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -88,26 +86,4 @@ mod tests {
|
||||
assert_eq!(&vals, &[-5i64, -20i64, 1i64]);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_multivalued_unreachable() {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let field = schema_builder.add_i64_field(
|
||||
"multifield",
|
||||
IntOptions::default().set_fast(Cardinality::MultiValues)
|
||||
);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
index_writer.add_document(doc!(field=> 1i64, field => 3i64));
|
||||
assert!(index_writer.commit().is_ok());
|
||||
|
||||
index.load_searchers().unwrap();
|
||||
let searcher = index.searcher();
|
||||
let reader = searcher.segment_reader(0);
|
||||
let multi_value_reader = reader.multi_fast_field_reader::<String>(field).unwrap();
|
||||
let mut vals = Vec::new();
|
||||
multi_value_reader.get_vals(0, &mut vals);
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,5 @@
|
||||
use DocId;
|
||||
use fastfield::FastFieldReader;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use std::marker::PhantomData;
|
||||
use common;
|
||||
|
||||
|
||||
use fastfield::{FastFieldReader, FastValue};
|
||||
|
||||
|
||||
/// Reader for a multivalued `u64` fast field.
|
||||
@@ -16,44 +11,19 @@ use common;
|
||||
/// The `idx_reader` associated, for each document, the index of its first value.
|
||||
///
|
||||
#[derive(Clone)]
|
||||
pub struct MultiValueIntFastFieldReader<Item> {
|
||||
idx_reader: U64FastFieldReader,
|
||||
vals_reader: U64FastFieldReader,
|
||||
__phantom__: PhantomData<Item>
|
||||
pub struct MultiValueIntFastFieldReader<Item: FastValue> {
|
||||
idx_reader: FastFieldReader<u64>,
|
||||
vals_reader: FastFieldReader<Item>
|
||||
}
|
||||
|
||||
trait ConvertU64<Item> {
|
||||
fn from_u64(val: u64) -> Item;
|
||||
}
|
||||
|
||||
impl<Item> ConvertU64<Item> for MultiValueIntFastFieldReader<Item> {
|
||||
default fn from_u64(_: u64) -> Item {
|
||||
unimplemented!("MultiValueIntFastField only exists for u64 and i64.");
|
||||
}
|
||||
}
|
||||
|
||||
impl ConvertU64<u64> for MultiValueIntFastFieldReader<u64> {
|
||||
fn from_u64(val: u64) -> u64 {
|
||||
val
|
||||
}
|
||||
}
|
||||
|
||||
impl ConvertU64<i64> for MultiValueIntFastFieldReader<i64> {
|
||||
fn from_u64(val: u64) -> i64 {
|
||||
common::u64_to_i64(val)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<Item> MultiValueIntFastFieldReader<Item> {
|
||||
impl<Item: FastValue> MultiValueIntFastFieldReader<Item> {
|
||||
pub(crate) fn open(
|
||||
idx_reader: U64FastFieldReader,
|
||||
vals_reader: U64FastFieldReader,
|
||||
idx_reader: FastFieldReader<u64>,
|
||||
vals_reader: FastFieldReader<Item>,
|
||||
) -> MultiValueIntFastFieldReader<Item> {
|
||||
MultiValueIntFastFieldReader {
|
||||
idx_reader,
|
||||
vals_reader,
|
||||
__phantom__: PhantomData,
|
||||
vals_reader
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,11 +31,9 @@ impl<Item> MultiValueIntFastFieldReader<Item> {
|
||||
pub fn get_vals(&self, doc: DocId, vals: &mut Vec<Item>) {
|
||||
let start = self.idx_reader.get(doc) as u32;
|
||||
let stop = self.idx_reader.get(doc + 1) as u32;
|
||||
vals.clear();
|
||||
for val_id in start..stop {
|
||||
let val = self.vals_reader.get(val_id);
|
||||
vals.push(Self::from_u64(val));
|
||||
}
|
||||
let len = (stop - start) as usize;
|
||||
vals.resize(len, Item::default());
|
||||
self.vals_reader.get_range(start, &mut vals[..]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,111 +1,36 @@
|
||||
use directory::ReadOnlySource;
|
||||
use common::{self, BinarySerializable};
|
||||
use common::compute_num_bits;
|
||||
use common::BinarySerializable;
|
||||
use common::bitpacker::BitUnpacker;
|
||||
use DocId;
|
||||
use schema::SchemaBuilder;
|
||||
use std::path::Path;
|
||||
use schema::FAST;
|
||||
use directory::{Directory, RAMDirectory, WritePtr};
|
||||
use fastfield::{FastFieldSerializer, FastFieldsWriter};
|
||||
use schema::FieldType;
|
||||
use std::mem;
|
||||
use common::CompositeFile;
|
||||
use std::collections::HashMap;
|
||||
use common::compute_num_bits;
|
||||
use directory::{Directory, RAMDirectory, WritePtr};
|
||||
use directory::ReadOnlySource;
|
||||
use DocId;
|
||||
use fastfield::{FastFieldSerializer, FastFieldsWriter};
|
||||
use owning_ref::OwningRef;
|
||||
use schema::FAST;
|
||||
use schema::SchemaBuilder;
|
||||
use std::collections::HashMap;
|
||||
use std::marker::PhantomData;
|
||||
use std::mem;
|
||||
use std::path::Path;
|
||||
use super::FastValue;
|
||||
|
||||
/// Trait for accessing a fastfield.
|
||||
///
|
||||
/// Depending on the field type, a different
|
||||
/// fast field is required.
|
||||
pub trait FastFieldReader: Sized {
|
||||
/// Type of the value stored in the fastfield.
|
||||
type ValueType;
|
||||
#[derive(Clone)]
|
||||
pub struct FastFieldReader<Item: FastValue> {
|
||||
bit_unpacker: BitUnpacker<OwningRef<ReadOnlySource, [u8]>>,
|
||||
min_value_u64: u64,
|
||||
max_value_u64: u64,
|
||||
_phantom: PhantomData<Item>
|
||||
}
|
||||
|
||||
/// Return the value associated to the given document.
|
||||
///
|
||||
/// This accessor should return as fast as possible.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `doc` is greater than the segment
|
||||
// `maxdoc`.
|
||||
fn get(&self, doc: DocId) -> Self::ValueType;
|
||||
|
||||
/// Fills an output buffer with the fast field values
|
||||
/// associated with the `DocId` going from
|
||||
/// `start` to `start + output.len()`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `start + output.len()` is greater than
|
||||
/// the segment's `maxdoc`.
|
||||
fn get_range(&self, start: u32, output: &mut [Self::ValueType]);
|
||||
impl<Item: FastValue> FastFieldReader<Item> {
|
||||
|
||||
/// Opens a fast field given a source.
|
||||
fn open(source: ReadOnlySource) -> Self;
|
||||
|
||||
/// Returns true iff the given field_type makes
|
||||
/// it possible to access the field values via a
|
||||
/// fastfield.
|
||||
fn is_enabled(field_type: &FieldType) -> bool;
|
||||
}
|
||||
|
||||
/// `FastFieldReader` for unsigned 64-bits integers.
|
||||
#[derive(Clone)]
|
||||
pub struct U64FastFieldReader {
|
||||
bit_unpacker: BitUnpacker<OwningRef<ReadOnlySource, [u8]>>,
|
||||
min_value: u64,
|
||||
max_value: u64,
|
||||
}
|
||||
|
||||
impl U64FastFieldReader {
|
||||
/// Returns the minimum value for this fast field.
|
||||
///
|
||||
/// The min value does not take in account of possible
|
||||
/// deleted document, and should be considered as a lower bound
|
||||
/// of the actual minimum value.
|
||||
pub fn min_value(&self) -> u64 {
|
||||
self.min_value
|
||||
}
|
||||
|
||||
/// Returns the maximum value for this fast field.
|
||||
///
|
||||
/// The max value does not take in account of possible
|
||||
/// deleted document, and should be considered as an upper bound
|
||||
/// of the actual maximum value.
|
||||
pub fn max_value(&self) -> u64 {
|
||||
self.max_value
|
||||
}
|
||||
}
|
||||
|
||||
impl FastFieldReader for U64FastFieldReader {
|
||||
type ValueType = u64;
|
||||
|
||||
fn get(&self, doc: DocId) -> u64 {
|
||||
self.min_value + self.bit_unpacker.get(doc as usize)
|
||||
}
|
||||
|
||||
fn is_enabled(field_type: &FieldType) -> bool {
|
||||
match *field_type {
|
||||
FieldType::U64(ref integer_options) => integer_options.is_fast(),
|
||||
FieldType::HierarchicalFacet => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_range(&self, start: u32, output: &mut [Self::ValueType]) {
|
||||
self.bit_unpacker.get_range(start, output);
|
||||
for out in output.iter_mut() {
|
||||
*out += self.min_value;
|
||||
}
|
||||
}
|
||||
|
||||
/// Opens a new fast field reader given a read only source.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the data is corrupted.
|
||||
fn open(data: ReadOnlySource) -> U64FastFieldReader {
|
||||
pub fn open(data: ReadOnlySource) -> Self {
|
||||
let min_value: u64;
|
||||
let amplitude: u64;
|
||||
{
|
||||
@@ -119,16 +44,64 @@ impl FastFieldReader for U64FastFieldReader {
|
||||
let num_bits = compute_num_bits(amplitude);
|
||||
let owning_ref = OwningRef::new(data).map(|data| &data[16..]);
|
||||
let bit_unpacker = BitUnpacker::new(owning_ref, num_bits);
|
||||
U64FastFieldReader {
|
||||
min_value,
|
||||
max_value,
|
||||
FastFieldReader {
|
||||
min_value_u64: min_value,
|
||||
max_value_u64: max_value,
|
||||
bit_unpacker,
|
||||
_phantom: PhantomData
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Return the value associated to the given document.
|
||||
///
|
||||
/// This accessor should return as fast as possible.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `doc` is greater than the segment
|
||||
// `maxdoc`.
|
||||
pub fn get(&self, doc: DocId) -> Item {
|
||||
Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc as usize))
|
||||
}
|
||||
|
||||
/// Fills an output buffer with the fast field values
|
||||
/// associated with the `DocId` going from
|
||||
/// `start` to `start + output.len()`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `start + output.len()` is greater than
|
||||
/// the segment's `maxdoc`.
|
||||
pub fn get_range(&self, start: u32, output: &mut [Item]) {
|
||||
let output_u64: &mut [u64] = unsafe { mem::transmute(output) };
|
||||
self.bit_unpacker.get_range(start, output_u64);
|
||||
for out in output_u64.iter_mut() {
|
||||
*out = Item::from_u64(*out + self.min_value_u64).as_u64();
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the minimum value for this fast field.
|
||||
///
|
||||
/// The max value does not take in account of possible
|
||||
/// deleted document, and should be considered as an upper bound
|
||||
/// of the actual maximum value.
|
||||
pub fn min_value(&self) -> Item {
|
||||
Item::from_u64(self.min_value_u64)
|
||||
}
|
||||
|
||||
/// Returns the maximum value for this fast field.
|
||||
///
|
||||
/// The max value does not take in account of possible
|
||||
/// deleted document, and should be considered as an upper bound
|
||||
/// of the actual maximum value.
|
||||
pub fn max_value(&self) -> Item {
|
||||
Item::from_u64(self.max_value_u64)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<u64>> for U64FastFieldReader {
|
||||
fn from(vals: Vec<u64>) -> U64FastFieldReader {
|
||||
impl<Item: FastValue> From<Vec<Item>> for FastFieldReader<Item> {
|
||||
fn from(vals: Vec<Item>) -> FastFieldReader<Item> {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let field = schema_builder.add_u64_field("field", FAST);
|
||||
let schema = schema_builder.build();
|
||||
@@ -146,7 +119,7 @@ impl From<Vec<u64>> for U64FastFieldReader {
|
||||
.get_field_writer(field)
|
||||
.expect("With a RAMDirectory, this should never fail.");
|
||||
for val in vals {
|
||||
fast_field_writer.add_val(val);
|
||||
fast_field_writer.add_val(val.to_u64());
|
||||
}
|
||||
}
|
||||
fast_field_writers
|
||||
@@ -158,79 +131,10 @@ impl From<Vec<u64>> for U64FastFieldReader {
|
||||
let source = directory.open_read(path).expect("Failed to open the file");
|
||||
let composite_file =
|
||||
CompositeFile::open(&source).expect("Failed to read the composite file");
|
||||
|
||||
let field_source = composite_file
|
||||
.open_read(field)
|
||||
.expect("File component not found");
|
||||
U64FastFieldReader::open(field_source)
|
||||
FastFieldReader::open(field_source)
|
||||
}
|
||||
}
|
||||
|
||||
/// `FastFieldReader` for signed 64-bits integers.
|
||||
pub struct I64FastFieldReader {
|
||||
underlying: U64FastFieldReader,
|
||||
}
|
||||
|
||||
impl I64FastFieldReader {
|
||||
/// Returns the minimum value for this fast field.
|
||||
///
|
||||
/// The min value does not take in account of possible
|
||||
/// deleted document, and should be considered as a lower bound
|
||||
/// of the actual minimum value.
|
||||
pub fn min_value(&self) -> i64 {
|
||||
common::u64_to_i64(self.underlying.min_value())
|
||||
}
|
||||
|
||||
/// Returns the maximum value for this fast field.
|
||||
///
|
||||
/// The max value does not take in account of possible
|
||||
/// deleted document, and should be considered as an upper bound
|
||||
/// of the actual maximum value.
|
||||
pub fn max_value(&self) -> i64 {
|
||||
common::u64_to_i64(self.underlying.max_value())
|
||||
}
|
||||
}
|
||||
|
||||
impl FastFieldReader for I64FastFieldReader {
|
||||
type ValueType = i64;
|
||||
|
||||
///
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic or return wrong random result if `doc`
|
||||
/// is greater or equal to the segment's `maxdoc`.
|
||||
fn get(&self, doc: DocId) -> i64 {
|
||||
common::u64_to_i64(self.underlying.get(doc))
|
||||
}
|
||||
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic or return wrong random result if `doc`
|
||||
/// is greater or equal to the segment's `maxdoc`.
|
||||
fn get_range(&self, start: u32, output: &mut [Self::ValueType]) {
|
||||
let output_u64: &mut [u64] = unsafe { mem::transmute(output) };
|
||||
self.underlying.get_range(start, output_u64);
|
||||
for mut_val in output_u64.iter_mut() {
|
||||
*mut_val = common::u64_to_i64(*mut_val as u64) as u64;
|
||||
}
|
||||
}
|
||||
|
||||
/// Opens a new fast field reader given a read only source.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the data is corrupted.
|
||||
fn open(data: ReadOnlySource) -> I64FastFieldReader {
|
||||
I64FastFieldReader {
|
||||
underlying: U64FastFieldReader::open(data),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_enabled(field_type: &FieldType) -> bool {
|
||||
match *field_type {
|
||||
FieldType::I64(ref integer_options) => integer_options.is_fast(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,8 +52,8 @@ impl FastFieldsWriter {
|
||||
}
|
||||
}
|
||||
FastFieldsWriter {
|
||||
single_value_writers: single_value_writers,
|
||||
multi_values_writers: multi_values_writers,
|
||||
single_value_writers,
|
||||
multi_values_writers
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ use DocId;
|
||||
use core::SerializableSegment;
|
||||
use indexer::SegmentSerializer;
|
||||
use postings::InvertedIndexSerializer;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use itertools::Itertools;
|
||||
use postings::Postings;
|
||||
use docset::DocSet;
|
||||
@@ -26,7 +25,7 @@ pub struct IndexMerger {
|
||||
}
|
||||
|
||||
fn compute_min_max_val(
|
||||
u64_reader: &U64FastFieldReader,
|
||||
u64_reader: &FastFieldReader<u64>,
|
||||
max_doc: DocId,
|
||||
delete_bitset: &DeleteBitSet,
|
||||
) -> Option<(u64, u64)> {
|
||||
@@ -50,14 +49,14 @@ fn compute_min_max_val(
|
||||
fn extract_fieldnorm_reader(
|
||||
segment_reader: &SegmentReader,
|
||||
field: Field,
|
||||
) -> Option<U64FastFieldReader> {
|
||||
) -> Option<FastFieldReader<u64>> {
|
||||
segment_reader.get_fieldnorms_reader(field)
|
||||
}
|
||||
|
||||
fn extract_fast_field_reader(
|
||||
segment_reader: &SegmentReader,
|
||||
field: Field,
|
||||
) -> Option<U64FastFieldReader> {
|
||||
) -> Option<FastFieldReader<u64>> {
|
||||
segment_reader.fast_field_reader(field).ok()
|
||||
}
|
||||
|
||||
@@ -137,7 +136,7 @@ impl IndexMerger {
|
||||
fn generic_write_fast_field(
|
||||
&self,
|
||||
fields: Vec<Field>,
|
||||
field_reader_extractor: &Fn(&SegmentReader, Field) -> Option<U64FastFieldReader>,
|
||||
field_reader_extractor: &Fn(&SegmentReader, Field) -> Option<FastFieldReader<u64>>,
|
||||
fast_field_serializer: &mut FastFieldSerializer,
|
||||
) -> Result<()> {
|
||||
for field in fields {
|
||||
@@ -368,7 +367,6 @@ mod tests {
|
||||
use query::TermQuery;
|
||||
use schema::Field;
|
||||
use core::Index;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use Searcher;
|
||||
use DocAddress;
|
||||
use collector::tests::FastFieldTestCollector;
|
||||
@@ -628,16 +626,16 @@ mod tests {
|
||||
vec![6_000, 7_000]
|
||||
);
|
||||
|
||||
let score_field_reader: U64FastFieldReader = searcher
|
||||
let score_field_reader = searcher
|
||||
.segment_reader(0)
|
||||
.fast_field_reader(score_field)
|
||||
.fast_field_reader::<u64>(score_field)
|
||||
.unwrap();
|
||||
assert_eq!(score_field_reader.min_value(), 1);
|
||||
assert_eq!(score_field_reader.max_value(), 3);
|
||||
|
||||
let score_field_reader: U64FastFieldReader = searcher
|
||||
let score_field_reader = searcher
|
||||
.segment_reader(1)
|
||||
.fast_field_reader(score_field)
|
||||
.fast_field_reader::<u64>(score_field)
|
||||
.unwrap();
|
||||
assert_eq!(score_field_reader.min_value(), 4000);
|
||||
assert_eq!(score_field_reader.max_value(), 7000);
|
||||
@@ -685,9 +683,9 @@ mod tests {
|
||||
search_term(&searcher, Term::from_field_text(text_field, "g")),
|
||||
vec![6_000, 7_000]
|
||||
);
|
||||
let score_field_reader: U64FastFieldReader = searcher
|
||||
let score_field_reader = searcher
|
||||
.segment_reader(0)
|
||||
.fast_field_reader(score_field)
|
||||
.fast_field_reader::<u64>(score_field)
|
||||
.unwrap();
|
||||
assert_eq!(score_field_reader.min_value(), 3);
|
||||
assert_eq!(score_field_reader.max_value(), 7000);
|
||||
@@ -731,9 +729,9 @@ mod tests {
|
||||
search_term(&searcher, Term::from_field_text(text_field, "g")),
|
||||
vec![6_000, 7_000]
|
||||
);
|
||||
let score_field_reader: U64FastFieldReader = searcher
|
||||
let score_field_reader = searcher
|
||||
.segment_reader(0)
|
||||
.fast_field_reader(score_field)
|
||||
.fast_field_reader::<u64>(score_field)
|
||||
.unwrap();
|
||||
assert_eq!(score_field_reader.min_value(), 3);
|
||||
assert_eq!(score_field_reader.max_value(), 7000);
|
||||
@@ -782,9 +780,9 @@ mod tests {
|
||||
search_term(&searcher, Term::from_field_text(text_field, "g")),
|
||||
vec![6_000, 7_000]
|
||||
);
|
||||
let score_field_reader: U64FastFieldReader = searcher
|
||||
let score_field_reader = searcher
|
||||
.segment_reader(0)
|
||||
.fast_field_reader(score_field)
|
||||
.fast_field_reader::<u64>(score_field)
|
||||
.unwrap();
|
||||
assert_eq!(score_field_reader.min_value(), 6000);
|
||||
assert_eq!(score_field_reader.max_value(), 7000);
|
||||
|
||||
12
src/lib.rs
12
src/lib.rs
@@ -3,7 +3,6 @@
|
||||
#![cfg_attr(feature = "cargo-clippy", allow(inline_always))]
|
||||
#![feature(box_syntax)]
|
||||
#![feature(optin_builtin_traits)]
|
||||
#![feature(specialization)]
|
||||
#![feature(conservative_impl_trait)]
|
||||
#![feature(collections_range)]
|
||||
#![feature(integer_atomics)]
|
||||
@@ -287,7 +286,6 @@ mod tests {
|
||||
use schema::*;
|
||||
use docset::DocSet;
|
||||
use IndexWriter;
|
||||
use fastfield::{FastFieldReader, I64FastFieldReader, U64FastFieldReader};
|
||||
use Postings;
|
||||
use rand::{Rng, SeedableRng, XorShiftRng};
|
||||
use rand::distributions::{IndependentSample, Range};
|
||||
@@ -858,22 +856,22 @@ mod tests {
|
||||
let segment_reader: &SegmentReader = searcher.segment_reader(0);
|
||||
{
|
||||
let fast_field_reader_res =
|
||||
segment_reader.fast_field_reader::<U64FastFieldReader>(text_field);
|
||||
segment_reader.fast_field_reader::<u64>(text_field);
|
||||
assert!(fast_field_reader_res.is_err());
|
||||
}
|
||||
{
|
||||
let fast_field_reader_res =
|
||||
segment_reader.fast_field_reader::<U64FastFieldReader>(stored_int_field);
|
||||
segment_reader.fast_field_reader::<u64>(stored_int_field);
|
||||
assert!(fast_field_reader_res.is_err());
|
||||
}
|
||||
{
|
||||
let fast_field_reader_res =
|
||||
segment_reader.fast_field_reader::<U64FastFieldReader>(fast_field_signed);
|
||||
segment_reader.fast_field_reader::<u64>(fast_field_signed);
|
||||
assert!(fast_field_reader_res.is_err());
|
||||
}
|
||||
{
|
||||
let fast_field_reader_res =
|
||||
segment_reader.fast_field_reader::<I64FastFieldReader>(fast_field_signed);
|
||||
segment_reader.fast_field_reader::<i64>(fast_field_signed);
|
||||
assert!(fast_field_reader_res.is_ok());
|
||||
let fast_field_reader = fast_field_reader_res.unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 4i64)
|
||||
@@ -881,7 +879,7 @@ mod tests {
|
||||
|
||||
{
|
||||
let fast_field_reader_res =
|
||||
segment_reader.fast_field_reader::<I64FastFieldReader>(fast_field_signed);
|
||||
segment_reader.fast_field_reader::<i64>(fast_field_signed);
|
||||
assert!(fast_field_reader_res.is_ok());
|
||||
let fast_field_reader = fast_field_reader_res.unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 4i64)
|
||||
|
||||
@@ -51,7 +51,6 @@ pub mod tests {
|
||||
use schema::IndexRecordOption;
|
||||
use std::iter;
|
||||
use datastruct::stacker::Heap;
|
||||
use fastfield::FastFieldReader;
|
||||
use query::TermQuery;
|
||||
use schema::Field;
|
||||
use test::{self, Bencher};
|
||||
|
||||
@@ -13,7 +13,6 @@ mod tests {
|
||||
use postings::SegmentPostings;
|
||||
use query::{Query, Scorer};
|
||||
use query::term_query::TermScorer;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use query::TermQuery;
|
||||
use Index;
|
||||
use schema::*;
|
||||
@@ -56,7 +55,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
pub fn test_term_scorer() {
|
||||
let left_fieldnorms = U64FastFieldReader::from(vec![10, 4]);
|
||||
let left_fieldnorms = FastFieldReader::from(vec![10, 4]);
|
||||
assert_eq!(left_fieldnorms.get(0), 10);
|
||||
assert_eq!(left_fieldnorms.get(1), 4);
|
||||
let left = SegmentPostings::create_from_docs(&[1]);
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use Score;
|
||||
use DocId;
|
||||
use docset::{DocSet, SkipResult};
|
||||
use fastfield::U64FastFieldReader;
|
||||
use postings::SegmentPostings;
|
||||
use query::Scorer;
|
||||
use postings::Postings;
|
||||
@@ -9,7 +8,7 @@ use fastfield::FastFieldReader;
|
||||
|
||||
pub struct TermScorer {
|
||||
pub idf: Score,
|
||||
pub fieldnorm_reader_opt: Option<U64FastFieldReader>,
|
||||
pub fieldnorm_reader_opt: Option<FastFieldReader<u64>>,
|
||||
pub postings: SegmentPostings,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user