mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 01:50:42 +00:00
Merge branch 'feature/multivalued-i64-u64'
This commit is contained in:
@@ -94,7 +94,6 @@ pub mod tests {
|
||||
use Score;
|
||||
use core::SegmentReader;
|
||||
use SegmentLocalId;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use fastfield::FastFieldReader;
|
||||
use schema::Field;
|
||||
|
||||
@@ -148,7 +147,7 @@ pub mod tests {
|
||||
pub struct FastFieldTestCollector {
|
||||
vals: Vec<u64>,
|
||||
field: Field,
|
||||
ff_reader: Option<U64FastFieldReader>,
|
||||
ff_reader: Option<FastFieldReader<u64>>,
|
||||
}
|
||||
|
||||
impl FastFieldTestCollector {
|
||||
@@ -167,7 +166,7 @@ pub mod tests {
|
||||
|
||||
impl Collector for FastFieldTestCollector {
|
||||
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
|
||||
self.ff_reader = Some(reader.get_fast_field_reader(self.field)?);
|
||||
self.ff_reader = Some(reader.fast_field_reader(self.field)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -89,7 +89,7 @@ where
|
||||
|
||||
pub fn get(&self, idx: usize) -> u64 {
|
||||
if self.num_bits == 0 {
|
||||
return 0;
|
||||
return 0u64;
|
||||
}
|
||||
let data: &[u8] = &*self.data;
|
||||
let num_bits = self.num_bits;
|
||||
@@ -107,7 +107,7 @@ where
|
||||
);
|
||||
let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) };
|
||||
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
|
||||
(val_shifted & mask)
|
||||
val_shifted & mask
|
||||
} else {
|
||||
let val_unshifted_unmasked: u64 = if addr + 8 <= data.len() {
|
||||
unsafe { *(data[addr..].as_ptr() as *const u64) }
|
||||
@@ -119,14 +119,18 @@ where
|
||||
unsafe { *(buffer[..].as_ptr() as *const u64) }
|
||||
};
|
||||
let val_shifted = val_unshifted_unmasked >> (bit_shift as u64);
|
||||
(val_shifted & mask)
|
||||
val_shifted & mask
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads a range of values from the fast field.
|
||||
///
|
||||
/// The range of values read is from
|
||||
/// `[start..start + output.len()[`
|
||||
pub fn get_range(&self, start: u32, output: &mut [u64]) {
|
||||
if self.num_bits == 0 {
|
||||
for val in output.iter_mut() {
|
||||
*val = 0;
|
||||
*val = 0u64;
|
||||
}
|
||||
} else {
|
||||
let data: &[u8] = &*self.data;
|
||||
|
||||
@@ -21,10 +21,11 @@ use schema::FieldType;
|
||||
use error::ErrorKind;
|
||||
use termdict::TermDictionaryImpl;
|
||||
use fastfield::FacetReader;
|
||||
use fastfield::{FastFieldReader, U64FastFieldReader};
|
||||
use fastfield::FastFieldReader;
|
||||
use schema::Schema;
|
||||
use termdict::TermDictionary;
|
||||
use fastfield::MultiValueIntFastFieldReader;
|
||||
use fastfield::{FastValue, MultiValueIntFastFieldReader};
|
||||
use schema::Cardinality;
|
||||
|
||||
/// Entry point to access all of the datastructures of the `Segment`
|
||||
///
|
||||
@@ -91,18 +92,37 @@ impl SegmentReader {
|
||||
///
|
||||
/// # Panics
|
||||
/// May panic if the index is corrupted.
|
||||
pub fn get_fast_field_reader<TFastFieldReader: FastFieldReader>(
|
||||
pub fn fast_field_reader<Item: FastValue>(
|
||||
&self,
|
||||
field: Field,
|
||||
) -> fastfield::Result<TFastFieldReader> {
|
||||
) -> fastfield::Result<FastFieldReader<Item>> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
if !TFastFieldReader::is_enabled(field_entry.field_type()) {
|
||||
Err(FastFieldNotAvailableError::new(field_entry))
|
||||
} else {
|
||||
if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue) {
|
||||
self.fast_fields_composite
|
||||
.open_read(field)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(TFastFieldReader::open)
|
||||
.map(FastFieldReader::open)
|
||||
} else {
|
||||
Err(FastFieldNotAvailableError::new(field_entry))
|
||||
}
|
||||
}
|
||||
|
||||
/// Accessor to the `MultiValueIntFastFieldReader` associated to a given `Field`.
|
||||
/// May panic if the field is not a multivalued fastfield of the type `Item`.
|
||||
pub fn multi_fast_field_reader<Item: FastValue>(&self, field: Field) -> fastfield::Result<MultiValueIntFastFieldReader<Item>> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues) {
|
||||
let idx_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 0)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(FastFieldReader::open)?;
|
||||
let vals_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 1)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(FastFieldReader::open)?;
|
||||
Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
|
||||
} else {
|
||||
Err(FastFieldNotAvailableError::new(field_entry))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -116,7 +136,7 @@ impl SegmentReader {
|
||||
field_entry
|
||||
)).into());
|
||||
}
|
||||
let term_ords_reader = self.multi_value_reader(field)?;
|
||||
let term_ords_reader = self.multi_fast_field_reader(field)?;
|
||||
let termdict_source = self.termdict_composite.open_read(field).ok_or_else(|| {
|
||||
ErrorKind::InvalidArgument(format!(
|
||||
"The field \"{}\" is a hierarchical \
|
||||
@@ -130,20 +150,6 @@ impl SegmentReader {
|
||||
Ok(facet_reader)
|
||||
}
|
||||
|
||||
/// Accessor to the `MultiValueIntFastFieldReader` associated to a given `Field`.
|
||||
pub fn multi_value_reader(&self, field: Field) -> Result<MultiValueIntFastFieldReader> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
let idx_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 0)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(U64FastFieldReader::open)?;
|
||||
let vals_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 1)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(U64FastFieldReader::open)?;
|
||||
Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
|
||||
}
|
||||
|
||||
/// Accessor to the segment's `Field norms`'s reader.
|
||||
///
|
||||
/// Field norms are the length (in tokens) of the fields.
|
||||
@@ -152,10 +158,10 @@ impl SegmentReader {
|
||||
///
|
||||
/// They are simply stored as a fast field, serialized in
|
||||
/// the `.fieldnorm` file of the segment.
|
||||
pub fn get_fieldnorms_reader(&self, field: Field) -> Option<U64FastFieldReader> {
|
||||
pub fn get_fieldnorms_reader(&self, field: Field) -> Option<FastFieldReader<u64>> {
|
||||
self.fieldnorms_composite
|
||||
.open_read(field)
|
||||
.map(U64FastFieldReader::open)
|
||||
.map(FastFieldReader::open)
|
||||
}
|
||||
|
||||
/// Accessor to the segment's `StoreReader`.
|
||||
|
||||
@@ -189,11 +189,11 @@ impl<'a> TermHashMap<'a> {
|
||||
let (addr, val): (u32, &mut V) = self.heap.allocate_object();
|
||||
assert_eq!(addr, key_bytes_ref.addr() + 2 + key_bytes.len() as u32);
|
||||
self.set_bucket(hash, key_bytes_ref, bucket);
|
||||
return (bucket, val);
|
||||
return (bucket as UnorderedTermId, val);
|
||||
} else if kv.hash == hash {
|
||||
let (stored_key, expull_addr): (&[u8], u32) = self.get_key_value(kv.key_value_addr);
|
||||
if stored_key == key_bytes {
|
||||
return (bucket, self.heap.get_mut_ref(expull_addr));
|
||||
return (bucket as UnorderedTermId, self.heap.get_mut_ref(expull_addr));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ use termdict::{TermDictionary, TermDictionaryImpl};
|
||||
/// list of facets. This ordinal is segment local and
|
||||
/// only makes sense for a given segment.
|
||||
pub struct FacetReader {
|
||||
term_ords: MultiValueIntFastFieldReader,
|
||||
term_ords: MultiValueIntFastFieldReader<u64>,
|
||||
term_dict: TermDictionaryImpl,
|
||||
}
|
||||
|
||||
@@ -31,12 +31,12 @@ impl FacetReader {
|
||||
/// - a `TermDictionaryImpl` that helps associating a facet to
|
||||
/// an ordinal and vice versa.
|
||||
pub fn new(
|
||||
term_ords: MultiValueIntFastFieldReader,
|
||||
term_ords: MultiValueIntFastFieldReader<u64>,
|
||||
term_dict: TermDictionaryImpl,
|
||||
) -> FacetReader {
|
||||
FacetReader {
|
||||
term_ords: term_ords,
|
||||
term_dict: term_dict,
|
||||
term_ords,
|
||||
term_dict
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -23,6 +23,19 @@ values stored.
|
||||
Read access performance is comparable to that of an array lookup.
|
||||
*/
|
||||
|
||||
use common;
|
||||
use schema::Cardinality;
|
||||
use schema::FieldType;
|
||||
use schema::Value;
|
||||
pub use self::delete::DeleteBitSet;
|
||||
pub use self::delete::write_delete_bitset;
|
||||
pub use self::error::{FastFieldNotAvailableError, Result};
|
||||
pub use self::facet_reader::FacetReader;
|
||||
pub use self::multivalued::MultiValueIntFastFieldReader;
|
||||
pub use self::reader::FastFieldReader;
|
||||
pub use self::serializer::FastFieldSerializer;
|
||||
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
|
||||
|
||||
mod reader;
|
||||
mod writer;
|
||||
mod serializer;
|
||||
@@ -31,33 +44,104 @@ mod delete;
|
||||
mod facet_reader;
|
||||
mod multivalued;
|
||||
|
||||
pub use self::delete::write_delete_bitset;
|
||||
pub use self::delete::DeleteBitSet;
|
||||
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
|
||||
pub use self::reader::{I64FastFieldReader, U64FastFieldReader};
|
||||
pub use self::reader::FastFieldReader;
|
||||
pub use self::serializer::FastFieldSerializer;
|
||||
pub use self::error::{FastFieldNotAvailableError, Result};
|
||||
pub use self::facet_reader::FacetReader;
|
||||
pub use self::multivalued::MultiValueIntFastFieldReader;
|
||||
/// Trait for types that are allowed for fast fields: (u64 or i64).
|
||||
pub trait FastValue: Default + Clone + Copy {
|
||||
/// Converts a value from u64
|
||||
///
|
||||
/// Internally all fast field values are encoded as u64.
|
||||
fn from_u64(val: u64) -> Self;
|
||||
|
||||
/// Converts a value to u64.
|
||||
///
|
||||
/// Internally all fast field values are encoded as u64.
|
||||
fn to_u64(&self) -> u64;
|
||||
|
||||
/// Returns the fast field cardinality that can be extracted from the given
|
||||
/// `FieldType`.
|
||||
///
|
||||
/// If the type is not a fast field, `None` is returned.
|
||||
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality>;
|
||||
|
||||
/// Cast value to `u64`.
|
||||
/// The value is just reinterpreted in memory.
|
||||
fn as_u64(&self) -> u64;
|
||||
}
|
||||
|
||||
|
||||
impl FastValue for u64 {
|
||||
fn from_u64(val: u64) -> Self {
|
||||
val
|
||||
}
|
||||
|
||||
fn to_u64(&self) -> u64 {
|
||||
*self
|
||||
}
|
||||
|
||||
fn as_u64(&self) -> u64 {
|
||||
*self
|
||||
}
|
||||
|
||||
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
|
||||
match *field_type {
|
||||
FieldType::U64(ref integer_options) =>
|
||||
integer_options.get_fastfield_cardinality(),
|
||||
FieldType::HierarchicalFacet =>
|
||||
Some(Cardinality::MultiValues),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FastValue for i64 {
|
||||
fn from_u64(val: u64) -> Self {
|
||||
common::u64_to_i64(val)
|
||||
}
|
||||
|
||||
fn to_u64(&self) -> u64 {
|
||||
common::i64_to_u64(*self)
|
||||
}
|
||||
|
||||
|
||||
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
|
||||
match *field_type {
|
||||
FieldType::I64(ref integer_options) =>
|
||||
integer_options.get_fastfield_cardinality(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn as_u64(&self) -> u64 {
|
||||
*self as u64
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_u64(value: &Value) -> u64 {
|
||||
match *value {
|
||||
Value::U64(ref val) => *val,
|
||||
Value::I64(ref val) => common::i64_to_u64(*val),
|
||||
_ => panic!("Expected a u64/i64 field, got {:?} ", value),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use schema::Field;
|
||||
use std::path::Path;
|
||||
|
||||
use common::CompositeFile;
|
||||
use directory::{Directory, RAMDirectory, WritePtr};
|
||||
use schema::Document;
|
||||
use schema::{Schema, SchemaBuilder};
|
||||
use schema::FAST;
|
||||
use std::collections::HashMap;
|
||||
use test::Bencher;
|
||||
use test;
|
||||
use fastfield::FastFieldReader;
|
||||
use rand::Rng;
|
||||
use rand::SeedableRng;
|
||||
use common::CompositeFile;
|
||||
use rand::XorShiftRng;
|
||||
use schema::{Schema, SchemaBuilder};
|
||||
use schema::Document;
|
||||
use schema::FAST;
|
||||
use schema::Field;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use super::*;
|
||||
use test;
|
||||
use test::Bencher;
|
||||
|
||||
lazy_static! {
|
||||
static ref SCHEMA: Schema = {
|
||||
@@ -70,15 +154,9 @@ mod tests {
|
||||
};
|
||||
}
|
||||
|
||||
fn add_single_field_doc(fast_field_writers: &mut FastFieldsWriter, field: Field, value: u64) {
|
||||
let mut doc = Document::default();
|
||||
doc.add_u64(field, value);
|
||||
fast_field_writers.add_document(&doc);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_fastfield() {
|
||||
let test_fastfield = U64FastFieldReader::from(vec![100, 200, 300]);
|
||||
let test_fastfield = FastFieldReader::<u64>::from(vec![100, 200, 300]);
|
||||
assert_eq!(test_fastfield.get(0), 100);
|
||||
assert_eq!(test_fastfield.get(1), 200);
|
||||
assert_eq!(test_fastfield.get(2), 300);
|
||||
@@ -92,9 +170,9 @@ mod tests {
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 13u64);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 14u64);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 2u64);
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>13u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>14u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>2u64));
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new())
|
||||
.unwrap();
|
||||
@@ -107,7 +185,7 @@ mod tests {
|
||||
{
|
||||
let composite_file = CompositeFile::open(&source).unwrap();
|
||||
let field_source = composite_file.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader = U64FastFieldReader::open(field_source);
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(field_source);
|
||||
assert_eq!(fast_field_reader.get(0), 13u64);
|
||||
assert_eq!(fast_field_reader.get(1), 14u64);
|
||||
assert_eq!(fast_field_reader.get(2), 2u64);
|
||||
@@ -122,15 +200,15 @@ mod tests {
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 4u64);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 14_082_001u64);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 3_052u64);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 9002u64);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 15_001u64);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 777u64);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 1_002u64);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 1_501u64);
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 215u64);
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>4u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>14_082_001u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>3_052u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>9_002u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>15_001u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>777u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>1_002u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>1_501u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>215u64));
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new())
|
||||
.unwrap();
|
||||
@@ -142,8 +220,8 @@ mod tests {
|
||||
}
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
assert_eq!(fast_field_reader.get(0), 4u64);
|
||||
assert_eq!(fast_field_reader.get(1), 14_082_001u64);
|
||||
assert_eq!(fast_field_reader.get(2), 3_052u64);
|
||||
@@ -166,7 +244,7 @@ mod tests {
|
||||
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
for _ in 0..10_000 {
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 100_000u64);
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>100_000u64));
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new())
|
||||
@@ -179,8 +257,8 @@ mod tests {
|
||||
}
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
for doc in 0..10_000 {
|
||||
assert_eq!(fast_field_reader.get(doc), 100_000u64);
|
||||
}
|
||||
@@ -197,13 +275,9 @@ mod tests {
|
||||
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
// forcing the amplitude to be high
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, 0u64);
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>0u64));
|
||||
for i in 0u64..10_000u64 {
|
||||
add_single_field_doc(
|
||||
&mut fast_field_writers,
|
||||
*FIELD,
|
||||
5_000_000_000_000_000_000u64 + i,
|
||||
);
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + i));
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new())
|
||||
@@ -216,9 +290,8 @@ mod tests {
|
||||
}
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
assert_eq!(fast_field_reader.get(0), 0u64);
|
||||
for doc in 1..10_001 {
|
||||
assert_eq!(
|
||||
@@ -257,8 +330,8 @@ mod tests {
|
||||
}
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: I64FastFieldReader =
|
||||
I64FastFieldReader::open(fast_fields_composite.open_read(i64_field).unwrap());
|
||||
let data = fast_fields_composite.open_read(i64_field).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<i64>::open(data);
|
||||
|
||||
assert_eq!(fast_field_reader.min_value(), -100i64);
|
||||
assert_eq!(fast_field_reader.max_value(), 9_999i64);
|
||||
@@ -296,8 +369,8 @@ mod tests {
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: I64FastFieldReader =
|
||||
I64FastFieldReader::open(fast_fields_composite.open_read(i64_field).unwrap());
|
||||
let data = fast_fields_composite.open_read(i64_field).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<i64>::open(data);
|
||||
assert_eq!(fast_field_reader.get(0u32), 0i64);
|
||||
}
|
||||
}
|
||||
@@ -320,8 +393,8 @@ mod tests {
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
for x in &permutation {
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
|
||||
for &x in &permutation {
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>x));
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new())
|
||||
@@ -331,8 +404,8 @@ mod tests {
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
|
||||
let mut a = 0u64;
|
||||
for _ in 0..n {
|
||||
@@ -377,8 +450,8 @@ mod tests {
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
for x in &permutation {
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
|
||||
for &x in &permutation {
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>x));
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new())
|
||||
@@ -388,8 +461,8 @@ mod tests {
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
|
||||
b.iter(|| {
|
||||
let n = test::black_box(7000u32);
|
||||
@@ -411,8 +484,8 @@ mod tests {
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
for x in &permutation {
|
||||
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
|
||||
for &x in &permutation {
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>x));
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new())
|
||||
@@ -422,8 +495,8 @@ mod tests {
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let fast_field_reader: U64FastFieldReader =
|
||||
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
|
||||
b.iter(|| {
|
||||
let n = test::black_box(1000u32);
|
||||
|
||||
@@ -3,3 +3,87 @@ mod reader;
|
||||
|
||||
pub use self::writer::MultiValueIntFastFieldWriter;
|
||||
pub use self::reader::MultiValueIntFastFieldReader;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use schema::SchemaBuilder;
|
||||
use schema::Cardinality;
|
||||
use schema::IntOptions;
|
||||
use Index;
|
||||
|
||||
#[test]
|
||||
fn test_multivalued_u64() {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let field = schema_builder.add_u64_field(
|
||||
"multifield",
|
||||
IntOptions::default().set_fast(Cardinality::MultiValues)
|
||||
);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
index_writer.add_document(doc!(field=>1u64, field=>3u64));
|
||||
index_writer.add_document(doc!());
|
||||
index_writer.add_document(doc!(field=>4u64));
|
||||
index_writer.add_document(doc!(field=>5u64, field=>20u64,field=>1u64));
|
||||
assert!(index_writer.commit().is_ok());
|
||||
|
||||
index.load_searchers().unwrap();
|
||||
let searcher = index.searcher();
|
||||
let reader = searcher.segment_reader(0);
|
||||
let mut vals = Vec::new();
|
||||
let multi_value_reader = reader.multi_fast_field_reader::<u64>(field).unwrap();
|
||||
{
|
||||
multi_value_reader.get_vals(2, &mut vals);
|
||||
assert_eq!(&vals, &[4u64]);
|
||||
}
|
||||
{
|
||||
multi_value_reader.get_vals(0, &mut vals);
|
||||
assert_eq!(&vals, &[1u64, 3u64]);
|
||||
}
|
||||
{
|
||||
multi_value_reader.get_vals(1, &mut vals);
|
||||
assert!(vals.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_multivalued_i64() {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let field = schema_builder.add_i64_field(
|
||||
"multifield",
|
||||
IntOptions::default().set_fast(Cardinality::MultiValues)
|
||||
);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
index_writer.add_document(doc!(field=> 1i64, field => 3i64));
|
||||
index_writer.add_document(doc!());
|
||||
index_writer.add_document(doc!(field=> -4i64));
|
||||
index_writer.add_document(doc!(field=> -5i64, field => -20i64, field=>1i64));
|
||||
assert!(index_writer.commit().is_ok());
|
||||
|
||||
index.load_searchers().unwrap();
|
||||
let searcher = index.searcher();
|
||||
let reader = searcher.segment_reader(0);
|
||||
let mut vals = Vec::new();
|
||||
let multi_value_reader = reader.multi_fast_field_reader::<i64>(field).unwrap();
|
||||
{
|
||||
multi_value_reader.get_vals(2, &mut vals);
|
||||
assert_eq!(&vals, &[-4i64]);
|
||||
}
|
||||
{
|
||||
multi_value_reader.get_vals(0, &mut vals);
|
||||
assert_eq!(&vals, &[1i64, 3i64]);
|
||||
}
|
||||
{
|
||||
multi_value_reader.get_vals(1, &mut vals);
|
||||
assert!(vals.is_empty());
|
||||
}
|
||||
{
|
||||
multi_value_reader.get_vals(3, &mut vals);
|
||||
assert_eq!(&vals, &[-5i64, -20i64, 1i64]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
use DocId;
|
||||
use fastfield::FastFieldReader;
|
||||
use fastfield::{FastFieldReader, FastValue};
|
||||
|
||||
use fastfield::U64FastFieldReader;
|
||||
|
||||
/// Reader for a multivalued integer (`u64` or `i64`) fast field.
|
||||
///
|
||||
@@ -12,31 +11,29 @@ use fastfield::U64FastFieldReader;
|
||||
/// The `idx_reader` associates, for each document, the index of its first value.
|
||||
///
|
||||
#[derive(Clone)]
|
||||
pub struct MultiValueIntFastFieldReader {
|
||||
idx_reader: U64FastFieldReader,
|
||||
vals_reader: U64FastFieldReader,
|
||||
pub struct MultiValueIntFastFieldReader<Item: FastValue> {
|
||||
idx_reader: FastFieldReader<u64>,
|
||||
vals_reader: FastFieldReader<Item>
|
||||
}
|
||||
|
||||
impl MultiValueIntFastFieldReader {
|
||||
impl<Item: FastValue> MultiValueIntFastFieldReader<Item> {
|
||||
pub(crate) fn open(
|
||||
idx_reader: U64FastFieldReader,
|
||||
vals_reader: U64FastFieldReader,
|
||||
) -> MultiValueIntFastFieldReader {
|
||||
idx_reader: FastFieldReader<u64>,
|
||||
vals_reader: FastFieldReader<Item>,
|
||||
) -> MultiValueIntFastFieldReader<Item> {
|
||||
MultiValueIntFastFieldReader {
|
||||
idx_reader: idx_reader,
|
||||
vals_reader: vals_reader,
|
||||
idx_reader,
|
||||
vals_reader
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the array of values associated to the given `doc`.
|
||||
pub fn get_vals(&self, doc: DocId, vals: &mut Vec<u64>) {
|
||||
pub fn get_vals(&self, doc: DocId, vals: &mut Vec<Item>) {
|
||||
let start = self.idx_reader.get(doc) as u32;
|
||||
let stop = self.idx_reader.get(doc + 1) as u32;
|
||||
vals.clear();
|
||||
for val_id in start..stop {
|
||||
let val = self.vals_reader.get(val_id);
|
||||
vals.push(val);
|
||||
}
|
||||
let len = (stop - start) as usize;
|
||||
vals.resize(len, Item::default());
|
||||
self.vals_reader.get_range(start, &mut vals[..]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,22 +1,28 @@
|
||||
use fastfield::FastFieldSerializer;
|
||||
use fastfield::serializer::FastSingleFieldSerializer;
|
||||
use fastfield::value_to_u64;
|
||||
use std::collections::HashMap;
|
||||
use postings::UnorderedTermId;
|
||||
use schema::Field;
|
||||
use schema::{Document, Field};
|
||||
use std::io;
|
||||
use itertools::Itertools;
|
||||
|
||||
|
||||
pub struct MultiValueIntFastFieldWriter {
|
||||
field: Field,
|
||||
vals: Vec<UnorderedTermId>,
|
||||
vals: Vec<u64>,
|
||||
doc_index: Vec<u64>,
|
||||
is_facet: bool
|
||||
}
|
||||
|
||||
impl MultiValueIntFastFieldWriter {
|
||||
/// Creates a new `MultiValueIntFastFieldWriter`
|
||||
pub fn new(field: Field) -> Self {
|
||||
pub fn new(field: Field, is_facet: bool) -> Self {
|
||||
MultiValueIntFastFieldWriter {
|
||||
field: field,
|
||||
field,
|
||||
vals: Vec::new(),
|
||||
doc_index: Vec::new(),
|
||||
is_facet
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,11 +43,32 @@ impl MultiValueIntFastFieldWriter {
|
||||
self.vals.push(val);
|
||||
}
|
||||
|
||||
/// Push the fast fields value to the `FastFieldWriter`.
|
||||
pub fn add_document(&mut self, doc: &Document) {
|
||||
if !self.is_facet {
|
||||
for field_value in doc.field_values() {
|
||||
if field_value.field() == self.field {
|
||||
self.add_val(value_to_u64(field_value.value()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Serializes fast field values by pushing them to the `FastFieldSerializer`.
|
||||
///
|
||||
/// HashMap makes it possible to remap them before serializing.
|
||||
/// Specifically, string terms are first stored in the writer as their
|
||||
/// position in the `IndexWriter`'s `HashMap`. This value is called
|
||||
/// an `UnorderedTermId`.
|
||||
///
|
||||
/// During the serialization of the segment, terms get sorted and
|
||||
/// `tantivy` builds a mapping to convert this `UnorderedTermId` into
|
||||
/// term ordinals.
|
||||
///
|
||||
pub fn serialize(
|
||||
&self,
|
||||
serializer: &mut FastFieldSerializer,
|
||||
mapping: &HashMap<UnorderedTermId, usize>,
|
||||
mapping_opt: Option<&HashMap<UnorderedTermId, usize>>,
|
||||
) -> io::Result<()> {
|
||||
{
|
||||
// writing the offset index
|
||||
@@ -55,10 +82,25 @@ impl MultiValueIntFastFieldWriter {
|
||||
}
|
||||
{
|
||||
// writing the values themselves.
|
||||
let mut value_serializer =
|
||||
serializer.new_u64_fast_field_with_idx(self.field, 0u64, mapping.len() as u64, 1)?;
|
||||
for val in &self.vals {
|
||||
value_serializer.add_val(*mapping.get(val).expect("Missing term ordinal") as u64)?;
|
||||
let mut value_serializer: FastSingleFieldSerializer<_>;
|
||||
match mapping_opt {
|
||||
Some(mapping) => {
|
||||
value_serializer =
|
||||
serializer.new_u64_fast_field_with_idx(self.field, 0u64, mapping.len() as u64, 1)?;
|
||||
for val in &self.vals {
|
||||
let remapped_val = *mapping.get(val).expect("Missing term ordinal") as u64;
|
||||
value_serializer.add_val(remapped_val)?;
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let val_min_max = self.vals.iter().cloned().minmax();
|
||||
let (val_min, val_max) = val_min_max.into_option().unwrap_or((0u64, 0));
|
||||
value_serializer =
|
||||
serializer.new_u64_fast_field_with_idx(self.field, val_min, val_max, 1)?;
|
||||
for &val in &self.vals {
|
||||
value_serializer.add_val(val)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
value_serializer.close_field()?;
|
||||
}
|
||||
|
||||
@@ -1,111 +1,36 @@
|
||||
use directory::ReadOnlySource;
|
||||
use common::{self, BinarySerializable};
|
||||
use common::compute_num_bits;
|
||||
use common::BinarySerializable;
|
||||
use common::bitpacker::BitUnpacker;
|
||||
use DocId;
|
||||
use schema::SchemaBuilder;
|
||||
use std::path::Path;
|
||||
use schema::FAST;
|
||||
use directory::{Directory, RAMDirectory, WritePtr};
|
||||
use fastfield::{FastFieldSerializer, FastFieldsWriter};
|
||||
use schema::FieldType;
|
||||
use std::mem;
|
||||
use common::CompositeFile;
|
||||
use std::collections::HashMap;
|
||||
use common::compute_num_bits;
|
||||
use directory::{Directory, RAMDirectory, WritePtr};
|
||||
use directory::ReadOnlySource;
|
||||
use DocId;
|
||||
use fastfield::{FastFieldSerializer, FastFieldsWriter};
|
||||
use owning_ref::OwningRef;
|
||||
use schema::FAST;
|
||||
use schema::SchemaBuilder;
|
||||
use std::collections::HashMap;
|
||||
use std::marker::PhantomData;
|
||||
use std::mem;
|
||||
use std::path::Path;
|
||||
use super::FastValue;
|
||||
|
||||
/// Trait for accessing a fastfield.
|
||||
///
|
||||
/// Depending on the field type, a different
|
||||
/// fast field is required.
|
||||
pub trait FastFieldReader: Sized {
|
||||
/// Type of the value stored in the fastfield.
|
||||
type ValueType;
|
||||
#[derive(Clone)]
|
||||
pub struct FastFieldReader<Item: FastValue> {
|
||||
bit_unpacker: BitUnpacker<OwningRef<ReadOnlySource, [u8]>>,
|
||||
min_value_u64: u64,
|
||||
max_value_u64: u64,
|
||||
_phantom: PhantomData<Item>
|
||||
}
|
||||
|
||||
/// Return the value associated to the given document.
|
||||
///
|
||||
/// This accessor should return as fast as possible.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `doc` is greater than the segment
|
||||
/// `maxdoc`.
|
||||
fn get(&self, doc: DocId) -> Self::ValueType;
|
||||
|
||||
/// Fills an output buffer with the fast field values
|
||||
/// associated with the `DocId` going from
|
||||
/// `start` to `start + output.len()`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `start + output.len()` is greater than
|
||||
/// the segment's `maxdoc`.
|
||||
fn get_range(&self, start: u32, output: &mut [Self::ValueType]);
|
||||
impl<Item: FastValue> FastFieldReader<Item> {
|
||||
|
||||
/// Opens a fast field given a source.
|
||||
fn open(source: ReadOnlySource) -> Self;
|
||||
|
||||
/// Returns true iff the given field_type makes
|
||||
/// it possible to access the field values via a
|
||||
/// fastfield.
|
||||
fn is_enabled(field_type: &FieldType) -> bool;
|
||||
}
|
||||
|
||||
/// `FastFieldReader` for unsigned 64-bits integers.
|
||||
#[derive(Clone)]
|
||||
pub struct U64FastFieldReader {
|
||||
bit_unpacker: BitUnpacker<OwningRef<ReadOnlySource, [u8]>>,
|
||||
min_value: u64,
|
||||
max_value: u64,
|
||||
}
|
||||
|
||||
impl U64FastFieldReader {
|
||||
/// Returns the minimum value for this fast field.
|
||||
///
|
||||
/// The min value does not take in account of possible
|
||||
/// deleted document, and should be considered as a lower bound
|
||||
/// of the actual minimum value.
|
||||
pub fn min_value(&self) -> u64 {
|
||||
self.min_value
|
||||
}
|
||||
|
||||
/// Returns the maximum value for this fast field.
|
||||
///
|
||||
/// The max value does not take in account of possible
|
||||
/// deleted document, and should be considered as an upper bound
|
||||
/// of the actual maximum value.
|
||||
pub fn max_value(&self) -> u64 {
|
||||
self.max_value
|
||||
}
|
||||
}
|
||||
|
||||
impl FastFieldReader for U64FastFieldReader {
|
||||
type ValueType = u64;
|
||||
|
||||
fn get(&self, doc: DocId) -> u64 {
|
||||
self.min_value + self.bit_unpacker.get(doc as usize)
|
||||
}
|
||||
|
||||
fn is_enabled(field_type: &FieldType) -> bool {
|
||||
match *field_type {
|
||||
FieldType::U64(ref integer_options) => integer_options.is_fast(),
|
||||
FieldType::HierarchicalFacet => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_range(&self, start: u32, output: &mut [Self::ValueType]) {
|
||||
self.bit_unpacker.get_range(start, output);
|
||||
for out in output.iter_mut() {
|
||||
*out += self.min_value;
|
||||
}
|
||||
}
|
||||
|
||||
/// Opens a new fast field reader given a read only source.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the data is corrupted.
|
||||
fn open(data: ReadOnlySource) -> U64FastFieldReader {
|
||||
pub fn open(data: ReadOnlySource) -> Self {
|
||||
let min_value: u64;
|
||||
let amplitude: u64;
|
||||
{
|
||||
@@ -119,16 +44,64 @@ impl FastFieldReader for U64FastFieldReader {
|
||||
let num_bits = compute_num_bits(amplitude);
|
||||
let owning_ref = OwningRef::new(data).map(|data| &data[16..]);
|
||||
let bit_unpacker = BitUnpacker::new(owning_ref, num_bits);
|
||||
U64FastFieldReader {
|
||||
min_value,
|
||||
max_value,
|
||||
FastFieldReader {
|
||||
min_value_u64: min_value,
|
||||
max_value_u64: max_value,
|
||||
bit_unpacker,
|
||||
_phantom: PhantomData
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Return the value associated to the given document.
|
||||
///
|
||||
/// This accessor should return as fast as possible.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `doc` is greater than the segment
|
||||
/// `maxdoc`.
|
||||
pub fn get(&self, doc: DocId) -> Item {
|
||||
Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc as usize))
|
||||
}
|
||||
|
||||
/// Fills an output buffer with the fast field values
|
||||
/// associated with the `DocId` going from
|
||||
/// `start` to `start + output.len()`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `start + output.len()` is greater than
|
||||
/// the segment's `maxdoc`.
|
||||
pub fn get_range(&self, start: u32, output: &mut [Item]) {
|
||||
let output_u64: &mut [u64] = unsafe { mem::transmute(output) };
|
||||
self.bit_unpacker.get_range(start, output_u64);
|
||||
for out in output_u64.iter_mut() {
|
||||
*out = Item::from_u64(*out + self.min_value_u64).as_u64();
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the minimum value for this fast field.
|
||||
///
|
||||
/// The min value does not take in account of possible
|
||||
/// deleted document, and should be considered as a lower bound
|
||||
/// of the actual minimum value.
|
||||
pub fn min_value(&self) -> Item {
|
||||
Item::from_u64(self.min_value_u64)
|
||||
}
|
||||
|
||||
/// Returns the maximum value for this fast field.
|
||||
///
|
||||
/// The max value does not take in account of possible
|
||||
/// deleted document, and should be considered as an upper bound
|
||||
/// of the actual maximum value.
|
||||
pub fn max_value(&self) -> Item {
|
||||
Item::from_u64(self.max_value_u64)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<u64>> for U64FastFieldReader {
|
||||
fn from(vals: Vec<u64>) -> U64FastFieldReader {
|
||||
impl<Item: FastValue> From<Vec<Item>> for FastFieldReader<Item> {
|
||||
fn from(vals: Vec<Item>) -> FastFieldReader<Item> {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let field = schema_builder.add_u64_field("field", FAST);
|
||||
let schema = schema_builder.build();
|
||||
@@ -146,7 +119,7 @@ impl From<Vec<u64>> for U64FastFieldReader {
|
||||
.get_field_writer(field)
|
||||
.expect("With a RAMDirectory, this should never fail.");
|
||||
for val in vals {
|
||||
fast_field_writer.add_val(val);
|
||||
fast_field_writer.add_val(val.to_u64());
|
||||
}
|
||||
}
|
||||
fast_field_writers
|
||||
@@ -158,79 +131,10 @@ impl From<Vec<u64>> for U64FastFieldReader {
|
||||
let source = directory.open_read(path).expect("Failed to open the file");
|
||||
let composite_file =
|
||||
CompositeFile::open(&source).expect("Failed to read the composite file");
|
||||
|
||||
let field_source = composite_file
|
||||
.open_read(field)
|
||||
.expect("File component not found");
|
||||
U64FastFieldReader::open(field_source)
|
||||
FastFieldReader::open(field_source)
|
||||
}
|
||||
}
|
||||
|
||||
/// `FastFieldReader` for signed 64-bits integers.
|
||||
pub struct I64FastFieldReader {
|
||||
underlying: U64FastFieldReader,
|
||||
}
|
||||
|
||||
impl I64FastFieldReader {
|
||||
/// Returns the minimum value for this fast field.
|
||||
///
|
||||
/// The min value does not take in account of possible
|
||||
/// deleted document, and should be considered as a lower bound
|
||||
/// of the actual minimum value.
|
||||
pub fn min_value(&self) -> i64 {
|
||||
common::u64_to_i64(self.underlying.min_value())
|
||||
}
|
||||
|
||||
/// Returns the maximum value for this fast field.
|
||||
///
|
||||
/// The max value does not take in account of possible
|
||||
/// deleted document, and should be considered as an upper bound
|
||||
/// of the actual maximum value.
|
||||
pub fn max_value(&self) -> i64 {
|
||||
common::u64_to_i64(self.underlying.max_value())
|
||||
}
|
||||
}
|
||||
|
||||
impl FastFieldReader for I64FastFieldReader {
|
||||
type ValueType = i64;
|
||||
|
||||
///
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic or return wrong random result if `doc`
|
||||
/// is greater or equal to the segment's `maxdoc`.
|
||||
fn get(&self, doc: DocId) -> i64 {
|
||||
common::u64_to_i64(self.underlying.get(doc))
|
||||
}
|
||||
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic or return wrong random result if `doc`
|
||||
/// is greater or equal to the segment's `maxdoc`.
|
||||
fn get_range(&self, start: u32, output: &mut [Self::ValueType]) {
|
||||
let output_u64: &mut [u64] = unsafe { mem::transmute(output) };
|
||||
self.underlying.get_range(start, output_u64);
|
||||
for mut_val in output_u64.iter_mut() {
|
||||
*mut_val = common::u64_to_i64(*mut_val as u64) as u64;
|
||||
}
|
||||
}
|
||||
|
||||
/// Opens a new fast field reader given a read only source.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the data is corrupted.
|
||||
fn open(data: ReadOnlySource) -> I64FastFieldReader {
|
||||
I64FastFieldReader {
|
||||
underlying: U64FastFieldReader::open(data),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_enabled(field_type: &FieldType) -> bool {
|
||||
match *field_type {
|
||||
FieldType::I64(ref integer_options) => integer_options.is_fast(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ impl FastFieldSerializer {
|
||||
// just making room for the pointer to header.
|
||||
let composite_write = CompositeWrite::wrap(write);
|
||||
Ok(FastFieldSerializer {
|
||||
composite_write: composite_write,
|
||||
composite_write
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use schema::{Cardinality, Document, Field, Schema};
|
||||
use fastfield::FastFieldSerializer;
|
||||
use std::io;
|
||||
use schema::Value;
|
||||
use DocId;
|
||||
use schema::FieldType;
|
||||
use common;
|
||||
@@ -39,22 +38,22 @@ impl FastFieldsWriter {
|
||||
single_value_writers.push(fast_field_writer);
|
||||
}
|
||||
Some(Cardinality::MultiValues) => {
|
||||
let fast_field_writer = MultiValueIntFastFieldWriter::new(field);
|
||||
let fast_field_writer = MultiValueIntFastFieldWriter::new(field, false);
|
||||
multi_values_writers.push(fast_field_writer);
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
FieldType::HierarchicalFacet => {
|
||||
let fast_field_writer = MultiValueIntFastFieldWriter::new(field);
|
||||
let fast_field_writer = MultiValueIntFastFieldWriter::new(field, true);
|
||||
multi_values_writers.push(fast_field_writer);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
FastFieldsWriter {
|
||||
single_value_writers: single_value_writers,
|
||||
multi_values_writers: multi_values_writers,
|
||||
single_value_writers,
|
||||
multi_values_writers
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,6 +96,7 @@ impl FastFieldsWriter {
|
||||
}
|
||||
for field_writer in &mut self.multi_values_writers {
|
||||
field_writer.next_doc();
|
||||
field_writer.add_document(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,11 +112,7 @@ impl FastFieldsWriter {
|
||||
}
|
||||
for field_writer in &self.multi_values_writers {
|
||||
let field = field_writer.field();
|
||||
if let Some(mapping) = mapping.get(&field) {
|
||||
field_writer.serialize(serializer, mapping)?;
|
||||
} else {
|
||||
panic!("Term ordinal mapping missing for {:?}", field);
|
||||
}
|
||||
field_writer.serialize(serializer, mapping.get(&field))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -160,7 +156,7 @@ impl IntFastFieldWriter {
|
||||
/// Creates a new `IntFastFieldWriter`
|
||||
pub fn new(field: Field) -> IntFastFieldWriter {
|
||||
IntFastFieldWriter {
|
||||
field: field,
|
||||
field,
|
||||
vals: Vec::new(),
|
||||
val_count: 0,
|
||||
val_if_missing: 0u64,
|
||||
@@ -227,11 +223,7 @@ impl IntFastFieldWriter {
|
||||
/// only the first one is taken in account.
|
||||
fn extract_val(&self, doc: &Document) -> u64 {
|
||||
match doc.get_first(self.field) {
|
||||
Some(v) => match *v {
|
||||
Value::U64(ref val) => *val,
|
||||
Value::I64(ref val) => common::i64_to_u64(*val),
|
||||
_ => panic!("Expected a u64field, got {:?} ", v),
|
||||
},
|
||||
Some(v) => super::value_to_u64(v),
|
||||
None => self.val_if_missing,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ use DocId;
|
||||
use core::SerializableSegment;
|
||||
use indexer::SegmentSerializer;
|
||||
use postings::InvertedIndexSerializer;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use itertools::Itertools;
|
||||
use postings::Postings;
|
||||
use docset::DocSet;
|
||||
@@ -26,7 +25,7 @@ pub struct IndexMerger {
|
||||
}
|
||||
|
||||
fn compute_min_max_val(
|
||||
u64_reader: &U64FastFieldReader,
|
||||
u64_reader: &FastFieldReader<u64>,
|
||||
max_doc: DocId,
|
||||
delete_bitset: &DeleteBitSet,
|
||||
) -> Option<(u64, u64)> {
|
||||
@@ -50,15 +49,15 @@ fn compute_min_max_val(
|
||||
fn extract_fieldnorm_reader(
|
||||
segment_reader: &SegmentReader,
|
||||
field: Field,
|
||||
) -> Option<U64FastFieldReader> {
|
||||
) -> Option<FastFieldReader<u64>> {
|
||||
segment_reader.get_fieldnorms_reader(field)
|
||||
}
|
||||
|
||||
fn extract_fast_field_reader(
|
||||
segment_reader: &SegmentReader,
|
||||
field: Field,
|
||||
) -> Option<U64FastFieldReader> {
|
||||
segment_reader.get_fast_field_reader(field).ok()
|
||||
) -> Option<FastFieldReader<u64>> {
|
||||
segment_reader.fast_field_reader(field).ok()
|
||||
}
|
||||
|
||||
struct DeltaComputer {
|
||||
@@ -137,7 +136,7 @@ impl IndexMerger {
|
||||
fn generic_write_fast_field(
|
||||
&self,
|
||||
fields: Vec<Field>,
|
||||
field_reader_extractor: &Fn(&SegmentReader, Field) -> Option<U64FastFieldReader>,
|
||||
field_reader_extractor: &Fn(&SegmentReader, Field) -> Option<FastFieldReader<u64>>,
|
||||
fast_field_serializer: &mut FastFieldSerializer,
|
||||
) -> Result<()> {
|
||||
for field in fields {
|
||||
@@ -368,7 +367,6 @@ mod tests {
|
||||
use query::TermQuery;
|
||||
use schema::Field;
|
||||
use core::Index;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use Searcher;
|
||||
use DocAddress;
|
||||
use collector::tests::FastFieldTestCollector;
|
||||
@@ -628,16 +626,16 @@ mod tests {
|
||||
vec![6_000, 7_000]
|
||||
);
|
||||
|
||||
let score_field_reader: U64FastFieldReader = searcher
|
||||
let score_field_reader = searcher
|
||||
.segment_reader(0)
|
||||
.get_fast_field_reader(score_field)
|
||||
.fast_field_reader::<u64>(score_field)
|
||||
.unwrap();
|
||||
assert_eq!(score_field_reader.min_value(), 1);
|
||||
assert_eq!(score_field_reader.max_value(), 3);
|
||||
|
||||
let score_field_reader: U64FastFieldReader = searcher
|
||||
let score_field_reader = searcher
|
||||
.segment_reader(1)
|
||||
.get_fast_field_reader(score_field)
|
||||
.fast_field_reader::<u64>(score_field)
|
||||
.unwrap();
|
||||
assert_eq!(score_field_reader.min_value(), 4000);
|
||||
assert_eq!(score_field_reader.max_value(), 7000);
|
||||
@@ -685,9 +683,9 @@ mod tests {
|
||||
search_term(&searcher, Term::from_field_text(text_field, "g")),
|
||||
vec![6_000, 7_000]
|
||||
);
|
||||
let score_field_reader: U64FastFieldReader = searcher
|
||||
let score_field_reader = searcher
|
||||
.segment_reader(0)
|
||||
.get_fast_field_reader(score_field)
|
||||
.fast_field_reader::<u64>(score_field)
|
||||
.unwrap();
|
||||
assert_eq!(score_field_reader.min_value(), 3);
|
||||
assert_eq!(score_field_reader.max_value(), 7000);
|
||||
@@ -731,9 +729,9 @@ mod tests {
|
||||
search_term(&searcher, Term::from_field_text(text_field, "g")),
|
||||
vec![6_000, 7_000]
|
||||
);
|
||||
let score_field_reader: U64FastFieldReader = searcher
|
||||
let score_field_reader = searcher
|
||||
.segment_reader(0)
|
||||
.get_fast_field_reader(score_field)
|
||||
.fast_field_reader::<u64>(score_field)
|
||||
.unwrap();
|
||||
assert_eq!(score_field_reader.min_value(), 3);
|
||||
assert_eq!(score_field_reader.max_value(), 7000);
|
||||
@@ -782,9 +780,9 @@ mod tests {
|
||||
search_term(&searcher, Term::from_field_text(text_field, "g")),
|
||||
vec![6_000, 7_000]
|
||||
);
|
||||
let score_field_reader: U64FastFieldReader = searcher
|
||||
let score_field_reader = searcher
|
||||
.segment_reader(0)
|
||||
.get_fast_field_reader(score_field)
|
||||
.fast_field_reader::<u64>(score_field)
|
||||
.unwrap();
|
||||
assert_eq!(score_field_reader.min_value(), 6000);
|
||||
assert_eq!(score_field_reader.max_value(), 7000);
|
||||
|
||||
@@ -160,7 +160,6 @@ impl<'a> SegmentWriter<'a> {
|
||||
self.multifield_postings.subscribe(doc_id, &term);
|
||||
unordered_term_id_opt = Some(unordered_term_id);
|
||||
});
|
||||
|
||||
if let Some(unordered_term_id) = unordered_term_id_opt {
|
||||
self.fast_field_writers
|
||||
.get_multivalue_writer(field)
|
||||
|
||||
11
src/lib.rs
11
src/lib.rs
@@ -286,7 +286,6 @@ mod tests {
|
||||
use schema::*;
|
||||
use docset::DocSet;
|
||||
use IndexWriter;
|
||||
use fastfield::{FastFieldReader, I64FastFieldReader, U64FastFieldReader};
|
||||
use Postings;
|
||||
use rand::{Rng, SeedableRng, XorShiftRng};
|
||||
use rand::distributions::{IndependentSample, Range};
|
||||
@@ -857,22 +856,22 @@ mod tests {
|
||||
let segment_reader: &SegmentReader = searcher.segment_reader(0);
|
||||
{
|
||||
let fast_field_reader_res =
|
||||
segment_reader.get_fast_field_reader::<U64FastFieldReader>(text_field);
|
||||
segment_reader.fast_field_reader::<u64>(text_field);
|
||||
assert!(fast_field_reader_res.is_err());
|
||||
}
|
||||
{
|
||||
let fast_field_reader_res =
|
||||
segment_reader.get_fast_field_reader::<U64FastFieldReader>(stored_int_field);
|
||||
segment_reader.fast_field_reader::<u64>(stored_int_field);
|
||||
assert!(fast_field_reader_res.is_err());
|
||||
}
|
||||
{
|
||||
let fast_field_reader_res =
|
||||
segment_reader.get_fast_field_reader::<U64FastFieldReader>(fast_field_signed);
|
||||
segment_reader.fast_field_reader::<u64>(fast_field_signed);
|
||||
assert!(fast_field_reader_res.is_err());
|
||||
}
|
||||
{
|
||||
let fast_field_reader_res =
|
||||
segment_reader.get_fast_field_reader::<I64FastFieldReader>(fast_field_signed);
|
||||
segment_reader.fast_field_reader::<i64>(fast_field_signed);
|
||||
assert!(fast_field_reader_res.is_ok());
|
||||
let fast_field_reader = fast_field_reader_res.unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 4i64)
|
||||
@@ -880,7 +879,7 @@ mod tests {
|
||||
|
||||
{
|
||||
let fast_field_reader_res =
|
||||
segment_reader.get_fast_field_reader::<I64FastFieldReader>(fast_field_signed);
|
||||
segment_reader.fast_field_reader::<i64>(fast_field_signed);
|
||||
assert!(fast_field_reader_res.is_ok());
|
||||
let fast_field_reader = fast_field_reader_res.unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 4i64)
|
||||
|
||||
@@ -54,7 +54,7 @@ macro_rules! doc(
|
||||
($crate::Document::default())
|
||||
}
|
||||
}; // avoids a warning due to the useless `mut`.
|
||||
($($field:ident => $value:expr),*) => {
|
||||
($($field:expr => $value:expr),*) => {
|
||||
{
|
||||
let mut document = $crate::Document::default();
|
||||
$(
|
||||
|
||||
@@ -25,7 +25,7 @@ pub use self::segment_postings::{BlockSegmentPostings, SegmentPostings};
|
||||
|
||||
pub use common::HasLen;
|
||||
|
||||
pub(crate) type UnorderedTermId = usize;
|
||||
pub(crate) type UnorderedTermId = u64;
|
||||
|
||||
#[allow(enum_variant_names)]
|
||||
pub(crate) enum FreqReadingOption {
|
||||
@@ -51,7 +51,6 @@ pub mod tests {
|
||||
use schema::IndexRecordOption;
|
||||
use std::iter;
|
||||
use datastruct::stacker::Heap;
|
||||
use fastfield::FastFieldReader;
|
||||
use query::TermQuery;
|
||||
use schema::Field;
|
||||
use test::{self, Bencher};
|
||||
|
||||
@@ -221,7 +221,7 @@ impl<'a, Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<'
|
||||
heap: &Heap,
|
||||
) -> UnorderedTermId {
|
||||
debug_assert!(term.as_slice().len() >= 4);
|
||||
let (term_ord, recorder): (usize, &mut Rec) = term_index.get_or_create(term);
|
||||
let (term_ord, recorder): (UnorderedTermId, &mut Rec) = term_index.get_or_create(term);
|
||||
let current_doc = recorder.current_doc();
|
||||
if current_doc != doc {
|
||||
if current_doc != u32::max_value() {
|
||||
|
||||
@@ -13,7 +13,6 @@ mod tests {
|
||||
use postings::SegmentPostings;
|
||||
use query::{Query, Scorer};
|
||||
use query::term_query::TermScorer;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use query::TermQuery;
|
||||
use Index;
|
||||
use schema::*;
|
||||
@@ -56,7 +55,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
pub fn test_term_scorer() {
|
||||
let left_fieldnorms = U64FastFieldReader::from(vec![10, 4]);
|
||||
let left_fieldnorms = FastFieldReader::from(vec![10, 4]);
|
||||
assert_eq!(left_fieldnorms.get(0), 10);
|
||||
assert_eq!(left_fieldnorms.get(1), 4);
|
||||
let left = SegmentPostings::create_from_docs(&[1]);
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use Score;
|
||||
use DocId;
|
||||
use docset::{DocSet, SkipResult};
|
||||
use fastfield::U64FastFieldReader;
|
||||
use postings::SegmentPostings;
|
||||
use query::Scorer;
|
||||
use postings::Postings;
|
||||
@@ -9,7 +8,7 @@ use fastfield::FastFieldReader;
|
||||
|
||||
pub struct TermScorer {
|
||||
pub idf: Score,
|
||||
pub fieldnorm_reader_opt: Option<U64FastFieldReader>,
|
||||
pub fieldnorm_reader_opt: Option<FastFieldReader<u64>>,
|
||||
pub postings: SegmentPostings,
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ use DocId;
|
||||
use Score;
|
||||
use query::score_combiner::{DoNothingCombiner, ScoreCombiner};
|
||||
|
||||
const HORIZON_NUM_TINYBITSETS: usize = 32;
|
||||
const HORIZON_NUM_TINYBITSETS: usize = 64;
|
||||
const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32;
|
||||
|
||||
/// Creates a `DocSet` that iterates through the intersection of two `DocSet`s.
|
||||
|
||||
Reference in New Issue
Block a user