diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 218ead2e9..066803182 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -94,7 +94,6 @@ pub mod tests { use Score; use core::SegmentReader; use SegmentLocalId; - use fastfield::U64FastFieldReader; use fastfield::FastFieldReader; use schema::Field; @@ -148,7 +147,7 @@ pub mod tests { pub struct FastFieldTestCollector { vals: Vec, field: Field, - ff_reader: Option, + ff_reader: Option>, } impl FastFieldTestCollector { @@ -167,7 +166,7 @@ pub mod tests { impl Collector for FastFieldTestCollector { fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> { - self.ff_reader = Some(reader.get_fast_field_reader(self.field)?); + self.ff_reader = Some(reader.fast_field_reader(self.field)?); Ok(()) } diff --git a/src/common/bitpacker.rs b/src/common/bitpacker.rs index 1521fd2af..04524013a 100644 --- a/src/common/bitpacker.rs +++ b/src/common/bitpacker.rs @@ -89,7 +89,7 @@ where pub fn get(&self, idx: usize) -> u64 { if self.num_bits == 0 { - return 0; + return 0u64; } let data: &[u8] = &*self.data; let num_bits = self.num_bits; @@ -107,7 +107,7 @@ where ); let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) }; let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64; - (val_shifted & mask) + val_shifted & mask } else { let val_unshifted_unmasked: u64 = if addr + 8 <= data.len() { unsafe { *(data[addr..].as_ptr() as *const u64) } @@ -119,14 +119,18 @@ where unsafe { *(buffer[..].as_ptr() as *const u64) } }; let val_shifted = val_unshifted_unmasked >> (bit_shift as u64); - (val_shifted & mask) + val_shifted & mask } } + /// Reads a range of values from the fast field. 
+ /// + /// The range of values read is from + /// `[start..start + output.len()[` pub fn get_range(&self, start: u32, output: &mut [u64]) { if self.num_bits == 0 { for val in output.iter_mut() { - *val = 0; + *val = 0u64; } } else { let data: &[u8] = &*self.data; diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 6f66c5f17..85ddbfea0 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -21,10 +21,11 @@ use schema::FieldType; use error::ErrorKind; use termdict::TermDictionaryImpl; use fastfield::FacetReader; -use fastfield::{FastFieldReader, U64FastFieldReader}; +use fastfield::FastFieldReader; use schema::Schema; use termdict::TermDictionary; -use fastfield::MultiValueIntFastFieldReader; +use fastfield::{FastValue, MultiValueIntFastFieldReader}; +use schema::Cardinality; /// Entry point to access all of the datastructures of the `Segment` /// @@ -91,18 +92,37 @@ impl SegmentReader { /// /// # Panics /// May panic if the index is corrupted. - pub fn get_fast_field_reader( + pub fn fast_field_reader( &self, field: Field, - ) -> fastfield::Result { + ) -> fastfield::Result> { let field_entry = self.schema.get_field_entry(field); - if !TFastFieldReader::is_enabled(field_entry.field_type()) { - Err(FastFieldNotAvailableError::new(field_entry)) - } else { + if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue) { self.fast_fields_composite .open_read(field) .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) - .map(TFastFieldReader::open) + .map(FastFieldReader::open) + } else { + Err(FastFieldNotAvailableError::new(field_entry)) + } + } + + /// Accessor to the `MultiValueIntFastFieldReader` associated to a given `Field`. + /// May panic if the field is not a multivalued fastfield of the type `Item`. 
+ pub fn multi_fast_field_reader(&self, field: Field) -> fastfield::Result> { + let field_entry = self.schema.get_field_entry(field); + if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues) { + let idx_reader = self.fast_fields_composite + .open_read_with_idx(field, 0) + .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) + .map(FastFieldReader::open)?; + let vals_reader = self.fast_fields_composite + .open_read_with_idx(field, 1) + .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) + .map(FastFieldReader::open)?; + Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader)) + } else { + Err(FastFieldNotAvailableError::new(field_entry)) } } @@ -116,7 +136,7 @@ impl SegmentReader { field_entry )).into()); } - let term_ords_reader = self.multi_value_reader(field)?; + let term_ords_reader = self.multi_fast_field_reader(field)?; let termdict_source = self.termdict_composite.open_read(field).ok_or_else(|| { ErrorKind::InvalidArgument(format!( "The field \"{}\" is a hierarchical \ @@ -130,20 +150,6 @@ impl SegmentReader { Ok(facet_reader) } - /// Accessor to the `MultiValueIntFastFieldReader` associated to a given `Field`. - pub fn multi_value_reader(&self, field: Field) -> Result { - let field_entry = self.schema.get_field_entry(field); - let idx_reader = self.fast_fields_composite - .open_read_with_idx(field, 0) - .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) - .map(U64FastFieldReader::open)?; - let vals_reader = self.fast_fields_composite - .open_read_with_idx(field, 1) - .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) - .map(U64FastFieldReader::open)?; - Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader)) - } - /// Accessor to the segment's `Field norms`'s reader. /// /// Field norms are the length (in tokens) of the fields. 
@@ -152,10 +158,10 @@ impl SegmentReader { /// /// They are simply stored as a fast field, serialized in /// the `.fieldnorm` file of the segment. - pub fn get_fieldnorms_reader(&self, field: Field) -> Option { + pub fn get_fieldnorms_reader(&self, field: Field) -> Option> { self.fieldnorms_composite .open_read(field) - .map(U64FastFieldReader::open) + .map(FastFieldReader::open) } /// Accessor to the segment's `StoreReader`. diff --git a/src/datastruct/stacker/hashmap.rs b/src/datastruct/stacker/hashmap.rs index 6e804889b..f1a61702c 100644 --- a/src/datastruct/stacker/hashmap.rs +++ b/src/datastruct/stacker/hashmap.rs @@ -189,11 +189,11 @@ impl<'a> TermHashMap<'a> { let (addr, val): (u32, &mut V) = self.heap.allocate_object(); assert_eq!(addr, key_bytes_ref.addr() + 2 + key_bytes.len() as u32); self.set_bucket(hash, key_bytes_ref, bucket); - return (bucket, val); + return (bucket as UnorderedTermId, val); } else if kv.hash == hash { let (stored_key, expull_addr): (&[u8], u32) = self.get_key_value(kv.key_value_addr); if stored_key == key_bytes { - return (bucket, self.heap.get_mut_ref(expull_addr)); + return (bucket as UnorderedTermId, self.heap.get_mut_ref(expull_addr)); } } } diff --git a/src/fastfield/facet_reader.rs b/src/fastfield/facet_reader.rs index ea5a9e25a..049311094 100644 --- a/src/fastfield/facet_reader.rs +++ b/src/fastfield/facet_reader.rs @@ -18,7 +18,7 @@ use termdict::{TermDictionary, TermDictionaryImpl}; /// list of facets. This ordinal is segment local and /// only makes sense for a given segment. pub struct FacetReader { - term_ords: MultiValueIntFastFieldReader, + term_ords: MultiValueIntFastFieldReader, term_dict: TermDictionaryImpl, } @@ -31,12 +31,12 @@ impl FacetReader { /// - a `TermDictionaryImpl` that helps associating a facet to /// an ordinal and vice versa. 
pub fn new( - term_ords: MultiValueIntFastFieldReader, + term_ords: MultiValueIntFastFieldReader, term_dict: TermDictionaryImpl, ) -> FacetReader { FacetReader { - term_ords: term_ords, - term_dict: term_dict, + term_ords, + term_dict } } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index ffca841b7..14d54f2a8 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -23,6 +23,19 @@ values stored. Read access performance is comparable to that of an array lookup. */ +use common; +use schema::Cardinality; +use schema::FieldType; +use schema::Value; +pub use self::delete::DeleteBitSet; +pub use self::delete::write_delete_bitset; +pub use self::error::{FastFieldNotAvailableError, Result}; +pub use self::facet_reader::FacetReader; +pub use self::multivalued::MultiValueIntFastFieldReader; +pub use self::reader::FastFieldReader; +pub use self::serializer::FastFieldSerializer; +pub use self::writer::{FastFieldsWriter, IntFastFieldWriter}; + mod reader; mod writer; mod serializer; @@ -31,33 +44,104 @@ mod delete; mod facet_reader; mod multivalued; -pub use self::delete::write_delete_bitset; -pub use self::delete::DeleteBitSet; -pub use self::writer::{FastFieldsWriter, IntFastFieldWriter}; -pub use self::reader::{I64FastFieldReader, U64FastFieldReader}; -pub use self::reader::FastFieldReader; -pub use self::serializer::FastFieldSerializer; -pub use self::error::{FastFieldNotAvailableError, Result}; -pub use self::facet_reader::FacetReader; -pub use self::multivalued::MultiValueIntFastFieldReader; +/// Trait for types that are allowed for fast fields: (u64 or i64). +pub trait FastValue: Default + Clone + Copy { + /// Converts a value from u64 + /// + /// Internally all fast field values are encoded as u64. + fn from_u64(val: u64) -> Self; + + /// Converts a value to u64. + /// + /// Internally all fast field values are encoded as u64. + fn to_u64(&self) -> u64; + + /// Returns the fast field cardinality that can be extracted from the given + /// `FieldType`. 
+ /// + /// If the type is not a fast field, `None` is returned. + fn fast_field_cardinality(field_type: &FieldType) -> Option; + + /// Cast value to `u64`. + /// The value is just reinterpreted in memory. + fn as_u64(&self) -> u64; +} + + +impl FastValue for u64 { + fn from_u64(val: u64) -> Self { + val + } + + fn to_u64(&self) -> u64 { + *self + } + + fn as_u64(&self) -> u64 { + *self + } + + fn fast_field_cardinality(field_type: &FieldType) -> Option { + match *field_type { + FieldType::U64(ref integer_options) => + integer_options.get_fastfield_cardinality(), + FieldType::HierarchicalFacet => + Some(Cardinality::MultiValues), + _ => None, + } + } +} + +impl FastValue for i64 { + fn from_u64(val: u64) -> Self { + common::u64_to_i64(val) + } + + fn to_u64(&self) -> u64 { + common::i64_to_u64(*self) + } + + + fn fast_field_cardinality(field_type: &FieldType) -> Option { + match *field_type { + FieldType::I64(ref integer_options) => + integer_options.get_fastfield_cardinality(), + _ => None, + } + } + + fn as_u64(&self) -> u64 { + *self as u64 + } +} + +fn value_to_u64(value: &Value) -> u64 { + match *value { + Value::U64(ref val) => *val, + Value::I64(ref val) => common::i64_to_u64(*val), + _ => panic!("Expected a u64/i64 field, got {:?} ", value), + } +} + #[cfg(test)] mod tests { - use super::*; - use schema::Field; - use std::path::Path; + + use common::CompositeFile; use directory::{Directory, RAMDirectory, WritePtr}; - use schema::Document; - use schema::{Schema, SchemaBuilder}; - use schema::FAST; - use std::collections::HashMap; - use test::Bencher; - use test; use fastfield::FastFieldReader; use rand::Rng; use rand::SeedableRng; - use common::CompositeFile; use rand::XorShiftRng; + use schema::{Schema, SchemaBuilder}; + use schema::Document; + use schema::FAST; + use schema::Field; + use std::collections::HashMap; + use std::path::Path; + use super::*; + use test; + use test::Bencher; lazy_static! 
{ static ref SCHEMA: Schema = { @@ -70,15 +154,9 @@ mod tests { }; } - fn add_single_field_doc(fast_field_writers: &mut FastFieldsWriter, field: Field, value: u64) { - let mut doc = Document::default(); - doc.add_u64(field, value); - fast_field_writers.add_document(&doc); - } - #[test] pub fn test_fastfield() { - let test_fastfield = U64FastFieldReader::from(vec![100, 200, 300]); + let test_fastfield = FastFieldReader::::from(vec![100, 200, 300]); assert_eq!(test_fastfield.get(0), 100); assert_eq!(test_fastfield.get(1), 200); assert_eq!(test_fastfield.get(2), 300); @@ -92,9 +170,9 @@ mod tests { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); - add_single_field_doc(&mut fast_field_writers, *FIELD, 13u64); - add_single_field_doc(&mut fast_field_writers, *FIELD, 14u64); - add_single_field_doc(&mut fast_field_writers, *FIELD, 2u64); + fast_field_writers.add_document(&doc!(*FIELD=>13u64)); + fast_field_writers.add_document(&doc!(*FIELD=>14u64)); + fast_field_writers.add_document(&doc!(*FIELD=>2u64)); fast_field_writers .serialize(&mut serializer, &HashMap::new()) .unwrap(); @@ -107,7 +185,7 @@ mod tests { { let composite_file = CompositeFile::open(&source).unwrap(); let field_source = composite_file.open_read(*FIELD).unwrap(); - let fast_field_reader: U64FastFieldReader = U64FastFieldReader::open(field_source); + let fast_field_reader = FastFieldReader::::open(field_source); assert_eq!(fast_field_reader.get(0), 13u64); assert_eq!(fast_field_reader.get(1), 14u64); assert_eq!(fast_field_reader.get(2), 2u64); @@ -122,15 +200,15 @@ mod tests { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); - add_single_field_doc(&mut fast_field_writers, *FIELD, 4u64); 
- add_single_field_doc(&mut fast_field_writers, *FIELD, 14_082_001u64); - add_single_field_doc(&mut fast_field_writers, *FIELD, 3_052u64); - add_single_field_doc(&mut fast_field_writers, *FIELD, 9002u64); - add_single_field_doc(&mut fast_field_writers, *FIELD, 15_001u64); - add_single_field_doc(&mut fast_field_writers, *FIELD, 777u64); - add_single_field_doc(&mut fast_field_writers, *FIELD, 1_002u64); - add_single_field_doc(&mut fast_field_writers, *FIELD, 1_501u64); - add_single_field_doc(&mut fast_field_writers, *FIELD, 215u64); + fast_field_writers.add_document(&doc!(*FIELD=>4u64)); + fast_field_writers.add_document(&doc!(*FIELD=>14_082_001u64)); + fast_field_writers.add_document(&doc!(*FIELD=>3_052u64)); + fast_field_writers.add_document(&doc!(*FIELD=>9_002u64)); + fast_field_writers.add_document(&doc!(*FIELD=>15_001u64)); + fast_field_writers.add_document(&doc!(*FIELD=>777u64)); + fast_field_writers.add_document(&doc!(*FIELD=>1_002u64)); + fast_field_writers.add_document(&doc!(*FIELD=>1_501u64)); + fast_field_writers.add_document(&doc!(*FIELD=>215u64)); fast_field_writers .serialize(&mut serializer, &HashMap::new()) .unwrap(); @@ -142,8 +220,8 @@ mod tests { } { let fast_fields_composite = CompositeFile::open(&source).unwrap(); - let fast_field_reader: U64FastFieldReader = - U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap()); + let data = fast_fields_composite.open_read(*FIELD).unwrap(); + let fast_field_reader = FastFieldReader::::open(data); assert_eq!(fast_field_reader.get(0), 4u64); assert_eq!(fast_field_reader.get(1), 14_082_001u64); assert_eq!(fast_field_reader.get(2), 3_052u64); @@ -166,7 +244,7 @@ mod tests { let mut serializer = FastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); for _ in 0..10_000 { - add_single_field_doc(&mut fast_field_writers, *FIELD, 100_000u64); + fast_field_writers.add_document(&doc!(*FIELD=>100_000u64)); } fast_field_writers 
.serialize(&mut serializer, &HashMap::new()) @@ -179,8 +257,8 @@ mod tests { } { let fast_fields_composite = CompositeFile::open(&source).unwrap(); - let fast_field_reader: U64FastFieldReader = - U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap()); + let data = fast_fields_composite.open_read(*FIELD).unwrap(); + let fast_field_reader = FastFieldReader::::open(data); for doc in 0..10_000 { assert_eq!(fast_field_reader.get(doc), 100_000u64); } @@ -197,13 +275,9 @@ mod tests { let mut serializer = FastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); // forcing the amplitude to be high - add_single_field_doc(&mut fast_field_writers, *FIELD, 0u64); + fast_field_writers.add_document(&doc!(*FIELD=>0u64)); for i in 0u64..10_000u64 { - add_single_field_doc( - &mut fast_field_writers, - *FIELD, - 5_000_000_000_000_000_000u64 + i, - ); + fast_field_writers.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + i)); } fast_field_writers .serialize(&mut serializer, &HashMap::new()) @@ -216,9 +290,8 @@ mod tests { } { let fast_fields_composite = CompositeFile::open(&source).unwrap(); - let fast_field_reader: U64FastFieldReader = - U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap()); - + let data = fast_fields_composite.open_read(*FIELD).unwrap(); + let fast_field_reader = FastFieldReader::::open(data); assert_eq!(fast_field_reader.get(0), 0u64); for doc in 1..10_001 { assert_eq!( @@ -257,8 +330,8 @@ mod tests { } { let fast_fields_composite = CompositeFile::open(&source).unwrap(); - let fast_field_reader: I64FastFieldReader = - I64FastFieldReader::open(fast_fields_composite.open_read(i64_field).unwrap()); + let data = fast_fields_composite.open_read(i64_field).unwrap(); + let fast_field_reader = FastFieldReader::::open(data); assert_eq!(fast_field_reader.min_value(), -100i64); assert_eq!(fast_field_reader.max_value(), 9_999i64); @@ -296,8 +369,8 @@ mod tests { let 
source = directory.open_read(&path).unwrap(); { let fast_fields_composite = CompositeFile::open(&source).unwrap(); - let fast_field_reader: I64FastFieldReader = - I64FastFieldReader::open(fast_fields_composite.open_read(i64_field).unwrap()); + let data = fast_fields_composite.open_read(i64_field).unwrap(); + let fast_field_reader = FastFieldReader::::open(data); assert_eq!(fast_field_reader.get(0u32), 0i64); } } @@ -320,8 +393,8 @@ mod tests { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); - for x in &permutation { - add_single_field_doc(&mut fast_field_writers, *FIELD, *x); + for &x in &permutation { + fast_field_writers.add_document(&doc!(*FIELD=>x)); } fast_field_writers .serialize(&mut serializer, &HashMap::new()) @@ -331,8 +404,8 @@ mod tests { let source = directory.open_read(&path).unwrap(); { let fast_fields_composite = CompositeFile::open(&source).unwrap(); - let fast_field_reader: U64FastFieldReader = - U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap()); + let data = fast_fields_composite.open_read(*FIELD).unwrap(); + let fast_field_reader = FastFieldReader::::open(data); let mut a = 0u64; for _ in 0..n { @@ -377,8 +450,8 @@ mod tests { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); - for x in &permutation { - add_single_field_doc(&mut fast_field_writers, *FIELD, *x); + for &x in &permutation { + fast_field_writers.add_document(&doc!(*FIELD=>x)); } fast_field_writers .serialize(&mut serializer, &HashMap::new()) @@ -388,8 +461,8 @@ mod tests { let source = directory.open_read(&path).unwrap(); { let fast_fields_composite = CompositeFile::open(&source).unwrap(); - let fast_field_reader: U64FastFieldReader = - 
U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap()); + let data = fast_fields_composite.open_read(*FIELD).unwrap(); + let fast_field_reader = FastFieldReader::::open(data); b.iter(|| { let n = test::black_box(7000u32); @@ -411,8 +484,8 @@ mod tests { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); - for x in &permutation { - add_single_field_doc(&mut fast_field_writers, *FIELD, *x); + for &x in &permutation { + fast_field_writers.add_document(&doc!(*FIELD=>x)); } fast_field_writers .serialize(&mut serializer, &HashMap::new()) @@ -422,8 +495,8 @@ mod tests { let source = directory.open_read(&path).unwrap(); { let fast_fields_composite = CompositeFile::open(&source).unwrap(); - let fast_field_reader: U64FastFieldReader = - U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap()); + let data = fast_fields_composite.open_read(*FIELD).unwrap(); + let fast_field_reader = FastFieldReader::::open(data); b.iter(|| { let n = test::black_box(1000u32); diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs index 0043e7783..5c9f4dc9a 100644 --- a/src/fastfield/multivalued/mod.rs +++ b/src/fastfield/multivalued/mod.rs @@ -3,3 +3,87 @@ mod reader; pub use self::writer::MultiValueIntFastFieldWriter; pub use self::reader::MultiValueIntFastFieldReader; + +#[cfg(test)] +mod tests { + + use schema::SchemaBuilder; + use schema::Cardinality; + use schema::IntOptions; + use Index; + + #[test] + fn test_multivalued_u64() { + let mut schema_builder = SchemaBuilder::default(); + let field = schema_builder.add_u64_field( + "multifield", + IntOptions::default().set_fast(Cardinality::MultiValues) + ); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + 
index_writer.add_document(doc!(field=>1u64, field=>3u64)); + index_writer.add_document(doc!()); + index_writer.add_document(doc!(field=>4u64)); + index_writer.add_document(doc!(field=>5u64, field=>20u64,field=>1u64)); + assert!(index_writer.commit().is_ok()); + + index.load_searchers().unwrap(); + let searcher = index.searcher(); + let reader = searcher.segment_reader(0); + let mut vals = Vec::new(); + let multi_value_reader = reader.multi_fast_field_reader::(field).unwrap(); + { + multi_value_reader.get_vals(2, &mut vals); + assert_eq!(&vals, &[4u64]); + } + { + multi_value_reader.get_vals(0, &mut vals); + assert_eq!(&vals, &[1u64, 3u64]); + } + { + multi_value_reader.get_vals(1, &mut vals); + assert!(vals.is_empty()); + } + } + + + #[test] + fn test_multivalued_i64() { + let mut schema_builder = SchemaBuilder::default(); + let field = schema_builder.add_i64_field( + "multifield", + IntOptions::default().set_fast(Cardinality::MultiValues) + ); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + index_writer.add_document(doc!(field=> 1i64, field => 3i64)); + index_writer.add_document(doc!()); + index_writer.add_document(doc!(field=> -4i64)); + index_writer.add_document(doc!(field=> -5i64, field => -20i64, field=>1i64)); + assert!(index_writer.commit().is_ok()); + + index.load_searchers().unwrap(); + let searcher = index.searcher(); + let reader = searcher.segment_reader(0); + let mut vals = Vec::new(); + let multi_value_reader = reader.multi_fast_field_reader::(field).unwrap(); + { + multi_value_reader.get_vals(2, &mut vals); + assert_eq!(&vals, &[-4i64]); + } + { + multi_value_reader.get_vals(0, &mut vals); + assert_eq!(&vals, &[1i64, 3i64]); + } + { + multi_value_reader.get_vals(1, &mut vals); + assert!(vals.is_empty()); + } + { + multi_value_reader.get_vals(3, &mut vals); + assert_eq!(&vals, &[-5i64, -20i64, 1i64]); + } + } +} \ No newline at end of 
file diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index 147cee89a..4dbe49717 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -1,7 +1,6 @@ use DocId; -use fastfield::FastFieldReader; +use fastfield::{FastFieldReader, FastValue}; -use fastfield::U64FastFieldReader; /// Reader for a multivalued `u64` fast field. /// @@ -12,31 +11,29 @@ use fastfield::U64FastFieldReader; /// The `idx_reader` associated, for each document, the index of its first value. /// #[derive(Clone)] -pub struct MultiValueIntFastFieldReader { - idx_reader: U64FastFieldReader, - vals_reader: U64FastFieldReader, +pub struct MultiValueIntFastFieldReader { + idx_reader: FastFieldReader, + vals_reader: FastFieldReader } -impl MultiValueIntFastFieldReader { +impl MultiValueIntFastFieldReader { pub(crate) fn open( - idx_reader: U64FastFieldReader, - vals_reader: U64FastFieldReader, - ) -> MultiValueIntFastFieldReader { + idx_reader: FastFieldReader, + vals_reader: FastFieldReader, + ) -> MultiValueIntFastFieldReader { MultiValueIntFastFieldReader { - idx_reader: idx_reader, - vals_reader: vals_reader, + idx_reader, + vals_reader } } /// Returns the array of values associated to the given `doc`. 
- pub fn get_vals(&self, doc: DocId, vals: &mut Vec) { + pub fn get_vals(&self, doc: DocId, vals: &mut Vec) { let start = self.idx_reader.get(doc) as u32; let stop = self.idx_reader.get(doc + 1) as u32; - vals.clear(); - for val_id in start..stop { - let val = self.vals_reader.get(val_id); - vals.push(val); - } + let len = (stop - start) as usize; + vals.resize(len, Item::default()); + self.vals_reader.get_range(start, &mut vals[..]); } } diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index d988656ce..15b3fa91e 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -1,22 +1,28 @@ use fastfield::FastFieldSerializer; +use fastfield::serializer::FastSingleFieldSerializer; +use fastfield::value_to_u64; use std::collections::HashMap; use postings::UnorderedTermId; -use schema::Field; +use schema::{Document, Field}; use std::io; +use itertools::Itertools; + pub struct MultiValueIntFastFieldWriter { field: Field, - vals: Vec, + vals: Vec, doc_index: Vec, + is_facet: bool } impl MultiValueIntFastFieldWriter { /// Creates a new `IntFastFieldWriter` - pub fn new(field: Field) -> Self { + pub fn new(field: Field, is_facet: bool) -> Self { MultiValueIntFastFieldWriter { - field: field, + field, vals: Vec::new(), doc_index: Vec::new(), + is_facet } } @@ -37,11 +43,32 @@ impl MultiValueIntFastFieldWriter { self.vals.push(val); } - /// Push the fast fields value to the `FastFieldWriter`. + pub fn add_document(&mut self, doc: &Document) { + if !self.is_facet { + for field_value in doc.field_values() { + if field_value.field() == self.field { + self.add_val(value_to_u64(field_value.value())); + } + } + } + + } + + /// Serializes fast field values by pushing them to the `FastFieldSerializer`. + /// + /// HashMap makes it possible to remap them before serializing. + /// Specifically, string terms are first stored in the writer as their + /// position in the `IndexWriter`'s `HashMap`. 
This value is called + /// an `UnorderedTermId`. + /// + /// During the serialization of the segment, terms gets sorted and + /// `tantivy` builds a mapping to convert this `UnorderedTermId` into + /// term ordinals. + /// pub fn serialize( &self, serializer: &mut FastFieldSerializer, - mapping: &HashMap, + mapping_opt: Option<&HashMap>, ) -> io::Result<()> { { // writing the offset index @@ -55,10 +82,25 @@ impl MultiValueIntFastFieldWriter { } { // writing the values themselves. - let mut value_serializer = - serializer.new_u64_fast_field_with_idx(self.field, 0u64, mapping.len() as u64, 1)?; - for val in &self.vals { - value_serializer.add_val(*mapping.get(val).expect("Missing term ordinal") as u64)?; + let mut value_serializer: FastSingleFieldSerializer<_>; + match mapping_opt { + Some(mapping) => { + value_serializer = + serializer.new_u64_fast_field_with_idx(self.field, 0u64, mapping.len() as u64, 1)?; + for val in &self.vals { + let remapped_val = *mapping.get(val).expect("Missing term ordinal") as u64; + value_serializer.add_val(remapped_val)?; + } + } + None => { + let val_min_max = self.vals.iter().cloned().minmax(); + let (val_min, val_max) = val_min_max.into_option().unwrap_or((0u64, 0)); + value_serializer = + serializer.new_u64_fast_field_with_idx(self.field, val_min, val_max, 1)?; + for &val in &self.vals { + value_serializer.add_val(val)?; + } + } } value_serializer.close_field()?; } diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 003a75a8e..cfe0c2b77 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -1,111 +1,36 @@ -use directory::ReadOnlySource; -use common::{self, BinarySerializable}; -use common::compute_num_bits; +use common::BinarySerializable; use common::bitpacker::BitUnpacker; -use DocId; -use schema::SchemaBuilder; -use std::path::Path; -use schema::FAST; -use directory::{Directory, RAMDirectory, WritePtr}; -use fastfield::{FastFieldSerializer, FastFieldsWriter}; -use schema::FieldType; -use 
std::mem; use common::CompositeFile; -use std::collections::HashMap; +use common::compute_num_bits; +use directory::{Directory, RAMDirectory, WritePtr}; +use directory::ReadOnlySource; +use DocId; +use fastfield::{FastFieldSerializer, FastFieldsWriter}; use owning_ref::OwningRef; +use schema::FAST; +use schema::SchemaBuilder; +use std::collections::HashMap; +use std::marker::PhantomData; +use std::mem; +use std::path::Path; +use super::FastValue; /// Trait for accessing a fastfield. /// /// Depending on the field type, a different /// fast field is required. -pub trait FastFieldReader: Sized { - /// Type of the value stored in the fastfield. - type ValueType; +#[derive(Clone)] +pub struct FastFieldReader { + bit_unpacker: BitUnpacker>, + min_value_u64: u64, + max_value_u64: u64, + _phantom: PhantomData +} - /// Return the value associated to the given document. - /// - /// This accessor should return as fast as possible. - /// - /// # Panics - /// - /// May panic if `doc` is greater than the segment - // `maxdoc`. - fn get(&self, doc: DocId) -> Self::ValueType; - - /// Fills an output buffer with the fast field values - /// associated with the `DocId` going from - /// `start` to `start + output.len()`. - /// - /// # Panics - /// - /// May panic if `start + output.len()` is greater than - /// the segment's `maxdoc`. - fn get_range(&self, start: u32, output: &mut [Self::ValueType]); +impl FastFieldReader { /// Opens a fast field given a source. - fn open(source: ReadOnlySource) -> Self; - - /// Returns true iff the given field_type makes - /// it possible to access the field values via a - /// fastfield. - fn is_enabled(field_type: &FieldType) -> bool; -} - -/// `FastFieldReader` for unsigned 64-bits integers. -#[derive(Clone)] -pub struct U64FastFieldReader { - bit_unpacker: BitUnpacker>, - min_value: u64, - max_value: u64, -} - -impl U64FastFieldReader { - /// Returns the minimum value for this fast field. 
- /// - /// The min value does not take in account of possible - /// deleted document, and should be considered as a lower bound - /// of the actual minimum value. - pub fn min_value(&self) -> u64 { - self.min_value - } - - /// Returns the maximum value for this fast field. - /// - /// The max value does not take in account of possible - /// deleted document, and should be considered as an upper bound - /// of the actual maximum value. - pub fn max_value(&self) -> u64 { - self.max_value - } -} - -impl FastFieldReader for U64FastFieldReader { - type ValueType = u64; - - fn get(&self, doc: DocId) -> u64 { - self.min_value + self.bit_unpacker.get(doc as usize) - } - - fn is_enabled(field_type: &FieldType) -> bool { - match *field_type { - FieldType::U64(ref integer_options) => integer_options.is_fast(), - FieldType::HierarchicalFacet => true, - _ => false, - } - } - - fn get_range(&self, start: u32, output: &mut [Self::ValueType]) { - self.bit_unpacker.get_range(start, output); - for out in output.iter_mut() { - *out += self.min_value; - } - } - - /// Opens a new fast field reader given a read only source. - /// - /// # Panics - /// Panics if the data is corrupted. - fn open(data: ReadOnlySource) -> U64FastFieldReader { + pub fn open(data: ReadOnlySource) -> Self { let min_value: u64; let amplitude: u64; { @@ -119,16 +44,64 @@ impl FastFieldReader for U64FastFieldReader { let num_bits = compute_num_bits(amplitude); let owning_ref = OwningRef::new(data).map(|data| &data[16..]); let bit_unpacker = BitUnpacker::new(owning_ref, num_bits); - U64FastFieldReader { - min_value, - max_value, + FastFieldReader { + min_value_u64: min_value, + max_value_u64: max_value, bit_unpacker, + _phantom: PhantomData } } + + + /// Return the value associated to the given document. + /// + /// This accessor should return as fast as possible. + /// + /// # Panics + /// + /// May panic if `doc` is greater than the segment + // `maxdoc`. 
+ pub fn get(&self, doc: DocId) -> Item { + Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc as usize)) + } + + /// Fills an output buffer with the fast field values + /// associated with the `DocId` going from + /// `start` to `start + output.len()`. + /// + /// # Panics + /// + /// May panic if `start + output.len()` is greater than + /// the segment's `maxdoc`. + pub fn get_range(&self, start: u32, output: &mut [Item]) { + let output_u64: &mut [u64] = unsafe { mem::transmute(output) }; + self.bit_unpacker.get_range(start, output_u64); + for out in output_u64.iter_mut() { + *out = Item::from_u64(*out + self.min_value_u64).as_u64(); + } + } + + /// Returns the minimum value for this fast field. + /// + /// The max value does not take in account of possible + /// deleted document, and should be considered as an upper bound + /// of the actual maximum value. + pub fn min_value(&self) -> Item { + Item::from_u64(self.min_value_u64) + } + + /// Returns the maximum value for this fast field. + /// + /// The max value does not take in account of possible + /// deleted document, and should be considered as an upper bound + /// of the actual maximum value. 
+ pub fn max_value(&self) -> Item { + Item::from_u64(self.max_value_u64) + } } -impl From> for U64FastFieldReader { - fn from(vals: Vec) -> U64FastFieldReader { +impl From> for FastFieldReader { + fn from(vals: Vec) -> FastFieldReader { let mut schema_builder = SchemaBuilder::default(); let field = schema_builder.add_u64_field("field", FAST); let schema = schema_builder.build(); @@ -146,7 +119,7 @@ impl From> for U64FastFieldReader { .get_field_writer(field) .expect("With a RAMDirectory, this should never fail."); for val in vals { - fast_field_writer.add_val(val); + fast_field_writer.add_val(val.to_u64()); } } fast_field_writers @@ -158,79 +131,10 @@ impl From> for U64FastFieldReader { let source = directory.open_read(path).expect("Failed to open the file"); let composite_file = CompositeFile::open(&source).expect("Failed to read the composite file"); - let field_source = composite_file .open_read(field) .expect("File component not found"); - U64FastFieldReader::open(field_source) + FastFieldReader::open(field_source) } } -/// `FastFieldReader` for signed 64-bits integers. -pub struct I64FastFieldReader { - underlying: U64FastFieldReader, -} - -impl I64FastFieldReader { - /// Returns the minimum value for this fast field. - /// - /// The min value does not take in account of possible - /// deleted document, and should be considered as a lower bound - /// of the actual minimum value. - pub fn min_value(&self) -> i64 { - common::u64_to_i64(self.underlying.min_value()) - } - - /// Returns the maximum value for this fast field. - /// - /// The max value does not take in account of possible - /// deleted document, and should be considered as an upper bound - /// of the actual maximum value. 
- pub fn max_value(&self) -> i64 { - common::u64_to_i64(self.underlying.max_value()) - } -} - -impl FastFieldReader for I64FastFieldReader { - type ValueType = i64; - - /// - /// - /// # Panics - /// - /// May panic or return wrong random result if `doc` - /// is greater or equal to the segment's `maxdoc`. - fn get(&self, doc: DocId) -> i64 { - common::u64_to_i64(self.underlying.get(doc)) - } - - /// - /// # Panics - /// - /// May panic or return wrong random result if `doc` - /// is greater or equal to the segment's `maxdoc`. - fn get_range(&self, start: u32, output: &mut [Self::ValueType]) { - let output_u64: &mut [u64] = unsafe { mem::transmute(output) }; - self.underlying.get_range(start, output_u64); - for mut_val in output_u64.iter_mut() { - *mut_val = common::u64_to_i64(*mut_val as u64) as u64; - } - } - - /// Opens a new fast field reader given a read only source. - /// - /// # Panics - /// Panics if the data is corrupted. - fn open(data: ReadOnlySource) -> I64FastFieldReader { - I64FastFieldReader { - underlying: U64FastFieldReader::open(data), - } - } - - fn is_enabled(field_type: &FieldType) -> bool { - match *field_type { - FieldType::I64(ref integer_options) => integer_options.is_fast(), - _ => false, - } - } -} diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs index 43b55daf0..208b9e2ea 100644 --- a/src/fastfield/serializer.rs +++ b/src/fastfield/serializer.rs @@ -37,7 +37,7 @@ impl FastFieldSerializer { // just making room for the pointer to header. 
let composite_write = CompositeWrite::wrap(write); Ok(FastFieldSerializer { - composite_write: composite_write, + composite_write }) } diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 7248b93e1..19cd5cef2 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -1,7 +1,6 @@ use schema::{Cardinality, Document, Field, Schema}; use fastfield::FastFieldSerializer; use std::io; -use schema::Value; use DocId; use schema::FieldType; use common; @@ -39,22 +38,22 @@ impl FastFieldsWriter { single_value_writers.push(fast_field_writer); } Some(Cardinality::MultiValues) => { - let fast_field_writer = MultiValueIntFastFieldWriter::new(field); + let fast_field_writer = MultiValueIntFastFieldWriter::new(field, false); multi_values_writers.push(fast_field_writer); } None => {} } } FieldType::HierarchicalFacet => { - let fast_field_writer = MultiValueIntFastFieldWriter::new(field); + let fast_field_writer = MultiValueIntFastFieldWriter::new(field, true); multi_values_writers.push(fast_field_writer); } _ => {} } } FastFieldsWriter { - single_value_writers: single_value_writers, - multi_values_writers: multi_values_writers, + single_value_writers, + multi_values_writers } } @@ -97,6 +96,7 @@ impl FastFieldsWriter { } for field_writer in &mut self.multi_values_writers { field_writer.next_doc(); + field_writer.add_document(doc); } } @@ -112,11 +112,7 @@ impl FastFieldsWriter { } for field_writer in &self.multi_values_writers { let field = field_writer.field(); - if let Some(mapping) = mapping.get(&field) { - field_writer.serialize(serializer, mapping)?; - } else { - panic!("Term ordinal mapping missing for {:?}", field); - } + field_writer.serialize(serializer, mapping.get(&field))?; } Ok(()) } @@ -160,7 +156,7 @@ impl IntFastFieldWriter { /// Creates a new `IntFastFieldWriter` pub fn new(field: Field) -> IntFastFieldWriter { IntFastFieldWriter { - field: field, + field, vals: Vec::new(), val_count: 0, val_if_missing: 0u64, @@ -227,11 +223,7 @@ impl 
IntFastFieldWriter { /// only the first one is taken in account. fn extract_val(&self, doc: &Document) -> u64 { match doc.get_first(self.field) { - Some(v) => match *v { - Value::U64(ref val) => *val, - Value::I64(ref val) => common::i64_to_u64(*val), - _ => panic!("Expected a u64field, got {:?} ", v), - }, + Some(v) => super::value_to_u64(v), None => self.val_if_missing, } } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index ce2671167..7e1a0580b 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -5,7 +5,6 @@ use DocId; use core::SerializableSegment; use indexer::SegmentSerializer; use postings::InvertedIndexSerializer; -use fastfield::U64FastFieldReader; use itertools::Itertools; use postings::Postings; use docset::DocSet; @@ -26,7 +25,7 @@ pub struct IndexMerger { } fn compute_min_max_val( - u64_reader: &U64FastFieldReader, + u64_reader: &FastFieldReader, max_doc: DocId, delete_bitset: &DeleteBitSet, ) -> Option<(u64, u64)> { @@ -50,15 +49,15 @@ fn compute_min_max_val( fn extract_fieldnorm_reader( segment_reader: &SegmentReader, field: Field, -) -> Option { +) -> Option> { segment_reader.get_fieldnorms_reader(field) } fn extract_fast_field_reader( segment_reader: &SegmentReader, field: Field, -) -> Option { - segment_reader.get_fast_field_reader(field).ok() +) -> Option> { + segment_reader.fast_field_reader(field).ok() } struct DeltaComputer { @@ -137,7 +136,7 @@ impl IndexMerger { fn generic_write_fast_field( &self, fields: Vec, - field_reader_extractor: &Fn(&SegmentReader, Field) -> Option, + field_reader_extractor: &Fn(&SegmentReader, Field) -> Option>, fast_field_serializer: &mut FastFieldSerializer, ) -> Result<()> { for field in fields { @@ -368,7 +367,6 @@ mod tests { use query::TermQuery; use schema::Field; use core::Index; - use fastfield::U64FastFieldReader; use Searcher; use DocAddress; use collector::tests::FastFieldTestCollector; @@ -628,16 +626,16 @@ mod tests { vec![6_000, 7_000] ); - let score_field_reader: 
U64FastFieldReader = searcher + let score_field_reader = searcher .segment_reader(0) - .get_fast_field_reader(score_field) + .fast_field_reader::(score_field) .unwrap(); assert_eq!(score_field_reader.min_value(), 1); assert_eq!(score_field_reader.max_value(), 3); - let score_field_reader: U64FastFieldReader = searcher + let score_field_reader = searcher .segment_reader(1) - .get_fast_field_reader(score_field) + .fast_field_reader::(score_field) .unwrap(); assert_eq!(score_field_reader.min_value(), 4000); assert_eq!(score_field_reader.max_value(), 7000); @@ -685,9 +683,9 @@ mod tests { search_term(&searcher, Term::from_field_text(text_field, "g")), vec![6_000, 7_000] ); - let score_field_reader: U64FastFieldReader = searcher + let score_field_reader = searcher .segment_reader(0) - .get_fast_field_reader(score_field) + .fast_field_reader::(score_field) .unwrap(); assert_eq!(score_field_reader.min_value(), 3); assert_eq!(score_field_reader.max_value(), 7000); @@ -731,9 +729,9 @@ mod tests { search_term(&searcher, Term::from_field_text(text_field, "g")), vec![6_000, 7_000] ); - let score_field_reader: U64FastFieldReader = searcher + let score_field_reader = searcher .segment_reader(0) - .get_fast_field_reader(score_field) + .fast_field_reader::(score_field) .unwrap(); assert_eq!(score_field_reader.min_value(), 3); assert_eq!(score_field_reader.max_value(), 7000); @@ -782,9 +780,9 @@ mod tests { search_term(&searcher, Term::from_field_text(text_field, "g")), vec![6_000, 7_000] ); - let score_field_reader: U64FastFieldReader = searcher + let score_field_reader = searcher .segment_reader(0) - .get_fast_field_reader(score_field) + .fast_field_reader::(score_field) .unwrap(); assert_eq!(score_field_reader.min_value(), 6000); assert_eq!(score_field_reader.max_value(), 7000); diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index d3fcbd736..b46466f47 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -160,7 +160,6 @@ 
impl<'a> SegmentWriter<'a> { self.multifield_postings.subscribe(doc_id, &term); unordered_term_id_opt = Some(unordered_term_id); }); - if let Some(unordered_term_id) = unordered_term_id_opt { self.fast_field_writers .get_multivalue_writer(field) diff --git a/src/lib.rs b/src/lib.rs index bd8c31e80..792269ba3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -286,7 +286,6 @@ mod tests { use schema::*; use docset::DocSet; use IndexWriter; - use fastfield::{FastFieldReader, I64FastFieldReader, U64FastFieldReader}; use Postings; use rand::{Rng, SeedableRng, XorShiftRng}; use rand::distributions::{IndependentSample, Range}; @@ -857,22 +856,22 @@ mod tests { let segment_reader: &SegmentReader = searcher.segment_reader(0); { let fast_field_reader_res = - segment_reader.get_fast_field_reader::(text_field); + segment_reader.fast_field_reader::(text_field); assert!(fast_field_reader_res.is_err()); } { let fast_field_reader_res = - segment_reader.get_fast_field_reader::(stored_int_field); + segment_reader.fast_field_reader::(stored_int_field); assert!(fast_field_reader_res.is_err()); } { let fast_field_reader_res = - segment_reader.get_fast_field_reader::(fast_field_signed); + segment_reader.fast_field_reader::(fast_field_signed); assert!(fast_field_reader_res.is_err()); } { let fast_field_reader_res = - segment_reader.get_fast_field_reader::(fast_field_signed); + segment_reader.fast_field_reader::(fast_field_signed); assert!(fast_field_reader_res.is_ok()); let fast_field_reader = fast_field_reader_res.unwrap(); assert_eq!(fast_field_reader.get(0), 4i64) @@ -880,7 +879,7 @@ mod tests { { let fast_field_reader_res = - segment_reader.get_fast_field_reader::(fast_field_signed); + segment_reader.fast_field_reader::(fast_field_signed); assert!(fast_field_reader_res.is_ok()); let fast_field_reader = fast_field_reader_res.unwrap(); assert_eq!(fast_field_reader.get(0), 4i64) diff --git a/src/macros.rs b/src/macros.rs index 1dcca3181..525108c58 100644 --- a/src/macros.rs +++ b/src/macros.rs 
@@ -54,7 +54,7 @@ macro_rules! doc( ($crate::Document::default()) } }; // avoids a warning due to the useless `mut`. - ($($field:ident => $value:expr),*) => { + ($($field:expr => $value:expr),*) => { { let mut document = $crate::Document::default(); $( diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 112519b99..c67a9f855 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -25,7 +25,7 @@ pub use self::segment_postings::{BlockSegmentPostings, SegmentPostings}; pub use common::HasLen; -pub(crate) type UnorderedTermId = usize; +pub(crate) type UnorderedTermId = u64; #[allow(enum_variant_names)] pub(crate) enum FreqReadingOption { @@ -51,7 +51,6 @@ pub mod tests { use schema::IndexRecordOption; use std::iter; use datastruct::stacker::Heap; - use fastfield::FastFieldReader; use query::TermQuery; use schema::Field; use test::{self, Bencher}; diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index 2b9e4d677..44d016088 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -221,7 +221,7 @@ impl<'a, Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<' heap: &Heap, ) -> UnorderedTermId { debug_assert!(term.as_slice().len() >= 4); - let (term_ord, recorder): (usize, &mut Rec) = term_index.get_or_create(term); + let (term_ord, recorder): (UnorderedTermId, &mut Rec) = term_index.get_or_create(term); let current_doc = recorder.current_doc(); if current_doc != doc { if current_doc != u32::max_value() { diff --git a/src/query/term_query/mod.rs b/src/query/term_query/mod.rs index 04efc2f43..11e70aa8a 100644 --- a/src/query/term_query/mod.rs +++ b/src/query/term_query/mod.rs @@ -13,7 +13,6 @@ mod tests { use postings::SegmentPostings; use query::{Query, Scorer}; use query::term_query::TermScorer; - use fastfield::U64FastFieldReader; use query::TermQuery; use Index; use schema::*; @@ -56,7 +55,7 @@ mod tests { #[test] pub fn test_term_scorer() { - let left_fieldnorms = 
U64FastFieldReader::from(vec![10, 4]); + let left_fieldnorms = FastFieldReader::from(vec![10, 4]); assert_eq!(left_fieldnorms.get(0), 10); assert_eq!(left_fieldnorms.get(1), 4); let left = SegmentPostings::create_from_docs(&[1]); diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs index e9faf1d67..d8352780c 100644 --- a/src/query/term_query/term_scorer.rs +++ b/src/query/term_query/term_scorer.rs @@ -1,7 +1,6 @@ use Score; use DocId; use docset::{DocSet, SkipResult}; -use fastfield::U64FastFieldReader; use postings::SegmentPostings; use query::Scorer; use postings::Postings; @@ -9,7 +8,7 @@ use fastfield::FastFieldReader; pub struct TermScorer { pub idf: Score, - pub fieldnorm_reader_opt: Option, + pub fieldnorm_reader_opt: Option>, pub postings: SegmentPostings, } diff --git a/src/query/union.rs b/src/query/union.rs index 88df78b2e..1f85b65e7 100644 --- a/src/query/union.rs +++ b/src/query/union.rs @@ -6,7 +6,7 @@ use DocId; use Score; use query::score_combiner::{DoNothingCombiner, ScoreCombiner}; -const HORIZON_NUM_TINYBITSETS: usize = 32; +const HORIZON_NUM_TINYBITSETS: usize = 64; const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32; /// Creates a `DocSet` that iterator through the intersection of two `DocSet`s.