diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 8ca96cbf8..810e314d5 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -12,6 +12,7 @@ use schema::Document; use directory::ReadOnlySource; use DocId; use std::str; +use std::cmp; use postings::TermInfo; use datastruct::fstmap::FstMap; use std::sync::Arc; @@ -201,34 +202,16 @@ impl SegmentReader { let field = term.field(); let field_entry = self.schema.get_field_entry(field); let term_info = get!(self.get_term_info(term)); - let possible_option = match *field_entry.field_type() { - FieldType::Str(ref options) => { - let indexing_options = options.get_indexing_options(); - match option { - SegmentPostingsOption::NoFreq => SegmentPostingsOption::NoFreq, - SegmentPostingsOption::Freq => { - if indexing_options.is_termfreq_enabled() { - SegmentPostingsOption::Freq - } else { - SegmentPostingsOption::NoFreq - } - } - SegmentPostingsOption::FreqAndPositions => { - if indexing_options == TextIndexingOptions::TokenizedWithFreqAndPosition { - SegmentPostingsOption::FreqAndPositions - } else if indexing_options.is_termfreq_enabled() { - SegmentPostingsOption::Freq - } else { - SegmentPostingsOption::NoFreq - } - } - } - } - _ => { SegmentPostingsOption::NoFreq }, - }; - Some(self.read_postings_from_terminfo(&term_info, possible_option)) + let maximum_option = get!(field_entry.field_type().get_segment_postings_option()); + let best_effort_option = cmp::min(maximum_option, option); + Some(self.read_postings_from_terminfo(&term_info, best_effort_option)) } + + /// Returns a posting object given a `term_info`. + /// This method is for advanced usage only. + /// + /// Most users should prefer using `read_postings` instead. 
pub fn read_postings_from_terminfo(&self, term_info: &TermInfo, option: SegmentPostingsOption) diff --git a/src/datastruct/fstmap/fstmap.rs b/src/datastruct/fstmap/fstmap.rs index 54a17ef20..58c02d490 100644 --- a/src/datastruct/fstmap/fstmap.rs +++ b/src/datastruct/fstmap/fstmap.rs @@ -129,7 +129,8 @@ mod tests { use super::*; use directory::{RAMDirectory, Directory}; use std::path::PathBuf; - + use fst::Streamer; + #[test] fn test_fstmap() { let mut directory = RAMDirectory::create(); @@ -146,10 +147,12 @@ mod tests { assert_eq!(fstmap.get("abc"), Some(34u32)); assert_eq!(fstmap.get("abcd"), Some(346u32)); let mut stream = fstmap.stream(); - assert!(stream.advance()); + assert_eq!(stream.next().unwrap(), "abc".as_bytes()); assert_eq!(stream.key(), "abc".as_bytes()); - assert!(stream.advance()); + assert_eq!(stream.value(), 34u32); + assert_eq!(stream.next().unwrap(), "abcd".as_bytes()); assert_eq!(stream.key(), "abcd".as_bytes()); + assert_eq!(stream.value(), 346u32); assert!(!stream.advance()); } diff --git a/src/datastruct/fstmap/fstmerger.rs b/src/datastruct/fstmap/fstmerger.rs index 687ba5127..918065ce6 100644 --- a/src/datastruct/fstmap/fstmerger.rs +++ b/src/datastruct/fstmap/fstmerger.rs @@ -142,7 +142,7 @@ impl<'a, V> Streamer<'a> for FstMerger<'a, V> where V: BinarySerializable { #[cfg(test)] mod tests { - use super::*; + use schema::{Term, SchemaBuilder, Document, TEXT}; use core::Index; diff --git a/src/datastruct/fstmap/streamer.rs b/src/datastruct/fstmap/streamer.rs index b91b13019..c243cecc2 100644 --- a/src/datastruct/fstmap/streamer.rs +++ b/src/datastruct/fstmap/streamer.rs @@ -59,11 +59,11 @@ pub struct FstMapStreamer<'a, V> where V: 'a + BinarySerializable { } -impl<'a, V> fst::Streamer<'a> for FstMapStreamer<'a, V> where V: 'a + BinarySerializable { +impl<'a, 'b, V> fst::Streamer<'b> for FstMapStreamer<'a, V> where V: 'a + BinarySerializable { - type Item = &'a [u8]; + type Item = &'b [u8]; - fn next<'b>(&'b mut self) -> Option<&'b [u8]> { + 
fn next(&'b mut self) -> Option<&'b [u8]> { if self.advance() { Some(&self.buffer) } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index c94b21ea8..aa8e9dc94 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -232,7 +232,10 @@ impl IndexMerger { // we reached a new field. let field_entry = self.schema.get_field_entry(current_field); // ... set segment postings option the new field. - segment_postings_option = field_entry.field_type().get_segment_postings_option(); + segment_postings_option = field_entry + .field_type() + .get_segment_postings_option() + .expect("Encounterred a field that is not supposed to be indexed. Have you modified the index?"); last_field = Some(current_field); need_to_call_new_field = true; } diff --git a/src/postings/segment_postings_option.rs b/src/postings/segment_postings_option.rs index 53aac366a..925722c4a 100644 --- a/src/postings/segment_postings_option.rs +++ b/src/postings/segment_postings_option.rs @@ -6,7 +6,7 @@ /// avoid this extra cost when the information is not required. /// For instance, positions are useful when running phrase queries /// but useless in other queries. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq)] pub enum SegmentPostingsOption { /// Only the doc ids are decoded NoFreq, @@ -15,3 +15,15 @@ pub enum SegmentPostingsOption { /// DocIds, term frequencies and positions will be decoded. 
FreqAndPositions, } + +#[cfg(test)] +mod tests { + + use super::SegmentPostingsOption; + + #[test] + fn test_cmp_segment_postings_option() { + assert!(SegmentPostingsOption::FreqAndPositions > SegmentPostingsOption::Freq); + assert!(SegmentPostingsOption::Freq > SegmentPostingsOption::NoFreq); + } +} diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 999f919ba..26e5e85e9 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -40,23 +40,37 @@ impl FieldType { } } - pub fn get_segment_postings_option(&self) -> SegmentPostingsOption { + /// Given a field configuration, return the maximal possible + /// `SegmentPostingsOption` available. + /// + /// If the field is not indexed, then returns `None`. + pub fn get_segment_postings_option(&self) -> Option { match *self { FieldType::Str(ref text_options) => { match text_options.get_indexing_options() { + TextIndexingOptions::Untokenized => + Some(SegmentPostingsOption::NoFreq), TextIndexingOptions::TokenizedNoFreq => - SegmentPostingsOption::NoFreq, + Some(SegmentPostingsOption::NoFreq), TextIndexingOptions::TokenizedWithFreq => - SegmentPostingsOption::Freq, + Some(SegmentPostingsOption::Freq), TextIndexingOptions::TokenizedWithFreqAndPosition => - SegmentPostingsOption::FreqAndPositions, - _ => { - SegmentPostingsOption::NoFreq + Some(SegmentPostingsOption::FreqAndPositions), + TextIndexingOptions::Unindexed => { + None } } } - FieldType::U64(_) | - FieldType::I64(_) => SegmentPostingsOption::NoFreq, + FieldType::U64(ref int_options) | + FieldType::I64(ref int_options) => { + if int_options.is_indexed() { + Some(SegmentPostingsOption::NoFreq) + } + else { + None + } + + }, } }