diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index 06816f361..dc904550d 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -2,7 +2,7 @@ use directory::{SourceRead, ReadOnlySource}; use termdict::{TermDictionary, TermDictionaryImpl}; use postings::{SegmentPostings, BlockSegmentPostings}; use postings::TermInfo; -use postings::SegmentPostingsOption; +use schema::IndexRecordOption; use schema::Term; use std::cmp; use fastfield::DeleteBitSet; @@ -91,7 +91,7 @@ impl InvertedIndexReader { pub fn read_block_postings_from_terminfo( &self, term_info: &TermInfo, - option: SegmentPostingsOption, + option: IndexRecordOption, ) -> BlockSegmentPostings { let offset = term_info.postings_offset as usize; let postings_data = self.postings_source.slice_from(offset); @@ -110,7 +110,7 @@ impl InvertedIndexReader { pub fn read_postings_from_terminfo( &self, term_info: &TermInfo, - option: SegmentPostingsOption, + option: IndexRecordOption, ) -> SegmentPostings { let block_postings = self.read_block_postings_from_terminfo(term_info, option); let delete_bitset = self.delete_bitset.clone(); @@ -135,18 +135,18 @@ impl InvertedIndexReader { /// the requested options, the returned `SegmentPostings` the method does not fail /// and returns a `SegmentPostings` with as much information as possible. /// - /// For instance, requesting `SegmentPostingsOption::FreqAndPositions` for a + /// For instance, requesting `IndexRecordOption::WithFreqsAndPositions` for a /// `TextIndexingOptions` that does not index position will return a `SegmentPostings` /// with `DocId`s and frequencies. 
pub fn read_postings( &self, term: &Term, - option: SegmentPostingsOption, + option: IndexRecordOption, ) -> Option { let field = term.field(); let field_entry = self.schema.get_field_entry(field); let term_info = get!(self.get_term_info(term)); - let maximum_option = get!(field_entry.field_type().get_segment_postings_option()); + let maximum_option = get!(field_entry.field_type().get_index_record_option()); let best_effort_option = cmp::min(maximum_option, option); Some(self.read_postings_from_terminfo( &term_info, diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 57acc00a7..22f39cac6 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -22,7 +22,7 @@ use indexer::operation::DeleteOperation; use indexer::SegmentEntry; use indexer::SegmentWriter; use postings::DocSet; -use postings::SegmentPostingsOption; +use schema::IndexRecordOption; use schema::Document; use schema::Schema; use schema::Term; @@ -184,7 +184,7 @@ pub fn compute_deleted_bitset( let inverted_index = segment_reader.inverted_index(delete_op.term.field()); if let Some(mut docset) = inverted_index.read_postings( &delete_op.term, - SegmentPostingsOption::NoFreq, + IndexRecordOption::Basic, ) { while docset.advance() { diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index ab40e4d0a..b6f0a7c15 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -271,7 +271,7 @@ impl IndexMerger { // ... set segment postings option the new field. let segment_postings_option = field_entry .field_type() - .get_segment_postings_option() + .get_index_record_option() .expect( "Encountered a field that is not supposed to be indexed. 
Have you modified the schema?", @@ -398,8 +398,7 @@ mod tests { use collector::tests::FastFieldTestCollector; use collector::tests::TestCollector; use query::BooleanQuery; - use postings::SegmentPostingsOption; - use schema::TextIndexingOptions; + use schema::IndexRecordOption; use futures::Future; #[test] @@ -408,7 +407,7 @@ mod tests { let text_fieldtype = schema::TextOptions::default() .set_indexing_options(TextFieldIndexing::default() .set_analyzer("default") - .set_index_option(TextIndexingOptions::WithFreqs)) + .set_index_option(IndexRecordOption::WithFreqs)) .set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); let score_fieldtype = schema::IntOptions::default().set_fast(); @@ -531,7 +530,7 @@ mod tests { fn search_term(searcher: &Searcher, term: Term) -> Vec { let mut collector = FastFieldTestCollector::for_field(Field(1)); - let term_query = TermQuery::new(term, SegmentPostingsOption::NoFreq); + let term_query = TermQuery::new(term, IndexRecordOption::Basic); searcher.search(&term_query, &mut collector).unwrap(); collector.vals() } @@ -542,7 +541,7 @@ mod tests { let text_fieldtype = schema::TextOptions::default() .set_indexing_options( TextFieldIndexing::default() - .set_index_option(TextIndexingOptions::WithFreqs)) + .set_index_option(IndexRecordOption::WithFreqs)) .set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); let score_fieldtype = schema::IntOptions::default().set_fast(); diff --git a/src/lib.rs b/src/lib.rs index f9620b700..5adc8e888 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -124,7 +124,6 @@ pub use self::common::TimerTree; pub use postings::DocSet; pub use postings::Postings; pub use core::SegmentComponent; -pub use postings::SegmentPostingsOption; pub use common::{i64_to_u64, u64_to_i64}; @@ -197,11 +196,10 @@ mod tests { use Index; use core::SegmentReader; use query::BooleanQuery; - use postings::SegmentPostingsOption; + use schema::IndexRecordOption; use schema::*; use 
DocSet; use IndexWriter; - use postings::SegmentPostingsOption::FreqAndPositions; use fastfield::{FastFieldReader, U64FastFieldReader, I64FastFieldReader}; use Postings; use rand::{XorShiftRng, Rng, SeedableRng}; @@ -386,12 +384,12 @@ mod tests { let inverted_index = reader.inverted_index(text_field); assert!( inverted_index - .read_postings(&term_abcd, FreqAndPositions) + .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions) .is_none() ); { let mut postings = inverted_index - .read_postings(&term_a, FreqAndPositions) + .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions) .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 5); @@ -399,7 +397,7 @@ mod tests { } { let mut postings = inverted_index - .read_postings(&term_b, FreqAndPositions) + .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions) .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 3); @@ -430,12 +428,12 @@ mod tests { assert!( inverted_index - .read_postings(&term_abcd, FreqAndPositions) + .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions) .is_none() ); { let mut postings = inverted_index - .read_postings(&term_a, FreqAndPositions) + .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions) .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 5); @@ -443,7 +441,7 @@ mod tests { } { let mut postings = inverted_index - .read_postings(&term_b, FreqAndPositions) + .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions) .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 3); @@ -473,18 +471,18 @@ mod tests { let inverted_index = reader.inverted_index(term_abcd.field()); assert!( inverted_index - .read_postings(&term_abcd, FreqAndPositions) + .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions) .is_none() ); { let mut postings = inverted_index - .read_postings(&term_a, FreqAndPositions) + .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions) 
.unwrap(); assert!(!postings.advance()); } { let mut postings = inverted_index - .read_postings(&term_b, FreqAndPositions) + .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions) .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 3); @@ -494,7 +492,7 @@ mod tests { } { let mut postings = inverted_index - .read_postings(&term_c, FreqAndPositions) + .read_postings(&term_c, IndexRecordOption::WithFreqsAndPositions) .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 4); @@ -520,7 +518,7 @@ mod tests { let mut postings = searcher .segment_reader(0) .inverted_index(term.field()) - .read_postings(&term, SegmentPostingsOption::NoFreq) + .read_postings(&term, IndexRecordOption::Basic) .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 0); @@ -544,7 +542,7 @@ mod tests { let mut postings = searcher .segment_reader(0) .inverted_index(term.field()) - .read_postings(&term, SegmentPostingsOption::NoFreq) + .read_postings(&term, IndexRecordOption::Basic) .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 0); @@ -610,12 +608,12 @@ mod tests { let term_abcd = Term::from_field_text(text_field, "abcd"); assert!( inverted_index - .read_postings(&term_abcd, FreqAndPositions) + .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions) .is_none() ); let term_af = Term::from_field_text(text_field, "af"); let mut postings = inverted_index - .read_postings(&term_af, FreqAndPositions) + .read_postings(&term_af, IndexRecordOption::WithFreqsAndPositions) .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 0); diff --git a/src/postings/mod.rs b/src/postings/mod.rs index cbe059e31..57f4df1e8 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -17,7 +17,6 @@ mod vec_postings; mod segment_postings; mod intersection; mod docset; -mod segment_postings_option; pub use self::docset::{SkipResult, DocSet}; use self::recorder::{Recorder, NothingRecorder, TermFrequencyRecorder, 
TFAndPositionRecorder}; @@ -32,7 +31,6 @@ pub use self::vec_postings::VecPostings; pub use self::segment_postings::{SegmentPostings, BlockSegmentPostings}; pub use self::intersection::IntersectionDocSet; -pub use self::segment_postings_option::SegmentPostingsOption; pub use common::HasLen; @@ -45,7 +43,7 @@ mod tests { use indexer::SegmentWriter; use core::SegmentReader; use core::Index; - use postings::SegmentPostingsOption::FreqAndPositions; + use schema::IndexRecordOption; use std::iter; use datastruct::stacker::Heap; use fastfield::FastFieldReader; @@ -140,7 +138,7 @@ mod tests { assert!( segment_reader .inverted_index(term_a.field()) - .read_postings(&term_a, FreqAndPositions) + .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions) .is_none() ); } @@ -148,7 +146,7 @@ mod tests { let term_a = Term::from_field_text(text_field, "a"); let mut postings_a = segment_reader .inverted_index(term_a.field()) - .read_postings(&term_a, FreqAndPositions) + .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions) .unwrap(); assert_eq!(postings_a.len(), 1000); assert!(postings_a.advance()); @@ -171,7 +169,7 @@ mod tests { let term_e = Term::from_field_text(text_field, "e"); let mut postings_e = segment_reader .inverted_index(term_e.field()) - .read_postings(&term_e, FreqAndPositions) + .read_postings(&term_e, IndexRecordOption::WithFreqsAndPositions) .unwrap(); assert_eq!(postings_e.len(), 1000 - 2); for i in 2u32..1000u32 { @@ -212,11 +210,11 @@ mod tests { index.load_searchers().unwrap(); let term_query = TermQuery::new( Term::from_field_text(text_field, "a"), - SegmentPostingsOption::NoFreq, + IndexRecordOption::Basic, ); let searcher = index.searcher(); let mut term_weight = term_query.specialized_weight(&*searcher); - term_weight.segment_postings_options = SegmentPostingsOption::FreqAndPositions; + term_weight.index_record_option = IndexRecordOption::WithFreqsAndPositions; let segment_reader = &searcher.segment_readers()[0]; let mut term_scorer = 
term_weight.specialized_scorer(segment_reader).unwrap(); assert!(term_scorer.advance()); @@ -261,7 +259,7 @@ mod tests { for j in i + 1..num_docs { let mut segment_postings = segment_reader .inverted_index(term_2.field()) - .read_postings(&term_2, SegmentPostingsOption::NoFreq) + .read_postings(&term_2, IndexRecordOption::Basic) .unwrap(); assert_eq!(segment_postings.skip_next(i), SkipResult::Reached); @@ -275,7 +273,7 @@ mod tests { { let mut segment_postings = segment_reader .inverted_index(term_2.field()) - .read_postings(&term_2, SegmentPostingsOption::NoFreq) + .read_postings(&term_2, IndexRecordOption::Basic) .unwrap(); // check that `skip_next` advances the iterator @@ -296,7 +294,7 @@ mod tests { { let mut segment_postings = segment_reader .inverted_index(term_0.field()) - .read_postings(&term_0, SegmentPostingsOption::NoFreq) + .read_postings(&term_0, IndexRecordOption::Basic) .unwrap(); for i in 0..num_docs / 2 { @@ -306,7 +304,7 @@ mod tests { let mut segment_postings = segment_reader .inverted_index(term_0.field()) - .read_postings(&term_0, SegmentPostingsOption::NoFreq) + .read_postings(&term_0, IndexRecordOption::Basic) .unwrap(); for i in 0..num_docs / 2 - 1 { @@ -331,7 +329,7 @@ mod tests { for i in 0..num_docs { let mut segment_postings = segment_reader .inverted_index(term_2.field()) - .read_postings(&term_2, SegmentPostingsOption::NoFreq) + .read_postings(&term_2, IndexRecordOption::Basic) .unwrap(); if i % 2 == 0 { @@ -347,7 +345,7 @@ mod tests { { let mut segment_postings = segment_reader .inverted_index(term_2.field()) - .read_postings(&term_2, SegmentPostingsOption::NoFreq) + .read_postings(&term_2, IndexRecordOption::Basic) .unwrap(); let mut last = 2; // start from 5 to avoid seeking to 3 twice @@ -383,14 +381,14 @@ mod tests { { let mut segment_postings = segment_reader .inverted_index(term_2.field()) - .read_postings(&term_2, SegmentPostingsOption::NoFreq) + .read_postings(&term_2, IndexRecordOption::Basic) .unwrap(); 
assert_eq!(segment_postings.skip_next(0), SkipResult::End); let mut segment_postings = segment_reader .inverted_index(term_2.field()) - .read_postings(&term_2, SegmentPostingsOption::NoFreq) + .read_postings(&term_2, IndexRecordOption::Basic) .unwrap(); assert_eq!(segment_postings.skip_next(num_docs), SkipResult::End); @@ -458,7 +456,7 @@ mod tests { b.iter(|| { let mut segment_postings = segment_reader .inverted_index(TERM_A.field()) - .read_postings(&*TERM_A, SegmentPostingsOption::NoFreq) + .read_postings(&*TERM_A, IndexRecordOption::Basic) .unwrap(); while segment_postings.advance() {} }); @@ -471,19 +469,19 @@ mod tests { b.iter(|| { let segment_postings_a = segment_reader .inverted_index(TERM_A.field()) - .read_postings(&*TERM_A, SegmentPostingsOption::NoFreq) + .read_postings(&*TERM_A, IndexRecordOption::Basic) .unwrap(); let segment_postings_b = segment_reader .inverted_index(TERM_B.field()) - .read_postings(&*TERM_B, SegmentPostingsOption::NoFreq) + .read_postings(&*TERM_B, IndexRecordOption::Basic) .unwrap(); let segment_postings_c = segment_reader .inverted_index(TERM_C.field()) - .read_postings(&*TERM_C, SegmentPostingsOption::NoFreq) + .read_postings(&*TERM_C, IndexRecordOption::Basic) .unwrap(); let segment_postings_d = segment_reader .inverted_index(TERM_D.field()) - .read_postings(&*TERM_D, SegmentPostingsOption::NoFreq) + .read_postings(&*TERM_D, IndexRecordOption::Basic) .unwrap(); let mut intersection = IntersectionDocSet::from(vec![ segment_postings_a, @@ -502,7 +500,7 @@ mod tests { let mut segment_postings = segment_reader .inverted_index(TERM_A.field()) - .read_postings(&*TERM_A, SegmentPostingsOption::NoFreq) + .read_postings(&*TERM_A, IndexRecordOption::Basic) .unwrap(); let mut existing_docs = Vec::new(); @@ -519,7 +517,7 @@ mod tests { b.iter(|| { let mut segment_postings = segment_reader .inverted_index(TERM_A.field()) - .read_postings(&*TERM_A, SegmentPostingsOption::NoFreq) + .read_postings(&*TERM_A, IndexRecordOption::Basic) 
.unwrap(); for doc in &existing_docs { if segment_postings.skip_next(*doc) == SkipResult::End { @@ -557,7 +555,7 @@ mod tests { let n: u32 = test::black_box(17); let mut segment_postings = segment_reader .inverted_index(TERM_A.field()) - .read_postings(&*TERM_A, SegmentPostingsOption::NoFreq) + .read_postings(&*TERM_A, IndexRecordOption::Basic) .unwrap(); let mut s = 0u32; while segment_postings.advance() { diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index 052f03366..4b4922dac 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -13,7 +13,7 @@ use postings::{NothingRecorder, TermFrequencyRecorder, TFAndPositionRecorder}; use schema::FieldEntry; use schema::FieldType; use analyzer::TokenStream; -use schema::TextIndexingOptions; +use schema::IndexRecordOption; fn posting_from_field_entry<'a>( field_entry: &FieldEntry, @@ -25,13 +25,13 @@ fn posting_from_field_entry<'a>( .get_indexing_options() .map(|indexing_options| { match indexing_options.index_option() { - TextIndexingOptions::Basic => { + IndexRecordOption::Basic => { SpecializedPostingsWriter::::new_boxed(heap) } - TextIndexingOptions::WithFreqs => { + IndexRecordOption::WithFreqs => { SpecializedPostingsWriter::::new_boxed(heap) } - TextIndexingOptions::WithFreqsAndPositions => { + IndexRecordOption::WithFreqsAndPositions => { SpecializedPostingsWriter::::new_boxed(heap) } } diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index cadc85401..bf1cff08b 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -11,8 +11,6 @@ use directory::{SourceRead, ReadOnlySource}; const EMPTY_POSITIONS: [u32; 0] = [0u32; 0]; - - struct PositionComputer { // store the amount of position int // before reading positions. 
@@ -474,7 +472,7 @@ mod tests { use schema::INT_INDEXED; use schema::Term; use fst::Streamer; - use postings::SegmentPostingsOption; + use schema::IndexRecordOption; use common::HasLen; use super::BlockSegmentPostings; @@ -513,7 +511,7 @@ mod tests { let term_info = inverted_index.get_term_info(&term).unwrap(); let mut block_segments = inverted_index.read_block_postings_from_terminfo( &term_info, - SegmentPostingsOption::NoFreq, + IndexRecordOption::Basic, ); let mut offset: u32 = 0u32; // checking that the block before calling advance is empty @@ -554,7 +552,7 @@ mod tests { let term_info = inverted_index.get_term_info(&term).unwrap(); block_segments = inverted_index.read_block_postings_from_terminfo( &term_info, - SegmentPostingsOption::NoFreq, + IndexRecordOption::Basic, ); } assert!(block_segments.advance()); diff --git a/src/postings/segment_postings_option.rs b/src/postings/segment_postings_option.rs deleted file mode 100644 index b50e2eee4..000000000 --- a/src/postings/segment_postings_option.rs +++ /dev/null @@ -1,49 +0,0 @@ - - -/// Object describing the amount of information required when reading a postings. -/// -/// Since decoding information is not free, this makes it possible to -/// avoid this extra cost when the information is not required. -/// For instance, positions are useful when running phrase queries -/// but useless in other queries. -#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq)] -pub enum SegmentPostingsOption { - /// Only the doc ids are decoded - NoFreq, - /// DocIds and term frequencies are decoded - Freq, - /// DocIds, term frequencies and positions will be decoded. - FreqAndPositions, -} - -impl SegmentPostingsOption { - /// Returns true iff this option includes encoding - /// term frequencies. - pub fn has_freq(&self) -> bool { - match *self { - SegmentPostingsOption::NoFreq => false, - _ => true, - } - } - - /// Returns true iff this option include encoding - /// term positions. 
- pub fn has_positions(&self) -> bool { - match *self { - SegmentPostingsOption::FreqAndPositions => true, - _ => false, - } - } -} - -#[cfg(test)] -mod tests { - - use super::SegmentPostingsOption; - - #[test] - fn test_cmp_segment_postings_option() { - assert!(SegmentPostingsOption::FreqAndPositions > SegmentPostingsOption::Freq); - assert!(SegmentPostingsOption::Freq > SegmentPostingsOption::NoFreq); - } -} diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs index ba9f93b19..520cb18ed 100644 --- a/src/query/boolean_query/boolean_query.rs +++ b/src/query/boolean_query/boolean_query.rs @@ -6,7 +6,7 @@ use Searcher; use query::Query; use schema::Term; use query::TermQuery; -use postings::SegmentPostingsOption; +use schema::IndexRecordOption; use query::Occur; use query::OccurFilter; @@ -59,7 +59,7 @@ impl BooleanQuery { let occur_term_queries: Vec<(Occur, Box)> = terms .into_iter() .map(|term| { - let term_query: Box = box TermQuery::new(term, SegmentPostingsOption::Freq); + let term_query: Box = box TermQuery::new(term, IndexRecordOption::WithFreqs); (Occur::Should, term_query) }) .collect(); diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index 73f659a03..19d40e59c 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -24,7 +24,7 @@ mod tests { use Index; use schema::*; use fastfield::U64FastFieldReader; - use postings::SegmentPostingsOption; + use schema::IndexRecordOption; fn abs_diff(left: f32, right: f32) -> f32 { (right - left).abs() @@ -66,7 +66,7 @@ mod tests { let make_term_query = |text: &str| { let term_query = TermQuery::new( Term::from_field_text(text_field, text), - SegmentPostingsOption::NoFreq, + IndexRecordOption::Basic, ); let query: Box = box term_query; query diff --git a/src/query/phrase_query/phrase_weight.rs b/src/query/phrase_query/phrase_weight.rs index 1a85342b9..50287c14b 100644 --- a/src/query/phrase_query/phrase_weight.rs +++ 
b/src/query/phrase_query/phrase_weight.rs @@ -1,7 +1,7 @@ use query::Weight; use query::Scorer; use schema::Term; -use postings::SegmentPostingsOption; +use schema::IndexRecordOption; use core::SegmentReader; use super::PhraseScorer; use postings::IntersectionDocSet; @@ -24,7 +24,7 @@ impl Weight for PhraseWeight { for term in &self.phrase_terms { let inverted_index = reader.inverted_index(term.field()); let term_postings_option = - inverted_index.read_postings(term, SegmentPostingsOption::FreqAndPositions); + inverted_index.read_postings(term, IndexRecordOption::WithFreqsAndPositions); if let Some(term_postings) = term_postings_option { term_postings_list.push(term_postings); } else { diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index a742fbae8..4154fdeba 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -6,7 +6,7 @@ use super::user_input_ast::*; use super::query_grammar::parse_to_ast; use query::Occur; use query::TermQuery; -use postings::SegmentPostingsOption; +use schema::IndexRecordOption; use query::PhraseQuery; use schema::{Term, FieldType}; use std::str::FromStr; @@ -306,7 +306,7 @@ fn compose_occur(left: Occur, right: Occur) -> Occur { fn convert_literal_to_query(logical_literal: LogicalLiteral) -> Box { match logical_literal { - LogicalLiteral::Term(term) => box TermQuery::new(term, SegmentPostingsOption::Freq), + LogicalLiteral::Term(term) => box TermQuery::new(term, IndexRecordOption::WithFreqs), LogicalLiteral::Phrase(terms) => box PhraseQuery::from(terms), } } diff --git a/src/query/term_query/mod.rs b/src/query/term_query/mod.rs index bbc751c5e..e19d8b315 100644 --- a/src/query/term_query/mod.rs +++ b/src/query/term_query/mod.rs @@ -18,7 +18,7 @@ mod tests { use query::TermQuery; use Index; use schema::*; - use postings::SegmentPostingsOption; + use schema::IndexRecordOption; use fastfield::FastFieldReader; fn abs_diff(left: f32, right: f32) -> f32 { @@ 
-46,7 +46,7 @@ mod tests { let searcher = index.searcher(); let term_query = TermQuery::new( Term::from_field_text(text_field, "a"), - SegmentPostingsOption::NoFreq, + IndexRecordOption::Basic, ); let term_weight = term_query.weight(&searcher).unwrap(); let segment_reader = searcher.segment_reader(0); diff --git a/src/query/term_query/term_query.rs b/src/query/term_query/term_query.rs index 5c468c442..7ab36c112 100644 --- a/src/query/term_query/term_query.rs +++ b/src/query/term_query/term_query.rs @@ -3,7 +3,7 @@ use Result; use super::term_weight::TermWeight; use query::Query; use query::Weight; -use postings::SegmentPostingsOption; +use schema::IndexRecordOption; use Searcher; use std::any::Any; @@ -19,15 +19,15 @@ use std::any::Any; #[derive(Debug)] pub struct TermQuery { term: Term, - segment_postings_options: SegmentPostingsOption, + index_record_option: IndexRecordOption, } impl TermQuery { /// Creates a new term query. - pub fn new(term: Term, segment_postings_options: SegmentPostingsOption) -> TermQuery { + pub fn new(term: Term, segment_postings_options: IndexRecordOption) -> TermQuery { TermQuery { term: term, - segment_postings_options: segment_postings_options, + index_record_option: segment_postings_options, } } @@ -42,7 +42,7 @@ impl TermQuery { num_docs: searcher.num_docs(), doc_freq: searcher.doc_freq(&self.term), term: self.term.clone(), - segment_postings_options: self.segment_postings_options, + index_record_option: self.index_record_option, } } } diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index d837a63fd..42f9cfe23 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -2,22 +2,22 @@ use Term; use query::Weight; use core::SegmentReader; use query::Scorer; -use postings::SegmentPostingsOption; use postings::SegmentPostings; +use schema::IndexRecordOption; use super::term_scorer::TermScorer; use Result; pub struct TermWeight { - pub num_docs: u32, - pub doc_freq: 
u32, - pub term: Term, - pub segment_postings_options: SegmentPostingsOption, + pub(crate) num_docs: u32, + pub(crate) doc_freq: u32, + pub(crate) term: Term, + pub(crate) index_record_option: IndexRecordOption, } impl Weight for TermWeight { fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result> { - let specialized_scorer = try!(self.specialized_scorer(reader)); + let specialized_scorer = self.specialized_scorer(reader)?; Ok(box specialized_scorer) } } @@ -36,7 +36,7 @@ impl TermWeight { let inverted_index = reader.inverted_index(field); let fieldnorm_reader_opt = reader.get_fieldnorms_reader(field); let postings_opt: Option = - inverted_index.read_postings(&self.term, self.segment_postings_options); + inverted_index.read_postings(&self.term, self.index_record_option); if let Some(segment_postings) = postings_opt { Ok(TermScorer { idf: self.idf(), diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index 12b9a5ace..2a045fb77 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -23,10 +23,10 @@ pub struct FieldEntry { impl FieldEntry { /// Creates a new u64 field entry in the schema, given /// a name, and some options. 
- pub fn new_text(field_name: String, field_type: TextOptions) -> FieldEntry { + pub fn new_text(field_name: String, text_options: TextOptions) -> FieldEntry { FieldEntry { name: field_name, - field_type: FieldType::Str(field_type), + field_type: FieldType::Str(text_options), } } @@ -221,7 +221,10 @@ mod tests { "name": "title", "type": "text", "options": { - "indexing": "position", + "indexing": { + "record": "position", + "analyzer": "default" + }, "stored": false } }"#; diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index dea01d8c8..2604c6c2e 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -2,8 +2,7 @@ use schema::{TextOptions, IntOptions}; use serde_json::Value as JsonValue; use schema::Value; -use postings::SegmentPostingsOption; -use schema::TextIndexingOptions; +use schema::IndexRecordOption; /// Possible error that may occur while parsing a field value /// At this point the JSON is known to be valid. @@ -41,28 +40,20 @@ impl FieldType { } /// Given a field configuration, return the maximal possible - /// `SegmentPostingsOption` available. + /// `IndexRecordOption` available. /// /// If the field is not indexed, then returns `None`. 
- pub fn get_segment_postings_option(&self) -> Option<SegmentPostingsOption> { + pub fn get_index_record_option(&self) -> Option<IndexRecordOption> { match *self { FieldType::Str(ref text_options) => { - // TODO remove SegmentPostingsOption + TextIndexingOptions - // they are now basically the same object text_options .get_indexing_options() - .map(|text_indexing_options| { - match text_indexing_options.index_option() { - TextIndexingOptions::Basic => SegmentPostingsOption::NoFreq, - TextIndexingOptions::WithFreqs => SegmentPostingsOption::Freq, - TextIndexingOptions::WithFreqsAndPositions => SegmentPostingsOption::FreqAndPositions - } - }) + .map(|indexing_options| indexing_options.index_option()) } FieldType::U64(ref int_options) | FieldType::I64(ref int_options) => { if int_options.is_indexed() { - Some(SegmentPostingsOption::NoFreq) + Some(IndexRecordOption::Basic) } else { None } diff --git a/src/schema/index_record_option.rs b/src/schema/index_record_option.rs new file mode 100644 index 000000000..97ad95b23 --- /dev/null +++ b/src/schema/index_record_option.rs @@ -0,0 +1,55 @@ + +/// Describes the amount of information indexed. +/// +/// Since decoding information is not free, this makes it possible to +/// avoid this extra cost when the information is not required. +/// For instance, positions are useful when running phrase queries +/// but useless in other queries. +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize)] +pub enum IndexRecordOption { + #[serde(rename = "basic")] + Basic, + #[serde(rename = "freq")] + WithFreqs, + #[serde(rename = "position")] + WithFreqsAndPositions, +} + +impl IndexRecordOption { + /// Returns true iff the term frequency will be encoded. + pub fn is_termfreq_enabled(&self) -> bool { + match *self { + IndexRecordOption::WithFreqsAndPositions | + IndexRecordOption::WithFreqs => true, + _ => false, + } + } + + /// Returns true iff the term positions within the document are stored as well. 
+ pub fn is_position_enabled(&self) -> bool { + match *self { + IndexRecordOption::WithFreqsAndPositions => true, + _ => false, + } + } + + /// Returns true iff this option includes encoding + /// term frequencies. + pub fn has_freq(&self) -> bool { + match *self { + IndexRecordOption::Basic => false, + IndexRecordOption::WithFreqs | + IndexRecordOption::WithFreqsAndPositions => true, + } + } + + /// Returns true iff this option includes encoding + /// term positions. + pub fn has_positions(&self) -> bool { + match *self { + IndexRecordOption::Basic | + IndexRecordOption::WithFreqs => false, + IndexRecordOption::WithFreqsAndPositions => true, + } + } +} \ No newline at end of file diff --git a/src/schema/mod.rs b/src/schema/mod.rs index da4d92c4f..9176424e3 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -30,7 +30,9 @@ use tantivy::schema::*; let mut schema_builder = SchemaBuilder::default(); let title_options = TextOptions::default() .set_stored() - .set_indexing_options(TextIndexingOptions::TokenizedWithFreqAndPosition); + .set_indexing_options(TextFieldIndexing::default() + .set_analyzer("default") + .set_index_option(IndexRecordOption::WithFreqsAndPositions)); schema_builder.add_text_field("title_options", title_options); let schema = schema_builder.build(); ``` @@ -112,6 +114,8 @@ mod int_options; mod field; mod value; mod named_field_document; +mod index_record_option; + pub use self::named_field_document::NamedFieldDocument; pub use self::schema::{Schema, SchemaBuilder}; @@ -127,7 +131,7 @@ pub use self::field_entry::FieldEntry; pub use self::field_value::FieldValue; pub use self::text_options::TextOptions; -pub use self::text_options::TextIndexingOptions; +pub use self::index_record_option::IndexRecordOption; pub use self::text_options::TextFieldIndexing; pub use self::text_options::TEXT; pub use self::text_options::STRING; diff --git a/src/schema/schema.rs b/src/schema/schema.rs index c512704b9..d80641485 100644 --- a/src/schema/schema.rs +++ 
b/src/schema/schema.rs @@ -358,7 +358,10 @@ mod tests { "name": "title", "type": "text", "options": { - "indexing": "position", + "indexing": { + "record": "position", + "analyzer": "default" + }, "stored": false } }, @@ -366,7 +369,10 @@ mod tests { "name": "author", "type": "text", "options": { - "indexing": "untokenized", + "indexing": { + "record": "basic", + "analyzer": "raw" + }, "stored": false } }, diff --git a/src/schema/text_options.rs b/src/schema/text_options.rs index 8a93878c9..7fb713a0b 100644 --- a/src/schema/text_options.rs +++ b/src/schema/text_options.rs @@ -1,5 +1,6 @@ use std::ops::BitOr; use std::borrow::Cow; +use schema::IndexRecordOption; /// Define how a text field should be handled by tantivy. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] @@ -45,8 +46,8 @@ impl Default for TextOptions { #[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)] pub struct TextFieldIndexing { + record: IndexRecordOption, analyzer: Cow<'static, str>, - index_option: TextIndexingOptions, } @@ -54,7 +55,7 @@ impl Default for TextFieldIndexing { fn default() -> TextFieldIndexing { TextFieldIndexing { analyzer: Cow::Borrowed("default"), - index_option: TextIndexingOptions::Basic, + record: IndexRecordOption::Basic, } } } @@ -69,56 +70,22 @@ impl TextFieldIndexing { &self.analyzer } - pub fn set_index_option(mut self, index_option: TextIndexingOptions) -> TextFieldIndexing { - self.index_option = index_option; + pub fn set_index_option(mut self, index_option: IndexRecordOption) -> TextFieldIndexing { + self.record = index_option; self } - pub fn index_option(&self) -> TextIndexingOptions { - self.index_option + pub fn index_option(&self) -> IndexRecordOption { + self.record } } -/// Describe how a field should be indexed -#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Hash, Serialize, Deserialize)] -pub enum TextIndexingOptions { - /// - #[serde(rename = "basic")] - Basic, - /// - #[serde(rename = "freq")] - WithFreqs, - /// 
#[serde(rename = "position")] - WithFreqsAndPositions, -} - -impl TextIndexingOptions { - /// Returns true iff the term frequency will be encoded. - pub fn is_termfreq_enabled(&self) -> bool { - match *self { - TextIndexingOptions::WithFreqsAndPositions | - TextIndexingOptions::WithFreqs => true, - _ => false, - } - } - - /// Returns true iff the term positions within the document are stored as well. - pub fn is_position_enabled(&self) -> bool { - match *self { - TextIndexingOptions::WithFreqsAndPositions => true, - _ => false, - } - } -} - - - /// The field will be untokenized and indexed pub const STRING: TextOptions = TextOptions { indexing: Some( TextFieldIndexing { analyzer: Cow::Borrowed("raw"), - index_option: TextIndexingOptions::Basic, + record: IndexRecordOption::Basic, }), stored: false, }; @@ -129,7 +96,7 @@ pub const TEXT: TextOptions = TextOptions { indexing: Some( TextFieldIndexing { analyzer: Cow::Borrowed("default"), - index_option: TextIndexingOptions::WithFreqsAndPositions, + record: IndexRecordOption::WithFreqsAndPositions, }), stored: false, }; @@ -184,4 +151,12 @@ mod tests { } } } + + #[test] + fn test_cmp_index_record_option() { + assert!(IndexRecordOption::WithFreqsAndPositions > IndexRecordOption::WithFreqs); + assert!(IndexRecordOption::WithFreqs > IndexRecordOption::Basic); + } } + +