diff --git a/src/analyzer/analyzer.rs b/src/analyzer/analyzer.rs
index fdd5cbd25..f2a485557 100644
--- a/src/analyzer/analyzer.rs
+++ b/src/analyzer/analyzer.rs
@@ -30,6 +30,11 @@ impl Default for Token {
     }
 }
 
+
+// Warning! TODO: this may change once associated type constructors
+// land in nightly.
+
+
 pub trait Analyzer<'a>: Sized + Clone {
     type TokenStreamImpl: TokenStream;
diff --git a/src/analyzer/analyzer_manager.rs b/src/analyzer/analyzer_manager.rs
index 9a3bddc67..da35ade1a 100644
--- a/src/analyzer/analyzer_manager.rs
+++ b/src/analyzer/analyzer_manager.rs
@@ -11,6 +11,14 @@ use analyzer::LowerCaser;
 use analyzer::Stemmer;
 
+
+/// The analyzer manager serves as a store for
+/// all of the configured analyzers.
+///
+/// By default, it is populated with the following analyzers:
+///
+/// * raw : does not process nor tokenize the text.
+/// * default : tokenizes on whitespace and punctuation, removes tokens that are too long, and lowercases the text.
 #[derive(Clone)]
 pub struct AnalyzerManager {
     analyzers: Arc< RwLock >> >
diff --git a/src/core/index.rs b/src/core/index.rs
index 5376b85ca..4cfd6096c 100644
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -67,8 +67,10 @@ impl Index {
         Index::from_directory(directory, schema)
     }
 
-    pub fn analyzers(&self) -> AnalyzerManager {
-        self.analyzers.clone()
+
+    /// Accessor for the analyzer manager.
+    pub fn analyzers(&self) -> &AnalyzerManager {
+        &self.analyzers
     }
 
     /// Creates a new index in a temp directory.
diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs
index c77c71a7b..6e15146bd 100644
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -11,7 +11,6 @@ use store::StoreReader;
 use directory::ReadOnlySource;
 use schema::Document;
 use DocId;
-use std::str;
 use std::sync::Arc;
 use std::collections::HashMap;
 use common::CompositeFile;
diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs
index bf1cff08b..4dff60a0e 100644
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -90,8 +90,8 @@ impl SegmentPostings {
         SegmentPostings {
             block_cursor: segment_block_postings,
             cur: COMPRESSION_BLOCK_SIZE, // cursor within the block
-            delete_bitset: delete_bitset,
-            position_computer: position_computer,
+            delete_bitset,
+            position_computer,
         }
     }
diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs
index 4154fdeba..356afaa6e 100644
--- a/src/query/query_parser/query_parser.rs
+++ b/src/query/query_parser/query_parser.rs
@@ -89,10 +89,10 @@ impl QueryParser {
                default_fields: Vec<Field>,
                analyzer_manager: AnalyzerManager)
                -> QueryParser {
         QueryParser {
-            schema: schema,
-            default_fields: default_fields,
+            schema,
+            default_fields,
+            analyzer_manager,
             conjunction_by_default: false,
-            analyzer_manager: analyzer_manager,
         }
     }
 
@@ -101,7 +101,7 @@ impl QueryParser {
         QueryParser::new(
             index.schema(),
             default_fields,
-            index.analyzers())
+            index.analyzers().clone())
     }
 
     /// Set the default way to compose queries to a conjunction.
@@ -223,8 +223,8 @@ impl QueryParser {
         match user_input_ast {
             UserInputAST::Clause(sub_queries) => {
                 let default_occur = self.default_occur();
-                let logical_sub_queries: Vec<(Occur, LogicalAST)> = try!(
-                    sub_queries
+                let logical_sub_queries: Vec<(Occur, LogicalAST)> =
+                    try!(sub_queries
                     .into_iter()
                     .map(|sub_query| self.compute_logical_ast_with_occur(*sub_query))
                     .map(|res| {
@@ -232,24 +232,23 @@ impl QueryParser {
                         (compose_occur(default_occur, occur), sub_ast)
                     })
                 })
-                .collect()
-            );
+                .collect());
                 Ok((Occur::Should, LogicalAST::Clause(logical_sub_queries)))
             }
             UserInputAST::Not(subquery) => {
                 let (occur, logical_sub_queries) =
-                    try!(self.compute_logical_ast_with_occur(*subquery));
+                    self.compute_logical_ast_with_occur(*subquery)?;
                 Ok((compose_occur(Occur::MustNot, occur), logical_sub_queries))
             }
             UserInputAST::Must(subquery) => {
                 let (occur, logical_sub_queries) =
-                    try!(self.compute_logical_ast_with_occur(*subquery));
+                    self.compute_logical_ast_with_occur(*subquery)?;
                 Ok((compose_occur(Occur::Must, occur), logical_sub_queries))
             }
             UserInputAST::Leaf(literal) => {
                 let term_phrases: Vec<(Field, String)> = match literal.field_name {
                     Some(ref field_name) => {
-                        let field = try!(self.resolve_field_name(field_name));
+                        let field = self.resolve_field_name(field_name)?;
                         vec![(field, literal.phrase.clone())]
                     }
                     None => {
diff --git a/src/schema/index_record_option.rs b/src/schema/index_record_option.rs
index 97ad95b23..edb57eb3a 100644
--- a/src/schema/index_record_option.rs
+++ b/src/schema/index_record_option.rs
@@ -1,18 +1,24 @@
-/// Describing the amount of information indexed.
+/// `IndexRecordOption` describes the amount of information associated
+/// with a given field.
+///
+/// It is used in the schema to configure how much data should be
+/// indexed for a given field.
+///
+/// It is also used to describe the amount of information that
+/// you want to be decoded as you go through a posting list.
 ///
-/// Since decoding information is not free, this makes it possible to
-/// avoid this extra cost when the information is not required.
 /// For instance, positions are useful when running phrase queries
-/// but useless in other queries.
+/// but useless for most queries.
+///
 #[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize)]
 pub enum IndexRecordOption {
     #[serde(rename = "basic")]
-    Basic,
+    Basic, //< records only the `DocId`s.
     #[serde(rename = "freq")]
-    WithFreqs,
+    WithFreqs, //< records the document ids as well as the term frequency.
     #[serde(rename = "position")]
-    WithFreqsAndPositions,
+    WithFreqsAndPositions, //< records the document id, the term frequency, and the positions of the occurrences in the document.
 }
 
 impl IndexRecordOption {
diff --git a/src/schema/mod.rs b/src/schema/mod.rs
index 9176424e3..b26d76491 100644
--- a/src/schema/mod.rs
+++ b/src/schema/mod.rs
@@ -40,7 +40,7 @@ let schema = schema_builder.build();
 We can split the problem of generating a search result page into two phases :
 
 * identifying the list of 10 or so documents to be displayed (Conceptually `query -> doc_ids[]`)
-* for each of these documents, retrieving the information required to generate the search results page.
+* for each of these documents, retrieving the information required to generate the search results page.
 (`doc_ids[] -> Document[]`)
 
 In the first phase, the ability to search for documents by the given field is determined by the
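
A minimal caller-side sketch of the `Index::analyzers()` signature change above. The setup helpers used here (`SchemaBuilder`, `TEXT`, `Index::create_in_ram`) are assumed to match the rest of the crate and are not part of this diff; only the borrow-vs-clone behavior comes from the patch.

    use tantivy::Index;
    use tantivy::query::QueryParser;
    use tantivy::schema::{SchemaBuilder, TEXT};

    let mut schema_builder = SchemaBuilder::default();
    let title = schema_builder.add_text_field("title", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    // After this patch, `analyzers()` hands out `&AnalyzerManager` instead of
    // an eager clone; callers that need ownership must clone explicitly.
    let manager = index.analyzers();
    let owned = index.analyzers().clone();

    // `QueryParser::for_index` performs that clone internally, so existing
    // call sites keep working unchanged.
    let query_parser = QueryParser::for_index(&index, vec![title]);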
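The `#[serde(rename = ...)]` attributes on `IndexRecordOption` pin down the serialized spelling of each variant. A small check of that mapping, assuming `serde_json` is the serializer in play (the diff itself only shows the derive):

    use tantivy::schema::IndexRecordOption;

    // "basic", "freq" and "position" are the names fixed by the
    // #[serde(rename = ...)] attributes in the diff above.
    let option = IndexRecordOption::WithFreqsAndPositions;
    assert_eq!(serde_json::to_string(&option).unwrap(), "\"position\"");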