diff --git a/src/indexer/log_merge_policy.rs b/src/indexer/log_merge_policy.rs
index f61df2a1c..1a1b62dbd 100644
--- a/src/indexer/log_merge_policy.rs
+++ b/src/indexer/log_merge_policy.rs
@@ -5,28 +5,39 @@
 use std::cmp;
 use std::f64;
 
 const DEFAULT_LEVEL_LOG_SIZE: f64 = 0.75;
-const DEFAULT_MIN_SEGMENT_SIZE: u32 = 10_000;
+const DEFAULT_MIN_LAYER_SIZE: u32 = 10_000;
 const DEFAULT_MIN_MERGE_SIZE: usize = 8;
 
 pub struct LogMergePolicy {
     min_merge_size: usize,
-    min_segment_size: u32,
+    min_layer_size: u32,
     level_log_size: f64,
 }
 
 impl LogMergePolicy {
     fn clip_min_size(&self, size: u32) -> u32 {
-        cmp::max(self.min_segment_size, size)
+        cmp::max(self.min_layer_size, size)
     }
-    
+
+    /// Set the minimum number of segments that may be merged together.
     pub fn set_min_merge_size(&mut self, min_merge_size: usize) {
         self.min_merge_size = min_merge_size;
     }
-    
-    pub fn set_min_segment_size(&mut self, min_segment_size: u32) {
-        self.min_segment_size = min_segment_size;
+
+    /// Set the minimum segment size under which all segments belong
+    /// to the same level.
+    pub fn set_min_layer_size(&mut self, min_layer_size: u32) {
+        self.min_layer_size = min_layer_size;
     }
-    
+
+    /// Set the ratio between two consecutive levels.
+    ///
+    /// Segments are grouped in levels according to their sizes.
+    /// These levels are defined as intervals of exponentially growing sizes.
+    /// `level_log_size` defines the factor by which the size limit of one
+    /// level must be multiplied to obtain the size limit of the next level.
     pub fn set_level_log_size(&mut self, level_log_size: f64) {
         self.level_log_size = level_log_size;
     }
@@ -34,25 +45,25 @@ impl LogMergePolicy {
 
 impl MergePolicy for LogMergePolicy {
     fn compute_merge_candidates(&self, segments: &[SegmentMeta]) -> Vec<MergeCandidate> {
-        
+
         if segments.is_empty() {
             return Vec::new();
         }
-        
+
         let mut size_sorted_tuples = segments.iter()
             .map(|x| x.num_docs)
             .enumerate()
             .collect::<Vec<_>>();
-        size_sorted_tuples.sort_by(|x,y| y.cmp(x));
-        
+        size_sorted_tuples.sort_by(|x, y| y.cmp(x));
+
         let size_sorted_log_tuples: Vec<_> = size_sorted_tuples.into_iter()
             .map(|(ind, num_docs)| (ind, (self.clip_min_size(num_docs) as f64).log2()))
             .collect();
 
         let (first_ind, first_score) = size_sorted_log_tuples[0];
         let mut current_max_log_size = first_score;
-        let mut levels = vec!(vec!(first_ind));
+        let mut levels = vec![vec![first_ind]];
         for &(ind, score) in (&size_sorted_log_tuples).iter().skip(1) {
             if score < (current_max_log_size - self.level_log_size) {
                 current_max_log_size = score;
@@ -62,7 +73,7 @@ impl MergePolicy for LogMergePolicy {
         }
 
         let result = levels.iter()
-            .filter(|level| {level.len() >= self.min_merge_size})
+            .filter(|level| level.len() >= self.min_merge_size)
             .map(|ind_vec| {
                 MergeCandidate(ind_vec.iter()
                     .map(|&ind| segments[ind].segment_id)
@@ -78,7 +89,7 @@ impl Default for LogMergePolicy {
     fn default() -> LogMergePolicy {
         LogMergePolicy {
             min_merge_size: DEFAULT_MIN_MERGE_SIZE,
-            min_segment_size: DEFAULT_MIN_SEGMENT_SIZE,
+            min_layer_size: DEFAULT_MIN_LAYER_SIZE,
             level_log_size: DEFAULT_LEVEL_LOG_SIZE,
         }
     }
@@ -93,7 +104,7 @@ mod tests {
     fn test_merge_policy() -> LogMergePolicy {
         let mut log_merge_policy = LogMergePolicy::default();
         log_merge_policy.set_min_merge_size(3);
-        log_merge_policy.set_min_segment_size(2);
+        log_merge_policy.set_min_layer_size(2);
         log_merge_policy
     }
 
@@ -108,7 +119,7 @@ mod tests {
     fn test_log_merge_policy_pair() {
         let test_input = vec![SegmentMeta::new(SegmentId::generate_random(), 10),
                               SegmentMeta::new(SegmentId::generate_random(), 10),
-                              SegmentMeta::new(SegmentId::generate_random(), 10)]; 
+                              SegmentMeta::new(SegmentId::generate_random(), 10)];
         let result_list = test_merge_policy().compute_merge_candidates(&test_input);
         assert_eq!(result_list.len(), 1);
     }
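
The level computation above works on the log2 of each segment's clipped size: segments are visited from largest to smallest, and a segment opens a new level whenever its log-size falls more than `level_log_size` below the top of the current level. A minimal standalone sketch of that banding logic (illustrative only, not part of this patch or of tantivy's API):

    // Standalone re-implementation of the banding performed by
    // compute_merge_candidates; `num_docs` stands in for segment sizes.
    fn assign_levels(mut num_docs: Vec<u32>,
                     min_layer_size: u32,
                     level_log_size: f64) -> Vec<Vec<u32>> {
        num_docs.sort_by(|a, b| b.cmp(a)); // largest segments first
        let mut levels: Vec<Vec<u32>> = Vec::new();
        let mut current_max_log_size = f64::INFINITY;
        for size in num_docs {
            let score = (size.max(min_layer_size) as f64).log2();
            if score < current_max_log_size - level_log_size {
                // Too small for the current level: open a new one.
                current_max_log_size = score;
                levels.push(Vec::new());
            }
            levels.last_mut().unwrap().push(size);
        }
        levels
    }

With the defaults above, `assign_levels(vec![100_000, 90_000, 10, 10, 10], 10_000, 0.75)` yields two levels: `[100_000, 90_000]`, and the three tiny segments, which are all clipped up to `min_layer_size` and therefore share one level. Only levels holding at least `min_merge_size` segments become merge candidates.
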
diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs
index 8c0ef57be..e2d0cb3e1 100644
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -278,7 +278,7 @@ mod tests {
     use DocAddress;
     use collector::tests::FastFieldTestCollector;
     use collector::tests::TestCollector;
-    use query::MultiTermQuery;
+    use query::BooleanQuery;
     use schema::TextIndexingOptions;
 
     #[test]
@@ -341,7 +341,7 @@ mod tests {
         let searcher = index.searcher();
         let get_doc_ids = |terms: Vec<Term>| {
             let mut collector = TestCollector::default();
-            let query = MultiTermQuery::from(terms);
+            let query = BooleanQuery::new_multiterms_query(terms);
             assert!(searcher.search(&query, &mut collector).is_ok());
             collector.docs()
         };
@@ -385,7 +385,7 @@ mod tests {
         }
         {
             let get_fast_vals = |terms: Vec<Term>| {
-                let query = MultiTermQuery::from(terms);
+                let query = BooleanQuery::new_multiterms_query(terms);
                 let mut collector = FastFieldTestCollector::for_field(score_field);
                 assert!(searcher.search(&query, &mut collector).is_ok());
                 collector.vals().clone()
diff --git a/src/lib.rs b/src/lib.rs
index 97ec128f8..0fd0dc027 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -157,9 +157,9 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId);
 mod tests {
 
     use collector::tests::TestCollector;
-    use query::MultiTermQuery;
     use Index;
     use core::SegmentReader;
+    use query::BooleanQuery;
     use schema::*;
     use DocSet;
     use Postings;
@@ -316,7 +316,7 @@ mod tests {
         {
             let searcher = index.searcher();
             let get_doc_ids = |terms: Vec<Term>| {
-                let query = MultiTermQuery::from(terms);
+                let query = BooleanQuery::new_multiterms_query(terms);
                 let mut collector = TestCollector::default();
                 assert!(searcher.search(&query, &mut collector).is_ok());
                 collector.docs()
diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs
index 2d660f7aa..524293fb8 100644
--- a/src/query/boolean_query/boolean_query.rs
+++ b/src/query/boolean_query/boolean_query.rs
@@ -4,6 +4,9 @@ use super::boolean_weight::BooleanWeight;
 use query::Weight;
 use Searcher;
 use query::Query;
+use schema::Term;
+use query::TermQuery;
+use postings::SegmentPostingsOption;
 use query::Occur;
 use query::OccurFilter;
 
@@ -19,7 +22,7 @@ use query::OccurFilter;
 /// a `MustNot` occurence.
 #[derive(Debug)]
 pub struct BooleanQuery {
-    subqueries: Vec<(Occur, Box<Query>)>
+    subqueries: Vec<(Occur, Box<Query>)>,
 }
 
 impl From<Vec<(Occur, Box<Query>)>> for BooleanQuery {
@@ -45,4 +48,18 @@ impl Query for BooleanQuery {
         let filter = OccurFilter::new(&occurs);
         Ok(box BooleanWeight::new(sub_weights, filter))
     }
+}
+
+impl BooleanQuery {
+    /// Helper method to create a boolean query matching a given list of terms.
+    /// The resulting query is a disjunction of the terms.
+    pub fn new_multiterms_query(terms: Vec<Term>) -> BooleanQuery {
+        let occur_term_queries: Vec<(Occur, Box<Query>)> = terms.into_iter()
+            .map(|term| {
+                let term_query: Box<Query> = box TermQuery::new(term, SegmentPostingsOption::Freq);
+                (Occur::Should, term_query)
+            })
+            .collect();
+        BooleanQuery::from(occur_term_queries)
+    }
 }
\ No newline at end of file
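
A short usage sketch for the helper above, mirroring how the updated tests elsewhere in this diff call it; `title` stands in for any indexed text field:

    // Hypothetical usage: a disjunction over two terms, i.e. documents
    // matching at least one of them become candidates for scoring.
    let terms = vec![Term::from_field_text(title, "barack"),
                     Term::from_field_text(title, "obama")];
    let query = BooleanQuery::new_multiterms_query(terms);
    // `query` can now be handed to `searcher.search(...)`, as in the tests.
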
diff --git a/src/query/boolean_query/boolean_scorer.rs b/src/query/boolean_query/boolean_scorer.rs
index c24f67760..46300c7a6 100644
--- a/src/query/boolean_query/boolean_scorer.rs
+++ b/src/query/boolean_query/boolean_scorer.rs
@@ -42,10 +42,6 @@ pub struct BooleanScorer<TScorer: Scorer> {
 
 impl<TScorer: Scorer> BooleanScorer<TScorer> {
 
-    pub fn scorers(&self) -> &[TScorer] {
-        &self.scorers
-    }
-
     pub fn new(scorers: Vec<TScorer>, occur_filter: OccurFilter) -> BooleanScorer<TScorer> {
         let score_combiner = ScoreCombiner::default_for_num_scorers(scorers.len());
diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs
index f4bedfd29..0c07180d9 100644
--- a/src/query/boolean_query/mod.rs
+++ b/src/query/boolean_query/mod.rs
@@ -24,6 +24,7 @@ mod tests {
     use Index;
     use schema::*;
     use fastfield::{U32FastFieldReader};
+    use postings::SegmentPostingsOption;
 
     fn abs_diff(left: f32, right: f32) -> f32 {
         (right - left).abs()
diff --git a/src/query/mod.rs b/src/query/mod.rs
index b0c39c286..75ef5845a 100644
--- a/src/query/mod.rs
+++ b/src/query/mod.rs
@@ -5,18 +5,15 @@ mod query;
 
 mod boolean_query;
-mod multi_term_query;
-mod phrase_query;
 mod scorer;
 mod occur;
 mod weight;
 mod occur_filter;
 mod term_query;
 mod query_parser;
+mod phrase_query;
 
 pub use self::boolean_query::BooleanQuery;
-pub use self::multi_term_query::MultiTermQuery;
-pub use self::multi_term_query::MultiTermWeight;
 pub use self::occur_filter::OccurFilter;
 pub use self::occur::Occur;
 pub use self::phrase_query::PhraseQuery;
diff --git a/src/query/multi_term_query/mod.rs b/src/query/multi_term_query/mod.rs
deleted file mode 100644
index 38cd209a4..000000000
--- a/src/query/multi_term_query/mod.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-mod multi_term_query;
-mod multi_term_weight;
-
-pub use self::multi_term_query::MultiTermQuery;
-pub use self::multi_term_weight::MultiTermWeight;
\ No newline at end of file
diff --git a/src/query/multi_term_query/multi_term_query.rs b/src/query/multi_term_query/multi_term_query.rs
deleted file mode 100644
index 4d83b051c..000000000
--- a/src/query/multi_term_query/multi_term_query.rs
+++ /dev/null
@@ -1,73 +0,0 @@
-use Result;
-use query::Weight;
-use std::any::Any;
-use schema::Term;
-use query::MultiTermWeight;
-use query::Query;
-use core::searcher::Searcher;
-use query::occur::Occur;
-use query::occur_filter::OccurFilter;
-use query::term_query::TermQuery;
-use postings::SegmentPostingsOption;
-
-
-/// Query involving one or more terms.
-#[derive(Eq, Clone, PartialEq, Debug)]
-pub struct MultiTermQuery {
-    // TODO need a better Debug
-    occur_terms: Vec<(Occur, Term)>,
-}
-
-impl MultiTermQuery {
-    /// Accessor for the number of terms
-    pub fn num_terms(&self) -> usize {
-        self.occur_terms.len()
-    }
-
-    /// Same as `weight()`, except that rather than a boxed trait,
-    /// `specialized_weight` returns a specific type of the weight, allowing for
-    /// compile-time optimization.
-    pub fn specialized_weight(&self, searcher: &Searcher) -> MultiTermWeight {
-        let term_queries: Vec<TermQuery> = self.occur_terms
-            .iter()
-            .map(|&(_, ref term)| TermQuery::new(term.clone(), SegmentPostingsOption::FreqAndPositions))
-            .collect();
-        let occurs: Vec<Occur> = self.occur_terms
-            .iter()
-            .map(|&(occur, _)| occur.clone())
-            .collect();
-        let occur_filter = OccurFilter::new(&occurs);
-        let weights = term_queries.iter()
-            .map(|term_query| term_query.specialized_weight(searcher))
-            .collect();
-        MultiTermWeight::new(weights, occur_filter)
-    }
-}
-
-
-
-impl Query for MultiTermQuery {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
-        Ok(box self.specialized_weight(searcher))
-    }
-}
-
-
-impl From<Vec<(Occur, Term)>> for MultiTermQuery {
-    fn from(occur_terms: Vec<(Occur, Term)>) -> MultiTermQuery {
-        MultiTermQuery { occur_terms: occur_terms }
-    }
-}
-
-impl From<Vec<Term>> for MultiTermQuery {
-    fn from(terms: Vec<Term>) -> MultiTermQuery {
-        let should_terms: Vec<(Occur, Term)> = terms.into_iter()
-            .map(|term| (Occur::Should, term))
-            .collect();
-        MultiTermQuery::from(should_terms)
-    }
-}
\ No newline at end of file
diff --git a/src/query/multi_term_query/multi_term_weight.rs b/src/query/multi_term_query/multi_term_weight.rs
deleted file mode 100644
index 17e58d877..000000000
--- a/src/query/multi_term_query/multi_term_weight.rs
+++ /dev/null
@@ -1,45 +0,0 @@
-use Result;
-use query::Weight;
-use core::SegmentReader;
-use query::Scorer;
-use query::occur_filter::OccurFilter;
-use postings::SegmentPostings;
-use query::term_query::{TermWeight, TermScorer};
-use query::boolean_query::BooleanScorer;
-
-/// Weight object associated to a [`MultiTermQuery`](./struct.MultiTermQuery.html).
-pub struct MultiTermWeight {
-    weights: Vec<TermWeight>,
-    occur_filter: OccurFilter,
-}
-
-impl MultiTermWeight {
-    /// MultiTermWeight constructor.
-    /// The `OccurFilter` is tied with the weights order.
-    pub fn new(weights: Vec<TermWeight>, occur_filter: OccurFilter) -> MultiTermWeight {
-        MultiTermWeight {
-            weights: weights,
-            occur_filter: occur_filter,
-        }
-    }
-
-    /// Same as `scorer()`, except that rather than a boxed trait,
-    /// `specialized_scorer` returns a specific type of the scorer, allowing for
-    /// compile-time optimization.
-    pub fn specialized_scorer<'a>(&'a self,
-                                  reader: &'a SegmentReader)
-                                  -> Result<BooleanScorer<TermScorer<SegmentPostings<'a>>>> {
-        let mut term_scorers: Vec<TermScorer<SegmentPostings<'a>>> = Vec::new();
-        for term_weight in &self.weights {
-            let term_scorer = try!(term_weight.specialized_scorer(reader));
-            term_scorers.push(term_scorer);
-        }
-        Ok(BooleanScorer::new(term_scorers, self.occur_filter))
-    }
-}
-
-impl Weight for MultiTermWeight {
-    fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
-        Ok(box try!(self.specialized_scorer(reader)))
-    }
-}
diff --git a/src/query/occur.rs b/src/query/occur.rs
index 63c077940..7d0ee2f02 100644
--- a/src/query/occur.rs
+++ b/src/query/occur.rs
@@ -2,7 +2,7 @@
 /// should be present or must not be present.
 #[derive(Debug, Clone, Copy, Eq, PartialEq)]
 pub enum Occur {
-    /// For a given document to be considered for scoring, 
+    /// For a given document to be considered for scoring,
     /// at least one of the document with the Should or the Must
     /// Occur constraint must be within the document.
     Should,
@@ -12,27 +12,3 @@ pub enum Occur {
     /// search.
     MustNot,
 }
-
-impl Occur {
-    pub fn compose(&self, other: Occur) -> Occur {
-        match *self {
-            Occur::Should => other,
-            Occur::Must => {
-                if other == Occur::MustNot {
-                    Occur::MustNot
-                }
-                else {
-                    Occur::Must
-                }
-            }
-            Occur::MustNot => {
-                if other == Occur::MustNot {
-                    Occur::Must
-                }
-                else {
-                    Occur::MustNot
-                }
-            }
-        }
-    }
-}
\ No newline at end of file
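
The `Occur::compose` method removed here does not disappear: the same composition logic is reintroduced later in this diff as the free function `compose_occur` in `query_parser.rs`. Written out as assertions (a sketch, not a test present in the patch), the composition table is:

    // Should is neutral; a Must wrapper yields Must unless the inner
    // occur is MustNot; two nested negations cancel out.
    assert_eq!(compose_occur(Occur::Should, Occur::Must), Occur::Must);
    assert_eq!(compose_occur(Occur::Must, Occur::Should), Occur::Must);
    assert_eq!(compose_occur(Occur::Must, Occur::MustNot), Occur::MustNot);
    assert_eq!(compose_occur(Occur::MustNot, Occur::MustNot), Occur::Must);
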
diff --git a/src/query/query_parser/mod.rs b/src/query/query_parser/mod.rs
index 83251158b..147374ccc 100644
--- a/src/query/query_parser/mod.rs
+++ b/src/query/query_parser/mod.rs
@@ -1,7 +1,7 @@
 mod query_parser;
 mod query_grammar;
 mod user_input_ast;
-mod logical_ast;
+pub mod logical_ast;
 
 pub use self::query_parser::QueryParser;
 pub use self::query_parser::QueryParserError;
\ No newline at end of file
diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs
index 5a01828c0..bef6cfd38 100644
--- a/src/query/query_parser/query_grammar.rs
+++ b/src/query/query_parser/query_grammar.rs
@@ -2,79 +2,83 @@ use combine::*;
 use combine::char::*;
 use super::user_input_ast::*;
 
-fn literal<I>(input: I) -> ParseResult<UserInputAST, I>
-    where I: Stream<Item = char> {
+fn literal<I>(input: I) -> ParseResult<UserInputAST, I>
+    where I: Stream<Item = char>
+{
     let term_val = || {
         let word = many1(satisfy(|c: char| c.is_alphanumeric()));
-        let phrase =
-            (char('"'), many1(satisfy(|c| c != '"')), char('"'),)
-            .map(|(_, s, _)| s);
+        let phrase = (char('"'), many1(satisfy(|c| c != '"')), char('"')).map(|(_, s, _)| s);
         phrase.or(word)
     };
     let field = many1(letter());
-    let term_query = (field, char(':'), term_val())
-        .map(|(field_name,_, phrase)| {
-            UserInputLiteral {
-                field_name: Some(field_name),
-                phrase: phrase
-            }
-        });
-    let term_default_field = term_val()
-        .map(|phrase| {
-            UserInputLiteral {
-                field_name: None,
-                phrase: phrase
-            }
-        });
-    try(term_query).or(term_default_field)
-        .map(|query_literal| UserInputAST::from(query_literal))
-        .parse_stream(input)
-}
+    let term_query = (field, char(':'), term_val()).map(|(field_name, _, phrase)| {
+        UserInputLiteral {
+            field_name: Some(field_name),
+            phrase: phrase,
+        }
+    });
+    let term_default_field = term_val().map(|phrase| {
+        UserInputLiteral {
+            field_name: None,
+            phrase: phrase,
+        }
+    });
+    try(term_query)
+        .or(term_default_field)
+        .map(|query_literal| UserInputAST::from(query_literal))
+        .parse_stream(input)
+}
 
-fn leaf<I>(input: I) -> ParseResult<UserInputAST, I>
-    where I: Stream<Item = char> {
-    (char('-'), parser(literal)).map(|(_, expr)| UserInputAST::Not(box expr))
-    .or((char('+'), parser(literal)).map(|(_, expr)| UserInputAST::Must(box expr)))
-    .or(parser(literal))
-    .parse_stream(input)
+fn leaf<I>(input: I) -> ParseResult<UserInputAST, I>
+    where I: Stream<Item = char>
+{
+    (char('-'), parser(literal))
+        .map(|(_, expr)| UserInputAST::Not(box expr))
+        .or((char('+'), parser(literal)).map(|(_, expr)| UserInputAST::Must(box expr)))
+        .or(parser(literal))
+        .parse_stream(input)
 }
 
 pub fn parse_to_ast<I>(input: I) -> ParseResult<UserInputAST, I>
     where I: Stream<Item = char>
-{   
+{
     sep_by(parser(leaf), spaces())
-        .map(|subqueries: Vec<UserInputAST>| {
-            if subqueries.len() == 1 {
-                subqueries.into_iter().next().unwrap()
-            }
-            else {
-                UserInputAST::Clause(subqueries.into_iter().map(Box::new).collect())
-            }
-        })
-        .parse_stream(input)
+        .map(|subqueries: Vec<UserInputAST>| {
+            if subqueries.len() == 1 {
+                subqueries.into_iter().next().unwrap()
+            } else {
+                UserInputAST::Clause(subqueries.into_iter().map(Box::new).collect())
+            }
+        })
+        .parse_stream(input)
 }
 
 #[cfg(test)]
 mod test {
-    
+
     use super::*;
-    
+
     fn test_parse_query_to_ast_helper(query: &str, expected: &str) {
         let query = parse_to_ast(query).unwrap().0;
         let query_str = format!("{:?}", query);
         assert_eq!(query_str, expected);
     }
 
+    fn test_is_parse_err(query: &str) {
+        assert!(parse_to_ast(query).is_err());
+    }
+
     #[test]
-    pub fn test_parse_query_to_ast() {
+    fn test_parse_query_to_ast() {
         test_parse_query_to_ast_helper("abc:toto", "abc:\"toto\"");
         test_parse_query_to_ast_helper("+abc:toto", "+(abc:\"toto\")");
         test_parse_query_to_ast_helper("+abc:toto -titi", "+(abc:\"toto\") -(\"titi\")");
         test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")");
         test_parse_query_to_ast_helper("abc:a b", "abc:\"a\" \"b\"");
         test_parse_query_to_ast_helper("abc:\"a b\"", "abc:\"a b\"");
+        test_is_parse_err("abc + ");
     }
 }
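
Since the `Debug` output of `UserInputAST` doubles as a compact textual form of the parse, the grammar is easiest to exercise exactly the way the tests above do; a small sketch:

    // parse_to_ast returns the AST plus the unconsumed rest of the input.
    let (ast, _remaining) = parse_to_ast("+abc:toto -titi").unwrap();
    assert_eq!(format!("{:?}", ast), "+(abc:\"toto\") -(\"titi\")");
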
format!("{:?}", query); assert_eq!(query_str, expected); } + fn test_is_parse_err(query: &str) { + assert!(parse_to_ast(query).is_err()); + } + #[test] - pub fn test_parse_query_to_ast() { + fn test_parse_query_to_ast() { test_parse_query_to_ast_helper("abc:toto", "abc:\"toto\""); test_parse_query_to_ast_helper("+abc:toto", "+(abc:\"toto\")"); test_parse_query_to_ast_helper("+abc:toto -titi", "+(abc:\"toto\") -(\"titi\")"); test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")"); test_parse_query_to_ast_helper("abc:a b", "abc:\"a\" \"b\""); test_parse_query_to_ast_helper("abc:\"a b\"", "abc:\"a b\""); + test_is_parse_err("abc + "); } } diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 46cf01f77..d255430a2 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -6,6 +6,7 @@ use super::user_input_ast::*; use super::query_grammar::parse_to_ast; use query::Occur; use query::TermQuery; +use postings::SegmentPostingsOption; use query::PhraseQuery; use analyzer::SimpleTokenizer; use analyzer::StreamingIterator; @@ -25,9 +26,10 @@ pub enum QueryParserError { /// The query contains a term for a `u32`-field, but the value /// is not a u32. ExpectedU32(String, String), - + /// It is forbidden queries that are only "excluding". (e.g. -title:pop) AllButQueryForbidden, - + /// If no default field is declared, running a query without any + /// field specified is forbbidden. NoDefaultFieldDeclared, } @@ -37,14 +39,14 @@ pub enum QueryParserError { /// /// The language covered by the current parser is extremely simple. /// -/// * simple terms: "e.g.: `Barack Obama` are simply analyzed using +/// * simple terms: "e.g.: `Barack Obama` are simply analyzed using /// tantivy's `StandardTokenizer`, hence becoming `["barack", "obama"]`. /// The terms are then searched within the default terms of the query parser. -/// +/// /// e.g. If `body` and `title` are default fields, our example terms are /// `["title:barack", "body:barack", "title:obama", "body:obama"]`. /// By default, all tokenized and indexed fields are default fields. -/// +/// /// Multiple terms are handled as an `OR` : any document containing at least /// one of the term will go through the scoring. /// @@ -54,13 +56,13 @@ pub enum QueryParserError { /// are not relevant anymore. /// Making it possible to make this behavior customizable is tracked in /// [issue #27](https://github.com/fulmicoton/tantivy/issues/27). -/// +/// /// * negative terms: By prepending a term by a `-`, a term can be excluded /// from the search. This is useful for disambiguating a query. -/// e.g. `apple -fruit` +/// e.g. `apple -fruit` /// /// * must terms: By prepending a term by a `+`, a term can be made required for the search. -/// +/// pub struct QueryParser { schema: Schema, default_fields: Vec, @@ -73,16 +75,18 @@ impl QueryParser { /// * schema - index Schema /// * default_fields - fields used to search if no field is specifically defined /// in the query. - pub fn new(schema: Schema, - default_fields: Vec) -> QueryParser { + pub fn new(schema: Schema, default_fields: Vec) -> QueryParser { QueryParser { schema: schema, default_fields: default_fields, conjunction_by_default: false, analyzer: box SimpleTokenizer, } - } + } + /// Set the default way to compose queries to a conjunction. 
+ /// + /// By default a , pub fn set_conjunction_by_default(&mut self) { self.conjunction_by_default = true; } @@ -91,36 +95,44 @@ impl QueryParser { /// /// Note that `parse_query` returns an error if the input /// is not a valid query. - /// + /// /// There is currently no lenient mode for the query parser /// which makes it a bad choice for a public/broad user search engine. /// - /// Implementing a lenient mode for this query parser is tracked + /// Implementing a lenient mode for this query parser is tracked /// in [Issue 5](https://github.com/fulmicoton/tantivy/issues/5) pub fn parse_query(&self, query: &str) -> Result, QueryParserError> { let logical_ast = self.parse_query_to_logical_ast(query)?; - Ok(self.convert_to_query(logical_ast)) + Ok(convert_to_query(logical_ast)) } - - pub fn parse_query_to_logical_ast(&self, query: &str) -> Result { - let (user_input_ast, remaining) = parse_to_ast(query).map_err(|_| QueryParserError::SyntaxError)?; + + /// Parse the user query into an AST. + fn parse_query_to_logical_ast(&self, query: &str) -> Result { + let (user_input_ast, _remaining) = + parse_to_ast(query).map_err(|_| QueryParserError::SyntaxError)?; self.compute_logical_ast(user_input_ast) } - + fn resolve_field_name(&self, field_name: &str) -> Result { - self.schema.get_field(field_name) - .ok_or_else(|| QueryParserError::FieldDoesNotExist(String::from(field_name))) + self.schema + .get_field(field_name) + .ok_or_else(|| QueryParserError::FieldDoesNotExist(String::from(field_name))) } - pub fn compute_logical_ast(&self, user_input_ast: UserInputAST) -> Result { + fn compute_logical_ast(&self, + user_input_ast: UserInputAST) + -> Result { let (occur, ast) = try!(self.compute_logical_ast_with_occur(user_input_ast)); if occur == Occur::MustNot { - return Err(QueryParserError::AllButQueryForbidden) + return Err(QueryParserError::AllButQueryForbidden); } - Ok(ast) + Ok(ast) } - - fn compute_logical_ast_for_leaf(&self, field: Field, phrase: &str) -> Result, QueryParserError> { + + fn compute_logical_ast_for_leaf(&self, + field: Field, + phrase: &str) + -> Result, QueryParserError> { let mut token_iter = self.analyzer.tokenize(phrase); let mut tokens: Vec = Vec::new(); loop { @@ -129,98 +141,63 @@ impl QueryParser { // TODO Handle u32 let term = Term::from_field_text(field, &text); tokens.push(term); - } - else { + } else { break; } } if tokens.is_empty() { Ok(None) - } - else if tokens.len() == 1 { + } else if tokens.len() == 1 { Ok(Some(LogicalLiteral::Term(tokens.into_iter().next().unwrap()))) - } - else { + } else { Ok(Some(LogicalLiteral::Phrase(tokens))) } } - + fn default_occur(&self) -> Occur { if self.conjunction_by_default { Occur::Must - } - else { + } else { Occur::Should } } - - fn convert_literal_to_query(&self, logical_literal: LogicalLiteral) -> Box { - match logical_literal { - LogicalLiteral::Term(term) => { - let field = term.field(); - TODO check the schema to get the correct segment otpins - box TermQuery::from(term) - } - LogicalLiteral::Phrase(terms) => { - TODO check the schema to get the correct segment otpins - box PhraseQuery::from(terms) - } - } - - } - - fn convert_to_query(&self, logical_ast: LogicalAST) -> Box { - match logical_ast { - LogicalAST::Clause(clause) => { - let occur_subqueries = clause.into_iter() - .map(|(occur, subquery)| (occur, self.convert_to_query(subquery))) - .collect::>(); - box BooleanQuery::from(occur_subqueries) - } - LogicalAST::Leaf(logical_literal) => { - self.convert_literal_to_query(*logical_literal) - } - } -} - - pub fn 
compute_logical_ast_with_occur(&self, user_input_ast: UserInputAST) -> Result<(Occur, LogicalAST), QueryParserError> { + fn compute_logical_ast_with_occur(&self, + user_input_ast: UserInputAST) + -> Result<(Occur, LogicalAST), QueryParserError> { match user_input_ast { UserInputAST::Clause(sub_queries) => { let default_occur = self.default_occur(); - let logical_sub_queries: Vec<(Occur, LogicalAST)> = try!(sub_queries - .into_iter() + let logical_sub_queries: Vec<(Occur, LogicalAST)> = try!(sub_queries.into_iter() .map(|sub_query| self.compute_logical_ast_with_occur(*sub_query)) - .map(|res| - res.map( - |(occur, sub_ast)| (default_occur.compose(occur), sub_ast) - ) - ) + .map(|res| { + res.map(|(occur, sub_ast)| (compose_occur(default_occur, occur), sub_ast)) + }) .collect()); Ok((Occur::Should, LogicalAST::Clause(logical_sub_queries))) } UserInputAST::Not(subquery) => { - let (occur, logical_sub_queries) = try!(self.compute_logical_ast_with_occur(*subquery)); - Ok((Occur::MustNot.compose(occur), logical_sub_queries)) - }, + let (occur, logical_sub_queries) = + try!(self.compute_logical_ast_with_occur(*subquery)); + Ok((compose_occur(Occur::MustNot, occur), logical_sub_queries)) + } UserInputAST::Must(subquery) => { - let (occur, logical_sub_queries) = try!(self.compute_logical_ast_with_occur(*subquery)); - Ok((Occur::Must.compose(occur), logical_sub_queries)) - }, + let (occur, logical_sub_queries) = + try!(self.compute_logical_ast_with_occur(*subquery)); + Ok((compose_occur(Occur::Must, occur), logical_sub_queries)) + } UserInputAST::Leaf(literal) => { let term_phrases: Vec<(Field, String)> = match literal.field_name { Some(ref field_name) => { let field = try!(self.resolve_field_name(&field_name)); - vec!((field, literal.phrase.clone())) + vec![(field, literal.phrase.clone())] } None => { if self.default_fields.len() == 0 { - return Err(QueryParserError::NoDefaultFieldDeclared) - } - else if self.default_fields.len() == 1 { - vec!((self.default_fields[0], literal.phrase.clone())) - } - else { + return Err(QueryParserError::NoDefaultFieldDeclared); + } else if self.default_fields.len() == 1 { + vec![(self.default_fields[0], literal.phrase.clone())] + } else { self.default_fields .iter() .map(|default_field| (*default_field, literal.phrase.clone())) @@ -234,29 +211,61 @@ impl QueryParser { asts.push(LogicalAST::Leaf(box ast)); } } - let result_ast = - if asts.len() == 0 { - panic!("not working"); - } - else if asts.len() == 1 { - asts[0].clone() - } - else { - LogicalAST::Clause(asts - .into_iter() - .map(|ast| (Occur::Should, ast)) - .collect()) - }; + let result_ast = if asts.len() == 0 { + panic!("not working"); + } else if asts.len() == 1 { + asts[0].clone() + } else { + LogicalAST::Clause(asts.into_iter() + .map(|ast| (Occur::Should, ast)) + .collect()) + }; Ok((Occur::Should, result_ast)) - } + } } } - } +/// Compose two occur values. 
+fn compose_occur(left: Occur, right: Occur) -> Occur { + match left { + Occur::Should => right, + Occur::Must => { + if right == Occur::MustNot { + Occur::MustNot + } else { + Occur::Must + } + } + Occur::MustNot => { + if right == Occur::MustNot { + Occur::Must + } else { + Occur::MustNot + } + } + } +} +fn convert_literal_to_query(logical_literal: LogicalLiteral) -> Box { + match logical_literal { + LogicalLiteral::Term(term) => box TermQuery::new(term, SegmentPostingsOption::Freq), + LogicalLiteral::Phrase(terms) => box PhraseQuery::from(terms), + } +} +fn convert_to_query(logical_ast: LogicalAST) -> Box { + match logical_ast { + LogicalAST::Clause(clause) => { + let occur_subqueries = clause.into_iter() + .map(|(occur, subquery)| (occur, convert_to_query(subquery))) + .collect::>(); + box BooleanQuery::from(occur_subqueries) + } + LogicalAST::Leaf(logical_literal) => convert_literal_to_query(*logical_literal), + } +} @@ -266,13 +275,15 @@ mod test { use super::QueryParser; use super::QueryParserError; use super::super::logical_ast::*; - - fn parse_query_to_logical_ast(query: &str, default_conjunction: bool) -> Result { + + fn parse_query_to_logical_ast(query: &str, + default_conjunction: bool) + -> Result { let mut schema_builder = SchemaBuilder::default(); let title = schema_builder.add_text_field("title", TEXT); let text = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); - let default_fields = vec!(title, text); + let default_fields = vec![title, text]; let mut query_parser = QueryParser::new(schema, default_fields); if default_conjunction { query_parser.set_conjunction_by_default(); @@ -280,7 +291,9 @@ mod test { query_parser.parse_query_to_logical_ast(query) } - fn test_parse_query_to_logical_ast_helper(query: &str, expected: &str, default_conjunction: bool) { + fn test_parse_query_to_logical_ast_helper(query: &str, + expected: &str, + default_conjunction: bool) { let query = parse_query_to_logical_ast(query, default_conjunction).unwrap(); let query_str = format!("{:?}", query); assert_eq!(query_str, expected); @@ -288,21 +301,43 @@ mod test { #[test] pub fn test_parse_query_to_ast_disjunction() { - test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 116, 111, 116, 111])", false); - test_parse_query_to_logical_ast_helper("+title:toto", "Term([0, 116, 111, 116, 111])", false); - test_parse_query_to_logical_ast_helper("+title:toto -titi", "(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, 105, 116, 105]) Term([1, 116, 105, 116, 105])))", false); - assert_eq!(parse_query_to_logical_ast("-title:toto", false).err().unwrap(), QueryParserError::AllButQueryForbidden); - test_parse_query_to_logical_ast_helper("title:a b", "(Term([0, 97]) (Term([0, 98]) Term([1, 98])))", false); - test_parse_query_to_logical_ast_helper("title:\"a b\"", "\"[Term([0, 97]), Term([0, 98])]\"", false); + test_parse_query_to_logical_ast_helper("title:toto", + "Term([0, 116, 111, 116, 111])", + false); + test_parse_query_to_logical_ast_helper("+title:toto", + "Term([0, 116, 111, 116, 111])", + false); + test_parse_query_to_logical_ast_helper("+title:toto -titi", + "(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, \ + 105, 116, 105]) Term([1, 116, 105, 116, 105])))", + false); + assert_eq!(parse_query_to_logical_ast("-title:toto", false).err().unwrap(), + QueryParserError::AllButQueryForbidden); + test_parse_query_to_logical_ast_helper("title:a b", + "(Term([0, 97]) (Term([0, 98]) Term([1, 98])))", + false); + test_parse_query_to_logical_ast_helper("title:\"a b\"", + 
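
End to end, the parser is driven the same way the test module above drives it; a condensed sketch with hypothetical field names:

    // Build a schema with two default fields, then parse a user query into
    // a Box<Query>. "apple -fruit" keeps documents that should contain
    // "apple" while excluding any that contain "fruit".
    let mut schema_builder = SchemaBuilder::default();
    let title = schema_builder.add_text_field("title", TEXT);
    let body = schema_builder.add_text_field("body", TEXT);
    let schema = schema_builder.build();
    let query_parser = QueryParser::new(schema, vec![title, body]);
    let query = query_parser.parse_query("apple -fruit").unwrap();
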
"\"[Term([0, 97]), Term([0, 98])]\"", + false); } #[test] pub fn test_parse_query_to_ast_conjunction() { test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 116, 111, 116, 111])", true); - test_parse_query_to_logical_ast_helper("+title:toto", "Term([0, 116, 111, 116, 111])", true); - test_parse_query_to_logical_ast_helper("+title:toto -titi", "(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, 105, 116, 105]) Term([1, 116, 105, 116, 105])))", true); - assert_eq!(parse_query_to_logical_ast("-title:toto", true).err().unwrap(), QueryParserError::AllButQueryForbidden); - test_parse_query_to_logical_ast_helper("title:a b", "(+Term([0, 97]) +(Term([0, 98]) Term([1, 98])))", true); - test_parse_query_to_logical_ast_helper("title:\"a b\"", "\"[Term([0, 97]), Term([0, 98])]\"", true); + test_parse_query_to_logical_ast_helper("+title:toto", + "Term([0, 116, 111, 116, 111])", + true); + test_parse_query_to_logical_ast_helper("+title:toto -titi", + "(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, \ + 105, 116, 105]) Term([1, 116, 105, 116, 105])))", + true); + assert_eq!(parse_query_to_logical_ast("-title:toto", true).err().unwrap(), + QueryParserError::AllButQueryForbidden); + test_parse_query_to_logical_ast_helper("title:a b", + "(+Term([0, 97]) +(Term([0, 98]) Term([1, 98])))", + true); + test_parse_query_to_logical_ast_helper("title:\"a b\"", + "\"[Term([0, 97]), Term([0, 98])]\"", + true); } } diff --git a/src/query/term_query/term_query.rs b/src/query/term_query/term_query.rs index 8e2d96a82..05115a9b2 100644 --- a/src/query/term_query/term_query.rs +++ b/src/query/term_query/term_query.rs @@ -23,6 +23,14 @@ pub struct TermQuery { } impl TermQuery { + /// Creates a new term query. + pub fn new(term: Term, segment_postings_options: SegmentPostingsOption) -> TermQuery { + TermQuery { + term: term, + segment_postings_options: segment_postings_options, + } + } + /// Returns a weight object. /// @@ -37,14 +45,6 @@ impl TermQuery { segment_postings_options: self.segment_postings_options, } } - - - pub fn new(term: Term, segment_postings_options: SegmentPostingsOption) -> TermQuery { - TermQuery { - term: term, - segment_postings_options: segment_postings_options, - } - } } impl Query for TermQuery {