use super::boolean_weight::BooleanWeight; use crate::query::{EnableScoring, Occur, Query, SumWithCoordsCombiner, TermQuery, Weight}; use crate::schema::{IndexRecordOption, Term}; /// The boolean query returns a set of documents /// that matches the Boolean combination of constituent subqueries. /// /// The documents matched by the boolean query are /// those which /// * match all of the sub queries associated with the /// `Must` occurrence /// * match none of the sub queries associated with the /// `MustNot` occurrence. /// * match at least one of the sub queries associated /// with the `Must` or `Should` occurrence. /// /// /// You can combine other query types and their `Occur`ances into one `BooleanQuery` /// /// ```rust /// use tantivy::collector::Count; /// use tantivy::doc; /// use tantivy::query::{BooleanQuery, Occur, PhraseQuery, Query, TermQuery}; /// use tantivy::schema::{IndexRecordOption, Schema, TEXT}; /// use tantivy::Term; /// use tantivy::Index; /// use tantivy::IndexWriter; /// /// fn main() -> tantivy::Result<()> { /// let mut schema_builder = Schema::builder(); /// let title = schema_builder.add_text_field("title", TEXT); /// let body = schema_builder.add_text_field("body", TEXT); /// let schema = schema_builder.build(); /// let index = Index::create_in_ram(schema); /// { /// let mut index_writer: IndexWriter = index.writer(15_000_000)?; /// index_writer.add_document(doc!( /// title => "The Name of the Wind", /// ))?; /// index_writer.add_document(doc!( /// title => "The Diary of Muadib", /// ))?; /// index_writer.add_document(doc!( /// title => "A Dairy Cow", /// body => "hidden", /// ))?; /// index_writer.add_document(doc!( /// title => "A Dairy Cow", /// body => "found", /// ))?; /// index_writer.add_document(doc!( /// title => "The Diary of a Young Girl", /// ))?; /// index_writer.commit()?; /// } /// /// let reader = index.reader()?; /// let searcher = reader.searcher(); /// /// // Make TermQuery's for "girl" and "diary" in the title /// let girl_term_query: Box = Box::new(TermQuery::new( /// Term::from_field_text(title, "girl"), /// IndexRecordOption::Basic, /// )); /// let diary_term_query: Box = Box::new(TermQuery::new( /// Term::from_field_text(title, "diary"), /// IndexRecordOption::Basic, /// )); /// let cow_term_query: Box = Box::new(TermQuery::new( /// Term::from_field_text(title, "cow"), /// IndexRecordOption::Basic /// )); /// // A TermQuery with "found" in the body /// let body_term_query: Box = Box::new(TermQuery::new( /// Term::from_field_text(body, "found"), /// IndexRecordOption::Basic, /// )); /// // TermQuery "diary" must and "girl" must not be present /// let queries_with_occurs1 = vec![ /// (Occur::Must, diary_term_query.box_clone()), /// (Occur::MustNot, girl_term_query.box_clone()), /// ]; /// // Make a BooleanQuery equivalent to /// // title:+diary title:-girl /// let diary_must_and_girl_mustnot = BooleanQuery::new(queries_with_occurs1); /// let count1 = searcher.search(&diary_must_and_girl_mustnot, &Count)?; /// assert_eq!(count1, 1); /// /// // "title:diary OR title:cow" /// let title_diary_or_cow = BooleanQuery::new(vec![ /// (Occur::Should, diary_term_query.box_clone()), /// (Occur::Should, cow_term_query.box_clone()), /// ]); /// let count2 = searcher.search(&title_diary_or_cow, &Count)?; /// assert_eq!(count2, 4); /// /// // Make a `PhraseQuery` from a vector of `Term`s /// let phrase_query: Box = Box::new(PhraseQuery::new(vec![ /// Term::from_field_text(title, "dairy"), /// Term::from_field_text(title, "cow"), /// ])); /// // You can combine subqueries of different types into 1 BooleanQuery: /// // `TermQuery` and `PhraseQuery` /// // "title:diary OR "dairy cow" /// let term_of_phrase_query = BooleanQuery::new(vec![ /// (Occur::Should, diary_term_query.box_clone()), /// (Occur::Should, phrase_query.box_clone()), /// ]); /// let count3 = searcher.search(&term_of_phrase_query, &Count)?; /// assert_eq!(count3, 4); /// /// // You can nest one BooleanQuery inside another /// // body:found AND ("title:diary OR "dairy cow") /// let nested_query = BooleanQuery::new(vec![ /// (Occur::Must, body_term_query), /// (Occur::Must, Box::new(term_of_phrase_query)) /// ]); /// let count4 = searcher.search(&nested_query, &Count)?; /// assert_eq!(count4, 1); /// /// // You may call `with_minimum_required_clauses` to /// // specify the number of should clauses the returned documents must match. /// let minimum_required_query = BooleanQuery::with_minimum_required_clauses(vec![ /// (Occur::Should, cow_term_query.box_clone()), /// (Occur::Should, girl_term_query.box_clone()), /// (Occur::Should, diary_term_query.box_clone()), /// ], 2); /// // Return documents contains "Diary Cow", "Diary Girl" or "Cow Girl" /// // Notice: "Diary" isn't "Dairy". ;-) /// let count5 = searcher.search(&minimum_required_query, &Count)?; /// assert_eq!(count5, 1); /// Ok(()) /// } /// ``` #[derive(Debug)] pub struct BooleanQuery { subqueries: Vec<(Occur, Box)>, minimum_number_should_match: usize, } impl Clone for BooleanQuery { fn clone(&self) -> Self { let subqueries = self .subqueries .iter() .map(|(occur, subquery)| (*occur, subquery.box_clone())) .collect::>(); Self { subqueries, minimum_number_should_match: self.minimum_number_should_match, } } } impl From)>> for BooleanQuery { fn from(subqueries: Vec<(Occur, Box)>) -> BooleanQuery { BooleanQuery::new(subqueries) } } impl Query for BooleanQuery { fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result> { let sub_weights = self .subqueries .iter() .map(|(occur, subquery)| Ok((*occur, subquery.weight(enable_scoring)?))) .collect::>()?; Ok(Box::new(BooleanWeight::with_minimum_number_should_match( sub_weights, self.minimum_number_should_match, enable_scoring.is_scoring_enabled(), Box::new(SumWithCoordsCombiner::default), ))) } fn query_terms<'a>(&'a self, visitor: &mut dyn FnMut(&'a Term, bool)) { for (_occur, subquery) in &self.subqueries { subquery.query_terms(visitor); } } } impl BooleanQuery { /// Creates a new boolean query. pub fn new(subqueries: Vec<(Occur, Box)>) -> BooleanQuery { // If the bool query includes at least one should clause // and no Must or MustNot clauses, the default value is 1. Otherwise, the default value is // 0. Keep pace with Elasticsearch. let mut minimum_required = 0; for (occur, _) in &subqueries { match occur { Occur::Should => minimum_required = 1, Occur::Must | Occur::MustNot => { minimum_required = 0; break; } } } Self::with_minimum_required_clauses(subqueries, minimum_required) } /// Create a new boolean query with minimum number of required should clauses specified. pub fn with_minimum_required_clauses( subqueries: Vec<(Occur, Box)>, minimum_number_should_match: usize, ) -> BooleanQuery { BooleanQuery { subqueries, minimum_number_should_match, } } /// Getter for `minimum_number_should_match` pub fn get_minimum_number_should_match(&self) -> usize { self.minimum_number_should_match } /// Setter for `minimum_number_should_match` pub fn set_minimum_number_should_match(&mut self, minimum_number_should_match: usize) { self.minimum_number_should_match = minimum_number_should_match; } /// Returns the intersection of the queries. pub fn intersection(queries: Vec>) -> BooleanQuery { let subqueries = queries.into_iter().map(|s| (Occur::Must, s)).collect(); BooleanQuery::new(subqueries) } /// Returns the union of the queries. pub fn union(queries: Vec>) -> BooleanQuery { let subqueries = queries.into_iter().map(|s| (Occur::Should, s)).collect(); BooleanQuery::new(subqueries) } /// Returns the union of the queries with minimum required clause. pub fn union_with_minimum_required_clauses( queries: Vec>, minimum_required_clauses: usize, ) -> BooleanQuery { let subqueries = queries .into_iter() .map(|sub_query| (Occur::Should, sub_query)) .collect(); BooleanQuery::with_minimum_required_clauses(subqueries, minimum_required_clauses) } /// Helper method to create a boolean query matching a given list of terms. /// The resulting query is a disjunction of the terms. pub fn new_multiterms_query(terms: Vec) -> BooleanQuery { let occur_term_queries: Vec<(Occur, Box)> = terms .into_iter() .map(|term| { let term_query: Box = Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)); (Occur::Should, term_query) }) .collect(); BooleanQuery::new(occur_term_queries) } /// Deconstructed view of the clauses making up this query. pub fn clauses(&self) -> &[(Occur, Box)] { &self.subqueries[..] } } #[cfg(test)] mod tests { use std::collections::HashSet; use super::BooleanQuery; use crate::collector::{Count, DocSetCollector}; use crate::query::{Query, QueryClone, QueryParser, TermQuery}; use crate::schema::{Field, IndexRecordOption, Schema, TEXT}; use crate::{DocAddress, DocId, Index, Term}; fn create_test_index() -> crate::Result { let mut schema_builder = Schema::builder(); let text = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut writer = index.writer_for_tests()?; writer.add_document(doc!(text=>"b c"))?; writer.add_document(doc!(text=>"a c"))?; writer.add_document(doc!(text=>"a b"))?; writer.add_document(doc!(text=>"a d"))?; writer.commit()?; Ok(index) } #[test] fn test_minimum_required() -> crate::Result<()> { fn create_test_index_with>( docs: T, ) -> crate::Result { let mut schema_builder = Schema::builder(); let text = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut writer = index.writer_for_tests()?; for doc in docs { writer.add_document(doc!(text => doc))?; } writer.commit()?; Ok(index) } fn create_boolean_query_with_mr>( queries: T, field: Field, mr: usize, ) -> BooleanQuery { let terms = queries .into_iter() .map(|t| Term::from_field_text(field, t)) .map(|t| TermQuery::new(t, IndexRecordOption::Basic)) .map(|q| -> Box { Box::new(q) }) .collect(); BooleanQuery::union_with_minimum_required_clauses(terms, mr) } fn check_doc_id>( expected: T, actually: HashSet, seg: u32, ) { assert_eq!( actually, expected .into_iter() .map(|id| DocAddress::new(seg, id)) .collect() ); } let index = create_test_index_with(["a b c", "a c e", "d f g", "z z z", "c i b"])?; let searcher = index.reader()?.searcher(); let text = index.schema().get_field("text").unwrap(); // Documents contains 'a c' 'a z' 'a i' 'c z' 'c i' or 'z i' shall be return. let q1 = create_boolean_query_with_mr(["a", "c", "z", "i"], text, 2); let docs = searcher.search(&q1, &DocSetCollector)?; check_doc_id([0, 1, 4], docs, 0); // Documents contains 'a b c', 'a b e', 'a c e' or 'b c e' shall be return. let q2 = create_boolean_query_with_mr(["a", "b", "c", "e"], text, 3); let docs = searcher.search(&q2, &DocSetCollector)?; check_doc_id([0, 1], docs, 0); // Nothing queried since minimum_required is too large. let q3 = create_boolean_query_with_mr(["a", "b"], text, 3); let docs = searcher.search(&q3, &DocSetCollector)?; assert!(docs.is_empty()); // When mr is set to zero or one, there are no difference with `Boolean::Union`. let q4 = create_boolean_query_with_mr(["a", "z"], text, 1); let docs = searcher.search(&q4, &DocSetCollector)?; check_doc_id([0, 1, 3], docs, 0); let q5 = create_boolean_query_with_mr(["a", "b"], text, 0); let docs = searcher.search(&q5, &DocSetCollector)?; check_doc_id([0, 1, 4], docs, 0); Ok(()) } #[test] fn test_union() -> crate::Result<()> { let index = create_test_index()?; let searcher = index.reader()?.searcher(); let text = index.schema().get_field("text").unwrap(); let term_a = TermQuery::new(Term::from_field_text(text, "a"), IndexRecordOption::Basic); let term_d = TermQuery::new(Term::from_field_text(text, "d"), IndexRecordOption::Basic); let union_ad = BooleanQuery::union(vec![term_a.box_clone(), term_d.box_clone()]); let docs = searcher.search(&union_ad, &DocSetCollector)?; assert_eq!( docs, vec![ DocAddress::new(0u32, 1u32), DocAddress::new(0u32, 2u32), DocAddress::new(0u32, 3u32) ] .into_iter() .collect() ); Ok(()) } #[test] fn test_intersection() -> crate::Result<()> { let index = create_test_index()?; let searcher = index.reader()?.searcher(); let text = index.schema().get_field("text").unwrap(); let term_a = TermQuery::new(Term::from_field_text(text, "a"), IndexRecordOption::Basic); let term_b = TermQuery::new(Term::from_field_text(text, "b"), IndexRecordOption::Basic); let term_c = TermQuery::new(Term::from_field_text(text, "c"), IndexRecordOption::Basic); let intersection_ab = BooleanQuery::intersection(vec![term_a.box_clone(), term_b.box_clone()]); let intersection_ac = BooleanQuery::intersection(vec![term_a.box_clone(), term_c.box_clone()]); let intersection_bc = BooleanQuery::intersection(vec![term_b.box_clone(), term_c.box_clone()]); { let docs = searcher.search(&intersection_ab, &DocSetCollector)?; assert_eq!( docs, vec![DocAddress::new(0u32, 2u32)].into_iter().collect() ); } { let docs = searcher.search(&intersection_ac, &DocSetCollector)?; assert_eq!( docs, vec![DocAddress::new(0u32, 1u32)].into_iter().collect() ); } { let docs = searcher.search(&intersection_bc, &DocSetCollector)?; assert_eq!( docs, vec![DocAddress::new(0u32, 0u32)].into_iter().collect() ); } Ok(()) } #[test] pub fn test_json_array_pitfall_bag_of_terms() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let json_field = schema_builder.add_json_field("json", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { let mut index_writer = index.writer_for_tests()?; index_writer.add_document(doc!(json_field=>json!({ "cart": [ {"product_type": "sneakers", "attributes": {"color": "white"}}, {"product_type": "t-shirt", "attributes": {"color": "red"}}, {"product_type": "cd", "attributes": {"genre": "blues"}}, ] })))?; index_writer.commit()?; } let searcher = index.reader()?.searcher(); let doc_matches = |query: &str| { let query_parser = QueryParser::for_index(&index, vec![json_field]); let query = query_parser.parse_query(query).unwrap(); searcher.search(&query, &Count).unwrap() == 1 }; // As expected assert!(doc_matches( r#"cart.product_type:sneakers AND cart.attributes.color:white"# )); // Unexpected match, due to the fact that array do not act as nested docs. assert!(doc_matches( r#"cart.product_type:sneakers AND cart.attributes.color:red"# )); // However, bviously this works... assert!(!doc_matches( r#"cart.product_type:sneakers AND cart.attributes.color:blues"# )); Ok(()) } }