From 7d773abc92d844941f047db5646c1ea9c71caa99 Mon Sep 17 00:00:00 2001 From: Ype Kingma <39171155+YpeKingma@users.noreply.github.com> Date: Mon, 8 Jun 2020 13:01:19 +0200 Subject: [PATCH] Boolean query: do not combine excluded scores. (#840) * Do nothing when combining score values of excluded scores. * Add test case for two excluded. * Test score for two excluded terms. * Use TopDocs in test_boolean_query_two_excluded --- src/query/boolean_query/boolean_weight.rs | 2 +- src/query/boolean_query/mod.rs | 53 ++++++++++++++++++++++- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index 95f6091cc..650be3c1c 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -94,7 +94,7 @@ impl BooleanWeight { let exclude_scorer_opt: Option<Box<dyn Scorer>> = per_occur_scorers .remove(&Occur::MustNot) - .map(scorer_union::<TScoreCombiner>) + .map(scorer_union::<DoNothingCombiner>) .map(Into::into); let must_scorer_opt: Option<Box<dyn Scorer>> = per_occur_scorers diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index 84a42c7e5..30993b11f 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -8,6 +8,7 @@ mod tests { use super::*; use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE; + use crate::collector::TopDocs; use crate::query::score_combiner::SumWithCoordsCombiner; use crate::query::term_query::TermScorer; use crate::query::Intersection; @@ -20,7 +21,7 @@ mod tests { use crate::schema::*; use crate::tests::assert_nearly_equals; use crate::Index; - use crate::{DocAddress, DocId}; + use crate::{DocAddress, DocId, Score}; fn aux_test_helper() -> (Index, Field) { let mut schema_builder = Schema::builder(); @@ -177,6 +178,54 @@ mod tests { } } + #[test] + pub fn test_boolean_query_two_excluded() { + let (index, text_field) = aux_test_helper(); + + let make_term_query = |text: &str| { + let term_query = TermQuery::new(
+ Term::from_field_text(text_field, text), + IndexRecordOption::Basic, + ); + let query: Box<dyn Query> = Box::new(term_query); + query + }; + + let reader = index.reader().unwrap(); + + let matching_topdocs = |query: &dyn Query| { + reader + .searcher() + .search(query, &TopDocs::with_limit(3)) + .unwrap() + }; + + let score_doc_4: Score; // score of doc 4 should not be influenced by exclusion + { + let boolean_query_no_excluded = + BooleanQuery::from(vec![(Occur::Must, make_term_query("d"))]); + let topdocs_no_excluded = matching_topdocs(&boolean_query_no_excluded); + assert_eq!(topdocs_no_excluded.len(), 2); + let (top_score, top_doc) = topdocs_no_excluded[0]; + assert_eq!(top_doc, DocAddress(0, 4)); + assert_eq!(topdocs_no_excluded[1].1, DocAddress(0, 3)); // ignore score of doc 3. + score_doc_4 = top_score; + } + + { + let boolean_query_two_excluded = BooleanQuery::from(vec![ + (Occur::Must, make_term_query("d")), + (Occur::MustNot, make_term_query("a")), + (Occur::MustNot, make_term_query("b")), + ]); + let topdocs_excluded = matching_topdocs(&boolean_query_two_excluded); + assert_eq!(topdocs_excluded.len(), 1); + let (top_score, top_doc) = topdocs_excluded[0]; + assert_eq!(top_doc, DocAddress(0, 4)); + assert_eq!(top_score, score_doc_4); + } + } + #[test] pub fn test_boolean_query_with_weight() { let mut schema_builder = Schema::builder(); @@ -274,7 +323,7 @@ mod tests { index_writer.add_document(doc!( // tf = 1 1 title => "PDF Мастер Класс \"Морячок\" (Оксана Лифенко)", - // tf = 0 0 + // tf = 0 0 text => "https://i.ibb.co/pzvHrDN/I3d U T6 Gg TM.jpg\nhttps://i.ibb.co/NFrb6v6/N0ls Z9nwjb U.jpg\nВ описание входит штаны, кофта, берет, матросский воротник. Описание продается в формате PDF, состоит из 12 страниц формата А4 и может быть напечатано на любом принтере.\nОписание предназначено для кукол BJD RealPuki от FairyLand, но может подойти и другим подобным куклам.
Также вы можете вязать этот наряд из обычной пряжи, и он подойдет для куколок побольше.\nhttps://vk.com/market 95724412?w=product 95724412_2212" )); for _ in 0..1_000 {