diff --git a/master/src/tantivy/query/boolean_query/mod.rs.html b/master/src/tantivy/query/boolean_query/mod.rs.html index 25daeb759..8232dc84f 100644 --- a/master/src/tantivy/query/boolean_query/mod.rs.html +++ b/master/src/tantivy/query/boolean_query/mod.rs.html @@ -228,6 +228,33 @@ 171 172 173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200
mod boolean_query; mod boolean_weight; @@ -401,6 +428,33 @@ assert_eq!(matching_docs(&boolean_query), Vec::<u32>::new()); } } + + + #[test] + pub fn test_intersection_score() { + let (index, text_field) = aux_test_helper(); + + let make_term_query = |text: &str| { + let term_query = TermQuery::new( + Term::from_field_text(text_field, text), + IndexRecordOption::Basic, + ); + let query: Box<Query> = box term_query; + query + }; + + let score_docs = |boolean_query: &Query| { + let searcher = index.searcher(); + let mut test_collector = TestCollector::default(); + searcher.search(boolean_query, &mut test_collector).unwrap(); + test_collector.scores() + }; + + { + let boolean_query = BooleanQuery::from(vec![(Occur::Must, make_term_query("a")), (Occur::Must, make_term_query("b"))]); + assert_eq!(score_docs(&boolean_query), vec![0.977973, 0.84699446]); + } + } }diff --git a/master/src/tantivy/query/phrase_query/phrase_query.rs.html b/master/src/tantivy/query/phrase_query/phrase_query.rs.html index 0c9ee6b1a..0b7b92a23 100644 --- a/master/src/tantivy/query/phrase_query/phrase_query.rs.html +++ b/master/src/tantivy/query/phrase_query/phrase_query.rs.html @@ -115,6 +115,7 @@ 58 59 60 +61
use schema::Term; use query::Query; @@ -166,12 +167,13 @@ let terms = self.phrase_terms.clone(); if scoring_enabled { let bm25_weight = BM25Weight::for_terms(searcher, &terms); - Ok(box PhraseWeight::with_scoring( + Ok(box PhraseWeight::new( terms, - bm25_weight + bm25_weight, + true )) } else { - Ok(box PhraseWeight::no_scoring(terms)) + Ok(box PhraseWeight::new(terms, BM25Weight::null(), false)) } } diff --git a/master/src/tantivy/query/phrase_query/phrase_scorer.rs.html b/master/src/tantivy/query/phrase_query/phrase_scorer.rs.html index a83535914..2bf966d9f 100644 --- a/master/src/tantivy/query/phrase_query/phrase_scorer.rs.html +++ b/master/src/tantivy/query/phrase_query/phrase_scorer.rs.html @@ -302,6 +302,51 @@ 245 246 247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292
use DocId; use docset::{DocSet, SkipResult}; @@ -354,10 +399,28 @@ phrase_count: u32, fieldnorm_reader: FieldNormReader, similarity_weight: BM25Weight, + score_needed: bool } -/// Computes the length of the intersection of two sorted arrays. +/// Returns true iff the two sorted array contain a common element +fn intersection_exists(left: &[u32], right: &[u32]) -> bool { + let mut left_i = 0; + let mut right_i = 0; + while left_i < left.len() && right_i < right.len() { + let left_val = left[left_i]; + let right_val = right[right_i]; + if left_val < right_val { + left_i += 1; + } else if right_val < left_val { + right_i += 1; + } else { + return true; + } + } + false +} + fn intersection_count(left: &[u32], right: &[u32]) -> usize { let mut left_i = 0; let mut right_i = 0; @@ -410,7 +473,8 @@ pub fn new(term_postings: Vec<TPostings>, similarity_weight: BM25Weight, - fieldnorm_reader: FieldNormReader) -> PhraseScorer<TPostings> { + fieldnorm_reader: FieldNormReader, + score_needed: bool) -> PhraseScorer<TPostings> { let num_docsets = term_postings.len(); let postings_with_offsets = term_postings .into_iter() @@ -425,13 +489,40 @@ phrase_count: 0u32, similarity_weight, fieldnorm_reader, + score_needed, } } fn phrase_match(&mut self) -> bool { - let count = self.phrase_count(); - self.phrase_count = count; - count > 0u32 + if self.score_needed { + let count = self.phrase_count(); + self.phrase_count = count; + count > 0u32 + } else { + self.phrase_exists() + } + } + + + fn phrase_exists(&mut self) -> bool { + { + self.intersection_docset + .docset_mut_specialized(0) + .positions(&mut self.left); + } + let mut intersection_len = self.left.len(); + for i in 1..self.num_docsets - 1 { + { + self.intersection_docset.docset_mut_specialized(i).positions(&mut self.right); + } + intersection_len = intersection(&mut self.left[..intersection_len], &self.right[..]); + if intersection_len == 0 { + return false; + } + } + + self.intersection_docset.docset_mut_specialized(self.num_docsets - 1).positions(&mut self.right); + intersection_exists(&self.left[..intersection_len], &self.right[..]) } fn phrase_count(&mut self) -> u32 { @@ -452,8 +543,7 @@ } self.intersection_docset.docset_mut_specialized(self.num_docsets - 1).positions(&mut self.right); - intersection_len = intersection_count(&mut self.left[..intersection_len], &self.right[..]); - intersection_len as u32 + intersection_count(&self.left[..intersection_len], &self.right[..]) as u32 } } diff --git a/master/src/tantivy/query/phrase_query/phrase_weight.rs.html b/master/src/tantivy/query/phrase_query/phrase_weight.rs.html index 0dcf18ebd..a8f97fe08 100644 --- a/master/src/tantivy/query/phrase_query/phrase_weight.rs.html +++ b/master/src/tantivy/query/phrase_query/phrase_weight.rs.html @@ -116,11 +116,6 @@ 59 60 61 -62 -63 -64 -65 -66
use query::Weight; use query::Scorer; @@ -135,23 +130,18 @@ pub struct PhraseWeight { phrase_terms: Vec<Term>, similarity_weight: BM25Weight, + score_needed: bool, } impl PhraseWeight { /// Creates a new phrase weight. - pub fn with_scoring(phrase_terms: Vec<Term>, - similarity_weight: BM25Weight) -> PhraseWeight { + pub fn new(phrase_terms: Vec<Term>, + similarity_weight: BM25Weight, + score_needed: bool) -> PhraseWeight { PhraseWeight { phrase_terms, - similarity_weight - } - } - - - pub fn no_scoring(phrase_terms: Vec<Term>) -> PhraseWeight { - PhraseWeight { - phrase_terms, - similarity_weight: BM25Weight::null() + similarity_weight, + score_needed } } } @@ -172,7 +162,7 @@ return Ok(box EmptyScorer); } } - Ok(box PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader)) + Ok(box PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader, self.score_needed)) } else { let mut term_postings_list = Vec::new(); for term in &self.phrase_terms { @@ -184,7 +174,7 @@ return Ok(box EmptyScorer); } } - Ok(box PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader)) + Ok(box PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader, self.score_needed)) } } } diff --git a/master/tantivy/query/struct.PhraseQuery.html b/master/tantivy/query/struct.PhraseQuery.html index caf5b56f4..cf950eb36 100644 --- a/master/tantivy/query/struct.PhraseQuery.html +++ b/master/tantivy/query/struct.PhraseQuery.html @@ -86,8 +86,8 @@ must belong to the same field.
impl Debug for PhraseQuery[src]
fn fmt(&self, __arg_0: &mut Formatter) -> Result[src]Formats the value using the given formatter. Read more
--
impl Query for PhraseQuery[src]+
impl Query for PhraseQuery[src]
fn weight([src]
&self,
searcher: &Searcher,
scoring_enabled: bool
) -> Result<Box<Weight>>Create the weight associated to a query.
See
Weight.diff --git a/master/tantivy/query/trait.Query.html b/master/tantivy/query/trait.Query.html index 20cb81bf3..02a1458e0 100644 --- a/master/tantivy/query/trait.Query.html +++ b/master/tantivy/query/trait.Query.html @@ -125,7 +125,7 @@ can increase performances.
fn count(&self, searcher: &Searcher) -> Result<usize>[src]
impl Query for BooleanQuery- -
impl Query for TermQuery- +
impl Query for PhraseQueryimpl Query for PhraseQueryimpl Query for AllQueryimpl Query for RangeQuery