From f17e46b97d692dcac5a90621a06beb2bf956338f Mon Sep 17 00:00:00 2001
From: Travis CI User
mod boolean_query;
mod boolean_weight;
@@ -401,6 +428,33 @@
assert_eq!(matching_docs(&boolean_query), Vec::<u32>::new());
}
}
+
+
+ #[test]
+ pub fn test_intersection_score() {
+ let (index, text_field) = aux_test_helper();
+
+ let make_term_query = |text: &str| {
+ let term_query = TermQuery::new(
+ Term::from_field_text(text_field, text),
+ IndexRecordOption::Basic,
+ );
+ let query: Box<Query> = box term_query;
+ query
+ };
+
+ let score_docs = |boolean_query: &Query| {
+ let searcher = index.searcher();
+ let mut test_collector = TestCollector::default();
+ searcher.search(boolean_query, &mut test_collector).unwrap();
+ test_collector.scores()
+ };
+
+ {
+ let boolean_query = BooleanQuery::from(vec![(Occur::Must, make_term_query("a")), (Occur::Must, make_term_query("b"))]);
+ assert_eq!(score_docs(&boolean_query), vec![0.977973, 0.84699446]);
+ }
+ }
}
diff --git a/master/src/tantivy/query/phrase_query/phrase_query.rs.html b/master/src/tantivy/query/phrase_query/phrase_query.rs.html
index 0c9ee6b1a..0b7b92a23 100644
--- a/master/src/tantivy/query/phrase_query/phrase_query.rs.html
+++ b/master/src/tantivy/query/phrase_query/phrase_query.rs.html
@@ -115,6 +115,7 @@
58
59
60
+61
use schema::Term;
use query::Query;
@@ -166,12 +167,13 @@
let terms = self.phrase_terms.clone();
if scoring_enabled {
let bm25_weight = BM25Weight::for_terms(searcher, &terms);
- Ok(box PhraseWeight::with_scoring(
+ Ok(box PhraseWeight::new(
terms,
- bm25_weight
+ bm25_weight,
+ true
))
} else {
- Ok(box PhraseWeight::no_scoring(terms))
+ Ok(box PhraseWeight::new(terms, BM25Weight::null(), false))
}
}
diff --git a/master/src/tantivy/query/phrase_query/phrase_scorer.rs.html b/master/src/tantivy/query/phrase_query/phrase_scorer.rs.html
index a83535914..2bf966d9f 100644
--- a/master/src/tantivy/query/phrase_query/phrase_scorer.rs.html
+++ b/master/src/tantivy/query/phrase_query/phrase_scorer.rs.html
@@ -302,6 +302,51 @@
245
246
247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
use DocId;
use docset::{DocSet, SkipResult};
@@ -354,10 +399,28 @@
phrase_count: u32,
fieldnorm_reader: FieldNormReader,
similarity_weight: BM25Weight,
+ score_needed: bool
}
-/// Computes the length of the intersection of two sorted arrays.
+/// Returns true iff the two sorted arrays contain a common element.
+fn intersection_exists(left: &[u32], right: &[u32]) -> bool {
+ let mut left_i = 0;
+ let mut right_i = 0;
+ while left_i < left.len() && right_i < right.len() {
+ let left_val = left[left_i];
+ let right_val = right[right_i];
+ if left_val < right_val {
+ left_i += 1;
+ } else if right_val < left_val {
+ right_i += 1;
+ } else {
+ return true;
+ }
+ }
+ false
+}
+
fn intersection_count(left: &[u32], right: &[u32]) -> usize {
let mut left_i = 0;
let mut right_i = 0;
@@ -410,7 +473,8 @@
pub fn new(term_postings: Vec<TPostings>,
similarity_weight: BM25Weight,
- fieldnorm_reader: FieldNormReader) -> PhraseScorer<TPostings> {
+ fieldnorm_reader: FieldNormReader,
+ score_needed: bool) -> PhraseScorer<TPostings> {
let num_docsets = term_postings.len();
let postings_with_offsets = term_postings
.into_iter()
@@ -425,13 +489,40 @@
phrase_count: 0u32,
similarity_weight,
fieldnorm_reader,
+ score_needed,
}
}
fn phrase_match(&mut self) -> bool {
- let count = self.phrase_count();
- self.phrase_count = count;
- count > 0u32
+ if self.score_needed {
+ let count = self.phrase_count();
+ self.phrase_count = count;
+ count > 0u32
+ } else {
+ self.phrase_exists()
+ }
+ }
+
+
+ fn phrase_exists(&mut self) -> bool {
+ {
+ self.intersection_docset
+ .docset_mut_specialized(0)
+ .positions(&mut self.left);
+ }
+ let mut intersection_len = self.left.len();
+ for i in 1..self.num_docsets - 1 {
+ {
+ self.intersection_docset.docset_mut_specialized(i).positions(&mut self.right);
+ }
+ intersection_len = intersection(&mut self.left[..intersection_len], &self.right[..]);
+ if intersection_len == 0 {
+ return false;
+ }
+ }
+
+ self.intersection_docset.docset_mut_specialized(self.num_docsets - 1).positions(&mut self.right);
+ intersection_exists(&self.left[..intersection_len], &self.right[..])
}
fn phrase_count(&mut self) -> u32 {
@@ -452,8 +543,7 @@
}
self.intersection_docset.docset_mut_specialized(self.num_docsets - 1).positions(&mut self.right);
- intersection_len = intersection_count(&mut self.left[..intersection_len], &self.right[..]);
- intersection_len as u32
+ intersection_count(&self.left[..intersection_len], &self.right[..]) as u32
}
}
diff --git a/master/src/tantivy/query/phrase_query/phrase_weight.rs.html b/master/src/tantivy/query/phrase_query/phrase_weight.rs.html
index 0dcf18ebd..a8f97fe08 100644
--- a/master/src/tantivy/query/phrase_query/phrase_weight.rs.html
+++ b/master/src/tantivy/query/phrase_query/phrase_weight.rs.html
@@ -116,11 +116,6 @@
59
60
61
-62
-63
-64
-65
-66
use query::Weight;
use query::Scorer;
@@ -135,23 +130,18 @@
pub struct PhraseWeight {
phrase_terms: Vec<Term>,
similarity_weight: BM25Weight,
+ score_needed: bool,
}
impl PhraseWeight {
/// Creates a new phrase weight.
- pub fn with_scoring(phrase_terms: Vec<Term>,
- similarity_weight: BM25Weight) -> PhraseWeight {
+ pub fn new(phrase_terms: Vec<Term>,
+ similarity_weight: BM25Weight,
+ score_needed: bool) -> PhraseWeight {
PhraseWeight {
phrase_terms,
- similarity_weight
- }
- }
-
-
- pub fn no_scoring(phrase_terms: Vec<Term>) -> PhraseWeight {
- PhraseWeight {
- phrase_terms,
- similarity_weight: BM25Weight::null()
+ similarity_weight,
+ score_needed
}
}
}
@@ -172,7 +162,7 @@
return Ok(box EmptyScorer);
}
}
- Ok(box PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader))
+ Ok(box PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader, self.score_needed))
} else {
let mut term_postings_list = Vec::new();
for term in &self.phrase_terms {
@@ -184,7 +174,7 @@
return Ok(box EmptyScorer);
}
}
- Ok(box PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader))
+ Ok(box PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader, self.score_needed))
}
}
}
diff --git a/master/tantivy/query/struct.PhraseQuery.html b/master/tantivy/query/struct.PhraseQuery.html
index caf5b56f4..cf950eb36 100644
--- a/master/tantivy/query/struct.PhraseQuery.html
+++ b/master/tantivy/query/struct.PhraseQuery.html
@@ -86,8 +86,8 @@ must belong to the same field.
impl Debug for PhraseQuery[src]fn fmt(&self, __arg_0: &mut Formatter) -> Result[src]Formats the value using the given formatter. Read more
-impl Query for PhraseQuery[src]impl Query for PhraseQuery[src]fn weight(
&self,
searcher: &Searcher,
scoring_enabled: bool
) -> Result<Box<Weight>>[src]Create the weight associated to a query.
See Weight.
fn count(&self, searcher: &Searcher) -> Result<usize>[src]impl Query for BooleanQueryimpl Query for TermQueryimpl Query for PhraseQueryimpl Query for PhraseQueryimpl Query for AllQueryimpl Query for RangeQuery