Disable scoring

- Scoring is now disabled through the `scoring_enabled` argument of the `.weight()` method
- Collectors declare whether they need scoring
This commit is contained in:
Paul Masurel
2018-02-17 12:43:16 +09:00
parent 0300e7272b
commit 292bb17346
17 changed files with 90 additions and 54 deletions

View File

@@ -16,6 +16,10 @@ impl Collector for DoNothingCollector {
}
/// Discards the document: both the doc id and its score are ignored.
#[inline]
fn collect(&mut self, _doc: DocId, _score: Score) {}
/// Always `false`: `collect` ignores the score, so none needs to be computed.
#[inline]
fn requires_scoring(&self) -> bool {
false
}
}
/// Zero-cost abstraction used to collect on multiple collectors.
@@ -51,6 +55,10 @@ impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Rig
self.left.collect(doc, score);
self.right.collect(doc, score);
}
/// Scoring is required as soon as either of the two chained collectors
/// (`left` or `right`) requires it.
fn requires_scoring(&self) -> bool {
self.left.requires_scoring() || self.right.requires_scoring()
}
}
/// Creates a `ChainedCollector`

View File

@@ -28,6 +28,10 @@ impl Collector for CountCollector {
/// Counts the document; the doc id and the score are both ignored.
fn collect(&mut self, _: DocId, _: Score) {
self.count += 1;
}
/// Always `false`: counting documents does not read their score.
fn requires_scoring(&self) -> bool {
false
}
}
#[cfg(test)]
@@ -44,5 +48,8 @@ mod tests {
assert_eq!(count_collector.count(), 1);
count_collector.collect(1u32, 1f32);
assert_eq!(count_collector.count(), 2);
assert!(!count_collector.requires_scoring());
}
}

View File

@@ -420,6 +420,10 @@ impl Collector for FacetCollector {
previous_collapsed_ord = collapsed_ord;
}
}
/// Always `false`.
// NOTE(review): presumably facet collection never reads the score — confirm against `collect`.
fn requires_scoring(&self) -> bool {
false
}
}
/// Intermediary result of the `FacetCollector` that stores

View File

@@ -62,6 +62,9 @@ pub trait Collector {
) -> Result<()>;
/// The query pushes the scored document to the collector via this method.
fn collect(&mut self, doc: DocId, score: Score);
/// Returns true iff the collector needs scores to be computed for the
/// documents it collects.
///
/// `Query::search` passes this value to `Query::weight` as `scoring_enabled`.
fn requires_scoring(&self) -> bool;
}
impl<'a, C: Collector> Collector for &'a mut C {
@@ -74,7 +77,11 @@ impl<'a, C: Collector> Collector for &'a mut C {
}
/// The query pushes the scored document to the collector via this method.
fn collect(&mut self, doc: DocId, score: Score) {
(*self).collect(doc, score);
C::collect(self, doc, score)
}
/// Delegates to the underlying collector `C`.
fn requires_scoring(&self) -> bool {
C::requires_scoring(self)
}
}
@@ -128,6 +135,10 @@ pub mod tests {
/// Records the doc id shifted by `self.offset`; the score is ignored.
fn collect(&mut self, doc: DocId, _score: Score) {
self.docs.push(doc + self.offset);
}
/// Always `false`: `collect` ignores the score.
fn requires_scoring(&self) -> bool {
false
}
}
/// Collects in order all of the fast fields for all of the
@@ -144,7 +155,7 @@ pub mod tests {
pub fn for_field(field: Field) -> FastFieldTestCollector {
FastFieldTestCollector {
vals: Vec::new(),
field: field,
field,
ff_reader: None,
}
}
@@ -164,6 +175,9 @@ pub mod tests {
let val = self.ff_reader.as_ref().unwrap().get(doc);
self.vals.push(val);
}
/// Always `false`.
// NOTE(review): presumably `collect` only reads the fast field value of the doc and never its score — confirm.
fn requires_scoring(&self) -> bool {
false
}
}
#[bench]

View File

@@ -17,7 +17,7 @@ impl<'a> MultiCollector<'a> {
/// Constructor
pub fn from(collectors: Vec<&'a mut Collector>) -> MultiCollector {
MultiCollector {
collectors: collectors,
collectors,
}
}
}
@@ -39,6 +39,11 @@ impl<'a> Collector for MultiCollector<'a> {
collector.collect(doc, score);
}
}
/// Scoring is required if at least one of the wrapped collectors requires it.
fn requires_scoring(&self) -> bool {
self.collectors
.iter()
.any(|collector| collector.requires_scoring())
}
}
#[cfg(test)]

View File

@@ -125,6 +125,10 @@ impl Collector for TopCollector {
self.heap.push(wrapped_doc);
}
}
/// Always `true`.
// NOTE(review): presumably the heap of top documents is ordered by score — confirm against `collect`.
fn requires_scoring(&self) -> bool {
true
}
}
#[cfg(test)]

View File

@@ -95,14 +95,14 @@ pub mod tests {
index_writer.add_document(doc!(title => r#"abc abc abc"#));
}
index_writer.add_document(doc!(title => r#"abc be be be be abc"#));
index_writer.commit().unwrap();
index_writer .commit().unwrap();
index.load_searchers().unwrap();
let searcher = index.searcher();
let query = TermQuery::new(
Term::from_field_text(title, "abc"),
IndexRecordOption::WithFreqsAndPositions,
);
let weight = query.specialized_weight(&*searcher);
let weight = query.specialized_weight(&*searcher, true);
{
let mut scorer = weight
.specialized_scorer(searcher.segment_reader(0u32))
@@ -282,7 +282,7 @@ pub mod tests {
IndexRecordOption::Basic,
);
let searcher = index.searcher();
let mut term_weight = term_query.specialized_weight(&*searcher);
let mut term_weight = term_query.specialized_weight(&*searcher, true);
term_weight.index_record_option = IndexRecordOption::WithFreqsAndPositions;
let segment_reader = &searcher.segment_readers()[0];
let mut term_scorer = term_weight.specialized_scorer(segment_reader).unwrap();

View File

@@ -20,7 +20,7 @@ impl Query for AllQuery {
self
}
fn weight(&self, _: &Searcher) -> Result<Box<Weight>> {
fn weight(&self, _: &Searcher, _: bool) -> Result<Box<Weight>> {
Ok(box AllWeight)
}
}

View File

@@ -22,14 +22,12 @@ use query::Occur;
#[derive(Debug)]
pub struct BooleanQuery {
subqueries: Vec<(Occur, Box<Query>)>,
scoring_disabled: bool,
}
impl From<Vec<(Occur, Box<Query>)>> for BooleanQuery {
fn from(subqueries: Vec<(Occur, Box<Query>)>) -> BooleanQuery {
BooleanQuery {
subqueries,
scoring_disabled: false,
subqueries
}
}
}
@@ -39,19 +37,12 @@ impl Query for BooleanQuery {
self
}
fn disable_scoring(&mut self) {
self.scoring_disabled = true;
for &mut (_, ref mut subquery) in &mut self.subqueries {
subquery.disable_scoring();
}
}
fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> Result<Box<Weight>> {
let sub_weights = self.subqueries
.iter()
.map(|&(ref occur, ref subquery)| Ok((*occur, subquery.weight(searcher)?)))
.map(|&(ref occur, ref subquery)| Ok((*occur, subquery.weight(searcher, scoring_enabled)?)))
.collect::<Result<_>>()?;
Ok(box BooleanWeight::new(sub_weights, self.scoring_disabled))
Ok(box BooleanWeight::new(sub_weights, scoring_enabled))
}
}

View File

@@ -5,7 +5,6 @@ use std::collections::HashMap;
use query::EmptyScorer;
use query::Scorer;
use query::Exclude;
use query::ConstScorer;
use query::Occur;
use query::RequiredOptionalScorer;
use query::score_combiner::{SumWithCoordsCombiner, DoNothingCombiner, ScoreCombiner};
@@ -25,14 +24,14 @@ fn scorer_union<'a, TScoreCombiner>(docsets: Vec<Box<Scorer + 'a>>) -> Box<Score
pub struct BooleanWeight {
weights: Vec<(Occur, Box<Weight>)>,
scoring_disabled: bool,
scoring_enabled: bool,
}
impl BooleanWeight {
pub fn new(weights: Vec<(Occur, Box<Weight>)>, scoring_disabled: bool) -> BooleanWeight {
pub fn new(weights: Vec<(Occur, Box<Weight>)>, scoring_enabled: bool) -> BooleanWeight {
BooleanWeight {
weights,
scoring_disabled,
scoring_enabled,
}
}
@@ -67,10 +66,10 @@ impl BooleanWeight {
let positive_scorer: Box<Scorer> = match (should_scorer_opt, must_scorer_opt) {
(Some(should_scorer), Some(must_scorer)) => {
if self.scoring_disabled {
must_scorer
} else {
if self.scoring_enabled {
box RequiredOptionalScorer::<_,_,TScoreCombiner>::new(must_scorer, should_scorer)
} else {
must_scorer
}
}
(None, Some(must_scorer)) => must_scorer,
@@ -99,10 +98,10 @@ impl Weight for BooleanWeight {
} else {
weight.scorer(reader)
}
} else if self.scoring_disabled {
self.complex_scorer::<DoNothingCombiner>(reader)
} else {
} else if self.scoring_enabled {
self.complex_scorer::<SumWithCoordsCombiner>(reader)
} else {
self.complex_scorer::<DoNothingCombiner>(reader)
}
}
}

View File

@@ -35,8 +35,8 @@ impl Query for PhraseQuery {
/// Create the weight associated to a query.
///
/// See [`Weight`](./trait.Weight.html).
fn weight(&self, _searcher: &Searcher) -> Result<Box<Weight>> {
Ok(box PhraseWeight::from(self.phrase_terms.clone()))
fn weight(&self, _searcher: &Searcher, scoring_enabled: bool) -> Result<Box<Weight>> {
Ok(box PhraseWeight::new(self.phrase_terms.clone(), scoring_enabled))
}
}

View File

@@ -9,11 +9,15 @@ use Result;
pub struct PhraseWeight {
phrase_terms: Vec<Term>,
scoring_enabled: bool
}
impl From<Vec<Term>> for PhraseWeight {
fn from(phrase_terms: Vec<Term>) -> PhraseWeight {
PhraseWeight { phrase_terms }
impl PhraseWeight {
pub fn new(phrase_terms: Vec<Term>, scoring_enabled: bool) -> PhraseWeight {
PhraseWeight {
phrase_terms,
scoring_enabled // TODO compute the phrase freq if scoring is enabled. stop at first match else.
}
}
}

View File

@@ -45,16 +45,13 @@ pub trait Query: fmt::Debug {
/// into a specific type. This is mostly useful for unit tests.
fn as_any(&self) -> &Any;
/// Disable scoring.
///
/// For some query this may improve performance
/// when scoring is not required.
fn disable_scoring(&mut self) {}
/// Create the weight associated to a query.
///
/// If scoring is not required, setting `scoring_enabled` to `false`
/// can improve performance.
///
/// See [`Weight`](./trait.Weight.html).
fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>>;
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> Result<Box<Weight>>;
/// Search works as follows :
///
@@ -67,7 +64,8 @@ pub trait Query: fmt::Debug {
///
fn search(&self, searcher: &Searcher, collector: &mut Collector) -> Result<TimerTree> {
let mut timer_tree = TimerTree::default();
let weight = self.weight(searcher)?;
let scoring_enabled = collector.requires_scoring();
let weight = self.weight(searcher, scoring_enabled)?;
{
let mut search_timer = timer_tree.open("search");
for (segment_ord, segment_reader) in searcher.segment_readers().iter().enumerate() {

View File

@@ -133,7 +133,7 @@ impl Query for RangeQuery {
self
}
fn weight(&self, _searcher: &Searcher) -> Result<Box<Weight>> {
fn weight(&self, _searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<Weight>> {
Ok(box RangeWeight {
field: self.field,
left_bound: self.left_bound.clone(),

View File

@@ -1,7 +1,7 @@
use DocId;
use DocSet;
use query::Scorer;
use query::score_combiner::{ScoreCombiner, SumCombiner};
use query::score_combiner::ScoreCombiner;
use Score;
use postings::SkipResult;
use std::cmp::Ordering;

View File

@@ -46,7 +46,7 @@ mod tests {
Term::from_field_text(text_field, "a"),
IndexRecordOption::Basic,
);
let term_weight = term_query.weight(&searcher).unwrap();
let term_weight = term_query.weight(&searcher, true).unwrap();
let segment_reader = searcher.segment_reader(0);
let mut term_scorer = term_weight.scorer(segment_reader).unwrap();
assert!(term_scorer.advance());

View File

@@ -36,12 +36,18 @@ impl TermQuery {
/// While `.weight(...)` returns a boxed trait object,
/// this method returns a specific implementation.
/// This is useful for optimization purposes.
pub fn specialized_weight(&self, searcher: &Searcher) -> TermWeight {
pub fn specialized_weight(&self, searcher: &Searcher, scoring_enabled: bool) -> TermWeight {
let index_record_option =
if scoring_enabled {
self.index_record_option
} else {
IndexRecordOption::Basic
};
TermWeight {
num_docs: searcher.num_docs(),
doc_freq: searcher.doc_freq(&self.term),
term: self.term.clone(),
index_record_option: self.index_record_option,
index_record_option
}
}
}
@@ -51,11 +57,7 @@ impl Query for TermQuery {
self
}
fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
Ok(box self.specialized_weight(searcher))
}
fn disable_scoring(&mut self) {
self.index_record_option = IndexRecordOption::Basic;
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> Result<Box<Weight>> {
Ok(box self.specialized_weight(searcher, scoring_enabled))
}
}