From dfab201191c5979c7263bc7d77fe1ab34a750be8 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Wed, 26 Oct 2022 17:23:56 +0800 Subject: [PATCH] for_each_docset to iterate without score --- src/collector/mod.rs | 34 +++++++++++++----- src/indexer/index_writer.rs | 10 +++--- src/query/boolean_query/boolean_weight.rs | 25 ++++++++++--- src/query/term_query/term_weight.rs | 17 +++++++-- src/query/weight.rs | 44 ++++++++++++++--------- 5 files changed, 92 insertions(+), 38 deletions(-) diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 970affb0a..b0a08d48d 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -172,17 +172,33 @@ pub trait Collector: Sync + Send { ) -> crate::Result<::Fruit> { let mut segment_collector = self.for_segment(segment_ord as u32, reader)?; - if let Some(alive_bitset) = reader.alive_bitset() { - weight.for_each(reader, self.requires_scoring(), &mut |doc, score| { - if alive_bitset.is_alive(doc) { + match (reader.alive_bitset(), self.requires_scoring()) { + (Some(alive_bitset), true) => { + weight.for_each(reader, &mut |doc, score| { + if alive_bitset.is_alive(doc) { + segment_collector.collect(doc, score); + } + })?; + } + (Some(alive_bitset), false) => { + weight.for_each_no_score(reader, &mut |doc| { + if alive_bitset.is_alive(doc) { + segment_collector.collect(doc, 0.0); + } + })?; + } + (None, true) => { + weight.for_each(reader, &mut |doc, score| { segment_collector.collect(doc, score); - } - })?; - } else { - weight.for_each(reader, self.requires_scoring(), &mut |doc, score| { - segment_collector.collect(doc, score); - })?; + })?; + } + (None, false) => { + weight.for_each_no_score(reader, &mut |doc| { + segment_collector.collect(doc, 0.0); + })?; + } } + Ok(segment_collector.harvest()) } } diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index b46809bc5..9869cd08a 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -93,16 +93,14 @@ fn compute_deleted_bitset( // A delete operation should only affect // document that were inserted before it. - delete_op.target.for_each( - segment_reader, - false, // requires_scoring - &mut |doc_matching_delete_query, _| { + delete_op + .target + .for_each_no_score(segment_reader, &mut |doc_matching_delete_query| { if doc_opstamps.is_deleted(doc_matching_delete_query, delete_op.opstamp) { alive_bitset.remove(doc_matching_delete_query); might_have_changed = true; } - }, - )?; + })?; delete_cursor.advance(); } Ok(might_have_changed) diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index 13e0a89d8..dd43270d7 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -5,7 +5,7 @@ use crate::postings::FreqReadingOption; use crate::query::explanation::does_not_match; use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner}; use crate::query::term_query::TermScorer; -use crate::query::weight::{for_each_pruning_scorer, for_each_scorer}; +use crate::query::weight::{for_each_docset, for_each_pruning_scorer, for_each_scorer}; use crate::query::{ intersect_scorers, EmptyScorer, Exclude, Explanation, Occur, RequiredOptionalScorer, Scorer, Union, Weight, @@ -204,17 +204,34 @@ impl Weight for BooleanWeight crate::Result<()> { let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?; match scorer { SpecializedScorer::TermUnion(term_scorers) => { let mut union_scorer = Union::build(term_scorers, &self.score_combiner_fn); - for_each_scorer(&mut union_scorer, requires_scoring, callback); + for_each_scorer(&mut union_scorer, callback); } SpecializedScorer::Other(mut scorer) => { - for_each_scorer(scorer.as_mut(), requires_scoring, callback); + for_each_scorer(scorer.as_mut(), callback); + } + } + Ok(()) + } + + fn for_each_no_score( + &self, + reader: &SegmentReader, + callback: &mut dyn FnMut(DocId), + ) -> crate::Result<()> { + let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?; + match scorer { + SpecializedScorer::TermUnion(term_scorers) => { + let mut union_scorer = Union::build(term_scorers, &self.score_combiner_fn); + for_each_docset(&mut union_scorer, callback); + } + SpecializedScorer::Other(mut scorer) => { + for_each_docset(scorer.as_mut(), callback); } } Ok(()) diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index 59fea2648..abe1835dc 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -5,7 +5,7 @@ use crate::fieldnorm::FieldNormReader; use crate::postings::SegmentPostings; use crate::query::bm25::Bm25Weight; use crate::query::explanation::does_not_match; -use crate::query::weight::for_each_scorer; +use crate::query::weight::{for_each_docset, for_each_scorer}; use crate::query::{Explanation, Scorer, Weight}; use crate::schema::IndexRecordOption; use crate::{DocId, Score, Term}; @@ -49,11 +49,22 @@ impl Weight for TermWeight { fn for_each( &self, reader: &SegmentReader, - requires_scoring: bool, callback: &mut dyn FnMut(DocId, Score), ) -> crate::Result<()> { let mut scorer = self.specialized_scorer(reader, 1.0)?; - for_each_scorer(&mut scorer, requires_scoring, callback); + for_each_scorer(&mut scorer, callback); + Ok(()) + } + + /// Iterates through all of the document matched by the DocSet + /// `DocSet` and push the scored documents to the collector. + fn for_each_no_score( + &self, + reader: &SegmentReader, + callback: &mut dyn FnMut(DocId), + ) -> crate::Result<()> { + let mut scorer = self.specialized_scorer(reader, 1.0)?; + for_each_docset(&mut scorer, callback); Ok(()) } diff --git a/src/query/weight.rs b/src/query/weight.rs index 8e57b1dc5..cb4c7af95 100644 --- a/src/query/weight.rs +++ b/src/query/weight.rs @@ -1,27 +1,28 @@ use super::Scorer; use crate::core::SegmentReader; use crate::query::Explanation; -use crate::{DocId, Score, TERMINATED}; +use crate::{DocId, DocSet, Score, TERMINATED}; -/// Iterates through all of the document matched by the DocSet -/// `DocSet` and push the scored documents to the collector. +/// Iterates through all of the documents and scores matched by the DocSet +/// `DocSet`. pub(crate) fn for_each_scorer( scorer: &mut TScorer, - requires_scoring: bool, callback: &mut dyn FnMut(DocId, Score), ) { let mut doc = scorer.doc(); + while doc != TERMINATED { + callback(doc, scorer.score()); + doc = scorer.advance(); + } +} - if requires_scoring { - while doc != TERMINATED { - callback(doc, scorer.score()); - doc = scorer.advance(); - } - } else { - while doc != TERMINATED { - callback(doc, 0.0); - doc = scorer.advance(); - } +/// Iterates through all of the documents matched by the DocSet +/// `DocSet`. +pub(crate) fn for_each_docset(scorer: &mut T, callback: &mut dyn FnMut(DocId)) { + let mut doc = scorer.doc(); + while doc != TERMINATED { + callback(doc); + doc = scorer.advance(); } } @@ -80,11 +81,22 @@ pub trait Weight: Send + Sync + 'static { fn for_each( &self, reader: &SegmentReader, - requires_scoring: bool, callback: &mut dyn FnMut(DocId, Score), ) -> crate::Result<()> { let mut scorer = self.scorer(reader, 1.0)?; - for_each_scorer(scorer.as_mut(), requires_scoring, callback); + for_each_scorer(scorer.as_mut(), callback); + Ok(()) + } + + /// Iterates through all of the document matched by the DocSet + /// `DocSet` and push the scored documents to the collector. + fn for_each_no_score( + &self, + reader: &SegmentReader, + callback: &mut dyn FnMut(DocId), + ) -> crate::Result<()> { + let mut scorer = self.scorer(reader, 1.0)?; + for_each_docset(scorer.as_mut(), callback); Ok(()) }