for_each_docset to iterate without score

This commit is contained in:
Pascal Seitz
2022-10-26 17:23:56 +08:00
parent af839753e0
commit dfab201191
5 changed files with 92 additions and 38 deletions

View File

@@ -172,17 +172,33 @@ pub trait Collector: Sync + Send {
) -> crate::Result<<Self::Child as SegmentCollector>::Fruit> {
let mut segment_collector = self.for_segment(segment_ord as u32, reader)?;
if let Some(alive_bitset) = reader.alive_bitset() {
weight.for_each(reader, self.requires_scoring(), &mut |doc, score| {
if alive_bitset.is_alive(doc) {
match (reader.alive_bitset(), self.requires_scoring()) {
(Some(alive_bitset), true) => {
weight.for_each(reader, &mut |doc, score| {
if alive_bitset.is_alive(doc) {
segment_collector.collect(doc, score);
}
})?;
}
(Some(alive_bitset), false) => {
weight.for_each_no_score(reader, &mut |doc| {
if alive_bitset.is_alive(doc) {
segment_collector.collect(doc, 0.0);
}
})?;
}
(None, true) => {
weight.for_each(reader, &mut |doc, score| {
segment_collector.collect(doc, score);
}
})?;
} else {
weight.for_each(reader, self.requires_scoring(), &mut |doc, score| {
segment_collector.collect(doc, score);
})?;
})?;
}
(None, false) => {
weight.for_each_no_score(reader, &mut |doc| {
segment_collector.collect(doc, 0.0);
})?;
}
}
Ok(segment_collector.harvest())
}
}

View File

@@ -93,16 +93,14 @@ fn compute_deleted_bitset(
// A delete operation should only affect
// document that were inserted before it.
delete_op.target.for_each(
segment_reader,
false, // requires_scoring
&mut |doc_matching_delete_query, _| {
delete_op
.target
.for_each_no_score(segment_reader, &mut |doc_matching_delete_query| {
if doc_opstamps.is_deleted(doc_matching_delete_query, delete_op.opstamp) {
alive_bitset.remove(doc_matching_delete_query);
might_have_changed = true;
}
},
)?;
})?;
delete_cursor.advance();
}
Ok(might_have_changed)

View File

@@ -5,7 +5,7 @@ use crate::postings::FreqReadingOption;
use crate::query::explanation::does_not_match;
use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner};
use crate::query::term_query::TermScorer;
use crate::query::weight::{for_each_pruning_scorer, for_each_scorer};
use crate::query::weight::{for_each_docset, for_each_pruning_scorer, for_each_scorer};
use crate::query::{
intersect_scorers, EmptyScorer, Exclude, Explanation, Occur, RequiredOptionalScorer, Scorer,
Union, Weight,
@@ -204,17 +204,34 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
fn for_each(
&self,
reader: &SegmentReader,
requires_scoring: bool,
callback: &mut dyn FnMut(DocId, Score),
) -> crate::Result<()> {
let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
match scorer {
SpecializedScorer::TermUnion(term_scorers) => {
let mut union_scorer = Union::build(term_scorers, &self.score_combiner_fn);
for_each_scorer(&mut union_scorer, requires_scoring, callback);
for_each_scorer(&mut union_scorer, callback);
}
SpecializedScorer::Other(mut scorer) => {
for_each_scorer(scorer.as_mut(), requires_scoring, callback);
for_each_scorer(scorer.as_mut(), callback);
}
}
Ok(())
}
fn for_each_no_score(
&self,
reader: &SegmentReader,
callback: &mut dyn FnMut(DocId),
) -> crate::Result<()> {
let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
match scorer {
SpecializedScorer::TermUnion(term_scorers) => {
let mut union_scorer = Union::build(term_scorers, &self.score_combiner_fn);
for_each_docset(&mut union_scorer, callback);
}
SpecializedScorer::Other(mut scorer) => {
for_each_docset(scorer.as_mut(), callback);
}
}
Ok(())

View File

@@ -5,7 +5,7 @@ use crate::fieldnorm::FieldNormReader;
use crate::postings::SegmentPostings;
use crate::query::bm25::Bm25Weight;
use crate::query::explanation::does_not_match;
use crate::query::weight::for_each_scorer;
use crate::query::weight::{for_each_docset, for_each_scorer};
use crate::query::{Explanation, Scorer, Weight};
use crate::schema::IndexRecordOption;
use crate::{DocId, Score, Term};
@@ -49,11 +49,22 @@ impl Weight for TermWeight {
fn for_each(
&self,
reader: &SegmentReader,
requires_scoring: bool,
callback: &mut dyn FnMut(DocId, Score),
) -> crate::Result<()> {
let mut scorer = self.specialized_scorer(reader, 1.0)?;
for_each_scorer(&mut scorer, requires_scoring, callback);
for_each_scorer(&mut scorer, callback);
Ok(())
}
/// Iterates through all of the document matched by the DocSet
/// `DocSet` and push the scored documents to the collector.
fn for_each_no_score(
&self,
reader: &SegmentReader,
callback: &mut dyn FnMut(DocId),
) -> crate::Result<()> {
let mut scorer = self.specialized_scorer(reader, 1.0)?;
for_each_docset(&mut scorer, callback);
Ok(())
}

View File

@@ -1,27 +1,28 @@
use super::Scorer;
use crate::core::SegmentReader;
use crate::query::Explanation;
use crate::{DocId, Score, TERMINATED};
use crate::{DocId, DocSet, Score, TERMINATED};
/// Iterates through all of the document matched by the DocSet
/// `DocSet` and push the scored documents to the collector.
/// Iterates through all of the documents and scores matched by the DocSet
/// `DocSet`.
pub(crate) fn for_each_scorer<TScorer: Scorer + ?Sized>(
scorer: &mut TScorer,
requires_scoring: bool,
callback: &mut dyn FnMut(DocId, Score),
) {
let mut doc = scorer.doc();
while doc != TERMINATED {
callback(doc, scorer.score());
doc = scorer.advance();
}
}
if requires_scoring {
while doc != TERMINATED {
callback(doc, scorer.score());
doc = scorer.advance();
}
} else {
while doc != TERMINATED {
callback(doc, 0.0);
doc = scorer.advance();
}
/// Iterates through all of the documents matched by the DocSet
/// `DocSet`.
pub(crate) fn for_each_docset<T: DocSet + ?Sized>(scorer: &mut T, callback: &mut dyn FnMut(DocId)) {
let mut doc = scorer.doc();
while doc != TERMINATED {
callback(doc);
doc = scorer.advance();
}
}
@@ -80,11 +81,22 @@ pub trait Weight: Send + Sync + 'static {
fn for_each(
&self,
reader: &SegmentReader,
requires_scoring: bool,
callback: &mut dyn FnMut(DocId, Score),
) -> crate::Result<()> {
let mut scorer = self.scorer(reader, 1.0)?;
for_each_scorer(scorer.as_mut(), requires_scoring, callback);
for_each_scorer(scorer.as_mut(), callback);
Ok(())
}
/// Iterates through all of the document matched by the DocSet
/// `DocSet` and push the scored documents to the collector.
fn for_each_no_score(
&self,
reader: &SegmentReader,
callback: &mut dyn FnMut(DocId),
) -> crate::Result<()> {
let mut scorer = self.scorer(reader, 1.0)?;
for_each_docset(scorer.as_mut(), callback);
Ok(())
}