From a7a98b11d770dbd4925a296de60600dbb1fd5eb1 Mon Sep 17 00:00:00 2001
From: Paul Masurel
Date: Wed, 8 May 2019 09:26:38 +0900
Subject: [PATCH] exploratory

---
Review notes (free text placed here, before the diffstat, is skipped when the
patch is applied):

* Layout: one patch line per text line again. In the flattened copy every
  `//` comment swallowed the code that followed it.
* Generic parameter lists that had been stripped are written back:
  `Option<DocId>`, `<TDocSet: DocSet>`, `sum::<Score>()` and the `impl<..>`
  headers for `Intersection` and `Union`. They are inferred from how the
  names are used. TODO confirm against the tree.
* Rebuilt by inference: the three leading context lines of the first
  intersection.rs hunk and the four-line doc comment on
  `next_in_intersection` (only its closing words survived). Indentation of
  context and removed lines assumes rustfmt defaults. TODO confirm before
  applying.
* Fix in `skip_several_scorers`: the excluded docset is now skipped only
  while it actually sits on `target`. `for_each` never resets
  `other_candidate_ord` after a hit or after `next_in_intersection` moves the
  candidate, so a stale ordinal used to exempt a docset that was behind the
  target, and documents missing from it were reported as matches
  (left=[1,5,6], right=[1,5,6], others[0]=[5,9] emitted 6).
* Doc fix in scorer.rs: `.skip()` now reads `.skip_next()`.
* Hunk line counts and the diffstat are unchanged. The last union.rs hunk
  stops one context line short, exactly as in the copy under review.

 src/query/intersection.rs | 177 +++++++++++++++++++++-----------------
 src/query/scorer.rs       |   3 +
 src/query/union.rs        |  21 ++---
 3 files changed, 112 insertions(+), 89 deletions(-)

diff --git a/src/query/intersection.rs b/src/query/intersection.rs
index caedf03d9..0b6052640 100644
--- a/src/query/intersection.rs
+++ b/src/query/intersection.rs
@@ -214,6 +214,102 @@ impl DocSet for Intersection
     }
 }
 
+/// Moves both scorers forward to the first document they have in common
+/// and returns it, or `None` when either scorer runs out.
+///
+/// `behind` is skipped up to `ahead`; expects `ahead.doc() > behind.doc()`.
+fn next_in_intersection<'a, TScorer: Scorer>(
+    ahead: &'a mut TScorer,
+    behind: &'a mut TScorer,
+) -> Option<DocId> {
+    let candidate = ahead.doc();
+    match behind.skip_next(candidate) {
+        SkipResult::Reached => Some(candidate),
+        SkipResult::OverStep => {
+            // yeah for tail-recursion
+            next_in_intersection(behind, ahead)
+        }
+        SkipResult::End => None,
+    }
+}
+
+enum SkipResultComplex {
+    Reached,
+    Overstep { other_ord: usize, candidate: DocId },
+    End,
+}
+
+fn skip_several_scorers<TDocSet: DocSet>(
+    others: &mut [TDocSet],
+    except_candidate_ord: usize,
+    target: DocId,
+) -> SkipResultComplex {
+    for (ord, docset) in others.iter_mut().enumerate() {
+        // The docset at `except_candidate_ord` is the one that produced
+        // the candidate; if it still sits on `target`, calling `skip_next`
+        // would advance it past the match. The ordinal can be stale (the
+        // caller never resets it after a hit), so the position is checked
+        // too; a docset left behind falls through to `skip_next` as usual.
+        if ord == except_candidate_ord && docset.doc() == target {
+            continue;
+        }
+        match docset.skip_next(target) {
+            SkipResult::Reached => {}
+            SkipResult::OverStep => {
+                return SkipResultComplex::Overstep {
+                    other_ord: ord,
+                    candidate: docset.doc(),
+                };
+            }
+            SkipResult::End => {
+                return SkipResultComplex::End;
+            }
+        }
+    }
+    SkipResultComplex::Reached
+}
+
+fn for_each<'a, TScorer: Scorer, TOtherscorer: Scorer>(
+    left: &'a mut TScorer,
+    right: &'a mut TScorer,
+    others: &'a mut [TOtherscorer],
+    callback: &mut FnMut(DocId, Score),
+) {
+    let mut other_candidate_ord: usize = usize::max_value();
+    if !left.advance() {
+        return;
+    }
+    while let Some(candidate) = next_in_intersection(left, right) {
+        // test the remaining scorers
+        match skip_several_scorers(others, other_candidate_ord, candidate) {
+            SkipResultComplex::Reached => {
+                let intersection_score: Score = left.score()
+                    + right.score()
+                    + others.iter_mut().map(|other| other.score()).sum::<Score>();
+                callback(candidate, intersection_score);
+                if !left.advance() {
+                    return;
+                }
+            }
+            SkipResultComplex::Overstep {
+                other_ord,
+                candidate,
+            } => match left.skip_next(candidate) {
+                SkipResult::End => {
+                    return;
+                }
+                SkipResult::Reached => {
+                    other_candidate_ord = other_ord;
+                }
+                SkipResult::OverStep => other_candidate_ord = usize::max_value(),
+            },
+            SkipResultComplex::End => {
+                return;
+            }
+        }
+    }
+}
+
 impl<TScorer, TOtherScorer> Scorer for Intersection<TScorer, TOtherScorer>
 where
     TScorer: Scorer,
@@ -225,85 +321,8 @@ where
             + self.others.iter_mut().map(Scorer::score).sum::<Score>()
     }
 
-    fn for_each(&mut self, callback: &mut FnMut(DocId, Score)) {b
-        let (left, right) = (&mut self.left, &mut self.right);
-
-        if !left.advance() {
-            return;
-        }
-
-        let mut candidate = left.doc();
-        let mut other_candidate_ord: usize = usize::max_value();
-
-        'outer: loop {
-            // In the first part we look for a document in the intersection
-            // of the two rarest `DocSet` in the intersection.
-            loop {
-                match right.skip_next(candidate) {
-                    SkipResult::Reached => {
-                        break;
-                    }
-                    SkipResult::OverStep => {
-                        candidate = right.doc();
-                        other_candidate_ord = usize::max_value();
-                    }
-                    SkipResult::End => {
-                        return;
-                    }
-                }
-                match left.skip_next(candidate) {
-                    SkipResult::Reached => {
-                        break;
-                    }
-                    SkipResult::OverStep => {
-                        candidate = left.doc();
-                        other_candidate_ord = usize::max_value();
-                    }
-                    SkipResult::End => {
-                        return;
-                    }
-                }
-            }
-
-
-            // test the remaining scorers;
-            for (ord, docset) in self.others.iter_mut().enumerate() {
-                if ord == other_candidate_ord {
-                    continue;
-                }
-                // `candidate_ord` is already at the
-                // right position.
-                //
-                // Calling `skip_next` would advance this docset
-                // and miss it.
-                match docset.skip_next(candidate) {
-                    SkipResult::Reached => {}
-                    SkipResult::OverStep => {
-                        // this is not in the intersection,
-                        // let's update our candidate.
-                        candidate = docset.doc();
-                        match left.skip_next(candidate) {
-                            SkipResult::Reached => {
-                                other_candidate_ord = ord;
-                            }
-                            SkipResult::OverStep => {
-                                candidate = left.doc();
-                                other_candidate_ord = usize::max_value();
-                            }
-                            SkipResult::End => {
-                                return;
-                            }
-                        }
-                        continue 'outer;
-                    }
-                    SkipResult::End => {
-                        return;
-                    }
-                }
-                callback(candidate, self.score())
-            }
-
-        }
+    fn for_each(&mut self, callback: &mut FnMut(DocId, Score)) {
+        for_each(&mut self.left, &mut self.right, &mut self.others, callback);
     }
 }
 
diff --git a/src/query/scorer.rs b/src/query/scorer.rs
index 55f9ee1c0..eaf57d38d 100644
--- a/src/query/scorer.rs
+++ b/src/query/scorer.rs
@@ -16,6 +16,9 @@ pub trait Scorer: downcast_rs::Downcast + DocSet + 'static {
 
     /// Iterates through all of the document matched by the DocSet
     /// `DocSet` and push the scored documents to the collector.
+    ///
+    /// This method assumes that the Scorer is brand new, and `.advance()`
+    /// and `.skip_next()` haven't been called yet.
     fn for_each(&mut self, callback: &mut FnMut(DocId, Score)) {
         while self.advance() {
             callback(self.doc(), self.score());
diff --git a/src/query/union.rs b/src/query/union.rs
index 94ab7be73..87b6cce20 100644
--- a/src/query/union.rs
+++ b/src/query/union.rs
@@ -250,6 +250,16 @@ where
     fn size_hint(&self) -> u32 {
         0u32
     }
+}
+
+impl<TScorer, TScoreCombiner> Scorer for Union<TScorer, TScoreCombiner>
+where
+    TScoreCombiner: ScoreCombiner,
+    TScorer: Scorer,
+{
+    fn score(&mut self) -> Score {
+        self.score
+    }
 
     fn for_each(&mut self, callback: &mut FnMut(DocId, Score)) {
         // TODO how do we deal with the fact that people may have called .advance() before.
@@ -261,6 +271,7 @@ where
                     let doc = offset + delta;
                     let score_combiner = &mut self.scores[delta as usize];
                     let score = score_combiner.score();
+                    callback(doc, score);
                     score_combiner.clear();
                 }
             }
@@ -268,16 +279,6 @@ where
     }
 }
 
-impl<TScorer, TScoreCombiner> Scorer for Union<TScorer, TScoreCombiner>
-where
-    TScoreCombiner: ScoreCombiner,
-    TScorer: Scorer,
-{
-    fn score(&mut self) -> Score {
-        self.score
-    }
-}
-
 #[cfg(test)]
 mod tests {