exploratory

This commit is contained in:
Paul Masurel
2019-05-08 09:26:38 +09:00
parent a18932165f
commit a7a98b11d7
3 changed files with 112 additions and 89 deletions

View File

@@ -214,6 +214,102 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
}
}
/// Looks for the next document shared by the two scorers.
///
/// Preconditions (not checked here):
/// - `ahead` is initialized, i.e. `ahead.advance()` has been called at least
///   once and returned `true`;
/// - `behind` is either uninitialized or positioned on a doc such that
///   `ahead.doc() > behind.doc()`.
///
/// Returns `Some(doc)` as soon as `skip_next` reports `SkipResult::Reached`
/// for the doc the other scorer is positioned on, and `None` as soon as one
/// of the scorers reports `SkipResult::End`.
fn next_in_intersection<'a, TScorer: Scorer>(
    ahead: &'a mut TScorer,
    behind: &'a mut TScorer,
) -> Option<DocId> {
    // Rebind the arguments as mutable locals so the two roles can be exchanged.
    let (mut ahead, mut behind) = (ahead, behind);
    loop {
        let candidate = ahead.doc();
        match behind.skip_next(candidate) {
            SkipResult::Reached => return Some(candidate),
            SkipResult::End => return None,
            SkipResult::OverStep => {
                // `behind` went past the candidate, so it now plays the role
                // of `ahead`. This is an explicit loop rather than a recursive
                // call: Rust does not guarantee tail-call elimination, and the
                // number of role swaps is only bounded by the length of the
                // underlying doc lists.
                let overstepped = behind;
                behind = ahead;
                ahead = overstepped;
            }
        }
    }
}
// Outcome of `skip_several_scorers`, which calls `skip_next` on a slice of docsets
// with a common target.
enum SkipResultComplex {
// Every docset that was skipped reported `SkipResult::Reached`.
Reached,
// A docset reported `SkipResult::OverStep`: `other_ord` is its index in the slice
// and `candidate` is the value of its `doc()` right after the skip.
Overstep { other_ord: usize, candidate: DocId },
// A docset reported `SkipResult::End`.
End,
}
/// Calls `skip_next(target)` on each docset of `others`, in slice order,
/// leaving out the one at index `except_candidate_ord`.
///
/// The excluded docset is the one the caller already knows to be at the right
/// position: calling `skip_next` on it would advance it and miss the target.
/// Passing an index outside of the slice (e.g. `usize::max_value()`) excludes
/// nothing.
///
/// Stops at the first docset that does not report `SkipResult::Reached`:
/// - `OverStep` yields `SkipResultComplex::Overstep` with that docset's index
///   and its current `doc()`;
/// - `End` yields `SkipResultComplex::End`.
///
/// Returns `SkipResultComplex::Reached` if no docset stopped the scan.
fn skip_several_scorers<TDocSet: DocSet>(
    others: &mut [TDocSet],
    except_candidate_ord: usize,
    target: DocId,
) -> SkipResultComplex {
    let to_skip = others
        .iter_mut()
        .enumerate()
        .filter(|&(ord, _)| ord != except_candidate_ord);
    for (ord, docset) in to_skip {
        match docset.skip_next(target) {
            SkipResult::Reached => continue,
            SkipResult::End => return SkipResultComplex::End,
            SkipResult::OverStep => {
                let candidate = docset.doc();
                return SkipResultComplex::Overstep {
                    other_ord: ord,
                    candidate,
                };
            }
        }
    }
    SkipResultComplex::Reached
}
/// Calls `callback(doc, score)` for every document found in `left`, `right`
/// and all of `others`, where `score` is the sum of the scores of all of these
/// scorers.
///
/// `left` is advanced once at the start, so it is expected to be brand new;
/// `right` and `others` are only moved through `skip_next`.
///
/// The loop alternates between two steps:
/// 1. `next_in_intersection` finds a candidate shared by `left` and `right`;
/// 2. `skip_several_scorers` checks that candidate against `others`.
///
/// When one of `others` oversteps, its doc becomes the next target for `left`.
/// If `left` reaches it, that docset is already on the right doc and must not
/// be skipped again for that doc (calling `skip_next` would advance it and miss
/// it), so its index and doc are remembered in `pinned`.
fn for_each<'a, TScorer: Scorer, TOtherscorer: Scorer>(
    left: &'a mut TScorer,
    right: &'a mut TScorer,
    others: &'a mut [TOtherscorer],
    callback: &mut dyn FnMut(DocId, Score),
) {
    if !left.advance() {
        return;
    }
    // `(ord, doc)` of the docset of `others` that is known to already be
    // positioned on `doc`.
    let mut pinned: Option<(usize, DocId)> = None;
    while let Some(candidate) = next_in_intersection(left, right) {
        // The pin is only meaningful for the doc it was recorded for. If
        // `left`/`right` agreed on a different candidate in the meantime, the
        // pinned docset is behind and has to be skipped like all the others.
        // The pin is consumed here so that it can never outlive its candidate.
        let except_ord = match pinned.take() {
            Some((ord, doc)) if doc == candidate => ord,
            _ => usize::max_value(),
        };
        match skip_several_scorers(others, except_ord, candidate) {
            SkipResultComplex::Reached => {
                let others_score = others.iter_mut().map(|other| other.score()).sum::<Score>();
                let intersection_score: Score = left.score() + right.score() + others_score;
                callback(candidate, intersection_score);
                if !left.advance() {
                    return;
                }
            }
            SkipResultComplex::Overstep {
                other_ord,
                candidate: overstepped,
            } => match left.skip_next(overstepped) {
                SkipResult::End => return,
                SkipResult::Reached => pinned = Some((other_ord, overstepped)),
                // `left` is now past the overstepped doc: nothing is pinned.
                SkipResult::OverStep => {}
            },
            SkipResultComplex::End => return,
        }
    }
}
impl<TScorer, TOtherScorer> Scorer for Intersection<TScorer, TOtherScorer>
where
TScorer: Scorer,
@@ -225,85 +321,8 @@ where
+ self.others.iter_mut().map(Scorer::score).sum::<Score>()
}
fn for_each(&mut self, callback: &mut FnMut(DocId, Score)) {b
let (left, right) = (&mut self.left, &mut self.right);
if !left.advance() {
return;
}
let mut candidate = left.doc();
let mut other_candidate_ord: usize = usize::max_value();
'outer: loop {
// In the first part we look for a document in the intersection
// of the two rarest `DocSet` in the intersection.
loop {
match right.skip_next(candidate) {
SkipResult::Reached => {
break;
}
SkipResult::OverStep => {
candidate = right.doc();
other_candidate_ord = usize::max_value();
}
SkipResult::End => {
return;
}
}
match left.skip_next(candidate) {
SkipResult::Reached => {
break;
}
SkipResult::OverStep => {
candidate = left.doc();
other_candidate_ord = usize::max_value();
}
SkipResult::End => {
return;
}
}
}
// test the remaining scorers;
for (ord, docset) in self.others.iter_mut().enumerate() {
if ord == other_candidate_ord {
continue;
}
// `candidate_ord` is already at the
// right position.
//
// Calling `skip_next` would advance this docset
// and miss it.
match docset.skip_next(candidate) {
SkipResult::Reached => {}
SkipResult::OverStep => {
// this is not in the intersection,
// let's update our candidate.
candidate = docset.doc();
match left.skip_next(candidate) {
SkipResult::Reached => {
other_candidate_ord = ord;
}
SkipResult::OverStep => {
candidate = left.doc();
other_candidate_ord = usize::max_value();
}
SkipResult::End => {
return;
}
}
continue 'outer;
}
SkipResult::End => {
return;
}
}
callback(candidate, self.score())
}
}
// Delegates to the free function `for_each`, handing it `self.left`, `self.right`
// and `self.others` together with the unchanged `callback`.
fn for_each(&mut self, callback: &mut FnMut(DocId, Score)) {
for_each(&mut self.left, &mut self.right, &mut self.others, callback);
}
}

View File

@@ -16,6 +16,9 @@ pub trait Scorer: downcast_rs::Downcast + DocSet + 'static {
/// Iterates through all of the documents matched by the
/// `DocSet` and pushes the scored documents to the callback.
///
/// This method assumes that the Scorer is brand new, and that `.advance()`
/// and `.skip_next()` haven't been called yet.
fn for_each(&mut self, callback: &mut FnMut(DocId, Score)) {
while self.advance() {
callback(self.doc(), self.score());

View File

@@ -250,6 +250,16 @@ where
fn size_hint(&self) -> u32 {
0u32
}
}
impl<TScorer, TScoreCombiner> Scorer for Union<TScorer, TScoreCombiner>
where
TScoreCombiner: ScoreCombiner,
TScorer: Scorer,
{
fn score(&mut self) -> Score {
self.score
}
fn for_each(&mut self, callback: &mut FnMut(DocId, Score)) {
// TODO how do we deal with the fact that people may have called .advance() before.
@@ -261,6 +271,7 @@ where
let doc = offset + delta;
let score_combiner = &mut self.scores[delta as usize];
let score = score_combiner.score();
callback(doc, score);
score_combiner.clear();
}
}
@@ -268,16 +279,6 @@ where
}
}
// `Scorer` implementation for `Union`, available when the combiner type implements
// `ScoreCombiner` and the underlying scorer type implements `Scorer`.
impl<TScorer, TScoreCombiner> Scorer for Union<TScorer, TScoreCombiner>
where
TScoreCombiner: ScoreCombiner,
TScorer: Scorer,
{
// Returns the current value of the `score` field.
fn score(&mut self) -> Score {
self.score
}
}
#[cfg(test)]
mod tests {