From be830b03c51ddacb9dc409d7ef04b749372adb7a Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Fri, 23 Feb 2018 11:55:23 +0900 Subject: [PATCH] Bugfix in intersection.advance and impl skip_next --- src/query/intersection.rs | 55 +++++++++++++++---------- src/query/phrase_query/phrase_scorer.rs | 5 --- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/query/intersection.rs b/src/query/intersection.rs index 5635252ce..4a60fcb01 100644 --- a/src/query/intersection.rs +++ b/src/query/intersection.rs @@ -5,10 +5,10 @@ use DocId; use downcast::Downcast; use std::borrow::Borrow; use Score; -use postings::Postings; use query::term_query::{TermScorerNoDeletes, TermScorerWithDeletes}; pub fn intersect_scorers(mut docsets: Vec>) -> Box { + let num_docsets = docsets.len(); docsets.sort_by(|left, right| right.size_hint().cmp(&left.size_hint())); let rarest_opt = docsets.pop(); let second_rarest_opt = docsets.pop(); @@ -28,6 +28,7 @@ pub fn intersect_scorers(mut docsets: Vec>) -> Box { left, right, others: docsets, + num_docsets } } } @@ -43,6 +44,7 @@ pub fn intersect_scorers(mut docsets: Vec>) -> Box { left, right, others: docsets, + num_docsets } } } @@ -50,7 +52,8 @@ pub fn intersect_scorers(mut docsets: Vec>) -> Box { return box Intersection { left, right, - others: docsets + others: docsets, + num_docsets } } } @@ -62,12 +65,14 @@ pub fn intersect_scorers(mut docsets: Vec>) -> Box { pub struct Intersection> { left: TDocSet, right: TDocSet, - others: Vec + others: Vec, + num_docsets: usize } impl Intersection { pub(crate) fn new(mut docsets: Vec) -> Intersection { - assert!(docsets.len() >= 2); + let num_docsets = docsets.len(); + assert!(num_docsets >= 2); docsets.sort_by(|left, right| right.size_hint().cmp(&left.size_hint())); let left = docsets.pop().unwrap(); let right = docsets.pop().unwrap(); @@ -75,7 +80,8 @@ impl Intersection { Intersection { left, right, - others: docsets + others: docsets, + num_docsets } } } @@ -90,13 +96,25 @@ impl Intersection { } } +impl Intersection { + pub fn docset_mut(&mut self, ord: usize) -> &mut DocSet { + match ord { + 0 => &mut self.left, + 1 => &mut self.right, + n => &mut self.others[n - 2] + } + } +} + impl DocSet for Intersection { #[allow(never_loop)] fn advance(&mut self) -> bool { let (left, right) = (&mut self.left, &mut self.right); + if !left.advance() { return false; } + let mut candidate = left.doc(); let mut other_candidate_ord: usize = usize::max_value(); @@ -112,6 +130,7 @@ impl DocSet for Intersection { return false; } } + match left.skip_next(candidate) { SkipResult::Reached => { break; } SkipResult::OverStep => { @@ -120,8 +139,8 @@ impl DocSet for Intersection { return false; } } - } + } // test the remaining scorers; for (ord, docset) in self.others.iter_mut().enumerate() { if ord != other_candidate_ord { @@ -137,41 +156,34 @@ impl DocSet for Intersection { - other_candidate_ord = ord; - } + SkipResult::Reached => { other_candidate_ord = ord; } SkipResult::OverStep => { + candidate = left.doc(); other_candidate_ord = usize::max_value(); } - SkipResult::End => { - return false; - } + SkipResult::End => { return false; } } continue 'outer; } - SkipResult::End => { - return false; - } + SkipResult::End => { return false; } } } } - return true; } } + fn skip_next(&mut self, target: DocId) -> SkipResult { - unimplemented!("werwer"); - - /* // We optimize skipping by skipping every single member // of the intersection to target. let mut current_target: DocId = target; - let mut current_ord = self.docsets.len(); + let mut current_ord = self.num_docsets; 'outer: loop { - for (ord, docset) in self.docsets.iter_mut().enumerate() { + for ord in 0..self.num_docsets { + let docset = self.docset_mut(ord); if ord == current_ord { continue; } @@ -196,7 +208,6 @@ impl DocSet for Intersection DocId { diff --git a/src/query/phrase_query/phrase_scorer.rs b/src/query/phrase_query/phrase_scorer.rs index 334b7fdc0..431b77abc 100644 --- a/src/query/phrase_query/phrase_scorer.rs +++ b/src/query/phrase_query/phrase_scorer.rs @@ -2,10 +2,6 @@ use DocId; use docset::{DocSet, SkipResult}; use postings::Postings; use query::{Intersection, Scorer}; -use fastfield::DeleteBitSet; -use query::intersect_scorers; -use Score; -use std::cmp::Ordering; use std::mem; @@ -52,7 +48,6 @@ pub struct PhraseScorer { result: Vec } -#[inline(always)] fn intersection_arr(left: &[u32], right: &[u32], output: &mut [u32]) -> usize { let mut left_i = 0; let mut right_i = 0;