Faster union counts

This commit is contained in:
Paul Masurel
2018-02-19 10:17:04 +09:00
parent 42fd3fe5c7
commit da3d372e6e
3 changed files with 61 additions and 9 deletions

View File

@@ -119,6 +119,14 @@ impl TinySet {
pub fn range_greater_or_equal(from_included: u32) -> TinySet {
    // Built as the complement of `TinySet::range_lower(from_included)`.
    let below_bound = TinySet::range_lower(from_included);
    below_bound.complement()
}
/// Resets the set in place by zeroing the underlying `u64`,
/// so that no bit remains set afterwards.
pub fn clear(&mut self) {
    self.0 = 0;
}
/// Returns the number of bits that are set in the underlying `u64`.
pub fn len(&self) -> u32 {
    u64::count_ones(self.0)
}
}
#[derive(Clone)]

View File

@@ -82,7 +82,6 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> Union<TScorer, TScoreCombin
self.offset = min_doc;
self.cursor = 0;
refill(&mut self.docsets, &mut *self.bitsets, &mut *self.scores, min_doc);
self.advance();
true
} else {
false
@@ -111,7 +110,34 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> DocSet for Union<TScorer, T
if self.advance_buffered() {
return true;
}
self.refill()
if self.refill() {
self.advance();
true
} else {
false
}
}
/// Returns the total `len()` of the tinysets still buffered from the
/// current cursor onward, plus the `len()` of every tinyset produced by
/// each subsequent successful `refill()`.
///
/// Side effects: every tinyset is cleared, `refill()` is called until it
/// returns `false`, and the cursor is left at `HORIZON_NUM_TINYBITSETS`.
///
/// NOTE(review): presumably each set bit stands for one matching doc,
/// which would make this the number of remaining docs — confirm against
/// `refill`.
fn count(&mut self) -> u32 {
    // Only the part of the buffer that has not been consumed yet counts.
    let mut total = self.bitsets[self.cursor..HORIZON_NUM_TINYBITSETS]
        .iter()
        .map(|tinyset| tinyset.len())
        .sum::<u32>();
    loop {
        // The buffer is wiped before each refill attempt.
        for tinyset in self.bitsets.iter_mut() {
            tinyset.clear();
        }
        if !self.refill() {
            break;
        }
        // A freshly refilled buffer is counted in full.
        total += self.bitsets
            .iter()
            .map(|tinyset| tinyset.len())
            .sum::<u32>();
    }
    self.cursor = HORIZON_NUM_TINYBITSETS;
    total
}
fn skip_next(&mut self, target: DocId) -> SkipResult {
@@ -134,7 +160,7 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> DocSet for Union<TScorer, T
// Skipping to corresponding bucket.
let new_cursor = gap as usize / 64;
for obsolete_tinyset in &mut self.bitsets[self.cursor..new_cursor] {
*obsolete_tinyset = TinySet::empty();
obsolete_tinyset.clear();
}
for score_combiner in &mut self.scores[self.cursor*64..new_cursor*64] {
score_combiner.clear();
@@ -178,6 +204,7 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> DocSet for Union<TScorer, T
// At this point, all of the docsets
// are positioned on a doc >= the target.
if self.refill() {
self.advance();
if self.doc() == target {
SkipResult::Reached
} else {

View File

@@ -13,15 +13,32 @@ use query::RequiredOptionalScorer;
use query::score_combiner::{SumWithCoordsCombiner, DoNothingCombiner, ScoreCombiner};
use Result;
fn scorer_union<'a, TScoreCombiner>(docsets: Vec<Box<Scorer + 'a>>) -> Box<Scorer + 'a>
fn scorer_union<'a, TScoreCombiner>(scorers: Vec<Box<Scorer + 'a>>) -> Box<Scorer + 'a>
where TScoreCombiner: ScoreCombiner + 'static
{
assert!(!docsets.is_empty());
if docsets.len() == 1 {
docsets.into_iter().next().unwrap() //< we checked the size beforehands
assert!(!scorers.is_empty());
if scorers.len() == 1 {
scorers.into_iter().next().unwrap() //< we checked the size beforehands
} else {
// TODO have a UnionScorer instead.
box Union::<_, TScoreCombiner>::from(docsets)
if scorers
.iter()
.all(|scorer| {
let scorer_ref:&Scorer = scorer.borrow();
Downcast::<TermScorer>::is_type(scorer_ref)
}) {
let scorers: Vec<TermScorer> = scorers.into_iter()
.map(|scorer| {
*Downcast::<TermScorer>::downcast(scorer)
.expect("downcasting should not have failed, we\
checked in advance that the type were correct.")
})
.collect();
let scorer: Box<Scorer> = box Union::<TermScorer, TScoreCombiner>::from(scorers);
scorer
} else {
let scorer: Box<Scorer> = box Union::<_, TScoreCombiner>::from(scorers);
scorer
}
}
}