mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-31 15:40:40 +00:00
Faster union counts
This commit is contained in:
@@ -119,6 +119,14 @@ impl TinySet {
|
||||
pub fn range_greater_or_equal(from_included: u32) -> TinySet {
|
||||
TinySet::range_lower(from_included).complement()
|
||||
}
|
||||
|
||||
pub fn clear(&mut self) {
|
||||
self.0 = 0u64;
|
||||
}
|
||||
|
||||
pub fn len(&self) -> u32 {
|
||||
self.0.count_ones()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
||||
@@ -82,7 +82,6 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> Union<TScorer, TScoreCombin
|
||||
self.offset = min_doc;
|
||||
self.cursor = 0;
|
||||
refill(&mut self.docsets, &mut *self.bitsets, &mut *self.scores, min_doc);
|
||||
self.advance();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
@@ -111,7 +110,34 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> DocSet for Union<TScorer, T
|
||||
if self.advance_buffered() {
|
||||
return true;
|
||||
}
|
||||
self.refill()
|
||||
if self.refill() {
|
||||
self.advance();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn count(&mut self) -> u32 {
|
||||
let mut count = self.bitsets[self.cursor..HORIZON_NUM_TINYBITSETS]
|
||||
.iter()
|
||||
.map(|bitset| bitset.len())
|
||||
.sum::<u32>();
|
||||
for bitset in self.bitsets.iter_mut() {
|
||||
bitset.clear();
|
||||
}
|
||||
while self.refill() {
|
||||
count += self.bitsets
|
||||
.iter()
|
||||
.map(|bitset| bitset.len())
|
||||
.sum::<u32>();
|
||||
for bitset in self.bitsets.iter_mut() {
|
||||
bitset.clear();
|
||||
}
|
||||
}
|
||||
self.cursor = HORIZON_NUM_TINYBITSETS;
|
||||
count
|
||||
}
|
||||
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult {
|
||||
@@ -134,7 +160,7 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> DocSet for Union<TScorer, T
|
||||
// Skipping to corresponding bucket.
|
||||
let new_cursor = gap as usize / 64;
|
||||
for obsolete_tinyset in &mut self.bitsets[self.cursor..new_cursor] {
|
||||
*obsolete_tinyset = TinySet::empty();
|
||||
obsolete_tinyset.clear();
|
||||
}
|
||||
for score_combiner in &mut self.scores[self.cursor*64..new_cursor*64] {
|
||||
score_combiner.clear();
|
||||
@@ -178,6 +204,7 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> DocSet for Union<TScorer, T
|
||||
// at this point all of the docsets
|
||||
// are positioned on a doc >= to the target.
|
||||
if self.refill() {
|
||||
self.advance();
|
||||
if self.doc() == target {
|
||||
SkipResult::Reached
|
||||
} else {
|
||||
|
||||
@@ -13,15 +13,32 @@ use query::RequiredOptionalScorer;
|
||||
use query::score_combiner::{SumWithCoordsCombiner, DoNothingCombiner, ScoreCombiner};
|
||||
use Result;
|
||||
|
||||
fn scorer_union<'a, TScoreCombiner>(docsets: Vec<Box<Scorer + 'a>>) -> Box<Scorer + 'a>
|
||||
fn scorer_union<'a, TScoreCombiner>(scorers: Vec<Box<Scorer + 'a>>) -> Box<Scorer + 'a>
|
||||
where TScoreCombiner: ScoreCombiner + 'static
|
||||
{
|
||||
assert!(!docsets.is_empty());
|
||||
if docsets.len() == 1 {
|
||||
docsets.into_iter().next().unwrap() //< we checked the size beforehands
|
||||
assert!(!scorers.is_empty());
|
||||
if scorers.len() == 1 {
|
||||
scorers.into_iter().next().unwrap() //< we checked the size beforehands
|
||||
} else {
|
||||
// TODO have a UnionScorer instead.
|
||||
box Union::<_, TScoreCombiner>::from(docsets)
|
||||
if scorers
|
||||
.iter()
|
||||
.all(|scorer| {
|
||||
let scorer_ref:&Scorer = scorer.borrow();
|
||||
Downcast::<TermScorer>::is_type(scorer_ref)
|
||||
}) {
|
||||
let scorers: Vec<TermScorer> = scorers.into_iter()
|
||||
.map(|scorer| {
|
||||
*Downcast::<TermScorer>::downcast(scorer)
|
||||
.expect("downcasting should not have failed, we\
|
||||
checked in advance that the type were correct.")
|
||||
})
|
||||
.collect();
|
||||
let scorer: Box<Scorer> = box Union::<TermScorer, TScoreCombiner>::from(scorers);
|
||||
scorer
|
||||
} else {
|
||||
let scorer: Box<Scorer> = box Union::<_, TScoreCombiner>::from(scorers);
|
||||
scorer
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user