mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
improve naming in buffered_union (#2705)
This commit is contained in:
@@ -5,8 +5,10 @@ use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner};
|
|||||||
use crate::query::Scorer;
|
use crate::query::Scorer;
|
||||||
use crate::{DocId, Score};
|
use crate::{DocId, Score};
|
||||||
|
|
||||||
const HORIZON_NUM_TINYBITSETS: usize = 64;
|
// The buffered union looks ahead within a fixed-size sliding window
|
||||||
const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32;
|
// of upcoming document IDs (the "horizon").
|
||||||
|
const HORIZON_NUM_TINYBITSETS: usize = HORIZON as usize / 64;
|
||||||
|
const HORIZON: u32 = 64u32 * 64u32;
|
||||||
|
|
||||||
// `drain_filter` is not stable yet.
|
// `drain_filter` is not stable yet.
|
||||||
// This function is similar except that it is not unstable, and
|
// This function is similar except that it is not unstable, and
|
||||||
@@ -27,12 +29,26 @@ where P: FnMut(&mut T) -> bool {
|
|||||||
|
|
||||||
/// Creates a `DocSet` that iterates through the union of two or more `DocSet`s.
|
/// Creates a `DocSet` that iterates through the union of two or more `DocSet`s.
|
||||||
pub struct BufferedUnionScorer<TScorer, TScoreCombiner = DoNothingCombiner> {
|
pub struct BufferedUnionScorer<TScorer, TScoreCombiner = DoNothingCombiner> {
|
||||||
|
/// Active scorers (already filtered of `TERMINATED`).
|
||||||
docsets: Vec<TScorer>,
|
docsets: Vec<TScorer>,
|
||||||
|
/// Sliding window presence map for upcoming docs.
|
||||||
|
///
|
||||||
|
/// There are `HORIZON_NUM_TINYBITSETS` buckets, each covering
|
||||||
|
/// a span of 64 doc IDs. Bucket `i` represents the range
|
||||||
|
/// `[window_start_doc + i*64, window_start_doc + (i+1)*64)`.
|
||||||
bitsets: Box<[TinySet; HORIZON_NUM_TINYBITSETS]>,
|
bitsets: Box<[TinySet; HORIZON_NUM_TINYBITSETS]>,
|
||||||
|
/// Index of the current TinySet bucket within the sliding window.
|
||||||
|
bucket_idx: usize,
|
||||||
|
/// Per-doc score combiners for the current window.
|
||||||
|
///
|
||||||
|
/// These accumulators merge contributions from all scorers that
|
||||||
|
/// hit the same doc within the buffered window.
|
||||||
scores: Box<[TScoreCombiner; HORIZON as usize]>,
|
scores: Box<[TScoreCombiner; HORIZON as usize]>,
|
||||||
cursor: usize,
|
/// Start doc ID (inclusive) of the current sliding window.
|
||||||
offset: DocId,
|
window_start_doc: DocId,
|
||||||
|
/// Current doc ID of the union.
|
||||||
doc: DocId,
|
doc: DocId,
|
||||||
|
/// Combined score for current `doc` as produced by `TScoreCombiner`.
|
||||||
score: Score,
|
score: Score,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,8 +90,8 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> BufferedUnionScorer<TScorer
|
|||||||
docsets: non_empty_docsets,
|
docsets: non_empty_docsets,
|
||||||
bitsets: Box::new([TinySet::empty(); HORIZON_NUM_TINYBITSETS]),
|
bitsets: Box::new([TinySet::empty(); HORIZON_NUM_TINYBITSETS]),
|
||||||
scores: Box::new([score_combiner_fn(); HORIZON as usize]),
|
scores: Box::new([score_combiner_fn(); HORIZON as usize]),
|
||||||
cursor: HORIZON_NUM_TINYBITSETS,
|
bucket_idx: HORIZON_NUM_TINYBITSETS,
|
||||||
offset: 0,
|
window_start_doc: 0,
|
||||||
doc: 0,
|
doc: 0,
|
||||||
score: 0.0,
|
score: 0.0,
|
||||||
};
|
};
|
||||||
@@ -89,8 +105,10 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> BufferedUnionScorer<TScorer
|
|||||||
|
|
||||||
fn refill(&mut self) -> bool {
|
fn refill(&mut self) -> bool {
|
||||||
if let Some(min_doc) = self.docsets.iter().map(DocSet::doc).min() {
|
if let Some(min_doc) = self.docsets.iter().map(DocSet::doc).min() {
|
||||||
self.offset = min_doc;
|
// Reset the sliding window to start at the smallest doc
|
||||||
self.cursor = 0;
|
// across all scorers and prebuffer within the horizon.
|
||||||
|
self.window_start_doc = min_doc;
|
||||||
|
self.bucket_idx = 0;
|
||||||
self.doc = min_doc;
|
self.doc = min_doc;
|
||||||
refill(
|
refill(
|
||||||
&mut self.docsets,
|
&mut self.docsets,
|
||||||
@@ -105,16 +123,16 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> BufferedUnionScorer<TScorer
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn advance_buffered(&mut self) -> bool {
|
fn advance_buffered(&mut self) -> bool {
|
||||||
while self.cursor < HORIZON_NUM_TINYBITSETS {
|
while self.bucket_idx < HORIZON_NUM_TINYBITSETS {
|
||||||
if let Some(val) = self.bitsets[self.cursor].pop_lowest() {
|
if let Some(val) = self.bitsets[self.bucket_idx].pop_lowest() {
|
||||||
let delta = val + (self.cursor as u32) * 64;
|
let delta = val + (self.bucket_idx as u32) * 64;
|
||||||
self.doc = self.offset + delta;
|
self.doc = self.window_start_doc + delta;
|
||||||
let score_combiner = &mut self.scores[delta as usize];
|
let score_combiner = &mut self.scores[delta as usize];
|
||||||
self.score = score_combiner.score();
|
self.score = score_combiner.score();
|
||||||
score_combiner.clear();
|
score_combiner.clear();
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
self.cursor += 1;
|
self.bucket_idx += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
false
|
false
|
||||||
@@ -144,19 +162,19 @@ where
|
|||||||
if self.doc >= target {
|
if self.doc >= target {
|
||||||
return self.doc;
|
return self.doc;
|
||||||
}
|
}
|
||||||
let gap = target - self.offset;
|
let gap = target - self.window_start_doc;
|
||||||
if gap < HORIZON {
|
if gap < HORIZON {
|
||||||
// Our value is within the buffered horizon.
|
// Our value is within the buffered horizon.
|
||||||
|
|
||||||
// Skipping to corresponding bucket.
|
// Skipping to corresponding bucket.
|
||||||
let new_cursor = gap as usize / 64;
|
let new_bucket_idx = gap as usize / 64;
|
||||||
for obsolete_tinyset in &mut self.bitsets[self.cursor..new_cursor] {
|
for obsolete_tinyset in &mut self.bitsets[self.bucket_idx..new_bucket_idx] {
|
||||||
obsolete_tinyset.clear();
|
obsolete_tinyset.clear();
|
||||||
}
|
}
|
||||||
for score_combiner in &mut self.scores[self.cursor * 64..new_cursor * 64] {
|
for score_combiner in &mut self.scores[self.bucket_idx * 64..new_bucket_idx * 64] {
|
||||||
score_combiner.clear();
|
score_combiner.clear();
|
||||||
}
|
}
|
||||||
self.cursor = new_cursor;
|
self.bucket_idx = new_bucket_idx;
|
||||||
|
|
||||||
// Advancing until we reach the end of the bucket
|
// Advancing until we reach the end of the bucket
|
||||||
// or we reach a doc greater or equal to the target.
|
// or we reach a doc greater or equal to the target.
|
||||||
@@ -211,7 +229,7 @@ where
|
|||||||
if self.doc == TERMINATED {
|
if self.doc == TERMINATED {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
let mut count = self.bitsets[self.cursor..HORIZON_NUM_TINYBITSETS]
|
let mut count = self.bitsets[self.bucket_idx..HORIZON_NUM_TINYBITSETS]
|
||||||
.iter()
|
.iter()
|
||||||
.map(|bitset| bitset.len())
|
.map(|bitset| bitset.len())
|
||||||
.sum::<u32>()
|
.sum::<u32>()
|
||||||
@@ -225,7 +243,7 @@ where
|
|||||||
bitset.clear();
|
bitset.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.cursor = HORIZON_NUM_TINYBITSETS;
|
self.bucket_idx = HORIZON_NUM_TINYBITSETS;
|
||||||
count
|
count
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user