improve naming in buffered_union (#2705)

This commit is contained in:
PSeitz-dd
2025-09-24 10:58:46 +02:00
committed by GitHub
parent e1e131a804
commit 40659d4d07

View File

@@ -5,8 +5,10 @@ use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner};
use crate::query::Scorer;
use crate::{DocId, Score};
const HORIZON_NUM_TINYBITSETS: usize = 64;
const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32;
// The buffered union looks ahead within a fixed-size sliding window
// of upcoming document IDs (the "horizon").
const HORIZON_NUM_TINYBITSETS: usize = HORIZON as usize / 64;
const HORIZON: u32 = 64u32 * 64u32;
// `drain_filter` is not stable yet.
// This function is similar except that it does is not unstable, and
@@ -27,12 +29,26 @@ where P: FnMut(&mut T) -> bool {
/// Creates a `DocSet` that iterate through the union of two or more `DocSet`s.
pub struct BufferedUnionScorer<TScorer, TScoreCombiner = DoNothingCombiner> {
/// Active scorers (already filtered of `TERMINATED`).
docsets: Vec<TScorer>,
/// Sliding window presence map for upcoming docs.
///
/// There are `HORIZON_NUM_TINYBITSETS` buckets, each covering
/// a span of 64 doc IDs. Bucket `i` represents the range
/// `[window_start_doc + i*64, window_start_doc + (i+1)*64)`.
bitsets: Box<[TinySet; HORIZON_NUM_TINYBITSETS]>,
// Index of the current TinySet bucket within the sliding window.
bucket_idx: usize,
/// Per-doc score combiners for the current window.
///
/// these accumulators merge contributions from all scorers that
/// hit the same doc within the buffered window.
scores: Box<[TScoreCombiner; HORIZON as usize]>,
cursor: usize,
offset: DocId,
/// Start doc ID (inclusive) of the current sliding window.
window_start_doc: DocId,
/// Current doc ID of the union.
doc: DocId,
/// Combined score for current `doc` as produced by `TScoreCombiner`.
score: Score,
}
@@ -74,8 +90,8 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> BufferedUnionScorer<TScorer
docsets: non_empty_docsets,
bitsets: Box::new([TinySet::empty(); HORIZON_NUM_TINYBITSETS]),
scores: Box::new([score_combiner_fn(); HORIZON as usize]),
cursor: HORIZON_NUM_TINYBITSETS,
offset: 0,
bucket_idx: HORIZON_NUM_TINYBITSETS,
window_start_doc: 0,
doc: 0,
score: 0.0,
};
@@ -89,8 +105,10 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> BufferedUnionScorer<TScorer
fn refill(&mut self) -> bool {
if let Some(min_doc) = self.docsets.iter().map(DocSet::doc).min() {
self.offset = min_doc;
self.cursor = 0;
// Reset the sliding window to start at the smallest doc
// across all scorers and prebuffer within the horizon.
self.window_start_doc = min_doc;
self.bucket_idx = 0;
self.doc = min_doc;
refill(
&mut self.docsets,
@@ -105,16 +123,16 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> BufferedUnionScorer<TScorer
}
fn advance_buffered(&mut self) -> bool {
while self.cursor < HORIZON_NUM_TINYBITSETS {
if let Some(val) = self.bitsets[self.cursor].pop_lowest() {
let delta = val + (self.cursor as u32) * 64;
self.doc = self.offset + delta;
while self.bucket_idx < HORIZON_NUM_TINYBITSETS {
if let Some(val) = self.bitsets[self.bucket_idx].pop_lowest() {
let delta = val + (self.bucket_idx as u32) * 64;
self.doc = self.window_start_doc + delta;
let score_combiner = &mut self.scores[delta as usize];
self.score = score_combiner.score();
score_combiner.clear();
return true;
} else {
self.cursor += 1;
self.bucket_idx += 1;
}
}
false
@@ -144,19 +162,19 @@ where
if self.doc >= target {
return self.doc;
}
let gap = target - self.offset;
let gap = target - self.window_start_doc;
if gap < HORIZON {
// Our value is within the buffered horizon.
// Skipping to corresponding bucket.
let new_cursor = gap as usize / 64;
for obsolete_tinyset in &mut self.bitsets[self.cursor..new_cursor] {
// Skipping to corresponding bucket.
let new_bucket_idx = gap as usize / 64;
for obsolete_tinyset in &mut self.bitsets[self.bucket_idx..new_bucket_idx] {
obsolete_tinyset.clear();
}
for score_combiner in &mut self.scores[self.cursor * 64..new_cursor * 64] {
for score_combiner in &mut self.scores[self.bucket_idx * 64..new_bucket_idx * 64] {
score_combiner.clear();
}
self.cursor = new_cursor;
self.bucket_idx = new_bucket_idx;
// Advancing until we reach the end of the bucket
// or we reach a doc greater or equal to the target.
@@ -211,7 +229,7 @@ where
if self.doc == TERMINATED {
return 0;
}
let mut count = self.bitsets[self.cursor..HORIZON_NUM_TINYBITSETS]
let mut count = self.bitsets[self.bucket_idx..HORIZON_NUM_TINYBITSETS]
.iter()
.map(|bitset| bitset.len())
.sum::<u32>()
@@ -225,7 +243,7 @@ where
bitset.clear();
}
}
self.cursor = HORIZON_NUM_TINYBITSETS;
self.bucket_idx = HORIZON_NUM_TINYBITSETS;
count
}
}