From 40659d4d07a2fb70e98b95a0c1e11730be55c7ec Mon Sep 17 00:00:00 2001 From: PSeitz-dd Date: Wed, 24 Sep 2025 10:58:46 +0200 Subject: [PATCH] improve naming in buffered_union (#2705) --- src/query/union/buffered_union.rs | 60 ++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/src/query/union/buffered_union.rs b/src/query/union/buffered_union.rs index 5fc946ee1..4811ad128 100644 --- a/src/query/union/buffered_union.rs +++ b/src/query/union/buffered_union.rs @@ -5,8 +5,10 @@ use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner}; use crate::query::Scorer; use crate::{DocId, Score}; -const HORIZON_NUM_TINYBITSETS: usize = 64; -const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32; +// The buffered union looks ahead within a fixed-size sliding window +// of upcoming document IDs (the "horizon"). +const HORIZON_NUM_TINYBITSETS: usize = HORIZON as usize / 64; +const HORIZON: u32 = 64u32 * 64u32; // `drain_filter` is not stable yet. // This function is similar except that it does is not unstable, and @@ -27,12 +29,26 @@ where P: FnMut(&mut T) -> bool { /// Creates a `DocSet` that iterate through the union of two or more `DocSet`s. pub struct BufferedUnionScorer { + /// Active scorers (already filtered of `TERMINATED`). docsets: Vec, + /// Sliding window presence map for upcoming docs. + /// + /// There are `HORIZON_NUM_TINYBITSETS` buckets, each covering + /// a span of 64 doc IDs. Bucket `i` represents the range + /// `[window_start_doc + i*64, window_start_doc + (i+1)*64)`. bitsets: Box<[TinySet; HORIZON_NUM_TINYBITSETS]>, + // Index of the current TinySet bucket within the sliding window. + bucket_idx: usize, + /// Per-doc score combiners for the current window. + /// + /// these accumulators merge contributions from all scorers that + /// hit the same doc within the buffered window. scores: Box<[TScoreCombiner; HORIZON as usize]>, - cursor: usize, - offset: DocId, + /// Start doc ID (inclusive) of the current sliding window. + window_start_doc: DocId, + /// Current doc ID of the union. doc: DocId, + /// Combined score for current `doc` as produced by `TScoreCombiner`. score: Score, } @@ -74,8 +90,8 @@ impl BufferedUnionScorer BufferedUnionScorer bool { if let Some(min_doc) = self.docsets.iter().map(DocSet::doc).min() { - self.offset = min_doc; - self.cursor = 0; + // Reset the sliding window to start at the smallest doc + // across all scorers and prebuffer within the horizon. + self.window_start_doc = min_doc; + self.bucket_idx = 0; self.doc = min_doc; refill( &mut self.docsets, @@ -105,16 +123,16 @@ impl BufferedUnionScorer bool { - while self.cursor < HORIZON_NUM_TINYBITSETS { - if let Some(val) = self.bitsets[self.cursor].pop_lowest() { - let delta = val + (self.cursor as u32) * 64; - self.doc = self.offset + delta; + while self.bucket_idx < HORIZON_NUM_TINYBITSETS { + if let Some(val) = self.bitsets[self.bucket_idx].pop_lowest() { + let delta = val + (self.bucket_idx as u32) * 64; + self.doc = self.window_start_doc + delta; let score_combiner = &mut self.scores[delta as usize]; self.score = score_combiner.score(); score_combiner.clear(); return true; } else { - self.cursor += 1; + self.bucket_idx += 1; } } false @@ -144,19 +162,19 @@ where if self.doc >= target { return self.doc; } - let gap = target - self.offset; + let gap = target - self.window_start_doc; if gap < HORIZON { // Our value is within the buffered horizon. - // Skipping to corresponding bucket. - let new_cursor = gap as usize / 64; - for obsolete_tinyset in &mut self.bitsets[self.cursor..new_cursor] { + // Skipping to corresponding bucket. + let new_bucket_idx = gap as usize / 64; + for obsolete_tinyset in &mut self.bitsets[self.bucket_idx..new_bucket_idx] { obsolete_tinyset.clear(); } - for score_combiner in &mut self.scores[self.cursor * 64..new_cursor * 64] { + for score_combiner in &mut self.scores[self.bucket_idx * 64..new_bucket_idx * 64] { score_combiner.clear(); } - self.cursor = new_cursor; + self.bucket_idx = new_bucket_idx; // Advancing until we reach the end of the bucket // or we reach a doc greater or equal to the target. @@ -211,7 +229,7 @@ where if self.doc == TERMINATED { return 0; } - let mut count = self.bitsets[self.cursor..HORIZON_NUM_TINYBITSETS] + let mut count = self.bitsets[self.bucket_idx..HORIZON_NUM_TINYBITSETS] .iter() .map(|bitset| bitset.len()) .sum::() @@ -225,7 +243,7 @@ where bitset.clear(); } } - self.cursor = HORIZON_NUM_TINYBITSETS; + self.bucket_idx = HORIZON_NUM_TINYBITSETS; count } }