Suffix-sum pruning for multi-term intersection candidates

After scoring each secondary in Phase 2, check whether the score
accumulated so far plus the remaining secondaries' block_max scores
can still beat the threshold. If it cannot, skip to the next candidate
early, avoiding expensive seeks into later secondaries.

Improves three-term intersection by ~8% on the balanced benchmark
while keeping two-term performance neutral.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Paul Masurel
2026-03-30 22:43:52 +02:00
parent 8a7aeed030
commit 7559bad5fc
4 changed files with 45 additions and 11 deletions

View File

@@ -291,13 +291,12 @@ impl BlockSegmentPostings {
/// `.load_block()` needs to be called manually afterwards.
/// If all docs are smaller than target, the block loaded may be empty,
/// or be the last (incomplete) VInt block.
#[inline]
pub(crate) fn seek_block(&mut self, target_doc: DocId) -> bool {
if self.skip_reader.seek(target_doc) {
self.block_max_score_cache = None;
self.block_loaded = false;
}
self.skip_reader.remaining_docs != 0
self.skip_reader.has_remaining_docs()
}
pub(crate) fn block_is_loaded(&self) -> bool {

View File

@@ -96,7 +96,7 @@ pub(crate) struct SkipReader {
owned_read: OwnedBytes,
skip_info: IndexRecordOption,
byte_offset: usize,
pub remaining_docs: u32, // number of docs remaining, including the
remaining_docs: u32, // number of docs remaining, including the
// documents in the current block.
block_info: BlockInfo,
@@ -146,6 +146,11 @@ impl SkipReader {
skip_reader
}
/// Returns `true` while `remaining_docs` is non-zero, i.e. there are still
/// documents left to read, counting those in the current block.
#[inline(always)]
pub fn has_remaining_docs(&self) -> bool {
self.remaining_docs != 0
}
pub fn reset(&mut self, data: OwnedBytes, doc_freq: u32) {
self.last_doc_in_block = if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
0

View File

@@ -1,6 +1,7 @@
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
use crate::query::term_query::TermScorer;
use crate::query::Scorer;
use crate::query::weight::for_each_pruning_scorer;
use crate::query::{Intersection, Scorer};
use crate::{DocId, DocSet, Score, TERMINATED};
/// Block-max pruning for top-K over intersection of term scorers.
@@ -15,13 +16,15 @@ use crate::{DocId, DocSet, Score, TERMINATED};
///
/// # Preconditions
/// - `scorers` has at least 2 elements
/// - `scorers` has at most 16 elements
/// - All scorers read frequencies (`FreqReadingOption::ReadFreq`)
pub fn block_wand_intersection(
pub(crate) fn block_wand_intersection(
mut scorers: Vec<TermScorer>,
mut threshold: Score,
callback: &mut dyn FnMut(DocId, Score) -> Score,
) {
assert!(scorers.len() >= 2);
assert!(scorers.len() <= 16);
// Sort by cost (ascending). scorers[0] becomes the "leader" (rarest term).
scorers.sort_by_key(TermScorer::size_hint);
@@ -61,12 +64,28 @@ pub fn block_wand_intersection(
let mut window_end: DocId = leader.last_doc_in_block();
let mut secondary_block_max_sum: Score = 0.0;
for secondary in secondaries.iter_mut() {
let mut secondary_block_max_scores = [0.0f32; 16];
let num_secondaries = secondaries.len();
for (idx, secondary) in secondaries.iter_mut().enumerate() {
if !secondary.block_cursor().seek_block(doc) {
return;
}
window_end = window_end.min(secondary.last_doc_in_block());
secondary_block_max_sum += secondary.block_max_score();
let bms = secondary.block_max_score();
secondary_block_max_scores[idx] = bms;
secondary_block_max_sum += bms;
}
// Precompute suffix sums: suffix[i] = sum of block_max for secondaries[i+1..].
// Used in Phase 2 to prune candidates that can't beat threshold even with
// remaining secondaries contributing their block_max.
let mut secondary_suffix_block_max = [0.0f32; 16];
{
let mut running = 0.0f32;
for idx in (0..num_secondaries).rev() {
secondary_suffix_block_max[idx] = running;
running += secondary_block_max_scores[idx];
}
}
if leader_block_max + secondary_block_max_sum <= threshold {
@@ -122,7 +141,7 @@ pub fn block_wand_intersection(
let candidate_doc = candidate_doc_ids[candidate_idx];
let mut total_score: Score = candidate_scores[candidate_idx];
for secondary in secondaries.iter_mut() {
for (secondary_idx, secondary) in secondaries.iter_mut().enumerate() {
// If a previous candidate already advanced this secondary past
// candidate_doc, the candidate can't be in the intersection.
if secondary.doc() > candidate_doc {
@@ -133,6 +152,12 @@ pub fn block_wand_intersection(
continue 'next_candidate;
}
total_score += secondary.score();
// Prune: even if all remaining secondaries score at their block max,
// can we still beat the threshold?
if total_score + secondary_suffix_block_max[secondary_idx] <= threshold {
continue 'next_candidate;
}
}
// All secondaries matched.

View File

@@ -9,8 +9,8 @@ use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner};
use crate::query::term_query::TermScorer;
use crate::query::weight::{for_each_docset_buffered, for_each_pruning_scorer, for_each_scorer};
use crate::query::{
intersect_scorers, AllScorer, BufferedUnionScorer, EmptyScorer, Exclude, Explanation, Occur,
RequiredOptionalScorer, Scorer, Weight,
intersect_scorers, AllScorer, BufferedUnionScorer, EmptyScorer, Exclude, Explanation,
Intersection, Occur, RequiredOptionalScorer, Scorer, Weight,
};
use crate::{DocId, Score};
@@ -574,7 +574,12 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
super::block_wand(term_scorers, threshold, callback);
}
SpecializedScorer::TermIntersection(term_scorers) => {
super::block_wand_intersection(term_scorers, threshold, callback);
if term_scorers.len() >= 16 {
let mut intersection = Intersection::new(term_scorers, reader.max_doc());
for_each_pruning_scorer(&mut intersection, threshold, callback);
} else {
super::block_wand_intersection(term_scorers, threshold, callback);
}
}
SpecializedScorer::Other(mut scorer) => {
for_each_pruning_scorer(scorer.as_mut(), threshold, callback);