This commit is contained in:
Paul Masurel
2026-01-13 20:21:22 +01:00
parent 1d5fe6bc7c
commit 799e88adbd
7 changed files with 58 additions and 17 deletions

View File

@@ -4,6 +4,7 @@ use common::OwnedBytes;
use crate::fieldnorm::FieldNormReader;
use crate::postings::FreqReadingOption;
use crate::query::Bm25Weight;
use crate::schema::IndexRecordOption;
use crate::{DocId, Score};
@@ -70,4 +71,10 @@ pub trait PostingsReader: Sized {
// TODO Move to the codec and use the serializer.
fn empty() -> Self;
/// Returns the block max score for the current block, derived from the given
/// fieldnorm reader and BM25 weight.
///
/// NOTE(review): this takes `&mut self`, presumably so that implementors can memoize
/// the computed value between calls — confirm against the implementations.
fn block_max_score(
&mut self,
fieldnorm_reader: &FieldNormReader,
bm25_weight: &Bm25Weight,
) -> Score;
}

View File

@@ -4,12 +4,12 @@ use crate::schema::IndexRecordOption;
use crate::Score;
mod block;
mod block_segment_postings;
mod postings_serializer;
mod standard_postings_reader;
mod standard_postings_serializer;
mod skip;
pub use block_segment_postings::StandardPostingsReader;
pub use postings_serializer::StandardPostingsSerializer;
pub use standard_postings_reader::StandardPostingsReader;
pub use standard_postings_serializer::StandardPostingsSerializer;
pub struct StandardPostingsCodec;

View File

@@ -269,6 +269,45 @@ impl PostingsReader for StandardPostingsReader {
skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
}
}
/// Gives the block max score of the block the reader is currently positioned on.
///
/// The block does not have to be loaded for this call to be valid: calling it right
/// after `.shallow_advance(..)` is fine.
///
/// See `TermScorer::block_max_score(..)` for more information.
fn block_max_score(
    &mut self,
    fieldnorm_reader: &FieldNormReader,
    bm25_weight: &Bm25Weight,
) -> Score {
    // A value memoized by an earlier call takes precedence over everything else.
    if let Some(cached_score) = self.block_max_score_cache {
        return cached_score;
    }
    let fresh_score = match self.skip_reader.block_max_score(bm25_weight) {
        // On a full block, the skip reader is expected to carry the block max
        // information for us.
        Some(skip_score) => skip_score,
        // Otherwise we are on the last block of the segment posting list. If it is
        // loaded, the block max is computed by hand from the decoded docs and freqs.
        None if self.block_loaded => {
            let doc_ids = self.doc_decoder.output_array();
            let term_freqs = self.freq_decoder.output_array();
            let scores = doc_ids
                .iter()
                .zip(term_freqs.iter())
                .map(|(&doc, &term_freq)| {
                    let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc);
                    bm25_weight.score(fieldnorm_id, term_freq)
                });
            max_score(scores).unwrap_or(0.0)
        }
        // No good block max value is available. `bm25_weight.max_score()` is a valid
        // upper bound, so it is returned as is.
        //
        // It is deliberately not cached, so that the real value gets computed once the
        // block is loaded.
        None => return bm25_weight.max_score(),
    };
    self.block_max_score_cache = Some(fresh_score);
    fresh_score
}
}
impl StandardPostingsReader {

View File

@@ -159,7 +159,7 @@ impl BlockSegmentPostings {
}
// this is the last block of the segment posting list.
// If it is actually loaded, we can compute block max manually.
if self.block_is_loaded() {
if self.block_loaded {
let docs = self.doc_decoder.output_array().iter().cloned();
let freqs = self.freq_decoder.output_array().iter().cloned();
let bm25_scores = docs.zip(freqs).map(|(doc, term_freq)| {
@@ -222,7 +222,7 @@ impl BlockSegmentPostings {
/// returned by `.docs()` is empty.
#[inline]
pub fn docs(&self) -> &[DocId] {
debug_assert!(self.block_is_loaded());
debug_assert!(self.block_loaded);
self.doc_decoder.output_array()
}
@@ -235,14 +235,14 @@ impl BlockSegmentPostings {
/// Return the array of `term freq` in the block.
#[inline]
pub fn freqs(&self) -> &[u32] {
debug_assert!(self.block_is_loaded());
debug_assert!(self.block_loaded);
self.freq_decoder.output_array()
}
/// Return the frequency at index `idx` of the block.
#[inline]
pub fn freq(&self, idx: usize) -> u32 {
debug_assert!(self.block_is_loaded());
debug_assert!(self.block_loaded);
self.freq_decoder.output(idx)
}
@@ -253,7 +253,7 @@ impl BlockSegmentPostings {
/// of any number between 1 and `NUM_DOCS_PER_BLOCK - 1`
#[inline]
pub fn block_len(&self) -> usize {
debug_assert!(self.block_is_loaded());
debug_assert!(self.block_loaded);
self.doc_decoder.output_len
}
@@ -297,13 +297,9 @@ impl BlockSegmentPostings {
}
}
pub(crate) fn block_is_loaded(&self) -> bool {
self.block_loaded
}
pub(crate) fn load_block(&mut self) {
fn load_block(&mut self) {
let offset = self.skip_reader.byte_offset();
if self.block_is_loaded() {
if self.block_loaded {
return;
}
match self.skip_reader.block_info() {

View File

@@ -35,7 +35,7 @@ pub use self::term_info::TermInfo;
#[expect(clippy::enum_variant_names)]
#[derive(Debug, PartialEq, Clone, Copy, Eq)]
pub(crate) enum FreqReadingOption {
pub enum FreqReadingOption {
NoFreq,
SkipFreq,
ReadFreq,

View File

@@ -166,7 +166,6 @@ impl DocSet for SegmentPostings {
// next needs to be called a first time to point to the correct element.
#[inline]
fn advance(&mut self) -> DocId {
debug_assert!(self.block_cursor.block_is_loaded());
if self.cur == COMPRESSION_BLOCK_SIZE - 1 {
self.cur = 0;
self.block_cursor.advance();