blop

2026-06-01 08:00:41 +00:00 · 2026-01-13 20:21:22 +01:00
parent 1d5fe6bc7c
commit 799e88adbd
7 changed files with 58 additions and 17 deletions
--- a/src/codec/postings/mod.rs
+++ b/src/codec/postings/mod.rs
@@ -4,6 +4,7 @@ use common::OwnedBytes;

 use crate::fieldnorm::FieldNormReader;
 use crate::postings::FreqReadingOption;
+use crate::query::Bm25Weight;
 use crate::schema::IndexRecordOption;
 use crate::{DocId, Score};

@@ -70,4 +71,10 @@ pub trait PostingsReader: Sized {

    // TODO Move to the codec and use the serializer.
    fn empty() -> Self;
+
+    fn block_max_score(
+        &mut self,
+        fieldnorm_reader: &FieldNormReader,
+        bm25_weight: &Bm25Weight,
+    ) -> Score;
 }
--- a/src/codec/standard/postings/mod.rs
+++ b/src/codec/standard/postings/mod.rs
@@ -4,12 +4,12 @@ use crate::schema::IndexRecordOption;
 use crate::Score;

 mod block;
-mod block_segment_postings;
-mod postings_serializer;
+mod standard_postings_reader;
+mod standard_postings_serializer;
 mod skip;

-pub use block_segment_postings::StandardPostingsReader;
-pub use postings_serializer::StandardPostingsSerializer;
+pub use standard_postings_reader::StandardPostingsReader;
+pub use standard_postings_serializer::StandardPostingsSerializer;

 pub struct StandardPostingsCodec;

--- a/src/codec/standard/postings/standard_postings_reader.rs
+++ b/src/codec/standard/postings/standard_postings_reader.rs
@@ -269,6 +269,45 @@ impl PostingsReader for StandardPostingsReader {
            skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
        }
    }
+
+    /// Returns the block_max_score for the current block.
+    /// It does not require the block to be loaded. For instance, it is ok to call this method
+    /// after having called `.shallow_advance(..)`.
+    ///
+    /// See `TermScorer::block_max_score(..)` for more information.
+    fn block_max_score(
+        &mut self,
+        fieldnorm_reader: &FieldNormReader,
+        bm25_weight: &Bm25Weight,
+    ) -> Score {
+        if let Some(score) = self.block_max_score_cache {
+            return score;
+        }
+        if let Some(skip_reader_max_score) = self.skip_reader.block_max_score(bm25_weight) {
+            // if we are on a full block, the skip reader should have the block max information
+            // for us
+            self.block_max_score_cache = Some(skip_reader_max_score);
+            return skip_reader_max_score;
+        }
+        // this is the last block of the segment posting list.
+        // If it is actually loaded, we can compute block max manually.
+        if self.block_loaded {
+            let docs = self.doc_decoder.output_array().iter().cloned();
+            let freqs = self.freq_decoder.output_array().iter().cloned();
+            let bm25_scores = docs.zip(freqs).map(|(doc, term_freq)| {
+                let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc);
+                bm25_weight.score(fieldnorm_id, term_freq)
+            });
+            let block_max_score = max_score(bm25_scores).unwrap_or(0.0);
+            self.block_max_score_cache = Some(block_max_score);
+            return block_max_score;
+        }
+        // We do not have access to any good block max value. We return bm25_weight.max_score()
+        // as it is a valid upper bound.
+        //
+        // We do not cache it, however, so that it gets computed once the block is loaded.
+        bm25_weight.max_score()
+    }
 }

 impl StandardPostingsReader {
--- a/src/codec/standard/postings/standard_postings_serializer.rs
+++ b/src/codec/standard/postings/standard_postings_serializer.rs
--- a/src/postings/block_segment_postings.rs
+++ b/src/postings/block_segment_postings.rs
@@ -159,7 +159,7 @@ impl BlockSegmentPostings {
        }
        // this is the last block of the segment posting list.
        // If it is actually loaded, we can compute block max manually.
-        if self.block_is_loaded() {
+        if self.block_loaded {
            let docs = self.doc_decoder.output_array().iter().cloned();
            let freqs = self.freq_decoder.output_array().iter().cloned();
            let bm25_scores = docs.zip(freqs).map(|(doc, term_freq)| {
@@ -222,7 +222,7 @@ impl BlockSegmentPostings {
    /// returned by `.docs()` is empty.
    #[inline]
    pub fn docs(&self) -> &[DocId] {
-        debug_assert!(self.block_is_loaded());
+        debug_assert!(self.block_loaded);
        self.doc_decoder.output_array()
    }

@@ -235,14 +235,14 @@ impl BlockSegmentPostings {
    /// Return the array of `term freq` in the block.
    #[inline]
    pub fn freqs(&self) -> &[u32] {
-        debug_assert!(self.block_is_loaded());
+        debug_assert!(self.block_loaded);
        self.freq_decoder.output_array()
    }

    /// Return the frequency at index `idx` of the block.
    #[inline]
    pub fn freq(&self, idx: usize) -> u32 {
-        debug_assert!(self.block_is_loaded());
+        debug_assert!(self.block_loaded);
        self.freq_decoder.output(idx)
    }

@@ -253,7 +253,7 @@ impl BlockSegmentPostings {
    /// of any number between 1 and `NUM_DOCS_PER_BLOCK - 1`
    #[inline]
    pub fn block_len(&self) -> usize {
-        debug_assert!(self.block_is_loaded());
+        debug_assert!(self.block_loaded);
        self.doc_decoder.output_len
    }

@@ -297,13 +297,9 @@ impl BlockSegmentPostings {
        }
    }

-    pub(crate) fn block_is_loaded(&self) -> bool {
-        self.block_loaded
-    }
-
-    pub(crate) fn load_block(&mut self) {
+    fn load_block(&mut self) {
        let offset = self.skip_reader.byte_offset();
-        if self.block_is_loaded() {
+        if self.block_loaded {
            return;
        }
        match self.skip_reader.block_info() {
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -35,7 +35,7 @@ pub use self::term_info::TermInfo;

 #[expect(clippy::enum_variant_names)]
 #[derive(Debug, PartialEq, Clone, Copy, Eq)]
-pub(crate) enum FreqReadingOption {
+pub enum FreqReadingOption {
    NoFreq,
    SkipFreq,
    ReadFreq,
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -166,7 +166,6 @@ impl DocSet for SegmentPostings {
    // next needs to be called a first time to point to the correct element.
    #[inline]
    fn advance(&mut self) -> DocId {
-        debug_assert!(self.block_cursor.block_is_loaded());
        if self.cur == COMPRESSION_BLOCK_SIZE - 1 {
            self.cur = 0;
            self.block_cursor.advance();