mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-01 08:00:41 +00:00
blop
This commit is contained in:
@@ -4,6 +4,7 @@ use common::OwnedBytes;
|
||||
|
||||
use crate::fieldnorm::FieldNormReader;
|
||||
use crate::postings::FreqReadingOption;
|
||||
use crate::query::Bm25Weight;
|
||||
use crate::schema::IndexRecordOption;
|
||||
use crate::{DocId, Score};
|
||||
|
||||
@@ -70,4 +71,10 @@ pub trait PostingsReader: Sized {
|
||||
|
||||
// TODO Move to the codec and use the serializer.
|
||||
fn empty() -> Self;
|
||||
|
||||
fn block_max_score(
|
||||
&mut self,
|
||||
fieldnorm_reader: &FieldNormReader,
|
||||
bm25_weight: &Bm25Weight,
|
||||
) -> Score;
|
||||
}
|
||||
|
||||
@@ -4,12 +4,12 @@ use crate::schema::IndexRecordOption;
|
||||
use crate::Score;
|
||||
|
||||
mod block;
|
||||
mod block_segment_postings;
|
||||
mod postings_serializer;
|
||||
mod standard_postings_reader;
|
||||
mod standard_postings_serializer;
|
||||
mod skip;
|
||||
|
||||
pub use block_segment_postings::StandardPostingsReader;
|
||||
pub use postings_serializer::StandardPostingsSerializer;
|
||||
pub use standard_postings_reader::StandardPostingsReader;
|
||||
pub use standard_postings_serializer::StandardPostingsSerializer;
|
||||
|
||||
pub struct StandardPostingsCodec;
|
||||
|
||||
|
||||
@@ -269,6 +269,45 @@ impl PostingsReader for StandardPostingsReader {
|
||||
skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the block_max_score for the current block.
|
||||
/// It does not require the block to be loaded. For instance, it is ok to call this method
|
||||
/// after having called `.shallow_advance(..)`.
|
||||
///
|
||||
/// See `TermScorer::block_max_score(..)` for more information.
|
||||
fn block_max_score(
|
||||
&mut self,
|
||||
fieldnorm_reader: &FieldNormReader,
|
||||
bm25_weight: &Bm25Weight,
|
||||
) -> Score {
|
||||
if let Some(score) = self.block_max_score_cache {
|
||||
return score;
|
||||
}
|
||||
if let Some(skip_reader_max_score) = self.skip_reader.block_max_score(bm25_weight) {
|
||||
// if we are on a full block, the skip reader should have the block max information
|
||||
// for us
|
||||
self.block_max_score_cache = Some(skip_reader_max_score);
|
||||
return skip_reader_max_score;
|
||||
}
|
||||
// this is the last block of the segment posting list.
|
||||
// If it is actually loaded, we can compute block max manually.
|
||||
if self.block_loaded {
|
||||
let docs = self.doc_decoder.output_array().iter().cloned();
|
||||
let freqs = self.freq_decoder.output_array().iter().cloned();
|
||||
let bm25_scores = docs.zip(freqs).map(|(doc, term_freq)| {
|
||||
let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc);
|
||||
bm25_weight.score(fieldnorm_id, term_freq)
|
||||
});
|
||||
let block_max_score = max_score(bm25_scores).unwrap_or(0.0);
|
||||
self.block_max_score_cache = Some(block_max_score);
|
||||
return block_max_score;
|
||||
}
|
||||
// We do not have access to any good block max value. We return bm25_weight.max_score()
|
||||
// as it is a valid upperbound.
|
||||
//
|
||||
// We do not cache it however, so that it gets computed when once block is loaded.
|
||||
bm25_weight.max_score()
|
||||
}
|
||||
}
|
||||
|
||||
impl StandardPostingsReader {
|
||||
@@ -159,7 +159,7 @@ impl BlockSegmentPostings {
|
||||
}
|
||||
// this is the last block of the segment posting list.
|
||||
// If it is actually loaded, we can compute block max manually.
|
||||
if self.block_is_loaded() {
|
||||
if self.block_loaded {
|
||||
let docs = self.doc_decoder.output_array().iter().cloned();
|
||||
let freqs = self.freq_decoder.output_array().iter().cloned();
|
||||
let bm25_scores = docs.zip(freqs).map(|(doc, term_freq)| {
|
||||
@@ -222,7 +222,7 @@ impl BlockSegmentPostings {
|
||||
/// returned by `.docs()` is empty.
|
||||
#[inline]
|
||||
pub fn docs(&self) -> &[DocId] {
|
||||
debug_assert!(self.block_is_loaded());
|
||||
debug_assert!(self.block_loaded);
|
||||
self.doc_decoder.output_array()
|
||||
}
|
||||
|
||||
@@ -235,14 +235,14 @@ impl BlockSegmentPostings {
|
||||
/// Return the array of `term freq` in the block.
|
||||
#[inline]
|
||||
pub fn freqs(&self) -> &[u32] {
|
||||
debug_assert!(self.block_is_loaded());
|
||||
debug_assert!(self.block_loaded);
|
||||
self.freq_decoder.output_array()
|
||||
}
|
||||
|
||||
/// Return the frequency at index `idx` of the block.
|
||||
#[inline]
|
||||
pub fn freq(&self, idx: usize) -> u32 {
|
||||
debug_assert!(self.block_is_loaded());
|
||||
debug_assert!(self.block_loaded);
|
||||
self.freq_decoder.output(idx)
|
||||
}
|
||||
|
||||
@@ -253,7 +253,7 @@ impl BlockSegmentPostings {
|
||||
/// of any number between 1 and `NUM_DOCS_PER_BLOCK - 1`
|
||||
#[inline]
|
||||
pub fn block_len(&self) -> usize {
|
||||
debug_assert!(self.block_is_loaded());
|
||||
debug_assert!(self.block_loaded);
|
||||
self.doc_decoder.output_len
|
||||
}
|
||||
|
||||
@@ -297,13 +297,9 @@ impl BlockSegmentPostings {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn block_is_loaded(&self) -> bool {
|
||||
self.block_loaded
|
||||
}
|
||||
|
||||
pub(crate) fn load_block(&mut self) {
|
||||
fn load_block(&mut self) {
|
||||
let offset = self.skip_reader.byte_offset();
|
||||
if self.block_is_loaded() {
|
||||
if self.block_loaded {
|
||||
return;
|
||||
}
|
||||
match self.skip_reader.block_info() {
|
||||
|
||||
@@ -35,7 +35,7 @@ pub use self::term_info::TermInfo;
|
||||
|
||||
#[expect(clippy::enum_variant_names)]
|
||||
#[derive(Debug, PartialEq, Clone, Copy, Eq)]
|
||||
pub(crate) enum FreqReadingOption {
|
||||
pub enum FreqReadingOption {
|
||||
NoFreq,
|
||||
SkipFreq,
|
||||
ReadFreq,
|
||||
|
||||
@@ -166,7 +166,6 @@ impl DocSet for SegmentPostings {
|
||||
// next needs to be called a first time to point to the correct element.
|
||||
#[inline]
|
||||
fn advance(&mut self) -> DocId {
|
||||
debug_assert!(self.block_cursor.block_is_loaded());
|
||||
if self.cur == COMPRESSION_BLOCK_SIZE - 1 {
|
||||
self.cur = 0;
|
||||
self.block_cursor.advance();
|
||||
|
||||
Reference in New Issue
Block a user