Removed BlockNotLoaded

This commit is contained in:
Paul Masurel
2026-01-06 14:33:38 +01:00
parent f62a806f47
commit a5dc888cb9
6 changed files with 53 additions and 67 deletions

View File

@@ -11,9 +11,7 @@ use tantivy_fst::automaton::{AlwaysMatch, Automaton};
use crate::directory::FileSlice;
use crate::positions::PositionReader;
use crate::postings::{
BlockSegmentPostings, BlockSegmentPostingsNotLoaded, SegmentPostings, TermInfo,
};
use crate::postings::{BlockSegmentPostings, SegmentPostings, TermInfo};
use crate::schema::{IndexRecordOption, Term, Type};
use crate::termdict::TermDictionary;
use crate::DocId;
@@ -207,11 +205,12 @@ impl InvertedIndexReader {
/// This method is for advanced usage only.
///
/// Most users should prefer using [`Self::read_postings()`] instead.
pub(crate) fn read_block_postings_from_terminfo_not_loaded(
pub(crate) fn read_block_postings_from_terminfo_with_seek(
&self,
term_info: &TermInfo,
requested_option: IndexRecordOption,
) -> io::Result<BlockSegmentPostingsNotLoaded> {
seek_doc: DocId,
) -> io::Result<(BlockSegmentPostings, usize)> {
let postings_data = self
.postings_file_slice
.slice(term_info.postings_range.clone());
@@ -220,6 +219,7 @@ impl InvertedIndexReader {
postings_data,
self.record_option,
requested_option,
seek_doc,
)
}
@@ -232,10 +232,9 @@ impl InvertedIndexReader {
term_info: &TermInfo,
requested_option: IndexRecordOption,
) -> io::Result<BlockSegmentPostings> {
let block_segment_postings_not_loaded = self
.read_block_postings_from_terminfo_not_loaded(term_info, requested_option)?
.load_at_start();
Ok(block_segment_postings_not_loaded)
let (block_segment_postings, _) =
self.read_block_postings_from_terminfo_with_seek(term_info, requested_option, 0)?;
Ok(block_segment_postings)
}
/// Returns a posting object given a `term_info`.
@@ -248,8 +247,8 @@ impl InvertedIndexReader {
record_option: IndexRecordOption,
seek_doc: DocId,
) -> io::Result<SegmentPostings> {
let block_segment_postings_not_loaded =
self.read_block_postings_from_terminfo_not_loaded(term_info, record_option)?;
let (block_segment_postings, position_within_block) =
self.read_block_postings_from_terminfo_with_seek(term_info, record_option, seek_doc)?;
let position_reader = {
if record_option.has_positions() {
let positions_data = self
@@ -262,9 +261,9 @@ impl InvertedIndexReader {
}
};
Ok(SegmentPostings::from_block_postings(
block_segment_postings_not_loaded,
block_segment_postings,
position_reader,
seek_doc,
position_within_block,
))
}

View File

@@ -87,31 +87,6 @@ fn split_into_skips_and_postings(
Ok((Some(skip_data), postings_data))
}
/// A block segment postings for which the first block has not been loaded yet.
///
/// Newtype wrapper around `BlockSegmentPostings`. You can either call
/// `load_at_start` to load its first block, or skip a few blocks by calling
/// `seek_and_load`; both consume the wrapper and hand back the inner
/// `BlockSegmentPostings`.
pub(crate) struct BlockSegmentPostingsNotLoaded(BlockSegmentPostings);
impl BlockSegmentPostingsNotLoaded {
/// Seek into the block segment postings directly, possibly avoiding loading its first block.
///
/// Consumes the wrapper and returns the inner `BlockSegmentPostings` together with
/// a position: `0` when `seek_doc` is `0`, otherwise the value returned by
/// `BlockSegmentPostings::seek`.
pub fn seek_and_load(self, seek_doc: DocId) -> (BlockSegmentPostings, usize) {
let BlockSegmentPostingsNotLoaded(mut block_segment_postings) = self;
let inner_pos = if seek_doc == 0 {
// Nothing to skip: call `load_block` and report position 0.
block_segment_postings.load_block();
0
} else {
block_segment_postings.seek(seek_doc)
};
(block_segment_postings, inner_pos)
}
/// Load the first block of segment postings.
///
/// Equivalent to `seek_and_load(0)` with the returned position discarded.
pub fn load_at_start(self) -> BlockSegmentPostings {
self.seek_and_load(0u32).0
}
}
impl BlockSegmentPostings {
/// Opens a `BlockSegmentPostings`.
/// `doc_freq` is the number of documents in the posting list.
@@ -124,7 +99,8 @@ impl BlockSegmentPostings {
data: FileSlice,
mut record_option: IndexRecordOption,
requested_option: IndexRecordOption,
) -> io::Result<BlockSegmentPostingsNotLoaded> {
seek_doc: DocId,
) -> io::Result<(BlockSegmentPostings, usize)> {
let bytes = data.read_bytes()?;
let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, bytes)?;
let skip_reader = match skip_data_opt {
@@ -150,7 +126,7 @@ impl BlockSegmentPostings {
(_, _) => FreqReadingOption::ReadFreq,
};
Ok(BlockSegmentPostingsNotLoaded(BlockSegmentPostings {
let mut block_segment_postings: BlockSegmentPostings = BlockSegmentPostings {
doc_decoder: BlockDecoder::with_val(TERMINATED),
block_loaded: false,
freq_decoder: BlockDecoder::with_val(1),
@@ -159,7 +135,14 @@ impl BlockSegmentPostings {
doc_freq,
data: postings_data,
skip_reader,
}))
};
let inner_pos = if seek_doc == 0 {
block_segment_postings.load_block();
0
} else {
block_segment_postings.seek(seek_doc)
};
Ok((block_segment_postings, inner_pos))
}
/// Returns the block_max_score for the current block.
@@ -413,7 +396,7 @@ mod tests {
use crate::index::Index;
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
use crate::postings::postings::Postings;
use crate::postings::{BlockSegmentPostingsNotLoaded, SegmentPostings};
use crate::postings::SegmentPostings;
use crate::schema::{IndexRecordOption, Schema, Term, INDEXED};
use crate::DocId;
@@ -452,8 +435,7 @@ mod tests {
#[test]
fn test_block_segment_postings() -> crate::Result<()> {
let mut block_segments =
build_block_postings(&(0..100_000).collect::<Vec<u32>>())?.load_at_start();
let mut block_segments = build_block_postings(&(0..100_000).collect::<Vec<u32>>())?;
let mut offset: u32 = 0u32;
// checking that the `doc_freq` is correct
assert_eq!(block_segments.doc_freq(), 100_000);
@@ -487,7 +469,7 @@ mod tests {
assert_eq!(docset.advance(), TERMINATED);
}
{
let block_segments = build_block_postings(&doc_ids).unwrap();
let block_segments = build_block_postings(&doc_ids)?;
let mut docset = SegmentPostings::from_block_postings(block_segments, None, 0);
assert_eq!(docset.seek(129), 129);
assert_eq!(docset.doc(), 129);
@@ -505,7 +487,7 @@ mod tests {
Ok(())
}
fn build_block_postings(docs: &[DocId]) -> crate::Result<BlockSegmentPostingsNotLoaded> {
fn build_block_postings(docs: &[DocId]) -> crate::Result<BlockSegmentPostings> {
let mut schema_builder = Schema::builder();
let int_field = schema_builder.add_u64_field("id", INDEXED);
let schema = schema_builder.build();
@@ -525,9 +507,9 @@ mod tests {
let inverted_index = segment_reader.inverted_index(int_field).unwrap();
let term = Term::from_field_u64(int_field, 0u64);
let term_info = inverted_index.get_term_info(&term)?.unwrap();
let block_postings_not_loaded = inverted_index
.read_block_postings_from_terminfo_not_loaded(&term_info, IndexRecordOption::Basic)?;
Ok(block_postings_not_loaded)
let block_postings = inverted_index
.read_block_postings_from_terminfo(&term_info, IndexRecordOption::Basic)?;
Ok(block_postings)
}
#[test]
@@ -536,7 +518,7 @@ mod tests {
for i in 0..1300 {
docs.push((i * i / 100) + i);
}
let mut block_postings = build_block_postings(&docs[..])?.load_at_start();
let mut block_postings = build_block_postings(&docs[..])?;
for i in &[0, 424, 10000] {
block_postings.seek(*i);
let docs = block_postings.docs();

View File

@@ -22,7 +22,6 @@ pub(crate) use loaded_postings::LoadedPostings;
pub(crate) use stacker::compute_table_memory_size;
pub use self::block_segment_postings::BlockSegmentPostings;
pub(crate) use self::block_segment_postings::BlockSegmentPostingsNotLoaded;
pub(crate) use self::indexing_context::IndexingContext;
pub(crate) use self::per_field_postings_writer::PerFieldPostingsWriter;
pub use self::postings::Postings;

View File

@@ -4,7 +4,7 @@ use crate::docset::DocSet;
use crate::fastfield::AliveBitSet;
use crate::positions::PositionReader;
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
use crate::postings::{BlockSegmentPostings, BlockSegmentPostingsNotLoaded, Postings};
use crate::postings::{BlockSegmentPostings, Postings};
use crate::{DocId, TERMINATED};
/// `SegmentPostings` represents the inverted list or postings associated with
@@ -79,14 +79,15 @@ impl SegmentPostings {
.close_term(docs.len() as u32)
.expect("In memory Serialization should never fail.");
}
let block_segment_postings = BlockSegmentPostings::open(
let (block_segment_postings, position_within_block) = BlockSegmentPostings::open(
docs.len() as u32,
FileSlice::from(buffer),
IndexRecordOption::Basic,
IndexRecordOption::Basic,
0u32,
)
.unwrap();
SegmentPostings::from_block_postings(block_segment_postings, None, 0)
SegmentPostings::from_block_postings(block_segment_postings, None, position_within_block)
}
/// Helper functions to create `SegmentPostings` for tests.
@@ -127,14 +128,15 @@ impl SegmentPostings {
postings_serializer
.close_term(doc_and_tfs.len() as u32)
.unwrap();
let block_segment_postings = BlockSegmentPostings::open(
let (block_segment_postings, position_within_block) = BlockSegmentPostings::open(
doc_and_tfs.len() as u32,
FileSlice::from(buffer),
IndexRecordOption::WithFreqs,
IndexRecordOption::WithFreqs,
0u32,
)
.unwrap();
SegmentPostings::from_block_postings(block_segment_postings, None, 0)
SegmentPostings::from_block_postings(block_segment_postings, None, position_within_block)
}
/// Creates a Segment Postings from a
@@ -142,14 +144,13 @@ impl SegmentPostings {
/// - a position reader
/// - the cursor position within the currently loaded block
pub(crate) fn from_block_postings(
segment_block_postings: BlockSegmentPostingsNotLoaded,
segment_block_postings: BlockSegmentPostings,
position_reader: Option<PositionReader>,
seek_doc: DocId,
position_within_block: usize,
) -> SegmentPostings {
let (block_cursor, cur) = segment_block_postings.seek_and_load(seek_doc);
SegmentPostings {
block_cursor,
cur,
block_cursor: segment_block_postings,
cur: position_within_block,
position_reader,
}
}

View File

@@ -97,10 +97,12 @@ where
let mut term_stream = self.automaton_stream(term_dict)?;
while term_stream.advance() {
let term_info = term_stream.value();
let mut block_segment_postings = inverted_index
.read_block_postings_from_terminfo_not_loaded(term_info, IndexRecordOption::Basic)?
.seek_and_load(seek_doc)
.0;
let (mut block_segment_postings, _) = inverted_index
.read_block_postings_from_terminfo_with_seek(
term_info,
IndexRecordOption::Basic,
seek_doc,
)?;
loop {
let docs = block_segment_postings.docs();
if docs.is_empty() {

View File

@@ -234,8 +234,11 @@ impl Weight for InvertedIndexRangeWeight {
processed_count += 1;
let term_info = term_range.value();
let mut block_segment_postings = inverted_index
.read_block_postings_from_terminfo_not_loaded(term_info, IndexRecordOption::Basic)?
.seek_and_load(seek_doc)
.read_block_postings_from_terminfo_with_seek(
term_info,
IndexRecordOption::Basic,
seek_doc,
)?
.0;
loop {
let docs = block_segment_postings.docs();