mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-22 03:00:42 +00:00
Removed BlockNotLoaded
This commit is contained in:
@@ -11,9 +11,7 @@ use tantivy_fst::automaton::{AlwaysMatch, Automaton};
|
||||
|
||||
use crate::directory::FileSlice;
|
||||
use crate::positions::PositionReader;
|
||||
use crate::postings::{
|
||||
BlockSegmentPostings, BlockSegmentPostingsNotLoaded, SegmentPostings, TermInfo,
|
||||
};
|
||||
use crate::postings::{BlockSegmentPostings, SegmentPostings, TermInfo};
|
||||
use crate::schema::{IndexRecordOption, Term, Type};
|
||||
use crate::termdict::TermDictionary;
|
||||
use crate::DocId;
|
||||
@@ -207,11 +205,12 @@ impl InvertedIndexReader {
|
||||
/// This method is for advanced usage only.
|
||||
///
|
||||
/// Most users should prefer using [`Self::read_postings()`] instead.
|
||||
pub(crate) fn read_block_postings_from_terminfo_not_loaded(
|
||||
pub(crate) fn read_block_postings_from_terminfo_with_seek(
|
||||
&self,
|
||||
term_info: &TermInfo,
|
||||
requested_option: IndexRecordOption,
|
||||
) -> io::Result<BlockSegmentPostingsNotLoaded> {
|
||||
seek_doc: DocId,
|
||||
) -> io::Result<(BlockSegmentPostings, usize)> {
|
||||
let postings_data = self
|
||||
.postings_file_slice
|
||||
.slice(term_info.postings_range.clone());
|
||||
@@ -220,6 +219,7 @@ impl InvertedIndexReader {
|
||||
postings_data,
|
||||
self.record_option,
|
||||
requested_option,
|
||||
seek_doc,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -232,10 +232,9 @@ impl InvertedIndexReader {
|
||||
term_info: &TermInfo,
|
||||
requested_option: IndexRecordOption,
|
||||
) -> io::Result<BlockSegmentPostings> {
|
||||
let block_segment_postings_not_loaded = self
|
||||
.read_block_postings_from_terminfo_not_loaded(term_info, requested_option)?
|
||||
.load_at_start();
|
||||
Ok(block_segment_postings_not_loaded)
|
||||
let (block_segment_postings, _) =
|
||||
self.read_block_postings_from_terminfo_with_seek(term_info, requested_option, 0)?;
|
||||
Ok(block_segment_postings)
|
||||
}
|
||||
|
||||
/// Returns a posting object given a `term_info`.
|
||||
@@ -248,8 +247,8 @@ impl InvertedIndexReader {
|
||||
record_option: IndexRecordOption,
|
||||
seek_doc: DocId,
|
||||
) -> io::Result<SegmentPostings> {
|
||||
let block_segment_postings_not_loaded =
|
||||
self.read_block_postings_from_terminfo_not_loaded(term_info, record_option)?;
|
||||
let (block_segment_postings, position_within_block) =
|
||||
self.read_block_postings_from_terminfo_with_seek(term_info, record_option, seek_doc)?;
|
||||
let position_reader = {
|
||||
if record_option.has_positions() {
|
||||
let positions_data = self
|
||||
@@ -262,9 +261,9 @@ impl InvertedIndexReader {
|
||||
}
|
||||
};
|
||||
Ok(SegmentPostings::from_block_postings(
|
||||
block_segment_postings_not_loaded,
|
||||
block_segment_postings,
|
||||
position_reader,
|
||||
seek_doc,
|
||||
position_within_block,
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
@@ -87,31 +87,6 @@ fn split_into_skips_and_postings(
|
||||
Ok((Some(skip_data), postings_data))
|
||||
}
|
||||
|
||||
/// A block segment postings for which the first block has not been loaded yet.
|
||||
///
|
||||
/// You can either call `load_at_start` to load its first block,
|
||||
/// or skip a few blocks by calling `seek_and_load`.
|
||||
pub(crate) struct BlockSegmentPostingsNotLoaded(BlockSegmentPostings);
|
||||
|
||||
impl BlockSegmentPostingsNotLoaded {
|
||||
/// Seek into the block segment postings directly, possibly avoiding loading its first block.
|
||||
pub fn seek_and_load(self, seek_doc: DocId) -> (BlockSegmentPostings, usize) {
|
||||
let BlockSegmentPostingsNotLoaded(mut block_segment_postings) = self;
|
||||
let inner_pos = if seek_doc == 0 {
|
||||
block_segment_postings.load_block();
|
||||
0
|
||||
} else {
|
||||
block_segment_postings.seek(seek_doc)
|
||||
};
|
||||
(block_segment_postings, inner_pos)
|
||||
}
|
||||
|
||||
/// Load the first block of segment postings.
|
||||
pub fn load_at_start(self) -> BlockSegmentPostings {
|
||||
self.seek_and_load(0u32).0
|
||||
}
|
||||
}
|
||||
|
||||
impl BlockSegmentPostings {
|
||||
/// Opens a `BlockSegmentPostings`.
|
||||
/// `doc_freq` is the number of documents in the posting list.
|
||||
@@ -124,7 +99,8 @@ impl BlockSegmentPostings {
|
||||
data: FileSlice,
|
||||
mut record_option: IndexRecordOption,
|
||||
requested_option: IndexRecordOption,
|
||||
) -> io::Result<BlockSegmentPostingsNotLoaded> {
|
||||
seek_doc: DocId,
|
||||
) -> io::Result<(BlockSegmentPostings, usize)> {
|
||||
let bytes = data.read_bytes()?;
|
||||
let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, bytes)?;
|
||||
let skip_reader = match skip_data_opt {
|
||||
@@ -150,7 +126,7 @@ impl BlockSegmentPostings {
|
||||
(_, _) => FreqReadingOption::ReadFreq,
|
||||
};
|
||||
|
||||
Ok(BlockSegmentPostingsNotLoaded(BlockSegmentPostings {
|
||||
let mut block_segment_postings: BlockSegmentPostings = BlockSegmentPostings {
|
||||
doc_decoder: BlockDecoder::with_val(TERMINATED),
|
||||
block_loaded: false,
|
||||
freq_decoder: BlockDecoder::with_val(1),
|
||||
@@ -159,7 +135,14 @@ impl BlockSegmentPostings {
|
||||
doc_freq,
|
||||
data: postings_data,
|
||||
skip_reader,
|
||||
}))
|
||||
};
|
||||
let inner_pos = if seek_doc == 0 {
|
||||
block_segment_postings.load_block();
|
||||
0
|
||||
} else {
|
||||
block_segment_postings.seek(seek_doc)
|
||||
};
|
||||
Ok((block_segment_postings, inner_pos))
|
||||
}
|
||||
|
||||
/// Returns the block_max_score for the current block.
|
||||
@@ -413,7 +396,7 @@ mod tests {
|
||||
use crate::index::Index;
|
||||
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
||||
use crate::postings::postings::Postings;
|
||||
use crate::postings::{BlockSegmentPostingsNotLoaded, SegmentPostings};
|
||||
use crate::postings::SegmentPostings;
|
||||
use crate::schema::{IndexRecordOption, Schema, Term, INDEXED};
|
||||
use crate::DocId;
|
||||
|
||||
@@ -452,8 +435,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_block_segment_postings() -> crate::Result<()> {
|
||||
let mut block_segments =
|
||||
build_block_postings(&(0..100_000).collect::<Vec<u32>>())?.load_at_start();
|
||||
let mut block_segments = build_block_postings(&(0..100_000).collect::<Vec<u32>>())?;
|
||||
let mut offset: u32 = 0u32;
|
||||
// checking that the `doc_freq` is correct
|
||||
assert_eq!(block_segments.doc_freq(), 100_000);
|
||||
@@ -487,7 +469,7 @@ mod tests {
|
||||
assert_eq!(docset.advance(), TERMINATED);
|
||||
}
|
||||
{
|
||||
let block_segments = build_block_postings(&doc_ids).unwrap();
|
||||
let block_segments = build_block_postings(&doc_ids)?;
|
||||
let mut docset = SegmentPostings::from_block_postings(block_segments, None, 0);
|
||||
assert_eq!(docset.seek(129), 129);
|
||||
assert_eq!(docset.doc(), 129);
|
||||
@@ -505,7 +487,7 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn build_block_postings(docs: &[DocId]) -> crate::Result<BlockSegmentPostingsNotLoaded> {
|
||||
fn build_block_postings(docs: &[DocId]) -> crate::Result<BlockSegmentPostings> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let int_field = schema_builder.add_u64_field("id", INDEXED);
|
||||
let schema = schema_builder.build();
|
||||
@@ -525,9 +507,9 @@ mod tests {
|
||||
let inverted_index = segment_reader.inverted_index(int_field).unwrap();
|
||||
let term = Term::from_field_u64(int_field, 0u64);
|
||||
let term_info = inverted_index.get_term_info(&term)?.unwrap();
|
||||
let block_postings_not_loaded = inverted_index
|
||||
.read_block_postings_from_terminfo_not_loaded(&term_info, IndexRecordOption::Basic)?;
|
||||
Ok(block_postings_not_loaded)
|
||||
let block_postings = inverted_index
|
||||
.read_block_postings_from_terminfo(&term_info, IndexRecordOption::Basic)?;
|
||||
Ok(block_postings)
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -536,7 +518,7 @@ mod tests {
|
||||
for i in 0..1300 {
|
||||
docs.push((i * i / 100) + i);
|
||||
}
|
||||
let mut block_postings = build_block_postings(&docs[..])?.load_at_start();
|
||||
let mut block_postings = build_block_postings(&docs[..])?;
|
||||
for i in &[0, 424, 10000] {
|
||||
block_postings.seek(*i);
|
||||
let docs = block_postings.docs();
|
||||
|
||||
@@ -22,7 +22,6 @@ pub(crate) use loaded_postings::LoadedPostings;
|
||||
pub(crate) use stacker::compute_table_memory_size;
|
||||
|
||||
pub use self::block_segment_postings::BlockSegmentPostings;
|
||||
pub(crate) use self::block_segment_postings::BlockSegmentPostingsNotLoaded;
|
||||
pub(crate) use self::indexing_context::IndexingContext;
|
||||
pub(crate) use self::per_field_postings_writer::PerFieldPostingsWriter;
|
||||
pub use self::postings::Postings;
|
||||
|
||||
@@ -4,7 +4,7 @@ use crate::docset::DocSet;
|
||||
use crate::fastfield::AliveBitSet;
|
||||
use crate::positions::PositionReader;
|
||||
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
||||
use crate::postings::{BlockSegmentPostings, BlockSegmentPostingsNotLoaded, Postings};
|
||||
use crate::postings::{BlockSegmentPostings, Postings};
|
||||
use crate::{DocId, TERMINATED};
|
||||
|
||||
/// `SegmentPostings` represents the inverted list or postings associated with
|
||||
@@ -79,14 +79,15 @@ impl SegmentPostings {
|
||||
.close_term(docs.len() as u32)
|
||||
.expect("In memory Serialization should never fail.");
|
||||
}
|
||||
let block_segment_postings = BlockSegmentPostings::open(
|
||||
let (block_segment_postings, position_within_block) = BlockSegmentPostings::open(
|
||||
docs.len() as u32,
|
||||
FileSlice::from(buffer),
|
||||
IndexRecordOption::Basic,
|
||||
IndexRecordOption::Basic,
|
||||
0u32,
|
||||
)
|
||||
.unwrap();
|
||||
SegmentPostings::from_block_postings(block_segment_postings, None, 0)
|
||||
SegmentPostings::from_block_postings(block_segment_postings, None, position_within_block)
|
||||
}
|
||||
|
||||
/// Helper functions to create `SegmentPostings` for tests.
|
||||
@@ -127,14 +128,15 @@ impl SegmentPostings {
|
||||
postings_serializer
|
||||
.close_term(doc_and_tfs.len() as u32)
|
||||
.unwrap();
|
||||
let block_segment_postings = BlockSegmentPostings::open(
|
||||
let (block_segment_postings, position_within_block) = BlockSegmentPostings::open(
|
||||
doc_and_tfs.len() as u32,
|
||||
FileSlice::from(buffer),
|
||||
IndexRecordOption::WithFreqs,
|
||||
IndexRecordOption::WithFreqs,
|
||||
0u32,
|
||||
)
|
||||
.unwrap();
|
||||
SegmentPostings::from_block_postings(block_segment_postings, None, 0)
|
||||
SegmentPostings::from_block_postings(block_segment_postings, None, position_within_block)
|
||||
}
|
||||
|
||||
/// Creates a Segment Postings from a
|
||||
@@ -142,14 +144,13 @@ impl SegmentPostings {
|
||||
/// - a position reader
|
||||
/// - a target document to seek to
|
||||
pub(crate) fn from_block_postings(
|
||||
segment_block_postings: BlockSegmentPostingsNotLoaded,
|
||||
segment_block_postings: BlockSegmentPostings,
|
||||
position_reader: Option<PositionReader>,
|
||||
seek_doc: DocId,
|
||||
position_within_block: usize,
|
||||
) -> SegmentPostings {
|
||||
let (block_cursor, cur) = segment_block_postings.seek_and_load(seek_doc);
|
||||
SegmentPostings {
|
||||
block_cursor,
|
||||
cur,
|
||||
block_cursor: segment_block_postings,
|
||||
cur: position_within_block,
|
||||
position_reader,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,10 +97,12 @@ where
|
||||
let mut term_stream = self.automaton_stream(term_dict)?;
|
||||
while term_stream.advance() {
|
||||
let term_info = term_stream.value();
|
||||
let mut block_segment_postings = inverted_index
|
||||
.read_block_postings_from_terminfo_not_loaded(term_info, IndexRecordOption::Basic)?
|
||||
.seek_and_load(seek_doc)
|
||||
.0;
|
||||
let (mut block_segment_postings, _) = inverted_index
|
||||
.read_block_postings_from_terminfo_with_seek(
|
||||
term_info,
|
||||
IndexRecordOption::Basic,
|
||||
seek_doc,
|
||||
)?;
|
||||
loop {
|
||||
let docs = block_segment_postings.docs();
|
||||
if docs.is_empty() {
|
||||
|
||||
@@ -234,8 +234,11 @@ impl Weight for InvertedIndexRangeWeight {
|
||||
processed_count += 1;
|
||||
let term_info = term_range.value();
|
||||
let mut block_segment_postings = inverted_index
|
||||
.read_block_postings_from_terminfo_not_loaded(term_info, IndexRecordOption::Basic)?
|
||||
.seek_and_load(seek_doc)
|
||||
.read_block_postings_from_terminfo_with_seek(
|
||||
term_info,
|
||||
IndexRecordOption::Basic,
|
||||
seek_doc,
|
||||
)?
|
||||
.0;
|
||||
loop {
|
||||
let docs = block_segment_postings.docs();
|
||||
|
||||
Reference in New Issue
Block a user