diff --git a/src/docset.rs b/src/docset.rs index 01ea1125a..0926b6df7 100644 --- a/src/docset.rs +++ b/src/docset.rs @@ -1,5 +1,7 @@ use std::borrow::{Borrow, BorrowMut}; +use common::BitSet; + use crate::fastfield::AliveBitSet; use crate::DocId; @@ -106,6 +108,15 @@ pub trait DocSet: Send { buffer.len() } + /// Inserts every remaining doc, from the current `doc()` until `TERMINATED`, into `bitset`. NOTE(review): `bitset` is presumably expected to be large enough to hold the largest doc id - confirm. + fn fill_bitset(&mut self, bitset: &mut BitSet) { + let mut doc = self.doc(); + while doc != TERMINATED { + bitset.insert(doc); + doc = self.advance(); + } + } + /// Returns the current document /// Right after creating a new `DocSet`, the docset points to the first document. /// diff --git a/src/index/inverted_index_reader.rs b/src/index/inverted_index_reader.rs index 7817e5cd8..5137619b5 100644 --- a/src/index/inverted_index_reader.rs +++ b/src/index/inverted_index_reader.rs @@ -12,7 +12,7 @@ use tantivy_fst::automaton::{AlwaysMatch, Automaton}; use crate::codec::postings::PostingsReader as _; use crate::directory::FileSlice; use crate::positions::PositionReader; -use crate::postings::{BlockSegmentPostings, SegmentPostings, TermInfo}; +use crate::postings::{BlockSegmentPostings, Postings, SegmentPostings, TermInfo}; use crate::schema::{IndexRecordOption, Term, Type}; use crate::termdict::TermDictionary; diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index b024bef8a..24ad49423 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -1,4 +1,4 @@ -use common::HasLen; +use common::{BitSet, HasLen}; use crate::codec::postings::PostingsReader; use crate::docset::DocSet; @@ -201,6 +201,19 @@ impl DocSet for SegmentPostings { fn size_hint(&self) -> u32 { self.len() as u32 } + + fn fill_bitset(&mut self, bitset: &mut BitSet) { + loop { + let docs = self.block_cursor.docs(); + if docs.is_empty() { + break; + } + for &doc in docs { + bitset.insert(doc); + } + self.block_cursor.advance(); + } + } } impl HasLen for SegmentPostings { diff --git 
a/src/query/automaton_weight.rs b/src/query/automaton_weight.rs index 1ef960299..d9171c696 100644 --- a/src/query/automaton_weight.rs +++ b/src/query/automaton_weight.rs @@ -5,13 +5,12 @@ use common::BitSet; use tantivy_fst::Automaton; use super::phrase_prefix_query::prefix_end; -use crate::codec::postings::PostingsReader as _; use crate::index::SegmentReader; use crate::postings::TermInfo; use crate::query::{BitSetDocSet, ConstScorer, Explanation, Scorer, Weight}; use crate::schema::{Field, IndexRecordOption}; use crate::termdict::{TermDictionary, TermStreamer}; -use crate::{DocId, Score, TantivyError}; +use crate::{DocId, DocSet, Score, TantivyError}; /// A weight struct for Fuzzy Term and Regex Queries pub struct AutomatonWeight { @@ -93,18 +92,9 @@ where let mut term_stream = self.automaton_stream(term_dict)?; while term_stream.advance() { let term_info = term_stream.value(); - let mut block_segment_postings = inverted_index - .read_block_postings_from_terminfo(term_info, IndexRecordOption::Basic)?; - loop { - let docs = block_segment_postings.docs(); - if docs.is_empty() { - break; - } - for &doc in docs { - doc_bitset.insert(doc); - } - block_segment_postings.advance(); - } + let mut block_segment_postings = + inverted_index.read_postings_from_terminfo(term_info, IndexRecordOption::Basic)?; + block_segment_postings.fill_bitset(&mut doc_bitset); } let doc_bitset = BitSetDocSet::from(doc_bitset); let const_scorer = ConstScorer::new(doc_bitset, boost); diff --git a/src/query/phrase_query/regex_phrase_weight.rs b/src/query/phrase_query/regex_phrase_weight.rs index 4b1c7091a..a34efa830 100644 --- a/src/query/phrase_query/regex_phrase_weight.rs +++ b/src/query/phrase_query/regex_phrase_weight.rs @@ -104,18 +104,9 @@ impl RegexPhraseWeight { term_info: &TermInfo, doc_bitset: &mut BitSet, ) -> crate::Result<()> { - let mut block_segment_postings = inverted_index - .read_block_postings_from_terminfo(term_info, IndexRecordOption::Basic)?; - loop { - let docs = 
block_segment_postings.docs(); - if docs.is_empty() { - break; - } - for &doc in docs { - doc_bitset.insert(doc); - } - block_segment_postings.advance(); - } + let mut segment_postings = + inverted_index.read_postings_from_terminfo(term_info, IndexRecordOption::Basic)?; + segment_postings.fill_bitset(doc_bitset); Ok(()) } diff --git a/src/query/range_query/range_query.rs b/src/query/range_query/range_query.rs index 193ff0346..cbfb75256 100644 --- a/src/query/range_query/range_query.rs +++ b/src/query/range_query/range_query.rs @@ -12,7 +12,7 @@ use crate::query::range_query::is_type_valid_for_fastfield_range_query; use crate::query::{BitSetDocSet, ConstScorer, EnableScoring, Explanation, Query, Scorer, Weight}; use crate::schema::{Field, IndexRecordOption, Term, Type}; use crate::termdict::{TermDictionary, TermStreamer}; -use crate::{DocId, Score}; +use crate::{DocId, DocSet, Score}; /// `RangeQuery` matches all documents that have at least one term within a defined range. /// @@ -229,18 +229,9 @@ impl Weight for InvertedIndexRangeWeight { } processed_count += 1; let term_info = term_range.value(); - let mut block_segment_postings = inverted_index - .read_block_postings_from_terminfo(term_info, IndexRecordOption::Basic)?; - loop { - let docs = block_segment_postings.docs(); - if docs.is_empty() { - break; - } - for &doc in block_segment_postings.docs() { - doc_bitset.insert(doc); - } - block_segment_postings.advance(); - } + let mut postings = + inverted_index.read_postings_from_terminfo(term_info, IndexRecordOption::Basic)?; + postings.fill_bitset(&mut doc_bitset); } let doc_bitset = BitSetDocSet::from(doc_bitset); Ok(Box::new(ConstScorer::new(doc_bitset, boost)))