diff --git a/src/postings/loaded_postings.rs b/src/postings/loaded_postings.rs
index 7258f5cea..545571d0b 100644
--- a/src/postings/loaded_postings.rs
+++ b/src/postings/loaded_postings.rs
@@ -101,6 +101,10 @@ impl Postings for LoadedPostings {
             output.push(*pos + offset);
         }
     }
+
+    fn freq_reading_option(&self) -> super::FreqReadingOption {
+        super::FreqReadingOption::ReadFreq
+    }
 }
 
 #[cfg(test)]
diff --git a/src/postings/postings.rs b/src/postings/postings.rs
index 8606f00a9..14ffff890 100644
--- a/src/postings/postings.rs
+++ b/src/postings/postings.rs
@@ -1,4 +1,8 @@
 use crate::docset::DocSet;
+use crate::fieldnorm::FieldNormReader;
+use crate::postings::FreqReadingOption;
+use crate::query::{Bm25Weight, Scorer};
+use crate::{DocId, Score};
 
 /// Postings (also called inverted list)
 ///
@@ -11,6 +15,10 @@ use crate::docset::DocSet;
 /// but other implementations mocking `SegmentPostings` exist,
 /// for merging segments or for testing.
 pub trait Postings: DocSet + 'static {
+    // NOTE(review): required method, but no `impl Postings` in this patch defines it.
+    /// Builds a term scorer from these postings, the fieldnorm reader and the BM25 weight.
+    fn new_term_scorer(self: Box, fieldnorm_reader: &FieldNormReader, similarity_weight: &Bm25Weight) -> Box;
+
     /// The number of times the term appears in the document.
     fn term_freq(&self) -> u32;
 
@@ -31,6 +39,33 @@ pub trait Postings: DocSet + 'static {
     fn positions(&mut self, output: &mut Vec) {
         self.positions_with_offset(0u32, output);
     }
+
+    /// Returns `true` if these postings support block-max operations (Block-WAND).
+    fn supports_block_max(&self) -> bool {
+        false
+    }
+
+    /// Moves to the block for `target_doc` and returns that block's maximum score.
+    ///
+    /// Only allowed when `supports_block_max()` is `true`; the default implementation panics.
+    fn seek_block(
+        &mut self,
+        _target_doc: DocId,
+        _fieldnorm_reader: &FieldNormReader,
+        _similarity_weight: &Bm25Weight,
+    ) -> Score {
+        unimplemented!()
+    }
+
+    /// Returns the last doc id of the current block.
+    ///
+    /// Only allowed when `supports_block_max()` is `true`; the default implementation panics.
+    fn last_doc_in_block(&self) -> DocId {
+        unimplemented!()
+    }
+
+    /// See `FreqReadingOption`.
+    fn freq_reading_option(&self) -> FreqReadingOption;
 }
 
 impl Postings for Box {
@@ -41,4 +76,25 @@ impl Postings for Box {
     fn append_positions_with_offset(&mut self, offset: u32, output: &mut Vec) {
         (**self).append_positions_with_offset(offset, output);
     }
+
+    fn supports_block_max(&self) -> bool {
+        (**self).supports_block_max()
+    }
+
+    fn seek_block(
+        &mut self,
+        target_doc: DocId,
+        fieldnorm_reader: &FieldNormReader,
+        similarity_weight: &Bm25Weight,
+    ) -> Score {
+        (**self).seek_block(target_doc, fieldnorm_reader, similarity_weight)
+    }
+
+    fn last_doc_in_block(&self) -> DocId {
+        (**self).last_doc_in_block()
+    }
+
+    fn freq_reading_option(&self) -> FreqReadingOption {
+        (**self).freq_reading_option()
+    }
 }
diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs
index 1e064586a..2be94bf66 100644
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -3,10 +3,12 @@ use common::HasLen;
 use crate::codec::postings::PostingsReader;
 use crate::docset::DocSet;
 use crate::fastfield::AliveBitSet;
+use crate::fieldnorm::FieldNormReader;
 use crate::positions::PositionReader;
 use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
-use crate::postings::{BlockSegmentPostings, Postings};
-use crate::{DocId, TERMINATED};
+use crate::postings::{BlockSegmentPostings, FreqReadingOption, Postings};
+use crate::query::Bm25Weight;
+use crate::{DocId, Score, TERMINATED};
 
 /// `SegmentPostings` represents the inverted list or postings associated with
 /// a term in a `Segment`.
@@ -252,6 +254,33 @@ impl Postings for SegmentPostings {
             }
         }
     }
+
+    fn supports_block_max(&self) -> bool {
+        true
+    }
+
+    /// Seeks the block cursor to `target_doc` and returns the maximum score for that block.
+    ///
+    /// In some rare cases the result may not be exact: a lower value is then returned
+    /// (and may lead us to return a lesser document).
+    fn seek_block(
+        &mut self,
+        target_doc: DocId,
+        fieldnorm_reader: &FieldNormReader,
+        similarity_weight: &Bm25Weight,
+    ) -> Score {
+        self.block_cursor.seek_block(target_doc);
+        self.block_cursor
+            .block_max_score(fieldnorm_reader, similarity_weight)
+    }
+
+    fn last_doc_in_block(&self) -> DocId {
+        self.block_cursor.skip_reader().last_doc_in_block()
+    }
+
+    fn freq_reading_option(&self) -> FreqReadingOption {
+        self.block_cursor.freq_reading_option()
+    }
 }
 
 #[cfg(test)]
diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs
index 9da788ad2..f49977c6c 100644
--- a/src/query/term_query/term_scorer.rs
+++ b/src/query/term_query/term_scorer.rs
@@ -7,18 +7,18 @@ use crate::query::{Explanation, Scorer};
 use crate::{DocId, Score};
 
 #[derive(Clone)]
-pub struct TermScorer {
-    postings: SegmentPostings,
+pub struct TermScorer {
+    postings: TPostings,
     fieldnorm_reader: FieldNormReader,
     similarity_weight: Bm25Weight,
 }
 
-impl TermScorer {
+impl TermScorer {
     pub fn new(
-        postings: SegmentPostings,
+        postings: TPostings,
         fieldnorm_reader: FieldNormReader,
         similarity_weight: Bm25Weight,
-    ) -> TermScorer {
+    ) -> TermScorer {
         TermScorer {
             postings,
             fieldnorm_reader,
@@ -26,11 +26,6 @@ impl TermScorer {
         }
     }
 
-    pub(crate) fn seek_block(&mut self, target_doc: DocId) -> Score {
-        self.postings.block_cursor.seek_block(target_doc);
-        self.block_max_score()
-    }
-
     #[cfg(test)]
     pub fn create_for_test(
         doc_and_tfs: &[(DocId, u32)],
@@ -54,27 +49,7 @@ impl TermScorer {
 
     /// See `FreqReadingOption`.
     pub(crate) fn freq_reading_option(&self) -> FreqReadingOption {
-        self.postings.block_cursor.freq_reading_option()
-    }
-
-    /// Returns the maximum score for the current block.
-    ///
-    /// In some rare case, the result may not be exact. In this case a lower value is returned,
-    /// (and may lead us to return a lesser document).
-    ///
-    /// At index time, we store the (fieldnorm_id, term frequency) pair that maximizes the
-    /// score assuming the average fieldnorm computed on this segment.
-    ///
-    /// Though extremely rare, it is theoretically possible that the actual average fieldnorm
-    /// is different enough from the current segment average fieldnorm that the maximum over a
-    /// specific is achieved on a different document.
-    ///
-    /// (The result is on the other hand guaranteed to be correct if there is only one segment).
-    #[inline(always)]
-    fn block_max_score(&mut self) -> Score {
-        self.postings
-            .block_cursor
-            .block_max_score(&self.fieldnorm_reader, &self.similarity_weight)
+        self.postings.freq_reading_option()
     }
 
     pub fn term_freq(&self) -> u32 {
@@ -96,11 +71,16 @@ impl TermScorer {
     }
 
     pub fn last_doc_in_block(&self) -> DocId {
-        self.postings.block_cursor.skip_reader().last_doc_in_block()
+        self.postings.last_doc_in_block()
+    }
+
+    pub(crate) fn seek_block(&mut self, target_doc: DocId) -> Score {
+        self.postings
+            .seek_block(target_doc, &self.fieldnorm_reader, &self.similarity_weight)
     }
 }
 
-impl DocSet for TermScorer {
+impl DocSet for TermScorer {
     #[inline]
     fn advance(&mut self) -> DocId {
         self.postings.advance()
@@ -282,8 +262,8 @@ mod tests {
         {
             let mut term_scorer = term_weight.term_scorer_for_test(reader, 1.0)?.unwrap();
             for d in docs {
-                term_scorer.seek_block(d);
-                block_max_scores_b.push(term_scorer.block_max_score());
+                let block_max_score = term_scorer.seek_block(d);
+                block_max_scores_b.push(block_max_score);
             }
         }
         for (l, r) in block_max_scores
diff --git a/src/query/union/bitset_union.rs b/src/query/union/bitset_union.rs
index 8af1703ee..3bde00280 100644
--- a/src/query/union/bitset_union.rs
+++ b/src/query/union/bitset_union.rs
@@ -1,7 +1,7 @@
 use std::cell::RefCell;
 
 use crate::docset::DocSet;
-use crate::postings::Postings;
+use crate::postings::{FreqReadingOption, Postings};
 use crate::query::BitSetDocSet;
 use crate::DocId;
 
@@ -46,6 +46,10 @@ impl Postings for BitSetPostingUnion {
         term_freq
     }
 
+    fn freq_reading_option(&self) -> FreqReadingOption {
+        FreqReadingOption::ReadFreq
+    }
+
     fn append_positions_with_offset(&mut self, offset: u32, output: &mut Vec) {
         let curr_doc = self.bitset.doc();
         let mut docsets = self.docsets.borrow_mut();
diff --git a/src/query/union/simple_union.rs b/src/query/union/simple_union.rs
index b153a7f22..c041a6a50 100644
--- a/src/query/union/simple_union.rs
+++ b/src/query/union/simple_union.rs
@@ -1,5 +1,5 @@
 use crate::docset::{DocSet, TERMINATED};
-use crate::postings::Postings;
+use crate::postings::{FreqReadingOption, Postings};
 use crate::DocId;
 
 /// A `SimpleUnion` is a `DocSet` that is the union of multiple `DocSet`.
@@ -56,6 +56,10 @@ impl Postings for SimpleUnion {
         term_freq
     }
 
+    fn freq_reading_option(&self) -> FreqReadingOption {
+        FreqReadingOption::ReadFreq
+    }
+
     fn append_positions_with_offset(&mut self, offset: u32, output: &mut Vec) {
         for docset in &mut self.docsets {
             let doc = docset.doc();