rename seek_exact

This commit is contained in:
Pascal Seitz
2024-12-02 11:36:45 +01:00
parent a0123192c8
commit eb586fb210
10 changed files with 39 additions and 32 deletions

View File

@@ -53,24 +53,22 @@ pub trait DocSet: Send {
/// Seeks to the target if possible and returns true if the target is in the DocSet.
///
/// Implementations may choose to advance past the target if target does not exist.
///
/// DocSets that already have an efficient `seek` method don't need to implement `seek_exact`.
/// All wrapper DocSets should forward `seek_exact` to the underlying DocSet.
///
/// ## API Behaviour
/// If `seek_exact` is returning true, a call to `doc()` has to return target.
/// If `seek_exact` is returning false, a call to `doc()` may return any doc and should not be
/// used until `seek_exact` returns true again. The DocSet is considered to be in an invalid
/// state until `seek_exact` returns true again.
/// If `seek_exact` is returning false, a call to `doc()` may return any doc between
/// the last doc that matched and target or a doc that is a valid next hit after target.
/// The DocSet is considered to be in an invalid state until `seek_exact` returns true again.
///
/// target needs to be equal or larger than `doc` when in a valid state.
/// `target` needs to be equal or larger than `doc` when in a valid state.
///
/// Consecutive calls are not allowed to have decreasing `target` values.
///
/// # Warning
/// This is an advanced API used by intersection. The API contract is tricky, avoid using it.
fn seek_exact(&mut self, target: DocId) -> bool {
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
let current_doc = self.doc();
if current_doc < target {
self.seek(target);
@@ -175,8 +173,8 @@ impl DocSet for &mut dyn DocSet {
(**self).seek(target)
}
fn seek_exact(&mut self, target: DocId) -> bool {
(**self).seek_exact(target)
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
(**self).seek_into_the_danger_zone(target)
}
fn doc(&self) -> u32 {
@@ -211,9 +209,9 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
unboxed.seek(target)
}
fn seek_exact(&mut self, target: DocId) -> bool {
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.seek_exact(target)
unboxed.seek_into_the_danger_zone(target)
}
fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {

View File

@@ -9,6 +9,7 @@ const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * MAX_VINT_SIZE;
mod vint;
/// Returns the size in bytes of a compressed block, given `num_bits`.
#[inline]
pub fn compressed_block_size(num_bits: u8) -> usize {
(num_bits as usize) * COMPRESSION_BLOCK_SIZE / 8
}

View File

@@ -104,8 +104,8 @@ impl<S: Scorer> DocSet for BoostScorer<S> {
fn seek(&mut self, target: DocId) -> DocId {
self.underlying.seek(target)
}
fn seek_exact(&mut self, target: DocId) -> bool {
self.underlying.seek_exact(target)
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
self.underlying.seek_into_the_danger_zone(target)
}
fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {

View File

@@ -67,8 +67,8 @@ impl<T: Scorer> DocSet for ScorerWrapper<T> {
self.current_doc = doc_id;
doc_id
}
fn seek_exact(&mut self, target: DocId) -> bool {
let found = self.scorer.seek_exact(target);
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
let found = self.scorer.seek_into_the_danger_zone(target);
self.current_doc = self.scorer.doc();
found
}

View File

@@ -118,12 +118,17 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
// of the two rarest `DocSet` in the intersection.
loop {
if right.seek_exact(candidate) {
if right.seek_into_the_danger_zone(candidate) {
break;
}
// `left.advance().max(right.doc())` yielded a regression in the search game
// benchmark It may make sense in certain scenarios though.
candidate = left.advance();
let right_doc = right.doc();
// TODO: Think about which value would make sense here
// It depends on the DocSet implementation, when a seek would outweigh an advance.
if right_doc > candidate.wrapping_add(100) {
candidate = left.seek(right_doc);
} else {
candidate = left.advance();
}
if candidate == TERMINATED {
return TERMINATED;
}
@@ -134,7 +139,7 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
if self
.others
.iter_mut()
.all(|docset| docset.seek_exact(candidate))
.all(|docset| docset.seek_into_the_danger_zone(candidate))
{
debug_assert_eq!(candidate, self.left.doc());
debug_assert_eq!(candidate, self.right.doc());
@@ -161,13 +166,13 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
///
/// Some implementations may choose to advance past the target if beneficial for performance.
/// The return value is `true` if the target is in the docset, and `false` otherwise.
fn seek_exact(&mut self, target: DocId) -> bool {
self.left.seek_exact(target)
&& self.right.seek_exact(target)
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
self.left.seek_into_the_danger_zone(target)
&& self.right.seek_into_the_danger_zone(target)
&& self
.others
.iter_mut()
.all(|docset| docset.seek_exact(target))
.all(|docset| docset.seek_into_the_danger_zone(target))
}
fn doc(&self) -> DocId {

View File

@@ -193,8 +193,8 @@ impl<TPostings: Postings> DocSet for PhrasePrefixScorer<TPostings> {
self.advance()
}
fn seek_exact(&mut self, target: DocId) -> bool {
if self.phrase_scorer.seek_exact(target) {
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
if self.phrase_scorer.seek_into_the_danger_zone(target) {
self.matches_prefix()
} else {
false

View File

@@ -530,9 +530,9 @@ impl<TPostings: Postings> DocSet for PhraseScorer<TPostings> {
self.advance()
}
fn seek_exact(&mut self, target: DocId) -> bool {
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
debug_assert!(target >= self.doc());
if self.intersection_docset.seek_exact(target) && self.phrase_match() {
if self.intersection_docset.seek_into_the_danger_zone(target) && self.phrase_match() {
return true;
}
false

View File

@@ -56,9 +56,9 @@ where
self.req_scorer.seek(target)
}
fn seek_exact(&mut self, target: DocId) -> bool {
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
self.score_cache = None;
self.req_scorer.seek_exact(target)
self.req_scorer.seek_into_the_danger_zone(target)
}
fn doc(&self) -> DocId {

View File

@@ -98,14 +98,17 @@ impl TermScorer {
}
impl DocSet for TermScorer {
#[inline]
fn advance(&mut self) -> DocId {
self.postings.advance()
}
#[inline]
fn seek(&mut self, target: DocId) -> DocId {
self.postings.seek(target)
}
#[inline]
fn doc(&self) -> DocId {
self.postings.doc()
}

View File

@@ -217,11 +217,11 @@ where
}
}
fn seek_exact(&mut self, target: DocId) -> bool {
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
let is_hit = self
.docsets
.iter_mut()
.all(|docset| docset.seek_exact(target));
.all(|docset| docset.seek_into_the_danger_zone(target));
// The API requires the DocSet to be in a valid state when `seek_exact` returns true.
if is_hit {
self.seek(target);