mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-17 06:22:54 +00:00
rename seek_exact
This commit is contained in:
@@ -53,24 +53,22 @@ pub trait DocSet: Send {
|
||||
|
||||
/// Seeks to the target if possible and returns true if the target is in the DocSet.
|
||||
///
|
||||
/// Implementations may choose to advance past the target if target does not exist.
|
||||
///
|
||||
/// DocSets that already have an efficient `seek` method don't need to implement `seek_into_the_danger_zone`.
|
||||
/// All wrapper DocSets should forward `seek_into_the_danger_zone` to the underlying DocSet.
|
||||
///
|
||||
/// ## API Behaviour
|
||||
/// If `seek_into_the_danger_zone` returns true, a call to `doc()` has to return `target`.
|
||||
/// If `seek_exact` is returning false, a call to `doc()` may return any doc and should not be
|
||||
/// used until `seek_exact` returns true again. The DocSet is considered to be in an invalid
|
||||
/// state until `seek_exact` returns true again.
|
||||
/// If `seek_exact` is returning false, a call to `doc()` may return any doc between
|
||||
/// the last doc that matched and target or a doc that is a valid next hit after target.
|
||||
/// The DocSet is considered to be in an invalid state until `seek_into_the_danger_zone` returns true again.
|
||||
///
|
||||
/// target needs to be equal or larger than `doc` when in a valid state.
|
||||
/// `target` needs to be equal or larger than `doc` when in a valid state.
|
||||
///
|
||||
/// Consecutive calls are not allowed to have decreasing `target` values.
|
||||
///
|
||||
/// # Warning
|
||||
/// This is an advanced API used by intersection. The API contract is tricky, avoid using it.
|
||||
fn seek_exact(&mut self, target: DocId) -> bool {
|
||||
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
|
||||
let current_doc = self.doc();
|
||||
if current_doc < target {
|
||||
self.seek(target);
|
||||
@@ -175,8 +173,8 @@ impl DocSet for &mut dyn DocSet {
|
||||
(**self).seek(target)
|
||||
}
|
||||
|
||||
fn seek_exact(&mut self, target: DocId) -> bool {
|
||||
(**self).seek_exact(target)
|
||||
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
|
||||
(**self).seek_into_the_danger_zone(target)
|
||||
}
|
||||
|
||||
fn doc(&self) -> u32 {
|
||||
@@ -211,9 +209,9 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
|
||||
unboxed.seek(target)
|
||||
}
|
||||
|
||||
fn seek_exact(&mut self, target: DocId) -> bool {
|
||||
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
|
||||
let unboxed: &mut TDocSet = self.borrow_mut();
|
||||
unboxed.seek_exact(target)
|
||||
unboxed.seek_into_the_danger_zone(target)
|
||||
}
|
||||
|
||||
fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
|
||||
|
||||
@@ -9,6 +9,7 @@ const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * MAX_VINT_SIZE;
|
||||
mod vint;
|
||||
|
||||
/// Returns the size in bytes of a compressed block, given `num_bits`.
|
||||
#[inline]
|
||||
pub fn compressed_block_size(num_bits: u8) -> usize {
|
||||
(num_bits as usize) * COMPRESSION_BLOCK_SIZE / 8
|
||||
}
|
||||
|
||||
@@ -104,8 +104,8 @@ impl<S: Scorer> DocSet for BoostScorer<S> {
|
||||
fn seek(&mut self, target: DocId) -> DocId {
|
||||
self.underlying.seek(target)
|
||||
}
|
||||
fn seek_exact(&mut self, target: DocId) -> bool {
|
||||
self.underlying.seek_exact(target)
|
||||
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
|
||||
self.underlying.seek_into_the_danger_zone(target)
|
||||
}
|
||||
|
||||
fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
|
||||
|
||||
@@ -67,8 +67,8 @@ impl<T: Scorer> DocSet for ScorerWrapper<T> {
|
||||
self.current_doc = doc_id;
|
||||
doc_id
|
||||
}
|
||||
fn seek_exact(&mut self, target: DocId) -> bool {
|
||||
let found = self.scorer.seek_exact(target);
|
||||
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
|
||||
let found = self.scorer.seek_into_the_danger_zone(target);
|
||||
self.current_doc = self.scorer.doc();
|
||||
found
|
||||
}
|
||||
|
||||
@@ -118,12 +118,17 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
|
||||
// of the two rarest `DocSet` in the intersection.
|
||||
|
||||
loop {
|
||||
if right.seek_exact(candidate) {
|
||||
if right.seek_into_the_danger_zone(candidate) {
|
||||
break;
|
||||
}
|
||||
// `left.advance().max(right.doc())` yielded a regression in the search game
|
||||
// benchmark It may make sense in certain scenarios though.
|
||||
candidate = left.advance();
|
||||
let right_doc = right.doc();
|
||||
// TODO: Think about which value would make sense here
|
||||
// It depends on the DocSet implementation, when a seek would outweigh an advance.
|
||||
if right_doc > candidate.wrapping_add(100) {
|
||||
candidate = left.seek(right_doc);
|
||||
} else {
|
||||
candidate = left.advance();
|
||||
}
|
||||
if candidate == TERMINATED {
|
||||
return TERMINATED;
|
||||
}
|
||||
@@ -134,7 +139,7 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
|
||||
if self
|
||||
.others
|
||||
.iter_mut()
|
||||
.all(|docset| docset.seek_exact(candidate))
|
||||
.all(|docset| docset.seek_into_the_danger_zone(candidate))
|
||||
{
|
||||
debug_assert_eq!(candidate, self.left.doc());
|
||||
debug_assert_eq!(candidate, self.right.doc());
|
||||
@@ -161,13 +166,13 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
|
||||
///
|
||||
/// Some implementations may choose to advance past the target if beneficial for performance.
|
||||
/// The return value is `true` if the target is in the docset, and `false` otherwise.
|
||||
fn seek_exact(&mut self, target: DocId) -> bool {
|
||||
self.left.seek_exact(target)
|
||||
&& self.right.seek_exact(target)
|
||||
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
|
||||
self.left.seek_into_the_danger_zone(target)
|
||||
&& self.right.seek_into_the_danger_zone(target)
|
||||
&& self
|
||||
.others
|
||||
.iter_mut()
|
||||
.all(|docset| docset.seek_exact(target))
|
||||
.all(|docset| docset.seek_into_the_danger_zone(target))
|
||||
}
|
||||
|
||||
fn doc(&self) -> DocId {
|
||||
|
||||
@@ -193,8 +193,8 @@ impl<TPostings: Postings> DocSet for PhrasePrefixScorer<TPostings> {
|
||||
self.advance()
|
||||
}
|
||||
|
||||
fn seek_exact(&mut self, target: DocId) -> bool {
|
||||
if self.phrase_scorer.seek_exact(target) {
|
||||
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
|
||||
if self.phrase_scorer.seek_into_the_danger_zone(target) {
|
||||
self.matches_prefix()
|
||||
} else {
|
||||
false
|
||||
|
||||
@@ -530,9 +530,9 @@ impl<TPostings: Postings> DocSet for PhraseScorer<TPostings> {
|
||||
self.advance()
|
||||
}
|
||||
|
||||
fn seek_exact(&mut self, target: DocId) -> bool {
|
||||
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
|
||||
debug_assert!(target >= self.doc());
|
||||
if self.intersection_docset.seek_exact(target) && self.phrase_match() {
|
||||
if self.intersection_docset.seek_into_the_danger_zone(target) && self.phrase_match() {
|
||||
return true;
|
||||
}
|
||||
false
|
||||
|
||||
@@ -56,9 +56,9 @@ where
|
||||
self.req_scorer.seek(target)
|
||||
}
|
||||
|
||||
fn seek_exact(&mut self, target: DocId) -> bool {
|
||||
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
|
||||
self.score_cache = None;
|
||||
self.req_scorer.seek_exact(target)
|
||||
self.req_scorer.seek_into_the_danger_zone(target)
|
||||
}
|
||||
|
||||
fn doc(&self) -> DocId {
|
||||
|
||||
@@ -98,14 +98,17 @@ impl TermScorer {
|
||||
}
|
||||
|
||||
impl DocSet for TermScorer {
|
||||
#[inline]
|
||||
fn advance(&mut self) -> DocId {
|
||||
self.postings.advance()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn seek(&mut self, target: DocId) -> DocId {
|
||||
self.postings.seek(target)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn doc(&self) -> DocId {
|
||||
self.postings.doc()
|
||||
}
|
||||
|
||||
@@ -217,11 +217,11 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
fn seek_exact(&mut self, target: DocId) -> bool {
|
||||
fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool {
|
||||
let is_hit = self
|
||||
.docsets
|
||||
.iter_mut()
|
||||
.all(|docset| docset.seek_exact(target));
|
||||
.all(|docset| docset.seek_into_the_danger_zone(target));
|
||||
// The API requires the DocSet to be in a valid state when `seek_into_the_danger_zone` returns true.
|
||||
if is_hit {
|
||||
self.seek(target);
|
||||
|
||||
Reference in New Issue
Block a user