mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-05 16:52:55 +00:00
add API contract verification
This commit is contained in:
committed by
Pascal Seitz
parent
6c756117f8
commit
e5bf84c7d0
@@ -40,6 +40,8 @@ pub trait DocSet: Send {
|
||||
/// of `DocSet` should support it.
|
||||
///
|
||||
/// Calling `seek(TERMINATED)` is also legal and is the normal way to consume a `DocSet`.
|
||||
///
|
||||
/// `target` has to be greater than or equal to `.doc()` when calling `seek`.
|
||||
fn seek(&mut self, target: DocId) -> DocId {
|
||||
let mut doc = self.doc();
|
||||
debug_assert!(doc <= target);
|
||||
@@ -58,11 +60,22 @@ pub trait DocSet: Send {
|
||||
///
|
||||
/// ## API Behaviour
|
||||
/// If `seek_exact` returns true, a call to `doc()` has to return `target`.
|
||||
/// If `seek_exact` returns false, a call to `doc()` may return the previous doc,
|
||||
/// which may be lower than target.
|
||||
/// If `seek_exact` returns false, a call to `doc()` may return any doc and should not be
|
||||
/// used until `seek_exact` returns true again. The DocSet is considered to be in an invalid
|
||||
/// state until `seek_exact` returns true again.
|
||||
///
|
||||
/// `target` needs to be greater than or equal to `doc()` when the DocSet is in a valid state.
|
||||
///
|
||||
/// Consecutive calls are not allowed to have decreasing `target` values.
|
||||
///
|
||||
/// # Warning
|
||||
/// This is an advanced API used by intersection. The API contract is tricky, avoid using it.
|
||||
fn seek_exact(&mut self, target: DocId) -> bool {
|
||||
let doc = self.seek(target);
|
||||
doc == target
|
||||
let current_doc = self.doc();
|
||||
if current_doc < target {
|
||||
self.seek(target);
|
||||
}
|
||||
self.doc() == target
|
||||
}
|
||||
|
||||
/// Fills a given mutable buffer with the next doc ids from the
|
||||
|
||||
@@ -13,6 +13,9 @@ use crate::{DocId, Score};
|
||||
/// For better performance, the function uses a
|
||||
/// specialized implementation if the two
|
||||
/// shortest scorers are `TermScorer`s.
|
||||
///
|
||||
/// num_docs_segment is the number of documents in the segment. It is used for estimating the
|
||||
/// `size_hint` of the intersection.
|
||||
pub fn intersect_scorers(
|
||||
mut scorers: Vec<Box<dyn Scorer>>,
|
||||
num_docs_segment: u32,
|
||||
|
||||
@@ -194,7 +194,14 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe
|
||||
fn cost(&self) -> u64 {
|
||||
// Advancing the docset is pretty expensive since it scans the whole column, there is no
|
||||
// index currently (will change with a kd-tree)
|
||||
// Since we use SIMD to scan the fast field range query we lower the cost a little bit.
|
||||
// Since we use SIMD to scan the fast field range query we lower the cost a little bit,
|
||||
// assuming that we hit 10% of the docs like in size_hint.
|
||||
//
|
||||
// If we would return a cost higher than num_docs, we would never choose ff range query as
|
||||
// the driver in a DocSet, when intersecting a term query with a fast field. But
|
||||
// it's the faster choice when the term query has a lot of docids and the range
|
||||
// query has not.
|
||||
//
|
||||
// Ideally this would take the fast field codec into account
|
||||
(self.column.num_docs() as f64 * 0.8) as u64
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user