diff --git a/src/docset.rs b/src/docset.rs index e8799b2e9..6fe3ec9c5 100644 --- a/src/docset.rs +++ b/src/docset.rs @@ -53,24 +53,22 @@ pub trait DocSet: Send { /// Seeks to the target if possible and returns true if the target is in the DocSet. /// - /// Implementations may choose to advance past the target if target does not exist. - /// - /// DocSets that already have an efficient `seek` method don't need to implement `seek_exact`. - /// All wrapper DocSets should forward `seek_exact` to the underlying DocSet. + /// DocSets that already have an efficient `seek` method don't need to implement `seek_into_the_danger_zone`. + /// All wrapper DocSets should forward `seek_into_the_danger_zone` to the underlying DocSet. /// /// ## API Behaviour - /// If `seek_exact` is returning true, a call to `doc()` has to return target. - /// If `seek_exact` is returning false, a call to `doc()` may return any doc and should not be - /// used until `seek_exact` returns true again. The DocSet is considered to be in an invalid - /// state until `seek_exact` returns true again. + /// If `seek_into_the_danger_zone` is returning true, a call to `doc()` has to return target. + /// If `seek_into_the_danger_zone` is returning false, a call to `doc()` may return any doc between + /// the last doc that matched and target or a doc that is a valid next hit after target. + /// The DocSet is considered to be in an invalid state until `seek_into_the_danger_zone` returns true again. /// - /// target needs to be equal or larger than `doc` when in a valid state. + /// `target` needs to be equal or larger than `doc` when in a valid state. /// /// Consecutive calls are not allowed to have decreasing `target` values. /// /// # Warning /// This is an advanced API used by intersection. The API contract is tricky, avoid using it. 
- fn seek_exact(&mut self, target: DocId) -> bool { + fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool { let current_doc = self.doc(); if current_doc < target { self.seek(target); @@ -175,8 +173,8 @@ impl DocSet for &mut dyn DocSet { (**self).seek(target) } - fn seek_exact(&mut self, target: DocId) -> bool { - (**self).seek_exact(target) + fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool { + (**self).seek_into_the_danger_zone(target) } fn doc(&self) -> u32 { @@ -211,9 +209,9 @@ impl DocSet for Box { unboxed.seek(target) } - fn seek_exact(&mut self, target: DocId) -> bool { + fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool { let unboxed: &mut TDocSet = self.borrow_mut(); - unboxed.seek_exact(target) + unboxed.seek_into_the_danger_zone(target) } fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize { diff --git a/src/postings/compression/mod.rs b/src/postings/compression/mod.rs index 6b7b0de9f..62eeca3d5 100644 --- a/src/postings/compression/mod.rs +++ b/src/postings/compression/mod.rs @@ -9,6 +9,7 @@ const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * MAX_VINT_SIZE; mod vint; /// Returns the size in bytes of a compressed block, given `num_bits`. 
+#[inline] pub fn compressed_block_size(num_bits: u8) -> usize { (num_bits as usize) * COMPRESSION_BLOCK_SIZE / 8 } diff --git a/src/query/boost_query.rs b/src/query/boost_query.rs index da442b323..ecbf3d8d6 100644 --- a/src/query/boost_query.rs +++ b/src/query/boost_query.rs @@ -104,8 +104,8 @@ impl DocSet for BoostScorer { fn seek(&mut self, target: DocId) -> DocId { self.underlying.seek(target) } - fn seek_exact(&mut self, target: DocId) -> bool { - self.underlying.seek_exact(target) + fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool { + self.underlying.seek_into_the_danger_zone(target) } fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize { diff --git a/src/query/disjunction.rs b/src/query/disjunction.rs index f34394e0f..b2f1080fc 100644 --- a/src/query/disjunction.rs +++ b/src/query/disjunction.rs @@ -67,8 +67,8 @@ impl DocSet for ScorerWrapper { self.current_doc = doc_id; doc_id } - fn seek_exact(&mut self, target: DocId) -> bool { - let found = self.scorer.seek_exact(target); + fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool { + let found = self.scorer.seek_into_the_danger_zone(target); self.current_doc = self.scorer.doc(); found } diff --git a/src/query/intersection.rs b/src/query/intersection.rs index 148c80f05..78c03163d 100644 --- a/src/query/intersection.rs +++ b/src/query/intersection.rs @@ -118,12 +118,17 @@ impl DocSet for Intersection candidate.wrapping_add(100) { + candidate = left.seek(right_doc); + } else { + candidate = left.advance(); + } if candidate == TERMINATED { return TERMINATED; } @@ -134,7 +139,7 @@ impl DocSet for Intersection DocSet for Intersection bool { - self.left.seek_exact(target) - && self.right.seek_exact(target) + fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool { + self.left.seek_into_the_danger_zone(target) + && self.right.seek_into_the_danger_zone(target) && self .others .iter_mut() - .all(|docset| docset.seek_exact(target)) + .all(|docset| 
docset.seek_into_the_danger_zone(target)) } fn doc(&self) -> DocId { diff --git a/src/query/phrase_prefix_query/phrase_prefix_scorer.rs b/src/query/phrase_prefix_query/phrase_prefix_scorer.rs index aa6f26d26..cc7bb7886 100644 --- a/src/query/phrase_prefix_query/phrase_prefix_scorer.rs +++ b/src/query/phrase_prefix_query/phrase_prefix_scorer.rs @@ -193,8 +193,8 @@ impl DocSet for PhrasePrefixScorer { self.advance() } - fn seek_exact(&mut self, target: DocId) -> bool { - if self.phrase_scorer.seek_exact(target) { + fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool { + if self.phrase_scorer.seek_into_the_danger_zone(target) { self.matches_prefix() } else { false diff --git a/src/query/phrase_query/phrase_scorer.rs b/src/query/phrase_query/phrase_scorer.rs index f0f87c9fe..886acf489 100644 --- a/src/query/phrase_query/phrase_scorer.rs +++ b/src/query/phrase_query/phrase_scorer.rs @@ -530,9 +530,9 @@ impl DocSet for PhraseScorer { self.advance() } - fn seek_exact(&mut self, target: DocId) -> bool { + fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool { debug_assert!(target >= self.doc()); - if self.intersection_docset.seek_exact(target) && self.phrase_match() { + if self.intersection_docset.seek_into_the_danger_zone(target) && self.phrase_match() { return true; } false diff --git a/src/query/reqopt_scorer.rs b/src/query/reqopt_scorer.rs index 1217eb65a..45857567c 100644 --- a/src/query/reqopt_scorer.rs +++ b/src/query/reqopt_scorer.rs @@ -56,9 +56,9 @@ where self.req_scorer.seek(target) } - fn seek_exact(&mut self, target: DocId) -> bool { + fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool { self.score_cache = None; - self.req_scorer.seek_exact(target) + self.req_scorer.seek_into_the_danger_zone(target) } fn doc(&self) -> DocId { diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs index 5c020febd..293aa7871 100644 --- a/src/query/term_query/term_scorer.rs +++ b/src/query/term_query/term_scorer.rs 
@@ -98,14 +98,17 @@ impl TermScorer { } impl DocSet for TermScorer { + #[inline] fn advance(&mut self) -> DocId { self.postings.advance() } + #[inline] fn seek(&mut self, target: DocId) -> DocId { self.postings.seek(target) } + #[inline] fn doc(&self) -> DocId { self.postings.doc() } diff --git a/src/query/union/buffered_union.rs b/src/query/union/buffered_union.rs index a3bf64990..9b3b085fa 100644 --- a/src/query/union/buffered_union.rs +++ b/src/query/union/buffered_union.rs @@ -217,11 +217,11 @@ where } } - fn seek_exact(&mut self, target: DocId) -> bool { + fn seek_into_the_danger_zone(&mut self, target: DocId) -> bool { let is_hit = self .docsets .iter_mut() - .all(|docset| docset.seek_exact(target)); + .all(|docset| docset.seek_into_the_danger_zone(target)); - // The API requires the DocSet to be in a valid state when `seek_exact` returns true. + // The API requires the DocSet to be in a valid state when `seek_into_the_danger_zone` returns true. if is_hit { self.seek(target);