mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-01 16:10:42 +00:00
Better intersection and added size_hint
This commit is contained in:
@@ -65,6 +65,10 @@ pub trait DocSet {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a best-effort hint of the
|
||||
/// length of the docset.
/// This is a required method: the trait supplies no default body.
|
||||
fn size_hint(&self) -> usize;
|
||||
}
|
||||
|
||||
|
||||
@@ -83,6 +87,11 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
|
||||
let unboxed: &TDocSet = self.borrow();
|
||||
unboxed.doc()
|
||||
}
|
||||
|
||||
/// Forwards to the `size_hint` of the docset held inside the `Box`.
fn size_hint(&self) -> usize {
|
||||
// Borrow the inner docset so the call goes to `TDocSet`, not to the box.
let unboxed: &TDocSet = self.borrow();
|
||||
unboxed.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
|
||||
@@ -100,4 +109,9 @@ impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
|
||||
let unref: &TDocSet = *self;
|
||||
unref.doc()
|
||||
}
|
||||
|
||||
/// Forwards to the `size_hint` of the docset behind the mutable reference.
fn size_hint(&self) -> usize {
|
||||
// Reborrow the `&mut TDocSet` as a shared reference for the read-only call.
let unref: &TDocSet = *self;
|
||||
unref.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,8 +10,9 @@ pub struct IntersectionDocSet<TDocSet: DocSet> {
|
||||
}
|
||||
|
||||
impl<TDocSet: DocSet> From<Vec<TDocSet>> for IntersectionDocSet<TDocSet> {
|
||||
fn from(docsets: Vec<TDocSet>) -> IntersectionDocSet<TDocSet> {
|
||||
fn from(mut docsets: Vec<TDocSet>) -> IntersectionDocSet<TDocSet> {
|
||||
assert!(docsets.len() >= 2);
|
||||
docsets.sort_by_key(|docset| docset.size_hint());
|
||||
IntersectionDocSet {
|
||||
docsets: docsets,
|
||||
finished: false,
|
||||
@@ -31,37 +32,55 @@ impl<TDocSet: DocSet> IntersectionDocSet<TDocSet> {
|
||||
|
||||
|
||||
impl<TDocSet: DocSet> DocSet for IntersectionDocSet<TDocSet> {
|
||||
/// Returns the smallest `size_hint` among the underlying docsets.
///
/// Panics (via `unwrap`) if `self.docsets` is empty.
fn size_hint(&self) -> usize {
|
||||
self.docsets
|
||||
.iter()
|
||||
.map(|docset| docset.size_hint())
|
||||
.min()
|
||||
.unwrap() // safe as docsets cannot be empty.
|
||||
}
|
||||
|
||||
#[allow(never_loop)]
|
||||
fn advance(&mut self) -> bool {
|
||||
if self.finished {
|
||||
return false;
|
||||
}
|
||||
let num_docsets = self.docsets.len();
|
||||
let mut count_matching = 0;
|
||||
let mut doc_candidate = 0;
|
||||
let mut ord = 0;
|
||||
loop {
|
||||
let mut doc_set = &mut self.docsets[ord];
|
||||
match doc_set.skip_next(doc_candidate) {
|
||||
SkipResult::Reached => {
|
||||
count_matching += 1;
|
||||
if count_matching == num_docsets {
|
||||
self.doc = doc_candidate;
|
||||
return true;
|
||||
let (head_arr, tail) = self.docsets.split_at_mut(1);
|
||||
let head: &mut TDocSet = &mut head_arr[0];
|
||||
if !head.advance() {
|
||||
self.finished = true;
|
||||
return false;
|
||||
}
|
||||
let mut doc_candidate = head.doc();
|
||||
|
||||
'outer: loop {
|
||||
|
||||
for docset in tail.iter_mut() {
|
||||
match docset.skip_next(doc_candidate) {
|
||||
SkipResult::Reached => {}
|
||||
SkipResult::OverStep => {
|
||||
doc_candidate = docset.doc();
|
||||
match head.skip_next(doc_candidate) {
|
||||
SkipResult::Reached => {}
|
||||
SkipResult::End => {
|
||||
self.finished = true;
|
||||
return false;
|
||||
}
|
||||
SkipResult::OverStep => {
|
||||
doc_candidate = head.doc();
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
SkipResult::End => {
|
||||
self.finished = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
SkipResult::End => {
|
||||
self.finished = true;
|
||||
return false;
|
||||
}
|
||||
SkipResult::OverStep => {
|
||||
count_matching = 1;
|
||||
doc_candidate = doc_set.doc();
|
||||
}
|
||||
}
|
||||
ord += 1;
|
||||
if ord == num_docsets {
|
||||
ord = 0;
|
||||
}
|
||||
|
||||
self.doc = doc_candidate;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -152,6 +152,10 @@ impl<'a> DocSet for SegmentPostings<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Uses `self.len()` as the size hint.
fn size_hint(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn doc(&self) -> DocId {
|
||||
let docs = self.block_cursor.docs();
|
||||
|
||||
@@ -34,6 +34,10 @@ impl DocSet for VecPostings {
|
||||
/// Returns the entry of `doc_ids` at the index held in `self.cursor.0`.
fn doc(&self) -> DocId {
|
||||
self.doc_ids[self.cursor.0]
|
||||
}
|
||||
|
||||
/// Uses `self.len()` as the size hint.
fn size_hint(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl HasLen for VecPostings {
|
||||
|
||||
@@ -93,6 +93,18 @@ impl<TScorer: Scorer> BooleanScorer<TScorer> {
|
||||
}
|
||||
|
||||
impl<TScorer: Scorer> DocSet for BooleanScorer<TScorer> {
|
||||
/// Returns the largest `size_hint` among `self.scorers`.
///
/// NOTE(review): the trailing `unwrap` panics when `self.scorers` yields no
/// elements; confirm that an empty scorer list cannot reach this point.
fn size_hint(&self) -> usize {
|
||||
// TODO fix this. it should be the min
|
||||
// of the MUST scorer
|
||||
// and the max of the SHOULD scorers.
|
||||
self.scorers
|
||||
.iter()
|
||||
.map(|scorer| scorer.size_hint())
|
||||
.max()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
|
||||
fn advance(&mut self) -> bool {
|
||||
loop {
|
||||
self.score_combiner.clear();
|
||||
|
||||
@@ -67,6 +67,10 @@ impl<'a> DocSet for PhraseScorer<'a> {
|
||||
/// Forwards to `doc()` of the wrapped `intersection_docset`.
fn doc(&self) -> DocId {
|
||||
self.intersection_docset.doc()
|
||||
}
|
||||
|
||||
/// Forwards to `size_hint()` of the wrapped `intersection_docset`.
fn size_hint(&self) -> usize {
|
||||
self.intersection_docset.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -49,6 +49,10 @@ impl DocSet for EmptyScorer {
|
||||
/// Always returns `DocId::max_value()`.
fn doc(&self) -> DocId {
|
||||
DocId::max_value()
|
||||
}
|
||||
|
||||
/// Always reports a size hint of zero.
fn size_hint(&self) -> usize {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
impl Scorer for EmptyScorer {
|
||||
|
||||
@@ -32,6 +32,11 @@ impl<TPostings> DocSet for TermScorer<TPostings>
|
||||
/// Forwards to `doc()` of the underlying `postings`.
fn doc(&self) -> DocId {
|
||||
self.postings.doc()
|
||||
}
|
||||
|
||||
|
||||
/// Forwards to `size_hint()` of the underlying `postings`.
fn size_hint(&self) -> usize {
|
||||
self.postings.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
impl<TPostings> Scorer for TermScorer<TPostings>
|
||||
|
||||
Reference in New Issue
Block a user