use crate::docset::{DocSet, TERMINATED}; use crate::query::Scorer; use crate::DocId; use crate::Score; #[inline(always)] fn is_within(docset: &mut TDocSetExclude, doc: DocId) -> bool { docset.doc() <= doc && docset.seek(doc) == doc } /// Filters a given `DocSet` by removing the docs from a given `DocSet`. /// /// The excluding docset has no impact on scoring. pub struct Exclude { underlying_docset: TDocSet, excluding_docset: TDocSetExclude, } impl Exclude where TDocSet: DocSet, TDocSetExclude: DocSet, { /// Creates a new `ExcludeScorer` pub fn new( mut underlying_docset: TDocSet, mut excluding_docset: TDocSetExclude, ) -> Exclude { while underlying_docset.doc() != TERMINATED { let target = underlying_docset.doc(); if !is_within(&mut excluding_docset, target) { break; } underlying_docset.advance(); } Exclude { underlying_docset, excluding_docset, } } } impl DocSet for Exclude where TDocSet: DocSet, TDocSetExclude: DocSet, { fn advance(&mut self) -> DocId { loop { let candidate = self.underlying_docset.advance(); if candidate == TERMINATED { return TERMINATED; } if !is_within(&mut self.excluding_docset, candidate) { return candidate; } } } fn seek(&mut self, target: DocId) -> DocId { let candidate = self.underlying_docset.seek(target); if candidate == TERMINATED { return TERMINATED; } if !is_within(&mut self.excluding_docset, candidate) { return candidate; } self.advance() } fn doc(&self) -> DocId { self.underlying_docset.doc() } /// `.size_hint()` directly returns the size /// of the underlying docset without taking in account /// the fact that docs might be deleted. fn size_hint(&self) -> u32 { self.underlying_docset.size_hint() } } impl Scorer for Exclude where TScorer: Scorer, TDocSetExclude: DocSet + 'static, { fn score(&mut self) -> Score { self.underlying_docset.score() } } #[cfg(test)] mod tests { use super::*; use crate::postings::tests::test_skip_against_unoptimized; use crate::query::VecDocSet; use crate::tests::sample_with_seed; #[test] fn test_exclude() { let mut exclude_scorer = Exclude::new( VecDocSet::from(vec![1, 2, 5, 8, 10, 15, 24]), VecDocSet::from(vec![1, 2, 3, 10, 16, 24]), ); let mut els = vec![]; while exclude_scorer.doc() != TERMINATED { els.push(exclude_scorer.doc()); exclude_scorer.advance(); } assert_eq!(els, vec![5, 8, 15]); } #[test] fn test_exclude_skip() { test_skip_against_unoptimized( || { Box::new(Exclude::new( VecDocSet::from(vec![1, 2, 5, 8, 10, 15, 24]), VecDocSet::from(vec![1, 2, 3, 10, 16, 24]), )) }, vec![5, 8, 10, 15, 24], ); } #[test] fn test_exclude_skip_random() { let sample_include = sample_with_seed(10_000, 0.1, 1); let sample_exclude = sample_with_seed(10_000, 0.05, 2); let sample_skip = sample_with_seed(10_000, 0.005, 3); test_skip_against_unoptimized( || { Box::new(Exclude::new( VecDocSet::from(sample_include.clone()), VecDocSet::from(sample_exclude.clone()), )) }, sample_skip, ); } }