diff --git a/src/postings/mod.rs b/src/postings/mod.rs
index a19dd048c..c6ade2a60 100644
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -31,7 +31,7 @@ pub use self::vec_postings::VecPostings;
 pub use self::segment_postings::{BlockSegmentPostings, SegmentPostings};
 pub use self::intersection::IntersectionDocSet;
-pub use self::union::UnionDocSet;
+pub use self::union::{docsets_union, UnionDocSet};
 pub use common::HasLen;
diff --git a/src/postings/postings.rs b/src/postings/postings.rs
index ac5516e2e..979640230 100644
--- a/src/postings/postings.rs
+++ b/src/postings/postings.rs
@@ -30,15 +30,3 @@ impl Postings for Box {
         unboxed.positions()
     }
 }
-//
-//impl<'a, TPostings: Postings> Postings for &'a mut TPostings {
-//    fn term_freq(&self) -> u32 {
-//        let unref: &TPostings = *self;
-//        unref.term_freq()
-//    }
-//
-//    fn positions(&self) -> &[u32] {
-//        let unref: &TPostings = *self;
-//        unref.positions()
-//    }
-//}
diff --git a/src/postings/union.rs b/src/postings/union.rs
index c4797fb66..dfe34162d 100644
--- a/src/postings/union.rs
+++ b/src/postings/union.rs
@@ -186,6 +186,28 @@ impl DocSet for UnionDocSet {
 }
+/// Builds the union of `docsets`.
+///
+/// Returns `None` when `docsets` is empty, its only element when there
+/// is exactly one, and a boxed `UnionDocSet` over all of them otherwise.
+// NOTE(review): the type arguments of this signature look truncated
+// (`Vec`, `Option>`) -- restore them from the upstream commit.
+pub fn docsets_union(docsets: Vec) -> Option> {
+    if docsets.is_empty() {
+        None
+    } else {
+        Some(if docsets.len() == 1 {
+            docsets
+                .into_iter()
+                .next()
+                .unwrap() //< we checked the size beforehand
+        } else {
+            box UnionDocSet::from(docsets)
+        })
+    }
+
+}
+
 #[cfg(test)]
 mod tests {
diff --git a/src/query/exclude.rs b/src/query/exclude.rs
new file mode 100644
index 000000000..af14de022
--- /dev/null
+++ b/src/query/exclude.rs
@@ -0,0 +1,132 @@
+use query::Scorer;
+use postings::SkipResult;
+use DocSet;
+use Score;
+use DocId;
+
+/// Progress of the excluding docset.
+#[derive(Clone, Copy)]
+enum State {
+    /// Doc the excluding docset was positioned on when this state was last updated.
+    ExcludeOne(DocId),
+    /// The excluding docset is exhausted: nothing is left to exclude.
+    Finished,
+}
+
+/// Yields the docs of `underlying_docset` that do not appear
+/// in `excluding_docset`.
+pub struct ExcludeScorer<TDocSet, TDocSetExclude> {
+    underlying_docset: TDocSet,
+    excluding_docset: TDocSetExclude,
+    excluding_state: State,
+}
+
+impl<TDocSet, TDocSetExclude> ExcludeScorer<TDocSet, TDocSetExclude>
+    where TDocSetExclude: DocSet {
+
+    /// Creates an `ExcludeScorer`.
+    ///
+    /// `excluding_docset` is advanced once right away, so that its
+    /// first doc (if any) becomes the pending exclusion.
+    pub fn new(
+        underlying_docset: TDocSet,
+        mut excluding_docset: TDocSetExclude,
+    ) -> ExcludeScorer<TDocSet, TDocSetExclude> {
+        let excluding_state = if excluding_docset.advance() {
+            State::ExcludeOne(excluding_docset.doc())
+        } else {
+            State::Finished
+        };
+        ExcludeScorer {
+            underlying_docset,
+            excluding_docset,
+            excluding_state,
+        }
+    }
+}
+
+impl<TDocSet, TDocSetExclude> ExcludeScorer<TDocSet, TDocSetExclude>
+    where TDocSet: DocSet, TDocSetExclude: DocSet {
+
+    /// Returns true iff the current doc of the underlying
+    /// docset is not excluded.
+    ///
+    /// Successive calls must see non-decreasing docs.
+    fn accept(&mut self) -> bool {
+        let doc = self.underlying_docset.doc();
+        let excluded_doc = match self.excluding_state {
+            State::ExcludeOne(excluded_doc) => excluded_doc,
+            State::Finished => return true,
+        };
+        if doc == excluded_doc {
+            return false;
+        }
+        if excluded_doc > doc {
+            // The pending exclusion is still ahead of `doc`: keep it untouched.
+            // `skip_next` advances the docset at least once, so calling it now
+            // would step over `excluded_doc` and let that doc through later.
+            return true;
+        }
+        match self.excluding_docset.skip_next(doc) {
+            SkipResult::OverStep => {
+                self.excluding_state = State::ExcludeOne(self.excluding_docset.doc());
+                true
+            }
+            SkipResult::End => {
+                self.excluding_state = State::Finished;
+                true
+            }
+            SkipResult::Reached => false,
+        }
+    }
+}
+
+impl<TDocSet, TDocSetExclude> DocSet for ExcludeScorer<TDocSet, TDocSetExclude>
+    where TDocSet: DocSet, TDocSetExclude: DocSet {
+
+    /// Advances the underlying docset to its next doc that is not excluded.
+    fn advance(&mut self) -> bool {
+        while self.underlying_docset.advance() {
+            if self.accept() {
+                return true;
+            }
+        }
+        false
+    }
+
+    /// Skips the underlying docset to `target`; if the doc it
+    /// lands on is excluded, moves on to the next accepted doc.
+    fn skip_next(&mut self, target: DocId) -> SkipResult {
+        let underlying_skip_result = self.underlying_docset.skip_next(target);
+        if underlying_skip_result == SkipResult::End {
+            return SkipResult::End;
+        }
+        if self.accept() {
+            underlying_skip_result
+        } else if self.advance() {
+            SkipResult::OverStep
+        } else {
+            SkipResult::End
+        }
+    }
+
+    /// Current doc of the underlying docset.
+    fn doc(&self) -> DocId {
+        self.underlying_docset.doc()
+    }
+
+    /// `.size_hint()` directly returns the size hint
+    /// of the underlying docset, without taking into account
+    /// the fact that some docs are excluded.
+    fn size_hint(&self) -> u32 {
+        self.underlying_docset.size_hint()
+    }
+}
+
+impl<TDocSet, TDocSetExclude> Scorer for ExcludeScorer<TDocSet, TDocSetExclude>
+    where TDocSet: Scorer, TDocSetExclude: Scorer {
+    /// Score of the current doc, as computed by the underlying scorer.
+    fn score(&self) -> Score {
+        self.underlying_docset.score()
+    }
+}
\ No newline at end of file
diff --git a/src/query/mod.rs b/src/query/mod.rs
index eaa310596..0ea4c8583 100644
--- a/src/query/mod.rs
+++ b/src/query/mod.rs
@@ -14,7 +14,9 @@ mod phrase_query;
 mod all_query;
 mod bitset;
 mod range_query;
+mod exclude;
+use self::exclude::ExcludeScorer;
 pub use self::bitset::BitSetDocSet;
 pub use self::boolean_query::BooleanQuery;
 pub use self::occur_filter::OccurFilter;