mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 09:12:55 +00:00
Renaming intersection / exclude
This commit is contained in:
@@ -1,20 +1,22 @@
|
||||
use postings::DocSet;
|
||||
use postings::SkipResult;
|
||||
use query::Scorer;
|
||||
use DocId;
|
||||
use Score;
|
||||
|
||||
|
||||
/// Creates a `DocSet` that iterates through the intersection of two or more `DocSet`s.
|
||||
pub struct IntersectionDocSet<TDocSet: DocSet> {
|
||||
pub struct Intersection<TDocSet: DocSet> {
|
||||
docsets: Vec<TDocSet>,
|
||||
finished: bool,
|
||||
doc: DocId,
|
||||
}
|
||||
|
||||
impl<TDocSet: DocSet> From<Vec<TDocSet>> for IntersectionDocSet<TDocSet> {
|
||||
fn from(mut docsets: Vec<TDocSet>) -> IntersectionDocSet<TDocSet> {
|
||||
impl<TDocSet: DocSet> From<Vec<TDocSet>> for Intersection<TDocSet> {
|
||||
fn from(mut docsets: Vec<TDocSet>) -> Intersection<TDocSet> {
|
||||
assert!(docsets.len() >= 2);
|
||||
docsets.sort_by_key(|docset| docset.size_hint());
|
||||
IntersectionDocSet {
|
||||
Intersection {
|
||||
docsets,
|
||||
finished: false,
|
||||
doc: 0u32,
|
||||
@@ -22,7 +24,7 @@ impl<TDocSet: DocSet> From<Vec<TDocSet>> for IntersectionDocSet<TDocSet> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<TDocSet: DocSet> IntersectionDocSet<TDocSet> {
|
||||
impl<TDocSet: DocSet> Intersection<TDocSet> {
|
||||
/// Returns an array to the underlying `DocSet`s of the intersection.
|
||||
/// These `DocSet`s are in the same position as the `Intersection`,
|
||||
/// so that user can access their `docfreq` and `positions`.
|
||||
@@ -31,7 +33,7 @@ impl<TDocSet: DocSet> IntersectionDocSet<TDocSet> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<TDocSet: DocSet> DocSet for IntersectionDocSet<TDocSet> {
|
||||
impl<TDocSet: DocSet> DocSet for Intersection<TDocSet> {
|
||||
#[allow(never_loop)]
|
||||
fn advance(&mut self) -> bool {
|
||||
if self.finished {
|
||||
@@ -114,10 +116,10 @@ impl<TDocSet: DocSet> DocSet for IntersectionDocSet<TDocSet> {
|
||||
|
||||
}
|
||||
|
||||
|
||||
fn doc(&self) -> DocId {
|
||||
self.doc
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> u32 {
|
||||
self.docsets
|
||||
.iter()
|
||||
@@ -127,10 +129,21 @@ impl<TDocSet: DocSet> DocSet for IntersectionDocSet<TDocSet> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<TScorer> Scorer for Intersection<TScorer>
|
||||
where TScorer: Scorer {
|
||||
fn score(&mut self) -> Score {
|
||||
self.docsets
|
||||
.iter_mut()
|
||||
.map(Scorer::score)
|
||||
.sum()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use postings::SkipResult;
|
||||
use postings::{DocSet, IntersectionDocSet, VecPostings};
|
||||
use postings::{DocSet, Intersection, VecPostings};
|
||||
use postings::tests::test_skip_against_unoptimized;
|
||||
|
||||
#[test]
|
||||
@@ -138,7 +151,7 @@ mod tests {
|
||||
{
|
||||
let left = VecPostings::from(vec![1, 3, 9]);
|
||||
let right = VecPostings::from(vec![3, 4, 9, 18]);
|
||||
let mut intersection = IntersectionDocSet::from(vec![left, right]);
|
||||
let mut intersection = Intersection::from(vec![left, right]);
|
||||
assert!(intersection.advance());
|
||||
assert_eq!(intersection.doc(), 3);
|
||||
assert!(intersection.advance());
|
||||
@@ -149,7 +162,7 @@ mod tests {
|
||||
let a = VecPostings::from(vec![1, 3, 9]);
|
||||
let b = VecPostings::from(vec![3, 4, 9, 18]);
|
||||
let c = VecPostings::from(vec![1, 5, 9, 111]);
|
||||
let mut intersection = IntersectionDocSet::from(vec![a, b, c]);
|
||||
let mut intersection = Intersection::from(vec![a, b, c]);
|
||||
assert!(intersection.advance());
|
||||
assert_eq!(intersection.doc(), 9);
|
||||
assert!(!intersection.advance());
|
||||
@@ -160,7 +173,7 @@ mod tests {
|
||||
fn test_intersection_zero() {
|
||||
let left = VecPostings::from(vec![0]);
|
||||
let right = VecPostings::from(vec![0]);
|
||||
let mut intersection = IntersectionDocSet::from(vec![left, right]);
|
||||
let mut intersection = Intersection::from(vec![left, right]);
|
||||
assert!(intersection.advance());
|
||||
assert_eq!(intersection.doc(), 0);
|
||||
}
|
||||
@@ -170,7 +183,7 @@ mod tests {
|
||||
fn test_intersection_skip() {
|
||||
let left = VecPostings::from(vec![0, 1, 2, 4]);
|
||||
let right = VecPostings::from(vec![2, 5]);
|
||||
let mut intersection = IntersectionDocSet::from(vec![left, right]);
|
||||
let mut intersection = Intersection::from(vec![left, right]);
|
||||
assert_eq!(intersection.skip_next(2), SkipResult::Reached);
|
||||
assert_eq!(intersection.doc(), 2);
|
||||
}
|
||||
@@ -181,17 +194,17 @@ mod tests {
|
||||
test_skip_against_unoptimized(|| {
|
||||
let left = VecPostings::from(vec![4]);
|
||||
let right = VecPostings::from(vec![2, 5]);
|
||||
box IntersectionDocSet::from(vec![left, right])
|
||||
box Intersection::from(vec![left, right])
|
||||
}, vec![0,2,4,5,6]);
|
||||
test_skip_against_unoptimized(|| {
|
||||
let mut left = VecPostings::from(vec![1, 4, 5, 6]);
|
||||
let mut right = VecPostings::from(vec![2, 5, 10]);
|
||||
left.advance();
|
||||
right.advance();
|
||||
box IntersectionDocSet::from(vec![left, right])
|
||||
box Intersection::from(vec![left, right])
|
||||
}, vec![0,1,2,3,4,5,6,7,10,11]);
|
||||
test_skip_against_unoptimized(|| {
|
||||
box IntersectionDocSet::from(vec![
|
||||
box Intersection::from(vec![
|
||||
VecPostings::from(vec![1, 4, 5, 6]),
|
||||
VecPostings::from(vec![1, 2, 5, 6]),
|
||||
VecPostings::from(vec![1, 4, 5, 6]),
|
||||
@@ -206,7 +219,7 @@ mod tests {
|
||||
let a = VecPostings::from(vec![1, 3]);
|
||||
let b = VecPostings::from(vec![1, 4]);
|
||||
let c = VecPostings::from(vec![3, 9]);
|
||||
let mut intersection = IntersectionDocSet::from(vec![a, b, c]);
|
||||
let mut intersection = Intersection::from(vec![a, b, c]);
|
||||
assert!(!intersection.advance());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ pub use self::postings::Postings;
|
||||
pub use self::vec_postings::VecPostings;
|
||||
|
||||
pub use self::segment_postings::{BlockSegmentPostings, SegmentPostings};
|
||||
pub use self::intersection::IntersectionDocSet;
|
||||
pub use self::intersection::Intersection;
|
||||
pub use self::union::UnionDocSet;
|
||||
|
||||
pub use common::HasLen;
|
||||
@@ -551,7 +551,7 @@ pub mod tests {
|
||||
.inverted_index(TERM_D.field())
|
||||
.read_postings(&*TERM_D, IndexRecordOption::Basic)
|
||||
.unwrap();
|
||||
let mut intersection = IntersectionDocSet::from(vec![
|
||||
let mut intersection = Intersection::from(vec![
|
||||
segment_postings_a,
|
||||
segment_postings_b,
|
||||
segment_postings_c,
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
use query::Weight;
|
||||
use core::SegmentReader;
|
||||
use postings::{IntersectionDocSet, UnionDocSet};
|
||||
use postings::{Intersection, UnionDocSet};
|
||||
use std::collections::HashMap;
|
||||
use query::EmptyScorer;
|
||||
use query::Scorer;
|
||||
use query::ExcludeScorer;
|
||||
use query::Exclude;
|
||||
use super::BooleanScorer;
|
||||
use query::OccurFilter;
|
||||
use query::ConstScorer;
|
||||
@@ -60,7 +60,7 @@ impl BooleanWeight {
|
||||
let must_scorer_opt: Option<Box<Scorer + 'a>> = per_occur_scorers
|
||||
.remove(&Occur::Must)
|
||||
.map(|scorers| {
|
||||
let scorer: Box<Scorer> = box ConstScorer::new(IntersectionDocSet::from(scorers));
|
||||
let scorer: Box<Scorer> = box ConstScorer::new(Intersection::from(scorers));
|
||||
scorer
|
||||
});
|
||||
|
||||
@@ -77,7 +77,7 @@ impl BooleanWeight {
|
||||
};
|
||||
|
||||
if let Some(exclude_scorer) = exclude_scorer_opt {
|
||||
Ok(box ExcludeScorer::new(positive_scorer, exclude_scorer))
|
||||
Ok(box Exclude::new(positive_scorer, exclude_scorer))
|
||||
} else {
|
||||
Ok(positive_scorer)
|
||||
}
|
||||
|
||||
@@ -10,24 +10,28 @@ enum State {
|
||||
Finished
|
||||
}
|
||||
|
||||
pub struct ExcludeScorer<TDocSet, TDocSetExclude> {
|
||||
/// Filters a given `DocSet` by removing the docs from a given `DocSet`.
|
||||
///
|
||||
/// The excluding docset has no impact on scoring.
|
||||
pub struct Exclude<TDocSet, TDocSetExclude> {
|
||||
underlying_docset: TDocSet,
|
||||
excluding_docset: TDocSetExclude,
|
||||
excluding_state: State,
|
||||
}
|
||||
|
||||
|
||||
impl<TDocSet, TDocSetExclude> ExcludeScorer<TDocSet, TDocSetExclude>
|
||||
impl<TDocSet, TDocSetExclude> Exclude<TDocSet, TDocSetExclude>
|
||||
where TDocSetExclude: DocSet {
|
||||
|
||||
pub fn new(underlying_docset: TDocSet, mut excluding_docset: TDocSetExclude) -> ExcludeScorer<TDocSet, TDocSetExclude> {
|
||||
/// Creates a new `Exclude`
|
||||
pub fn new(underlying_docset: TDocSet, mut excluding_docset: TDocSetExclude) -> Exclude<TDocSet, TDocSetExclude> {
|
||||
let state =
|
||||
if excluding_docset.advance() {
|
||||
State::ExcludeOne(excluding_docset.doc())
|
||||
} else {
|
||||
State::Finished
|
||||
};
|
||||
ExcludeScorer {
|
||||
Exclude {
|
||||
underlying_docset,
|
||||
excluding_docset,
|
||||
excluding_state: state,
|
||||
@@ -35,7 +39,7 @@ impl<TDocSet, TDocSetExclude> ExcludeScorer<TDocSet, TDocSetExclude>
|
||||
}
|
||||
}
|
||||
|
||||
impl<TDocSet, TDocSetExclude> ExcludeScorer<TDocSet, TDocSetExclude>
|
||||
impl<TDocSet, TDocSetExclude> Exclude<TDocSet, TDocSetExclude>
|
||||
where TDocSet: DocSet, TDocSetExclude: DocSet {
|
||||
|
||||
/// Returns true iff the doc is not removed.
|
||||
@@ -73,7 +77,7 @@ impl<TDocSet, TDocSetExclude> ExcludeScorer<TDocSet, TDocSetExclude>
|
||||
}
|
||||
}
|
||||
|
||||
impl<TDocSet, TDocSetExclude> DocSet for ExcludeScorer<TDocSet, TDocSetExclude>
|
||||
impl<TDocSet, TDocSetExclude> DocSet for Exclude<TDocSet, TDocSetExclude>
|
||||
where TDocSet: DocSet, TDocSetExclude: DocSet {
|
||||
|
||||
fn advance(&mut self) -> bool {
|
||||
@@ -113,8 +117,8 @@ impl<TDocSet, TDocSetExclude> DocSet for ExcludeScorer<TDocSet, TDocSetExclude>
|
||||
}
|
||||
|
||||
|
||||
impl<TDocSet, TDocSetExclude> Scorer for ExcludeScorer<TDocSet, TDocSetExclude>
|
||||
where TDocSet: Scorer, TDocSetExclude: Scorer {
|
||||
impl<TScorer, TDocSetExclude> Scorer for Exclude<TScorer, TDocSetExclude>
|
||||
where TScorer: Scorer, TDocSetExclude: DocSet {
|
||||
fn score(&mut self) -> Score {
|
||||
self.underlying_docset.score()
|
||||
}
|
||||
@@ -130,7 +134,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_exclude() {
|
||||
let mut exclude_scorer = ExcludeScorer::new(
|
||||
let mut exclude_scorer = Exclude::new(
|
||||
VecPostings::from(vec![1,2,5,8,10,15,24]),
|
||||
VecPostings::from(vec![1,2,3,10,16,24])
|
||||
);
|
||||
@@ -144,7 +148,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_exclude_skip() {
|
||||
test_skip_against_unoptimized(
|
||||
|| box ExcludeScorer::new(
|
||||
|| box Exclude::new(
|
||||
VecPostings::from(vec![1, 2, 5, 8, 10, 15, 24]),
|
||||
VecPostings::from(vec![1, 2, 3, 10, 16, 24])
|
||||
),
|
||||
@@ -158,7 +162,7 @@ mod tests {
|
||||
let sample_exclude = sample_with_seed(10_000, 0.05, 2);
|
||||
let sample_skip = sample_with_seed(10_000, 0.005, 3);
|
||||
test_skip_against_unoptimized(
|
||||
|| box ExcludeScorer::new(
|
||||
|| box Exclude::new(
|
||||
VecPostings::from(sample_include.clone()),
|
||||
VecPostings::from(sample_exclude.clone())
|
||||
),
|
||||
|
||||
@@ -18,7 +18,7 @@ mod exclude;
|
||||
mod reqopt_scorer;
|
||||
|
||||
pub use self::reqopt_scorer::RequiredOptionalScorer;
|
||||
pub use self::exclude::ExcludeScorer;
|
||||
pub use self::exclude::Exclude;
|
||||
pub use self::bitset::BitSetDocSet;
|
||||
pub use self::boolean_query::BooleanQuery;
|
||||
pub use self::occur_filter::OccurFilter;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use query::Scorer;
|
||||
use DocId;
|
||||
use postings::{DocSet, IntersectionDocSet, Postings, SegmentPostings, SkipResult};
|
||||
use postings::{DocSet, Intersection, Postings, SegmentPostings, SkipResult};
|
||||
|
||||
struct PostingsWithOffset {
|
||||
offset: u32,
|
||||
@@ -45,7 +45,7 @@ impl DocSet for PostingsWithOffset {
|
||||
}
|
||||
|
||||
pub struct PhraseScorer {
|
||||
intersection_docset: IntersectionDocSet<PostingsWithOffset>,
|
||||
intersection_docset: Intersection<PostingsWithOffset>,
|
||||
}
|
||||
|
||||
impl PhraseScorer {
|
||||
@@ -56,7 +56,7 @@ impl PhraseScorer {
|
||||
.map(|(offset, postings)| PostingsWithOffset::new(postings, offset as u32))
|
||||
.collect();
|
||||
PhraseScorer {
|
||||
intersection_docset: IntersectionDocSet::from(postings_with_offsets),
|
||||
intersection_docset: Intersection::from(postings_with_offsets),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user