mirror of https://github.com/quickwit-oss/tantivy.git
Using the same logic when score is enabled.
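The diff below threads a `ScoreCombiner` type parameter through `Union`, `RequiredOptionalScorer`, and `BooleanWeight`, so the same matching code path runs whether scoring is enabled (a summing combiner) or disabled (`DoNothingCombiner`); the heap-based `BooleanScorer` path is commented out. The sketch that follows restates the combiner idea in isolation: `Score`, `Scorer`, and the two combiners are simplified stand-ins modeled on the hunks below, not tantivy's exact module layout.

    // Minimal sketch of the pattern introduced by this commit (assumptions noted inline).
    type Score = f32; // stand-in for tantivy's Score alias

    trait Scorer {
        fn score(&mut self) -> Score;
    }

    trait ScoreCombiner: Default + Copy {
        fn update<TScorer: Scorer>(&mut self, scorer: &mut TScorer);
        fn clear(&mut self);
        fn score(&self) -> Score;
    }

    // Scoring disabled: combining is a no-op, so the union/intersection logic
    // stays identical and no scoring work is performed.
    #[derive(Default, Clone, Copy)]
    struct DoNothingCombiner;

    impl ScoreCombiner for DoNothingCombiner {
        fn update<TScorer: Scorer>(&mut self, _scorer: &mut TScorer) {}
        fn clear(&mut self) {}
        fn score(&self) -> Score {
            0.0 // placeholder value; it is never used when scoring is disabled
        }
    }

    // Scoring enabled: accumulate the scores of the matching sub-scorers.
    #[derive(Default, Clone, Copy)]
    struct SumCombiner {
        score: Score,
    }

    impl ScoreCombiner for SumCombiner {
        fn update<TScorer: Scorer>(&mut self, scorer: &mut TScorer) {
            self.score += scorer.score();
        }
        fn clear(&mut self) {
            self.score = 0.0;
        }
        fn score(&self) -> Score {
            self.score
        }
    }

With this shape, `BooleanWeight::scorer` can call the same `complex_scorer` for both cases and only swap the combiner type parameter, as the boolean_weight.rs hunk further down shows.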
@@ -42,8 +42,8 @@ impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Rig
segment_local_id: SegmentLocalId,
segment: &SegmentReader,
) -> Result<()> {
try!(self.left.set_segment(segment_local_id, segment));
try!(self.right.set_segment(segment_local_id, segment));
self.left.set_segment(segment_local_id, segment)?;
self.right.set_segment(segment_local_id, segment)?;
Ok(())
}

@@ -29,7 +29,7 @@ impl<'a> Collector for MultiCollector<'a> {
segment: &SegmentReader,
) -> Result<()> {
for collector in &mut self.collectors {
try!(collector.set_segment(segment_local_id, segment));
collector.set_segment(segment_local_id, segment)?;
}
Ok(())
}

@@ -14,7 +14,6 @@
#![allow(new_without_default)]
#![warn(missing_docs)]

//! # `tantivy`
//!
//! Tantivy is a search engine library.
@@ -4,7 +4,6 @@ use query::Scorer;
use DocId;
use Score;

/// Creates a `DocSet` that iterator through the intersection of two `DocSet`s.
pub struct Intersection<TDocSet: DocSet> {
docsets: Vec<TDocSet>,
@@ -77,7 +76,6 @@ impl<TDocSet: DocSet> DocSet for Intersection<TDocSet> {
// We optimize skipping by skipping every single member
// of the intersection to target.

// TODO fix BUG...
// what if we overstep on the second member of the intersection?
// The first member is not necessarily correct.
@@ -112,8 +110,6 @@ impl<TDocSet: DocSet> DocSet for Intersection<TDocSet> {
return SkipResult::OverStep;
}
}

}

fn doc(&self) -> DocId {
@@ -130,16 +126,14 @@ impl<TDocSet: DocSet> DocSet for Intersection<TDocSet> {
}

impl<TScorer> Scorer for Intersection<TScorer>
where TScorer: Scorer {
where
TScorer: Scorer,
{
fn score(&mut self) -> Score {
self.docsets
.iter_mut()
.map(Scorer::score)
.sum()
self.docsets.iter_mut().map(Scorer::score).sum()
}
}

#[cfg(test)]
mod tests {
use postings::SkipResult;
@@ -178,7 +172,6 @@ mod tests {
assert_eq!(intersection.doc(), 0);
}

#[test]
fn test_intersection_skip() {
let left = VecPostings::from(vec![0, 1, 2, 4]);
@@ -188,30 +181,38 @@ mod tests {
assert_eq!(intersection.doc(), 2);
}

#[test]
fn test_intersection_skip_against_unoptimized() {
test_skip_against_unoptimized(|| {
let left = VecPostings::from(vec![4]);
let right = VecPostings::from(vec![2, 5]);
box Intersection::from(vec![left, right])
}, vec![0,2,4,5,6]);
test_skip_against_unoptimized(|| {
let mut left = VecPostings::from(vec![1, 4, 5, 6]);
let mut right = VecPostings::from(vec![2, 5, 10]);
left.advance();
right.advance();
box Intersection::from(vec![left, right])
}, vec![0,1,2,3,4,5,6,7,10,11]);
test_skip_against_unoptimized(|| {
box Intersection::from(vec![
VecPostings::from(vec![1, 4, 5, 6]),
VecPostings::from(vec![1, 2, 5, 6]),
VecPostings::from(vec![1, 4, 5, 6]),
VecPostings::from(vec![1, 5, 6]),
VecPostings::from(vec![2, 4, 5, 7, 8])
])
}, vec![0,1,2,3,4,5,6,7,10,11]);
test_skip_against_unoptimized(
|| {
let left = VecPostings::from(vec![4]);
let right = VecPostings::from(vec![2, 5]);
box Intersection::from(vec![left, right])
},
vec![0, 2, 4, 5, 6],
);
test_skip_against_unoptimized(
|| {
let mut left = VecPostings::from(vec![1, 4, 5, 6]);
let mut right = VecPostings::from(vec![2, 5, 10]);
left.advance();
right.advance();
box Intersection::from(vec![left, right])
},
vec![0, 1, 2, 3, 4, 5, 6, 7, 10, 11],
);
test_skip_against_unoptimized(
|| {
box Intersection::from(vec![
VecPostings::from(vec![1, 4, 5, 6]),
VecPostings::from(vec![1, 2, 5, 6]),
VecPostings::from(vec![1, 4, 5, 6]),
VecPostings::from(vec![1, 5, 6]),
VecPostings::from(vec![2, 4, 5, 7, 8]),
])
},
vec![0, 1, 2, 3, 4, 5, 6, 7, 10, 11],
);
}

#[test]
@@ -633,7 +633,6 @@ pub mod tests {
});
}

/// Wraps a given docset, and forward alls call but the
/// `.skip_next(...)`. This is useful to test that a specialized
/// implementation of `.skip_next(...)` is consistent
@@ -660,25 +659,38 @@ pub mod tests {
}
}

pub fn test_skip_against_unoptimized<F: Fn()->Box<DocSet>>(postings_factory: F, targets: Vec<u32>) {
pub fn test_skip_against_unoptimized<F: Fn() -> Box<DocSet>>(
postings_factory: F,
targets: Vec<u32>,
) {
for target in targets {
let mut postings_opt = postings_factory();
let mut postings_unopt = UnoptimizedDocSet::wrap(postings_factory());
let skip_result_opt = postings_opt.skip_next(target);
let skip_result_unopt = postings_unopt.skip_next(target);
assert_eq!(skip_result_unopt, skip_result_opt, "Failed while skipping to {}", target);
assert_eq!(
skip_result_unopt, skip_result_opt,
"Failed while skipping to {}",
target
);
match skip_result_opt {
SkipResult::Reached => assert_eq!(postings_opt.doc(), target),
SkipResult::OverStep => assert!(postings_opt.doc() > target),
SkipResult::End => { return; },
SkipResult::End => {
return;
}
}
while postings_opt.advance() {
assert!(postings_unopt.advance());
assert_eq!(postings_opt.doc(), postings_unopt.doc(), "Failed while skipping to {}", target);
assert_eq!(
postings_opt.doc(),
postings_unopt.doc(),
"Failed while skipping to {}",
target
);
}
assert!(!postings_unopt.advance());
}
}

}
@@ -1,18 +1,18 @@
use postings::DocSet;
use query::Scorer;
use postings::SkipResult;
use common::TinySet;
use std::cmp::Ordering;
use DocId;
use query::score_combiner::{DoNothingCombiner, ScoreCombiner};

const HORIZON_NUM_TINYBITSETS: usize = 32;
const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32;

/// Creates a `DocSet` that iterator through the intersection of two `DocSet`s.
pub struct Union<TDocSet: DocSet, TScoreCombiner=DoNothingCombiner>
where TDocSet: DocSet, TScoreCombiner: ScoreCombiner {
docsets: Vec<TDocSet>,
pub struct Union<TScorer, TScoreCombiner=DoNothingCombiner>
{
docsets: Vec<TScorer>,
bitsets: Box<[TinySet; HORIZON_NUM_TINYBITSETS]>,
scores: Box<[TScoreCombiner; HORIZON as usize]>,
cursor: usize,
@@ -20,60 +20,65 @@ pub struct Union<TDocSet: DocSet, TScoreCombiner=DoNothingCombiner>
doc: DocId,
}

impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> From<Vec<TDocSet>> for Union<TDocSet, TScoreCombiner> {
fn from(docsets: Vec<TDocSet>) -> Union<TDocSet> {
let non_empty_docsets: Vec<TDocSet> =
docsets
.into_iter()
.flat_map(|mut docset| {
impl<TScorer, TScoreCombiner> From<Vec<TScorer>>
for Union<TScorer, TScoreCombiner>
where TScoreCombiner: ScoreCombiner, TScorer: Scorer
{
fn from(docsets: Vec<TScorer>) -> Union<TScorer, TScoreCombiner> {
let non_empty_docsets: Vec<TScorer> = docsets
.into_iter()
.flat_map(
|mut docset| {
if docset.advance() {
Some(docset)
} else {
None
}
})
.collect();
},
)
.collect();
Union {
docsets: non_empty_docsets,
bitsets: Box::new([TinySet::empty(); HORIZON_NUM_TINYBITSETS]),
scores: Box::new([TScoreCombiner::default(); HORIZON]),
scores: Box::new([TScoreCombiner::default(); HORIZON as usize]),
cursor: HORIZON_NUM_TINYBITSETS,
offset: 0,
doc: 0
doc: 0,
}
}
}

fn refill<TDocSet: DocSet>(docsets: &mut Vec<TDocSet>, bitsets: &mut [TinySet; HORIZON_NUM_TINYBITSETS], min_doc: DocId) {
docsets
.drain_filter(|docset| {
let horizon = min_doc + HORIZON as u32;
loop {
let doc = docset.doc();
if doc >= horizon {
return false;
}
// add this document
let delta = doc - min_doc;
bitsets[(delta / 64) as usize].insert_mut(delta % 64u32);
if !docset.advance() {
// remove the docset, it has been entirely consumed.
return true;
}
fn refill<TScorer: Scorer, TScoreCombiner: ScoreCombiner>(
scorers: &mut Vec<TScorer>,
bitsets: &mut [TinySet; HORIZON_NUM_TINYBITSETS],
score_combiner: &mut [TScoreCombiner; HORIZON as usize],
min_doc: DocId,
) {
scorers.drain_filter(|scorer| {
let horizon = min_doc + HORIZON as u32;
loop {
let doc = scorer.doc();
if doc >= horizon {
return false;
}
});
// add this document
let delta = doc - min_doc;
bitsets[(delta / 64) as usize].insert_mut(delta % 64u32);
score_combiner[delta as usize].update(scorer);
if !scorer.advance() {
// remove the docset, it has been entirely consumed.
return true;
}
}
});
}

impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> Union<TDocSet, TScoreCombiner> {
impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> Union<TScorer, TScoreCombiner> {
fn refill(&mut self) -> bool {
if let Some(min_doc) = self.docsets
.iter_mut()
.map(|docset| docset.doc())
.min() {
if let Some(min_doc) = self.docsets.iter_mut().map(|docset| docset.doc()).min() {
self.offset = min_doc;
self.cursor = 0;
refill(&mut self.docsets, &mut *self.bitsets, min_doc);
refill(&mut self.docsets, &mut *self.bitsets, &mut *self.scores, min_doc);
self.advance();
true
} else {
@@ -94,8 +99,7 @@ impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> Union<TDocSet, TScoreCombin
}
}

impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> DocSet for Union<TDocSet, TScoreCombiner> {

impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> DocSet for Union<TScorer, TScoreCombiner> {
fn advance(&mut self) -> bool {
if self.advance_buffered() {
return true;
@@ -150,18 +154,12 @@ impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> DocSet for Union<TDocSet, T
// The target is outside of the buffered horizon.
// advance all docsets to a doc >= to the target.
self.docsets
.drain_filter(|docset| {
match docset.doc().cmp(&target) {
Ordering::Less => {
match docset.skip_next(target) {
SkipResult::End => true,
SkipResult::Reached | SkipResult::OverStep => false
}
}
Ordering::Equal | Ordering::Greater => {
false
}
}
.drain_filter(|docset| match docset.doc().cmp(&target) {
Ordering::Less => match docset.skip_next(target) {
SkipResult::End => true,
SkipResult::Reached | SkipResult::OverStep => false,
},
Ordering::Equal | Ordering::Greater => false,
});

// at this point all of the docsets
@@ -177,7 +175,6 @@ impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> DocSet for Union<TDocSet, T
SkipResult::End
}
}

}

fn doc(&self) -> DocId {
@@ -189,13 +186,11 @@ impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> DocSet for Union<TDocSet, T
}
}

#[cfg(test)]
mod tests {

use super::Union;
use postings::{VecPostings, DocSet};
use postings::{DocSet, VecPostings};
use tests;
use test::Bencher;
use DocId;
@@ -203,7 +198,8 @@ mod tests {
use super::HORIZON;
use postings::SkipResult;
use postings::tests::test_skip_against_unoptimized;

use query::ConstScorer;
use query::score_combiner::DoNothingCombiner;

fn aux_test_union(vals: Vec<Vec<u32>>) {
use std::collections::BTreeSet;
@@ -213,15 +209,14 @@ mod tests {
val_set.insert(v);
}
}
let union_vals: Vec<u32> = val_set
.into_iter()
.collect();
let union_vals: Vec<u32> = val_set.into_iter().collect();
let mut union_expected = VecPostings::from(union_vals);

let mut union = Union::from(
let mut union: Union<_, DoNothingCombiner> = Union::from(
vals.into_iter()
.map(VecPostings::from)
.collect::<Vec<VecPostings>>()
.map(ConstScorer::new)
.collect::<Vec<ConstScorer<VecPostings>>>(),
);
while union.advance() {
assert!(union_expected.advance());
@@ -232,30 +227,25 @@ mod tests {

#[test]
fn test_union() {
aux_test_union(
vec![
vec![1, 3333, 100000000u32],
vec![1,2, 100000000u32],
vec![1,2, 100000000u32],
vec![]
]
);
aux_test_union(
vec![
vec![1, 3333, 100000000u32],
vec![1,2, 100000000u32],
vec![1,2, 100000000u32],
vec![]
]
);
aux_test_union(vec![
vec![1, 3333, 100000000u32],
vec![1, 2, 100000000u32],
vec![1, 2, 100000000u32],
vec![],
]);
aux_test_union(vec![
vec![1, 3333, 100000000u32],
vec![1, 2, 100000000u32],
vec![1, 2, 100000000u32],
vec![],
]);
aux_test_union(vec![
tests::sample_with_seed(100_000, 0.01, 1),
tests::sample_with_seed(100_000, 0.05, 2),
tests::sample_with_seed(100_000, 0.001, 3)
tests::sample_with_seed(100_000, 0.001, 3),
]);
}

fn test_aux_union_skip(docs_list: &[Vec<DocId>], skip_targets: Vec<DocId>) {
let mut btree_set = BTreeSet::new();
for docs in docs_list {
@@ -264,12 +254,13 @@ mod tests {
}
}
let docset_factory = || {
let res: Box<DocSet> = box Union::from(
let res: Box<DocSet> = box Union::<_, DoNothingCombiner>::from(
docs_list
.iter()
.map(|docs| docs.clone())
.map(VecPostings::from)
.collect::<Vec<VecPostings>>()
.map(ConstScorer::new)
.collect::<Vec<_>>(),
);
res
};
@@ -282,29 +273,24 @@ mod tests {
test_skip_against_unoptimized(docset_factory, skip_targets);
}

#[test]
fn test_union_skip_corner_case() {
test_aux_union_skip(
&[vec![165132, 167382], vec![25029, 25091]],
vec![25029],
);
test_aux_union_skip(&[vec![165132, 167382], vec![25029, 25091]], vec![25029]);
}

#[test]
fn test_union_skip_corner_case2() {
test_aux_union_skip(
&[
vec![1u32, 1u32 + HORIZON],
vec![2u32, 1000u32, 10_000u32]
], vec![0u32, 1u32, 2u32, 3u32, 1u32 + HORIZON, 2u32 + HORIZON]);
&[vec![1u32, 1u32 + HORIZON], vec![2u32, 1000u32, 10_000u32]],
vec![0u32, 1u32, 2u32, 3u32, 1u32 + HORIZON, 2u32 + HORIZON],
);
}

#[test]
fn test_union_skip_corner_case3() {
let mut docset = Union::from(vec![
VecPostings::from(vec![0u32, 5u32]),
VecPostings::from(vec![1u32, 4u32]),
let mut docset = Union::<_, DoNothingCombiner>::from(vec![
ConstScorer::new(VecPostings::from(vec![0u32, 5u32])),
ConstScorer::new(VecPostings::from(vec![1u32, 4u32]))
]);
assert!(docset.advance());
assert_eq!(docset.doc(), 0u32);
@@ -314,53 +300,70 @@ mod tests {

#[test]
fn test_union_skip_random() {
test_aux_union_skip(&[
vec![1,2,3,7],
vec![1,3,9,10000],
vec![1,3,8,9,100]
], vec![1,2,3,5,6,7,8,100]);
test_aux_union_skip(&[
tests::sample_with_seed(100_000, 0.001, 1),
tests::sample_with_seed(100_000, 0.002, 2),
tests::sample_with_seed(100_000, 0.005, 3)
], tests::sample_with_seed(100_000, 0.01, 4));
test_aux_union_skip(
&[
vec![1, 2, 3, 7],
vec![1, 3, 9, 10000],
vec![1, 3, 8, 9, 100],
],
vec![1, 2, 3, 5, 6, 7, 8, 100],
);
test_aux_union_skip(
&[
tests::sample_with_seed(100_000, 0.001, 1),
tests::sample_with_seed(100_000, 0.002, 2),
tests::sample_with_seed(100_000, 0.005, 3),
],
tests::sample_with_seed(100_000, 0.01, 4),
);
}

#[test]
fn test_union_skip_specific() {
test_aux_union_skip(&[
vec![1,2,3,7],
vec![1,3,9,10000],
vec![1,3,8,9,100]
], vec![1,2,3,7,8,9,99,100,101,500,20000]);
test_aux_union_skip(
&[
vec![1, 2, 3, 7],
vec![1, 3, 9, 10000],
vec![1, 3, 8, 9, 100],
],
vec![1, 2, 3, 7, 8, 9, 99, 100, 101, 500, 20000],
);
}

#[bench]
fn bench_union_3_high(bench: &mut Bencher) {
let union_docset: Vec<Vec<DocId>> = vec![
let union_docset: Vec<Vec<DocId>> = vec![
tests::sample_with_seed(100_000, 0.1, 0),
tests::sample_with_seed(100_000, 0.2, 1),
];
bench.iter(|| {
let mut v = Union::from(union_docset.iter()
.map(|doc_ids| VecPostings::from(doc_ids.clone()))
.collect::<Vec<VecPostings>>());
while v.advance() {};
let mut v = Union::<_, DoNothingCombiner>::from(
union_docset
.iter()
.map(|doc_ids| VecPostings::from(doc_ids.clone()))
.map(ConstScorer::new)
.collect::<Vec<_>>(),
);
while v.advance() {}
});
}
#[bench]
fn bench_union_3_low(bench: &mut Bencher) {
let union_docset: Vec<Vec<DocId>> = vec![
let union_docset: Vec<Vec<DocId>> = vec![
tests::sample_with_seed(100_000, 0.01, 0),
tests::sample_with_seed(100_000, 0.05, 1),
tests::sample_with_seed(100_000, 0.001, 2)
tests::sample_with_seed(100_000, 0.001, 2),
];
bench.iter(|| {
let mut v = Union::from(union_docset.iter()
.map(|doc_ids| VecPostings::from(doc_ids.clone()))
.collect::<Vec<VecPostings>>());
while v.advance() {};
let mut v = Union::<_, DoNothingCombiner>::from(
union_docset
.iter()
.map(|doc_ids| VecPostings::from(doc_ids.clone()))
.map(ConstScorer::new)
.collect::<Vec<_>>(),
);
while v.advance() {}
});
}

}
}
@@ -56,7 +56,6 @@ impl Postings for VecPostings {
}
}

#[cfg(test)]
pub mod tests {

@@ -22,14 +22,14 @@ use query::Occur;
#[derive(Debug)]
pub struct BooleanQuery {
subqueries: Vec<(Occur, Box<Query>)>,
scoring_disabled: bool
scoring_disabled: bool,
}

impl From<Vec<(Occur, Box<Query>)>> for BooleanQuery {
fn from(subqueries: Vec<(Occur, Box<Query>)>) -> BooleanQuery {
BooleanQuery {
subqueries,
scoring_disabled: false
scoring_disabled: false,
}
}
}
@@ -49,9 +49,7 @@ impl Query for BooleanQuery {
fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
let sub_weights = self.subqueries
.iter()
.map(|&(ref occur, ref subquery)| {
Ok((*occur, subquery.weight(searcher)?))
})
.map(|&(ref occur, ref subquery)| Ok((*occur, subquery.weight(searcher)?)))
.collect::<Result<_>>()?;
Ok(box BooleanWeight::new(sub_weights, self.scoring_disabled))
}
@@ -1,147 +0,0 @@
use query::Scorer;
use DocId;
use std::collections::BinaryHeap;
use std::cmp::Ordering;
use postings::DocSet;
use query::OccurFilter;
use query::score_combiner::{ScoreCombiner, SumWithCoordsCombiner};

/// Each `HeapItem` represents the head of
/// one of scorer being merged.
///
/// * `doc` - is the current doc id for the given segment postings
/// * `ord` - is the ordinal used to identify to which segment postings
/// this heap item belong to.
#[derive(Eq, PartialEq)]
struct HeapItem {
doc: DocId,
ord: u32,
}

/// `HeapItem` are ordered by the document
impl PartialOrd for HeapItem {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}

impl Ord for HeapItem {
fn cmp(&self, other: &Self) -> Ordering {
(other.doc).cmp(&self.doc)
}
}

pub struct BooleanScorer<TScorer: Scorer> {
scorers: Vec<TScorer>,
queue: BinaryHeap<HeapItem>,
doc: DocId,
score_combiner: ScoreCombiner,
occur_filter: OccurFilter,
}

impl<TScorer: Scorer> BooleanScorer<TScorer> {
pub fn new(scorers: Vec<TScorer>, occur_filter: OccurFilter) -> BooleanScorer<TScorer> {
let score_combiner = ScoreCombiner::default_for_num_scorers(scorers.len());
let mut non_empty_scorers: Vec<TScorer> = Vec::new();
for mut posting in scorers {
let non_empty = posting.advance();
if non_empty {
non_empty_scorers.push(posting);
}
}
let heap_items: Vec<HeapItem> = non_empty_scorers
.iter()
.map(|posting| posting.doc())
.enumerate()
.map(|(ord, doc)| HeapItem {
doc,
ord: ord as u32,
})
.collect();
BooleanScorer {
scorers: non_empty_scorers,
queue: BinaryHeap::from(heap_items),
doc: 0u32,
score_combiner,
occur_filter,
}
}

/// Advances the head of our heap (the segment posting with the lowest doc)
/// It will also update the new current `DocId` as well as the term frequency
/// associated with the segment postings.
///
/// After advancing the `SegmentPosting`, the postings is removed from the heap
/// if it has been entirely consumed, or pushed back into the heap.
///
/// # Panics
/// This method will panic if the head `SegmentPostings` is not empty.
fn advance_head(&mut self) {
{
let mut mutable_head = self.queue.peek_mut().unwrap();
let cur_scorers = &mut self.scorers[mutable_head.ord as usize];
if cur_scorers.advance() {
mutable_head.doc = cur_scorers.doc();
return;
}
}
self.queue.pop();
}
}

impl<TScorer: Scorer> DocSet for BooleanScorer<TScorer> {
fn advance(&mut self) -> bool {
loop {
self.score_combiner.clear();
let mut ord_bitset = 0u64;
match self.queue.peek() {
Some(heap_item) => {
let ord = heap_item.ord as usize;
self.doc = heap_item.doc;
let score = self.scorers[ord].score();
self.score_combiner.update(score);
ord_bitset |= 1 << ord;
}
None => {
return false;
}
}
self.advance_head();
while let Some(&HeapItem { doc, ord }) = self.queue.peek() {
if doc == self.doc {
let ord = ord as usize;
let score = self.scorers[ord].score();
self.score_combiner.update(score);
ord_bitset |= 1 << ord;
} else {
break;
}
self.advance_head();
}
if self.occur_filter.accept(ord_bitset) {
return true;
}
}
}

fn doc(&self) -> DocId {
self.doc
}

fn size_hint(&self) -> u32 {
// TODO fix this. it should be the min
// of the MUST scorer
// and the max of the SHOULD scorers.
self.scorers
.iter()
.map(|scorer| scorer.size_hint())
.max()
.unwrap()
}
}

impl<TScorer: Scorer> Scorer for BooleanScorer<TScorer> {
fn score(&mut self) -> f32 {
self.score_combiner.score()
}
}
@@ -5,41 +5,39 @@ use std::collections::HashMap;
use query::EmptyScorer;
use query::Scorer;
use query::Exclude;
use super::BooleanScorer;
use query::OccurFilter;
use query::ConstScorer;
use query::Occur;
use query::RequiredOptionalScorer;
use query::score_combiner::{SumWithCoordsCombiner, DoNothingCombiner, ScoreCombiner};
use Result;

fn scorer_union<'a>(docsets: Vec<Box<Scorer + 'a>>) -> Box<Scorer + 'a> {
fn scorer_union<'a, TScoreCombiner: ScoreCombiner + 'static>(docsets: Vec<Box<Scorer + 'a>>) -> Box<Scorer + 'a> {
assert!(!docsets.is_empty());
if docsets.len() == 1 {
docsets
.into_iter()
.next()
.unwrap() //< we checked the size beforehands
docsets.into_iter().next().unwrap() //< we checked the size beforehands
} else {
// TODO have a UnionScorer instead.
box ConstScorer::new(Union::from(docsets))
box ConstScorer::new(Union::<_, TScoreCombiner>::from(docsets))
}
}

pub struct BooleanWeight {
weights: Vec<(Occur, Box<Weight>)>,
scoring_disabled: bool
scoring_disabled: bool,
}

impl BooleanWeight {
pub fn new(weights: Vec<(Occur, Box<Weight>)>, scoring_disabled: bool) -> BooleanWeight {
BooleanWeight {
weights,
scoring_disabled
scoring_disabled,
}
}

fn scorer_if_scoring_disabled<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
fn complex_scorer<'a, TScoreCombiner: ScoreCombiner + 'static>(
&'a self,
reader: &'a SegmentReader,
) -> Result<Box<Scorer + 'a>> {
let mut per_occur_scorers: HashMap<Occur, Vec<Box<Scorer + 'a>>> = HashMap::new();
for &(ref occur, ref subweight) in self.weights.iter() {
let sub_scorer: Box<Scorer + 'a> = subweight.scorer(reader)?;
@@ -49,28 +47,32 @@ impl BooleanWeight {
.push(sub_scorer);
}

let should_scorer_opt: Option<Box<Scorer + 'a>> = per_occur_scorers
.remove(&Occur::Should)
.map(scorer_union);
let should_scorer_opt: Option<Box<Scorer + 'a>> =
per_occur_scorers.remove(&Occur::Should).map(scorer_union::<TScoreCombiner>);

let exclude_scorer_opt: Option<Box<Scorer + 'a>> = per_occur_scorers
.remove(&Occur::MustNot)
.map(scorer_union);
let exclude_scorer_opt: Option<Box<Scorer + 'a>> =
per_occur_scorers.remove(&Occur::MustNot).map(scorer_union::<TScoreCombiner>);

let must_scorer_opt: Option<Box<Scorer + 'a>> = per_occur_scorers
.remove(&Occur::Must)
.map(|scorers| {
let scorer: Box<Scorer> = box ConstScorer::new(Intersection::from(scorers));
scorer
let must_scorer_opt: Option<Box<Scorer + 'a>> =
per_occur_scorers.remove(&Occur::Must).map(|scorers| {
if scorers.len() == 1 {
scorers.into_iter().next().unwrap()
} else {
let scorer: Box<Scorer> = box Intersection::from(scorers);
scorer
}
});

let positive_scorer: Box<Scorer> = match (should_scorer_opt, must_scorer_opt) {
(Some(should_scorer), Some(must_scorer)) =>
box RequiredOptionalScorer::new(must_scorer, should_scorer),
(None, Some(must_scorer)) =>
must_scorer,
(Some(should_scorer), None) =>
should_scorer,
(Some(should_scorer), Some(must_scorer)) => {
if self.scoring_disabled {
must_scorer
} else {
box RequiredOptionalScorer::<_,_,TScoreCombiner>::new(must_scorer, should_scorer)
}
}
(None, Some(must_scorer)) => must_scorer,
(Some(should_scorer), None) => should_scorer,
(None, None) => {
return Ok(box EmptyScorer);
}
@@ -83,38 +85,37 @@ impl BooleanWeight {
}
}

fn scorer_if_scoring_enabled<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
let sub_scorers: Vec<Box<Scorer + 'a>> = self.weights
.iter()
.map(|&(_, ref weight)| weight)
.map(|weight| weight.scorer(reader))
.collect::<Result<_>>()?;
let occurs: Vec<Occur> = self.weights
.iter()
.map(|&(ref occur, _)| *occur)
.collect();
let occur_filter = OccurFilter::new(&occurs);
let boolean_scorer = BooleanScorer::new(sub_scorers, occur_filter);
Ok(box boolean_scorer)
}
// fn scorer_if_scoring_enabled<'a>(
// &'a self,
// reader: &'a SegmentReader,
// ) -> Result<Box<Scorer + 'a>> {
// let sub_scorers: Vec<Box<Scorer + 'a>> = self.weights
// .iter()
// .map(|&(_, ref weight)| weight)
// .map(|weight| weight.scorer(reader))
// .collect::<Result<_>>()?;
// let occurs: Vec<Occur> = self.weights.iter().map(|&(ref occur, _)| *occur).collect();
// let occur_filter = OccurFilter::new(&occurs);
// let boolean_scorer = BooleanScorer::new(sub_scorers, occur_filter);
// Ok(box boolean_scorer)
// }
}

impl Weight for BooleanWeight {
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
if self.weights.is_empty() {
Ok(box EmptyScorer)
} else if self.weights.len() == 1 {
let &(occur, ref weight) = &self.weights[0];
if occur == Occur::MustNot {
if occur == Occur::MustNot {
Ok(box EmptyScorer)
} else {
weight.scorer(reader)
}
} else if self.scoring_disabled {
self.scorer_if_scoring_disabled(reader)
self.complex_scorer::<DoNothingCombiner>(reader)
} else {
self.scorer_if_scoring_enabled(reader)
self.complex_scorer::<SumWithCoordsCombiner>(reader)
}
}
}
}
@@ -1,18 +1,15 @@
mod boolean_query;
mod boolean_scorer;
//mod boolean_scorer;
mod boolean_weight;

pub use self::boolean_query::BooleanQuery;
pub use self::boolean_scorer::BooleanScorer;
//pub use self::boolean_scorer::BooleanScorer;

#[cfg(test)]
mod tests {

use super::*;
use postings::{DocSet, VecPostings};
use query::Scorer;
use query::OccurFilter;
use query::term_query::TermScorer;
use query::Occur;
use query::Query;
use query::TermQuery;
@@ -111,40 +108,40 @@ mod tests {
}
}

#[test]
pub fn test_boolean_scorer() {
let occurs = vec![Occur::Should, Occur::Should];
let occur_filter = OccurFilter::new(&occurs);

let left_fieldnorms =
U64FastFieldReader::from((0u64..9u64).map(|doc| doc * 3).collect::<Vec<u64>>());

let left = VecPostings::from(vec![1, 2, 3]);
let left_scorer = TermScorer {
idf: 1f32,
fieldnorm_reader_opt: Some(left_fieldnorms),
postings: left,
};

let right_fieldnorms =
U64FastFieldReader::from((0u64..9u64).map(|doc| doc * 5).collect::<Vec<u64>>());
let right = VecPostings::from(vec![1, 3, 8]);

let right_scorer = TermScorer {
idf: 4f32,
fieldnorm_reader_opt: Some(right_fieldnorms),
postings: right,
};

let mut boolean_scorer = BooleanScorer::new(vec![left_scorer, right_scorer], occur_filter);
assert_eq!(boolean_scorer.next(), Some(1u32));
assert!(abs_diff(boolean_scorer.score(), 2.3662047) < 0.001);
assert_eq!(boolean_scorer.next(), Some(2u32));
assert!(abs_diff(boolean_scorer.score(), 0.20412415) < 0.001f32);
assert_eq!(boolean_scorer.next(), Some(3u32));
assert_eq!(boolean_scorer.next(), Some(8u32));
assert!(abs_diff(boolean_scorer.score(), 0.31622776) < 0.001f32);
assert!(!boolean_scorer.advance());
}
// #[test]
// pub fn test_boolean_scorer() {
// let occurs = vec![Occur::Should, Occur::Should];
// let occur_filter = OccurFilter::new(&occurs);
//
// let left_fieldnorms =
// U64FastFieldReader::from((0u64..9u64).map(|doc| doc * 3).collect::<Vec<u64>>());
//
// let left = VecPostings::from(vec![1, 2, 3]);
// let left_scorer = TermScorer {
// idf: 1f32,
// fieldnorm_reader_opt: Some(left_fieldnorms),
// postings: left,
// };
//
// let right_fieldnorms =
// U64FastFieldReader::from((0u64..9u64).map(|doc| doc * 5).collect::<Vec<u64>>());
// let right = VecPostings::from(vec![1, 3, 8]);
//
// let right_scorer = TermScorer {
// idf: 4f32,
// fieldnorm_reader_opt: Some(right_fieldnorms),
// postings: right,
// };
//
// let mut boolean_scorer = BooleanScorer::new(vec![left_scorer, right_scorer], occur_filter);
// assert_eq!(boolean_scorer.next(), Some(1u32));
// assert!(abs_diff(boolean_scorer.score(), 2.3662047) < 0.001);
// assert_eq!(boolean_scorer.next(), Some(2u32));
// assert!(abs_diff(boolean_scorer.score(), 0.20412415) < 0.001f32);
// assert_eq!(boolean_scorer.next(), Some(3u32));
// assert_eq!(boolean_scorer.next(), Some(8u32));
// assert!(abs_diff(boolean_scorer.score(), 0.31622776) < 0.001f32);
// assert!(!boolean_scorer.advance());
// }

}
@@ -7,7 +7,7 @@ use DocId;
#[derive(Clone, Copy, Debug)]
enum State {
ExcludeOne(DocId),
Finished
Finished,
}

/// Filters a given `DocSet` by removing the docs from a given `DocSet`.
@@ -19,18 +19,20 @@ pub struct Exclude<TDocSet, TDocSetExclude> {
excluding_state: State,
}

impl<TDocSet, TDocSetExclude> Exclude<TDocSet, TDocSetExclude>
where TDocSetExclude: DocSet {

where
TDocSetExclude: DocSet,
{
/// Creates a new `ExcludeScorer`
pub fn new(underlying_docset: TDocSet, mut excluding_docset: TDocSetExclude) -> Exclude<TDocSet, TDocSetExclude> {
let state =
if excluding_docset.advance() {
State::ExcludeOne(excluding_docset.doc())
} else {
State::Finished
};
pub fn new(
underlying_docset: TDocSet,
mut excluding_docset: TDocSetExclude,
) -> Exclude<TDocSet, TDocSetExclude> {
let state = if excluding_docset.advance() {
State::ExcludeOne(excluding_docset.doc())
} else {
State::Finished
};
Exclude {
underlying_docset,
excluding_docset,
@@ -40,8 +42,10 @@ impl<TDocSet, TDocSetExclude> Exclude<TDocSet, TDocSetExclude>
}

impl<TDocSet, TDocSetExclude> Exclude<TDocSet, TDocSetExclude>
where TDocSet: DocSet, TDocSetExclude: DocSet {

where
TDocSet: DocSet,
TDocSetExclude: DocSet,
{
/// Returns true iff the doc is not removed.
///
/// The method has to be called with non strictly
@@ -64,22 +68,20 @@ impl<TDocSet, TDocSetExclude> Exclude<TDocSet, TDocSetExclude>
self.excluding_state = State::Finished;
true
}
SkipResult::Reached => {
false
}
SkipResult::Reached => false,
}
}
}
State::Finished => {
true
}
State::Finished => true,
}
}
}

impl<TDocSet, TDocSetExclude> DocSet for Exclude<TDocSet, TDocSetExclude>
where TDocSet: DocSet, TDocSetExclude: DocSet {

where
TDocSet: DocSet,
TDocSetExclude: DocSet,
{
fn advance(&mut self) -> bool {
while self.underlying_docset.advance() {
if self.accept() {
@@ -101,7 +103,6 @@ impl<TDocSet, TDocSetExclude> DocSet for Exclude<TDocSet, TDocSetExclude>
} else {
SkipResult::End
}

}

fn doc(&self) -> DocId {
@@ -116,9 +117,11 @@ impl<TDocSet, TDocSetExclude> DocSet for Exclude<TDocSet, TDocSetExclude>
}
}

impl<TScorer, TDocSetExclude> Scorer for Exclude<TScorer, TDocSetExclude>
where TScorer: Scorer, TDocSetExclude: DocSet {
where
TScorer: Scorer,
TDocSetExclude: DocSet,
{
fn score(&mut self) -> Score {
self.underlying_docset.score()
}
@@ -135,24 +138,26 @@ mod tests {
#[test]
fn test_exclude() {
let mut exclude_scorer = Exclude::new(
VecPostings::from(vec![1,2,5,8,10,15,24]),
VecPostings::from(vec![1,2,3,10,16,24])
VecPostings::from(vec![1, 2, 5, 8, 10, 15, 24]),
VecPostings::from(vec![1, 2, 3, 10, 16, 24]),
);
let mut els = vec![];
while exclude_scorer.advance() {
els.push(exclude_scorer.doc());
}
assert_eq!(els, vec![5,8,15]);
assert_eq!(els, vec![5, 8, 15]);
}

#[test]
fn test_exclude_skip() {
test_skip_against_unoptimized(
|| box Exclude::new(
VecPostings::from(vec![1, 2, 5, 8, 10, 15, 24]),
VecPostings::from(vec![1, 2, 3, 10, 16, 24])
),
vec![1, 2, 5, 8, 10, 15, 24]
|| {
box Exclude::new(
VecPostings::from(vec![1, 2, 5, 8, 10, 15, 24]),
VecPostings::from(vec![1, 2, 3, 10, 16, 24]),
)
},
vec![1, 2, 5, 8, 10, 15, 24],
);
}

@@ -162,12 +167,14 @@ mod tests {
let sample_exclude = sample_with_seed(10_000, 0.05, 2);
let sample_skip = sample_with_seed(10_000, 0.005, 3);
test_skip_against_unoptimized(
|| box Exclude::new(
VecPostings::from(sample_include.clone()),
VecPostings::from(sample_exclude.clone())
),
sample_skip
|| {
box Exclude::new(
VecPostings::from(sample_include.clone()),
VecPostings::from(sample_exclude.clone()),
)
},
sample_skip,
);
}

}
}
@@ -7,7 +7,6 @@ mod boolean_query;
mod scorer;
mod occur;
mod weight;
mod occur_filter;
mod term_query;
mod query_parser;
mod phrase_query;
@@ -22,7 +21,6 @@ pub use self::reqopt_scorer::RequiredOptionalScorer;
pub use self::exclude::Exclude;
pub use self::bitset::BitSetDocSet;
pub use self::boolean_query::BooleanQuery;
pub use self::occur_filter::OccurFilter;
pub use self::occur::Occur;
pub use self::phrase_query::PhraseQuery;
pub use self::query_parser::QueryParserError;
@@ -35,4 +33,3 @@ pub use self::weight::Weight;
pub use self::all_query::{AllQuery, AllScorer, AllWeight};
pub use self::range_query::RangeQuery;
pub use self::scorer::ConstScorer;
@@ -1,39 +0,0 @@
use query::Occur;

/// An `OccurFilter` represents a filter over a bitset of
/// at most 64 elements.
///
/// It wraps some simple bitmask to compute the filter
/// rapidly.
#[derive(Clone, Copy)]
pub struct OccurFilter {
and_mask: u64,
result: u64,
}

impl OccurFilter {
/// Returns true if the bitset is matching the occur list.
pub fn accept(&self, ord_set: u64) -> bool {
(self.and_mask & ord_set) == self.result
}

/// Builds an `OccurFilter` from a list of `Occur`.
pub fn new(occurs: &[Occur]) -> OccurFilter {
let mut and_mask = 0u64;
let mut result = 0u64;
for (i, occur) in occurs.iter().enumerate() {
let shift = 1 << i;
match *occur {
Occur::Must => {
and_mask |= shift;
result |= shift;
}
Occur::MustNot => {
and_mask |= shift;
}
Occur::Should => {}
}
}
OccurFilter { and_mask, result }
}
}
@@ -45,7 +45,6 @@ pub trait Query: fmt::Debug {
/// into a specific type. This is mostly useful for unit tests.
fn as_any(&self) -> &Any;

/// Disable scoring.
///
/// For some query this may improve performance

@@ -1,9 +1,11 @@
use DocId;
use DocSet;
use query::Scorer;
use query::score_combiner::{ScoreCombiner, SumCombiner};
use Score;
use postings::SkipResult;
use std::cmp::Ordering;
use std::marker::PhantomData;

/// Given a required scorer and an optional scorer
/// matches all document from the required scorer
@@ -12,30 +14,38 @@ use std::cmp::Ordering;
/// This is useful for queries like `+somethingrequired somethingoptional`.
///
/// Note that `somethingoptional` has no impact on the `DocSet`.
pub struct RequiredOptionalScorer<TReqScorer, TOptScorer> {
pub struct RequiredOptionalScorer<TReqScorer, TOptScorer, TScoreCombiner> {
req_scorer: TReqScorer,
opt_scorer: TOptScorer,
score_cache: Option<Score>,
opt_finished: bool,
_phantom: PhantomData<TScoreCombiner>
}

impl<TReqScorer, TOptScorer> RequiredOptionalScorer<TReqScorer, TOptScorer>
where TOptScorer: DocSet {

impl<TReqScorer, TOptScorer, TScoreCombiner> RequiredOptionalScorer<TReqScorer, TOptScorer, TScoreCombiner>
where
TOptScorer: DocSet,
{
/// Creates a new `RequiredOptionalScorer`.
pub fn new(req_scorer: TReqScorer, mut opt_scorer: TOptScorer) -> RequiredOptionalScorer<TReqScorer, TOptScorer> {
pub fn new(
req_scorer: TReqScorer,
mut opt_scorer: TOptScorer,
) -> RequiredOptionalScorer<TReqScorer, TOptScorer, TScoreCombiner> {
let opt_finished = !opt_scorer.advance();
RequiredOptionalScorer {
req_scorer,
opt_scorer,
score_cache: None,
opt_finished
opt_finished,
_phantom: PhantomData
}
}
}

impl<TReqScorer, TOptScorer> DocSet for RequiredOptionalScorer<TReqScorer, TOptScorer>
where TReqScorer: DocSet, TOptScorer: DocSet
impl<TReqScorer, TOptScorer, TScoreCombiner> DocSet for RequiredOptionalScorer<TReqScorer, TOptScorer, TScoreCombiner>
where
TReqScorer: DocSet,
TOptScorer: DocSet,
{
fn advance(&mut self) -> bool {
self.score_cache = None;
@@ -55,42 +65,42 @@ impl<TReqScorer, TOptScorer> DocSet for RequiredOptionalScorer<TReqScorer, TOptS
}
}

impl<TReqScorer, TOptScorer> Scorer for RequiredOptionalScorer<TReqScorer, TOptScorer>
where TReqScorer: Scorer, TOptScorer: Scorer {

impl<TReqScorer, TOptScorer, TScoreCombiner> Scorer for RequiredOptionalScorer<TReqScorer, TOptScorer, TScoreCombiner>
where
TReqScorer: Scorer,
TOptScorer: Scorer,
TScoreCombiner: ScoreCombiner
{
fn score(&mut self) -> Score {
if let Some(score) = self.score_cache {
return score;
}
let doc = self.doc();
let mut score = self.req_scorer.score();
if self.opt_finished {
return score;
}
match self.opt_scorer.doc().cmp(&doc) {
Ordering::Greater => {}
Ordering::Equal => {
score += self.opt_scorer.score();
}
Ordering::Less => {
match self.opt_scorer.skip_next(doc) {
let mut score_combiner = TScoreCombiner::default();
score_combiner.update(&mut self.req_scorer);
if !self.opt_finished {
match self.opt_scorer.doc().cmp(&doc) {
Ordering::Greater => {}
Ordering::Equal => {
score_combiner.update(&mut self.opt_scorer);
}
Ordering::Less => match self.opt_scorer.skip_next(doc) {
SkipResult::Reached => {
score += self.opt_scorer.score();
score_combiner.update(&mut self.opt_scorer);
}
SkipResult::End => {
self.opt_finished = true;
}
SkipResult::OverStep => {}
}
},
}
}
let score = score_combiner.score();
self.score_cache = Some(score);
score
}
}

#[cfg(test)]
mod tests {
use tests::sample_with_seed;
@@ -100,14 +110,14 @@ mod tests {
use DocSet;
use postings::tests::test_skip_against_unoptimized;
use query::Scorer;

use query::score_combiner::{DoNothingCombiner, SumCombiner};

#[test]
fn test_reqopt_scorer_empty() {
let req = vec![1, 3, 7];
let mut reqoptscorer = RequiredOptionalScorer::new(
let mut reqoptscorer: RequiredOptionalScorer<_, _, SumCombiner> = RequiredOptionalScorer::new(
ConstScorer::new(VecPostings::from(req.clone())),
ConstScorer::new(VecPostings::from(vec![]))
ConstScorer::new(VecPostings::from(vec![])),
);
let mut docs = vec![];
while reqoptscorer.advance() {
@@ -118,9 +128,9 @@ mod tests {

#[test]
fn test_reqopt_scorer() {
let mut reqoptscorer = RequiredOptionalScorer::new(
ConstScorer::new(VecPostings::from(vec![1,3,7,8,9,10,13,15])),
ConstScorer::new(VecPostings::from(vec![1,2,7,11,12,15]))
let mut reqoptscorer: RequiredOptionalScorer<_,_,SumCombiner> = RequiredOptionalScorer::new(
ConstScorer::new(VecPostings::from(vec![1, 3, 7, 8, 9, 10, 13, 15])),
ConstScorer::new(VecPostings::from(vec![1, 2, 7, 11, 12, 15])),
);
{
assert!(reqoptscorer.advance());
@@ -170,12 +180,15 @@ mod tests {
let req_docs = sample_with_seed(10_000, 0.02, 1);
let opt_docs = sample_with_seed(10_000, 0.02, 2);
let skip_docs = sample_with_seed(10_000, 0.001, 3);
test_skip_against_unoptimized(||
box RequiredOptionalScorer::new(
ConstScorer::new(VecPostings::from(req_docs.clone())),
ConstScorer::new(VecPostings::from(opt_docs.clone()))
), skip_docs);
test_skip_against_unoptimized(
|| {
box RequiredOptionalScorer::<_,_,DoNothingCombiner>::new(
ConstScorer::new(VecPostings::from(req_docs.clone())),
ConstScorer::new(VecPostings::from(opt_docs.clone())),
)
},
skip_docs,
);
}

}
}
@@ -1,15 +1,17 @@
use Score;
use query::Scorer;

pub trait ScoreCombiner: Default + Copy {
fn update(&mut self, score: Score);
pub trait ScoreCombiner: Default + Clone + Copy {
fn update<TScorer: Scorer>(&mut self, scorer: &mut TScorer);
fn clear(&mut self);
fn score(&self) -> Score;
}

#[derive(Default, Clone, Copy)] //< these should not be too much work :)
pub struct DoNothingCombiner;

impl ScoreCombiner for DoNothingCombiner {
fn update(&mut self, score: Score) {}
fn update<TScorer: Scorer>(&mut self, _scorer: &mut TScorer) {}

fn clear(&mut self) {}

@@ -18,15 +20,35 @@ impl ScoreCombiner for DoNothingCombiner {
}
}

#[derive(Default, Clone, Copy)]
pub struct SumCombiner {
score: Score
}

impl ScoreCombiner for SumCombiner {
fn update<TScorer: Scorer>(&mut self, scorer: &mut TScorer) {
self.score += scorer.score();
}

fn clear(&mut self) {
self.score = 0f32;
}

fn score(&self) -> Score {
self.score
}
}

#[derive(Default, Clone, Copy)]
pub struct SumWithCoordsCombiner {
coords: Vec<Score>,
num_fields: usize,
score: Score,
}

impl ScoreCombiner for SumWithCoordsCombiner {
fn update(&mut self, score: Score) {
self.score += score;
fn update<TScorer: Scorer>(&mut self, scorer: &mut TScorer) {
self.score += scorer.score();
self.num_fields += 1;
}
@@ -36,32 +58,7 @@ impl ScoreCombiner for SumWithCoordsCombiner {
}

fn score(&self) -> Score {
self.score * self.coord()
}

}

impl SumWithCoordsCombiner {
/// Compute the coord term
fn coord(&self) -> f32 {
self.coords[self.num_fields]
}

pub fn default_for_num_scorers(num_scorers: usize) -> Self {
let query_coords: Vec<Score> = (0..num_scorers + 1)
.map(|i| (i as Score) / (num_scorers as Score))
.collect();
ScoreCombiner::from(query_coords)
self.score
}
}

impl From<Vec<Score>> for ScoreCombiner {
fn from(coords: Vec<Score>) -> SumWithCoordsCombiner {
SumWithCoordsCombiner {
coords,
num_fields: 0,
score: 0f32,
}
}
}
@@ -41,7 +41,7 @@ impl TermQuery {
num_docs: searcher.num_docs(),
doc_freq: searcher.doc_freq(&self.term),
term: self.term.clone(),
index_record_option: self.index_record_option
index_record_option: self.index_record_option,
}
}
}

@@ -11,7 +11,7 @@ pub struct TermWeight {
pub(crate) num_docs: u32,
pub(crate) doc_freq: u32,
pub(crate) term: Term,
pub(crate) index_record_option: IndexRecordOption
pub(crate) index_record_option: IndexRecordOption,
}

impl Weight for TermWeight {
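For reference, the updated tests construct unions in the following style. This is a hedged usage sketch assembled from names that appear in the diff (`Union`, `ConstScorer`, `VecPostings`, `DoNothingCombiner`, `DocSet`); the helper function and the assumption that these items are in scope are illustrative, not part of the commit.

    // Usage sketch only - assumes the same items imported by the union.rs test
    // module above are in scope. Postings lists are wrapped in ConstScorer so they
    // satisfy the Scorer bound, and DoNothingCombiner is chosen because no score
    // is needed for plain matching.
    fn count_union_docs(doc_lists: Vec<Vec<u32>>) -> usize {
        let mut union: Union<_, DoNothingCombiner> = Union::from(
            doc_lists
                .into_iter()
                .map(VecPostings::from)
                .map(ConstScorer::new)
                .collect::<Vec<_>>(),
        );
        let mut count = 0;
        while union.advance() {
            count += 1;
        }
        count
    }

A scoring caller would instead pick a summing combiner (as `BooleanWeight` does with `SumWithCoordsCombiner`) and read `union.score()` after each `advance()`.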