mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-31 23:50:41 +00:00
615 lines
21 KiB
Rust
615 lines
21 KiB
Rust
use common::TinySet;
|
|
|
|
use super::size_hint::estimate_intersection;
|
|
use crate::docset::{DocSet, SeekDangerResult, BLOCK_NUM_TINYBITSETS, TERMINATED};
|
|
use crate::query::term_query::TermScorer;
|
|
use crate::query::{EmptyScorer, Scorer};
|
|
use crate::{DocId, Score};
|
|
|
|
/// Returns the intersection scorer.
|
|
///
|
|
/// The score associated with the documents is the sum of the
|
|
/// score of the `Scorer`s given in argument.
|
|
///
|
|
/// For better performance, the function uses a
|
|
/// specialized implementation if the two
|
|
/// shortest scorers are `TermScorer`s.
|
|
///
|
|
/// num_docs_segment is the number of documents in the segment. It is used for estimating the
|
|
/// `size_hint` of the intersection.
|
|
pub fn intersect_scorers(
|
|
mut scorers: Vec<Box<dyn Scorer>>,
|
|
segment_num_docs: u32,
|
|
) -> Box<dyn Scorer> {
|
|
if scorers.is_empty() {
|
|
return Box::new(EmptyScorer);
|
|
}
|
|
if scorers.len() == 1 {
|
|
return scorers.pop().unwrap();
|
|
}
|
|
// Order by estimated cost to drive each scorer.
|
|
scorers.sort_by_key(|scorer| scorer.cost());
|
|
let doc = go_to_first_doc(&mut scorers[..]);
|
|
if doc == TERMINATED {
|
|
return Box::new(EmptyScorer);
|
|
}
|
|
// We know that we have at least 2 elements.
|
|
let left = scorers.remove(0);
|
|
let right = scorers.remove(0);
|
|
let all_term_scorers = [&left, &right]
|
|
.iter()
|
|
.all(|&scorer| scorer.is::<TermScorer>());
|
|
if all_term_scorers {
|
|
return Box::new(Intersection {
|
|
left: *(left.downcast::<TermScorer>().map_err(|_| ()).unwrap()),
|
|
right: *(right.downcast::<TermScorer>().map_err(|_| ()).unwrap()),
|
|
others: scorers,
|
|
segment_num_docs,
|
|
});
|
|
}
|
|
Box::new(Intersection {
|
|
left,
|
|
right,
|
|
others: scorers,
|
|
segment_num_docs,
|
|
})
|
|
}
|
|
|
|
/// Creates a `DocSet` that iterate through the intersection of two or more `DocSet`s.
|
|
pub struct Intersection<TDocSet: DocSet, TOtherDocSet: DocSet = Box<dyn Scorer>> {
|
|
left: TDocSet,
|
|
right: TDocSet,
|
|
others: Vec<TOtherDocSet>,
|
|
segment_num_docs: u32,
|
|
}
|
|
|
|
/// Seeks every docset of `docsets` to the first document they all contain and
/// returns it.
///
/// The search starts from the largest current document among the docsets.
/// Whenever a docset lands past the candidate, its position becomes the new
/// candidate and the pass restarts from the first docset.
///
/// Panics if `docsets` is empty.
fn go_to_first_doc<TDocSet: DocSet>(docsets: &mut [TDocSet]) -> DocId {
    assert!(!docsets.is_empty());
    let mut candidate = docsets.iter().map(TDocSet::doc).max().unwrap();
    loop {
        // `find_map` is lazy: it stops at the first docset that overshoots, leaving
        // the following docsets untouched for this pass.
        let overshoot = docsets.iter_mut().find_map(|docset| {
            if docset.seek(candidate) > candidate {
                Some(docset.doc())
            } else {
                None
            }
        });
        match overshoot {
            Some(next_candidate) => candidate = next_candidate,
            None => return candidate,
        }
    }
}
|
|
|
|
impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
|
|
/// num_docs is the number of documents in the segment.
|
|
pub(crate) fn new(
|
|
mut docsets: Vec<TDocSet>,
|
|
segment_num_docs: u32,
|
|
) -> Intersection<TDocSet, TDocSet> {
|
|
let num_docsets = docsets.len();
|
|
assert!(num_docsets >= 2);
|
|
docsets.sort_by_key(|docset| docset.cost());
|
|
go_to_first_doc(&mut docsets);
|
|
let left = docsets.remove(0);
|
|
debug_assert!({
|
|
let doc = left.doc();
|
|
if doc == TERMINATED {
|
|
true
|
|
} else {
|
|
docsets.iter().all(|docset| docset.doc() == doc)
|
|
}
|
|
});
|
|
let right = docsets.remove(0);
|
|
Intersection {
|
|
left,
|
|
right,
|
|
others: docsets,
|
|
segment_num_docs,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
    /// Returns a mutable reference to the docset at position `ord`.
    ///
    /// Position `0` is `left`, `1` is `right`, and `n >= 2` is `others[n - 2]`.
    /// Panics if `ord - 2` is out of bounds for `others`.
    pub(crate) fn docset_mut_specialized(&mut self, ord: usize) -> &mut TDocSet {
        if ord == 0 {
            &mut self.left
        } else if ord == 1 {
            &mut self.right
        } else {
            &mut self.others[ord - 2]
        }
    }
}
|
|
|
|
impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOtherDocSet> {
|
|
#[inline]
|
|
fn advance(&mut self) -> DocId {
|
|
let (left, right) = (&mut self.left, &mut self.right);
|
|
|
|
// Invariant:
|
|
// - candidate is always <= to the next document in the intersection.
|
|
// - candidate strictly increases at every occurence of the loop.
|
|
let mut candidate = left.doc() + 1;
|
|
|
|
// Termination: candidate strictly increases.
|
|
'outer: while candidate < TERMINATED {
|
|
// As we enter the loop, we should always have candidate < next_doc.
|
|
|
|
candidate = left.seek(candidate);
|
|
|
|
// Left is positionned on `candidate`.
|
|
debug_assert_eq!(left.doc(), candidate);
|
|
|
|
if let SeekDangerResult::SeekLowerBound(seek_lower_bound) = right.seek_danger(candidate)
|
|
{
|
|
debug_assert!(
|
|
seek_lower_bound == TERMINATED || seek_lower_bound > candidate,
|
|
"seek_lower_bound {seek_lower_bound} must be greater than candidate \
|
|
{candidate}"
|
|
);
|
|
candidate = seek_lower_bound;
|
|
continue;
|
|
}
|
|
|
|
// Left and right are positionned on `candidate`.
|
|
debug_assert_eq!(right.doc(), candidate);
|
|
|
|
for other in &mut self.others {
|
|
if let SeekDangerResult::SeekLowerBound(seek_lower_bound) =
|
|
other.seek_danger(candidate)
|
|
{
|
|
// One of the scorer does not match, let's restart at the top of the loop.
|
|
debug_assert!(
|
|
seek_lower_bound == TERMINATED || seek_lower_bound > candidate,
|
|
"seek_lower_bound {seek_lower_bound} must be greater than candidate \
|
|
{candidate}"
|
|
);
|
|
candidate = seek_lower_bound;
|
|
continue 'outer;
|
|
}
|
|
}
|
|
|
|
// At this point all scorers are in a valid state, aligned on the next document in the
|
|
// intersection.
|
|
debug_assert!(self.others.iter().all(|docset| docset.doc() == candidate));
|
|
return candidate;
|
|
}
|
|
|
|
// We make sure our docset is in a valid state.
|
|
// In particular, we want .doc() to return TERMINATED.
|
|
left.seek(TERMINATED);
|
|
|
|
TERMINATED
|
|
}
|
|
|
|
fn seek(&mut self, target: DocId) -> DocId {
|
|
self.left.seek(target);
|
|
let mut docsets: Vec<&mut dyn DocSet> = vec![&mut self.left, &mut self.right];
|
|
for docset in &mut self.others {
|
|
docsets.push(docset);
|
|
}
|
|
let doc = go_to_first_doc(&mut docsets[..]);
|
|
debug_assert!(docsets.iter().all(|docset| docset.doc() == doc));
|
|
debug_assert!(doc >= target);
|
|
doc
|
|
}
|
|
|
|
/// Seeks to the target if necessary and checks if the target is an exact match.
|
|
///
|
|
/// Some implementations may choose to advance past the target if beneficial for performance.
|
|
/// The return value is `true` if the target is in the docset, and `false` otherwise.
|
|
fn seek_danger(&mut self, target: DocId) -> SeekDangerResult {
|
|
if let SeekDangerResult::SeekLowerBound(new_target) = self.left.seek_danger(target) {
|
|
return SeekDangerResult::SeekLowerBound(new_target);
|
|
}
|
|
if let SeekDangerResult::SeekLowerBound(new_target) = self.right.seek_danger(target) {
|
|
return SeekDangerResult::SeekLowerBound(new_target);
|
|
}
|
|
for docset in &mut self.others {
|
|
if let SeekDangerResult::SeekLowerBound(new_target) = docset.seek_danger(target) {
|
|
return SeekDangerResult::SeekLowerBound(new_target);
|
|
}
|
|
}
|
|
SeekDangerResult::Found
|
|
}
|
|
|
|
#[inline]
|
|
fn doc(&self) -> DocId {
|
|
self.left.doc()
|
|
}
|
|
|
|
fn size_hint(&self) -> u32 {
|
|
estimate_intersection(
|
|
[self.left.size_hint(), self.right.size_hint()]
|
|
.into_iter()
|
|
.chain(self.others.iter().map(DocSet::size_hint)),
|
|
self.segment_num_docs,
|
|
)
|
|
}
|
|
|
|
fn cost(&self) -> u64 {
|
|
// What's the best way to compute the cost of an intersection?
|
|
// For now we take the cost of the docset driver, which is the first docset.
|
|
// If there are docsets that are bad at skipping, they should also influence the cost.
|
|
self.left.cost()
|
|
}
|
|
|
|
fn count_including_deleted(&mut self) -> u32 {
|
|
const DENSITY_THRESHOLD_INVERSE: u32 = 32;
|
|
if self
|
|
.left
|
|
.size_hint()
|
|
.saturating_mul(DENSITY_THRESHOLD_INVERSE)
|
|
< self.segment_num_docs
|
|
{
|
|
// Sparse path: if the lead iterator covers less than ~3% of docs,
|
|
// the block approach wastes time on mostly-empty blocks.
|
|
self.count_including_deleted_sparse()
|
|
} else {
|
|
// Dense approach. We push documents into a block bitset to then
|
|
// perform count using popcount.
|
|
self.count_including_deleted_dense()
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A block bitmask with no document set: `BLOCK_NUM_TINYBITSETS` copies of
/// `TinySet::EMPTY`. Used as the starting value of every mask before it is filled.
const EMPTY_BLOCK: [TinySet; BLOCK_NUM_TINYBITSETS] = [TinySet::EMPTY; BLOCK_NUM_TINYBITSETS];
|
|
|
|
/// ANDs `other` into `mask` in-place. Returns `true` if the result is all zeros.
|
|
#[inline]
|
|
fn and_blocks_and_return_is_empty(
|
|
mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS],
|
|
update: &[TinySet; BLOCK_NUM_TINYBITSETS],
|
|
) -> bool {
|
|
let mut all_empty = true;
|
|
for (mask_tinyset, update_tinyset) in mask.iter_mut().zip(update.iter()) {
|
|
*mask_tinyset = mask_tinyset.intersect(*update_tinyset);
|
|
all_empty &= mask_tinyset.is_empty();
|
|
}
|
|
all_empty
|
|
}
|
|
|
|
impl<TDocSet: DocSet, TOtherDocSet: DocSet> Intersection<TDocSet, TOtherDocSet> {
|
|
fn count_including_deleted_sparse(&mut self) -> u32 {
|
|
let mut count = 0u32;
|
|
let mut doc = self.doc();
|
|
while doc != TERMINATED {
|
|
count += 1;
|
|
doc = self.advance();
|
|
}
|
|
count
|
|
}
|
|
|
|
/// Dense block-wise bitmask intersection count.
|
|
///
|
|
/// Fills a 1024-doc window from each iterator, ANDs the bitmasks together,
|
|
/// and popcounts the result. `fill_bitset_block` handles seeking tails forward
|
|
/// when they lag behind the current block.
|
|
fn count_including_deleted_dense(&mut self) -> u32 {
|
|
let mut count = 0u32;
|
|
let mut next_base = self.left.doc();
|
|
|
|
while next_base < TERMINATED {
|
|
let base = next_base;
|
|
|
|
// Fill lead bitmask.
|
|
let mut mask = EMPTY_BLOCK;
|
|
next_base = next_base.max(self.left.fill_bitset_block(base, &mut mask));
|
|
|
|
let mut tail_mask = EMPTY_BLOCK;
|
|
next_base = next_base.max(self.right.fill_bitset_block(base, &mut tail_mask));
|
|
|
|
if and_blocks_and_return_is_empty(&mut mask, &tail_mask) {
|
|
continue;
|
|
}
|
|
// AND with each additional tail.
|
|
for other in &mut self.others {
|
|
let mut other_mask = EMPTY_BLOCK;
|
|
next_base = next_base.max(other.fill_bitset_block(base, &mut other_mask));
|
|
if and_blocks_and_return_is_empty(&mut mask, &other_mask) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
for tinyset in &mask {
|
|
count += tinyset.len();
|
|
}
|
|
}
|
|
|
|
count
|
|
}
|
|
}
|
|
|
|
impl<TScorer, TOtherScorer> Scorer for Intersection<TScorer, TOtherScorer>
|
|
where
|
|
TScorer: Scorer,
|
|
TOtherScorer: Scorer,
|
|
{
|
|
#[inline]
|
|
fn score(&mut self) -> Score {
|
|
self.left.score()
|
|
+ self.right.score()
|
|
+ self.others.iter_mut().map(Scorer::score).sum::<Score>()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use proptest::prelude::*;

    use super::Intersection;
    use crate::collector::Count;
    use crate::docset::{DocSet, TERMINATED};
    use crate::postings::tests::test_skip_against_unoptimized;
    use crate::query::{QueryParser, VecDocSet};
    use crate::schema::{Schema, TEXT};
    use crate::Index;

    /// Two-way and three-way intersections are walked until termination.
    #[test]
    fn test_intersection() {
        let mut two_way = Intersection::new(
            vec![
                VecDocSet::from(vec![1, 3, 9]),
                VecDocSet::from(vec![3, 4, 9, 18]),
            ],
            10,
        );
        assert_eq!(two_way.doc(), 3);
        assert_eq!(two_way.advance(), 9);
        assert_eq!(two_way.doc(), 9);
        assert_eq!(two_way.advance(), TERMINATED);

        let mut three_way = Intersection::new(
            vec![
                VecDocSet::from(vec![1, 3, 9]),
                VecDocSet::from(vec![3, 4, 9, 18]),
                VecDocSet::from(vec![1, 5, 9, 111]),
            ],
            10,
        );
        assert_eq!(three_way.doc(), 9);
        assert_eq!(three_way.advance(), TERMINATED);
    }

    /// Document `0` is a valid match.
    #[test]
    fn test_intersection_zero() {
        let docsets = vec![VecDocSet::from(vec![0]), VecDocSet::from(vec![0])];
        let mut intersection = Intersection::new(docsets, 10);
        assert_eq!(intersection.doc(), 0);
        assert_eq!(intersection.advance(), TERMINATED);
    }

    #[test]
    fn test_intersection_skip() {
        let docsets = vec![
            VecDocSet::from(vec![0, 1, 2, 4]),
            VecDocSet::from(vec![2, 5]),
        ];
        let mut intersection = Intersection::new(docsets, 10);
        assert_eq!(intersection.seek(2), 2);
        assert_eq!(intersection.doc(), 2);
    }

    #[test]
    fn test_intersection_skip_against_unoptimized() {
        // Empty intersection.
        test_skip_against_unoptimized(
            || {
                let docsets = vec![VecDocSet::from(vec![4]), VecDocSet::from(vec![2, 5])];
                Box::new(Intersection::new(docsets, 10))
            },
            vec![0, 2, 4, 5, 6],
        );
        // Docsets that were already advanced before being intersected.
        test_skip_against_unoptimized(
            || {
                let mut first = VecDocSet::from(vec![1, 4, 5, 6]);
                let mut second = VecDocSet::from(vec![2, 5, 10]);
                first.advance();
                second.advance();
                Box::new(Intersection::new(vec![first, second], 10))
            },
            vec![0, 1, 2, 3, 4, 5, 6, 7, 10, 11],
        );
        // Five-way intersection.
        test_skip_against_unoptimized(
            || {
                let docsets = vec![
                    VecDocSet::from(vec![1, 4, 5, 6]),
                    VecDocSet::from(vec![1, 2, 5, 6]),
                    VecDocSet::from(vec![1, 4, 5, 6]),
                    VecDocSet::from(vec![1, 5, 6]),
                    VecDocSet::from(vec![2, 4, 5, 7, 8]),
                ];
                Box::new(Intersection::new(docsets, 10))
            },
            vec![0, 1, 2, 3, 4, 5, 6, 7, 10, 11],
        );
    }

    #[test]
    fn test_intersection_empty() {
        let docsets = vec![
            VecDocSet::from(vec![1, 3]),
            VecDocSet::from(vec![1, 4]),
            VecDocSet::from(vec![3, 9]),
        ];
        let intersection = Intersection::new(docsets, 10);
        assert_eq!(intersection.doc(), TERMINATED);
    }

    #[test]
    fn test_intersection_abc() {
        let a = VecDocSet::from(vec![2, 3, 6]);
        let b = VecDocSet::from(vec![1, 3, 5]);
        let c = VecDocSet::from(vec![1, 3, 5]);
        let mut intersection = Intersection::new(vec![c, b, a], 10);
        let mut matched_docs = Vec::new();
        loop {
            let doc = intersection.doc();
            if doc == TERMINATED {
                break;
            }
            matched_docs.push(doc);
            intersection.advance();
        }
        assert_eq!(&matched_docs, &[3]);
    }

    /// Walking the intersection must terminate even when one of the trailing
    /// scorers does not match a candidate accepted by the others.
    #[test]
    fn test_intersection_termination() {
        use crate::query::score_combiner::DoNothingCombiner;
        use crate::query::{BufferedUnionScorer, ConstScorer};

        let a1 = ConstScorer::new(VecDocSet::from(vec![0u32, 10000]), 1.0);
        let a2 = ConstScorer::new(VecDocSet::from(vec![0u32, 10000]), 1.0);

        // That's the union of two scorers, each matching 0 and 10_000.
        let b_scorers: Vec<_> = (0..2)
            .map(|_| ConstScorer::new(VecDocSet::from(vec![0, 10000]), 1.0))
            .collect();
        let union = BufferedUnionScorer::build(b_scorers, DoNothingCombiner::default, 30000);

        // Mismatching scorer: matches 0 and 20000. We then append more docs at the end to ensure it
        // is last.
        let mut m_docs = vec![0, 20000];
        m_docs.extend(30000..30100);
        let m = ConstScorer::new(VecDocSet::from(m_docs), 1.0);

        // Costs: A1=2, A2=2, Union=4, M=102.
        // Sorted: A1, A2, Union, M.
        // Left=A1, Right=A2, Others=[Union, M].
        let mut intersection = crate::query::intersect_scorers(
            vec![Box::new(a1), Box::new(a2), Box::new(union), Box::new(m)],
            40000,
        );

        while intersection.doc() != TERMINATED {
            intersection.advance();
        }
    }

    // Strategy to generate sorted and deduplicated vectors of u32 document IDs
    fn sorted_deduped_vec(max_val: u32, max_size: usize) -> impl Strategy<Value = Vec<u32>> {
        prop::collection::vec(0..max_val, 0..max_size).prop_map(|mut doc_ids| {
            doc_ids.sort();
            doc_ids.dedup();
            doc_ids
        })
    }

    proptest! {
        #[test]
        fn prop_test_intersection_consistency(
            a in sorted_deduped_vec(100, 10),
            b in sorted_deduped_vec(100, 10),
            num_docs in 100u32..500u32
        ) {
            let docsets = vec![VecDocSet::from(a.clone()), VecDocSet::from(b.clone())];
            let mut intersection = Intersection::new(docsets, num_docs);

            // Reference result: the elements of `a` that also belong to `b`.
            let expected: Vec<u32> = a.iter()
                .filter(|doc| b.contains(doc))
                .copied()
                .collect();

            for expected_doc in expected {
                assert_eq!(intersection.doc(), expected_doc);
                intersection.advance();
            }
            assert_eq!(intersection.doc(), TERMINATED);
        }
    }

    proptest! {
        #[test]
        fn prop_test_count_including_deleted_matches_default(
            a in sorted_deduped_vec(1200, 400),
            b in sorted_deduped_vec(1200, 400),
            c in sorted_deduped_vec(1200, 400),
            num_docs in 1200u32..2000u32,
        ) {
            // Compute expected count via set intersection.
            let expected: u32 = a.iter()
                .filter(|doc| b.contains(doc) && c.contains(doc))
                .count() as u32;

            // Count through `count_including_deleted` on a three-way intersection.
            let mut intersection = Intersection::new(
                vec![
                    VecDocSet::from(a.clone()),
                    VecDocSet::from(b.clone()),
                    VecDocSet::from(c.clone()),
                ],
                num_docs,
            );
            let count = intersection.count_including_deleted();
            prop_assert_eq!(count, expected,
                "count_including_deleted mismatch: a={:?}, b={:?}, c={:?}", a, b, c);
        }
    }

    #[test]
    fn test_count_including_deleted_two_way() {
        let docsets = vec![
            VecDocSet::from(vec![1, 3, 9]),
            VecDocSet::from(vec![3, 4, 9, 18]),
        ];
        let mut intersection = Intersection::new(docsets, 100);
        assert_eq!(intersection.count_including_deleted(), 2);
    }

    #[test]
    fn test_count_including_deleted_empty() {
        let docsets = vec![
            VecDocSet::from(vec![1, 3]),
            VecDocSet::from(vec![1, 4]),
            VecDocSet::from(vec![3, 9]),
        ];
        let mut intersection = Intersection::new(docsets, 100);
        assert_eq!(intersection.count_including_deleted(), 0);
    }

    /// Test with enough documents to exercise the dense path (>= num_docs/32).
    #[test]
    fn test_count_including_deleted_dense_path() {
        // Create dense docsets: many docs relative to segment size.
        let evens: Vec<u32> = (0..2000).step_by(2).collect(); // even numbers 0..2000
        let multiples_of_three: Vec<u32> = (0..2000).step_by(3).collect(); // multiples of 3
        let expected = evens.iter().filter(|d| *d % 3 == 0).count() as u32;

        let docsets = vec![
            VecDocSet::from(evens),
            VecDocSet::from(multiples_of_three),
        ];
        let mut intersection = Intersection::new(docsets, 2000);
        assert_eq!(intersection.count_including_deleted(), expected);
    }

    /// Test that spans multiple blocks (>1024 docs).
    #[test]
    fn test_count_including_deleted_multi_block() {
        let all_docs: Vec<u32> = (0..5000).collect();
        let every_seventh: Vec<u32> = (0..5000).step_by(7).collect();
        let expected = every_seventh.len() as u32; // all of every_seventh is in all_docs

        let docsets = vec![VecDocSet::from(all_docs), VecDocSet::from(every_seventh)];
        let mut intersection = Intersection::new(docsets, 5000);
        assert_eq!(intersection.count_including_deleted(), expected);
    }

    #[test]
    fn test_bug_2811_intersection_candidate_should_increase() {
        let mut schema_builder = Schema::builder();
        let text_field = schema_builder.add_text_field("text", TEXT);
        let schema = schema_builder.build();

        let index = Index::create_in_ram(schema);
        let mut writer = index.writer_for_tests().unwrap();
        writer
            .add_document(doc!(text_field=>"hello happy tax"))
            .unwrap();
        writer.add_document(doc!(text_field=>"hello")).unwrap();
        writer.add_document(doc!(text_field=>"hello")).unwrap();
        writer.add_document(doc!(text_field=>"happy tax")).unwrap();
        writer.commit().unwrap();

        let query_parser = QueryParser::for_index(&index, Vec::new());
        let query = query_parser
            .parse_query(r#"+text:hello +text:"happy tax""#)
            .unwrap();
        let searcher = index.reader().unwrap().searcher();
        let num_matches = searcher.search(&*query, &Count).unwrap();
        assert_eq!(num_matches, 1);
    }
}
|