mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-22 11:10:40 +00:00
Fixed the second all scorer issue
This commit is contained in:
committed by
Stu Hood
parent
f6893c30e1
commit
42c90e8508
@@ -288,7 +288,15 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
|
||||
should_scorer
|
||||
}
|
||||
} else {
|
||||
let must_scorer = intersect_scorers(must_scorers, num_docs);
|
||||
// When must_scorers is empty but there were AllScorers removed,
|
||||
// we need to use AllScorer instead of EmptyScorer
|
||||
let must_scorer: Box<dyn Scorer> = if must_scorers.is_empty() {
|
||||
// must_special_scorer_counts.num_all_scorers > 0 here
|
||||
// (otherwise we'd be in the first branch above)
|
||||
Box::new(AllScorer::new(reader.max_doc()))
|
||||
} else {
|
||||
intersect_scorers(must_scorers, num_docs)
|
||||
};
|
||||
if self.scoring_enabled {
|
||||
SpecializedScorer::Other(Box::new(RequiredOptionalScorer::<
|
||||
_,
|
||||
@@ -305,7 +313,21 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
|
||||
}
|
||||
(ShouldScorersCombinationMethod::Required(should_scorer), mut must_scorers) => {
|
||||
if must_scorers.is_empty() {
|
||||
should_scorer
|
||||
// When must_scorers is empty but there were AllScorers removed,
|
||||
// we need to union the should_scorer with AllScorer
|
||||
if must_special_scorer_counts.num_all_scorers > 0 {
|
||||
let all_scorers: Vec<Box<dyn Scorer>> = vec![
|
||||
into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
|
||||
Box::new(AllScorer::new(reader.max_doc())),
|
||||
];
|
||||
SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
|
||||
all_scorers,
|
||||
&score_combiner_fn,
|
||||
num_docs,
|
||||
)))
|
||||
} else {
|
||||
should_scorer
|
||||
}
|
||||
} else {
|
||||
must_scorers.push(into_box_scorer(should_scorer, &score_combiner_fn, num_docs));
|
||||
SpecializedScorer::Other(intersect_scorers(must_scorers, num_docs))
|
||||
|
||||
@@ -417,4 +417,192 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Regression test for: When a SHOULD clause contains an AllScorer (e.g., from a
|
||||
/// range query matching all docs) combined with other SHOULD clauses, the AllScorer
|
||||
/// was being removed but its matching documents were lost.
|
||||
#[test]
|
||||
pub fn test_should_with_all_scorer_regression() -> crate::Result<()> {
|
||||
use crate::collector::Count;
|
||||
use crate::query::AllQuery;
|
||||
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer: IndexWriter = index.writer_for_tests()?;
|
||||
// Add 6 documents
|
||||
index_writer.add_document(doc!(text_field => "hello"))?;
|
||||
index_writer.add_document(doc!(text_field => "world"))?;
|
||||
index_writer.add_document(doc!(text_field => "hello world"))?;
|
||||
index_writer.add_document(doc!(text_field => "foo"))?;
|
||||
index_writer.add_document(doc!(text_field => "bar"))?;
|
||||
index_writer.add_document(doc!(text_field => "baz"))?;
|
||||
index_writer.commit()?;
|
||||
|
||||
let searcher = index.reader()?.searcher();
|
||||
|
||||
// AllQuery matches all 6 docs
|
||||
let all_query: Box<dyn Query> = Box::new(AllQuery);
|
||||
|
||||
// TermQuery matches only 2 docs ("hello" and "hello world")
|
||||
let term_query: Box<dyn Query> = Box::new(TermQuery::new(
|
||||
Term::from_field_text(text_field, "hello"),
|
||||
IndexRecordOption::Basic,
|
||||
));
|
||||
|
||||
// Test 1: SHOULD with AllQuery and TermQuery
|
||||
// Expected: 6 docs (AllQuery matches all)
|
||||
// Bug before fix: Only 2 docs (AllQuery was removed, only TermQuery remained)
|
||||
let bool_query = BooleanQuery::new(vec![
|
||||
(Occur::Should, all_query.box_clone()),
|
||||
(Occur::Should, term_query.box_clone()),
|
||||
]);
|
||||
let count = searcher.search(&bool_query, &Count)?;
|
||||
assert_eq!(
|
||||
count, 6,
|
||||
"SHOULD with AllQuery should match all 6 docs, got {}",
|
||||
count
|
||||
);
|
||||
|
||||
// Test 2: Verify order doesn't matter
|
||||
let bool_query_reversed = BooleanQuery::new(vec![
|
||||
(Occur::Should, term_query.box_clone()),
|
||||
(Occur::Should, all_query.box_clone()),
|
||||
]);
|
||||
let count_reversed = searcher.search(&bool_query_reversed, &Count)?;
|
||||
assert_eq!(
|
||||
count_reversed, 6,
|
||||
"SHOULD with AllQuery (reversed order) should match all 6 docs, got {}",
|
||||
count_reversed
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Regression test for: When a MUST clause contains an AllScorer and there are
|
||||
/// SHOULD clauses, the AllScorer in MUST was being removed, leaving an empty
|
||||
/// must_scorers vector which caused intersect_scorers to return EmptyScorer.
|
||||
#[test]
|
||||
pub fn test_must_all_with_should_regression() -> crate::Result<()> {
|
||||
use crate::collector::Count;
|
||||
use crate::query::AllQuery;
|
||||
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer: IndexWriter = index.writer_for_tests()?;
|
||||
// Add 4 documents
|
||||
index_writer.add_document(doc!(text_field => "apple"))?;
|
||||
index_writer.add_document(doc!(text_field => "banana"))?;
|
||||
index_writer.add_document(doc!(text_field => "cherry"))?;
|
||||
index_writer.add_document(doc!(text_field => "date"))?;
|
||||
index_writer.commit()?;
|
||||
|
||||
let searcher = index.reader()?.searcher();
|
||||
|
||||
// AllQuery matches all 4 docs
|
||||
let all_query: Box<dyn Query> = Box::new(AllQuery);
|
||||
|
||||
// TermQuery matches only 1 doc ("apple")
|
||||
let term_query: Box<dyn Query> = Box::new(TermQuery::new(
|
||||
Term::from_field_text(text_field, "apple"),
|
||||
IndexRecordOption::Basic,
|
||||
));
|
||||
|
||||
// Test: MUST with AllQuery, SHOULD with TermQuery
|
||||
// According to Lucene/ES semantics: MUST clauses must match, SHOULD is optional
|
||||
// Expected: 4 docs (all match the MUST AllQuery)
|
||||
// Bug before fix: 0 docs (AllQuery was removed, intersect_scorers got empty vec)
|
||||
let bool_query = BooleanQuery::new(vec![
|
||||
(Occur::Must, all_query.box_clone()),
|
||||
(Occur::Should, term_query.box_clone()),
|
||||
]);
|
||||
let count = searcher.search(&bool_query, &Count)?;
|
||||
assert_eq!(
|
||||
count, 4,
|
||||
"MUST AllQuery + SHOULD TermQuery should match all 4 docs, got {}",
|
||||
count
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Regression test for: When using range queries that match all documents in
|
||||
/// a Boolean query with other clauses, the range query's AllScorer optimization
|
||||
/// should not cause documents to be lost.
|
||||
#[test]
|
||||
pub fn test_range_query_in_boolean_regression() -> crate::Result<()> {
|
||||
use std::ops::Bound;
|
||||
|
||||
use crate::collector::Count;
|
||||
use crate::query::RangeQuery;
|
||||
use crate::schema::NumericOptions;
|
||||
|
||||
let mut schema_builder = Schema::builder();
|
||||
let name_field = schema_builder.add_text_field("name", TEXT);
|
||||
let age_field =
|
||||
schema_builder.add_i64_field("age", NumericOptions::default().set_fast().set_indexed());
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer: IndexWriter = index.writer_for_tests()?;
|
||||
|
||||
// Add documents where ALL have age > 50
|
||||
index_writer.add_document(doc!(name_field => "alice", age_field => 55_i64))?;
|
||||
index_writer.add_document(doc!(name_field => "bob", age_field => 60_i64))?;
|
||||
index_writer.add_document(doc!(name_field => "charlie", age_field => 70_i64))?;
|
||||
index_writer.add_document(doc!(name_field => "diana", age_field => 80_i64))?;
|
||||
index_writer.commit()?;
|
||||
|
||||
let searcher = index.reader()?.searcher();
|
||||
|
||||
// Range query age > 50: matches all 4 docs
|
||||
// This may return AllScorer as an optimization when all values match
|
||||
let range_query: Box<dyn Query> = Box::new(RangeQuery::new(
|
||||
Bound::Excluded(Term::from_field_i64(age_field, 50)),
|
||||
Bound::Unbounded,
|
||||
));
|
||||
|
||||
// TermQuery matches only 1 doc ("alice")
|
||||
let term_query: Box<dyn Query> = Box::new(TermQuery::new(
|
||||
Term::from_field_text(name_field, "alice"),
|
||||
IndexRecordOption::Basic,
|
||||
));
|
||||
|
||||
// Verify individual queries work
|
||||
let range_count = searcher.search(range_query.as_ref(), &Count)?;
|
||||
assert_eq!(range_count, 4, "Range query should match 4 docs");
|
||||
|
||||
let term_count = searcher.search(term_query.as_ref(), &Count)?;
|
||||
assert_eq!(term_count, 1, "Term query should match 1 doc");
|
||||
|
||||
// Test 1: SHOULD with range and term
|
||||
// Expected: 4 docs (range matches all)
|
||||
let should_query = BooleanQuery::new(vec![
|
||||
(Occur::Should, range_query.box_clone()),
|
||||
(Occur::Should, term_query.box_clone()),
|
||||
]);
|
||||
let count = searcher.search(&should_query, &Count)?;
|
||||
assert_eq!(
|
||||
count, 4,
|
||||
"SHOULD (range OR term) should match all 4 docs, got {}",
|
||||
count
|
||||
);
|
||||
|
||||
// Test 2: MUST with range, SHOULD with term
|
||||
// Expected: 4 docs (range in MUST matches all, term is optional)
|
||||
let must_should_query = BooleanQuery::new(vec![
|
||||
(Occur::Must, range_query.box_clone()),
|
||||
(Occur::Should, term_query.box_clone()),
|
||||
]);
|
||||
let count = searcher.search(&must_should_query, &Count)?;
|
||||
assert_eq!(
|
||||
count, 4,
|
||||
"MUST range + SHOULD term should match all 4 docs, got {}",
|
||||
count
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user