Fixed the second all scorer issue

This commit is contained in:
Mohammad Dashti
2025-12-03 20:15:50 -08:00
committed by Stu Hood
parent f6893c30e1
commit 42c90e8508
2 changed files with 212 additions and 2 deletions

View File

@@ -288,7 +288,15 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
should_scorer
}
} else {
let must_scorer = intersect_scorers(must_scorers, num_docs);
// When must_scorers is empty but there were AllScorers removed,
// we need to use AllScorer instead of EmptyScorer
let must_scorer: Box<dyn Scorer> = if must_scorers.is_empty() {
// must_special_scorer_counts.num_all_scorers > 0 here
// (otherwise we'd be in the first branch above)
Box::new(AllScorer::new(reader.max_doc()))
} else {
intersect_scorers(must_scorers, num_docs)
};
if self.scoring_enabled {
SpecializedScorer::Other(Box::new(RequiredOptionalScorer::<
_,
@@ -305,7 +313,21 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
}
(ShouldScorersCombinationMethod::Required(should_scorer), mut must_scorers) => {
if must_scorers.is_empty() {
should_scorer
// When must_scorers is empty but there were AllScorers removed,
// we need to union the should_scorer with AllScorer
if must_special_scorer_counts.num_all_scorers > 0 {
let all_scorers: Vec<Box<dyn Scorer>> = vec![
into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
Box::new(AllScorer::new(reader.max_doc())),
];
SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
all_scorers,
&score_combiner_fn,
num_docs,
)))
} else {
should_scorer
}
} else {
must_scorers.push(into_box_scorer(should_scorer, &score_combiner_fn, num_docs));
SpecializedScorer::Other(intersect_scorers(must_scorers, num_docs))

View File

@@ -417,4 +417,192 @@ mod tests {
Ok(())
}
/// Regression test for: When a SHOULD clause contains an AllScorer (e.g., from a
/// range query matching all docs) combined with other SHOULD clauses, the AllScorer
/// was being removed but its matching documents were lost.
#[test]
pub fn test_should_with_all_scorer_regression() -> crate::Result<()> {
use crate::collector::Count;
use crate::query::AllQuery;
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests()?;
// Add 6 documents
index_writer.add_document(doc!(text_field => "hello"))?;
index_writer.add_document(doc!(text_field => "world"))?;
index_writer.add_document(doc!(text_field => "hello world"))?;
index_writer.add_document(doc!(text_field => "foo"))?;
index_writer.add_document(doc!(text_field => "bar"))?;
index_writer.add_document(doc!(text_field => "baz"))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
// AllQuery matches all 6 docs
let all_query: Box<dyn Query> = Box::new(AllQuery);
// TermQuery matches only 2 docs ("hello" and "hello world")
let term_query: Box<dyn Query> = Box::new(TermQuery::new(
Term::from_field_text(text_field, "hello"),
IndexRecordOption::Basic,
));
// Test 1: SHOULD with AllQuery and TermQuery
// Expected: 6 docs (AllQuery matches all)
// Bug before fix: Only 2 docs (AllQuery was removed, only TermQuery remained)
let bool_query = BooleanQuery::new(vec![
(Occur::Should, all_query.box_clone()),
(Occur::Should, term_query.box_clone()),
]);
let count = searcher.search(&bool_query, &Count)?;
assert_eq!(
count, 6,
"SHOULD with AllQuery should match all 6 docs, got {}",
count
);
// Test 2: Verify order doesn't matter
let bool_query_reversed = BooleanQuery::new(vec![
(Occur::Should, term_query.box_clone()),
(Occur::Should, all_query.box_clone()),
]);
let count_reversed = searcher.search(&bool_query_reversed, &Count)?;
assert_eq!(
count_reversed, 6,
"SHOULD with AllQuery (reversed order) should match all 6 docs, got {}",
count_reversed
);
Ok(())
}
/// Regression test for: When a MUST clause contains an AllScorer and there are
/// SHOULD clauses, the AllScorer in MUST was being removed, leaving an empty
/// must_scorers vector which caused intersect_scorers to return EmptyScorer.
#[test]
pub fn test_must_all_with_should_regression() -> crate::Result<()> {
use crate::collector::Count;
use crate::query::AllQuery;
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests()?;
// Add 4 documents
index_writer.add_document(doc!(text_field => "apple"))?;
index_writer.add_document(doc!(text_field => "banana"))?;
index_writer.add_document(doc!(text_field => "cherry"))?;
index_writer.add_document(doc!(text_field => "date"))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
// AllQuery matches all 4 docs
let all_query: Box<dyn Query> = Box::new(AllQuery);
// TermQuery matches only 1 doc ("apple")
let term_query: Box<dyn Query> = Box::new(TermQuery::new(
Term::from_field_text(text_field, "apple"),
IndexRecordOption::Basic,
));
// Test: MUST with AllQuery, SHOULD with TermQuery
// According to Lucene/ES semantics: MUST clauses must match, SHOULD is optional
// Expected: 4 docs (all match the MUST AllQuery)
// Bug before fix: 0 docs (AllQuery was removed, intersect_scorers got empty vec)
let bool_query = BooleanQuery::new(vec![
(Occur::Must, all_query.box_clone()),
(Occur::Should, term_query.box_clone()),
]);
let count = searcher.search(&bool_query, &Count)?;
assert_eq!(
count, 4,
"MUST AllQuery + SHOULD TermQuery should match all 4 docs, got {}",
count
);
Ok(())
}
/// Regression test for: When using range queries that match all documents in
/// a Boolean query with other clauses, the range query's AllScorer optimization
/// should not cause documents to be lost.
#[test]
pub fn test_range_query_in_boolean_regression() -> crate::Result<()> {
use std::ops::Bound;
use crate::collector::Count;
use crate::query::RangeQuery;
use crate::schema::NumericOptions;
let mut schema_builder = Schema::builder();
let name_field = schema_builder.add_text_field("name", TEXT);
let age_field =
schema_builder.add_i64_field("age", NumericOptions::default().set_fast().set_indexed());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests()?;
// Add documents where ALL have age > 50
index_writer.add_document(doc!(name_field => "alice", age_field => 55_i64))?;
index_writer.add_document(doc!(name_field => "bob", age_field => 60_i64))?;
index_writer.add_document(doc!(name_field => "charlie", age_field => 70_i64))?;
index_writer.add_document(doc!(name_field => "diana", age_field => 80_i64))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
// Range query age > 50: matches all 4 docs
// This may return AllScorer as an optimization when all values match
let range_query: Box<dyn Query> = Box::new(RangeQuery::new(
Bound::Excluded(Term::from_field_i64(age_field, 50)),
Bound::Unbounded,
));
// TermQuery matches only 1 doc ("alice")
let term_query: Box<dyn Query> = Box::new(TermQuery::new(
Term::from_field_text(name_field, "alice"),
IndexRecordOption::Basic,
));
// Verify individual queries work
let range_count = searcher.search(range_query.as_ref(), &Count)?;
assert_eq!(range_count, 4, "Range query should match 4 docs");
let term_count = searcher.search(term_query.as_ref(), &Count)?;
assert_eq!(term_count, 1, "Term query should match 1 doc");
// Test 1: SHOULD with range and term
// Expected: 4 docs (range matches all)
let should_query = BooleanQuery::new(vec![
(Occur::Should, range_query.box_clone()),
(Occur::Should, term_query.box_clone()),
]);
let count = searcher.search(&should_query, &Count)?;
assert_eq!(
count, 4,
"SHOULD (range OR term) should match all 4 docs, got {}",
count
);
// Test 2: MUST with range, SHOULD with term
// Expected: 4 docs (range in MUST matches all, term is optional)
let must_should_query = BooleanQuery::new(vec![
(Occur::Must, range_query.box_clone()),
(Occur::Should, term_query.box_clone()),
]);
let count = searcher.search(&must_should_query, &Count)?;
assert_eq!(
count, 4,
"MUST range + SHOULD term should match all 4 docs, got {}",
count
);
Ok(())
}
}