Improved tests + logic based on PR comments.

This commit is contained in:
Mohammad Dashti
2025-12-05 16:33:54 -08:00
committed by Stu Hood
parent 329ef62153
commit 3f0865526c
2 changed files with 85 additions and 54 deletions

View File

@@ -124,23 +124,33 @@ fn effective_must_scorer(
///
/// For union semantics (OR): if any SHOULD clause was an AllScorer, the result
/// should include all documents. We restore this by unioning with AllScorer.
///
/// When `scoring_enabled` is false, we can just return AllScorer alone since
/// we don't need score contributions from the should_scorer.
fn effective_should_scorer_for_union<TScoreCombiner: ScoreCombiner>(
should_scorer: SpecializedScorer,
removed_all_scorer_count: usize,
max_doc: DocId,
num_docs: u32,
score_combiner_fn: impl Fn() -> TScoreCombiner,
scoring_enabled: bool,
) -> SpecializedScorer {
if removed_all_scorer_count > 0 {
let all_scorers: Vec<Box<dyn Scorer>> = vec![
into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
Box::new(AllScorer::new(max_doc)),
];
SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
all_scorers,
score_combiner_fn,
num_docs,
)))
if scoring_enabled {
// Need to union to get score contributions from both
let all_scorers: Vec<Box<dyn Scorer>> = vec![
into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
Box::new(AllScorer::new(max_doc)),
];
SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
all_scorers,
score_combiner_fn,
num_docs,
)))
} else {
// Scoring disabled - AllScorer alone is sufficient
SpecializedScorer::Other(Box::new(AllScorer::new(max_doc)))
}
} else {
should_scorer
}
@@ -325,6 +335,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
reader.max_doc(),
num_docs,
&score_combiner_fn,
self.scoring_enabled,
)
}
Some(must_scorer) => {

View File

@@ -15,12 +15,14 @@ mod tests {
use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
use crate::collector::{Count, TopDocs};
use crate::query::term_query::TermScorer;
use crate::query::RangeQuery;
use crate::query::{
AllScorer, EmptyScorer, EnableScoring, Intersection, Occur, Query, QueryParser, RangeQuery,
RequiredOptionalScorer, Scorer, SumCombiner, TermQuery,
};
use crate::schema::*;
use crate::{assert_nearly_equals, DocAddress, DocId, Index, IndexWriter, Score};
use std::ops::Bound;
fn aux_test_helper() -> crate::Result<(Index, Field)> {
let mut schema_builder = Schema::builder();
@@ -434,49 +436,57 @@ mod tests {
/// Regression test: SHOULD clause with AllScorer combined with other SHOULD clauses.
///
/// When a SHOULD clause produces an AllScorer (e.g., from AllQuery or a range query
/// matching all documents), the Boolean query should still match all documents.
/// When a SHOULD clause produces an AllScorer (e.g., from a range query matching
/// all documents), the Boolean query should still match all documents.
///
/// Bug before fix: AllScorer was removed during optimization, leaving only the
/// other SHOULD clauses, which incorrectly excluded documents.
#[test]
pub fn test_should_with_all_scorer_regression() -> crate::Result<()> {
use crate::collector::Count;
use crate::query::AllQuery;
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let num_field =
schema_builder.add_i64_field("num", NumericOptions::default().set_fast().set_indexed());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field => "hello"))?;
index_writer.add_document(doc!(text_field => "world"))?;
index_writer.add_document(doc!(text_field => "hello world"))?;
index_writer.add_document(doc!(text_field => "foo"))?;
index_writer.add_document(doc!(text_field => "bar"))?;
index_writer.add_document(doc!(text_field => "baz"))?;
// All docs have num > 0, so range query will return AllScorer
index_writer.add_document(doc!(text_field => "hello", num_field => 10i64))?;
index_writer.add_document(doc!(text_field => "world", num_field => 20i64))?;
index_writer.add_document(doc!(text_field => "hello world", num_field => 30i64))?;
index_writer.add_document(doc!(text_field => "foo", num_field => 40i64))?;
index_writer.add_document(doc!(text_field => "bar", num_field => 50i64))?;
index_writer.add_document(doc!(text_field => "baz", num_field => 60i64))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let all_query: Box<dyn Query> = Box::new(AllQuery);
// Range query matching all docs (returns AllScorer)
let all_match_query: Box<dyn Query> = Box::new(RangeQuery::new(
Bound::Excluded(Term::from_field_i64(num_field, 0)),
Bound::Unbounded,
));
let term_query: Box<dyn Query> = Box::new(TermQuery::new(
Term::from_field_text(text_field, "hello"),
IndexRecordOption::Basic,
));
// AllQuery OR TermQuery should match all 6 docs
// Verify range matches all 6 docs
assert_eq!(searcher.search(all_match_query.as_ref(), &Count)?, 6);
// RangeQuery(all) OR TermQuery should match all 6 docs
let bool_query = BooleanQuery::new(vec![
(Occur::Should, all_query.box_clone()),
(Occur::Should, all_match_query.box_clone()),
(Occur::Should, term_query.box_clone()),
]);
let count = searcher.search(&bool_query, &Count)?;
assert_eq!(count, 6, "SHOULD with AllQuery should match all docs");
assert_eq!(count, 6, "SHOULD with AllScorer should match all docs");
// Order should not matter
let bool_query_reversed = BooleanQuery::new(vec![
(Occur::Should, term_query.box_clone()),
(Occur::Should, all_query.box_clone()),
(Occur::Should, all_match_query.box_clone()),
]);
let count_reversed = searcher.search(&bool_query_reversed, &Count)?;
assert_eq!(
@@ -496,35 +506,43 @@ mod tests {
/// intersect_scorers([]) incorrectly returned EmptyScorer, matching 0 documents.
#[test]
pub fn test_must_all_with_should_regression() -> crate::Result<()> {
use crate::collector::Count;
use crate::query::AllQuery;
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let num_field =
schema_builder.add_i64_field("num", NumericOptions::default().set_fast().set_indexed());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field => "apple"))?;
index_writer.add_document(doc!(text_field => "banana"))?;
index_writer.add_document(doc!(text_field => "cherry"))?;
index_writer.add_document(doc!(text_field => "date"))?;
// All docs have num > 0, so range query will return AllScorer
index_writer.add_document(doc!(text_field => "apple", num_field => 10i64))?;
index_writer.add_document(doc!(text_field => "banana", num_field => 20i64))?;
index_writer.add_document(doc!(text_field => "cherry", num_field => 30i64))?;
index_writer.add_document(doc!(text_field => "date", num_field => 40i64))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let all_query: Box<dyn Query> = Box::new(AllQuery);
// Range query matching all docs (returns AllScorer)
let all_match_query: Box<dyn Query> = Box::new(RangeQuery::new(
Bound::Excluded(Term::from_field_i64(num_field, 0)),
Bound::Unbounded,
));
let term_query: Box<dyn Query> = Box::new(TermQuery::new(
Term::from_field_text(text_field, "apple"),
IndexRecordOption::Basic,
));
// MUST(all) AND SHOULD(term) should match all 4 docs
// Verify range matches all 4 docs
assert_eq!(searcher.search(all_match_query.as_ref(), &Count)?, 4);
// MUST(range matching all) AND SHOULD(term) should match all 4 docs
let bool_query = BooleanQuery::new(vec![
(Occur::Must, all_query.box_clone()),
(Occur::Must, all_match_query.box_clone()),
(Occur::Should, term_query.box_clone()),
]);
let count = searcher.search(&bool_query, &Count)?;
assert_eq!(count, 4, "MUST AllQuery + SHOULD should match all docs");
assert_eq!(count, 4, "MUST AllScorer + SHOULD should match all docs");
Ok(())
}
@@ -540,12 +558,6 @@ mod tests {
/// when all documents have age > 50.
#[test]
pub fn test_range_query_all_match_in_boolean() -> crate::Result<()> {
use std::ops::Bound;
use crate::collector::Count;
use crate::query::RangeQuery;
use crate::schema::NumericOptions;
let mut schema_builder = Schema::builder();
let name_field = schema_builder.add_text_field("name", TEXT);
let age_field =
@@ -606,29 +618,37 @@ mod tests {
/// Verifies correct behavior when AllScorers appear in multiple positions.
#[test]
pub fn test_multiple_all_scorers() -> crate::Result<()> {
use crate::collector::Count;
use crate::query::AllQuery;
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let num_field =
schema_builder.add_i64_field("num", NumericOptions::default().set_fast().set_indexed());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field => "doc1"))?;
index_writer.add_document(doc!(text_field => "doc2"))?;
index_writer.add_document(doc!(text_field => "doc3"))?;
// All docs have num > 0, so range queries will return AllScorer
index_writer.add_document(doc!(text_field => "doc1", num_field => 10i64))?;
index_writer.add_document(doc!(text_field => "doc2", num_field => 20i64))?;
index_writer.add_document(doc!(text_field => "doc3", num_field => 30i64))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let all_query1: Box<dyn Query> = Box::new(AllQuery);
let all_query2: Box<dyn Query> = Box::new(AllQuery);
// Two different range queries that both match all docs (return AllScorer)
let all_query1: Box<dyn Query> = Box::new(RangeQuery::new(
Bound::Excluded(Term::from_field_i64(num_field, 0)),
Bound::Unbounded,
));
let all_query2: Box<dyn Query> = Box::new(RangeQuery::new(
Bound::Excluded(Term::from_field_i64(num_field, 5)),
Bound::Unbounded,
));
let term_query: Box<dyn Query> = Box::new(TermQuery::new(
Term::from_field_text(text_field, "doc1"),
IndexRecordOption::Basic,
));
// Multiple AllQueries in SHOULD
// Multiple AllScorers in SHOULD
let multi_all_should = BooleanQuery::new(vec![
(Occur::Should, all_query1.box_clone()),
(Occur::Should, all_query2.box_clone()),
@@ -637,10 +657,10 @@ mod tests {
assert_eq!(
searcher.search(&multi_all_should, &Count)?,
3,
"Multiple AllQueries in SHOULD"
"Multiple AllScorers in SHOULD"
);
// AllQuery in both MUST and SHOULD
// AllScorer in both MUST and SHOULD
let all_must_and_should = BooleanQuery::new(vec![
(Occur::Must, all_query1.box_clone()),
(Occur::Should, all_query2.box_clone()),
@@ -648,7 +668,7 @@ mod tests {
assert_eq!(
searcher.search(&all_must_and_should, &Count)?,
3,
"AllQuery in both MUST and SHOULD"
"AllScorer in both MUST and SHOULD"
);
Ok(())