Mirror of https://github.com/quickwit-oss/tantivy.git, synced 2026-01-08 18:12:55 +00:00
Fix compilation errors after rebase
- Remove duplicate proptest_boolean_query module
- Remove duplicate cost() method implementations
- Fix TopDocs API usage (add .order_by_score())
- Remove duplicate imports
- Remove unused variable assignments
@@ -467,7 +467,6 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
         reader: &SegmentReader,
         callback: &mut dyn FnMut(DocId, Score),
     ) -> crate::Result<()> {
-        let num_docs = reader.num_docs();
         let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
         match scorer {
             SpecializedScorer::TermUnion(term_scorers) => {
@@ -490,7 +489,6 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
         reader: &SegmentReader,
         callback: &mut dyn FnMut(&[DocId]),
     ) -> crate::Result<()> {
-        let num_docs = reader.num_docs();
         let scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?;
         let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
 
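Both hunks above delete the same dead `let num_docs = reader.num_docs();` binding from the scoring and block-collection paths of `BooleanWeight`. For orientation, here is a minimal sketch of the loop such a method reduces to once its scorer is built, written as if inside the tantivy crate and assuming tantivy's `DocSet` convention that a freshly built scorer is already positioned on its first match; `for_each_sketch` is a hypothetical helper, not the method in the diff.

use crate::docset::{DocSet, TERMINATED};
use crate::query::Scorer;
use crate::{DocId, Score};

/// Hypothetical helper: drive `scorer` to exhaustion, handing every
/// (doc, score) pair to `callback`. A fresh DocSet already sits on its
/// first match, so we read before we advance.
fn for_each_sketch<S: Scorer>(scorer: &mut S, callback: &mut dyn FnMut(DocId, Score)) {
    let mut doc = scorer.doc();
    while doc != TERMINATED {
        callback(doc, scorer.score());
        doc = scorer.advance();
    }
}

The `SpecializedScorer::TermUnion` arm visible in the first hunk presumably exists so that unions made purely of term scorers can take a faster bulk path instead of this generic per-document loop.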
@@ -817,7 +817,6 @@ mod proptest_boolean_query {
     fn proptest_boolean_query() {
         // In the presence of optimizations around buffering, it can take large numbers of
        // documents to uncover some issues.
-        let num_docs = 10000;
         let num_fields = 8;
         let num_docs = 1 << num_fields;
         let (index, fields, range_field) =
@@ -840,135 +839,3 @@ mod proptest_boolean_query {
         });
     }
 }
-
-/// A proptest which generates arbitrary permutations of a simple boolean AST, and then matches
-/// the result against an index which contains all permutations of documents with N fields.
-#[cfg(test)]
-mod proptest_boolean_query {
-    use std::collections::{BTreeMap, HashSet};
-
-    use proptest::collection::vec;
-    use proptest::prelude::*;
-
-    use crate::collector::DocSetCollector;
-    use crate::query::{BooleanQuery, Occur, Query, TermQuery};
-    use crate::schema::{Field, OwnedValue, Schema, TEXT};
-    use crate::{DocId, Index, Term};
-
-    #[derive(Debug, Clone)]
-    enum BooleanQueryAST {
-        Leaf { field_idx: usize },
-        Union(Vec<BooleanQueryAST>),
-        Intersection(Vec<BooleanQueryAST>),
-    }
-
-    impl BooleanQueryAST {
-        fn matches(&self, doc_id: DocId) -> bool {
-            match self {
-                BooleanQueryAST::Leaf { field_idx } => Self::matches_field(doc_id, *field_idx),
-                BooleanQueryAST::Union(children) => {
-                    children.iter().any(|child| child.matches(doc_id))
-                }
-                BooleanQueryAST::Intersection(children) => {
-                    children.iter().all(|child| child.matches(doc_id))
-                }
-            }
-        }
-
-        fn matches_field(doc_id: DocId, field_idx: usize) -> bool {
-            ((doc_id as usize) >> field_idx) & 1 == 1
-        }
-
-        fn to_query(&self, fields: &[Field]) -> Box<dyn Query> {
-            match self {
-                BooleanQueryAST::Leaf { field_idx } => Box::new(TermQuery::new(
-                    Term::from_field_text(fields[*field_idx], "true"),
-                    crate::schema::IndexRecordOption::Basic,
-                )),
-                BooleanQueryAST::Union(children) => {
-                    let sub_queries = children
-                        .iter()
-                        .map(|child| (Occur::Should, child.to_query(fields)))
-                        .collect();
-                    Box::new(BooleanQuery::new(sub_queries))
-                }
-                BooleanQueryAST::Intersection(children) => {
-                    let sub_queries = children
-                        .iter()
-                        .map(|child| (Occur::Must, child.to_query(fields)))
-                        .collect();
-                    Box::new(BooleanQuery::new(sub_queries))
-                }
-            }
-        }
-    }
-
-    fn doc_ids(num_docs: usize, num_fields: usize) -> impl Iterator<Item = DocId> {
-        let permutations = 1 << num_fields;
-        let copies = (num_docs as f32 / permutations as f32).ceil() as u32;
-        (0..(permutations * copies)).into_iter()
-    }
-
-    fn create_index_with_boolean_permutations(
-        num_docs: usize,
-        num_fields: usize,
-    ) -> (Index, Vec<Field>) {
-        let mut schema_builder = Schema::builder();
-        let fields: Vec<Field> = (0..num_fields)
-            .map(|i| schema_builder.add_text_field(&format!("field_{}", i), TEXT))
-            .collect();
-        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema);
-        let mut writer = index.writer_for_tests().unwrap();
-
-        for doc_id in doc_ids(num_docs, num_fields) {
-            let mut doc: BTreeMap<_, OwnedValue> = BTreeMap::default();
-            for (field_idx, &field) in fields.iter().enumerate() {
-                if BooleanQueryAST::matches_field(doc_id, field_idx) {
-                    doc.insert(field, "true".into());
-                }
-            }
-            writer.add_document(doc).unwrap();
-        }
-        writer.commit().unwrap();
-        (index, fields)
-    }
-
-    fn arb_boolean_query_ast(num_fields: usize) -> impl Strategy<Value = BooleanQueryAST> {
-        let leaf = (0..num_fields).prop_map(|field_idx| BooleanQueryAST::Leaf { field_idx });
-        leaf.prop_recursive(
-            8,   // 8 levels of recursion
-            256, // 256 nodes max
-            10,  // 10 items per collection
-            |inner| {
-                prop_oneof![
-                    vec(inner.clone(), 1..10).prop_map(BooleanQueryAST::Union),
-                    vec(inner, 1..10).prop_map(BooleanQueryAST::Intersection),
-                ]
-            },
-        )
-    }
-
-    #[test]
-    fn proptest_boolean_query() {
-        let num_docs = 10000;
-        let num_fields = 8;
-        let (index, fields) = create_index_with_boolean_permutations(num_docs, num_fields);
-        let searcher = index.reader().unwrap().searcher();
-        proptest!(|(ast in arb_boolean_query_ast(num_fields))| {
-            let query = ast.to_query(&fields);
-
-            let mut matching_docs = HashSet::new();
-            for doc_id in doc_ids(num_docs, num_fields) {
-                if ast.matches(doc_id as DocId) {
-                    matching_docs.insert(doc_id as DocId);
-                }
-            }
-
-            let doc_addresses = searcher.search(&*query, &DocSetCollector).unwrap();
-            let result_docs: HashSet<DocId> =
-                doc_addresses.into_iter().map(|doc_address| doc_address.doc_id).collect();
-            prop_assert_eq!(result_docs, matching_docs);
-        });
-    }
-}
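The block removed above is the rebase-duplicated copy of `mod proptest_boolean_query`; the surviving copy (its tail is visible in the `@@ -817` hunk, with an extra `range_field`) keeps the same design. That design is compact: bit `i` of a doc id decides whether `field_i` holds the token "true", so `num_fields = 8` yields `1 << 8 = 256` distinct documents, and `doc_ids` emits `ceil(10000 / 256) = 40` copies of each permutation. A self-contained sketch of just that bit test, lifted from the deleted module:

// Bit `field_idx` of the doc id decides membership in field `field_idx`.
fn matches_field(doc_id: u32, field_idx: usize) -> bool {
    ((doc_id as usize) >> field_idx) & 1 == 1
}

fn main() {
    // Doc 5 = 0b101: field_0 and field_2 hold "true", field_1 does not.
    assert!(matches_field(5, 0));
    assert!(!matches_field(5, 1));
    assert!(matches_field(5, 2));
}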
@@ -1,6 +1,5 @@
-use super::size_hint::estimate_intersection;
 use crate::docset::{DocSet, TERMINATED};
 use crate::query::size_hint::estimate_intersection;
 use crate::query::term_query::TermScorer;
 use crate::query::{EmptyScorer, Scorer};
 use crate::{DocId, Score};
@@ -87,8 +87,9 @@ mod tests {
         let query = QueryParser::for_index(&index, vec![text_field])
             .parse_query(term)
             .unwrap();
-        let top_docs: Vec<(f32, DocAddress)> =
-            searcher.search(&query, &TopDocs::with_limit(10)).unwrap();
+        let top_docs: Vec<(f32, DocAddress)> = searcher
+            .search(&query, &TopDocs::with_limit(10).order_by_score())
+            .unwrap();
 
         top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>()
     };
@@ -126,8 +127,9 @@ mod tests {
         let query = QueryParser::for_index(&index, vec![text_field])
             .parse_query(term)
             .unwrap();
-        let top_docs: Vec<(f32, DocAddress)> =
-            searcher.search(&query, &TopDocs::with_limit(10)).unwrap();
+        let top_docs: Vec<(f32, DocAddress)> = searcher
+            .search(&query, &TopDocs::with_limit(10).order_by_score())
+            .unwrap();
 
         top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>()
     };
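The two `mod tests` hunks above are the TopDocs API fix from the commit message: on this branch, `TopDocs::with_limit(n)` evidently needs an explicit `.order_by_score()` before it acts as a score-sorted collector, whereas in released tantivy `with_limit` alone already collects by descending score. A minimal end-to-end sketch under that assumption (`.order_by_score()` is taken from the diff, not from a released API):

use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, STORED, TEXT};
use tantivy::{doc, DocAddress, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let text_field = schema_builder.add_text_field("text", TEXT | STORED);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(text_field => "hello happy tax payer"))?;
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    let query = QueryParser::for_index(&index, vec![text_field]).parse_query("happy")?;

    // `.order_by_score()` as required by this branch; drop it on released tantivy.
    let top_docs: Vec<(f32, DocAddress)> =
        searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;
    for (score, addr) in top_docs {
        println!("{score} {addr:?}");
    }
    Ok(())
}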
@@ -560,15 +560,6 @@ impl<TPostings: Postings> DocSet for PhraseScorer<TPostings> {
         // 10 * self.num_terms.
         self.intersection_docset.size_hint() as u64 * 10 * self.num_terms as u64
     }
-
-    /// Returns a best-effort hint of the
-    /// cost to drive the docset.
-    fn cost(&self) -> u64 {
-        // Evaluating phrase matches is generally more expensive than simple term matches,
-        // as it requires loading and comparing positions. Use a conservative multiplier
-        // based on the number of terms.
-        self.intersection_docset.size_hint() as u64 * 10 * self.num_terms as u64
-    }
 }
 
 impl<TPostings: Postings> Scorer for PhraseScorer<TPostings> {
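The `cost` kept above encodes the heuristic its comments state: checking positions makes a phrase candidate roughly ten times dearer than a plain term match, scaled by phrase length. A worked instance of the formula, with hypothetical numbers:

/// The heuristic kept above: intersection size_hint x 10 x number of terms.
fn phrase_cost(intersection_size_hint: u64, num_terms: u64) -> u64 {
    intersection_size_hint * 10 * num_terms
}

fn main() {
    // An intersection estimated at 1_000 docs for a 3-term phrase:
    // 1_000 * 10 * 3 = 30_000 units of work.
    assert_eq!(phrase_cost(1_000, 3), 30_000);
}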
@@ -205,14 +205,6 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe
         // Ideally this would take the fast field codec into account
         (self.column.num_docs() as f64 * 0.8) as u64
     }
-
-    /// Returns a best-effort hint of the
-    /// cost to drive the docset.
-    fn cost(&self) -> u64 {
-        // Advancing the docset is relatively expensive since it scans the column.
-        // Keep cost relative to a term query driver; use num_docs as baseline.
-        self.column.num_docs() as u64
-    }
 }
 
 #[cfg(test)]
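This final hunk removes the second `cost` body the rebase left in `RangeDocSet`. The surviving pair of methods also illustrates the `size_hint`/`cost` split the doc comments describe: `size_hint` estimates how many documents will match (80% of the column here, absent codec information), while `cost` estimates the work needed to drive the docset, which for a column scan is every document. A sketch of that relationship, with the 0.8 factor taken straight from the hunk:

/// Estimated match count: the hunk assumes 80% of the column matches,
/// since the estimate does not yet consult the fast field codec.
fn range_size_hint(num_docs: u64) -> u64 {
    (num_docs as f64 * 0.8) as u64
}

/// Estimated driving cost: a range docset scans the whole column.
fn range_cost(num_docs: u64) -> u64 {
    num_docs
}

fn main() {
    let num_docs = 1_000_000;
    assert_eq!(range_size_hint(num_docs), 800_000);
    assert_eq!(range_cost(num_docs), 1_000_000); // cost >= size_hint: every doc is scanned
}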