Mirror of https://github.com/quickwit-oss/tantivy.git, synced 2026-01-07 17:42:55 +00:00

Compare commits: hfb/phrase ... unused-dep

5 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 4823771228 | |
| | 1db76dd9cf | |
| | 467a9517db | |
| | b361315a67 | |
| | 4e3771bffc | |
@@ -12,8 +12,6 @@ description = "Fast field codecs used by tantivy"
 common = { path = "../common/" }
 tantivy-bitpacker = { path = "../bitpacker/" }
-prettytable-rs = {version="0.8.0", optional= true}
-#prettytable-rs = {version="0.8.0" }
 rand = "0.8.3"
 
 [dev-dependencies]
 more-asserts = "0.2.1"
@@ -16,7 +16,7 @@ use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
 use crate::schema::Field;
 use crate::{Score, SegmentReader, TantivyError};
 
-/// The `FilterCollector` collector filters docs using a u64 fast field value and a predicate.
+/// The `FilterCollector` collector filters docs using a fast field value and a predicate.
 /// Only the documents for which the predicate returned "true" will be passed on to the next collector.
 ///
 /// ```rust
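Note: the `FilterCollector` doc example is truncated in this view. A minimal usage sketch for the tantivy versions this diff targets follows; the exact constructor shape (`FilterCollector::new(field, &predicate, inner_collector)`) is an assumption here, since it has changed across releases.

```rust
use tantivy::collector::{FilterCollector, TopDocs};
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, FAST, STORED, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let price = schema_builder.add_u64_field("price", FAST);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(15_000_000)?;
    writer.add_document(doc!(title => "cheap book", price => 5u64));
    writer.add_document(doc!(title => "pricey book", price => 50u64));
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    let query = QueryParser::for_index(&index, vec![title]).parse_query("book")?;
    // Only documents whose `price` fast field satisfies the predicate are
    // passed on to the wrapped TopDocs collector.
    let collector = FilterCollector::new(price, &|price: u64| price > 10, TopDocs::with_limit(2));
    let top_docs = searcher.search(&query, &collector)?;
    assert_eq!(top_docs.len(), 1);
    Ok(())
}
```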
@@ -31,9 +31,6 @@ use std::{collections::HashMap, io};
 ///
 /// The segment reader has a very low memory footprint,
 /// as close to all of the memory data is mmapped.
-///
-///
-/// TODO fix not decoding docfreq
 #[derive(Clone)]
 pub struct SegmentReader {
     inv_idx_reader_cache: Arc<RwLock<HashMap<Field, Arc<InvertedIndexReader>>>>,
@@ -56,15 +53,12 @@ pub struct SegmentReader {
 impl SegmentReader {
     /// Returns the highest document id ever attributed in
     /// this segment + 1.
-    /// Today, `tantivy` does not handle deletes, so it happens
-    /// to also be the number of documents in the index.
     pub fn max_doc(&self) -> DocId {
         self.max_doc
     }
 
     /// Returns the number of alive documents.
     /// Deleted documents are not counted.
-    ///
     pub fn num_docs(&self) -> DocId {
         self.num_docs
     }
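The two accessors above are tied together: `max_doc()` bounds the segment's doc id space, while `num_docs()` counts only alive documents. A hedged sketch of that invariant, assuming the `SegmentReader::is_deleted` accessor of this era's API:

```rust
use tantivy::SegmentReader;

// Scanning the doc id space and skipping deleted documents should agree
// with `num_docs()`; the difference `max_doc() - num_docs()` is the
// number of deleted documents in the segment.
fn count_alive(segment_reader: &SegmentReader) -> u32 {
    (0..segment_reader.max_doc())
        .filter(|&doc| !segment_reader.is_deleted(doc))
        .count() as u32
}
```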
@@ -179,87 +179,6 @@ pub mod tests {
         assert_nearly_equals!(scores[1], 0.46844664);
     }
 
-    #[ignore]
-    #[test]
-    pub fn test_phrase_score_with_slop() {
-        let index = create_index(&["a c b", "a b c a b"]);
-        let schema = index.schema();
-        let text_field = schema.get_field("text").unwrap();
-        let searcher = index.reader().unwrap().searcher();
-        let test_query = |texts: Vec<&str>| {
-            let terms: Vec<Term> = texts
-                .iter()
-                .map(|text| Term::from_field_text(text_field, text))
-                .collect();
-            let mut phrase_query = PhraseQuery::new(terms);
-            phrase_query.slop(1);
-            searcher
-                .search(&phrase_query, &TEST_COLLECTOR_WITH_SCORE)
-                .expect("search should succeed")
-                .scores()
-                .to_vec()
-        };
-        let scores = test_query(vec!["a", "b"]);
-        assert_nearly_equals!(scores[0], 0.40618482);
-        assert_nearly_equals!(scores[1], 0.46844664);
-    }
-
-    #[test]
-    pub fn test_phrase_score_with_slop_size() {
-        let index = create_index(&["a b e c", "a e e e c", "a e e e e c"]);
-        let schema = index.schema();
-        let text_field = schema.get_field("text").unwrap();
-        let searcher = index.reader().unwrap().searcher();
-        let test_query = |texts: Vec<&str>| {
-            let terms: Vec<Term> = texts
-                .iter()
-                .map(|text| Term::from_field_text(text_field, text))
-                .collect();
-            let mut phrase_query = PhraseQuery::new(terms);
-            phrase_query.slop(3);
-            searcher
-                .search(&phrase_query, &TEST_COLLECTOR_WITH_SCORE)
-                .expect("search should succeed")
-                .scores()
-                .to_vec()
-        };
-        let scores = test_query(vec!["a", "c"]);
-        assert_nearly_equals!(scores[0], 0.29086056);
-        assert_nearly_equals!(scores[1], 0.26706287);
-    }
-
-    #[test]
-    pub fn test_phrase_score_with_slop_ordering() {
-        let index = create_index(&[
-            "a e b e c",
-            "a e e e e e b e e e e c",
-            "a c b",
-            "a c e b e",
-            "a e c b",
-            "a e b c",
-        ]);
-        let schema = index.schema();
-        let text_field = schema.get_field("text").unwrap();
-        let searcher = index.reader().unwrap().searcher();
-        let test_query = |texts: Vec<&str>| {
-            let terms: Vec<Term> = texts
-                .iter()
-                .map(|text| Term::from_field_text(text_field, text))
-                .collect();
-            let mut phrase_query = PhraseQuery::new(terms);
-            phrase_query.slop(3);
-            searcher
-                .search(&phrase_query, &TEST_COLLECTOR_WITH_SCORE)
-                .expect("search should succeed")
-                .scores()
-                .to_vec()
-        };
-        let scores = test_query(vec!["a", "b", "c"]);
-        // The first and last matches.
-        assert_nearly_equals!(scores[0], 0.23091172);
-        assert_nearly_equals!(scores[1], 0.25024384);
-    }
-
     #[test] // motivated by #234
     pub fn test_phrase_query_docfreq_order() {
         let mut schema_builder = Schema::builder();
@@ -26,7 +26,6 @@ use crate::schema::{Field, Term};
 pub struct PhraseQuery {
     field: Field,
     phrase_terms: Vec<(usize, Term)>,
-    slop: u32,
 }
 
 impl PhraseQuery {
@@ -57,15 +56,9 @@ impl PhraseQuery {
         PhraseQuery {
             field,
             phrase_terms: terms,
-            slop: 0,
         }
     }
 
-    /// Slop allowed for the phrase.
-    pub fn slop(&mut self, value: u32) {
-        self.slop = value;
-    }
-
     /// The `Field` this `PhraseQuery` is targeting.
     pub fn field(&self) -> Field {
         self.field
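The removed `slop` setter is what the deleted tests earlier in this compare exercised. A hedged usage sketch against the hfb/phrase side (the setter does not exist on the other side): with a slop of 1, the phrase "a b" tolerates one intervening token, so it matches "a c b".

```rust
use tantivy::collector::Count;
use tantivy::query::PhraseQuery;
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index, Term};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let text = schema_builder.add_text_field("text", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(15_000_000)?;
    writer.add_document(doc!(text => "a c b"));
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    let terms = vec![
        Term::from_field_text(text, "a"),
        Term::from_field_text(text, "b"),
    ];
    let mut phrase_query = PhraseQuery::new(terms);
    // Setter from the hfb/phrase branch: allow up to one position of slack.
    phrase_query.slop(1);
    let count = searcher.search(&phrase_query, &Count)?;
    assert_eq!(count, 1);
    Ok(())
}
```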
@@ -104,11 +97,11 @@ impl PhraseQuery {
         }
         let terms = self.phrase_terms();
         let bm25_weight = Bm25Weight::for_terms(searcher, &terms)?;
-        let mut weight = PhraseWeight::new(self.phrase_terms.clone(), bm25_weight, scoring_enabled);
-        if self.slop > 0 {
-            weight.slop(self.slop);
-        }
-        Ok(weight)
+        Ok(PhraseWeight::new(
+            self.phrase_terms.clone(),
+            bm25_weight,
+            scoring_enabled,
+        ))
     }
 }
 
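For context, `prepare_weight` sits behind the query's `Query::weight` entry point. A hedged sketch of the call path, assuming the `weight(searcher, scoring_enabled)` and `scorer(reader, boost)` signatures of this tantivy era (later releases changed both):

```rust
use tantivy::query::Query;
use tantivy::{Searcher, TERMINATED};

// Counts matches by driving the weight's per-segment scorers by hand,
// the same machinery `prepare_weight` feeds in the diff above.
fn count_matches(searcher: &Searcher, query: &dyn Query) -> tantivy::Result<usize> {
    let weight = query.weight(searcher, /* scoring_enabled */ false)?;
    let mut total = 0;
    for segment_reader in searcher.segment_readers() {
        let mut scorer = weight.scorer(segment_reader, 1.0)?;
        // Scorers start positioned on their first document.
        while scorer.doc() != TERMINATED {
            total += 1;
            scorer.advance();
        }
    }
    Ok(total)
}
```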
@@ -51,7 +51,6 @@ pub struct PhraseScorer<TPostings: Postings> {
     fieldnorm_reader: FieldNormReader,
     similarity_weight: Bm25Weight,
     scoring_enabled: bool,
-    slop: u32,
 }
 
 /// Returns true iff the two sorted array contain a common element
@@ -131,82 +130,12 @@ fn intersection(left: &mut [u32], right: &[u32]) -> usize {
     count
 }
 
-/// Intersect twos sorted arrays `left` and `right` and outputs the
-/// resulting array in left.
-///
-/// Condition for match is that the value stored in left is less than the value in right,
-/// and that the difference between the value in left_begin is within
-///
-/// Returns the length of the intersection
-fn intersection_with_distance(
-    left: &mut [u32],
-    left_begin: &mut [u32],
-    right: &mut [u32],
-    max_distance_to_begin: u32,
-) -> usize {
-    // TODO: Improve variable names?
-    let mut left_i = 0;
-    let mut right_i = 0;
-    let mut count = 0;
-    let left_len = left.len();
-    let right_len = right.len();
-    // Is the current last value guaranteed to be the final value.
-    let mut is_temporary = false;
-    while left_i < left_len && right_i < right_len {
-        let left_val = left[left_i];
-        let right_val = right[right_i];
-        match left_val.cmp(&right_val) {
-            Ordering::Less => {
-                if right_val - left_begin[left_i] <= max_distance_to_begin {
-                    if is_temporary {
-                        // If the value was temporary we have found a closer match.
-                        count -= 1;
-                    };
-                    left[count] = left_val;
-                    left_begin[count] = left_begin[left_i];
-                    right[count] = right_val;
-                    count += 1;
-                    left_i += 1;
-                    // Still possible to find a closer match.
-                    is_temporary = true;
-                } else {
-                    left_i += 1;
-                }
-            }
-            Ordering::Equal => {
-                if is_temporary {
-                    // If the value was temporary we have found an.
-                    count -= 1;
-                    is_temporary = false;
-                }
-                left[count] = left_val;
-                left_begin[count] = left_begin[left_i];
-                right[count] = right_val;
-                count += 1;
-                left_i += 1;
-                right_i += 1;
-            }
-            Ordering::Greater => {
-                right_i += 1;
-                // Given the constraint that left cannot be greater than right we know that the value in left is
-                // final.
-                is_temporary = false;
-            }
-        }
-    }
-    for i in 0..count {
-        left[i] = right[i];
-    }
-    count
-}
-
 impl<TPostings: Postings> PhraseScorer<TPostings> {
     pub fn new(
         term_postings: Vec<(usize, TPostings)>,
         similarity_weight: Bm25Weight,
         fieldnorm_reader: FieldNormReader,
         scoring_enabled: bool,
-        slop: u32,
     ) -> PhraseScorer<TPostings> {
         let max_offset = term_postings
             .iter()
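The removed doc comment above trails off mid-sentence; the intent is that a position from `right` is kept when some phrase start recorded in `left_begin` lies at most `max_distance_to_begin` positions before it, and the `is_temporary` flag lets a closer candidate replace the previously kept one. A simplified, allocating sketch of the same logic (names are mine, not the PR's; the real function writes its results back into `left` in place):

```rust
use std::cmp::Ordering;

// Walk two sorted position lists; keep a `right` position when a phrase
// start in `left_begin` (aligned with `left`) is within the allowed
// distance. Assumes left_begin[i] <= left[i], as in the scorer.
fn intersection_within_distance(
    left: &[u32],
    left_begin: &[u32],
    right: &[u32],
    max_distance_to_begin: u32,
) -> Vec<(u32, u32)> {
    let (mut i, mut j) = (0, 0);
    let mut out: Vec<(u32, u32)> = Vec::new();
    // Whether the last kept entry may still be replaced by a closer match.
    let mut last_is_temporary = false;
    while i < left.len() && j < right.len() {
        match left[i].cmp(&right[j]) {
            Ordering::Less => {
                if right[j] - left_begin[i] <= max_distance_to_begin {
                    if last_is_temporary {
                        out.pop(); // a closer candidate supersedes it
                    }
                    out.push((right[j], left_begin[i]));
                    last_is_temporary = true;
                }
                i += 1;
            }
            Ordering::Equal => {
                if last_is_temporary {
                    out.pop();
                    last_is_temporary = false;
                }
                out.push((right[j], left_begin[i]));
                i += 1;
                j += 1;
            }
            Ordering::Greater => {
                // `left` can no longer match this `right` value: it is final.
                j += 1;
                last_is_temporary = false;
            }
        }
    }
    out
}

fn main() {
    // Mirrors the `test_merge_slop` case further down: matched positions
    // [6, 9, 12] are kept together with their phrase starts [2, 3, 5].
    let kept = intersection_within_distance(
        &[1, 5, 6, 9, 10, 12],
        &[0, 1, 2, 3, 4, 5],
        &[6, 8, 9, 12],
        10,
    );
    assert_eq!(kept, vec![(6, 2), (9, 3), (12, 5)]);
}
```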
@@ -229,7 +158,6 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
             similarity_weight,
             fieldnorm_reader,
             scoring_enabled,
-            slop,
         };
         if scorer.doc() != TERMINATED && !scorer.phrase_match() {
             scorer.advance();
@@ -242,7 +170,6 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
     }
 
     fn phrase_match(&mut self) -> bool {
-        // Need to add support for slop in phrase_count and phrase_exists.
         if self.scoring_enabled {
             let count = self.compute_phrase_count();
             self.phrase_count = count;
@@ -253,25 +180,29 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
     }
 
     fn phrase_exists(&mut self) -> bool {
-        let intersection_len = if self.has_slop() {
-            self.compute_match_with_slop()
-        } else {
-            self.compute_match()
-        };
+        self.intersection_docset
+            .docset_mut_specialized(0)
+            .positions(&mut self.left);
+        let mut intersection_len = self.left.len();
+        for i in 1..self.num_terms - 1 {
+            {
+                self.intersection_docset
+                    .docset_mut_specialized(i)
+                    .positions(&mut self.right);
+            }
+            intersection_len = intersection(&mut self.left[..intersection_len], &self.right[..]);
+            if intersection_len == 0 {
+                return false;
+            }
+        }
+
+        self.intersection_docset
+            .docset_mut_specialized(self.num_terms - 1)
+            .positions(&mut self.right);
         intersection_exists(&self.left[..intersection_len], &self.right[..])
     }
 
     fn compute_phrase_count(&mut self) -> u32 {
-        let intersection_len = if self.has_slop() {
-            self.compute_match_with_slop()
-        } else {
-            self.compute_match()
-        };
-        intersection_count(&self.left[..intersection_len], &self.right[..]) as u32
-    }
-
-    /// Computes match without slop.
-    fn compute_match(&mut self) -> usize {
         {
             self.intersection_docset
                 .docset_mut_specialized(0)
@@ -286,52 +217,14 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
             }
             intersection_len = intersection(&mut self.left[..intersection_len], &self.right[..]);
             if intersection_len == 0 {
-                return 0;
+                return 0u32;
             }
         }
 
         self.intersection_docset
             .docset_mut_specialized(self.num_terms - 1)
             .positions(&mut self.right);
-        intersection_len
-    }
-
-    // Computes match with slop.
-    fn compute_match_with_slop(&mut self) -> usize {
-        {
-            self.intersection_docset
-                .docset_mut_specialized(0)
-                .positions(&mut self.left);
-        }
-        let mut intersection_len = self.left.len();
-        // We'll increment the values to be equal to the next match in the right array to achieve ordered slop.
-        let mut left_begin_vec = self.left.clone();
-        let left_begin = &mut left_begin_vec[..];
-        for i in 1..self.num_terms {
-            {
-                self.intersection_docset
-                    .docset_mut_specialized(i)
-                    .positions(&mut self.right);
-            }
-            intersection_len = intersection_with_distance(
-                &mut self.left[..intersection_len],
-                &mut left_begin[..intersection_len],
-                &mut self.right[..],
-                self.slop,
-            );
-            // Update the left to be equal to the right. Merge the initial left.
-            if intersection_len == 0 {
-                return 0;
-            }
-        }
-        self.intersection_docset
-            .docset_mut_specialized(self.num_terms - 1)
-            .positions(&mut self.right);
-        intersection_len
-    }
-
-    fn has_slop(&self) -> bool {
-        self.slop > 0
+        intersection_count(&self.left[..intersection_len], &self.right[..]) as u32
     }
 }
 
@@ -374,28 +267,18 @@ impl<TPostings: Postings> Scorer for PhraseScorer<TPostings> {
 
 #[cfg(test)]
 mod tests {
-    use super::{intersection, intersection_count, intersection_with_distance};
+    use super::{intersection, intersection_count};
 
     fn test_intersection_sym(left: &[u32], right: &[u32], expected: &[u32]) {
-        test_intersection_aux(left, right, expected, 0);
-        test_intersection_aux(right, left, expected, 0);
+        test_intersection_aux(left, right, expected);
+        test_intersection_aux(right, left, expected);
     }
 
-    fn test_intersection_aux(left: &[u32], right: &[u32], expected: &[u32], slop: u32) {
+    fn test_intersection_aux(left: &[u32], right: &[u32], expected: &[u32]) {
         let mut left_vec = Vec::from(left);
         let left_mut = &mut left_vec[..];
-        if slop == 0 {
-            let left_mut = &mut left_vec[..];
-            assert_eq!(intersection_count(left_mut, right), expected.len());
-            let count = intersection(left_mut, right);
-            assert_eq!(&left_mut[..count], expected);
-            return;
-        }
-        let mut right_vec = Vec::from(right);
-        let right_mut = &mut right_vec[..];
-        let mut left_begin_vec = Vec::from(left);
-        let left_begin_mut = &mut left_begin_vec[..];
-        let count = intersection_with_distance(left_mut, left_begin_mut, right_mut, slop);
+        assert_eq!(intersection_count(left_mut, right), expected.len());
+        let count = intersection(left_mut, right);
         assert_eq!(&left_mut[..count], expected);
     }
 
@@ -407,51 +290,6 @@ mod tests {
         test_intersection_sym(&[5, 7], &[1, 5, 10, 12], &[5]);
         test_intersection_sym(&[1, 5, 6, 9, 10, 12], &[6, 8, 9, 12], &[6, 9, 12]);
     }
-    #[test]
-    fn test_slop() {
-        // The slop is not symetric. It does not allow for the phrase to be out of order.
-        test_intersection_aux(&[1], &[2], &[2], 1);
-        test_intersection_aux(&[1], &[3], &[], 1);
-        test_intersection_aux(&[1], &[3], &[3], 2);
-        test_intersection_aux(&[], &[2], &[], 100000);
-        test_intersection_aux(&[5, 7, 11], &[1, 5, 10, 12], &[5, 12], 1);
-        test_intersection_aux(&[1, 5, 6, 9, 10, 12], &[6, 8, 9, 12], &[6, 9, 12], 1);
-        test_intersection_aux(&[1, 5, 6, 9, 10, 12], &[6, 8, 9, 12], &[6, 9, 12], 10);
-    }
-
-    fn test_merge(
-        left: &[u32],
-        right: &[u32],
-        left_begin: &[u32],
-        expected_left: &[u32],
-        expected_left_begin: &[u32],
-        slop: u32,
-    ) {
-        let mut left_vec = Vec::from(left);
-        let left_mut = &mut left_vec[..];
-        let mut right_vec = Vec::from(right);
-        let right_mut = &mut right_vec[..];
-        let mut left_begin_vec = Vec::from(left_begin);
-        let left_begin_mut = &mut left_begin_vec[..];
-        let count = intersection_with_distance(left_mut, left_begin_mut, right_mut, slop);
-        assert_eq!(&left_mut[..count], expected_left);
-        assert_eq!(&left_begin_mut[..count], expected_left_begin);
-    }
-
-    #[test]
-    fn test_merge_slop() {
-        test_merge(&[1, 2], &[1], &[0, 1], &[1], &[0], 1);
-        test_merge(&[3], &[4], &[2], &[4], &[2], 2);
-        test_merge(&[3], &[4], &[2], &[4], &[2], 2);
-        test_merge(
-            &[1, 5, 6, 9, 10, 12],
-            &[6, 8, 9, 12],
-            &[0, 1, 2, 3, 4, 5],
-            &[6, 9, 12],
-            &[2, 3, 5],
-            10,
-        );
-    }
 }
 
 #[cfg(all(test, feature = "unstable"))]
@@ -16,7 +16,6 @@ pub struct PhraseWeight {
     phrase_terms: Vec<(usize, Term)>,
     similarity_weight: Bm25Weight,
    scoring_enabled: bool,
-    slop: u32,
 }
 
 impl PhraseWeight {
@@ -26,12 +25,10 @@ impl PhraseWeight {
         similarity_weight: Bm25Weight,
         scoring_enabled: bool,
     ) -> PhraseWeight {
-        let slop = 0;
         PhraseWeight {
             phrase_terms,
             similarity_weight,
             scoring_enabled,
-            slop,
         }
     }
 
@@ -80,13 +77,8 @@ impl PhraseWeight {
             similarity_weight,
             fieldnorm_reader,
             self.scoring_enabled,
-            self.slop,
         )))
     }
-
-    pub fn slop(&mut self, slop: u32) {
-        self.slop = slop;
-    }
 }
 
 impl Weight for PhraseWeight {