Fixing unit tests.

There was a unit test failing when notify was sending more than one event on atomicwrites. It was observed on MacOS CI.
Merge branch 'master' of github.com:tantivy-search/tantivy
2026-06-13 05:50:41 +00:00 · 2020-08-27 16:04:50 +09:00 · 2020-08-22 21:30:47 +09:00 · 2020-08-22 21:29:12 +09:00 · 2020-08-21 21:37:05 +09:00 · 2020-08-21 11:23:09 +09:00
14 changed files with 134 additions and 224 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,28 +0,0 @@
-name: Tantivy CI
-
-on: [push]
-
-jobs:
-  test:
-    name: Test Suite
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: stable
-          override: true
-      - uses: actions-rs/cargo@v1
-        with:
-          command: test
-      - uses: actions-rs/cargo@v1
-        with:
-          command: fmt
-          args: --all -- --check
-      - run: rustup component add clippy
-      - uses: actions-rs/cargo@v1
-        with:
-          command: clippy
-          args: -- -D warnings
-
--- a/.github/workflows/coveralls.yml
+++ b/.github/workflows/coveralls.yml
@@ -1,66 +0,0 @@
-on: [push]
-
-name: Code coverage with grcov
-
-jobs:
-  grcov:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os:
-          - ubuntu-latest
-            #- macOS-latest
-            #- windows-latest
-
-    steps:
-      - uses: actions/checkout@v2
-
-      - name: Install toolchain
-        uses: actions-rs/toolchain@v1
-        with:
-          toolchain: nightly
-          override: true
-          profile: minimal
-
-      - name: Execute tests
-        uses: actions-rs/cargo@v1
-        with:
-          command: test
-          args: --all --lib
-        env:
-          CARGO_INCREMENTAL: 0
-          RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests"
-
-      # Note that `actions-rs/grcov` Action can install `grcov` too,
-      # but can't use faster installation methods yet.
-      # As a temporary experiment `actions-rs/install` Action plugged in here.
-      # Consider **NOT** to copy that into your workflow,
-      # but use `actions-rs/grcov` only
-      - name: Pre-installing grcov
-        uses: actions-rs/install@v0.1
-        with:
-          crate: grcov
-          use-tool-cache: true
-
-      - name: Gather coverage data
-        id: coverage
-        uses: actions-rs/grcov@v0.1
-        with:
-          coveralls-token: ${{ secrets.COVERALLS_TOKEN }}
-
-      - name: Coveralls upload
-        uses: coverallsapp/github-action@master
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          parallel: true
-          path-to-lcov: ${{ steps.coverage.outputs.report }}
-
-  grcov_finalize:
-    runs-on: ubuntu-latest
-    needs: grcov
-    steps:
-      - name: Coveralls finalization
-        uses: coverallsapp/github-action@master
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          parallel-finished: true
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+Tantivy 0.14.0
+=========================
+- Remove the dependency on atomicwrites (#833). Implemented by @pmasurel, upon suggestion and research from @asafigan.
+
 Tantivy 0.13.0
 ======================
 Tantivy 0.13 introduce a change in the index format that will require
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.13.0"
+version = "0.14.0-dev"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -22,8 +22,7 @@ tantivy-fst = "0.3"
 memmap = {version = "0.7", optional=true}
 lz4 = {version="1.20", optional=true}
 snap = "1"
-atomicwrites = {version="0.2.2", optional=true}
-tempfile = "3.0"
+tempfile = {version="3.0", optional=true}
 log = "0.4"
 serde = {version="1.0", features=["derive"]}
 serde_json = "1.0"
@@ -75,7 +74,7 @@ overflow-checks = true

 [features]
 default = ["mmap"]
-mmap = ["atomicwrites", "fs2", "memmap", "notify"]
+mmap = ["fs2", "tempfile", "memmap", "notify"]
 lz4-compression = ["lz4"]
 failpoints = ["fail/failpoints"]
 unstable = [] # useful for benches.
--- a/README.md
+++ b/README.md
@@ -34,11 +34,6 @@ Tantivy is, in fact, strongly inspired by Lucene's design.
 The following [benchmark](https://tantivy-search.github.io/bench/) break downs 
 performance for different type of queries / collection.

-
-In general, Tantivy tends to be 
- slower than Lucene on union with a Top-K due to Block-WAND optimization.
- faster than Lucene on intersection and phrase queries. 
-
 Your mileage WILL vary depending on the nature of queries and their load.

 # Features
--- a/examples/basic_search.rs
+++ b/examples/basic_search.rs
@@ -112,18 +112,6 @@ fn main() -> tantivy::Result<()> {
            limbs and branches that arch over the pool"
    ));

-    index_writer.add_document(doc!(
-    title => "Of Mice and Men",
-    body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
-            bank and runs deep and green. The water is warm too, for it has slipped twinkling \
-            over the yellow sands in the sunlight before reaching the narrow pool. On one \
-            side of the river the golden foothill slopes curve up to the strong and rocky \
-            Gabilan Mountains, but on the valley side the water is lined with trees—willows \
-            fresh and green with every spring, carrying in their lower leaf junctures the \
-            debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
-            limbs and branches that arch over the pool"
-    ));
-
    // Multivalued field just need to be repeated.
    index_writer.add_document(doc!(
    title => "Frankenstein",
--- a/query-grammar/src/occur.rs
+++ b/query-grammar/src/occur.rs
@@ -52,7 +52,7 @@ mod test {
    use crate::Occur;

    #[test]
-    fn test_Occur_compose() {
+    fn test_occur_compose() {
        assert_eq!(Occur::compose(Occur::Should, Occur::Should), Occur::Should);
        assert_eq!(Occur::compose(Occur::Should, Occur::Must), Occur::Must);
        assert_eq!(
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -539,7 +539,6 @@ mod tests {
            test_index_on_commit_reload_policy_aux(field, &write_index, &reader);
        }
    }
-
    fn test_index_on_commit_reload_policy_aux(field: Field, index: &Index, reader: &IndexReader) {
        let mut reader_index = reader.index();
        let (sender, receiver) = crossbeam::channel::unbounded();
@@ -550,12 +549,23 @@ mod tests {
        assert_eq!(reader.searcher().num_docs(), 0);
        writer.add_document(doc!(field=>1u64));
        writer.commit().unwrap();
-        assert!(receiver.recv().is_ok());
-        assert_eq!(reader.searcher().num_docs(), 1);
+        // We need a loop here because it is possible for notify to send more than
+        // one modify event. It was observed on CI on MacOS.
+        loop {
+            assert!(receiver.recv().is_ok());
+            if reader.searcher().num_docs() == 1 {
+                break;
+            }
+        }
        writer.add_document(doc!(field=>2u64));
        writer.commit().unwrap();
-        assert!(receiver.recv().is_ok());
-        assert_eq!(reader.searcher().num_docs(), 2);
+        // ... Same as above
+        loop {
+            assert!(receiver.recv().is_ok());
+            if reader.searcher().num_docs() == 2 {
+                break;
+            }
+        }
    }

    // This test will not pass on windows, because windows
--- a/src/directory/mmap_directory.rs
+++ b/src/directory/mmap_directory.rs
@@ -34,6 +34,7 @@ use std::sync::Mutex;
 use std::sync::RwLock;
 use std::sync::Weak;
 use std::thread;
+use tempfile;
 use tempfile::TempDir;

 /// Create a default io error given a string.
@@ -487,11 +488,13 @@ impl Directory for MmapDirectory {
        }
    }

-    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
+    fn atomic_write(&mut self, path: &Path, content: &[u8]) -> io::Result<()> {
        debug!("Atomic Write {:?}", path);
+        let mut tempfile = tempfile::NamedTempFile::new()?;
+        tempfile.write_all(content)?;
+        tempfile.flush()?;
        let full_path = self.resolve_path(path);
-        let meta_file = atomicwrites::AtomicFile::new(full_path, atomicwrites::AllowOverwrite);
-        meta_file.write(|f| f.write_all(data))?;
+        tempfile.into_temp_path().persist(full_path)?;
        Ok(())
    }

--- a/src/directory/tests.rs
+++ b/src/directory/tests.rs
@@ -211,19 +211,19 @@ fn test_watch(directory: &mut dyn Directory) {
        .unwrap();

    for i in 0..10 {
-        assert_eq!(i, counter.load(SeqCst));
+        assert!(i <= counter.load(SeqCst));
        assert!(directory
            .atomic_write(Path::new("meta.json"), b"random_test_data_2")
            .is_ok());
        assert_eq!(receiver.recv_timeout(Duration::from_millis(500)), Ok(i));
-        assert_eq!(i + 1, counter.load(SeqCst));
+        assert!(i + 1 <= counter.load(SeqCst)); // notify can trigger more than once.
    }
    mem::drop(watch_handle);
    assert!(directory
        .atomic_write(Path::new("meta.json"), b"random_test_data")
        .is_ok());
    assert!(receiver.recv_timeout(Duration::from_millis(500)).is_ok());
-    assert_eq!(10, counter.load(SeqCst));
+    assert!(10 <= counter.load(SeqCst));
 }

 fn test_lock_non_blocking(directory: &mut dyn Directory) {
--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -29,8 +29,9 @@ pub use self::segment_writer::SegmentWriter;
 /// Alias for the default merge policy, which is the `LogMergePolicy`.
 pub type DefaultMergePolicy = LogMergePolicy;

+#[cfg(feature = "mmap")]
 #[cfg(test)]
-mod tests {
+mod tests_mmap {
    use crate::schema::{self, Schema};
    use crate::{Index, Term};

--- a/src/query/boolean_query/block_wand.rs
+++ b/src/query/boolean_query/block_wand.rs
@@ -4,19 +4,6 @@ use crate::{DocId, DocSet, Score, TERMINATED};
 use std::ops::Deref;
 use std::ops::DerefMut;

-fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
-    if let Some(first) = it.next() {
-        let mut prev = first;
-        for doc in it {
-            if doc < prev {
-                return false;
-            }
-            prev = doc;
-        }
-    }
-    true
-}
-
 /// Takes a term_scorers sorted by their current doc() and a threshold and returns
 /// Returns (pivot_len, pivot_ord) defined as follows:
 /// - `pivot_doc` lowest document that has a chance of exceeding (>) the threshold score.
@@ -55,37 +42,12 @@ fn find_pivot_doc(
    Some((before_pivot_len, pivot_len, pivot_doc))
 }

-struct TermScorerWithMaxScore<'a> {
-    scorer: &'a mut TermScorer,
-    max_score: Score,
-}
-
-impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
-    fn from(scorer: &'a mut TermScorer) -> Self {
-        let max_score = scorer.max_score();
-        TermScorerWithMaxScore { scorer, max_score }
-    }
-}
-
-impl<'a> Deref for TermScorerWithMaxScore<'a> {
-    type Target = TermScorer;
-
-    fn deref(&self) -> &Self::Target {
-        self.scorer
-    }
-}
-
-impl<'a> DerefMut for TermScorerWithMaxScore<'a> {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        self.scorer
-    }
-}
-
 // Before and after calling this method, scorers need to be sorted by their `.doc()`.
 fn block_max_was_too_low_advance_one_scorer(
    scorers: &mut Vec<TermScorerWithMaxScore>,
    pivot_len: usize,
 ) {
+    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
    let mut scorer_to_seek = pivot_len - 1;
    let mut doc_to_seek_after = scorers[scorer_to_seek].doc();
    for scorer_ord in (0..pivot_len - 1).rev() {
@@ -102,6 +64,7 @@ fn block_max_was_too_low_advance_one_scorer(
    }
    scorers[scorer_to_seek].seek(doc_to_seek_after + 1);
    restore_ordering(scorers, scorer_to_seek);
+    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
 }

 // Given a list of term_scorers and a `ord` and assuming that `term_scorers[ord]` is sorted
@@ -177,64 +140,99 @@ pub fn block_wand(
        .map(TermScorerWithMaxScore::from)
        .collect();
    scorers.sort_by_key(|scorer| scorer.doc());
-    loop {
-        // At this point we need to ensure that the scorers are sorted!
+    // At this point we need to ensure that the scorers are sorted!
+    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
+    while let Some((before_pivot_len, pivot_len, pivot_doc)) =
+        find_pivot_doc(&scorers[..], threshold)
+    {
        debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
-        if let Some((before_pivot_len, pivot_len, pivot_doc)) =
-            find_pivot_doc(&scorers[..], threshold)
-        {
-            debug_assert_ne!(pivot_doc, TERMINATED);
-            debug_assert!(before_pivot_len < pivot_len);
+        debug_assert_ne!(pivot_doc, TERMINATED);
+        debug_assert!(before_pivot_len < pivot_len);

-            let block_max_score_upperbound: Score = scorers[..pivot_len]
-                .iter_mut()
-                .map(|scorer| {
-                    scorer.shallow_seek(pivot_doc);
-                    scorer.block_max_score()
-                })
-                .sum();
+        let block_max_score_upperbound: Score = scorers[..pivot_len]
+            .iter_mut()
+            .map(|scorer| {
+                scorer.shallow_seek(pivot_doc);
+                scorer.block_max_score()
+            })
+            .sum();

-            // Beware after shallow advance, skip readers can be in advance compared to
-            // the segment posting lists.
-            //
-            // `block_segment_postings.load_block()` need to be called separately.
-            if block_max_score_upperbound <= threshold {
-                // Block max condition was not reached
-                // We could get away by simply advancing the scorers to DocId + 1 but it would
-                // be inefficient. The optimization requires proper explanation and was
-                // isolated in a different function.
-                block_max_was_too_low_advance_one_scorer(&mut scorers, pivot_len);
-                continue;
-            }
-
-            // Block max condition is observed.
-            //
-            // Let's try and advance all scorers before the pivot to the pivot.
-            if !align_scorers(&mut scorers, pivot_doc, before_pivot_len) {
-                // At least of the scorer does not contain the pivot.
-                //
-                // Let's stop scoring this pivot and go through the pivot selection again.
-                // Note that the current pivot is not necessarily a bad candidate and it
-                // may be picked again.
-                continue;
-            }
-
-            // At this point, all scorers are positioned on the doc.
-            let score = scorers[..pivot_len]
-                .iter_mut()
-                .map(|scorer| scorer.score())
-                .sum();
-            if score > threshold {
-                threshold = callback(pivot_doc, score);
-            }
-            // let's advance all of the scorers that are currently positioned on the pivot.
-            advance_all_scorers_on_pivot(&mut scorers, pivot_len);
-        } else {
-            return;
+        // Beware: after a shallow advance, skip readers can be ahead of
+        // the segment posting lists.
+        //
+        // `block_segment_postings.load_block()` needs to be called separately.
+        if block_max_score_upperbound <= threshold {
+            // Block max condition was not reached
+            // We could get away by simply advancing the scorers to DocId + 1 but it would
+            // be inefficient. The optimization requires proper explanation and was
+            // isolated in a different function.
+            block_max_was_too_low_advance_one_scorer(&mut scorers, pivot_len);
+            continue;
        }
+
+        // Block max condition is observed.
+        //
+        // Let's try and advance all scorers before the pivot to the pivot.
+        if !align_scorers(&mut scorers, pivot_doc, before_pivot_len) {
+            // At least one of the scorers does not contain the pivot.
+            //
+            // Let's stop scoring this pivot and go through the pivot selection again.
+            // Note that the current pivot is not necessarily a bad candidate and it
+            // may be picked again.
+            continue;
+        }
+
+        // At this point, all scorers are positioned on the doc.
+        let score = scorers[..pivot_len]
+            .iter_mut()
+            .map(|scorer| scorer.score())
+            .sum();
+        if score > threshold {
+            threshold = callback(pivot_doc, score);
+        }
+        // let's advance all of the scorers that are currently positioned on the pivot.
+        advance_all_scorers_on_pivot(&mut scorers, pivot_len);
    }
 }

+struct TermScorerWithMaxScore<'a> {
+    scorer: &'a mut TermScorer,
+    max_score: Score,
+}
+
+impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
+    fn from(scorer: &'a mut TermScorer) -> Self {
+        let max_score = scorer.max_score();
+        TermScorerWithMaxScore { scorer, max_score }
+    }
+}
+
+impl<'a> Deref for TermScorerWithMaxScore<'a> {
+    type Target = TermScorer;
+
+    fn deref(&self) -> &Self::Target {
+        self.scorer
+    }
+}
+
+impl<'a> DerefMut for TermScorerWithMaxScore<'a> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.scorer
+    }
+}
+
+fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
+    if let Some(first) = it.next() {
+        let mut prev = first;
+        for doc in it {
+            if doc < prev {
+                return false;
+            }
+            prev = doc;
+        }
+    }
+    true
+}
 #[cfg(test)]
 mod tests {
    use crate::query::score_combiner::SumCombiner;
@@ -248,17 +246,21 @@ mod tests {
    use std::iter;

    struct Float(Score);
+
    impl Eq for Float {}
+
    impl PartialEq for Float {
        fn eq(&self, other: &Self) -> bool {
            self.cmp(&other) == Ordering::Equal
        }
    }
+
    impl PartialOrd for Float {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }
+
    impl Ord for Float {
        fn cmp(&self, other: &Self) -> Ordering {
            other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal)
--- a/src/query/term_query/term_weight.rs
+++ b/src/query/term_query/term_weight.rs
@@ -4,7 +4,7 @@ use crate::docset::DocSet;
 use crate::postings::SegmentPostings;
 use crate::query::bm25::BM25Weight;
 use crate::query::explanation::does_not_match;
-use crate::query::weight::{for_each_pruning_scorer, for_each_scorer};
+use crate::query::weight::for_each_scorer;
 use crate::query::Weight;
 use crate::query::{Explanation, Scorer};
 use crate::schema::IndexRecordOption;
@@ -73,8 +73,8 @@ impl Weight for TermWeight {
        reader: &SegmentReader,
        callback: &mut dyn FnMut(DocId, Score) -> Score,
    ) -> crate::Result<()> {
-        let mut scorer = self.scorer(reader, 1.0)?;
-        for_each_pruning_scorer(&mut scorer, threshold, callback);
+        let scorer = self.specialized_scorer(reader, 1.0)?;
+        crate::query::boolean_query::block_wand(vec![scorer], threshold, callback);
        Ok(())
    }
 }
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -138,9 +138,11 @@ impl InnerIndexReader {
                .collect::<crate::Result<_>>()?
        };
        let schema = self.index.schema();
-        let searchers = (0..self.num_searchers)
-            .map(|_| Searcher::new(schema.clone(), self.index.clone(), segment_readers.clone()))
-            .collect();
+        let searchers = std::iter::repeat_with(|| {
+            Searcher::new(schema.clone(), self.index.clone(), segment_readers.clone())
+        })
+        .take(self.num_searchers)
+        .collect();
        self.searcher_pool.publish_new_generation(searchers);
        Ok(())
    }
Author	SHA1	Message	Date
Paul Masurel	9b0ffc401e	Fixing unit tests. There was a unit test failing when notify was sending more than one event on atomicwrites. It was observed on MacOS CI.	2020-08-27 16:04:50 +09:00
Paul Masurel	3f1ecf53ab	Merge branch 'master' of github.com:tantivy-search/tantivy	2020-08-22 21:30:47 +09:00
Paul Masurel	0b583b8130	Plastic changes	2020-08-22 21:29:12 +09:00
Paul Masurel	31d18dca1c	Removing dependency to atomicwrites (#866 )	2020-08-21 21:37:05 +09:00
stephenlagree	5e06e7de5a	Update basic_search.rs (#865 ) Remove duplicated document entry.	2020-08-21 11:23:09 +09:00
Paul Masurel	8af53cbd36	Merge branch 'master' of github.com:tantivy-search/tantivy	2020-08-21 08:57:42 +09:00
Paul Masurel	4914076e8f	Fixing release build	2020-08-21 08:57:27 +09:00
Paul Masurel	e04f47e922	Using block wand for term queries too.	2020-08-20 15:51:21 +09:00
Paul Masurel	f355695581	Code clean up	2020-08-20 15:42:50 +09:00
Paul Masurel	cbacdf0de8	Edited README.	2020-08-20 14:28:24 +09:00