Compare commits

..

15 Commits

Author SHA1 Message Date
Paul Masurel
37e7af322d Reverting atomic_write to the atomic_writes in order to address #866 2020-09-19 10:39:43 +09:00
Paul Masurel
151498cbe7 Creating the tempfile for atomicwrites in the same directory as the MmapDirectory. (#878) 2020-09-05 23:06:29 +09:00
Paul Masurel
3a72b1cb98 Accept dash within field names. (#874)
Accept dash in field names and enforce field names constraint at the
creation of the schema.

Closes #796
2020-09-01 13:38:52 +09:00
Paul Masurel
2737822620 Fixing unit tests. (#868)
There was a unit test failing when notify was sending more
than one event on atomicwrites.

It was observed on MacOS CI.
2020-08-27 16:43:39 +09:00
b8591340
06c12ae221 Filter meta.json from validate_checksum (#872) 2020-08-27 07:54:37 +09:00
Paul Masurel
4e4400af7f Added cargo timing report to .gitignore 2020-08-23 16:15:28 +09:00
Paul Masurel
3f1ecf53ab Merge branch 'master' of github.com:tantivy-search/tantivy 2020-08-22 21:30:47 +09:00
Paul Masurel
0b583b8130 Plastic changes 2020-08-22 21:29:12 +09:00
Paul Masurel
31d18dca1c Removing dependency to atomicwrites (#866) 2020-08-21 21:37:05 +09:00
stephenlagree
5e06e7de5a Update basic_search.rs (#865)
Remove duplicated document entry.
2020-08-21 11:23:09 +09:00
Paul Masurel
8af53cbd36 Merge branch 'master' of github.com:tantivy-search/tantivy 2020-08-21 08:57:42 +09:00
Paul Masurel
4914076e8f Fixing release build 2020-08-21 08:57:27 +09:00
Paul Masurel
e04f47e922 Using block wand for term queries too. 2020-08-20 15:51:21 +09:00
Paul Masurel
f355695581 Code clean up 2020-08-20 15:42:50 +09:00
Paul Masurel
cbacdf0de8 Edited README. 2020-08-20 14:28:24 +09:00
29 changed files with 225 additions and 226 deletions

1
.gitignore vendored
View File

@@ -12,3 +12,4 @@ cpp/simdcomp/bitpackingbenchmark
*.bk *.bk
.idea .idea
trace.dat trace.dat
cargo-timing*

View File

@@ -1,13 +1,6 @@
Tantivy 0.13.2 Tantivy 0.14.0
=================== =========================
Bugfix. Acquiring a facet reader on a segment that does not contain any - Remove dependency to atomicwrites #833. (Implemented by @pmasurel upon suggestion and research from @asafigan).
doc with this facet returns `None`. (#896)
Tantivy 0.13.1
======================
Made `Query` and `Collector` `Send + Sync`.
Updated misc dependency versions.
Tantivy 0.13.0 Tantivy 0.13.0
====================== ======================

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "tantivy" name = "tantivy"
version = "0.13.2" version = "0.14.0-dev"
authors = ["Paul Masurel <paul.masurel@gmail.com>"] authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT" license = "MIT"
categories = ["database-implementations", "data-structures"] categories = ["database-implementations", "data-structures"]
@@ -13,21 +13,21 @@ keywords = ["search", "information", "retrieval"]
edition = "2018" edition = "2018"
[dependencies] [dependencies]
base64 = "0.12" base64 = "0.12.0"
byteorder = "1" byteorder = "1.0"
crc32fast = "1" crc32fast = "1.2.0"
once_cell = "1" once_cell = "1.0"
regex ={version = "1", default-features = false, features = ["std"]} regex ={version = "1.3.0", default-features = false, features = ["std"]}
tantivy-fst = "0.3" tantivy-fst = "0.3"
memmap = {version = "0.7", optional=true} memmap = {version = "0.7", optional=true}
lz4 = {version="1", optional=true} lz4 = {version="1.20", optional=true}
snap = "1" snap = "1"
atomicwrites = {version="0.2", optional=true} tempfile = {version="3.0", optional=true}
tempfile = "3" atomicwrites = "0.2"
log = "0.4" log = "0.4"
serde = {version="1", features=["derive"]} serde = {version="1.0", features=["derive"]}
serde_json = "1" serde_json = "1.0"
num_cpus = "1" num_cpus = "1.2"
fs2={version="0.4", optional=true} fs2={version="0.4", optional=true}
levenshtein_automata = "0.2" levenshtein_automata = "0.2"
notify = {version="4", optional=true} notify = {version="4", optional=true}
@@ -35,20 +35,20 @@ uuid = { version = "0.8", features = ["v4", "serde"] }
crossbeam = "0.7" crossbeam = "0.7"
futures = {version = "0.3", features=["thread-pool"] } futures = {version = "0.3", features=["thread-pool"] }
owning_ref = "0.4" owning_ref = "0.4"
stable_deref_trait = "1" stable_deref_trait = "1.0.0"
rust-stemmers = "1" rust-stemmers = "1.2"
downcast-rs = "1" downcast-rs = { version="1.0" }
tantivy-query-grammar = { version="0.13", path="./query-grammar" } tantivy-query-grammar = { version="0.14.0-dev", path="./query-grammar" }
bitpacking = {version="0.8", default-features = false, features=["bitpacker4x"]} bitpacking = {version="0.8", default-features = false, features=["bitpacker4x"]}
census = "0.4" census = "0.4"
fnv = "1" fnv = "1.0.6"
owned-read = "0.4" owned-read = "0.4"
failure = "0.1" failure = "0.1"
htmlescape = "0.3" htmlescape = "0.3.1"
fail = "0.4" fail = "0.4"
murmurhash32 = "0.2" murmurhash32 = "0.2"
chrono = "0.4" chrono = "0.4"
smallvec = "1" smallvec = "1.0"
rayon = "1" rayon = "1"
[target.'cfg(windows)'.dependencies] [target.'cfg(windows)'.dependencies]
@@ -75,7 +75,7 @@ overflow-checks = true
[features] [features]
default = ["mmap"] default = ["mmap"]
mmap = ["atomicwrites", "fs2", "memmap", "notify"] mmap = ["fs2", "tempfile", "memmap", "notify"]
lz4-compression = ["lz4"] lz4-compression = ["lz4"]
failpoints = ["fail/failpoints"] failpoints = ["fail/failpoints"]
unstable = [] # useful for benches. unstable = [] # useful for benches.

View File

@@ -34,11 +34,6 @@ Tantivy is, in fact, strongly inspired by Lucene's design.
The following [benchmark](https://tantivy-search.github.io/bench/) break downs The following [benchmark](https://tantivy-search.github.io/bench/) break downs
performance for different type of queries / collection. performance for different type of queries / collection.
In general, Tantivy tends to be
- slower than Lucene on union with a Top-K due to Block-WAND optimization.
- faster than Lucene on intersection and phrase queries.
Your mileage WILL vary depending on the nature of queries and their load. Your mileage WILL vary depending on the nature of queries and their load.
# Features # Features

View File

@@ -112,18 +112,6 @@ fn main() -> tantivy::Result<()> {
limbs and branches that arch over the pool" limbs and branches that arch over the pool"
)); ));
index_writer.add_document(doc!(
title => "Of Mice and Men",
body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
bank and runs deep and green. The water is warm too, for it has slipped twinkling \
over the yellow sands in the sunlight before reaching the narrow pool. On one \
side of the river the golden foothill slopes curve up to the strong and rocky \
Gabilan Mountains, but on the valley side the water is lined with trees—willows \
fresh and green with every spring, carrying in their lower leaf junctures the \
debris of the winters flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool"
));
// Multivalued field just need to be repeated. // Multivalued field just need to be repeated.
index_writer.add_document(doc!( index_writer.add_document(doc!(
title => "Frankenstein", title => "Frankenstein",

View File

@@ -56,7 +56,7 @@ fn main() -> tantivy::Result<()> {
); );
let top_docs_by_custom_score = let top_docs_by_custom_score =
TopDocs::with_limit(2).tweak_score(move |segment_reader: &SegmentReader| { TopDocs::with_limit(2).tweak_score(move |segment_reader: &SegmentReader| {
let ingredient_reader = segment_reader.facet_reader(ingredient).unwrap(); let mut ingredient_reader = segment_reader.facet_reader(ingredient).unwrap();
let facet_dict = ingredient_reader.facet_dict(); let facet_dict = ingredient_reader.facet_dict();
let query_ords: HashSet<u64> = facets let query_ords: HashSet<u64> = facets

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "tantivy-query-grammar" name = "tantivy-query-grammar"
version = "0.13.0" version = "0.14.0-dev"
authors = ["Paul Masurel <paul.masurel@gmail.com>"] authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT" license = "MIT"
categories = ["database-implementations", "data-structures"] categories = ["database-implementations", "data-structures"]

View File

@@ -52,7 +52,7 @@ mod test {
use crate::Occur; use crate::Occur;
#[test] #[test]
fn test_Occur_compose() { fn test_occur_compose() {
assert_eq!(Occur::compose(Occur::Should, Occur::Should), Occur::Should); assert_eq!(Occur::compose(Occur::Should, Occur::Should), Occur::Should);
assert_eq!(Occur::compose(Occur::Should, Occur::Must), Occur::Must); assert_eq!(Occur::compose(Occur::Should, Occur::Must), Occur::Must);
assert_eq!( assert_eq!(

View File

@@ -9,8 +9,10 @@ use combine::{
fn field<'a>() -> impl Parser<&'a str, Output = String> { fn field<'a>() -> impl Parser<&'a str, Output = String> {
( (
letter(), (letter().or(char('_'))),
many(satisfy(|c: char| c.is_alphanumeric() || c == '_')), many(satisfy(|c: char| {
c.is_alphanumeric() || c == '_' || c == '-'
})),
) )
.skip(char(':')) .skip(char(':'))
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2)) .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
@@ -279,6 +281,8 @@ pub fn parse_to_ast<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
type TestParseResult = Result<(), StringStreamError>;
use super::*; use super::*;
use combine::parser::Parser; use combine::parser::Parser;
@@ -296,9 +300,10 @@ mod test {
} }
#[test] #[test]
fn test_occur_symbol() { fn test_occur_symbol() -> TestParseResult {
assert_eq!(super::occur_symbol().parse("-"), Ok((Occur::MustNot, ""))); assert_eq!(super::occur_symbol().parse("-")?, (Occur::MustNot, ""));
assert_eq!(super::occur_symbol().parse("+"), Ok((Occur::Must, ""))); assert_eq!(super::occur_symbol().parse("+")?, (Occur::Must, ""));
Ok(())
} }
#[test] #[test]
@@ -410,6 +415,25 @@ mod test {
assert_eq!(format!("{:?}", ast), "\"abc\""); assert_eq!(format!("{:?}", ast), "\"abc\"");
} }
#[test]
fn test_field_name() -> TestParseResult {
assert_eq!(
super::field().parse("my-field-name:a")?,
("my-field-name".to_string(), "a")
);
assert_eq!(
super::field().parse("my_field_name:a")?,
("my_field_name".to_string(), "a")
);
assert!(super::field().parse(":a").is_err());
assert!(super::field().parse("-my_field:a").is_err());
assert_eq!(
super::field().parse("_my_field:a")?,
("_my_field".to_string(), "a")
);
Ok(())
}
#[test] #[test]
fn test_range_parser() { fn test_range_parser() {
// testing the range() parser separately // testing the range() parser separately

View File

@@ -46,7 +46,7 @@ pub trait CustomScorer<TScore>: Sync {
impl<TCustomScorer, TScore> Collector for CustomScoreTopCollector<TCustomScorer, TScore> impl<TCustomScorer, TScore> Collector for CustomScoreTopCollector<TCustomScorer, TScore>
where where
TCustomScorer: CustomScorer<TScore> + Send + Sync, TCustomScorer: CustomScorer<TScore>,
TScore: 'static + PartialOrd + Clone + Send + Sync, TScore: 'static + PartialOrd + Clone + Send + Sync,
{ {
type Fruit = Vec<(TScore, DocAddress)>; type Fruit = Vec<(TScore, DocAddress)>;

View File

@@ -133,7 +133,7 @@ impl<T> Fruit for T where T: Send + downcast_rs::Downcast {}
/// The collection logic itself is in the `SegmentCollector`. /// The collection logic itself is in the `SegmentCollector`.
/// ///
/// Segments are not guaranteed to be visited in any specific order. /// Segments are not guaranteed to be visited in any specific order.
pub trait Collector: Sync + Send { pub trait Collector: Sync {
/// `Fruit` is the type for the result of our collection. /// `Fruit` is the type for the result of our collection.
/// e.g. `usize` for the `Count` collector. /// e.g. `usize` for the `Count` collector.
type Fruit: Fruit; type Fruit: Fruit;

View File

@@ -324,7 +324,7 @@ impl TopDocs {
where where
TScore: 'static + Send + Sync + Clone + PartialOrd, TScore: 'static + Send + Sync + Clone + PartialOrd,
TScoreSegmentTweaker: ScoreSegmentTweaker<TScore> + 'static, TScoreSegmentTweaker: ScoreSegmentTweaker<TScore> + 'static,
TScoreTweaker: ScoreTweaker<TScore, Child = TScoreSegmentTweaker> + Send + Sync, TScoreTweaker: ScoreTweaker<TScore, Child = TScoreSegmentTweaker>,
{ {
TweakedScoreTopCollector::new(score_tweaker, self.0.into_tscore()) TweakedScoreTopCollector::new(score_tweaker, self.0.into_tscore())
} }
@@ -438,7 +438,7 @@ impl TopDocs {
where where
TScore: 'static + Send + Sync + Clone + PartialOrd, TScore: 'static + Send + Sync + Clone + PartialOrd,
TCustomSegmentScorer: CustomSegmentScorer<TScore> + 'static, TCustomSegmentScorer: CustomSegmentScorer<TScore> + 'static,
TCustomScorer: CustomScorer<TScore, Child = TCustomSegmentScorer> + Send + Sync, TCustomScorer: CustomScorer<TScore, Child = TCustomSegmentScorer>,
{ {
CustomScoreTopCollector::new(custom_score, self.0.into_tscore()) CustomScoreTopCollector::new(custom_score, self.0.into_tscore())
} }

View File

@@ -49,7 +49,7 @@ pub trait ScoreTweaker<TScore>: Sync {
impl<TScoreTweaker, TScore> Collector for TweakedScoreTopCollector<TScoreTweaker, TScore> impl<TScoreTweaker, TScore> Collector for TweakedScoreTopCollector<TScoreTweaker, TScore>
where where
TScoreTweaker: ScoreTweaker<TScore> + Send + Sync, TScoreTweaker: ScoreTweaker<TScore>,
TScore: 'static + PartialOrd + Clone + Send + Sync, TScore: 'static + PartialOrd + Clone + Send + Sync,
{ {
type Fruit = Vec<(TScore, DocAddress)>; type Fruit = Vec<(TScore, DocAddress)>;

View File

@@ -539,7 +539,6 @@ mod tests {
test_index_on_commit_reload_policy_aux(field, &write_index, &reader); test_index_on_commit_reload_policy_aux(field, &write_index, &reader);
} }
} }
fn test_index_on_commit_reload_policy_aux(field: Field, index: &Index, reader: &IndexReader) { fn test_index_on_commit_reload_policy_aux(field: Field, index: &Index, reader: &IndexReader) {
let mut reader_index = reader.index(); let mut reader_index = reader.index();
let (sender, receiver) = crossbeam::channel::unbounded(); let (sender, receiver) = crossbeam::channel::unbounded();
@@ -550,12 +549,23 @@ mod tests {
assert_eq!(reader.searcher().num_docs(), 0); assert_eq!(reader.searcher().num_docs(), 0);
writer.add_document(doc!(field=>1u64)); writer.add_document(doc!(field=>1u64));
writer.commit().unwrap(); writer.commit().unwrap();
assert!(receiver.recv().is_ok()); // We need a loop here because it is possible for notify to send more than
assert_eq!(reader.searcher().num_docs(), 1); // one modify event. It was observed on CI on MacOS.
loop {
assert!(receiver.recv().is_ok());
if reader.searcher().num_docs() == 1 {
break;
}
}
writer.add_document(doc!(field=>2u64)); writer.add_document(doc!(field=>2u64));
writer.commit().unwrap(); writer.commit().unwrap();
assert!(receiver.recv().is_ok()); // ... Same as above
assert_eq!(reader.searcher().num_docs(), 2); loop {
assert!(receiver.recv().is_ok());
if reader.searcher().num_docs() == 2 {
break;
}
}
} }
// This test will not pass on windows, because windows // This test will not pass on windows, because windows

View File

@@ -112,10 +112,8 @@ impl SegmentReader {
return None; return None;
} }
let term_ords_reader = self.fast_fields().u64s(field)?; let term_ords_reader = self.fast_fields().u64s(field)?;
let termdict = self.termdict_composite let termdict_source = self.termdict_composite.open_read(field)?;
.open_read(field) let termdict = TermDictionary::from_source(&termdict_source);
.map(|source| TermDictionary::from_source(&source))
.unwrap_or_else(TermDictionary::empty);
let facet_reader = FacetReader::new(term_ords_reader, termdict); let facet_reader = FacetReader::new(term_ords_reader, termdict);
Some(facet_reader) Some(facet_reader)
} }

View File

@@ -1,4 +1,4 @@
use crate::core::MANAGED_FILEPATH; use crate::core::{MANAGED_FILEPATH, META_FILEPATH};
use crate::directory::error::{DeleteError, IOError, LockError, OpenReadError, OpenWriteError}; use crate::directory::error::{DeleteError, IOError, LockError, OpenReadError, OpenWriteError};
use crate::directory::footer::{Footer, FooterProxy}; use crate::directory::footer::{Footer, FooterProxy};
use crate::directory::DirectoryLock; use crate::directory::DirectoryLock;
@@ -246,13 +246,15 @@ impl ManagedDirectory {
/// List files for which checksum does not match content /// List files for which checksum does not match content
pub fn list_damaged(&self) -> result::Result<HashSet<PathBuf>, OpenReadError> { pub fn list_damaged(&self) -> result::Result<HashSet<PathBuf>, OpenReadError> {
let mut hashset = HashSet::new(); let mut hashset = HashSet::new();
let managed_paths = self let mut managed_paths = self
.meta_informations .meta_informations
.read() .read()
.expect("Managed directory rlock poisoned in list damaged.") .expect("Managed directory rlock poisoned in list damaged.")
.managed_paths .managed_paths
.clone(); .clone();
managed_paths.remove(*META_FILEPATH);
for path in managed_paths.into_iter() { for path in managed_paths.into_iter() {
if !self.validate_checksum(&path)? { if !self.validate_checksum(&path)? {
hashset.insert(path); hashset.insert(path);

View File

@@ -1,4 +1,5 @@
use crate::core::META_FILEPATH; use crate::core::META_FILEPATH;
use atomicwrites;
use crate::directory::error::LockError; use crate::directory::error::LockError;
use crate::directory::error::{ use crate::directory::error::{
DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError, DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError,
@@ -34,6 +35,7 @@ use std::sync::Mutex;
use std::sync::RwLock; use std::sync::RwLock;
use std::sync::Weak; use std::sync::Weak;
use std::thread; use std::thread;
use tempfile;
use tempfile::TempDir; use tempfile::TempDir;
/// Create a default io error given a string. /// Create a default io error given a string.
@@ -487,11 +489,11 @@ impl Directory for MmapDirectory {
} }
} }
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> { fn atomic_write(&mut self, path: &Path, content: &[u8]) -> io::Result<()> {
debug!("Atomic Write {:?}", path); debug!("Atomic Write {:?}", path);
let full_path = self.resolve_path(path); let full_path = self.resolve_path(path);
let meta_file = atomicwrites::AtomicFile::new(full_path, atomicwrites::AllowOverwrite); let meta_file = atomicwrites::AtomicFile::new(full_path, atomicwrites::AllowOverwrite);
meta_file.write(|f| f.write_all(data))?; meta_file.write(|f| f.write_all(content))?;
Ok(()) Ok(())
} }

View File

@@ -211,19 +211,18 @@ fn test_watch(directory: &mut dyn Directory) {
.unwrap(); .unwrap();
for i in 0..10 { for i in 0..10 {
assert_eq!(i, counter.load(SeqCst)); assert!(i <= counter.load(SeqCst));
assert!(directory assert!(directory
.atomic_write(Path::new("meta.json"), b"random_test_data_2") .atomic_write(Path::new("meta.json"), b"random_test_data_2")
.is_ok()); .is_ok());
assert_eq!(receiver.recv_timeout(Duration::from_millis(500)), Ok(i)); assert!(i + 1 <= counter.load(SeqCst)); // notify can trigger more than once.
assert_eq!(i + 1, counter.load(SeqCst));
} }
mem::drop(watch_handle); mem::drop(watch_handle);
assert!(directory assert!(directory
.atomic_write(Path::new("meta.json"), b"random_test_data") .atomic_write(Path::new("meta.json"), b"random_test_data")
.is_ok()); .is_ok());
assert!(receiver.recv_timeout(Duration::from_millis(500)).is_ok()); assert!(receiver.recv_timeout(Duration::from_millis(500)).is_ok());
assert_eq!(10, counter.load(SeqCst)); assert!(10 <= counter.load(SeqCst));
} }
fn test_lock_non_blocking(directory: &mut dyn Directory) { fn test_lock_non_blocking(directory: &mut dyn Directory) {

View File

@@ -73,52 +73,7 @@ impl FacetReader {
} }
/// Return the list of facet ordinals associated to a document. /// Return the list of facet ordinals associated to a document.
pub fn facet_ords(&self, doc: DocId, output: &mut Vec<u64>) { pub fn facet_ords(&mut self, doc: DocId, output: &mut Vec<u64>) {
self.term_ords.get_vals(doc, output); self.term_ords.get_vals(doc, output);
} }
} }
#[cfg(test)]
mod tests {
use crate::{Document, schema::{Facet, SchemaBuilder}};
use crate::Index;
#[test]
fn test_facet_not_populated_for_all_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet");
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b")));
index_writer.add_document(Document::default());
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher.segment_reader(0u32).facet_reader(facet_field).unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
facet_reader.facet_ords(1u32, &mut facet_ords);
assert!(facet_ords.is_empty());
Ok(())
}
#[test]
fn test_facet_not_populated_for_any_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet");
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
index_writer.add_document(Document::default());
index_writer.add_document(Document::default());
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher.segment_reader(0u32).facet_reader(facet_field).unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
facet_reader.facet_ords(1u32, &mut facet_ords);
assert!(facet_ords.is_empty());
Ok(())
}
}

View File

@@ -126,7 +126,6 @@ impl FastFieldsWriter {
for field_writer in &self.single_value_writers { for field_writer in &self.single_value_writers {
field_writer.serialize(serializer)?; field_writer.serialize(serializer)?;
} }
for field_writer in &self.multi_values_writers { for field_writer in &self.multi_values_writers {
let field = field_writer.field(); let field = field_writer.field();
field_writer.serialize(serializer, mapping.get(&field))?; field_writer.serialize(serializer, mapping.get(&field))?;

View File

@@ -29,8 +29,9 @@ pub use self::segment_writer::SegmentWriter;
/// Alias for the default merge policy, which is the `LogMergePolicy`. /// Alias for the default merge policy, which is the `LogMergePolicy`.
pub type DefaultMergePolicy = LogMergePolicy; pub type DefaultMergePolicy = LogMergePolicy;
#[cfg(feature = "mmap")]
#[cfg(test)] #[cfg(test)]
mod tests { mod tests_mmap {
use crate::schema::{self, Schema}; use crate::schema::{self, Schema};
use crate::{Index, Term}; use crate::{Index, Term};

View File

@@ -151,7 +151,7 @@ impl SegmentWriter {
if let Some(unordered_term_id) = unordered_term_id_opt { if let Some(unordered_term_id) = unordered_term_id_opt {
self.fast_field_writers self.fast_field_writers
.get_multivalue_writer(field) .get_multivalue_writer(field)
.expect("writer for facet missing") .expect("multified writer for facet missing")
.add_val(unordered_term_id); .add_val(unordered_term_id);
} }
} }

View File

@@ -1012,4 +1012,12 @@ mod tests {
DOC_COUNT as usize DOC_COUNT as usize
); );
} }
#[test]
fn test_validate_checksum() {
let index_path = tempfile::tempdir().expect("dir");
let schema = Schema::builder().build();
let index = Index::create_in_dir(&index_path, schema).expect("index");
assert!(index.validate_checksum().unwrap().is_empty());
}
} }

View File

@@ -4,19 +4,6 @@ use crate::{DocId, DocSet, Score, TERMINATED};
use std::ops::Deref; use std::ops::Deref;
use std::ops::DerefMut; use std::ops::DerefMut;
fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
if let Some(first) = it.next() {
let mut prev = first;
for doc in it {
if doc < prev {
return false;
}
prev = doc;
}
}
true
}
/// Takes a term_scorers sorted by their current doc() and a threshold and returns /// Takes a term_scorers sorted by their current doc() and a threshold and returns
/// Returns (pivot_len, pivot_ord) defined as follows: /// Returns (pivot_len, pivot_ord) defined as follows:
/// - `pivot_doc` lowest document that has a chance of exceeding (>) the threshold score. /// - `pivot_doc` lowest document that has a chance of exceeding (>) the threshold score.
@@ -55,37 +42,12 @@ fn find_pivot_doc(
Some((before_pivot_len, pivot_len, pivot_doc)) Some((before_pivot_len, pivot_len, pivot_doc))
} }
struct TermScorerWithMaxScore<'a> {
scorer: &'a mut TermScorer,
max_score: Score,
}
impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
fn from(scorer: &'a mut TermScorer) -> Self {
let max_score = scorer.max_score();
TermScorerWithMaxScore { scorer, max_score }
}
}
impl<'a> Deref for TermScorerWithMaxScore<'a> {
type Target = TermScorer;
fn deref(&self) -> &Self::Target {
self.scorer
}
}
impl<'a> DerefMut for TermScorerWithMaxScore<'a> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.scorer
}
}
// Before and after calling this method, scorers need to be sorted by their `.doc()`. // Before and after calling this method, scorers need to be sorted by their `.doc()`.
fn block_max_was_too_low_advance_one_scorer( fn block_max_was_too_low_advance_one_scorer(
scorers: &mut Vec<TermScorerWithMaxScore>, scorers: &mut Vec<TermScorerWithMaxScore>,
pivot_len: usize, pivot_len: usize,
) { ) {
debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
let mut scorer_to_seek = pivot_len - 1; let mut scorer_to_seek = pivot_len - 1;
let mut doc_to_seek_after = scorers[scorer_to_seek].doc(); let mut doc_to_seek_after = scorers[scorer_to_seek].doc();
for scorer_ord in (0..pivot_len - 1).rev() { for scorer_ord in (0..pivot_len - 1).rev() {
@@ -102,6 +64,7 @@ fn block_max_was_too_low_advance_one_scorer(
} }
scorers[scorer_to_seek].seek(doc_to_seek_after + 1); scorers[scorer_to_seek].seek(doc_to_seek_after + 1);
restore_ordering(scorers, scorer_to_seek); restore_ordering(scorers, scorer_to_seek);
debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
} }
// Given a list of term_scorers and a `ord` and assuming that `term_scorers[ord]` is sorted // Given a list of term_scorers and a `ord` and assuming that `term_scorers[ord]` is sorted
@@ -177,64 +140,99 @@ pub fn block_wand(
.map(TermScorerWithMaxScore::from) .map(TermScorerWithMaxScore::from)
.collect(); .collect();
scorers.sort_by_key(|scorer| scorer.doc()); scorers.sort_by_key(|scorer| scorer.doc());
loop { // At this point we need to ensure that the scorers are sorted!
// At this point we need to ensure that the scorers are sorted! debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
while let Some((before_pivot_len, pivot_len, pivot_doc)) =
find_pivot_doc(&scorers[..], threshold)
{
debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc()))); debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
if let Some((before_pivot_len, pivot_len, pivot_doc)) = debug_assert_ne!(pivot_doc, TERMINATED);
find_pivot_doc(&scorers[..], threshold) debug_assert!(before_pivot_len < pivot_len);
{
debug_assert_ne!(pivot_doc, TERMINATED);
debug_assert!(before_pivot_len < pivot_len);
let block_max_score_upperbound: Score = scorers[..pivot_len] let block_max_score_upperbound: Score = scorers[..pivot_len]
.iter_mut() .iter_mut()
.map(|scorer| { .map(|scorer| {
scorer.shallow_seek(pivot_doc); scorer.shallow_seek(pivot_doc);
scorer.block_max_score() scorer.block_max_score()
}) })
.sum(); .sum();
// Beware after shallow advance, skip readers can be in advance compared to // Beware after shallow advance, skip readers can be in advance compared to
// the segment posting lists. // the segment posting lists.
// //
// `block_segment_postings.load_block()` need to be called separately. // `block_segment_postings.load_block()` need to be called separately.
if block_max_score_upperbound <= threshold { if block_max_score_upperbound <= threshold {
// Block max condition was not reached // Block max condition was not reached
// We could get away by simply advancing the scorers to DocId + 1 but it would // We could get away by simply advancing the scorers to DocId + 1 but it would
// be inefficient. The optimization requires proper explanation and was // be inefficient. The optimization requires proper explanation and was
// isolated in a different function. // isolated in a different function.
block_max_was_too_low_advance_one_scorer(&mut scorers, pivot_len); block_max_was_too_low_advance_one_scorer(&mut scorers, pivot_len);
continue; continue;
}
// Block max condition is observed.
//
// Let's try and advance all scorers before the pivot to the pivot.
if !align_scorers(&mut scorers, pivot_doc, before_pivot_len) {
// At least of the scorer does not contain the pivot.
//
// Let's stop scoring this pivot and go through the pivot selection again.
// Note that the current pivot is not necessarily a bad candidate and it
// may be picked again.
continue;
}
// At this point, all scorers are positioned on the doc.
let score = scorers[..pivot_len]
.iter_mut()
.map(|scorer| scorer.score())
.sum();
if score > threshold {
threshold = callback(pivot_doc, score);
}
// let's advance all of the scorers that are currently positioned on the pivot.
advance_all_scorers_on_pivot(&mut scorers, pivot_len);
} else {
return;
} }
// Block max condition is observed.
//
// Let's try and advance all scorers before the pivot to the pivot.
if !align_scorers(&mut scorers, pivot_doc, before_pivot_len) {
// At least of the scorer does not contain the pivot.
//
// Let's stop scoring this pivot and go through the pivot selection again.
// Note that the current pivot is not necessarily a bad candidate and it
// may be picked again.
continue;
}
// At this point, all scorers are positioned on the doc.
let score = scorers[..pivot_len]
.iter_mut()
.map(|scorer| scorer.score())
.sum();
if score > threshold {
threshold = callback(pivot_doc, score);
}
// let's advance all of the scorers that are currently positioned on the pivot.
advance_all_scorers_on_pivot(&mut scorers, pivot_len);
} }
} }
struct TermScorerWithMaxScore<'a> {
scorer: &'a mut TermScorer,
max_score: Score,
}
impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
fn from(scorer: &'a mut TermScorer) -> Self {
let max_score = scorer.max_score();
TermScorerWithMaxScore { scorer, max_score }
}
}
impl<'a> Deref for TermScorerWithMaxScore<'a> {
type Target = TermScorer;
fn deref(&self) -> &Self::Target {
self.scorer
}
}
impl<'a> DerefMut for TermScorerWithMaxScore<'a> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.scorer
}
}
fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
if let Some(first) = it.next() {
let mut prev = first;
for doc in it {
if doc < prev {
return false;
}
prev = doc;
}
}
true
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::query::score_combiner::SumCombiner; use crate::query::score_combiner::SumCombiner;
@@ -248,17 +246,21 @@ mod tests {
use std::iter; use std::iter;
struct Float(Score); struct Float(Score);
impl Eq for Float {} impl Eq for Float {}
impl PartialEq for Float { impl PartialEq for Float {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
self.cmp(&other) == Ordering::Equal self.cmp(&other) == Ordering::Equal
} }
} }
impl PartialOrd for Float { impl PartialOrd for Float {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> { fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other)) Some(self.cmp(other))
} }
} }
impl Ord for Float { impl Ord for Float {
fn cmp(&self, other: &Self) -> Ordering { fn cmp(&self, other: &Self) -> Ordering {
other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal) other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal)

View File

@@ -40,7 +40,7 @@ use std::fmt;
/// ///
/// When implementing a new type of `Query`, it is normal to implement a /// When implementing a new type of `Query`, it is normal to implement a
/// dedicated `Query`, `Weight` and `Scorer`. /// dedicated `Query`, `Weight` and `Scorer`.
pub trait Query: QueryClone + Send + Sync + downcast_rs::Downcast + fmt::Debug { pub trait Query: QueryClone + downcast_rs::Downcast + fmt::Debug {
/// Create the weight associated to a query. /// Create the weight associated to a query.
/// ///
/// If scoring is not required, setting `scoring_enabled` to `false` /// If scoring is not required, setting `scoring_enabled` to `false`

View File

@@ -4,7 +4,7 @@ use crate::docset::DocSet;
use crate::postings::SegmentPostings; use crate::postings::SegmentPostings;
use crate::query::bm25::BM25Weight; use crate::query::bm25::BM25Weight;
use crate::query::explanation::does_not_match; use crate::query::explanation::does_not_match;
use crate::query::weight::{for_each_pruning_scorer, for_each_scorer}; use crate::query::weight::for_each_scorer;
use crate::query::Weight; use crate::query::Weight;
use crate::query::{Explanation, Scorer}; use crate::query::{Explanation, Scorer};
use crate::schema::IndexRecordOption; use crate::schema::IndexRecordOption;
@@ -73,8 +73,8 @@ impl Weight for TermWeight {
reader: &SegmentReader, reader: &SegmentReader,
callback: &mut dyn FnMut(DocId, Score) -> Score, callback: &mut dyn FnMut(DocId, Score) -> Score,
) -> crate::Result<()> { ) -> crate::Result<()> {
let mut scorer = self.scorer(reader, 1.0)?; let scorer = self.specialized_scorer(reader, 1.0)?;
for_each_pruning_scorer(&mut scorer, threshold, callback); crate::query::boolean_query::block_wand(vec![scorer], threshold, callback);
Ok(()) Ok(())
} }
} }

View File

@@ -138,9 +138,11 @@ impl InnerIndexReader {
.collect::<crate::Result<_>>()? .collect::<crate::Result<_>>()?
}; };
let schema = self.index.schema(); let schema = self.index.schema();
let searchers = (0..self.num_searchers) let searchers = std::iter::repeat_with(|| {
.map(|_| Searcher::new(schema.clone(), self.index.clone(), segment_readers.clone())) Searcher::new(schema.clone(), self.index.clone(), segment_readers.clone())
.collect(); })
.take(self.num_searchers)
.collect();
self.searcher_pool.publish_new_generation(searchers); self.searcher_pool.publish_new_generation(searchers);
Ok(()) Ok(())
} }

View File

@@ -1,5 +1,5 @@
use crate::schema::IntOptions;
use crate::schema::TextOptions; use crate::schema::TextOptions;
use crate::schema::{is_valid_field_name, IntOptions};
use crate::schema::FieldType; use crate::schema::FieldType;
use serde::de::{self, MapAccess, Visitor}; use serde::de::{self, MapAccess, Visitor};
@@ -24,6 +24,7 @@ impl FieldEntry {
/// Creates a new u64 field entry in the schema, given /// Creates a new u64 field entry in the schema, given
/// a name, and some options. /// a name, and some options.
pub fn new_text(field_name: String, text_options: TextOptions) -> FieldEntry { pub fn new_text(field_name: String, text_options: TextOptions) -> FieldEntry {
assert!(is_valid_field_name(&field_name));
FieldEntry { FieldEntry {
name: field_name, name: field_name,
field_type: FieldType::Str(text_options), field_type: FieldType::Str(text_options),
@@ -33,6 +34,7 @@ impl FieldEntry {
/// Creates a new u64 field entry in the schema, given /// Creates a new u64 field entry in the schema, given
/// a name, and some options. /// a name, and some options.
pub fn new_u64(field_name: String, field_type: IntOptions) -> FieldEntry { pub fn new_u64(field_name: String, field_type: IntOptions) -> FieldEntry {
assert!(is_valid_field_name(&field_name));
FieldEntry { FieldEntry {
name: field_name, name: field_name,
field_type: FieldType::U64(field_type), field_type: FieldType::U64(field_type),
@@ -42,6 +44,7 @@ impl FieldEntry {
/// Creates a new i64 field entry in the schema, given /// Creates a new i64 field entry in the schema, given
/// a name, and some options. /// a name, and some options.
pub fn new_i64(field_name: String, field_type: IntOptions) -> FieldEntry { pub fn new_i64(field_name: String, field_type: IntOptions) -> FieldEntry {
assert!(is_valid_field_name(&field_name));
FieldEntry { FieldEntry {
name: field_name, name: field_name,
field_type: FieldType::I64(field_type), field_type: FieldType::I64(field_type),
@@ -51,6 +54,7 @@ impl FieldEntry {
/// Creates a new f64 field entry in the schema, given /// Creates a new f64 field entry in the schema, given
/// a name, and some options. /// a name, and some options.
pub fn new_f64(field_name: String, field_type: IntOptions) -> FieldEntry { pub fn new_f64(field_name: String, field_type: IntOptions) -> FieldEntry {
assert!(is_valid_field_name(&field_name));
FieldEntry { FieldEntry {
name: field_name, name: field_name,
field_type: FieldType::F64(field_type), field_type: FieldType::F64(field_type),
@@ -60,6 +64,7 @@ impl FieldEntry {
/// Creates a new date field entry in the schema, given /// Creates a new date field entry in the schema, given
/// a name, and some options. /// a name, and some options.
pub fn new_date(field_name: String, field_type: IntOptions) -> FieldEntry { pub fn new_date(field_name: String, field_type: IntOptions) -> FieldEntry {
assert!(is_valid_field_name(&field_name));
FieldEntry { FieldEntry {
name: field_name, name: field_name,
field_type: FieldType::Date(field_type), field_type: FieldType::Date(field_type),
@@ -68,6 +73,7 @@ impl FieldEntry {
/// Creates a field entry for a facet. /// Creates a field entry for a facet.
pub fn new_facet(field_name: String) -> FieldEntry { pub fn new_facet(field_name: String) -> FieldEntry {
assert!(is_valid_field_name(&field_name));
FieldEntry { FieldEntry {
name: field_name, name: field_name,
field_type: FieldType::HierarchicalFacet, field_type: FieldType::HierarchicalFacet,
@@ -76,6 +82,7 @@ impl FieldEntry {
/// Creates a field entry for a bytes field /// Creates a field entry for a bytes field
pub fn new_bytes(field_name: String) -> FieldEntry { pub fn new_bytes(field_name: String) -> FieldEntry {
assert!(is_valid_field_name(&field_name));
FieldEntry { FieldEntry {
name: field_name, name: field_name,
field_type: FieldType::Bytes, field_type: FieldType::Bytes,
@@ -268,6 +275,12 @@ mod tests {
use crate::schema::TEXT; use crate::schema::TEXT;
use serde_json; use serde_json;
#[test]
#[should_panic]
fn test_invalid_field_name_should_panic() {
FieldEntry::new_text("-hello".to_string(), TEXT);
}
#[test] #[test]
fn test_json_serialization() { fn test_json_serialization() {
let field_value = FieldEntry::new_text(String::from("title"), TEXT); let field_value = FieldEntry::new_text(String::from("title"), TEXT);

View File

@@ -149,14 +149,16 @@ pub use self::int_options::IntOptions;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex; use regex::Regex;
/// Regular expression representing the restriction on a valid field names.
pub const FIELD_NAME_PATTERN: &'static str = r#"^[_a-zA-Z][_\-a-zA-Z0-9]*$"#;
/// Validator for a potential `field_name`. /// Validator for a potential `field_name`.
/// Returns true iff the name can be used for a field name. /// Returns true iff the name can be used for a field name.
/// ///
/// A field name must start by a letter `[a-zA-Z]`. /// A field name must start by a letter `[a-zA-Z]`.
/// The other characters can be any alphanumeric character `[a-zA-Z0-9]` or `_`. /// The other characters can be any alphanumeric character `[a-zA-Z0-9]` or `_`.
pub fn is_valid_field_name(field_name: &str) -> bool { pub fn is_valid_field_name(field_name: &str) -> bool {
static FIELD_NAME_PTN: Lazy<Regex> = static FIELD_NAME_PTN: Lazy<Regex> = Lazy::new(|| Regex::new(FIELD_NAME_PATTERN).unwrap());
Lazy::new(|| Regex::new("^[a-zA-Z][_a-zA-Z0-9]*$").unwrap());
FIELD_NAME_PTN.is_match(field_name) FIELD_NAME_PTN.is_match(field_name)
} }
@@ -170,6 +172,11 @@ mod tests {
assert!(is_valid_field_name("text")); assert!(is_valid_field_name("text"));
assert!(is_valid_field_name("text0")); assert!(is_valid_field_name("text0"));
assert!(!is_valid_field_name("0text")); assert!(!is_valid_field_name("0text"));
assert!(is_valid_field_name("field-name"));
assert!(is_valid_field_name("field_name"));
assert!(!is_valid_field_name("field!name"));
assert!(!is_valid_field_name("-fieldname"));
assert!(is_valid_field_name("_fieldname"));
assert!(!is_valid_field_name("")); assert!(!is_valid_field_name(""));
assert!(!is_valid_field_name("シャボン玉")); assert!(!is_valid_field_name("シャボン玉"));
assert!(is_valid_field_name("my_text_field")); assert!(is_valid_field_name("my_text_field"));