Compare commits


1 Commit

Author SHA1 Message Date
Halvor Fladsrud Bø 43aa53f150 Added pseudocode for poll implementation 2020-01-29 13:50:41 +01:00
23 changed files with 118 additions and 252 deletions

View File

@@ -1,9 +1,6 @@
Tantivy 0.12.0
======================
- Removing static dispatch in tokenizers for simplicity. (#762)
- Added backward iteration for `TermDictionary` stream. (@halvorboe)
- Fixed a performance issue when searching for the posting lists of a missing term (@audunhalland)
- Added a configurable maximum number of docs (10M by default) for a segment to be considered for merge (@hntd187, landed by @halvorboe #713)
## How to update?

View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy"
version = "0.12.0"
version = "0.11.3"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
@@ -18,7 +18,7 @@ byteorder = "1.0"
crc32fast = "1.2.0"
once_cell = "1.0"
regex ={version = "1.3.0", default-features = false, features = ["std"]}
tantivy-fst = "0.2.1"
tantivy-fst = "0.2"
memmap = {version = "0.7", optional=true}
lz4 = {version="1.20", optional=true}
snap = {version="0.2"}
@@ -60,6 +60,7 @@ winapi = "0.3"
rand = "0.7"
maplit = "1"
matches = "0.1.8"
time = "0.1.42"
[dev-dependencies.fail]
version = "0.3"

View File

@@ -10,9 +10,7 @@ use rayon::{ThreadPool, ThreadPoolBuilder};
/// API of a dependency, knowing it might conflict with a different version
/// used by the client. Second, we may stop using rayon in the future.
pub enum Executor {
/// Single thread variant of an Executor
SingleThread,
/// Thread pool variant of an Executor
ThreadPool(ThreadPool),
}
@@ -22,7 +20,7 @@ impl Executor {
Executor::SingleThread
}
/// Creates an Executor that dispatches the tasks in a thread pool.
// Creates an Executor that dispatches the tasks in a thread pool.
pub fn multi_thread(num_threads: usize, prefix: &'static str) -> Result<Executor> {
let pool = ThreadPoolBuilder::new()
.num_threads(num_threads)
@@ -31,10 +29,10 @@ impl Executor {
Ok(Executor::ThreadPool(pool))
}
/// Perform a map in the thread pool.
///
/// Regardless of the executor (`SingleThread` or `ThreadPool`), panics in the task
/// will propagate to the caller.
// Perform a map in the thread pool.
//
// Regardless of the executor (`SingleThread` or `ThreadPool`), panics in the task
// will propagate to the caller.
pub fn map<
A: Send,
R: Send,

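Note: the hunks above only touch comments on Executor (the variant doc comments are dropped and the doc comments on multi_thread and map become plain comments); the executor itself remains a thin wrapper around rayon. For readers unfamiliar with the pattern, this is a minimal, self-contained sketch of what multi_thread sets up, using only rayon's public API; the prefix string, thread count, and workload are illustrative, and Executor::map itself is not reproduced here.

use rayon::prelude::*;
use rayon::ThreadPoolBuilder;

fn main() -> Result<(), rayon::ThreadPoolBuildError> {
    // Rough equivalent of Executor::multi_thread: a named pool with a fixed thread count.
    let prefix = "tantivy-search-"; // illustrative prefix
    let pool = ThreadPoolBuilder::new()
        .num_threads(4)
        .thread_name(move |i| format!("{}{}", prefix, i))
        .build()?;

    // Rough analogue of Executor::map: run one task per item inside the pool.
    let squares: Vec<u32> = pool.install(|| (0u32..8).into_par_iter().map(|i| i * i).collect());
    assert_eq!(squares[3], 9);
    Ok(())
}
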
View File

@@ -60,7 +60,7 @@ impl InvertedIndexReader {
.get_index_record_option()
.unwrap_or(IndexRecordOption::Basic);
InvertedIndexReader {
termdict: TermDictionary::empty(),
termdict: TermDictionary::empty(&field_type),
postings_source: ReadOnlySource::empty(),
positions_source: ReadOnlySource::empty(),
positions_idx_source: ReadOnlySource::empty(),

View File

@@ -141,11 +141,19 @@ impl MmapCache {
}
}
pub enum WatcherMode {
Event,
Poll
}
struct WatcherWrapper {
_watcher: Mutex<notify::RecommendedWatcher>,
watcher_router: Arc<WatchCallbackList>,
watcher_mode: WatcherMode,
}
impl WatcherWrapper {
pub fn new(path: &Path) -> Result<Self, OpenDirectoryError> {
let (tx, watcher_recv): (Sender<RawEvent>, Receiver<RawEvent>) = channel();
@@ -163,33 +171,57 @@ impl WatcherWrapper {
})?;
let watcher_router: Arc<WatchCallbackList> = Default::default();
let watcher_router_clone = watcher_router.clone();
// Own the path so it can move into the watcher thread below.
let meta_path = path.to_path_buf().join(*META_FILEPATH);
thread::Builder::new()
.name("meta-file-watch-thread".to_string())
.spawn(move || {
let mut old_content = String::new();
let mode = WatcherMode::Event;
loop {
match watcher_recv.recv().map(|evt| evt.path) {
Ok(Some(changed_path)) => {
// ... Actually subject to false positive.
// We might want to be more accurate than this at one point.
if let Some(filename) = changed_path.file_name() {
if filename == *META_FILEPATH {
let _ = watcher_router_clone.broadcast();
match mode {
WatcherMode::Event => {
match watcher_recv.recv().map(|evt| evt.path) {
Ok(Some(changed_path)) => {
// ... Actually subject to false positive.
// We might want to be more accurate than this at one point.
if let Some(filename) = changed_path.file_name() {
if filename == *META_FILEPATH {
let _ = watcher_router_clone.broadcast();
}
}
}
Ok(None) => {
// not an event we are interested in.
}
Err(_e) => {
// the watch send channel was dropped
break;
}
}
}
Ok(None) => {
// not an event we are interested in.
WatcherMode::Poll => {
let mut file = match File::open(&meta_path) {
Err(why) => panic!("open: nope"),
Ok(file) => file,
};
let mut new_content = String::new();
match file.read_to_string(&mut new_content) {
Err(why) => panic!("read: nope"),
Ok(_) => {},
}
if old_content != new_content {
let _ = watcher_router_clone.broadcast();
old_content = new_content;
}
}
Err(_e) => {
// the watch send channel was dropped
break;
}
}
};
}
})?;
Ok(WatcherWrapper {
_watcher: Mutex::new(watcher),
watcher_router,
watcher_mode: WatcherMode::Event,
})
}

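The Poll arm added above is explicitly pseudocode (that is the commit's stated purpose): it assumes File and Read are in scope, nothing ever selects WatcherMode::Poll yet, and there is no delay between iterations. Below is a self-contained sketch of the loop it describes, using only the standard library; the meta file name, iteration count, and sleep interval are illustrative placeholders, and a println stands in for watcher_router_clone.broadcast().

use std::fs;
use std::thread;
use std::time::Duration;

fn main() {
    // Illustrative path; in tantivy this would be <index dir>/meta.json.
    let meta_path = std::path::PathBuf::from("meta.json");
    let poller = thread::spawn(move || {
        let mut old_content = String::new();
        for _ in 0..20 {
            // Poll mode: re-read the meta file and compare it to the last known content.
            if let Ok(new_content) = fs::read_to_string(&meta_path) {
                if new_content != old_content {
                    // This is where the broadcast to the watch callbacks would fire.
                    println!("meta file changed");
                    old_content = new_content;
                }
            }
            // A missing or unreadable file is simply retried on the next tick.
            thread::sleep(Duration::from_millis(500));
        }
    });
    poller.join().unwrap();
}
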
View File

@@ -7,6 +7,9 @@ pub use self::writer::MultiValueIntFastFieldWriter;
#[cfg(test)]
mod tests {
use time;
use self::time::Duration;
use crate::collector::TopDocs;
use crate::query::QueryParser;
use crate::schema::Cardinality;
@@ -14,7 +17,6 @@ mod tests {
use crate::schema::IntOptions;
use crate::schema::Schema;
use crate::Index;
use chrono::Duration;
#[test]
fn test_multivalued_u64() {

View File

@@ -897,7 +897,7 @@ mod tests {
let index_writer = index.writer(3_000_000).unwrap();
assert_eq!(
format!("{:?}", index_writer.get_merge_policy()),
"LogMergePolicy { min_merge_size: 8, max_merge_size: 10000000, min_layer_size: 10000, \
"LogMergePolicy { min_merge_size: 8, min_layer_size: 10000, \
level_log_size: 0.75 }"
);
let merge_policy = Box::new(NoMergePolicy::default());

View File

@@ -6,14 +6,12 @@ use std::f64;
const DEFAULT_LEVEL_LOG_SIZE: f64 = 0.75;
const DEFAULT_MIN_LAYER_SIZE: u32 = 10_000;
const DEFAULT_MIN_MERGE_SIZE: usize = 8;
const DEFAULT_MAX_MERGE_SIZE: usize = 10_000_000;
/// `LogMergePolicy` tries to merge segments that have a similar number of
/// documents.
#[derive(Debug, Clone)]
pub struct LogMergePolicy {
min_merge_size: usize,
max_merge_size: usize,
min_layer_size: u32,
level_log_size: f64,
}
@@ -28,12 +26,6 @@ impl LogMergePolicy {
self.min_merge_size = min_merge_size;
}
/// Set the maximum number of docs in a segment for it to be considered for
/// merging.
pub fn set_max_merge_size(&mut self, max_merge_size: usize) {
self.max_merge_size = max_merge_size;
}
/// Set the minimum segment size under which all segments belong
/// to the same level.
pub fn set_min_layer_size(&mut self, min_layer_size: u32) {
@@ -61,7 +53,6 @@ impl MergePolicy for LogMergePolicy {
let mut size_sorted_tuples = segments
.iter()
.map(SegmentMeta::num_docs)
.filter(|s| s <= &(self.max_merge_size as u32))
.enumerate()
.collect::<Vec<(usize, u32)>>();
@@ -95,7 +86,6 @@ impl Default for LogMergePolicy {
fn default() -> LogMergePolicy {
LogMergePolicy {
min_merge_size: DEFAULT_MIN_MERGE_SIZE,
max_merge_size: DEFAULT_MAX_MERGE_SIZE,
min_layer_size: DEFAULT_MIN_LAYER_SIZE,
level_log_size: DEFAULT_LEVEL_LOG_SIZE,
}
@@ -114,7 +104,6 @@ mod tests {
fn test_merge_policy() -> LogMergePolicy {
let mut log_merge_policy = LogMergePolicy::default();
log_merge_policy.set_min_merge_size(3);
log_merge_policy.set_max_merge_size(100_000);
log_merge_policy.set_min_layer_size(2);
log_merge_policy
}
@@ -152,11 +141,11 @@ mod tests {
create_random_segment_meta(10),
create_random_segment_meta(10),
create_random_segment_meta(10),
create_random_segment_meta(1_000),
create_random_segment_meta(1_000),
create_random_segment_meta(1_000),
create_random_segment_meta(10_000),
create_random_segment_meta(10_000),
create_random_segment_meta(1000),
create_random_segment_meta(1000),
create_random_segment_meta(1000),
create_random_segment_meta(10000),
create_random_segment_meta(10000),
create_random_segment_meta(10),
create_random_segment_meta(10),
create_random_segment_meta(10),
@@ -193,19 +182,4 @@ mod tests {
let result_list = test_merge_policy().compute_merge_candidates(&test_input);
assert_eq!(result_list.len(), 1);
}
#[test]
fn test_large_merge_segments() {
let test_input = vec![
create_random_segment_meta(1_000_000),
create_random_segment_meta(100_001),
create_random_segment_meta(100_000),
create_random_segment_meta(100_000),
create_random_segment_meta(100_000),
];
let result_list = test_merge_policy().compute_merge_candidates(&test_input);
// Do not include large segments
assert_eq!(result_list.len(), 1);
assert_eq!(result_list[0].0.len(), 3)
}
}

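Together with the changelog entry removed at the top of this compare, these hunks back out the configurable max_merge_size from #713: the constant, the setter, the filter in compute_merge_candidates, and the test_large_merge_segments test all go away, which is also why the Debug string asserted in the indexer test above loses its max_merge_size field. A hedged usage sketch of what remains tunable on the policy after the revert (schema, memory budget, and threshold values are illustrative):

use tantivy::merge_policy::LogMergePolicy;
use tantivy::schema::{Schema, TEXT};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("text", TEXT);
    let index = Index::create_in_ram(schema_builder.build());
    let index_writer = index.writer(3_000_000)?;

    // Only min_merge_size, min_layer_size and level_log_size remain configurable here.
    let mut merge_policy = LogMergePolicy::default();
    merge_policy.set_min_merge_size(3);
    merge_policy.set_min_layer_size(2);
    index_writer.set_merge_policy(Box::new(merge_policy));
    Ok(())
}
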
View File

@@ -161,7 +161,7 @@ pub use self::snippet::{Snippet, SnippetGenerator};
mod docset;
pub use self::docset::{DocSet, SkipResult};
pub use crate::common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
pub use crate::core::{Executor, SegmentComponent};
pub use crate::core::SegmentComponent;
pub use crate::core::{Index, IndexMeta, Searcher, Segment, SegmentId, SegmentMeta};
pub use crate::core::{InvertedIndexReader, SegmentReader};
pub use crate::directory::Directory;

View File

@@ -148,7 +148,8 @@ impl<'a> FieldSerializer<'a> {
}
_ => (false, false),
};
let term_dictionary_builder = TermDictionaryBuilder::create(term_dictionary_write)?;
let term_dictionary_builder =
TermDictionaryBuilder::create(term_dictionary_write, &field_type)?;
let postings_serializer =
PostingsSerializer::new(postings_write, term_freq_enabled, position_enabled);
let positions_serializer_opt = if position_enabled {

View File

@@ -15,7 +15,6 @@ use tantivy_fst::Automaton;
pub struct AutomatonWeight<A> {
field: Field,
automaton: Arc<A>,
boost: f32,
}
impl<A> AutomatonWeight<A>
@@ -27,15 +26,9 @@ where
AutomatonWeight {
field,
automaton: automaton.into(),
boost: 1.0,
}
}
/// Boost the scorer by the given factor.
pub fn boost_by(self, boost: f32) -> Self {
Self { boost, ..self }
}
fn automaton_stream<'a>(&'a self, term_dict: &'a TermDictionary) -> TermStreamer<'a, &'a A> {
let automaton: &A = &*self.automaton;
let term_stream_builder = term_dict.search(automaton);
@@ -65,7 +58,7 @@ where
}
}
let doc_bitset = BitSetDocSet::from(doc_bitset);
Ok(Box::new(ConstScorer::with_score(doc_bitset, self.boost)))
Ok(Box::new(ConstScorer::new(doc_bitset)))
}
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {

View File

@@ -34,7 +34,7 @@ pub struct BM25Weight {
}
impl BM25Weight {
pub fn for_terms(searcher: &Searcher, terms: &[Term], boost: f32) -> BM25Weight {
pub fn for_terms(searcher: &Searcher, terms: &[Term]) -> BM25Weight {
assert!(!terms.is_empty(), "BM25 requires at least one term");
let field = terms[0].field();
for term in &terms[1..] {
@@ -75,11 +75,11 @@ impl BM25Weight {
.sum::<f32>();
idf_explain = Explanation::new("idf", idf);
}
BM25Weight::new(idf_explain, average_fieldnorm, boost)
BM25Weight::new(idf_explain, average_fieldnorm)
}
fn new(idf_explain: Explanation, average_fieldnorm: f32, boost: f32) -> BM25Weight {
let weight = idf_explain.value() * (1f32 + K1) * boost;
fn new(idf_explain: Explanation, average_fieldnorm: f32) -> BM25Weight {
let weight = idf_explain.value() * (1f32 + K1);
BM25Weight {
idf_explain,
weight,

View File

@@ -79,7 +79,6 @@ pub struct FuzzyTermQuery {
transposition_cost_one: bool,
///
prefix: bool,
boost: f32,
}
impl FuzzyTermQuery {
@@ -90,7 +89,6 @@ impl FuzzyTermQuery {
distance,
transposition_cost_one,
prefix: false,
boost: 1.0,
}
}
@@ -101,22 +99,16 @@ impl FuzzyTermQuery {
distance,
transposition_cost_one,
prefix: true,
boost: 1.0,
}
}
/// Boost the query score by the given factor.
pub fn boost_by(self, boost: f32) -> Self {
Self { boost, ..self }
}
fn specialized_weight(&self) -> Result<AutomatonWeight<DFA>> {
// LEV_BUILDER is a HashMap, whose `get` method returns an Option
match LEV_BUILDER.get(&(self.distance, false)) {
// Unwrap the option and build the Ok(AutomatonWeight)
Some(automaton_builder) => {
let automaton = automaton_builder.build_dfa(self.term.text());
Ok(AutomatonWeight::new(self.term.field(), automaton).boost_by(self.boost))
Ok(AutomatonWeight::new(self.term.field(), automaton))
}
None => Err(InvalidArgument(format!(
"Levenshtein distance of {} is not allowed. Choose a value in the {:?} range",

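With boost_by gone, a FuzzyTermQuery is built only from a term, an edit distance, and the transposition flag, as the constructor above shows. A small usage sketch against an in-RAM index; the field name, the indexed text, and the misspelled query term are illustrative.

use tantivy::collector::TopDocs;
use tantivy::query::FuzzyTermQuery;
use tantivy::schema::{Document, Schema, TEXT};
use tantivy::{Index, Term};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut index_writer = index.writer(3_000_000)?;
    let mut document = Document::default();
    document.add_text(title, "japan");
    index_writer.add_document(document);
    index_writer.commit()?;

    let searcher = index.reader()?.searcher();
    // "japon" is one substitution away from the indexed token "japan".
    let query = FuzzyTermQuery::new(Term::from_field_text(title, "japon"), 1, true);
    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
    println!("{} matching document(s)", top_docs.len());
    Ok(())
}
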
View File

@@ -27,7 +27,6 @@ use std::collections::BTreeSet;
pub struct PhraseQuery {
field: Field,
phrase_terms: Vec<(usize, Term)>,
boost: f32,
}
impl PhraseQuery {
@@ -58,15 +57,9 @@ impl PhraseQuery {
PhraseQuery {
field,
phrase_terms: terms,
boost: 1.0,
}
}
/// Boost the query score by the given factor.
pub fn boost_by(self, boost: f32) -> Self {
Self { boost, ..self }
}
/// The `Field` this `PhraseQuery` is targeting.
pub fn field(&self) -> Field {
self.field
@@ -104,7 +97,7 @@ impl PhraseQuery {
)));
}
let terms = self.phrase_terms();
let bm25_weight = BM25Weight::for_terms(searcher, &terms, self.boost);
let bm25_weight = BM25Weight::for_terms(searcher, &terms);
Ok(PhraseWeight::new(
self.phrase_terms.clone(),
bm25_weight,

View File

@@ -54,7 +54,6 @@ use tantivy_fst::Regex;
pub struct RegexQuery {
regex: Arc<Regex>,
field: Field,
boost: f32,
}
impl RegexQuery {
@@ -70,17 +69,11 @@ impl RegexQuery {
RegexQuery {
regex: regex.into(),
field,
boost: 1.0,
}
}
/// Boost the query score by the given factor.
pub fn boost_by(self, boost: f32) -> Self {
Self { boost, ..self }
}
fn specialized_weight(&self) -> AutomatonWeight<Regex> {
AutomatonWeight::new(self.field, self.regex.clone()).boost_by(self.boost)
AutomatonWeight::new(self.field, self.regex.clone())
}
}

View File

@@ -56,11 +56,6 @@ impl<TDocSet: DocSet> ConstScorer<TDocSet> {
}
}
/// Creates a new `ConstScorer` with a custom score value
pub fn with_score(docset: TDocSet, score: f32) -> ConstScorer<TDocSet> {
ConstScorer { docset, score }
}
/// Sets the constant score to a different value.
pub fn set_score(&mut self, score: Score) {
self.score = score;

View File

@@ -45,35 +45,6 @@ mod tests {
assert_eq!(term_scorer.score(), 0.28768212);
}
#[test]
pub fn test_term_query_boost_by() {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", STRING);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
// writing the segment
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
{
let doc = doc!(text_field => "a");
index_writer.add_document(doc);
}
assert!(index_writer.commit().is_ok());
}
let searcher = index.reader().unwrap().searcher();
let term_query = TermQuery::new(
Term::from_field_text(text_field, "a"),
IndexRecordOption::Basic,
)
.boost_by(42.0);
let term_weight = term_query.weight(&searcher, true).unwrap();
let segment_reader = searcher.segment_reader(0);
let mut term_scorer = term_weight.scorer(segment_reader).unwrap();
assert!(term_scorer.advance());
assert_eq!(term_scorer.doc(), 0);
assert_nearly_equals(0.28768212 * 42.0, term_scorer.score());
}
#[test]
pub fn test_term_weight() {
let mut schema_builder = Schema::builder();

View File

@@ -61,7 +61,6 @@ use std::fmt;
pub struct TermQuery {
term: Term,
index_record_option: IndexRecordOption,
boost: f32,
}
impl fmt::Debug for TermQuery {
@@ -76,15 +75,9 @@ impl TermQuery {
TermQuery {
term,
index_record_option: segment_postings_options,
boost: 1.0,
}
}
/// Boost the query score by the given factor.
pub fn boost_by(self, boost: f32) -> Self {
Self { boost, ..self }
}
/// The `Term` this query is built out of.
pub fn term(&self) -> &Term {
&self.term
@@ -97,7 +90,7 @@ impl TermQuery {
/// This is useful for optimization purposes.
pub fn specialized_weight(&self, searcher: &Searcher, scoring_enabled: bool) -> TermWeight {
let term = self.term.clone();
let bm25_weight = BM25Weight::for_terms(searcher, &[term], self.boost);
let bm25_weight = BM25Weight::for_terms(searcher, &[term]);
let index_record_option = if scoring_enabled {
self.index_record_option
} else {

View File

@@ -11,6 +11,7 @@ use crate::Result;
use crate::Searcher;
use crate::SegmentReader;
use std::sync::Arc;
use std::thread;
/// Defines when a new version of the index should be reloaded.
///

View File

@@ -122,11 +122,6 @@ impl Facet {
pub fn to_path(&self) -> Vec<&str> {
self.encoded_str().split(|c| c == FACET_SEP_CHAR).collect()
}
/// This function is the inverse of Facet::from(&str).
pub fn to_path_string(&self) -> String {
format!("{}", self.to_string())
}
}
impl Borrow<str> for Facet {
@@ -270,21 +265,4 @@ mod tests {
let facet = Facet::from_path(v.iter());
assert_eq!(facet.to_path(), v);
}
#[test]
fn test_to_path_string() {
let v = ["first", "second", "third/not_fourth"];
let facet = Facet::from_path(v.iter());
assert_eq!(
facet.to_path_string(),
String::from("/first/second/third\\/not_fourth")
);
}
#[test]
fn test_to_path_string_empty() {
let v: Vec<&str> = vec![];
let facet = Facet::from_path(v.iter());
assert_eq!(facet.to_path_string(), "/");
}
}

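The removed to_path_string helper was only a wrapper around the Display implementation (format!("{}", self.to_string())), so the escaped string form stays reachable through to_string()/format!. A small sketch of the round trip exercised by the removed tests, assuming Display keeps producing the escaped /-separated form:

use tantivy::schema::Facet;

fn main() {
    let segments = ["first", "second", "third/not_fourth"];
    let facet = Facet::from_path(segments.iter());
    // to_path splits on the unescaped separator and recovers the original segments.
    assert_eq!(facet.to_path(), segments.to_vec());
    // Display renders the escaped form that to_path_string used to return, e.g.
    // /first/second/third\/not_fourth.
    println!("{}", facet);
}
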
View File

@@ -38,7 +38,7 @@ mod tests {
use crate::core::Index;
use crate::directory::{Directory, RAMDirectory, ReadOnlySource};
use crate::postings::TermInfo;
use crate::schema::{Document, Schema, TEXT};
use crate::schema::{Document, FieldType, Schema, TEXT};
use std::path::PathBuf;
use std::str;
@@ -52,12 +52,6 @@ mod tests {
}
}
#[test]
fn test_empty_term_dictionary() {
let empty = TermDictionary::empty();
assert!(empty.stream().next().is_none());
}
#[test]
fn test_term_ordinals() {
const COUNTRIES: [&'static str; 7] = [
@@ -73,7 +67,9 @@ mod tests {
let path = PathBuf::from("TermDictionary");
{
let write = directory.open_write(&path).unwrap();
let mut term_dictionary_builder = TermDictionaryBuilder::create(write).unwrap();
let field_type = FieldType::Str(TEXT);
let mut term_dictionary_builder =
TermDictionaryBuilder::create(write, &field_type).unwrap();
for term in COUNTRIES.iter() {
term_dictionary_builder
.insert(term.as_bytes(), &make_term_info(0u64))
@@ -97,7 +93,9 @@ mod tests {
let path = PathBuf::from("TermDictionary");
{
let write = directory.open_write(&path).unwrap();
let mut term_dictionary_builder = TermDictionaryBuilder::create(write).unwrap();
let field_type = FieldType::Str(TEXT);
let mut term_dictionary_builder =
TermDictionaryBuilder::create(write, &field_type).unwrap();
term_dictionary_builder
.insert("abc".as_bytes(), &make_term_info(34u64))
.unwrap();
@@ -181,8 +179,10 @@ mod tests {
let ids: Vec<_> = (0u32..10_000u32)
.map(|i| (format!("doc{:0>6}", i), i))
.collect();
let field_type = FieldType::Str(TEXT);
let buffer: Vec<u8> = {
let mut term_dictionary_builder = TermDictionaryBuilder::create(vec![]).unwrap();
let mut term_dictionary_builder =
TermDictionaryBuilder::create(vec![], &field_type).unwrap();
for &(ref id, ref i) in &ids {
term_dictionary_builder
.insert(id.as_bytes(), &make_term_info(*i as u64))
@@ -209,8 +209,10 @@ mod tests {
#[test]
fn test_stream_high_range_prefix_suffix() {
let field_type = FieldType::Str(TEXT);
let buffer: Vec<u8> = {
let mut term_dictionary_builder = TermDictionaryBuilder::create(vec![]).unwrap();
let mut term_dictionary_builder =
TermDictionaryBuilder::create(vec![], &field_type).unwrap();
// term requires more than 16bits
term_dictionary_builder
.insert("abcdefghijklmnopqrstuvwxy", &make_term_info(1))
@@ -242,8 +244,10 @@ mod tests {
let ids: Vec<_> = (0u32..10_000u32)
.map(|i| (format!("doc{:0>6}", i), i))
.collect();
let field_type = FieldType::Str(TEXT);
let buffer: Vec<u8> = {
let mut term_dictionary_builder = TermDictionaryBuilder::create(vec![]).unwrap();
let mut term_dictionary_builder =
TermDictionaryBuilder::create(vec![], &field_type).unwrap();
for &(ref id, ref i) in &ids {
term_dictionary_builder
.insert(id.as_bytes(), &make_term_info(*i as u64))
@@ -309,8 +313,10 @@ mod tests {
#[test]
fn test_empty_string() {
let field_type = FieldType::Str(TEXT);
let buffer: Vec<u8> = {
let mut term_dictionary_builder = TermDictionaryBuilder::create(vec![]).unwrap();
let mut term_dictionary_builder =
TermDictionaryBuilder::create(vec![], &field_type).unwrap();
term_dictionary_builder
.insert(&[], &make_term_info(1 as u64))
.unwrap();
@@ -331,8 +337,10 @@ mod tests {
#[test]
fn test_stream_range_boundaries() {
let field_type = FieldType::Str(TEXT);
let buffer: Vec<u8> = {
let mut term_dictionary_builder = TermDictionaryBuilder::create(vec![]).unwrap();
let mut term_dictionary_builder =
TermDictionaryBuilder::create(vec![], &field_type).unwrap();
for i in 0u8..10u8 {
let number_arr = [i; 1];
term_dictionary_builder
@@ -344,91 +352,41 @@ mod tests {
let source = ReadOnlySource::from(buffer);
let term_dictionary: TermDictionary = TermDictionary::from_source(&source);
let value_list = |mut streamer: TermStreamer<'_>, backwards: bool| {
let value_list = |mut streamer: TermStreamer<'_>| {
let mut res: Vec<u32> = vec![];
while let Some((_, ref v)) = streamer.next() {
res.push(v.doc_freq);
}
if backwards {
res.reverse();
}
res
};
{
let range = term_dictionary.range().backward().into_stream();
assert_eq!(
value_list(range, true),
vec![0u32, 1u32, 2u32, 3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32]
);
}
{
let range = term_dictionary.range().ge([2u8]).into_stream();
assert_eq!(
value_list(range, false),
vec![2u32, 3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32]
);
}
{
let range = term_dictionary.range().ge([2u8]).backward().into_stream();
assert_eq!(
value_list(range, true),
value_list(range),
vec![2u32, 3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32]
);
}
{
let range = term_dictionary.range().gt([2u8]).into_stream();
assert_eq!(
value_list(range, false),
vec![3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32]
);
}
{
let range = term_dictionary.range().gt([2u8]).backward().into_stream();
assert_eq!(
value_list(range, true),
value_list(range),
vec![3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32]
);
}
{
let range = term_dictionary.range().lt([6u8]).into_stream();
assert_eq!(
value_list(range, false),
vec![0u32, 1u32, 2u32, 3u32, 4u32, 5u32]
);
}
{
let range = term_dictionary.range().lt([6u8]).backward().into_stream();
assert_eq!(
value_list(range, true),
vec![0u32, 1u32, 2u32, 3u32, 4u32, 5u32]
);
assert_eq!(value_list(range), vec![0u32, 1u32, 2u32, 3u32, 4u32, 5u32]);
}
{
let range = term_dictionary.range().le([6u8]).into_stream();
assert_eq!(
value_list(range, false),
vec![0u32, 1u32, 2u32, 3u32, 4u32, 5u32, 6u32]
);
}
{
let range = term_dictionary.range().le([6u8]).backward().into_stream();
assert_eq!(
value_list(range, true),
value_list(range),
vec![0u32, 1u32, 2u32, 3u32, 4u32, 5u32, 6u32]
);
}
{
let range = term_dictionary.range().ge([0u8]).lt([5u8]).into_stream();
assert_eq!(value_list(range, false), vec![0u32, 1u32, 2u32, 3u32, 4u32]);
}
{
let range = term_dictionary
.range()
.ge([0u8])
.lt([5u8])
.backward()
.into_stream();
assert_eq!(value_list(range, true), vec![0u32, 1u32, 2u32, 3u32, 4u32]);
assert_eq!(value_list(range), vec![0u32, 1u32, 2u32, 3u32, 4u32]);
}
}
@@ -450,7 +408,9 @@ mod tests {
let path = PathBuf::from("TermDictionary");
{
let write = directory.open_write(&path).unwrap();
let mut term_dictionary_builder = TermDictionaryBuilder::create(write).unwrap();
let field_type = FieldType::Str(TEXT);
let mut term_dictionary_builder =
TermDictionaryBuilder::create(write, &field_type).unwrap();
for term in COUNTRIES.iter() {
term_dictionary_builder
.insert(term.as_bytes(), &make_term_info(0u64))

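Every call site in these tests now passes a FieldType to TermDictionaryBuilder::create, even though the fst-backed builder currently ignores it (the parameter is _field_type in the implementation below). A hedged sketch of the new build/read round trip mirroring the updated tests; TermInfo::default() stands in for the tests' make_term_info helper and is an assumption about TermInfo deriving Default.

use tantivy::directory::ReadOnlySource;
use tantivy::postings::TermInfo;
use tantivy::schema::{FieldType, TEXT};
use tantivy::termdict::{TermDictionary, TermDictionaryBuilder};

fn main() -> std::io::Result<()> {
    let field_type = FieldType::Str(TEXT);
    let buffer: Vec<u8> = {
        // The builder now takes the field type alongside the writer.
        let mut builder = TermDictionaryBuilder::create(Vec::<u8>::new(), &field_type)?;
        // Assumed: TermInfo implements Default; the tests construct one explicitly instead.
        builder.insert("abc".as_bytes(), &TermInfo::default())?;
        builder.finish()?
    };
    let dictionary = TermDictionary::from_source(&ReadOnlySource::from(buffer));
    assert_eq!(dictionary.num_terms(), 1);
    Ok(())
}
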
View File

@@ -51,12 +51,6 @@ where
self
}
/// Iterate over the range backwards.
pub fn backward(mut self) -> Self {
self.stream_builder = self.stream_builder.backward();
self
}
/// Creates the stream corresponding to the range
/// of terms defined using the `TermStreamerBuilder`.
pub fn into_stream(self) -> TermStreamer<'a, A> {

View File

@@ -4,8 +4,8 @@ use crate::common::BinarySerializable;
use crate::common::CountingWriter;
use crate::directory::ReadOnlySource;
use crate::postings::TermInfo;
use crate::schema::FieldType;
use crate::termdict::TermOrdinal;
use once_cell::sync::Lazy;
use std::io::{self, Write};
use tantivy_fst;
use tantivy_fst::raw::Fst;
@@ -29,7 +29,7 @@ where
W: Write,
{
/// Creates a new `TermDictionaryBuilder`
pub fn create(w: W) -> io::Result<Self> {
pub fn create(w: W, _field_type: &FieldType) -> io::Result<Self> {
let fst_builder = tantivy_fst::MapBuilder::new(w).map_err(convert_fst_error)?;
Ok(TermDictionaryBuilder {
fst_builder,
@@ -92,14 +92,6 @@ fn open_fst_index(source: ReadOnlySource) -> tantivy_fst::Map<ReadOnlySource> {
tantivy_fst::Map::from(fst)
}
static EMPTY_DATA_SOURCE: Lazy<ReadOnlySource> = Lazy::new(|| {
let term_dictionary_data: Vec<u8> = TermDictionaryBuilder::create(Vec::<u8>::new())
.expect("Creating a TermDictionaryBuilder in a Vec<u8> should never fail")
.finish()
.expect("Writing in a Vec<u8> should never fail");
ReadOnlySource::from(term_dictionary_data)
});
/// The term dictionary contains all of the terms in
/// `tantivy index` in a sorted manner.
///
@@ -130,8 +122,14 @@ impl TermDictionary {
}
/// Creates an empty term dictionary which contains no terms.
pub fn empty() -> Self {
TermDictionary::from_source(&*EMPTY_DATA_SOURCE)
pub fn empty(field_type: &FieldType) -> Self {
let term_dictionary_data: Vec<u8> =
TermDictionaryBuilder::create(Vec::<u8>::new(), &field_type)
.expect("Creating a TermDictionaryBuilder in a Vec<u8> should never fail")
.finish()
.expect("Writing in a Vec<u8> should never fail");
let source = ReadOnlySource::from(term_dictionary_data);
Self::from_source(&source)
}
/// Returns the number of terms in the dictionary.