Integrated state into TermDict streamer

Introducing a wrapper struct instead of Box<BoxableTokenizer> (#631)
Closes #629
2026-01-02 15:22:55 +00:00 · 2019-08-16 10:29:28 +09:00 · 2019-08-15 16:37:04 +09:00 · 2019-08-14 17:44:25 +09:00 · 2019-08-12 08:25:47 +09:00 · 2019-08-12 08:24:47 +09:00
42 changed files with 627 additions and 396 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -47,6 +47,7 @@ matrix:
 before_install:
  - set -e
  - rustup self update
+  - rustup component add rustfmt

 install:
  - sh ci/install.sh
@@ -60,6 +61,7 @@ before_script:

 script:
  - bash ci/script.sh
+  - cargo fmt --all -- --check

 before_deploy:
  - sh ci/before_deploy.sh
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,15 @@ Tantivy 0.11.0
 =====================

 - Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
+- Various bugfixes in the query parser.
+    - Better handling of hyphens in query parser. (#609)
+    - Better handling of whitespaces.
+- Closes #498 - add support for Elastic-style unbounded range queries for alphanumeric types, e.g. "title:>hello", "weight:>=70.5", "height:<200" (@petr-tik)
+- API change around `Box<dyn BoxableTokenizer>`. See details in #629.
+
+## How to update?
+
+`Box<dyn BoxableTokenizer>` has been replaced by a `BoxedTokenizer` struct.

 Tantivy 0.10.1
 =====================
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.10.1"
+version = "0.11.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -17,7 +17,7 @@ base64 = "0.10.0"
 byteorder = "1.0"
 once_cell = "0.2"
 regex = "1.0"
-tantivy-fst = "0.1"
+tantivy-fst = {git="https://github.com/tantivy-search/fst"}
 memmap = {version = "0.7", optional=true}
 lz4 = {version="1.20", optional=true}
 snap = {version="0.2"}
@@ -25,7 +25,6 @@ atomicwrites = {version="0.2.2", optional=true}
 tempfile = "3.0"
 log = "0.4"
 combine = ">=3.6.0,<4.0.0"
-tempdir = "0.3"
 serde = "1.0"
 serde_derive = "1.0"
 serde_json = "1.0"
@@ -36,7 +35,7 @@ levenshtein_automata = {version="0.1", features=["fst_automaton"]}
 notify = {version="4", optional=true}
 bit-set = "0.5"
 uuid = { version = "0.7.2", features = ["v4", "serde"] }
-crossbeam = "0.5"
+crossbeam = "0.7"
 futures = "0.1"
 futures-cpupool = "0.1"
 owning_ref = "0.4"
@@ -87,7 +86,6 @@ travis-ci = { repository = "tantivy-search/tantivy" }
 [dev-dependencies.fail]
 features = ["failpoints"]

-
 # Following the "fail" crate best practises, we isolate
 # tests that define specific behavior in fail check points
 # in a different binary.
@@ -98,4 +96,4 @@ features = ["failpoints"]
 [[test]]
 name = "failpoints"
 path = "tests/failpoints/mod.rs"
-required-features = ["fail/failpoints"]
+required-features = ["fail/failpoints"]
--- a/examples/basic_search.rs
+++ b/examples/basic_search.rs
@@ -19,12 +19,12 @@ use tantivy::query::QueryParser;
 use tantivy::schema::*;
 use tantivy::Index;
 use tantivy::ReloadPolicy;
-use tempdir::TempDir;
+use tempfile::TempDir;

 fn main() -> tantivy::Result<()> {
    // Let's create a temporary directory for the
    // sake of this example
-    let index_path = TempDir::new("tantivy_example_dir")?;
+    let index_path = TempDir::new()?;

    // # Defining the schema
    //
--- a/examples/faceted_search.rs
+++ b/examples/faceted_search.rs
@@ -18,11 +18,12 @@ use tantivy::collector::FacetCollector;
 use tantivy::query::AllQuery;
 use tantivy::schema::*;
 use tantivy::Index;
+use tempfile::TempDir;

 fn main() -> tantivy::Result<()> {
    // Let's create a temporary directory for the
    // sake of this example
-    let index_path = TempDir::new("tantivy_facet_example_dir")?;
+    let index_path = TempDir::new()?;
    let mut schema_builder = Schema::builder();

    schema_builder.add_text_field("name", TEXT | STORED);
@@ -74,5 +75,3 @@ fn main() -> tantivy::Result<()> {

    Ok(())
 }
-
-use tempdir::TempDir;
--- a/examples/snippet.rs
+++ b/examples/snippet.rs
@@ -14,12 +14,12 @@ use tantivy::query::QueryParser;
 use tantivy::schema::*;
 use tantivy::Index;
 use tantivy::{Snippet, SnippetGenerator};
-use tempdir::TempDir;
+use tempfile::TempDir;

 fn main() -> tantivy::Result<()> {
    // Let's create a temporary directory for the
    // sake of this example
-    let index_path = TempDir::new("tantivy_example_dir")?;
+    let index_path = TempDir::new()?;

    // # Defining the schema
    let mut schema_builder = Schema::builder();
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -13,6 +13,7 @@ use crate::Result;
 use crate::Score;
 use crate::SegmentLocalId;
 use crate::SegmentReader;
+use std::fmt;

 /// The Top Score Collector keeps track of the K documents
 /// sorted by their score.
@@ -68,6 +69,12 @@ use crate::SegmentReader;
 /// ```
 pub struct TopDocs(TopCollector<Score>);

+impl fmt::Debug for TopDocs {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "TopDocs({})", self.0.limit())
+    }
+}
+
 impl TopDocs {
    /// Creates a top score collector, with a number of documents equal to "limit".
    ///
@@ -584,7 +591,7 @@ mod tests {
        query_field: Field,
        schema: Schema,
        mut doc_adder: impl FnMut(&mut IndexWriter) -> (),
-    ) -> (Index, Box<Query>) {
+    ) -> (Index, Box<dyn Query>) {
        let index = Index::create_in_ram(schema);

        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -124,26 +124,24 @@ pub fn f64_to_u64(val: f64) -> u64 {
 /// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
 #[inline(always)]
 pub fn u64_to_f64(val: u64) -> f64 {
-    f64::from_bits(
-        if val & HIGHEST_BIT != 0 {
-            val ^ HIGHEST_BIT
-        } else {
-            !val
-        }
-    )
+    f64::from_bits(if val & HIGHEST_BIT != 0 {
+        val ^ HIGHEST_BIT
+    } else {
+        !val
+    })
 }

 #[cfg(test)]
 pub(crate) mod test {

    pub use super::serialize::test::fixed_size_test;
-    use super::{compute_num_bits, i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64};
+    use super::{compute_num_bits, f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
    use std::f64;

    fn test_i64_converter_helper(val: i64) {
        assert_eq!(u64_to_i64(i64_to_u64(val)), val);
    }
-    
+
    fn test_f64_converter_helper(val: f64) {
        assert_eq!(u64_to_f64(f64_to_u64(val)), val);
    }
@@ -172,7 +170,8 @@ pub(crate) mod test {

    #[test]
    fn test_f64_order() {
-        assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)).contains(&f64_to_u64(f64::NAN))); //nan is not a number
+        assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
+            .contains(&f64_to_u64(f64::NAN))); //nan is not a number
        assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
        assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
        assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -173,11 +173,11 @@ impl Index {
    }

    /// Helper to access the tokenizer associated to a specific field.
-    pub fn tokenizer_for_field(&self, field: Field) -> Result<Box<dyn BoxedTokenizer>> {
+    pub fn tokenizer_for_field(&self, field: Field) -> Result<BoxedTokenizer> {
        let field_entry = self.schema.get_field_entry(field);
        let field_type = field_entry.field_type();
        let tokenizer_manager: &TokenizerManager = self.tokenizers();
-        let tokenizer_name_opt: Option<Box<dyn BoxedTokenizer>> = match field_type {
+        let tokenizer_name_opt: Option<BoxedTokenizer> = match field_type {
            FieldType::Str(text_options) => text_options
                .get_indexing_options()
                .map(|text_indexing_options| text_indexing_options.tokenizer().to_string())
@@ -459,13 +459,13 @@ mod tests {

        use super::*;
        use std::path::PathBuf;
-        use tempdir::TempDir;
+        use tempfile::TempDir;

        #[test]
        fn test_index_on_commit_reload_policy_mmap() {
            let schema = throw_away_schema();
            let field = schema.get_field("num_likes").unwrap();
-            let tempdir = TempDir::new("index").unwrap();
+            let tempdir = TempDir::new().unwrap();
            let tempdir_path = PathBuf::from(tempdir.path());
            let index = Index::create_in_dir(&tempdir_path, schema).unwrap();
            let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
@@ -504,7 +504,7 @@ mod tests {
        fn test_index_on_commit_reload_policy_different_directories() {
            let schema = throw_away_schema();
            let field = schema.get_field("num_likes").unwrap();
-            let tempdir = TempDir::new("index").unwrap();
+            let tempdir = TempDir::new().unwrap();
            let tempdir_path = PathBuf::from(tempdir.path());
            let write_index = Index::create_in_dir(&tempdir_path, schema).unwrap();
            let read_index = Index::open_in_dir(&tempdir_path).unwrap();
--- a/src/directory/directory.rs
+++ b/src/directory/directory.rs
@@ -48,14 +48,14 @@ impl RetryPolicy {
 ///
 /// It is transparently associated to a lock file, that gets deleted
 /// on `Drop.` The lock is released automatically on `Drop`.
-pub struct DirectoryLock(Box<dyn Drop + Send + Sync + 'static>);
+pub struct DirectoryLock(Box<dyn Send + Sync + 'static>);

 struct DirectoryLockGuard {
    directory: Box<dyn Directory>,
    path: PathBuf,
 }

-impl<T: Drop + Send + Sync + 'static> From<Box<T>> for DirectoryLock {
+impl<T: Send + Sync + 'static> From<Box<T>> for DirectoryLock {
    fn from(underlying: Box<T>) -> Self {
        DirectoryLock(underlying)
    }
--- a/src/directory/managed_directory.rs
+++ b/src/directory/managed_directory.rs
@@ -263,11 +263,11 @@ mod tests_mmap_specific {
    use std::collections::HashSet;
    use std::io::Write;
    use std::path::{Path, PathBuf};
-    use tempdir::TempDir;
+    use tempfile::TempDir;

    #[test]
    fn test_managed_directory() {
-        let tempdir = TempDir::new("tantivy-test").unwrap();
+        let tempdir = TempDir::new().unwrap();
        let tempdir_path = PathBuf::from(tempdir.path());

        let test_path1: &'static Path = Path::new("some_path_for_test");
@@ -304,7 +304,7 @@ mod tests_mmap_specific {
    fn test_managed_directory_gc_while_mmapped() {
        let test_path1: &'static Path = Path::new("some_path_for_test");

-        let tempdir = TempDir::new("index").unwrap();
+        let tempdir = TempDir::new().unwrap();
        let tempdir_path = PathBuf::from(tempdir.path());
        let living_files = HashSet::new();

--- a/src/directory/mmap_directory.rs
+++ b/src/directory/mmap_directory.rs
@@ -36,7 +36,7 @@ use std::sync::Mutex;
 use std::sync::RwLock;
 use std::sync::Weak;
 use std::thread;
-use tempdir::TempDir;
+use tempfile::TempDir;

 /// Create a default io error given a string.
 pub(crate) fn make_io_err(msg: String) -> io::Error {
@@ -294,7 +294,7 @@ impl MmapDirectory {
    /// This is mostly useful to test the MmapDirectory itself.
    /// For your unit tests, prefer the RAMDirectory.
    pub fn create_from_tempdir() -> Result<MmapDirectory, OpenDirectoryError> {
-        let tempdir = TempDir::new("index").map_err(OpenDirectoryError::IoError)?;
+        let tempdir = TempDir::new().map_err(OpenDirectoryError::IoError)?;
        let tempdir_path = PathBuf::from(tempdir.path());
        MmapDirectory::new(tempdir_path, Some(tempdir))
    }
@@ -642,7 +642,7 @@ mod tests {
    fn test_watch_wrapper() {
        let counter: Arc<AtomicUsize> = Default::default();
        let counter_clone = counter.clone();
-        let tmp_dir: TempDir = tempdir::TempDir::new("test_watch_wrapper").unwrap();
+        let tmp_dir = tempfile::TempDir::new().unwrap();
        let tmp_dirpath = tmp_dir.path().to_owned();
        let mut watch_wrapper = WatcherWrapper::new(&tmp_dirpath).unwrap();
        let tmp_file = tmp_dirpath.join("coucou");
--- a/src/directory/ram_directory.rs
+++ b/src/directory/ram_directory.rs
@@ -177,7 +177,7 @@ impl Directory for RAMDirectory {
    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
        fail_point!("RAMDirectory::atomic_write", |msg| Err(io::Error::new(
            io::ErrorKind::Other,
-            msg.unwrap_or("Undefined".to_string())
+            msg.unwrap_or_else(|| "Undefined".to_string())
        )));
        let path_buf = PathBuf::from(path);

--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -31,7 +31,9 @@ impl FastFieldsWriter {
                _ => 0u64,
            };
            match *field_entry.field_type() {
-                FieldType::I64(ref int_options) | FieldType::U64(ref int_options) | FieldType::F64(ref int_options) => {
+                FieldType::I64(ref int_options)
+                | FieldType::U64(ref int_options)
+                | FieldType::F64(ref int_options) => {
                    match int_options.get_fastfield_cardinality() {
                        Some(Cardinality::SingleValue) => {
                            let mut fast_field_writer = IntFastFieldWriter::new(field);
--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -761,7 +761,6 @@ mod tests {
    use crate::Index;
    use crate::ReloadPolicy;
    use crate::Term;
-    use fail;

    #[test]
    fn test_operations_group() {
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -49,7 +49,7 @@ pub struct SegmentWriter {
    fast_field_writers: FastFieldsWriter,
    fieldnorms_writer: FieldNormsWriter,
    doc_opstamps: Vec<Opstamp>,
-    tokenizers: Vec<Option<Box<dyn BoxedTokenizer>>>,
+    tokenizers: Vec<Option<BoxedTokenizer>>,
 }

 impl SegmentWriter {
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,9 +1,9 @@
 #![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")]
+#![recursion_limit = "100"]
 #![cfg_attr(all(feature = "unstable", test), feature(test))]
 #![cfg_attr(feature = "cargo-clippy", allow(clippy::module_inception))]
 #![doc(test(attr(allow(unused_variables), deny(warnings))))]
 #![warn(missing_docs)]
-#![recursion_limit = "80"]

 //! # `tantivy`
 //!
@@ -12,7 +12,7 @@
 //!
 //! ```rust

-//! # extern crate tempdir;
+//! # extern crate tempfile;
 //! #
 //! #[macro_use]
 //! extern crate tantivy;
@@ -20,7 +20,7 @@
 //! // ...
 //!
 //! # use std::path::Path;
-//! # use tempdir::TempDir;
+//! # use tempfile::TempDir;
 //! # use tantivy::Index;
 //! # use tantivy::schema::*;
 //! # use tantivy::{Score, DocAddress};
@@ -30,7 +30,7 @@
 //! # fn main() {
 //! #     // Let's create a temporary directory for the
 //! #     // sake of this example
-//! #     if let Ok(dir) = TempDir::new("tantivy_example_dir") {
+//! #     if let Ok(dir) = TempDir::new() {
 //! #         run_example(dir.path()).unwrap();
 //! #         dir.close().unwrap();
 //! #     }
@@ -171,16 +171,16 @@ pub use self::snippet::{Snippet, SnippetGenerator};
 mod docset;
 pub use self::docset::{DocSet, SkipResult};

+pub use crate::common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
 pub use crate::core::SegmentComponent;
 pub use crate::core::{Index, IndexMeta, Searcher, Segment, SegmentId, SegmentMeta};
 pub use crate::core::{InvertedIndexReader, SegmentReader};
 pub use crate::directory::Directory;
 pub use crate::indexer::IndexWriter;
 pub use crate::postings::Postings;
+pub use crate::reader::LeasedItem;
 pub use crate::schema::{Document, Term};

-pub use crate::common::{i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64};
-
 /// Expose the current version of tantivy, as well
 /// whether it was compiled with the simd compression.
 pub fn version() -> &'static str {
@@ -849,7 +849,8 @@ mod tests {
        let index = Index::create_in_ram(schema);
        let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap();
        {
-            let document = doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
+            let document =
+                doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
            index_writer.add_document(document);
            index_writer.commit().unwrap();
        }
--- a/src/query/automaton_weight.rs
+++ b/src/query/automaton_weight.rs
@@ -14,6 +14,7 @@ use tantivy_fst::Automaton;
 pub struct AutomatonWeight<A>
 where
    A: Automaton + Send + Sync + 'static,
+    A::State: Clone + Default + Sized,
 {
    field: Field,
    automaton: A,
@@ -22,6 +23,7 @@ where
 impl<A> AutomatonWeight<A>
 where
    A: Automaton + Send + Sync + 'static,
+    A::State: Clone + Default + Sized,
 {
    /// Create a new AutomationWeight
    pub fn new(field: Field, automaton: A) -> AutomatonWeight<A> {
@@ -37,6 +39,7 @@ where
 impl<A> Weight for AutomatonWeight<A>
 where
    A: Automaton + Send + Sync + 'static,
+    A::State: Clone + Default + Sized,
 {
    fn scorer(&self, reader: &SegmentReader) -> Result<Box<dyn Scorer>> {
        let max_doc = reader.max_doc();
--- a/src/query/fuzzy_query.rs
+++ b/src/query/fuzzy_query.rs
@@ -1,3 +1,4 @@
+use crate::error::TantivyError::InvalidArgument;
 use crate::query::{AutomatonWeight, Query, Weight};
 use crate::schema::Term;
 use crate::Result;
@@ -5,11 +6,16 @@ use crate::Searcher;
 use levenshtein_automata::{LevenshteinAutomatonBuilder, DFA};
 use once_cell::sync::Lazy;
 use std::collections::HashMap;
+use std::ops::Range;
+
+/// The range of Levenshtein distances for which we will build DFAs for our terms.
+/// The computation is exponential, so it is best kept to low single digits.
+const VALID_LEVENSHTEIN_DISTANCE_RANGE: Range<u8> = (0..3);

 static LEV_BUILDER: Lazy<HashMap<(u8, bool), LevenshteinAutomatonBuilder>> = Lazy::new(|| {
    let mut lev_builder_cache = HashMap::new();
    // TODO make population lazy on a `(distance, val)` basis
-    for distance in 0..3 {
+    for distance in VALID_LEVENSHTEIN_DISTANCE_RANGE {
        for &transposition in &[false, true] {
            let lev_automaton_builder = LevenshteinAutomatonBuilder::new(distance, transposition);
            lev_builder_cache.insert((distance, transposition), lev_automaton_builder);
@@ -100,10 +106,18 @@ impl FuzzyTermQuery {
    }

    fn specialized_weight(&self) -> Result<AutomatonWeight<DFA>> {
-        let automaton = LEV_BUILDER.get(&(self.distance, false))
-            .unwrap() // TODO return an error
-            .build_dfa(self.term.text());
-        Ok(AutomatonWeight::new(self.term.field(), automaton))
+        // LEV_BUILDER is a HashMap, whose `get` method returns an Option
+        match LEV_BUILDER.get(&(self.distance, false)) {
+            // Unwrap the option and build the Ok(AutomatonWeight)
+            Some(automaton_builder) => {
+                let automaton = automaton_builder.build_dfa(self.term.text());
+                Ok(AutomatonWeight::new(self.term.field(), automaton))
+            }
+            None => Err(InvalidArgument(format!(
+                "Levenshtein distance of {} is not allowed. Choose a value in the {:?} range",
+                self.distance, VALID_LEVENSHTEIN_DISTANCE_RANGE
+            ))),
+        }
    }
 }

--- a/src/query/query_parser/logical_ast.rs
+++ b/src/query/query_parser/logical_ast.rs
@@ -18,7 +18,6 @@ pub enum LogicalLiteral {
    All,
 }

-#[derive(Clone)]
 pub enum LogicalAST {
    Clause(Vec<(Occur, LogicalAST)>),
    Leaf(Box<LogicalLiteral>),
--- a/src/query/query_parser/query_grammar.rs
+++ b/src/query/query_parser/query_grammar.rs
@@ -1,4 +1,3 @@
-use super::query_grammar;
 use super::user_input_ast::*;
 use crate::query::occur::Occur;
 use crate::query::query_parser::user_input_ast::UserInputBound;
@@ -13,22 +12,25 @@ parser! {
        (
            letter(),
            many(satisfy(|c: char| c.is_alphanumeric() || c == '_')),
-        ).map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
+        ).skip(char(':')).map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
    }
 }

 parser! {
    fn word[I]()(I) -> String
    where [I: Stream<Item = char>] {
-        many1(satisfy(|c: char| c.is_alphanumeric() || c=='.'))
-               .and_then(|s: String| {
-                   match s.as_str() {
-                     "OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
-                     "AND" => Err(StreamErrorFor::<I>::unexpected_static_message("AND")),
-                     "NOT" => Err(StreamErrorFor::<I>::unexpected_static_message("NOT")),
-                     _ => Ok(s)
-                   }
-               })
+        (
+            satisfy(|c: char| !c.is_whitespace() && !['-', '`', ':', '{', '}', '"', '[', ']', '(',')'].contains(&c) ),
+            many(satisfy(|c: char| !c.is_whitespace() && ![':', '{', '}', '"', '[', ']', '(',')'].contains(&c)))
+        )
+        .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
+        .and_then(|s: String|
+           match s.as_str() {
+             "OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
+             "AND" => Err(StreamErrorFor::<I>::unexpected_static_message("AND")),
+             "NOT" => Err(StreamErrorFor::<I>::unexpected_static_message("NOT")),
+             _ => Ok(s)
+           })
    }
 }

@@ -37,12 +39,13 @@ parser! {
    where [I: Stream<Item = char>]
    {
        let term_val = || {
-            let phrase = (char('"'), many1(satisfy(|c| c != '"')), char('"')).map(|(_, s, _)| s);
+            let phrase = char('"').with(many1(satisfy(|c| c != '"'))).skip(char('"'));
            phrase.or(word())
        };
        let term_val_with_field = negative_number().or(term_val());
        let term_query =
-            (field(), char(':'), term_val_with_field).map(|(field_name, _, phrase)| UserInputLiteral {
+            (field(), term_val_with_field)
+            .map(|(field_name, phrase)| UserInputLiteral {
                field_name: Some(field_name),
                phrase,
            });
@@ -60,8 +63,15 @@ parser! {
    fn negative_number[I]()(I) -> String
    where [I: Stream<Item = char>]
    {
-            (char('-'), many1(satisfy(char::is_numeric)))
-                .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
+        (char('-'), many1(satisfy(char::is_numeric)),
+         optional((char('.'), many1(satisfy(char::is_numeric)))))
+            .map(|(s1, s2, s3): (char, String, Option<(char, String)>)| {
+                if let Some(('.', s3)) = s3 {
+                    format!("{}{}.{}", s1, s2, s3)
+                } else {
+                    format!("{}{}", s1, s2)
+                }
+            })
    }
 }

@@ -73,55 +83,93 @@ parser! {
 }

 parser! {
+    /// Function that parses a range out of a Stream
+    /// Supports ranges like:
+    /// [5 TO 10], {5 TO 10}, [* TO 10], [10 TO *], {10 TO *], >5, <=10
+    /// [a TO *], [a TO c], [abc TO bcd}
    fn range[I]()(I) -> UserInputLeaf
    where [I: Stream<Item = char>] {
-        let term_val = || {
-            word().or(negative_number()).or(char('*').map(|_| "*".to_string()))
+        let range_term_val = || {
+            word().or(negative_number()).or(char('*').with(value("*".to_string())))
        };
-        let lower_bound = {
-            let excl = (char('{'), term_val()).map(|(_, w)| UserInputBound::Exclusive(w));
-            let incl = (char('['), term_val()).map(|(_, w)| UserInputBound::Inclusive(w));
-            attempt(excl).or(incl)
-        };
-        let upper_bound = {
-            let excl = (term_val(), char('}')).map(|(w, _)| UserInputBound::Exclusive(w));
-            let incl = (term_val(), char(']')).map(|(w, _)| UserInputBound::Inclusive(w));
-            attempt(excl).or(incl)
-        };
-        (
-            optional((field(), char(':')).map(|x| x.0)),
-            lower_bound,
-            spaces(),
-            string("TO"),
-            spaces(),
-            upper_bound,
-        ).map(|(field, lower, _, _, _, upper)| UserInputLeaf::Range {
-                field,
-                lower,
-                upper
+
+        // check for unbounded range in the form of <5, <=10, >5, >=5
+        let elastic_unbounded_range = (choice([attempt(string(">=")),
+                                               attempt(string("<=")),
+                                               attempt(string("<")),
+                                               attempt(string(">"))])
+                                       .skip(spaces()),
+                                       range_term_val()).
+            map(|(comparison_sign, bound): (&str, String)|
+                match comparison_sign {
+                    ">=" => (UserInputBound::Inclusive(bound), UserInputBound::Unbounded),
+                    "<=" => (UserInputBound::Unbounded, UserInputBound::Inclusive(bound)),
+                    "<" => (UserInputBound::Unbounded, UserInputBound::Exclusive(bound)),
+                    ">" => (UserInputBound::Exclusive(bound), UserInputBound::Unbounded),
+                    // default case
+                    _ => (UserInputBound::Unbounded, UserInputBound::Unbounded)
+                });
+        let lower_bound = (one_of("{[".chars()), range_term_val())
+            .map(|(boundary_char, lower_bound): (char, String)|
+                 if lower_bound == "*" {
+                     UserInputBound::Unbounded
+                 } else if boundary_char == '{' {
+                         UserInputBound::Exclusive(lower_bound)
+                 } else {
+                     UserInputBound::Inclusive(lower_bound)
+                 });
+        let upper_bound = (range_term_val(), one_of("}]".chars()))
+            .map(|(higher_bound, boundary_char): (String, char)|
+                 if higher_bound == "*" {
+                     UserInputBound::Unbounded
+                 } else if boundary_char == '}' {
+                     UserInputBound::Exclusive(higher_bound)
+                 } else {
+                     UserInputBound::Inclusive(higher_bound)
+                 });
+         // return only lower and upper
+        let lower_to_upper = (lower_bound.
+                                    skip((spaces(),
+                                          string("TO"),
+                                          spaces())),
+                                    upper_bound);
+
+        (optional(field()).skip(spaces()),
+         // try elastic first, if it matches, the range is unbounded
+         attempt(elastic_unbounded_range).or(lower_to_upper))
+            .map(|(field, (lower, upper))|
+                 // Construct the leaf from extracted field (optional)
+                 // and bounds
+                 UserInputLeaf::Range {
+                     field,
+                     lower,
+                     upper
        })
    }
 }

+fn negate(expr: UserInputAST) -> UserInputAST {
+    expr.unary(Occur::MustNot)
+}
+
+fn must(expr: UserInputAST) -> UserInputAST {
+    expr.unary(Occur::Must)
+}
+
 parser! {
    fn leaf[I]()(I) -> UserInputAST
    where [I: Stream<Item = char>] {
-         (char('-'), leaf()).map(|(_, expr)| expr.unary(Occur::MustNot) )
-        .or((char('+'), leaf()).map(|(_, expr)| expr.unary(Occur::Must) ))
-        .or((char('('), parse_to_ast(), char(')')).map(|(_, expr, _)| expr))
-        .or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All) ))
-        .or(attempt(
-            (string("NOT"), spaces1(), leaf()).map(|(_, _, expr)| expr.unary(Occur::MustNot))
-            )
-         )
-        .or(attempt(
-            range().map(UserInputAST::from)
-            )
-        )
-        .or(literal().map(|leaf| UserInputAST::Leaf(Box::new(leaf))))
+            char('-').with(leaf()).map(negate)
+        .or(char('+').with(leaf()).map(must))
+        .or(char('(').with(ast()).skip(char(')')))
+        .or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All)))
+        .or(attempt(string("NOT").skip(spaces1()).with(leaf()).map(negate)))
+        .or(attempt(range().map(UserInputAST::from)))
+        .or(literal().map(UserInputAST::from))
    }
 }

+#[derive(Clone, Copy)]
 enum BinaryOperand {
    Or,
    And,
@@ -129,27 +177,54 @@ enum BinaryOperand {

 parser! {
    fn binary_operand[I]()(I) -> BinaryOperand
-    where [I: Stream<Item = char>] {
-        (spaces1(),
-         (
-            string("AND").map(|_| BinaryOperand::And)
-           .or(string("OR").map(|_| BinaryOperand::Or))
-         ),
-         spaces1()).map(|(_, op,_)| op)
+    where [I: Stream<Item = char>]
+    {
+       string("AND").with(value(BinaryOperand::And))
+       .or(string("OR").with(value(BinaryOperand::Or)))
    }
 }

-enum Element {
-    SingleEl(UserInputAST),
-    NormalDisjunctive(Vec<Vec<UserInputAST>>),
+fn aggregate_binary_expressions(
+    left: UserInputAST,
+    others: Vec<(BinaryOperand, UserInputAST)>,
+) -> UserInputAST {
+    let mut dnf: Vec<Vec<UserInputAST>> = vec![vec![left]];
+    for (operator, operand_ast) in others {
+        match operator {
+            BinaryOperand::And => {
+                if let Some(last) = dnf.last_mut() {
+                    last.push(operand_ast);
+                }
+            }
+            BinaryOperand::Or => {
+                dnf.push(vec![operand_ast]);
+            }
+        }
+    }
+    if dnf.len() == 1 {
+        UserInputAST::and(dnf.into_iter().next().unwrap()) //< safe
+    } else {
+        let conjunctions = dnf.into_iter().map(UserInputAST::and).collect();
+        UserInputAST::or(conjunctions)
+    }
 }

-impl Element {
-    pub fn into_dnf(self) -> Vec<Vec<UserInputAST>> {
-        match self {
-            Element::NormalDisjunctive(conjunctions) => conjunctions,
-            Element::SingleEl(el) => vec![vec![el]],
-        }
+parser! {
+    pub fn ast[I]()(I) -> UserInputAST
+    where [I: Stream<Item = char>]
+    {
+        let operand_leaf = (binary_operand().skip(spaces()), leaf().skip(spaces()));
+        let boolean_expr = (leaf().skip(spaces().silent()), many1(operand_leaf)).map(
+            |(left, right)| aggregate_binary_expressions(left,right));
+        let whitespace_separated_leaves = many1(leaf().skip(spaces().silent()))
+        .map(|subqueries: Vec<UserInputAST>|
+            if subqueries.len() == 1 {
+                subqueries.into_iter().next().unwrap()
+            } else {
+                UserInputAST::Clause(subqueries.into_iter().collect())
+            });
+        let expr = attempt(boolean_expr).or(whitespace_separated_leaves);
+        spaces().with(expr).skip(spaces())
    }
 }

@@ -157,56 +232,7 @@ parser! {
    pub fn parse_to_ast[I]()(I) -> UserInputAST
    where [I: Stream<Item = char>]
    {
-        (
-            attempt(
-                chainl1(
-                    leaf().map(Element::SingleEl),
-                    binary_operand().map(|op: BinaryOperand|
-                        move |left: Element, right: Element| {
-                            let mut dnf = left.into_dnf();
-                            if let Element::SingleEl(el) = right {
-                                match op {
-                                    BinaryOperand::And => {
-                                        if let Some(last) = dnf.last_mut() {
-                                            last.push(el);
-                                        }
-                                    }
-                                    BinaryOperand::Or => {
-                                        dnf.push(vec!(el));
-                                    }
-                                }
-                            } else {
-                                unreachable!("Please report.")
-                            }
-                            Element::NormalDisjunctive(dnf)
-                        }
-                    )
-                )
-                .map(query_grammar::Element::into_dnf)
-                .map(|fnd| {
-                    if fnd.len() == 1 {
-                        UserInputAST::and(fnd.into_iter().next().unwrap()) //< safe
-                    } else {
-                        let conjunctions = fnd
-                        .into_iter()
-                        .map(UserInputAST::and)
-                        .collect();
-                        UserInputAST::or(conjunctions)
-                    }
-                })
-            )
-            .or(
-                sep_by(leaf(), spaces())
-                .map(|subqueries: Vec<UserInputAST>| {
-                    if subqueries.len() == 1 {
-                        subqueries.into_iter().next().unwrap()
-                    } else {
-                        UserInputAST::Clause(subqueries.into_iter().collect())
-                    }
-                })
-            )
-        )
-
+        spaces().with(optional(ast()).skip(eof())).map(|opt_ast| opt_ast.unwrap_or_else(UserInputAST::empty_query))
    }
 }

@@ -225,6 +251,18 @@ mod test {
        assert!(parse_to_ast().parse(query).is_err());
    }

+    #[test]
+    fn test_parse_empty_to_ast() {
+        test_parse_query_to_ast_helper("", "<emptyclause>");
+    }
+
+    #[test]
+    fn test_parse_query_to_ast_hyphen() {
+        test_parse_query_to_ast_helper("\"www-form-encoded\"", "\"www-form-encoded\"");
+        test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
+        test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
+    }
+
    #[test]
    fn test_parse_query_to_ast_not_op() {
        assert_eq!(
@@ -259,8 +297,67 @@ mod test {
        );
    }

+    #[test]
+    fn test_parse_elastic_query_ranges() {
+        test_parse_query_to_ast_helper("title: >a", "title:{\"a\" TO \"*\"}");
+        test_parse_query_to_ast_helper("title:>=a", "title:[\"a\" TO \"*\"}");
+        test_parse_query_to_ast_helper("title: <a", "title:{\"*\" TO \"a\"}");
+        test_parse_query_to_ast_helper("title:<=a", "title:{\"*\" TO \"a\"]");
+        test_parse_query_to_ast_helper("title:<=bsd", "title:{\"*\" TO \"bsd\"]");
+
+        test_parse_query_to_ast_helper("weight: >70", "weight:{\"70\" TO \"*\"}");
+        test_parse_query_to_ast_helper("weight:>=70", "weight:[\"70\" TO \"*\"}");
+        test_parse_query_to_ast_helper("weight: <70", "weight:{\"*\" TO \"70\"}");
+        test_parse_query_to_ast_helper("weight:<=70", "weight:{\"*\" TO \"70\"]");
+        test_parse_query_to_ast_helper("weight: >60.7", "weight:{\"60.7\" TO \"*\"}");
+
+        test_parse_query_to_ast_helper("weight: <= 70", "weight:{\"*\" TO \"70\"]");
+
+        test_parse_query_to_ast_helper("weight: <= 70.5", "weight:{\"*\" TO \"70.5\"]");
+    }
+
+    #[test]
+    fn test_range_parser() {
+        // testing the range() parser separately
+        let res = range().parse("title: <hello").unwrap().0;
+        let expected = UserInputLeaf::Range {
+            field: Some("title".to_string()),
+            lower: UserInputBound::Unbounded,
+            upper: UserInputBound::Exclusive("hello".to_string()),
+        };
+        let res2 = range().parse("title:{* TO hello}").unwrap().0;
+        assert_eq!(res, expected);
+        assert_eq!(res2, expected);
+        let expected_weight = UserInputLeaf::Range {
+            field: Some("weight".to_string()),
+            lower: UserInputBound::Inclusive("71.2".to_string()),
+            upper: UserInputBound::Unbounded,
+        };
+
+        let res3 = range().parse("weight: >=71.2").unwrap().0;
+        let res4 = range().parse("weight:[71.2 TO *}").unwrap().0;
+        assert_eq!(res3, expected_weight);
+        assert_eq!(res4, expected_weight);
+    }
+
+    #[test]
+    fn test_parse_query_to_triming_spaces() {
+        test_parse_query_to_ast_helper("   abc", "\"abc\"");
+        test_parse_query_to_ast_helper("abc ", "\"abc\"");
+        test_parse_query_to_ast_helper("(  a OR abc)", "(?(\"a\") ?(\"abc\"))");
+        test_parse_query_to_ast_helper("(a  OR abc)", "(?(\"a\") ?(\"abc\"))");
+        test_parse_query_to_ast_helper("(a OR  abc)", "(?(\"a\") ?(\"abc\"))");
+        test_parse_query_to_ast_helper("a OR abc ", "(?(\"a\") ?(\"abc\"))");
+        test_parse_query_to_ast_helper("(a OR abc )", "(?(\"a\") ?(\"abc\"))");
+        test_parse_query_to_ast_helper("(a OR  abc) ", "(?(\"a\") ?(\"abc\"))");
+    }
+
    #[test]
    fn test_parse_query_to_ast() {
+        test_parse_query_to_ast_helper("abc", "\"abc\"");
+        test_parse_query_to_ast_helper("a b", "(\"a\" \"b\")");
+        test_parse_query_to_ast_helper("+(a b)", "+((\"a\" \"b\"))");
+        test_parse_query_to_ast_helper("+d", "+(\"d\")");
        test_parse_query_to_ast_helper("+(a b) +d", "(+((\"a\" \"b\")) +(\"d\"))");
        test_parse_query_to_ast_helper("(+a +b) d", "((+(\"a\") +(\"b\")) \"d\")");
        test_parse_query_to_ast_helper("(+a)", "+(\"a\")");
@@ -276,7 +373,7 @@ mod test {
        test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]");
        test_parse_query_to_ast_helper("foo:{a TO z}", "foo:{\"a\" TO \"z\"}");
        test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}");
-        test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}");
+        test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:{\"*\" TO \"toto\"}");
        test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}");
        test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}");
        test_is_parse_err("abc +    ");
--- a/src/query/query_parser/query_parser.rs
+++ b/src/query/query_parser/query_parser.rs
@@ -18,42 +18,56 @@ use crate::schema::{FieldType, Term};
 use crate::tokenizer::TokenizerManager;
 use combine::Parser;
 use std::borrow::Cow;
-use std::num::{ParseIntError, ParseFloatError};
+use std::num::{ParseFloatError, ParseIntError};
 use std::ops::Bound;
 use std::str::FromStr;

 /// Possible error that may happen when parsing a query.
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq, Fail)]
 pub enum QueryParserError {
    /// Error in the query syntax
+    #[fail(display = "Syntax Error")]
    SyntaxError,
    /// `FieldDoesNotExist(field_name: String)`
    /// The query references a field that is not in the schema
+    #[fail(display = "Field does not exist: '{:?}'", _0)]
    FieldDoesNotExist(String),
    /// The query contains a term for a `u64` or `i64`-field, but the value
    /// is neither.
+    #[fail(display = "Expected a valid integer: '{:?}'", _0)]
    ExpectedInt(ParseIntError),
    /// The query contains a term for a `f64`-field, but the value
    /// is not a f64.
+    #[fail(display = "Expected a valid float: '{:?}'", _0)]
    ExpectedFloat(ParseFloatError),
    /// It is forbidden queries that are only "excluding". (e.g. -title:pop)
+    #[fail(display = "Invalid query: Only excluding terms given")]
    AllButQueryForbidden,
    /// If no default field is declared, running a query without any
    /// field specified is forbbidden.
+    #[fail(display = "No default field declared and no field specified in query")]
    NoDefaultFieldDeclared,
    /// The field searched for is not declared
    /// as indexed in the schema.
+    #[fail(display = "The field '{:?}' is not declared as indexed", _0)]
    FieldNotIndexed(String),
    /// A phrase query was requested for a field that does not
    /// have any positions indexed.
+    #[fail(display = "The field '{:?}' does not have positions indexed", _0)]
    FieldDoesNotHavePositionsIndexed(String),
    /// The tokenizer for the given field is unknown
    /// The two argument strings are the name of the field, the name of the tokenizer
+    #[fail(
+        display = "The tokenizer '{:?}' for the field '{:?}' is unknown",
+        _1, _0
+    )]
    UnknownTokenizer(String, String),
    /// The query contains a range query with a phrase as one of the bounds.
    /// Only terms can be used as bounds.
+    #[fail(display = "A range query cannot have a phrase as one of the bounds")]
    RangeMustNotHavePhrase,
    /// The format for the date field is not RFC 3339 compliant.
+    #[fail(display = "The date field has an invalid format")]
    DateFormatError(chrono::ParseError),
 }

@@ -355,6 +369,7 @@ impl QueryParser {
        match *bound {
            UserInputBound::Inclusive(_) => Ok(Bound::Included(term)),
            UserInputBound::Exclusive(_) => Ok(Bound::Excluded(term)),
+            UserInputBound::Unbounded => Ok(Bound::Unbounded),
        }
    }

@@ -614,7 +629,7 @@ mod test {
    pub fn test_parse_query_untokenized() {
        test_parse_query_to_logical_ast_helper(
            "nottokenized:\"wordone wordtwo\"",
-            "Term([0, 0, 0, 7, 119, 111, 114, 100, 111, 110, \
+            "Term(field=7,bytes=[119, 111, 114, 100, 111, 110, \
             101, 32, 119, 111, 114, 100, 116, 119, 111])",
            false,
        );
@@ -658,7 +673,7 @@ mod test {
            .is_ok());
        test_parse_query_to_logical_ast_helper(
            "unsigned:2324",
-            "Term([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 9, 20])",
+            "Term(field=3,bytes=[0, 0, 0, 0, 0, 0, 9, 20])",
            false,
        );

@@ -676,22 +691,22 @@ mod test {
    }

    #[test]
-    pub fn test_parse_query_to_ast_disjunction() {
+    pub fn test_parse_query_to_ast_single_term() {
        test_parse_query_to_logical_ast_helper(
            "title:toto",
-            "Term([0, 0, 0, 0, 116, 111, 116, 111])",
+            "Term(field=0,bytes=[116, 111, 116, 111])",
            false,
        );
        test_parse_query_to_logical_ast_helper(
            "+title:toto",
-            "Term([0, 0, 0, 0, 116, 111, 116, 111])",
+            "Term(field=0,bytes=[116, 111, 116, 111])",
            false,
        );
        test_parse_query_to_logical_ast_helper(
            "+title:toto -titi",
-            "(+Term([0, 0, 0, 0, 116, 111, 116, 111]) \
-             -(Term([0, 0, 0, 0, 116, 105, 116, 105]) \
-             Term([0, 0, 0, 1, 116, 105, 116, 105])))",
+            "(+Term(field=0,bytes=[116, 111, 116, 111]) \
+             -(Term(field=0,bytes=[116, 105, 116, 105]) \
+             Term(field=1,bytes=[116, 105, 116, 105])))",
            false,
        );
        assert_eq!(
@@ -700,49 +715,67 @@ mod test {
                .unwrap(),
            QueryParserError::AllButQueryForbidden
        );
+    }
+
+    #[test]
+    pub fn test_parse_query_to_ast_two_terms() {
        test_parse_query_to_logical_ast_helper(
            "title:a b",
-            "(Term([0, 0, 0, 0, 97]) (Term([0, 0, 0, 0, 98]) \
-             Term([0, 0, 0, 1, 98])))",
+            "(Term(field=0,bytes=[97]) (Term(field=0,bytes=[98]) Term(field=1,bytes=[98])))",
            false,
        );
        test_parse_query_to_logical_ast_helper(
            "title:\"a b\"",
-            "\"[(0, Term([0, 0, 0, 0, 97])), \
-             (1, Term([0, 0, 0, 0, 98]))]\"",
+            "\"[(0, Term(field=0,bytes=[97])), \
+             (1, Term(field=0,bytes=[98]))]\"",
            false,
        );
+    }
+
+    #[test]
+    pub fn test_parse_query_to_ast_ranges() {
        test_parse_query_to_logical_ast_helper(
            "title:[a TO b]",
-            "(Included(Term([0, 0, 0, 0, 97])) TO \
-             Included(Term([0, 0, 0, 0, 98])))",
+            "(Included(Term(field=0,bytes=[97])) TO Included(Term(field=0,bytes=[98])))",
            false,
        );
        test_parse_query_to_logical_ast_helper(
            "[a TO b]",
-            "((Included(Term([0, 0, 0, 0, 97])) TO \
-             Included(Term([0, 0, 0, 0, 98]))) \
-             (Included(Term([0, 0, 0, 1, 97])) TO \
-             Included(Term([0, 0, 0, 1, 98]))))",
+            "((Included(Term(field=0,bytes=[97])) TO \
+             Included(Term(field=0,bytes=[98]))) \
+             (Included(Term(field=1,bytes=[97])) TO \
+             Included(Term(field=1,bytes=[98]))))",
            false,
        );
        test_parse_query_to_logical_ast_helper(
            "title:{titi TO toto}",
-            "(Excluded(Term([0, 0, 0, 0, 116, 105, 116, 105])) TO \
-             Excluded(Term([0, 0, 0, 0, 116, 111, 116, 111])))",
+            "(Excluded(Term(field=0,bytes=[116, 105, 116, 105])) TO \
+             Excluded(Term(field=0,bytes=[116, 111, 116, 111])))",
            false,
        );
        test_parse_query_to_logical_ast_helper(
            "title:{* TO toto}",
-            "(Unbounded TO \
-             Excluded(Term([0, 0, 0, 0, 116, 111, 116, 111])))",
+            "(Unbounded TO Excluded(Term(field=0,bytes=[116, 111, 116, 111])))",
            false,
        );
        test_parse_query_to_logical_ast_helper(
            "title:{titi TO *}",
-            "(Excluded(Term([0, 0, 0, 0, 116, 105, 116, 105])) TO Unbounded)",
+            "(Excluded(Term(field=0,bytes=[116, 105, 116, 105])) TO Unbounded)",
            false,
        );
+        test_parse_query_to_logical_ast_helper(
+            "signed:{-5 TO 3}",
+            "(Excluded(Term(field=2,bytes=[127, 255, 255, 255, 255, 255, 255, 251])) TO \
+             Excluded(Term(field=2,bytes=[128, 0, 0, 0, 0, 0, 0, 3])))",
+            false,
+        );
+        test_parse_query_to_logical_ast_helper(
+            "float:{-1.5 TO 1.5}",
+            "(Excluded(Term(field=10,bytes=[64, 7, 255, 255, 255, 255, 255, 255])) TO \
+             Excluded(Term(field=10,bytes=[191, 248, 0, 0, 0, 0, 0, 0])))",
+            false,
+        );
+
        test_parse_query_to_logical_ast_helper("*", "*", false);
    }

@@ -844,19 +877,19 @@ mod test {
    pub fn test_parse_query_to_ast_conjunction() {
        test_parse_query_to_logical_ast_helper(
            "title:toto",
-            "Term([0, 0, 0, 0, 116, 111, 116, 111])",
+            "Term(field=0,bytes=[116, 111, 116, 111])",
            true,
        );
        test_parse_query_to_logical_ast_helper(
            "+title:toto",
-            "Term([0, 0, 0, 0, 116, 111, 116, 111])",
+            "Term(field=0,bytes=[116, 111, 116, 111])",
            true,
        );
        test_parse_query_to_logical_ast_helper(
            "+title:toto -titi",
-            "(+Term([0, 0, 0, 0, 116, 111, 116, 111]) \
-             -(Term([0, 0, 0, 0, 116, 105, 116, 105]) \
-             Term([0, 0, 0, 1, 116, 105, 116, 105])))",
+            "(+Term(field=0,bytes=[116, 111, 116, 111]) \
+             -(Term(field=0,bytes=[116, 105, 116, 105]) \
+             Term(field=1,bytes=[116, 105, 116, 105])))",
            true,
        );
        assert_eq!(
@@ -867,16 +900,25 @@ mod test {
        );
        test_parse_query_to_logical_ast_helper(
            "title:a b",
-            "(+Term([0, 0, 0, 0, 97]) \
-             +(Term([0, 0, 0, 0, 98]) \
-             Term([0, 0, 0, 1, 98])))",
+            "(+Term(field=0,bytes=[97]) \
+             +(Term(field=0,bytes=[98]) \
+             Term(field=1,bytes=[98])))",
            true,
        );
        test_parse_query_to_logical_ast_helper(
            "title:\"a b\"",
-            "\"[(0, Term([0, 0, 0, 0, 97])), \
-             (1, Term([0, 0, 0, 0, 98]))]\"",
+            "\"[(0, Term(field=0,bytes=[97])), \
+             (1, Term(field=0,bytes=[98]))]\"",
            true,
        );
    }
+
+    #[test]
+    pub fn test_query_parser_hyphen() {
+        test_parse_query_to_logical_ast_helper(
+            "title:www-form-encoded",
+            "\"[(0, Term(field=0,bytes=[119, 119, 119])), (1, Term(field=0,bytes=[102, 111, 114, 109])), (2, Term(field=0,bytes=[101, 110, 99, 111, 100, 101, 100]))]\"",
+            false
+        );
+    }
 }
--- a/src/query/query_parser/stemmer.rs
+++ b/src/query/query_parser/stemmer.rs
@@ -1,44 +0,0 @@
-use std::sync::Arc;
-use stemmer;
-
-
-pub struct StemmerTokenStream<TailTokenStream> 
-    where TailTokenStream: TokenStream {
-    tail: TailTokenStream,
-    stemmer: Arc<stemmer::Stemmer>,
-}
-
-impl<TailTokenStream> TokenStream for StemmerTokenStream<TailTokenStream>
-    where TailTokenStream: TokenStream {
-
-    fn token(&self) -> &Token {
-        self.tail.token()
-    }
-    
-    fn token_mut(&mut self) -> &mut Token {
-        self.tail.token_mut()
-    }
-
-    fn advance(&mut self) -> bool {
-        if self.tail.advance() {
-            // self.tail.token_mut().term.make_ascii_lowercase();
-            let new_str = self.stemmer.stem_str(&self.token().term);
-            true
-        }
-        else {
-            false
-        }
-    }
-
-}
-
-impl<TailTokenStream> StemmerTokenStream<TailTokenStream>
-    where TailTokenStream: TokenStream {
-    
-    fn wrap(stemmer: Arc<stemmer::Stemmer>, tail: TailTokenStream) -> StemmerTokenStream<TailTokenStream> {
-        StemmerTokenStream {
-            tail,
-            stemmer,
-        }
-    } 
-}
--- a/src/query/query_parser/user_input_ast.rs
+++ b/src/query/query_parser/user_input_ast.rs
@@ -3,6 +3,7 @@ use std::fmt::{Debug, Formatter};

 use crate::query::Occur;

+#[derive(PartialEq)]
 pub enum UserInputLeaf {
    Literal(UserInputLiteral),
    All,
@@ -35,6 +36,7 @@ impl Debug for UserInputLeaf {
    }
 }

+#[derive(PartialEq)]
 pub struct UserInputLiteral {
    pub field_name: Option<String>,
    pub phrase: String,
@@ -49,9 +51,11 @@ impl fmt::Debug for UserInputLiteral {
    }
 }

+#[derive(PartialEq)]
 pub enum UserInputBound {
    Inclusive(String),
    Exclusive(String),
+    Unbounded,
 }

 impl UserInputBound {
@@ -59,6 +63,7 @@ impl UserInputBound {
        match *self {
            UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{}\"", word),
            UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{}\"", word),
+            UserInputBound::Unbounded => write!(formatter, "{{\"*\""),
        }
    }

@@ -66,6 +71,7 @@ impl UserInputBound {
        match *self {
            UserInputBound::Inclusive(ref word) => write!(formatter, "\"{}\"]", word),
            UserInputBound::Exclusive(ref word) => write!(formatter, "\"{}\"}}", word),
+            UserInputBound::Unbounded => write!(formatter, "\"*\"}}"),
        }
    }

@@ -73,6 +79,7 @@ impl UserInputBound {
        match *self {
            UserInputBound::Inclusive(ref contents) => contents,
            UserInputBound::Exclusive(ref contents) => contents,
+            UserInputBound::Unbounded => &"*",
        }
    }
 }
@@ -80,9 +87,6 @@ impl UserInputBound {
 pub enum UserInputAST {
    Clause(Vec<UserInputAST>),
    Unary(Occur, Box<UserInputAST>),
-    //    Not(Box<UserInputAST>),
-    //    Should(Box<UserInputAST>),
-    //    Must(Box<UserInputAST>),
    Leaf(Box<UserInputLeaf>),
 }

@@ -92,7 +96,7 @@ impl UserInputAST {
    }

    fn compose(occur: Occur, asts: Vec<UserInputAST>) -> UserInputAST {
-        assert!(occur != Occur::MustNot);
+        assert_ne!(occur, Occur::MustNot);
        assert!(!asts.is_empty());
        if asts.len() == 1 {
            asts.into_iter().next().unwrap() //< safe
@@ -105,6 +109,10 @@ impl UserInputAST {
        }
    }

+    pub fn empty_query() -> UserInputAST {
+        UserInputAST::Clause(Vec::default())
+    }
+
    pub fn and(asts: Vec<UserInputAST>) -> UserInputAST {
        UserInputAST::compose(Occur::Must, asts)
    }
@@ -114,42 +122,6 @@ impl UserInputAST {
    }
 }

-/*
-impl UserInputAST {
-
-    fn compose_occur(self, occur: Occur) -> UserInputAST {
-        match self {
-            UserInputAST::Not(other) => {
-                let new_occur = compose_occur(Occur::MustNot, occur);
-                other.simplify()
-            }
-            _ => {
-                self
-            }
-        }
-    }
-
-    pub fn simplify(self) -> UserInputAST {
-        match self {
-            UserInputAST::Clause(els) => {
-                if els.len() == 1 {
-                    return els.into_iter().next().unwrap();
-                } else {
-                    return self;
-                }
-            }
-            UserInputAST::Not(els) => {
-                if els.len() == 1 {
-                    return els.into_iter().next().unwrap();
-                } else {
-                    return self;
-                }
-            }
-        }
-    }
-}
-*/
-
 impl From<UserInputLiteral> for UserInputLeaf {
    fn from(literal: UserInputLiteral) -> UserInputLeaf {
        UserInputLeaf::Literal(literal)
--- a/src/query/range_query.rs
+++ b/src/query/range_query.rs
@@ -338,39 +338,33 @@ mod tests {
    use crate::collector::Count;
    use crate::schema::{Document, Field, Schema, INDEXED};
    use crate::Index;
-    use crate::Result;
    use std::collections::Bound;

    #[test]
    fn test_range_query_simple() {
-        fn run() -> Result<()> {
-            let mut schema_builder = Schema::builder();
-            let year_field = schema_builder.add_u64_field("year", INDEXED);
-            let schema = schema_builder.build();
+        let mut schema_builder = Schema::builder();
+        let year_field = schema_builder.add_u64_field("year", INDEXED);
+        let schema = schema_builder.build();

-            let index = Index::create_in_ram(schema);
-            {
-                let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
-                for year in 1950u64..2017u64 {
-                    let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
-                    for _ in 0..num_docs_within_year {
-                        index_writer.add_document(doc!(year_field => year));
-                    }
+        let index = Index::create_in_ram(schema);
+        {
+            let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
+            for year in 1950u64..2017u64 {
+                let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
+                for _ in 0..num_docs_within_year {
+                    index_writer.add_document(doc!(year_field => year));
                }
-                index_writer.commit().unwrap();
            }
-            let reader = index.reader().unwrap();
-            let searcher = reader.searcher();
-
-            let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64);
-
-            // ... or `1960..=1969` if inclusive range is enabled.
-            let count = searcher.search(&docs_in_the_sixties, &Count)?;
-            assert_eq!(count, 2285);
-            Ok(())
+            index_writer.commit().unwrap();
        }
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();

-        run().unwrap();
+        let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64);
+
+        // ... or `1960..=1969` if inclusive range is enabled.
+        let count = searcher.search(&docs_in_the_sixties, &Count).unwrap();
+        assert_eq!(count, 2285);
    }

    #[test]
@@ -460,7 +454,10 @@ mod tests {
        let count_multiples =
            |range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();

-        assert_eq!(count_multiples(RangeQuery::new_f64(float_field, 10.0..11.0)), 9);
+        assert_eq!(
+            count_multiples(RangeQuery::new_f64(float_field, 10.0..11.0)),
+            9
+        );
        assert_eq!(
            count_multiples(RangeQuery::new_f64_bounds(
                float_field,
--- a/src/query/term_query/mod.rs
+++ b/src/query/term_query/mod.rs
@@ -12,7 +12,7 @@ mod tests {
    use crate::collector::TopDocs;
    use crate::docset::DocSet;
    use crate::query::{Query, QueryParser, Scorer, TermQuery};
-    use crate::schema::{IndexRecordOption, Schema, STRING, TEXT};
+    use crate::schema::{Field, IndexRecordOption, Schema, STRING, TEXT};
    use crate::tests::assert_nearly_equals;
    use crate::Index;
    use crate::Term;
@@ -114,4 +114,16 @@ mod tests {
        let reader = index.reader().unwrap();
        assert_eq!(term_query.count(&*reader.searcher()).unwrap(), 1);
    }
+
+    #[test]
+    fn test_term_query_debug() {
+        let term_query = TermQuery::new(
+            Term::from_field_text(Field(1), "hello"),
+            IndexRecordOption::WithFreqs,
+        );
+        assert_eq!(
+            format!("{:?}", term_query),
+            "TermQuery(Term(field=1,bytes=[104, 101, 108, 108, 111]))"
+        );
+    }
 }
--- a/src/query/term_query/term_query.rs
+++ b/src/query/term_query/term_query.rs
@@ -7,6 +7,7 @@ use crate::Result;
 use crate::Searcher;
 use crate::Term;
 use std::collections::BTreeSet;
+use std::fmt;

 /// A Term query matches all of the documents
 /// containing a specific term.
@@ -61,12 +62,18 @@ use std::collections::BTreeSet;
 ///     Ok(())
 /// }
 /// ```
-#[derive(Clone, Debug)]
+#[derive(Clone)]
 pub struct TermQuery {
    term: Term,
    index_record_option: IndexRecordOption,
 }

+impl fmt::Debug for TermQuery {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "TermQuery({:?})", self.term)
+    }
+}
+
 impl TermQuery {
    /// Creates a new term query.
    pub fn new(term: Term, segment_postings_options: IndexRecordOption) -> TermQuery {
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -1,6 +1,7 @@
 mod pool;

-use self::pool::{LeasedItem, Pool};
+pub use self::pool::LeasedItem;
+use self::pool::Pool;
 use crate::core::Segment;
 use crate::directory::Directory;
 use crate::directory::WatchHandle;
--- a/src/reader/pool.rs
+++ b/src/reader/pool.rs
@@ -123,6 +123,10 @@ impl<T> Pool<T> {
    }
 }

+/// A LeasedItem holds an object borrowed from a Pool.
+///
+/// Upon drop, the object is automatically returned
+/// into the pool.
 pub struct LeasedItem<T> {
    gen_item: Option<GenerationItem<T>>,
    recycle_queue: Arc<Queue<GenerationItem<T>>>,
--- a/src/schema/facet.rs
+++ b/src/schema/facet.rs
@@ -120,9 +120,7 @@ impl Facet {

    /// Extract path from the `Facet`.
    pub fn to_path(&self) -> Vec<&str> {
-        self.encoded_str()
-            .split(|c| c == FACET_SEP_CHAR)
-            .collect()
+        self.encoded_str().split(|c| c == FACET_SEP_CHAR).collect()
    }
 }

--- a/src/schema/field_entry.rs
+++ b/src/schema/field_entry.rs
@@ -108,7 +108,9 @@ impl FieldEntry {
    /// Returns true iff the field is a int (signed or unsigned) fast field
    pub fn is_int_fast(&self) -> bool {
        match self.field_type {
-            FieldType::U64(ref options) | FieldType::I64(ref options) | FieldType::F64(ref options) => options.is_fast(),
+            FieldType::U64(ref options)
+            | FieldType::I64(ref options)
+            | FieldType::F64(ref options) => options.is_fast(),
            _ => false,
        }
    }
--- a/src/schema/field_type.rs
+++ b/src/schema/field_type.rs
@@ -10,7 +10,7 @@ use serde_json::Value as JsonValue;

 /// Possible error that may occur while parsing a field value
 /// At this point the JSON is known to be valid.
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum ValueParsingError {
    /// Encountered a numerical value that overflows or underflow its integer type.
    OverflowError(String),
@@ -83,9 +83,9 @@ impl FieldType {
    pub fn is_indexed(&self) -> bool {
        match *self {
            FieldType::Str(ref text_options) => text_options.get_indexing_options().is_some(),
-            FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) => {
-                int_options.is_indexed()
-            }
+            FieldType::U64(ref int_options)
+            | FieldType::I64(ref int_options)
+            | FieldType::F64(ref int_options) => int_options.is_indexed(),
            FieldType::Date(ref date_options) => date_options.is_indexed(),
            FieldType::HierarchicalFacet => true,
            FieldType::Bytes => false,
@@ -125,9 +125,12 @@ impl FieldType {
        match *json {
            JsonValue::String(ref field_text) => match *self {
                FieldType::Str(_) => Ok(Value::Str(field_text.clone())),
-                FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => Err(
-                    ValueParsingError::TypeError(format!("Expected an integer, got {:?}", json)),
-                ),
+                FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => {
+                    Err(ValueParsingError::TypeError(format!(
+                        "Expected an integer, got {:?}",
+                        json
+                    )))
+                }
                FieldType::HierarchicalFacet => Ok(Value::Facet(Facet::from(field_text))),
                FieldType::Bytes => decode(field_text).map(Value::Bytes).map_err(|_| {
                    ValueParsingError::InvalidBase64(format!(
@@ -152,7 +155,7 @@ impl FieldType {
                        let msg = format!("Expected a u64 int, got {:?}", json);
                        Err(ValueParsingError::OverflowError(msg))
                    }
-                },
+                }
                FieldType::F64(_) => {
                    if let Some(field_val_f64) = field_val_num.as_f64() {
                        Ok(Value::F64(field_val_f64))
--- a/src/schema/schema.rs
+++ b/src/schema/schema.rs
@@ -246,6 +246,25 @@ impl Schema {
        self.0.fields_map.get(field_name).cloned()
    }

+    /// Build a `Document` from a `NamedFieldDocument`, erroring on fields absent from the schema.
+    pub fn convert_named_doc(
+        &self,
+        named_doc: NamedFieldDocument,
+    ) -> Result<Document, DocParsingError> {
+        let mut document = Document::new();
+        for (field_name, values) in named_doc.0 {
+            if let Some(field) = self.get_field(&field_name) {
+                for value in values {
+                    let field_value = FieldValue::new(field, value);
+                    document.add(field_value);
+                }
+            } else {
+                return Err(DocParsingError::NoSuchFieldInSchema(field_name));
+            }
+        }
+        Ok(document)
+    }
+
    /// Create a named document off the doc.
    pub fn to_named_doc(&self, doc: &Document) -> NamedFieldDocument {
        let mut field_map = BTreeMap::new();
@@ -360,13 +379,19 @@ impl<'de> Deserialize<'de> for Schema {

 /// Error that may happen when deserializing
 /// a document from JSON.
-#[derive(Debug)]
+#[derive(Debug, Fail, PartialEq)]
 pub enum DocParsingError {
    /// The payload given is not valid JSON.
+    #[fail(display = "The provided string is not valid JSON")]
    NotJSON(String),
    /// One of the value node could not be parsed.
+    #[fail(display = "The field '{:?}' could not be parsed: {:?}", _0, _1)]
    ValueError(String, ValueParsingError),
    /// The json-document contains a field that is not declared in the schema.
+    #[fail(
+        display = "The document contains a field that is not declared in the schema: {:?}",
+        _0
+    )]
    NoSuchFieldInSchema(String),
 }

@@ -378,6 +403,7 @@ mod tests {
    use crate::schema::*;
    use matches::{assert_matches, matches};
    use serde_json;
+    use std::collections::BTreeMap;

    #[test]
    pub fn is_indexed_test() {
@@ -492,6 +518,54 @@ mod tests {
        assert_eq!(doc, doc_serdeser);
    }

+    #[test]
+    pub fn test_document_from_nameddoc() {
+        let mut schema_builder = Schema::builder();
+        let title = schema_builder.add_text_field("title", TEXT);
+        let val = schema_builder.add_i64_field("val", INDEXED);
+        let schema = schema_builder.build();
+        let mut named_doc_map = BTreeMap::default();
+        named_doc_map.insert(
+            "title".to_string(),
+            vec![Value::from("title1"), Value::from("title2")],
+        );
+        named_doc_map.insert(
+            "val".to_string(),
+            vec![Value::from(14u64), Value::from(-1i64)],
+        );
+        let doc = schema
+            .convert_named_doc(NamedFieldDocument(named_doc_map))
+            .unwrap();
+        assert_eq!(
+            doc.get_all(title),
+            vec![
+                &Value::from("title1".to_string()),
+                &Value::from("title2".to_string())
+            ]
+        );
+        assert_eq!(
+            doc.get_all(val),
+            vec![&Value::from(14u64), &Value::from(-1i64)]
+        );
+    }
+
+    #[test]
+    pub fn test_document_from_nameddoc_error() {
+        let schema = Schema::builder().build();
+        let mut named_doc_map = BTreeMap::default();
+        named_doc_map.insert(
+            "title".to_string(),
+            vec![Value::from("title1"), Value::from("title2")],
+        );
+        let err = schema
+            .convert_named_doc(NamedFieldDocument(named_doc_map))
+            .unwrap_err();
+        assert_eq!(
+            err,
+            DocParsingError::NoSuchFieldInSchema("title".to_string())
+        );
+    }
+
    #[test]
    pub fn test_parse_document() {
        let mut schema_builder = Schema::builder();
--- a/src/schema/term.rs
+++ b/src/schema/term.rs
@@ -224,7 +224,12 @@ where

 impl fmt::Debug for Term {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "Term({:?})", &self.0[..])
+        write!(
+            f,
+            "Term(field={},bytes={:?})",
+            self.field().0,
+            self.value_bytes()
+        )
    }
 }

--- a/src/schema/value.rs
+++ b/src/schema/value.rs
@@ -2,7 +2,7 @@ use crate::schema::Facet;
 use crate::DateTime;
 use serde::de::Visitor;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
-use std::{fmt, cmp::Ordering};
+use std::{cmp::Ordering, fmt};

 /// Value represents the value of a any field.
 /// It is an enum over all over all of the possible field type.
@@ -27,7 +27,7 @@ pub enum Value {
 impl Eq for Value {}
 impl Ord for Value {
    fn cmp(&self, other: &Self) -> Ordering {
-        match (self,other) {
+        match (self, other) {
            (Value::Str(l), Value::Str(r)) => l.cmp(r),
            (Value::U64(l), Value::U64(r)) => l.cmp(r),
            (Value::I64(l), Value::I64(r)) => l.cmp(r),
@@ -35,7 +35,7 @@ impl Ord for Value {
            (Value::Facet(l), Value::Facet(r)) => l.cmp(r),
            (Value::Bytes(l), Value::Bytes(r)) => l.cmp(r),
            (Value::F64(l), Value::F64(r)) => {
-                match (l.is_nan(),r.is_nan()) {
+                match (l.is_nan(), r.is_nan()) {
                    (false, false) => l.partial_cmp(r).unwrap(), // only fail on NaN
                    (true, true) => Ordering::Equal,
                    (true, false) => Ordering::Less, // we define NaN as less than -∞
@@ -155,7 +155,7 @@ impl Value {
            Value::F64(ref value) => *value,
            _ => panic!("This is not a f64 field."),
        }
-    }    
+    }

    /// Returns the Date-value, provided the value is of the `Date` type.
    ///
@@ -219,7 +219,7 @@ impl From<Vec<u8>> for Value {

 mod binary_serialize {
    use super::Value;
-    use crate::common::{BinarySerializable, f64_to_u64, u64_to_f64};
+    use crate::common::{f64_to_u64, u64_to_f64, BinarySerializable};
    use crate::schema::Facet;
    use chrono::{TimeZone, Utc};
    use std::io::{self, Read, Write};
--- a/src/snippet/mod.rs
+++ b/src/snippet/mod.rs
@@ -63,7 +63,7 @@ impl FragmentCandidate {
    fn try_add_token(&mut self, token: &Token, terms: &BTreeMap<String, f32>) {
        self.stop_offset = token.offset_to;

-        if let Some(score) = terms.get(&token.text.to_lowercase()) {
+        if let Some(&score) = terms.get(&token.text.to_lowercase()) {
            self.score += score;
            self.highlighted
                .push(HighlightSection::new(token.offset_from, token.offset_to));
@@ -142,7 +142,7 @@ impl Snippet {
 /// Fragments must be valid in the sense that `&text[fragment.start..fragment.stop]`\
 /// has to be a valid string.
 fn search_fragments<'a>(
-    tokenizer: &dyn BoxedTokenizer,
+    tokenizer: &BoxedTokenizer,
    text: &'a str,
    terms: &BTreeMap<String, f32>,
    max_num_chars: usize,
@@ -150,7 +150,6 @@ fn search_fragments<'a>(
    let mut token_stream = tokenizer.token_stream(text);
    let mut fragment = FragmentCandidate::new(0);
    let mut fragments: Vec<FragmentCandidate> = vec![];
-
    while let Some(next) = token_stream.next() {
        if (next.offset_to - fragment.start_offset) > max_num_chars {
            if fragment.score > 0.0 {
@@ -254,7 +253,7 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str)
 /// ```
 pub struct SnippetGenerator {
    terms_text: BTreeMap<String, f32>,
-    tokenizer: Box<dyn BoxedTokenizer>,
+    tokenizer: BoxedTokenizer,
    field: Field,
    max_num_chars: usize,
 }
@@ -316,12 +315,8 @@ impl SnippetGenerator {

    /// Generates a snippet for the given text.
    pub fn snippet(&self, text: &str) -> Snippet {
-        let fragment_candidates = search_fragments(
-            &*self.tokenizer,
-            &text,
-            &self.terms_text,
-            self.max_num_chars,
-        );
+        let fragment_candidates =
+            search_fragments(&self.tokenizer, &text, &self.terms_text, self.max_num_chars);
        select_best_fragment_combination(&fragment_candidates[..], &text)
    }
 }
@@ -331,7 +326,7 @@ mod tests {
    use super::{search_fragments, select_best_fragment_combination};
    use crate::query::QueryParser;
    use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions, TEXT};
-    use crate::tokenizer::{box_tokenizer, SimpleTokenizer};
+    use crate::tokenizer::SimpleTokenizer;
    use crate::Index;
    use crate::SnippetGenerator;
    use maplit::btreemap;
@@ -355,12 +350,12 @@ Survey in 2016, 2017, and 2018."#;

    #[test]
    fn test_snippet() {
-        let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
+        let boxed_tokenizer = SimpleTokenizer.into();
        let terms = btreemap! {
            String::from("rust") => 1.0,
            String::from("language") => 0.9
        };
-        let fragments = search_fragments(&*boxed_tokenizer, TEST_TEXT, &terms, 100);
+        let fragments = search_fragments(&boxed_tokenizer, TEST_TEXT, &terms, 100);
        assert_eq!(fragments.len(), 7);
        {
            let first = &fragments[0];
@@ -382,13 +377,13 @@ Survey in 2016, 2017, and 2018."#;

    #[test]
    fn test_snippet_scored_fragment() {
-        let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
+        let boxed_tokenizer = SimpleTokenizer.into();
        {
            let terms = btreemap! {
                String::from("rust") =>1.0f32,
                String::from("language") => 0.9f32
            };
-            let fragments = search_fragments(&*boxed_tokenizer, TEST_TEXT, &terms, 20);
+            let fragments = search_fragments(&boxed_tokenizer, TEST_TEXT, &terms, 20);
            {
                let first = &fragments[0];
                assert_eq!(first.score, 1.0);
@@ -397,13 +392,13 @@ Survey in 2016, 2017, and 2018."#;
            let snippet = select_best_fragment_combination(&fragments[..], &TEST_TEXT);
            assert_eq!(snippet.to_html(), "<b>Rust</b> is a systems")
        }
-        let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
+        let boxed_tokenizer = SimpleTokenizer.into();
        {
            let terms = btreemap! {
                String::from("rust") =>0.9f32,
                String::from("language") => 1.0f32
            };
-            let fragments = search_fragments(&*boxed_tokenizer, TEST_TEXT, &terms, 20);
+            let fragments = search_fragments(&boxed_tokenizer, TEST_TEXT, &terms, 20);
            //assert_eq!(fragments.len(), 7);
            {
                let first = &fragments[0];
@@ -417,14 +412,14 @@ Survey in 2016, 2017, and 2018."#;

    #[test]
    fn test_snippet_in_second_fragment() {
-        let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
+        let boxed_tokenizer = SimpleTokenizer.into();

        let text = "a b c d e f g";

        let mut terms = BTreeMap::new();
        terms.insert(String::from("c"), 1.0);

-        let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3);
+        let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3);

        assert_eq!(fragments.len(), 1);
        {
@@ -441,14 +436,14 @@ Survey in 2016, 2017, and 2018."#;

    #[test]
    fn test_snippet_with_term_at_the_end_of_fragment() {
-        let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
+        let boxed_tokenizer = SimpleTokenizer.into();

        let text = "a b c d e f f g";

        let mut terms = BTreeMap::new();
        terms.insert(String::from("f"), 1.0);

-        let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3);
+        let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3);

        assert_eq!(fragments.len(), 2);
        {
@@ -465,7 +460,7 @@ Survey in 2016, 2017, and 2018."#;

    #[test]
    fn test_snippet_with_second_fragment_has_the_highest_score() {
-        let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
+        let boxed_tokenizer = SimpleTokenizer.into();

        let text = "a b c d e f g";

@@ -473,7 +468,7 @@ Survey in 2016, 2017, and 2018."#;
        terms.insert(String::from("f"), 1.0);
        terms.insert(String::from("a"), 0.9);

-        let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 7);
+        let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 7);

        assert_eq!(fragments.len(), 2);
        {
@@ -490,14 +485,14 @@ Survey in 2016, 2017, and 2018."#;

    #[test]
    fn test_snippet_with_term_not_in_text() {
-        let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
+        let boxed_tokenizer = SimpleTokenizer.into();

        let text = "a b c d";

        let mut terms = BTreeMap::new();
        terms.insert(String::from("z"), 1.0);

-        let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3);
+        let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3);

        assert_eq!(fragments.len(), 0);

@@ -508,12 +503,12 @@ Survey in 2016, 2017, and 2018."#;

    #[test]
    fn test_snippet_with_no_terms() {
-        let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
+        let boxed_tokenizer = SimpleTokenizer.into();

        let text = "a b c d";

        let terms = BTreeMap::new();
-        let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3);
+        let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3);
        assert_eq!(fragments.len(), 0);

        let snippet = select_best_fragment_combination(&fragments[..], &text);
--- a/src/termdict/streamer.rs
+++ b/src/termdict/streamer.rs
@@ -2,7 +2,7 @@ use super::TermDictionary;
 use crate::postings::TermInfo;
 use crate::termdict::TermOrdinal;
 use tantivy_fst::automaton::AlwaysMatch;
-use tantivy_fst::map::{Stream, StreamBuilder};
+use tantivy_fst::map::{Stream, StreamBuilder, StreamWithState};
 use tantivy_fst::Automaton;
 use tantivy_fst::{IntoStreamer, Streamer};

@@ -11,6 +11,7 @@ use tantivy_fst::{IntoStreamer, Streamer};
 pub struct TermStreamerBuilder<'a, A = AlwaysMatch>
 where
    A: Automaton,
+    A::State: Clone,
 {
    fst_map: &'a TermDictionary,
    stream_builder: StreamBuilder<'a, A>,
@@ -19,6 +20,7 @@ where
 impl<'a, A> TermStreamerBuilder<'a, A>
 where
    A: Automaton,
+    A::State: Clone + Default + Sized,
 {
    pub(crate) fn new(fst_map: &'a TermDictionary, stream_builder: StreamBuilder<'a, A>) -> Self {
        TermStreamerBuilder {
@@ -56,10 +58,11 @@ where
    pub fn into_stream(self) -> TermStreamer<'a, A> {
        TermStreamer {
            fst_map: self.fst_map,
-            stream: self.stream_builder.into_stream(),
+            stream: self.stream_builder.with_state().into_stream(),
            term_ord: 0u64,
            current_key: Vec::with_capacity(100),
            current_value: TermInfo::default(),
+            state: Default::default(),
        }
    }
 }
@@ -69,27 +72,31 @@ where
 pub struct TermStreamer<'a, A = AlwaysMatch>
 where
    A: Automaton,
+    A::State: Clone + Default + Sized,
 {
    fst_map: &'a TermDictionary,
-    stream: Stream<'a, A>,
+    stream: StreamWithState<'a, A>,
    term_ord: TermOrdinal,
    current_key: Vec<u8>,
    current_value: TermInfo,
+    state: A::State,
 }

 impl<'a, A> TermStreamer<'a, A>
 where
    A: Automaton,
+    A::State: Clone + Default + Sized,
 {
    /// Advance position the stream on the next item.
    /// Before the first call to `.advance()`, the stream
    /// is an unitialized state.
    pub fn advance(&mut self) -> bool {
-        if let Some((term, term_ord)) = self.stream.next() {
+        if let Some((term, term_ord, state)) = self.stream.next() {
            self.current_key.clear();
            self.current_key.extend_from_slice(term);
            self.term_ord = term_ord;
            self.current_value = self.fst_map.term_info_from_ord(term_ord);
+            self.state = state;
            true
        } else {
            false
@@ -118,6 +125,10 @@ where
        &self.current_key
    }

+    pub fn state(&self) -> &A::State {
+        &self.state
+    }
+
    /// Accesses the current value.
    ///
    /// Calling `.value()` after the end of the stream will return the
--- a/src/termdict/termdict.rs
+++ b/src/termdict/termdict.rs
@@ -197,7 +197,11 @@ impl TermDictionary {

    /// Returns a search builder, to stream all of the terms
    /// within the Automaton
-    pub fn search<'a, A: Automaton + 'a>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A> {
+    pub fn search<'a, A>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A>
+    where
+        A: Automaton + 'a,
+        A::State: Clone + Default + Sized,
+    {
        let stream_builder = self.fst_index.search(automaton);
        TermStreamerBuilder::<A>::new(self, stream_builder)
    }
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -155,7 +155,6 @@ pub use self::simple_tokenizer::SimpleTokenizer;
 pub use self::stemmer::{Language, Stemmer};
 pub use self::stop_word_filter::StopWordFilter;
 pub(crate) use self::token_stream_chain::TokenStreamChain;
-pub(crate) use self::tokenizer::box_tokenizer;
 pub use self::tokenizer::BoxedTokenizer;

 pub use self::tokenizer::{Token, TokenFilter, TokenStream, Tokenizer};
--- a/src/tokenizer/tokenizer.rs
+++ b/src/tokenizer/tokenizer.rs
@@ -56,8 +56,6 @@ pub trait Tokenizer<'a>: Sized + Clone {
    /// # Example
    ///
    /// ```rust
-    /// # extern crate tantivy;
-    ///
    /// use tantivy::tokenizer::*;
    ///
    /// # fn main() {
@@ -80,7 +78,7 @@ pub trait Tokenizer<'a>: Sized + Clone {
 }

 /// A boxed tokenizer
-pub trait BoxedTokenizer: Send + Sync {
+trait BoxedTokenizerTrait: Send + Sync {
    /// Tokenize a `&str`
    fn token_stream<'a>(&self, text: &'a str) -> Box<dyn TokenStream + 'a>;

@@ -92,7 +90,41 @@ pub trait BoxedTokenizer: Send + Sync {
    fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<dyn TokenStream + 'b>;

    /// Return a boxed clone of the tokenizer
-    fn boxed_clone(&self) -> Box<dyn BoxedTokenizer>;
+    fn boxed_clone(&self) -> BoxedTokenizer;
+}
+
+/// A boxed tokenizer
+pub struct BoxedTokenizer(Box<dyn BoxedTokenizerTrait>);
+
+impl<T> From<T> for BoxedTokenizer
+where
+    T: 'static + Send + Sync + for<'a> Tokenizer<'a>,
+{
+    fn from(tokenizer: T) -> BoxedTokenizer {
+        BoxedTokenizer(Box::new(BoxableTokenizer(tokenizer)))
+    }
+}
+
+impl BoxedTokenizer {
+    /// Tokenize a `&str`
+    pub fn token_stream<'a>(&self, text: &'a str) -> Box<dyn TokenStream + 'a> {
+        self.0.token_stream(text)
+    }
+
+    /// Tokenize an array of `&str`
+    ///
+    /// The resulting `TokenStream` is equivalent to what would be obtained if the &str were
+    /// one concatenated `&str`, with an artificial position gap of `2` between the different fields
+    /// to prevent accidental `PhraseQuery` matches across two terms.
+    pub fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<dyn TokenStream + 'b> {
+        self.0.token_stream_texts(texts)
+    }
+}
+
+impl Clone for BoxedTokenizer {
+    fn clone(&self) -> BoxedTokenizer {
+        self.0.boxed_clone()
+    }
 }

 #[derive(Clone)]
@@ -100,7 +132,7 @@ struct BoxableTokenizer<A>(A)
 where
    A: for<'a> Tokenizer<'a> + Send + Sync;

-impl<A> BoxedTokenizer for BoxableTokenizer<A>
+impl<A> BoxedTokenizerTrait for BoxableTokenizer<A>
 where
    A: 'static + Send + Sync + for<'a> Tokenizer<'a>,
 {
@@ -125,18 +157,11 @@ where
        }
    }

-    fn boxed_clone(&self) -> Box<dyn BoxedTokenizer> {
-        Box::new(self.clone())
+    fn boxed_clone(&self) -> BoxedTokenizer {
+        self.0.clone().into()
    }
 }

-pub(crate) fn box_tokenizer<A>(a: A) -> Box<dyn BoxedTokenizer>
-where
-    A: 'static + Send + Sync + for<'a> Tokenizer<'a>,
-{
-    Box::new(BoxableTokenizer(a))
-}
-
 impl<'b> TokenStream for Box<dyn TokenStream + 'b> {
    fn advance(&mut self) -> bool {
        let token_stream: &mut dyn TokenStream = self.borrow_mut();
@@ -161,7 +186,6 @@ impl<'b> TokenStream for Box<dyn TokenStream + 'b> {
 /// # Example
 ///
 /// ```
-/// extern crate tantivy;
 /// use tantivy::tokenizer::*;
 ///
 /// # fn main() {
@@ -203,7 +227,6 @@ pub trait TokenStream {
    /// and `.token()`.
    ///
    /// ```
-    /// # extern crate tantivy;
    /// # use tantivy::tokenizer::*;
    /// #
    /// # fn main() {
--- a/src/tokenizer/tokenizer_manager.rs
+++ b/src/tokenizer/tokenizer_manager.rs
@@ -1,4 +1,3 @@
-use crate::tokenizer::box_tokenizer;
 use crate::tokenizer::stemmer::Language;
 use crate::tokenizer::BoxedTokenizer;
 use crate::tokenizer::LowerCaser;
@@ -8,7 +7,6 @@ use crate::tokenizer::SimpleTokenizer;
 use crate::tokenizer::Stemmer;
 use crate::tokenizer::Tokenizer;
 use std::collections::HashMap;
-use std::ops::Deref;
 use std::sync::{Arc, RwLock};

 /// The tokenizer manager serves as a store for
@@ -25,16 +23,16 @@ use std::sync::{Arc, RwLock};
 ///  search engine.
 #[derive(Clone)]
 pub struct TokenizerManager {
-    tokenizers: Arc<RwLock<HashMap<String, Box<dyn BoxedTokenizer>>>>,
+    tokenizers: Arc<RwLock<HashMap<String, BoxedTokenizer>>>,
 }

 impl TokenizerManager {
    /// Registers a new tokenizer associated with a given name.
    pub fn register<A>(&self, tokenizer_name: &str, tokenizer: A)
    where
-        A: 'static + Send + Sync + for<'a> Tokenizer<'a>,
+        A: Into<BoxedTokenizer>,
    {
-        let boxed_tokenizer = box_tokenizer(tokenizer);
+        let boxed_tokenizer = tokenizer.into();
        self.tokenizers
            .write()
            .expect("Acquiring the lock should never fail")
@@ -42,13 +40,12 @@ impl TokenizerManager {
    }

    /// Accessing a tokenizer given its name.
-    pub fn get(&self, tokenizer_name: &str) -> Option<Box<dyn BoxedTokenizer>> {
+    pub fn get(&self, tokenizer_name: &str) -> Option<BoxedTokenizer> {
        self.tokenizers
            .read()
            .expect("Acquiring the lock should never fail")
            .get(tokenizer_name)
-            .map(Deref::deref)
-            .map(BoxedTokenizer::boxed_clone)
+            .cloned()
    }
 }

--- a/tests/failpoints/mod.rs
+++ b/tests/failpoints/mod.rs
@@ -8,7 +8,7 @@ use tantivy::{Index, Term};

 #[test]
 fn test_failpoints_managed_directory_gc_if_delete_fails() {
-    let scenario = fail::FailScenario::setup();
+    let _scenario = fail::FailScenario::setup();

    let test_path: &'static Path = Path::new("some_path_for_test");
Author	SHA1	Message	Date
Paul Masurel	790baa7adf	Integrated state into TermDict streamer	2019-08-16 10:29:28 +09:00
Paul Masurel	039c0a0863	Introducing a wrapper struct instead of Boxed<BoxableTokenizer> (#631 ) Closes #629	2019-08-15 16:37:04 +09:00
Paul Masurel	b3b0138b82	Change for tantivy-py Schema.convert_named_doc Better Debug string for Terms and TermQueries	2019-08-14 17:44:25 +09:00
petr-tik	ea56160cdc	Added cargo-fmt to CI runs (#627 ) * Added cargo-fmt to CI runs Closes #625 * Remove fmt from appveyor builds Windows seems to have issues with install components through rustup. Formatting should be equally informative regardless of the OS, so best to keep it in Linux on Travis	2019-08-12 08:25:47 +09:00
petr-tik	028b0a749c	Elastic unbounded range query (#624 ) * Tidy up fmt remove unneccessary -> Result<()> followed by run.unwrap() in a test * Adding support for elasticsearch-style unbounded queries Extend the UserInputBound to include Unbounded, so we can reuse formatting and internal query format * Still working on elastic-style range queries Fixes #498 Merge the elastic_range into range Reformat to make code easier to follow, use optional() macro to return Some * Fixed bugs Made the range parser insensitive to whitespace between the ":" and the range. Removed optional parsing of field. Added a unit test for the range parser. Derived PartialEq to compare the results of parsing as structs, instead of strings. Found a bug with that unit test - "}" was parsed as an UserInputBound::Exclusive, instead of UserInputBound::Unbounded. Added an early detection-and-return for in the original range parser * Correct failing test Assume that we will use "{" for Unbounded ranges Add a note in the changelog cargo-fmt * Moved parenthesis to a newline to make nested if-else more visible	2019-08-12 08:24:47 +09:00
Paul Masurel	941f06eb9f	Added Schema.from_named_doc	2019-08-11 16:50:32 +09:00
Paul Masurel	04832a86eb	WTF is this file doing here (#622 )	2019-08-08 21:54:10 +09:00
fdb-hiroshima	beb8e990cd	fix parsing neg float in range query (#621 ) fix #620	2019-08-08 20:41:04 +09:00
Paul Masurel	001af3876f	cargo fmt	2019-08-08 18:07:19 +09:00
Paul Masurel	f428f344da	Various bugfix in the query parser (#619 )	2019-08-08 17:48:21 +09:00
Paul Masurel	143f78eced	Trying to fix #609 (#616 )	2019-08-06 20:33:30 +09:00
Kornel	754b55eee5	Bump deps (#613 ) * Bump crossbeam * Warnings-- * Remove outdated tempdir	2019-08-05 22:21:22 +09:00
Paul Masurel	280ea1209c	Changes required for python binding (#610 )	2019-08-01 17:26:21 +09:00
petr-tik	0154dbe477	Replace unwrap with match and proper Error handling (#606 ) * Replace unwrap with match and proper Error handling * Replaced 'magic' values with a documented variable Didn't like the unexplained 0..3 range, thought it was best as a variable Calculating Levenshtein distance is expensive, so best explain why we should keep it low	2019-07-31 08:16:02 +09:00