Compare commits


6 Commits

Author SHA1 Message Date
Paul Masurel
790baa7adf Integrated state into TermDict streamer 2019-08-16 10:29:28 +09:00
Paul Masurel
039c0a0863 Introducing a wrapper struct instead of Box<dyn BoxableTokenizer> (#631)
Closes #629
2019-08-15 16:37:04 +09:00
Paul Masurel
b3b0138b82 Change for tantivy-py
Schema.convert_named_doc
Better Debug string for Terms and TermQueries
2019-08-14 17:44:25 +09:00
petr-tik
ea56160cdc Added cargo-fmt to CI runs (#627)
* Added cargo-fmt to CI runs

Closes #625

* Remove fmt from appveyor builds

Windows seems to have issues with installing components through rustup.

Formatting should be equally informative regardless of the OS,
so it is best to keep it on Linux on Travis
2019-08-12 08:25:47 +09:00
petr-tik
028b0a749c Elastic unbounded range query (#624)
* Tidy up

fmt

remove unnecessary -> Result<()> followed by run.unwrap() in a test

* Adding support for elasticsearch-style unbounded queries

Extend UserInputBound to include Unbounded, so we can reuse the formatting and
internal query format

* Still working on elastic-style range queries

Fixes #498

Merge the elastic_range into range

Reformat to make code easier to follow, use optional() macro to return Some

* Fixed bugs

Made the range parser insensitive to whitespace between the ":" and the range.

Removed optional parsing of field.

Added a unit test for the range parser.

Derived PartialEq to compare the results of parsing as structs, instead of
strings. That unit test uncovered a bug: "*}" was parsed as a
UserInputBound::Exclusive instead of UserInputBound::Unbounded. Added an early
detection-and-return for * in the original range parser

* Correct failing test

Assume that we will use "{*" for Unbounded ranges

* Add a note in the changelog

cargo-fmt

* Moved parenthesis to a newline to make nested if-else more visible
2019-08-12 08:24:47 +09:00
Paul Masurel
941f06eb9f Added Schema.from_named_doc 2019-08-11 16:50:32 +09:00
36 changed files with 670 additions and 486 deletions

View File

@@ -47,6 +47,7 @@ matrix:
before_install:
- set -e
- rustup self update
- rustup component add rustfmt
install:
- sh ci/install.sh
@@ -60,6 +61,7 @@ before_script:
script:
- bash ci/script.sh
- cargo fmt --all -- --check
before_deploy:
- sh ci/before_deploy.sh

View File

@@ -5,7 +5,12 @@ Tantivy 0.11.0
- Various bugfixes in the query parser.
- Better handling of hyphens in query parser. (#609)
- Better handling of whitespaces.
- Closes #498 - add support for Elastic-style unbounded range queries for alphanumeric types, e.g. "title:>hello", "weight:>=70.5", "height:<200" (@petr-tik)
- API change around `Box<dyn BoxableTokenizer>`. See details in #629
## How to update?
`Box<dyn BoxableTokenizer>` has been replaced by a `BoxedTokenizer` struct.
Tantivy 0.10.1
=====================

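For anyone updating across the `BoxedTokenizer` change described above, a minimal before/after sketch (based on the API shown in this compare view; the exact import paths are an assumption):

    use tantivy::tokenizer::{BoxedTokenizer, SimpleTokenizer};

    // Before: tokenizers were handled as `Box<dyn BoxableTokenizer>` trait objects.
    // After: `BoxedTokenizer` is a concrete, cloneable struct, and any
    // `Tokenizer` converts into it via `Into`.
    let tokenizer: BoxedTokenizer = SimpleTokenizer.into();
    let mut stream = tokenizer.token_stream("hello world");
    while stream.advance() {
        // each iteration exposes the next `Token` through `stream.token()`
    }
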
View File

@@ -17,7 +17,7 @@ base64 = "0.10.0"
byteorder = "1.0"
once_cell = "0.2"
regex = "1.0"
tantivy-fst = "0.1"
tantivy-fst = {git="https://github.com/tantivy-search/fst"}
memmap = {version = "0.7", optional=true}
lz4 = {version="1.20", optional=true}
snap = {version="0.2"}
@@ -62,7 +62,6 @@ rand = "0.7"
maplit = "1"
matches = "0.1.8"
time = "0.1.42"
criterion = "0.2"
[profile.release]
opt-level = 3
@@ -75,7 +74,6 @@ overflow-checks = true
[features]
default = ["mmap"]
forbench = []
mmap = ["atomicwrites", "fs2", "memmap", "notify"]
lz4-compression = ["lz4"]
failpoints = ["fail/failpoints"]
@@ -99,15 +97,3 @@ features = ["failpoints"]
name = "failpoints"
path = "tests/failpoints/mod.rs"
required-features = ["fail/failpoints"]
[profile.bench]
lto = true
[[bench]]
name = "vint"
harness = false
[[bench]]
name = "fastfield"
harness = false

View File

@@ -1,73 +0,0 @@
use criterion::{criterion_group, criterion_main, Criterion};
use rand::distributions::{Bernoulli, Uniform};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use tantivy::forbench::bitset::{BitSet, TinySet};
use tantivy::query::BitSetDocSet;
use tantivy::DocSet;
fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
StdRng::from_seed([seed_val; 32])
.sample_iter(&Bernoulli::new(ratio).unwrap())
.take(n as usize)
.enumerate()
.filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
.collect()
}
fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
let seed: [u8; 32] = [1; 32];
StdRng::from_seed(seed)
.sample_iter(&Uniform::new(0u32, max_value))
.take(n_elems)
.collect::<Vec<u32>>()
}
fn bench_tinyset_pop(criterion: &mut Criterion) {
criterion.bench_function("pop_lowest", |b| {
b.iter(|| {
let mut tinyset = TinySet::singleton(criterion::black_box(31u32));
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
})
});
}
fn bench_bitset_insert(criterion: &mut Criterion) {
criterion.bench_function_over_inputs(
"bitset_insert",
|bench, (max_value, n_elems)| {
let els = generate_nonunique_unsorted(*max_value, *n_elems);
bench.iter(move || {
let mut bitset = BitSet::with_max_value(1_000_000);
for el in els.iter().cloned() {
bitset.insert(el);
}
});
},
vec![(1_000_000u32, 10_000)],
);
}
fn bench_bitsetdocset_iterate(b: &mut test::Bencher) {
let mut bitset = BitSet::with_max_value(1_000_000);
for el in sample_with_seed(1_000_000u32, 0.01, 0u8) {
bitset.insert(el);
}
b.iter(|| {
let mut docset = BitSetDocSet::from(bitset.clone());
while docset.advance() {}
});
}
criterion_group!(
benches,
bench_tinyset_pop,
bench_bitset_insert,
bench_bitsetdocset_iterate
);
criterion_main!(benches);

View File

@@ -1,107 +0,0 @@
use criterion::criterion_group;
use criterion::criterion_main;
use criterion::Criterion;
use criterion::ParameterizedBenchmark;
use rand::rngs::StdRng;
use rand::seq::SliceRandom;
use rand::SeedableRng;
use tantivy::schema::{Schema, FAST};
use tantivy::{doc, DocId, Index};
const NUM_LOOKUPS: usize = 1_000;
fn generate_permutation(stride: usize, bit_width: u8) -> Vec<u64> {
let mut permutation: Vec<u64> = (0u64..(NUM_LOOKUPS * stride) as u64).collect();
permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
permutation.push(1u64 << (bit_width as u64)); //< just to force the bit_width
permutation
}
fn bench_linear_lookup(c: &mut Criterion) {
c.bench(
"lookup_stride",
ParameterizedBenchmark::new(
"baseline_vec",
|bench, (stride, num_bits)| {
let arr = generate_permutation(*stride, *num_bits);
bench.iter(move || {
let mut a = 0u64;
for i in (0..NUM_LOOKUPS / stride).map(|v| v * 7) {
a ^= arr[i as usize];
}
a
})
},
vec![(7, 1), (7, 5), (7, 20)],
)
.with_function("fastfield", |bench, (stride, num_bits)| {
let mut schema_builder = Schema::builder();
let val_field = schema_builder.add_u64_field("val", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
for el in generate_permutation(*stride, *num_bits) {
index_writer.add_document(doc!(val_field=>el));
}
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(0u32);
let fast_field_reader = segment_reader.fast_fields().u64(val_field).unwrap();
bench.iter(move || {
let mut a = 0u64;
for i in (0..NUM_LOOKUPS / stride).map(|v| v * 7) {
a ^= fast_field_reader.get(i as DocId);
}
a
})
}),
);
}
fn bench_jumpy_lookup(c: &mut Criterion) {
c.bench(
"lookup_jumpy",
ParameterizedBenchmark::new(
"baseline_vec",
|bench, (stride, num_bits)| {
let arr = generate_permutation(*stride, *num_bits);
bench.iter(move || {
let mut a = 0u64;
for _ in 0..NUM_LOOKUPS {
a = arr[a as usize];
}
a
})
},
vec![(7, 1), (7, 5), (7, 20)],
)
.with_function("fastfield", |bench, (stride, num_bits)| {
let mut schema_builder = Schema::builder();
let val_field = schema_builder.add_u64_field("val", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
for el in generate_permutation(*stride, *num_bits) {
index_writer.add_document(doc!(val_field=>el));
}
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(0u32);
let fast_field_reader = segment_reader.fast_fields().u64(val_field).unwrap();
bench.iter(move || {
let mut a = 0u64;
for _ in 0..NUM_LOOKUPS {
a = fast_field_reader.get(a as DocId);
}
a
})
}),
);
}
criterion_group!(benches, bench_linear_lookup, bench_jumpy_lookup);
criterion_main!(benches);

View File

@@ -1,50 +0,0 @@
use criterion::{criterion_group, criterion_main, Criterion};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, STRING};
use tantivy::{Document, Index};
fn bench_union(criterion: &mut Criterion) {
criterion.bench_function_over_inputs(
"union_docset_fulladvance",
|bench, (ratio_left, ratio_right)| {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_text_field("val", STRING);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
let mut stdrng = StdRng::from_seed([0u8; 32]);
for _ in 0u32..100_000u32 {
let mut doc = Document::default();
if stdrng.gen_bool(*ratio_left) {
doc.add_text(field, "left");
}
if stdrng.gen_bool(*ratio_right) {
doc.add_text(field, "right");
}
index_writer.add_document(doc);
}
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let query = QueryParser::for_index(&index, vec![field])
.parse_query("left right")
.unwrap();
bench.iter(move || {
let weight = query.weight(&searcher, false).unwrap();
let mut scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
let mut sum_docs = 0u64;
scorer.for_each(&mut |doc_id, _score| {
sum_docs += doc_id as u64;
});
});
},
vec![(0.2, 0.1), (0.2, 0.02)],
);
}
criterion_group!(benches, bench_union);
criterion_main!(benches);

View File

@@ -1,72 +0,0 @@
use criterion::{criterion_group, criterion_main, Criterion, ParameterizedBenchmark};
use rand::rngs::StdRng;
use rand::Rng;
use rand::SeedableRng;
use tantivy::forbench::compression::{compressed_block_size, BlockDecoder};
use tantivy::forbench::compression::{BlockEncoder, VIntEncoder};
use tantivy::forbench::compression::{VIntDecoder, COMPRESSION_BLOCK_SIZE};
fn generate_array_with_seed(n: usize, ratio: f64, seed_val: u8) -> Vec<u32> {
let seed: [u8; 32] = [seed_val; 32];
let mut rng = StdRng::from_seed(seed);
(0u32..).filter(|_| rng.gen_bool(ratio)).take(n).collect()
}
pub fn generate_array(n: usize, ratio: f64) -> Vec<u32> {
generate_array_with_seed(n, ratio, 4)
}
fn bench_compress(criterion: &mut Criterion) {
criterion.bench(
"compress_sorted",
ParameterizedBenchmark::new(
"bitpack",
|bench, ratio| {
let mut encoder = BlockEncoder::new();
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
bench.iter(|| {
encoder.compress_block_sorted(&data, 0u32);
});
},
vec![0.1],
)
.with_function("vint", |bench, ratio| {
let mut encoder = BlockEncoder::new();
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
bench.iter(|| {
encoder.compress_vint_sorted(&data, 0u32);
});
}),
);
}
fn bench_uncompress(criterion: &mut Criterion) {
criterion.bench(
"uncompress_sorted",
ParameterizedBenchmark::new(
"bitpack",
|bench, ratio| {
let mut encoder = BlockEncoder::new();
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
let mut decoder = BlockDecoder::new();
bench.iter(|| {
decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
});
},
vec![0.1],
)
.with_function("vint", |bench, ratio| {
let mut encoder = BlockEncoder::new();
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
let compressed = encoder.compress_vint_sorted(&data, 0u32);
let mut decoder = BlockDecoder::new();
bench.iter(move || {
decoder.uncompress_vint_sorted(compressed, 0u32, COMPRESSION_BLOCK_SIZE);
});
}),
);
}
criterion_group!(benches, bench_compress, bench_uncompress);
criterion_main!(benches);

View File

@@ -1,2 +0,0 @@
#!/usr/bin/env bash
cargo bench --features forbench

View File

@@ -86,7 +86,6 @@ where
}
}
#[inline(always)]
pub fn get(&self, idx: u64) -> u64 {
if self.num_bits == 0 {
return 0u64;

View File

@@ -2,7 +2,7 @@ use std::fmt;
use std::u64;
#[derive(Clone, Copy, Eq, PartialEq)]
pub struct TinySet(u64);
pub(crate) struct TinySet(u64);
impl fmt::Debug for TinySet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -353,3 +353,43 @@ mod tests {
}
}
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::BitSet;
use super::TinySet;
use test;
#[bench]
fn bench_tinyset_pop(b: &mut test::Bencher) {
b.iter(|| {
let mut tinyset = TinySet::singleton(test::black_box(31u32));
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
});
}
#[bench]
fn bench_tinyset_sum(b: &mut test::Bencher) {
let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
b.iter(|| {
assert_eq!(test::black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
});
}
#[bench]
fn bench_tinyarr_sum(b: &mut test::Bencher) {
let v = [10u32, 14u32, 21u32];
b.iter(|| test::black_box(v).iter().cloned().sum::<u32>());
}
#[bench]
fn bench_bitset_initialize(b: &mut test::Bencher) {
b.iter(|| BitSet::with_max_value(1_000_000));
}
}

View File

@@ -6,7 +6,7 @@ mod serialize;
mod vint;
pub use self::bitset::BitSet;
pub use self::bitset::TinySet;
pub(crate) use self::bitset::TinySet;
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::counting_writer::CountingWriter;
pub use self::serialize::{BinarySerializable, FixedSize};

View File

@@ -173,11 +173,11 @@ impl Index {
}
/// Helper to access the tokenizer associated to a specific field.
pub fn tokenizer_for_field(&self, field: Field) -> Result<Box<dyn BoxedTokenizer>> {
pub fn tokenizer_for_field(&self, field: Field) -> Result<BoxedTokenizer> {
let field_entry = self.schema.get_field_entry(field);
let field_type = field_entry.field_type();
let tokenizer_manager: &TokenizerManager = self.tokenizers();
let tokenizer_name_opt: Option<Box<dyn BoxedTokenizer>> = match field_type {
let tokenizer_name_opt: Option<BoxedTokenizer> = match field_type {
FieldType::Str(text_options) => text_options
.get_indexing_options()
.map(|text_indexing_options| text_indexing_options.tokenizer().to_string())

View File

@@ -431,3 +431,111 @@ mod tests {
}
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::tests::FIELD;
use super::tests::{generate_permutation, SCHEMA};
use super::*;
use common::CompositeFile;
use directory::{Directory, RAMDirectory, WritePtr};
use fastfield::FastFieldReader;
use std::collections::HashMap;
use std::path::Path;
use test::{self, Bencher};
#[bench]
fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {
let permutation = generate_permutation();
b.iter(|| {
let n = test::black_box(7000u32);
let mut a = 0u64;
for i in (0u32..n / 7).map(|v| v * 7) {
a ^= permutation[i as usize];
}
a
});
}
#[bench]
fn bench_intfastfield_veclookup(b: &mut Bencher) {
let permutation = generate_permutation();
b.iter(|| {
let n = test::black_box(1000u32);
let mut a = 0u64;
for _ in 0u32..n {
a = permutation[a as usize];
}
a
});
}
#[bench]
fn bench_intfastfield_linear_fflookup(b: &mut Bencher) {
let path = Path::new("test");
let permutation = generate_permutation();
let mut directory: RAMDirectory = RAMDirectory::create();
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
for &x in &permutation {
fast_field_writers.add_document(&doc!(*FIELD=>x));
}
fast_field_writers
.serialize(&mut serializer, &HashMap::new())
.unwrap();
serializer.close().unwrap();
}
let source = directory.open_read(&path).unwrap();
{
let fast_fields_composite = CompositeFile::open(&source).unwrap();
let data = fast_fields_composite.open_read(*FIELD).unwrap();
let fast_field_reader = FastFieldReader::<u64>::open(data);
b.iter(|| {
let n = test::black_box(7000u32);
let mut a = 0u64;
for i in (0u32..n / 7).map(|val| val * 7) {
a ^= fast_field_reader.get(i);
}
a
});
}
}
#[bench]
fn bench_intfastfield_fflookup(b: &mut Bencher) {
let path = Path::new("test");
let permutation = generate_permutation();
let mut directory: RAMDirectory = RAMDirectory::create();
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
for &x in &permutation {
fast_field_writers.add_document(&doc!(*FIELD=>x));
}
fast_field_writers
.serialize(&mut serializer, &HashMap::new())
.unwrap();
serializer.close().unwrap();
}
let source = directory.open_read(&path).unwrap();
{
let fast_fields_composite = CompositeFile::open(&source).unwrap();
let data = fast_fields_composite.open_read(*FIELD).unwrap();
let fast_field_reader = FastFieldReader::<u64>::open(data);
b.iter(|| {
let n = test::black_box(1000u32);
let mut a = 0u32;
for _ in 0u32..n {
a = fast_field_reader.get(a) as u32;
}
a
});
}
}
}

View File

@@ -67,12 +67,10 @@ impl<Item: FastValue> FastFieldReader<Item> {
///
/// May panic if `doc` is greater than the segment's `max_doc`.
#[inline(always)]
pub fn get(&self, doc: DocId) -> Item {
self.get_u64(u64::from(doc))
}
#[inline(always)]
pub(crate) fn get_u64(&self, doc: u64) -> Item {
Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc))
}

View File

@@ -49,7 +49,7 @@ pub struct SegmentWriter {
fast_field_writers: FastFieldsWriter,
fieldnorms_writer: FieldNormsWriter,
doc_opstamps: Vec<Opstamp>,
tokenizers: Vec<Option<Box<dyn BoxedTokenizer>>>,
tokenizers: Vec<Option<BoxedTokenizer>>,
}
impl SegmentWriter {

View File

@@ -249,6 +249,7 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId);
#[cfg(test)]
mod tests {
use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
use crate::core::SegmentReader;
use crate::docset::DocSet;
@@ -894,15 +895,3 @@ mod tests {
}
}
}
#[cfg(feature = "forbench")]
pub mod forbench {
pub mod compression {
pub use crate::postings::compression::*;
}
pub mod bitset {
pub use crate::common::BitSet;
pub use crate::common::TinySet;
}
}

View File

@@ -160,9 +160,9 @@ impl VIntEncoder for BlockEncoder {
}
impl VIntDecoder for BlockDecoder {
fn uncompress_vint_sorted(
fn uncompress_vint_sorted<'a>(
&mut self,
compressed_data: &[u8],
compressed_data: &'a [u8],
offset: u32,
num_els: usize,
) -> usize {
@@ -170,7 +170,7 @@ impl VIntDecoder for BlockDecoder {
vint::uncompress_sorted(compressed_data, &mut self.output.0[..num_els], offset)
}
fn uncompress_vint_unsorted(&mut self, compressed_data: &[u8], num_els: usize) -> usize {
fn uncompress_vint_unsorted<'a>(&mut self, compressed_data: &'a [u8], num_els: usize) -> usize {
self.output_len = num_els;
vint::uncompress_unsorted(compressed_data, &mut self.output.0[..num_els])
}
@@ -268,17 +268,78 @@ pub mod tests {
}
}
}
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::*;
use rand::SeedableRng;
use rand::{Rng, XorShiftRng};
use test::Bencher;
fn generate_array_with_seed(n: usize, ratio: f64, seed_val: u8) -> Vec<u32> {
let seed: &[u8; 16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, seed_val];
let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
(0u32..).filter(|_| rng.gen_bool(ratio)).take(n).collect()
}
pub fn generate_array(n: usize, ratio: f64) -> Vec<u32> {
generate_array_with_seed(n, ratio, 4)
}
#[bench]
fn bench_compress(b: &mut Bencher) {
let mut encoder = BlockEncoder::new();
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
b.iter(|| {
encoder.compress_block_sorted(&data, 0u32);
});
}
#[bench]
fn bench_uncompress(b: &mut Bencher) {
let mut encoder = BlockEncoder::new();
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
let mut decoder = BlockDecoder::new();
b.iter(|| {
decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
});
}
#[test]
fn test_all_docs_compression_numbits() {
for expected_num_bits in 0u8..33u8 {
for expected_num_bits in 0u8.. {
let mut data = [0u32; 128];
if expected_num_bits > 0 {
data[0] = (1u64 << (expected_num_bits as u64) - 1u64) as u32;
data[0] = (1u64 << (expected_num_bits as usize) - 1) as u32;
}
let mut encoder = BlockEncoder::new();
let (num_bits, compressed) = encoder.compress_block_unsorted(&data);
assert_eq!(compressed.len(), compressed_block_size(num_bits));
}
}
const NUM_INTS_BENCH_VINT: usize = 10;
#[bench]
fn bench_compress_vint(b: &mut Bencher) {
let mut encoder = BlockEncoder::new();
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
b.iter(|| {
encoder.compress_vint_sorted(&data, 0u32);
});
}
#[bench]
fn bench_uncompress_vint(b: &mut Bencher) {
let mut encoder = BlockEncoder::new();
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
let compressed = encoder.compress_vint_sorted(&data, 0u32);
let mut decoder = BlockDecoder::new();
b.iter(|| {
decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT);
});
}
}

View File

@@ -3,7 +3,6 @@ Postings module (also called inverted index)
*/
mod block_search;
pub(crate) mod compression;
/// Postings module
///

View File

@@ -14,6 +14,7 @@ use tantivy_fst::Automaton;
pub struct AutomatonWeight<A>
where
A: Automaton + Send + Sync + 'static,
A::State: Clone + Default + Sized,
{
field: Field,
automaton: A,
@@ -22,6 +23,7 @@ where
impl<A> AutomatonWeight<A>
where
A: Automaton + Send + Sync + 'static,
A::State: Clone + Default + Sized,
{
/// Create a new AutomatonWeight
pub fn new(field: Field, automaton: A) -> AutomatonWeight<A> {
@@ -37,6 +39,7 @@ where
impl<A> Weight for AutomatonWeight<A>
where
A: Automaton + Send + Sync + 'static,
A::State: Clone + Default + Sized,
{
fn scorer(&self, reader: &SegmentReader) -> Result<Box<dyn Scorer>> {
let max_doc = reader.max_doc();

View File

@@ -218,3 +218,49 @@ mod tests {
}
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::BitSet;
use super::BitSetDocSet;
use test;
use tests;
use DocSet;
#[bench]
fn bench_bitset_1pct_insert(b: &mut test::Bencher) {
use tests;
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
b.iter(|| {
let mut bitset = BitSet::with_max_value(1_000_000);
for el in els.iter().cloned() {
bitset.insert(el);
}
});
}
#[bench]
fn bench_bitset_1pct_clone(b: &mut test::Bencher) {
use tests;
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
let mut bitset = BitSet::with_max_value(1_000_000);
for el in els {
bitset.insert(el);
}
b.iter(|| bitset.clone());
}
#[bench]
fn bench_bitset_1pct_clone_iterate(b: &mut test::Bencher) {
let els = tests::sample(1_000_000u32, 0.01);
let mut bitset = BitSet::with_max_value(1_000_000);
for el in els {
bitset.insert(el);
}
b.iter(|| {
let mut docset = BitSetDocSet::from(bitset.clone());
while docset.advance() {}
});
}
}

View File

@@ -83,28 +83,67 @@ parser! {
}
parser! {
/// Function that parses a range out of a Stream
/// Supports ranges like:
/// [5 TO 10], {5 TO 10}, [* TO 10], [10 TO *], {10 TO *], >5, <=10
/// [a TO *], [a TO c], [abc TO bcd}
fn range[I]()(I) -> UserInputLeaf
where [I: Stream<Item = char>] {
let range_term_val = || {
word().or(negative_number()).or(char('*').with(value("*".to_string())))
};
// check for unbounded range in the form of <5, <=10, >5, >=5
let elastic_unbounded_range = (choice([attempt(string(">=")),
attempt(string("<=")),
attempt(string("<")),
attempt(string(">"))])
.skip(spaces()),
range_term_val()).
map(|(comparison_sign, bound): (&str, String)|
match comparison_sign {
">=" => (UserInputBound::Inclusive(bound), UserInputBound::Unbounded),
"<=" => (UserInputBound::Unbounded, UserInputBound::Inclusive(bound)),
"<" => (UserInputBound::Unbounded, UserInputBound::Exclusive(bound)),
">" => (UserInputBound::Exclusive(bound), UserInputBound::Unbounded),
// default case
_ => (UserInputBound::Unbounded, UserInputBound::Unbounded)
});
let lower_bound = (one_of("{[".chars()), range_term_val())
.map(|(boundary_char, lower_bound): (char, String)|
if boundary_char == '{' { UserInputBound::Exclusive(lower_bound) }
else { UserInputBound::Inclusive(lower_bound) });
if lower_bound == "*" {
UserInputBound::Unbounded
} else if boundary_char == '{' {
UserInputBound::Exclusive(lower_bound)
} else {
UserInputBound::Inclusive(lower_bound)
});
let upper_bound = (range_term_val(), one_of("}]".chars()))
.map(|(higher_bound, boundary_char): (String, char)|
if boundary_char == '}' { UserInputBound::Exclusive(higher_bound) }
else { UserInputBound::Inclusive(higher_bound) });
(
optional(field()),
lower_bound
.skip((spaces(), string("TO"), spaces())),
upper_bound,
).map(|(field, lower, upper)| UserInputLeaf::Range {
field,
lower,
upper
if higher_bound == "*" {
UserInputBound::Unbounded
} else if boundary_char == '}' {
UserInputBound::Exclusive(higher_bound)
} else {
UserInputBound::Inclusive(higher_bound)
});
// return only lower and upper
let lower_to_upper = (lower_bound.
skip((spaces(),
string("TO"),
spaces())),
upper_bound);
(optional(field()).skip(spaces()),
// try elastic first, if it matches, the range is unbounded
attempt(elastic_unbounded_range).or(lower_to_upper))
.map(|(field, (lower, upper))|
// Construct the leaf from extracted field (optional)
// and bounds
UserInputLeaf::Range {
field,
lower,
upper
})
}
}
@@ -258,6 +297,49 @@ mod test {
);
}
#[test]
fn test_parse_elastic_query_ranges() {
test_parse_query_to_ast_helper("title: >a", "title:{\"a\" TO \"*\"}");
test_parse_query_to_ast_helper("title:>=a", "title:[\"a\" TO \"*\"}");
test_parse_query_to_ast_helper("title: <a", "title:{\"*\" TO \"a\"}");
test_parse_query_to_ast_helper("title:<=a", "title:{\"*\" TO \"a\"]");
test_parse_query_to_ast_helper("title:<=bsd", "title:{\"*\" TO \"bsd\"]");
test_parse_query_to_ast_helper("weight: >70", "weight:{\"70\" TO \"*\"}");
test_parse_query_to_ast_helper("weight:>=70", "weight:[\"70\" TO \"*\"}");
test_parse_query_to_ast_helper("weight: <70", "weight:{\"*\" TO \"70\"}");
test_parse_query_to_ast_helper("weight:<=70", "weight:{\"*\" TO \"70\"]");
test_parse_query_to_ast_helper("weight: >60.7", "weight:{\"60.7\" TO \"*\"}");
test_parse_query_to_ast_helper("weight: <= 70", "weight:{\"*\" TO \"70\"]");
test_parse_query_to_ast_helper("weight: <= 70.5", "weight:{\"*\" TO \"70.5\"]");
}
#[test]
fn test_range_parser() {
// testing the range() parser separately
let res = range().parse("title: <hello").unwrap().0;
let expected = UserInputLeaf::Range {
field: Some("title".to_string()),
lower: UserInputBound::Unbounded,
upper: UserInputBound::Exclusive("hello".to_string()),
};
let res2 = range().parse("title:{* TO hello}").unwrap().0;
assert_eq!(res, expected);
assert_eq!(res2, expected);
let expected_weight = UserInputLeaf::Range {
field: Some("weight".to_string()),
lower: UserInputBound::Inclusive("71.2".to_string()),
upper: UserInputBound::Unbounded,
};
let res3 = range().parse("weight: >=71.2").unwrap().0;
let res4 = range().parse("weight:[71.2 TO *}").unwrap().0;
assert_eq!(res3, expected_weight);
assert_eq!(res4, expected_weight);
}
#[test]
fn test_parse_query_to_triming_spaces() {
test_parse_query_to_ast_helper(" abc", "\"abc\"");
@@ -291,7 +373,7 @@ mod test {
test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]");
test_parse_query_to_ast_helper("foo:{a TO z}", "foo:{\"a\" TO \"z\"}");
test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}");
test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}");
test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:{\"*\" TO \"toto\"}");
test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}");
test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}");
test_is_parse_err("abc + ");

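As a rough end-to-end sketch of the new elastic-style syntax (the schema and field name are illustrative, not taken from this diff):

    use tantivy::query::QueryParser;
    use tantivy::schema::{Schema, INDEXED};
    use tantivy::Index;

    let mut schema_builder = Schema::builder();
    let weight = schema_builder.add_u64_field("weight", INDEXED);
    let index = Index::create_in_ram(schema_builder.build());

    let parser = QueryParser::for_index(&index, vec![weight]);
    // Equivalent to the explicit unbounded form `weight:[70 TO *}`.
    let query = parser.parse_query("weight:>=70").unwrap();
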
View File

@@ -369,6 +369,7 @@ impl QueryParser {
match *bound {
UserInputBound::Inclusive(_) => Ok(Bound::Included(term)),
UserInputBound::Exclusive(_) => Ok(Bound::Excluded(term)),
UserInputBound::Unbounded => Ok(Bound::Unbounded),
}
}
@@ -628,7 +629,7 @@ mod test {
pub fn test_parse_query_untokenized() {
test_parse_query_to_logical_ast_helper(
"nottokenized:\"wordone wordtwo\"",
"Term([0, 0, 0, 7, 119, 111, 114, 100, 111, 110, \
"Term(field=7,bytes=[119, 111, 114, 100, 111, 110, \
101, 32, 119, 111, 114, 100, 116, 119, 111])",
false,
);
@@ -672,7 +673,7 @@ mod test {
.is_ok());
test_parse_query_to_logical_ast_helper(
"unsigned:2324",
"Term([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 9, 20])",
"Term(field=3,bytes=[0, 0, 0, 0, 0, 0, 9, 20])",
false,
);
@@ -693,19 +694,19 @@ mod test {
pub fn test_parse_query_to_ast_single_term() {
test_parse_query_to_logical_ast_helper(
"title:toto",
"Term([0, 0, 0, 0, 116, 111, 116, 111])",
"Term(field=0,bytes=[116, 111, 116, 111])",
false,
);
test_parse_query_to_logical_ast_helper(
"+title:toto",
"Term([0, 0, 0, 0, 116, 111, 116, 111])",
"Term(field=0,bytes=[116, 111, 116, 111])",
false,
);
test_parse_query_to_logical_ast_helper(
"+title:toto -titi",
"(+Term([0, 0, 0, 0, 116, 111, 116, 111]) \
-(Term([0, 0, 0, 0, 116, 105, 116, 105]) \
Term([0, 0, 0, 1, 116, 105, 116, 105])))",
"(+Term(field=0,bytes=[116, 111, 116, 111]) \
-(Term(field=0,bytes=[116, 105, 116, 105]) \
Term(field=1,bytes=[116, 105, 116, 105])))",
false,
);
assert_eq!(
@@ -720,14 +721,13 @@ mod test {
pub fn test_parse_query_to_ast_two_terms() {
test_parse_query_to_logical_ast_helper(
"title:a b",
"(Term([0, 0, 0, 0, 97]) (Term([0, 0, 0, 0, 98]) \
Term([0, 0, 0, 1, 98])))",
"(Term(field=0,bytes=[97]) (Term(field=0,bytes=[98]) Term(field=1,bytes=[98])))",
false,
);
test_parse_query_to_logical_ast_helper(
"title:\"a b\"",
"\"[(0, Term([0, 0, 0, 0, 97])), \
(1, Term([0, 0, 0, 0, 98]))]\"",
"\"[(0, Term(field=0,bytes=[97])), \
(1, Term(field=0,bytes=[98]))]\"",
false,
);
}
@@ -736,45 +736,43 @@ mod test {
pub fn test_parse_query_to_ast_ranges() {
test_parse_query_to_logical_ast_helper(
"title:[a TO b]",
"(Included(Term([0, 0, 0, 0, 97])) TO \
Included(Term([0, 0, 0, 0, 98])))",
"(Included(Term(field=0,bytes=[97])) TO Included(Term(field=0,bytes=[98])))",
false,
);
test_parse_query_to_logical_ast_helper(
"[a TO b]",
"((Included(Term([0, 0, 0, 0, 97])) TO \
Included(Term([0, 0, 0, 0, 98]))) \
(Included(Term([0, 0, 0, 1, 97])) TO \
Included(Term([0, 0, 0, 1, 98]))))",
"((Included(Term(field=0,bytes=[97])) TO \
Included(Term(field=0,bytes=[98]))) \
(Included(Term(field=1,bytes=[97])) TO \
Included(Term(field=1,bytes=[98]))))",
false,
);
test_parse_query_to_logical_ast_helper(
"title:{titi TO toto}",
"(Excluded(Term([0, 0, 0, 0, 116, 105, 116, 105])) TO \
Excluded(Term([0, 0, 0, 0, 116, 111, 116, 111])))",
"(Excluded(Term(field=0,bytes=[116, 105, 116, 105])) TO \
Excluded(Term(field=0,bytes=[116, 111, 116, 111])))",
false,
);
test_parse_query_to_logical_ast_helper(
"title:{* TO toto}",
"(Unbounded TO \
Excluded(Term([0, 0, 0, 0, 116, 111, 116, 111])))",
"(Unbounded TO Excluded(Term(field=0,bytes=[116, 111, 116, 111])))",
false,
);
test_parse_query_to_logical_ast_helper(
"title:{titi TO *}",
"(Excluded(Term([0, 0, 0, 0, 116, 105, 116, 105])) TO Unbounded)",
"(Excluded(Term(field=0,bytes=[116, 105, 116, 105])) TO Unbounded)",
false,
);
test_parse_query_to_logical_ast_helper(
"signed:{-5 TO 3}",
"(Excluded(Term([0, 0, 0, 2, 127, 255, 255, 255, 255, 255, 255, 251])) TO \
Excluded(Term([0, 0, 0, 2, 128, 0, 0, 0, 0, 0, 0, 3])))",
"(Excluded(Term(field=2,bytes=[127, 255, 255, 255, 255, 255, 255, 251])) TO \
Excluded(Term(field=2,bytes=[128, 0, 0, 0, 0, 0, 0, 3])))",
false,
);
test_parse_query_to_logical_ast_helper(
"float:{-1.5 TO 1.5}",
"(Excluded(Term([0, 0, 0, 10, 64, 7, 255, 255, 255, 255, 255, 255])) TO \
Excluded(Term([0, 0, 0, 10, 191, 248, 0, 0, 0, 0, 0, 0])))",
"(Excluded(Term(field=10,bytes=[64, 7, 255, 255, 255, 255, 255, 255])) TO \
Excluded(Term(field=10,bytes=[191, 248, 0, 0, 0, 0, 0, 0])))",
false,
);
@@ -879,19 +877,19 @@ mod test {
pub fn test_parse_query_to_ast_conjunction() {
test_parse_query_to_logical_ast_helper(
"title:toto",
"Term([0, 0, 0, 0, 116, 111, 116, 111])",
"Term(field=0,bytes=[116, 111, 116, 111])",
true,
);
test_parse_query_to_logical_ast_helper(
"+title:toto",
"Term([0, 0, 0, 0, 116, 111, 116, 111])",
"Term(field=0,bytes=[116, 111, 116, 111])",
true,
);
test_parse_query_to_logical_ast_helper(
"+title:toto -titi",
"(+Term([0, 0, 0, 0, 116, 111, 116, 111]) \
-(Term([0, 0, 0, 0, 116, 105, 116, 105]) \
Term([0, 0, 0, 1, 116, 105, 116, 105])))",
"(+Term(field=0,bytes=[116, 111, 116, 111]) \
-(Term(field=0,bytes=[116, 105, 116, 105]) \
Term(field=1,bytes=[116, 105, 116, 105])))",
true,
);
assert_eq!(
@@ -902,15 +900,15 @@ mod test {
);
test_parse_query_to_logical_ast_helper(
"title:a b",
"(+Term([0, 0, 0, 0, 97]) \
+(Term([0, 0, 0, 0, 98]) \
Term([0, 0, 0, 1, 98])))",
"(+Term(field=0,bytes=[97]) \
+(Term(field=0,bytes=[98]) \
Term(field=1,bytes=[98])))",
true,
);
test_parse_query_to_logical_ast_helper(
"title:\"a b\"",
"\"[(0, Term([0, 0, 0, 0, 97])), \
(1, Term([0, 0, 0, 0, 98]))]\"",
"\"[(0, Term(field=0,bytes=[97])), \
(1, Term(field=0,bytes=[98]))]\"",
true,
);
}
@@ -919,10 +917,8 @@ mod test {
pub fn test_query_parser_hyphen() {
test_parse_query_to_logical_ast_helper(
"title:www-form-encoded",
"\"[(0, Term([0, 0, 0, 0, 119, 119, 119])), \
(1, Term([0, 0, 0, 0, 102, 111, 114, 109])), \
(2, Term([0, 0, 0, 0, 101, 110, 99, 111, 100, 101, 100]))]\"",
false,
"\"[(0, Term(field=0,bytes=[119, 119, 119])), (1, Term(field=0,bytes=[102, 111, 114, 109])), (2, Term(field=0,bytes=[101, 110, 99, 111, 100, 101, 100]))]\"",
false
);
}
}

View File

@@ -3,6 +3,7 @@ use std::fmt::{Debug, Formatter};
use crate::query::Occur;
#[derive(PartialEq)]
pub enum UserInputLeaf {
Literal(UserInputLiteral),
All,
@@ -35,6 +36,7 @@ impl Debug for UserInputLeaf {
}
}
#[derive(PartialEq)]
pub struct UserInputLiteral {
pub field_name: Option<String>,
pub phrase: String,
@@ -49,9 +51,11 @@ impl fmt::Debug for UserInputLiteral {
}
}
#[derive(PartialEq)]
pub enum UserInputBound {
Inclusive(String),
Exclusive(String),
Unbounded,
}
impl UserInputBound {
@@ -59,6 +63,7 @@ impl UserInputBound {
match *self {
UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{}\"", word),
UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{}\"", word),
UserInputBound::Unbounded => write!(formatter, "{{\"*\""),
}
}
@@ -66,6 +71,7 @@ impl UserInputBound {
match *self {
UserInputBound::Inclusive(ref word) => write!(formatter, "\"{}\"]", word),
UserInputBound::Exclusive(ref word) => write!(formatter, "\"{}\"}}", word),
UserInputBound::Unbounded => write!(formatter, "\"*\"}}"),
}
}
@@ -73,6 +79,7 @@ impl UserInputBound {
match *self {
UserInputBound::Inclusive(ref contents) => contents,
UserInputBound::Exclusive(ref contents) => contents,
UserInputBound::Unbounded => &"*",
}
}
}

View File

@@ -338,39 +338,33 @@ mod tests {
use crate::collector::Count;
use crate::schema::{Document, Field, Schema, INDEXED};
use crate::Index;
use crate::Result;
use std::collections::Bound;
#[test]
fn test_range_query_simple() {
fn run() -> Result<()> {
let mut schema_builder = Schema::builder();
let year_field = schema_builder.add_u64_field("year", INDEXED);
let schema = schema_builder.build();
let mut schema_builder = Schema::builder();
let year_field = schema_builder.add_u64_field("year", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
for year in 1950u64..2017u64 {
let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
for _ in 0..num_docs_within_year {
index_writer.add_document(doc!(year_field => year));
}
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
for year in 1950u64..2017u64 {
let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
for _ in 0..num_docs_within_year {
index_writer.add_document(doc!(year_field => year));
}
index_writer.commit().unwrap();
}
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64);
// ... or `1960..=1969` if inclusive range is enabled.
let count = searcher.search(&docs_in_the_sixties, &Count)?;
assert_eq!(count, 2285);
Ok(())
index_writer.commit().unwrap();
}
let reader = index.reader().unwrap();
let searcher = reader.searcher();
run().unwrap();
let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64);
// ... or `1960..=1969` if inclusive range is enabled.
let count = searcher.search(&docs_in_the_sixties, &Count).unwrap();
assert_eq!(count, 2285);
}
#[test]

View File

@@ -12,7 +12,7 @@ mod tests {
use crate::collector::TopDocs;
use crate::docset::DocSet;
use crate::query::{Query, QueryParser, Scorer, TermQuery};
use crate::schema::{IndexRecordOption, Schema, STRING, TEXT};
use crate::schema::{Field, IndexRecordOption, Schema, STRING, TEXT};
use crate::tests::assert_nearly_equals;
use crate::Index;
use crate::Term;
@@ -114,4 +114,16 @@ mod tests {
let reader = index.reader().unwrap();
assert_eq!(term_query.count(&*reader.searcher()).unwrap(), 1);
}
#[test]
fn test_term_query_debug() {
let term_query = TermQuery::new(
Term::from_field_text(Field(1), "hello"),
IndexRecordOption::WithFreqs,
);
assert_eq!(
format!("{:?}", term_query),
"TermQuery(Term(field=1,bytes=[104, 101, 108, 108, 111]))"
);
}
}

View File

@@ -7,6 +7,7 @@ use crate::Result;
use crate::Searcher;
use crate::Term;
use std::collections::BTreeSet;
use std::fmt;
/// A Term query matches all of the documents
/// containing a specific term.
@@ -61,12 +62,18 @@ use std::collections::BTreeSet;
/// Ok(())
/// }
/// ```
#[derive(Clone, Debug)]
#[derive(Clone)]
pub struct TermQuery {
term: Term,
index_record_option: IndexRecordOption,
}
impl fmt::Debug for TermQuery {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "TermQuery({:?})", self.term)
}
}
impl TermQuery {
/// Creates a new term query.
pub fn new(term: Term, segment_postings_options: IndexRecordOption) -> TermQuery {

View File

@@ -411,3 +411,52 @@ mod tests {
}
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
use query::score_combiner::DoNothingCombiner;
use query::ConstScorer;
use query::Union;
use query::VecDocSet;
use test::Bencher;
use tests;
use DocId;
use DocSet;
#[bench]
fn bench_union_3_high(bench: &mut Bencher) {
let union_docset: Vec<Vec<DocId>> = vec![
tests::sample_with_seed(100_000, 0.1, 0),
tests::sample_with_seed(100_000, 0.2, 1),
];
bench.iter(|| {
let mut v = Union::<_, DoNothingCombiner>::from(
union_docset
.iter()
.map(|doc_ids| VecDocSet::from(doc_ids.clone()))
.map(ConstScorer::new)
.collect::<Vec<_>>(),
);
while v.advance() {}
});
}
#[bench]
fn bench_union_3_low(bench: &mut Bencher) {
let union_docset: Vec<Vec<DocId>> = vec![
tests::sample_with_seed(100_000, 0.01, 0),
tests::sample_with_seed(100_000, 0.05, 1),
tests::sample_with_seed(100_000, 0.001, 2),
];
bench.iter(|| {
let mut v = Union::<_, DoNothingCombiner>::from(
union_docset
.iter()
.map(|doc_ids| VecDocSet::from(doc_ids.clone()))
.map(ConstScorer::new)
.collect::<Vec<_>>(),
);
while v.advance() {}
});
}
}

View File

@@ -10,7 +10,7 @@ use serde_json::Value as JsonValue;
/// Possible error that may occur while parsing a field value
/// At this point the JSON is known to be valid.
#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub enum ValueParsingError {
/// Encountered a numerical value that overflows or underflows its integer type.
OverflowError(String),

View File

@@ -246,6 +246,25 @@ impl Schema {
self.0.fields_map.get(field_name).cloned()
}
/// Build a `Document` from a `NamedFieldDocument`, resolving field names against the schema.
pub fn convert_named_doc(
&self,
named_doc: NamedFieldDocument,
) -> Result<Document, DocParsingError> {
let mut document = Document::new();
for (field_name, values) in named_doc.0 {
if let Some(field) = self.get_field(&field_name) {
for value in values {
let field_value = FieldValue::new(field, value);
document.add(field_value);
}
} else {
return Err(DocParsingError::NoSuchFieldInSchema(field_name));
}
}
Ok(document)
}
/// Create a named document off the doc.
pub fn to_named_doc(&self, doc: &Document) -> NamedFieldDocument {
let mut field_map = BTreeMap::new();
@@ -360,7 +379,7 @@ impl<'de> Deserialize<'de> for Schema {
/// Error that may happen when deserializing
/// a document from JSON.
#[derive(Debug, Fail)]
#[derive(Debug, Fail, PartialEq)]
pub enum DocParsingError {
/// The payload given is not valid JSON.
#[fail(display = "The provided string is not valid JSON")]
@@ -369,7 +388,10 @@ pub enum DocParsingError {
#[fail(display = "The field '{:?}' could not be parsed: {:?}", _0, _1)]
ValueError(String, ValueParsingError),
/// The json-document contains a field that is not declared in the schema.
#[fail(display = "The json-document contains an unknown field: {:?}", _0)]
#[fail(
display = "The document contains a field that is not declared in the schema: {:?}",
_0
)]
NoSuchFieldInSchema(String),
}
@@ -381,6 +403,7 @@ mod tests {
use crate::schema::*;
use matches::{assert_matches, matches};
use serde_json;
use std::collections::BTreeMap;
#[test]
pub fn is_indexed_test() {
@@ -495,6 +518,54 @@ mod tests {
assert_eq!(doc, doc_serdeser);
}
#[test]
pub fn test_document_from_nameddoc() {
let mut schema_builder = Schema::builder();
let title = schema_builder.add_text_field("title", TEXT);
let val = schema_builder.add_i64_field("val", INDEXED);
let schema = schema_builder.build();
let mut named_doc_map = BTreeMap::default();
named_doc_map.insert(
"title".to_string(),
vec![Value::from("title1"), Value::from("title2")],
);
named_doc_map.insert(
"val".to_string(),
vec![Value::from(14u64), Value::from(-1i64)],
);
let doc = schema
.convert_named_doc(NamedFieldDocument(named_doc_map))
.unwrap();
assert_eq!(
doc.get_all(title),
vec![
&Value::from("title1".to_string()),
&Value::from("title2".to_string())
]
);
assert_eq!(
doc.get_all(val),
vec![&Value::from(14u64), &Value::from(-1i64)]
);
}
#[test]
pub fn test_document_from_nameddoc_error() {
let schema = Schema::builder().build();
let mut named_doc_map = BTreeMap::default();
named_doc_map.insert(
"title".to_string(),
vec![Value::from("title1"), Value::from("title2")],
);
let err = schema
.convert_named_doc(NamedFieldDocument(named_doc_map))
.unwrap_err();
assert_eq!(
err,
DocParsingError::NoSuchFieldInSchema("title".to_string())
);
}
#[test]
pub fn test_parse_document() {
let mut schema_builder = Schema::builder();

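A minimal round-trip sketch for the new `convert_named_doc` (field name is illustrative):

    use tantivy::doc;
    use tantivy::schema::{Schema, TEXT};

    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let schema = schema_builder.build();

    let document = doc!(title => "hello");
    let named = schema.to_named_doc(&document);
    // Field names are resolved back to schema `Field`s; an unknown name
    // fails with `DocParsingError::NoSuchFieldInSchema`.
    let roundtrip = schema.convert_named_doc(named).unwrap();
    assert_eq!(document, roundtrip);
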
View File

@@ -224,7 +224,12 @@ where
impl fmt::Debug for Term {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Term({:?})", &self.0[..])
write!(
f,
"Term(field={},bytes={:?})",
self.field().0,
self.value_bytes()
)
}
}

View File

@@ -63,7 +63,7 @@ impl FragmentCandidate {
fn try_add_token(&mut self, token: &Token, terms: &BTreeMap<String, f32>) {
self.stop_offset = token.offset_to;
if let Some(score) = terms.get(&token.text.to_lowercase()) {
if let Some(&score) = terms.get(&token.text.to_lowercase()) {
self.score += score;
self.highlighted
.push(HighlightSection::new(token.offset_from, token.offset_to));
@@ -142,7 +142,7 @@ impl Snippet {
/// Fragments must be valid in the sense that `&text[fragment.start..fragment.stop]`\
/// has to be a valid string.
fn search_fragments<'a>(
tokenizer: &dyn BoxedTokenizer,
tokenizer: &BoxedTokenizer,
text: &'a str,
terms: &BTreeMap<String, f32>,
max_num_chars: usize,
@@ -150,7 +150,6 @@ fn search_fragments<'a>(
let mut token_stream = tokenizer.token_stream(text);
let mut fragment = FragmentCandidate::new(0);
let mut fragments: Vec<FragmentCandidate> = vec![];
while let Some(next) = token_stream.next() {
if (next.offset_to - fragment.start_offset) > max_num_chars {
if fragment.score > 0.0 {
@@ -254,7 +253,7 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str)
/// ```
pub struct SnippetGenerator {
terms_text: BTreeMap<String, f32>,
tokenizer: Box<dyn BoxedTokenizer>,
tokenizer: BoxedTokenizer,
field: Field,
max_num_chars: usize,
}
@@ -316,12 +315,8 @@ impl SnippetGenerator {
/// Generates a snippet for the given text.
pub fn snippet(&self, text: &str) -> Snippet {
let fragment_candidates = search_fragments(
&*self.tokenizer,
&text,
&self.terms_text,
self.max_num_chars,
);
let fragment_candidates =
search_fragments(&self.tokenizer, &text, &self.terms_text, self.max_num_chars);
select_best_fragment_combination(&fragment_candidates[..], &text)
}
}
@@ -331,7 +326,7 @@ mod tests {
use super::{search_fragments, select_best_fragment_combination};
use crate::query::QueryParser;
use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions, TEXT};
use crate::tokenizer::{box_tokenizer, SimpleTokenizer};
use crate::tokenizer::SimpleTokenizer;
use crate::Index;
use crate::SnippetGenerator;
use maplit::btreemap;
@@ -355,12 +350,12 @@ Survey in 2016, 2017, and 2018."#;
#[test]
fn test_snippet() {
let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
let boxed_tokenizer = SimpleTokenizer.into();
let terms = btreemap! {
String::from("rust") => 1.0,
String::from("language") => 0.9
};
let fragments = search_fragments(&*boxed_tokenizer, TEST_TEXT, &terms, 100);
let fragments = search_fragments(&boxed_tokenizer, TEST_TEXT, &terms, 100);
assert_eq!(fragments.len(), 7);
{
let first = &fragments[0];
@@ -382,13 +377,13 @@ Survey in 2016, 2017, and 2018."#;
#[test]
fn test_snippet_scored_fragment() {
let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
let boxed_tokenizer = SimpleTokenizer.into();
{
let terms = btreemap! {
String::from("rust") =>1.0f32,
String::from("language") => 0.9f32
};
let fragments = search_fragments(&*boxed_tokenizer, TEST_TEXT, &terms, 20);
let fragments = search_fragments(&boxed_tokenizer, TEST_TEXT, &terms, 20);
{
let first = &fragments[0];
assert_eq!(first.score, 1.0);
@@ -397,13 +392,13 @@ Survey in 2016, 2017, and 2018."#;
let snippet = select_best_fragment_combination(&fragments[..], &TEST_TEXT);
assert_eq!(snippet.to_html(), "<b>Rust</b> is a systems")
}
let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
let boxed_tokenizer = SimpleTokenizer.into();
{
let terms = btreemap! {
String::from("rust") =>0.9f32,
String::from("language") => 1.0f32
};
let fragments = search_fragments(&*boxed_tokenizer, TEST_TEXT, &terms, 20);
let fragments = search_fragments(&boxed_tokenizer, TEST_TEXT, &terms, 20);
//assert_eq!(fragments.len(), 7);
{
let first = &fragments[0];
@@ -417,14 +412,14 @@ Survey in 2016, 2017, and 2018."#;
#[test]
fn test_snippet_in_second_fragment() {
let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
let boxed_tokenizer = SimpleTokenizer.into();
let text = "a b c d e f g";
let mut terms = BTreeMap::new();
terms.insert(String::from("c"), 1.0);
let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3);
let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3);
assert_eq!(fragments.len(), 1);
{
@@ -441,14 +436,14 @@ Survey in 2016, 2017, and 2018."#;
#[test]
fn test_snippet_with_term_at_the_end_of_fragment() {
let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
let boxed_tokenizer = SimpleTokenizer.into();
let text = "a b c d e f f g";
let mut terms = BTreeMap::new();
terms.insert(String::from("f"), 1.0);
let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3);
let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3);
assert_eq!(fragments.len(), 2);
{
@@ -465,7 +460,7 @@ Survey in 2016, 2017, and 2018."#;
#[test]
fn test_snippet_with_second_fragment_has_the_highest_score() {
let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
let boxed_tokenizer = SimpleTokenizer.into();
let text = "a b c d e f g";
@@ -473,7 +468,7 @@ Survey in 2016, 2017, and 2018."#;
terms.insert(String::from("f"), 1.0);
terms.insert(String::from("a"), 0.9);
let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 7);
let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 7);
assert_eq!(fragments.len(), 2);
{
@@ -490,14 +485,14 @@ Survey in 2016, 2017, and 2018."#;
#[test]
fn test_snippet_with_term_not_in_text() {
let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
let boxed_tokenizer = SimpleTokenizer.into();
let text = "a b c d";
let mut terms = BTreeMap::new();
terms.insert(String::from("z"), 1.0);
let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3);
let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3);
assert_eq!(fragments.len(), 0);
@@ -508,12 +503,12 @@ Survey in 2016, 2017, and 2018."#;
#[test]
fn test_snippet_with_no_terms() {
let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
let boxed_tokenizer = SimpleTokenizer.into();
let text = "a b c d";
let terms = BTreeMap::new();
let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3);
let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3);
assert_eq!(fragments.len(), 0);
let snippet = select_best_fragment_combination(&fragments[..], &text);

View File

@@ -2,7 +2,7 @@ use super::TermDictionary;
use crate::postings::TermInfo;
use crate::termdict::TermOrdinal;
use tantivy_fst::automaton::AlwaysMatch;
use tantivy_fst::map::{Stream, StreamBuilder};
use tantivy_fst::map::{Stream, StreamBuilder, StreamWithState};
use tantivy_fst::Automaton;
use tantivy_fst::{IntoStreamer, Streamer};
@@ -11,6 +11,7 @@ use tantivy_fst::{IntoStreamer, Streamer};
pub struct TermStreamerBuilder<'a, A = AlwaysMatch>
where
A: Automaton,
A::State: Clone,
{
fst_map: &'a TermDictionary,
stream_builder: StreamBuilder<'a, A>,
@@ -19,6 +20,7 @@ where
impl<'a, A> TermStreamerBuilder<'a, A>
where
A: Automaton,
A::State: Clone + Default + Sized,
{
pub(crate) fn new(fst_map: &'a TermDictionary, stream_builder: StreamBuilder<'a, A>) -> Self {
TermStreamerBuilder {
@@ -56,10 +58,11 @@ where
pub fn into_stream(self) -> TermStreamer<'a, A> {
TermStreamer {
fst_map: self.fst_map,
stream: self.stream_builder.into_stream(),
stream: self.stream_builder.with_state().into_stream(),
term_ord: 0u64,
current_key: Vec::with_capacity(100),
current_value: TermInfo::default(),
state: Default::default(),
}
}
}
@@ -69,27 +72,31 @@ where
pub struct TermStreamer<'a, A = AlwaysMatch>
where
A: Automaton,
A::State: Clone + Default + Sized,
{
fst_map: &'a TermDictionary,
stream: Stream<'a, A>,
stream: StreamWithState<'a, A>,
term_ord: TermOrdinal,
current_key: Vec<u8>,
current_value: TermInfo,
state: A::State,
}
impl<'a, A> TermStreamer<'a, A>
where
A: Automaton,
A::State: Clone + Default + Sized,
{
/// Advances the stream to the next item.
/// Before the first call to `.advance()`, the stream
/// is in an uninitialized state.
pub fn advance(&mut self) -> bool {
if let Some((term, term_ord)) = self.stream.next() {
if let Some((term, term_ord, state)) = self.stream.next() {
self.current_key.clear();
self.current_key.extend_from_slice(term);
self.term_ord = term_ord;
self.current_value = self.fst_map.term_info_from_ord(term_ord);
self.state = state;
true
} else {
false
@@ -118,6 +125,10 @@ where
&self.current_key
}
pub fn state(&self) -> &A::State {
&self.state
}
/// Accesses the current value.
///
/// Calling `.value()` after the end of the stream will return the

View File

@@ -197,7 +197,11 @@ impl TermDictionary {
/// Returns a search builder, to stream all of the terms
/// within the Automaton
pub fn search<'a, A: Automaton + 'a>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A> {
pub fn search<'a, A>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A>
where
A: Automaton + 'a,
A::State: Clone + Default + Sized,
{
let stream_builder = self.fst_index.search(automaton);
TermStreamerBuilder::<A>::new(self, stream_builder)
}
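A rough sketch of consuming the state that is now threaded through the streamer (`dict` and `automaton` are placeholders for a `TermDictionary` and any `Automaton` whose `State` is `Clone + Default`, per the bounds above):

    let mut stream = dict.search(automaton).into_stream();
    while stream.advance() {
        let _term_bytes: &[u8] = stream.key();
        // New in this change: the automaton state that accepted this key,
        // e.g. the edit distance when streaming with a Levenshtein automaton.
        let _state = stream.state();
    }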

View File

@@ -155,7 +155,6 @@ pub use self::simple_tokenizer::SimpleTokenizer;
pub use self::stemmer::{Language, Stemmer};
pub use self::stop_word_filter::StopWordFilter;
pub(crate) use self::token_stream_chain::TokenStreamChain;
pub(crate) use self::tokenizer::box_tokenizer;
pub use self::tokenizer::BoxedTokenizer;
pub use self::tokenizer::{Token, TokenFilter, TokenStream, Tokenizer};

View File

@@ -56,8 +56,6 @@ pub trait Tokenizer<'a>: Sized + Clone {
/// # Example
///
/// ```rust
/// # extern crate tantivy;
///
/// use tantivy::tokenizer::*;
///
/// # fn main() {
@@ -80,7 +78,7 @@ pub trait Tokenizer<'a>: Sized + Clone {
}
/// A boxed tokenizer
pub trait BoxedTokenizer: Send + Sync {
trait BoxedTokenizerTrait: Send + Sync {
/// Tokenize a `&str`
fn token_stream<'a>(&self, text: &'a str) -> Box<dyn TokenStream + 'a>;
@@ -92,7 +90,41 @@ pub trait BoxedTokenizer: Send + Sync {
fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<dyn TokenStream + 'b>;
/// Return a boxed clone of the tokenizer
fn boxed_clone(&self) -> Box<dyn BoxedTokenizer>;
fn boxed_clone(&self) -> BoxedTokenizer;
}
/// A boxed tokenizer
pub struct BoxedTokenizer(Box<dyn BoxedTokenizerTrait>);
impl<T> From<T> for BoxedTokenizer
where
T: 'static + Send + Sync + for<'a> Tokenizer<'a>,
{
fn from(tokenizer: T) -> BoxedTokenizer {
BoxedTokenizer(Box::new(BoxableTokenizer(tokenizer)))
}
}
impl BoxedTokenizer {
/// Tokenize a `&str`
pub fn token_stream<'a>(&self, text: &'a str) -> Box<dyn TokenStream + 'a> {
self.0.token_stream(text)
}
/// Tokenize an array of `&str`
///
/// The resulting `TokenStream` is equivalent to what would be obtained if the &str were
/// one concatenated `&str`, with an artificial position gap of `2` between the different fields
/// to prevent an accidental `PhraseQuery` from matching across two of them.
pub fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<dyn TokenStream + 'b> {
self.0.token_stream_texts(texts)
}
}
impl Clone for BoxedTokenizer {
fn clone(&self) -> BoxedTokenizer {
self.0.boxed_clone()
}
}
#[derive(Clone)]
@@ -100,7 +132,7 @@ struct BoxableTokenizer<A>(A)
where
A: for<'a> Tokenizer<'a> + Send + Sync;
impl<A> BoxedTokenizer for BoxableTokenizer<A>
impl<A> BoxedTokenizerTrait for BoxableTokenizer<A>
where
A: 'static + Send + Sync + for<'a> Tokenizer<'a>,
{
@@ -125,18 +157,11 @@ where
}
}
fn boxed_clone(&self) -> Box<dyn BoxedTokenizer> {
Box::new(self.clone())
fn boxed_clone(&self) -> BoxedTokenizer {
self.0.clone().into()
}
}
pub(crate) fn box_tokenizer<A>(a: A) -> Box<dyn BoxedTokenizer>
where
A: 'static + Send + Sync + for<'a> Tokenizer<'a>,
{
Box::new(BoxableTokenizer(a))
}
impl<'b> TokenStream for Box<dyn TokenStream + 'b> {
fn advance(&mut self) -> bool {
let token_stream: &mut dyn TokenStream = self.borrow_mut();
@@ -161,7 +186,6 @@ impl<'b> TokenStream for Box<dyn TokenStream + 'b> {
/// # Example
///
/// ```
/// extern crate tantivy;
/// use tantivy::tokenizer::*;
///
/// # fn main() {
@@ -203,7 +227,6 @@ pub trait TokenStream {
/// and `.token()`.
///
/// ```
/// # extern crate tantivy;
/// # use tantivy::tokenizer::*;
/// #
/// # fn main() {

View File

@@ -1,4 +1,3 @@
use crate::tokenizer::box_tokenizer;
use crate::tokenizer::stemmer::Language;
use crate::tokenizer::BoxedTokenizer;
use crate::tokenizer::LowerCaser;
@@ -8,7 +7,6 @@ use crate::tokenizer::SimpleTokenizer;
use crate::tokenizer::Stemmer;
use crate::tokenizer::Tokenizer;
use std::collections::HashMap;
use std::ops::Deref;
use std::sync::{Arc, RwLock};
/// The tokenizer manager serves as a store for
@@ -25,16 +23,16 @@ use std::sync::{Arc, RwLock};
/// search engine.
#[derive(Clone)]
pub struct TokenizerManager {
tokenizers: Arc<RwLock<HashMap<String, Box<dyn BoxedTokenizer>>>>,
tokenizers: Arc<RwLock<HashMap<String, BoxedTokenizer>>>,
}
impl TokenizerManager {
/// Registers a new tokenizer associated with a given name.
pub fn register<A>(&self, tokenizer_name: &str, tokenizer: A)
where
A: 'static + Send + Sync + for<'a> Tokenizer<'a>,
A: Into<BoxedTokenizer>,
{
let boxed_tokenizer = box_tokenizer(tokenizer);
let boxed_tokenizer = tokenizer.into();
self.tokenizers
.write()
.expect("Acquiring the lock should never fail")
@@ -42,13 +40,12 @@ impl TokenizerManager {
}
/// Accessing a tokenizer given its name.
pub fn get(&self, tokenizer_name: &str) -> Option<Box<dyn BoxedTokenizer>> {
pub fn get(&self, tokenizer_name: &str) -> Option<BoxedTokenizer> {
self.tokenizers
.read()
.expect("Acquiring the lock should never fail")
.get(tokenizer_name)
.map(Deref::deref)
.map(BoxedTokenizer::boxed_clone)
.cloned()
}
}
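
Call sites update accordingly; a short usage sketch (the tokenizer name is illustrative, and `TokenizerManager::default()` is assumed to provide the usual default registry):

    use tantivy::tokenizer::{SimpleTokenizer, TokenizerManager};

    let manager = TokenizerManager::default();
    // `register` now accepts anything convertible into a `BoxedTokenizer`.
    manager.register("simple_demo", SimpleTokenizer);
    // `get` hands back an owned `BoxedTokenizer` clone instead of a trait object.
    let tokenizer = manager.get("simple_demo").unwrap();
    let mut stream = tokenizer.token_stream("hello");
    while stream.advance() {}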