Compare commits


4 Commits

Author        SHA1        Message                                        Date
Pascal Seitz  54fc557a6d  use a single memory arena to store all terms  2022-05-03 20:11:28 +08:00
Pascal Seitz  7f5a409d6f  remove term encoding in term bytes             2022-05-03 20:04:53 +08:00
Pascal Seitz  50ee43ab79  refactor Term                                  2022-05-03 20:04:53 +08:00
Pascal Seitz  2d1beeb6be  term hashmap per field, smaller page size      2022-05-03 20:04:53 +08:00
55 changed files with 530 additions and 1136 deletions

View File

@@ -33,7 +33,7 @@ jobs:
components: rustfmt, clippy
- name: Run tests
run: cargo +stable test --features mmap,brotli-compression,lz4-compression,snappy-compression,zstd-compression,failpoints --verbose --workspace
run: cargo +stable test --features mmap,brotli-compression,lz4-compression,snappy-compression,failpoints --verbose --workspace
- name: Run tests quickwit feature
run: cargo +stable test --features mmap,quickwit,failpoints --verbose --workspace

View File

@@ -1,4 +1,4 @@
Tantivy 0.18
Unreleased
================================
- For date values `chrono` has been replaced with `time` (@uklotzde) #1304 :
- The `time` crate is re-exported as `tantivy::time` instead of `tantivy::chrono`.
@@ -11,7 +11,6 @@ Tantivy 0.18
- Add [histogram](https://github.com/quickwit-oss/tantivy/pull/1306) aggregation (@PSeitz)
- Add support for fastfield on text fields (@PSeitz)
- Add terms aggregation (@PSeitz)
- Add support for zstd compression (@kryesh)
Tantivy 0.17
================================

View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy"
version = "0.18.0"
version = "0.17.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
@@ -10,72 +10,71 @@ homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy"
readme = "README.md"
keywords = ["search", "information", "retrieval"]
edition = "2021"
edition = "2018"
[dependencies]
oneshot = "0.1.3"
base64 = "0.13.0"
oneshot = "0.1"
base64 = "0.13"
byteorder = "1.4.3"
crc32fast = "1.3.2"
once_cell = "1.10.0"
regex = { version = "1.5.5", default-features = false, features = ["std", "unicode"] }
tantivy-fst = "0.3.0"
memmap2 = { version = "0.5.3", optional = true }
lz4_flex = { version = "0.9.2", default-features = false, features = ["checked-decode"], optional = true }
brotli = { version = "3.3.4", optional = true }
zstd = { version = "0.11", optional = true }
crc32fast = "1.2.1"
once_cell = "1.7.2"
regex ={ version = "1.5.4", default-features = false, features = ["std"] }
tantivy-fst = "0.3"
memmap2 = {version = "0.5", optional=true}
lz4_flex = { version = "0.9", default-features = false, features = ["checked-decode"], optional = true }
brotli = { version = "3.3", optional = true }
snap = { version = "1.0.5", optional = true }
tempfile = { version = "3.3.0", optional = true }
log = "0.4.16"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = "1.0.79"
num_cpus = "1.13.1"
tempfile = { version = "3.2", optional = true }
log = "0.4.14"
serde = { version = "1.0.126", features = ["derive"] }
serde_json = "1.0.64"
num_cpus = "1.13"
fs2={ version = "0.4.3", optional = true }
levenshtein_automata = "0.2.1"
levenshtein_automata = "0.2"
uuid = { version = "1.0.0", features = ["v4", "serde"] }
crossbeam-channel = "0.5.4"
tantivy-query-grammar = { version="0.18.0", path="./query-grammar" }
tantivy-bitpacker = { version="0.2", path="./bitpacker" }
common = { version = "0.3", path = "./common/", package = "tantivy-common" }
fastfield_codecs = { version="0.2", path="./fastfield_codecs", default-features = false }
ownedbytes = { version="0.3", path="./ownedbytes" }
stable_deref_trait = "1.2.0"
rust-stemmers = "1.2.0"
downcast-rs = "1.2.0"
crossbeam = "0.8.1"
tantivy-query-grammar = { version="0.15.0", path="./query-grammar" }
tantivy-bitpacker = { version="0.1", path="./bitpacker" }
common = { version = "0.2", path = "./common/", package = "tantivy-common" }
fastfield_codecs = { version="0.1", path="./fastfield_codecs", default-features = false }
ownedbytes = { version="0.2", path="./ownedbytes" }
stable_deref_trait = "1.2"
rust-stemmers = "1.2"
downcast-rs = "1.2"
bitpacking = { version = "0.8.4", default-features = false, features = ["bitpacker4x"] }
census = "0.4.0"
census = "0.4"
fnv = "1.0.7"
thiserror = "1.0.30"
thiserror = "1.0.24"
htmlescape = "0.3.1"
fail = "0.5.0"
murmurhash32 = "0.2.0"
time = { version = "0.3.9", features = ["serde-well-known"] }
smallvec = "1.8.0"
rayon = "1.5.2"
lru = "0.7.5"
fastdivide = "0.4.0"
itertools = "0.10.3"
measure_time = "0.8.2"
pretty_assertions = "1.2.1"
serde_cbor = { version = "0.11.2", optional = true }
async-trait = "0.1.53"
fail = "0.5"
murmurhash32 = "0.2"
time = { version = "0.3.7", features = ["serde-well-known"] }
smallvec = "1.6.1"
rayon = "1.5"
lru = "0.7.0"
fastdivide = "0.4"
itertools = "0.10.0"
measure_time = "0.8.0"
pretty_assertions = "1.1.0"
serde_cbor = {version="0.11", optional=true}
async-trait = "0.1"
[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
[dev-dependencies]
rand = "0.8.5"
rand = "0.8.3"
maplit = "1.0.2"
matches = "0.1.9"
proptest = "1.0.0"
matches = "0.1.8"
proptest = "1.0"
criterion = "0.3.5"
test-log = "0.2.10"
test-log = "0.2.8"
env_logger = "0.9.0"
pprof = { version = "0.9.0", features = ["flamegraph", "criterion"] }
futures = "0.3.21"
pprof = {version= "0.8", features=["flamegraph", "criterion"]}
futures = "0.3.15"
[dev-dependencies.fail]
version = "0.5.0"
version = "0.5"
features = ["failpoints"]
[profile.release]
@@ -94,7 +93,6 @@ mmap = ["fs2", "tempfile", "memmap2"]
brotli-compression = ["brotli"]
lz4-compression = ["lz4_flex"]
snappy-compression = ["snap"]
zstd-compression = ["zstd"]
failpoints = ["fail/failpoints"]
unstable = [] # useful for benches.

View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy-bitpacker"
version = "0.2.0"
version = "0.1.1"
edition = "2018"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"

View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy-common"
version = "0.3.0"
version = "0.2.0"
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
license = "MIT"
edition = "2018"
@@ -10,7 +10,7 @@ description = "common traits and utility functions used by multiple tantivy subc
[dependencies]
byteorder = "1.4.3"
ownedbytes = { version="0.3", path="../ownedbytes" }
ownedbytes = { version="0.2", path="../ownedbytes" }
[dev-dependencies]
proptest = "1.0.0"

View File

@@ -1,8 +1,7 @@
// # Json field example
//
// This example shows how the json field can be used
// to make tantivy partially schemaless by setting it as
// default query parser field.
// to make tantivy partially schemaless.
use tantivy::collector::{Count, TopDocs};
use tantivy::query::QueryParser;
@@ -11,6 +10,10 @@ use tantivy::Index;
fn main() -> tantivy::Result<()> {
// # Defining the schema
//
// We need two fields:
// - a timestamp
// - a json object field
let mut schema_builder = Schema::builder();
schema_builder.add_date_field("timestamp", FAST | STORED);
let event_type = schema_builder.add_text_field("event_type", STRING | STORED);
@@ -40,8 +43,7 @@ fn main() -> tantivy::Result<()> {
"attributes": {
"target": "submit-button",
"cart": {"product_id": 133},
"description": "das keyboard",
"event_type": "holiday-sale"
"description": "das keyboard"
}
}"#,
)?;
@@ -51,9 +53,6 @@ fn main() -> tantivy::Result<()> {
let reader = index.reader()?;
let searcher = reader.searcher();
// # Default fields: event_type and attributes
// By setting attributes as a default field it allows omitting attributes itself, e.g. "target",
// instead of "attributes.target"
let query_parser = QueryParser::for_index(&index, vec![event_type, attributes]);
{
let query = query_parser.parse_query("target:submit-button")?;
@@ -71,34 +70,10 @@ fn main() -> tantivy::Result<()> {
assert_eq!(count_docs, 1);
}
{
let query = query_parser.parse_query("click AND cart.product_id:133")?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
assert_eq!(hits.len(), 1);
}
{
// The sub-fields in the json field marked as default field still need to be explicitly
// addressed
let query = query_parser.parse_query("click AND 133")?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
assert_eq!(hits.len(), 0);
}
{
// Default json fields are ignored if they collide with the schema
let query = query_parser.parse_query("event_type:holiday-sale")?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
assert_eq!(hits.len(), 0);
}
// # Query via full attribute path
{
// This only searches in our schema's `event_type` field
let query = query_parser.parse_query("event_type:click")?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
assert_eq!(hits.len(), 2);
}
{
// Default json fields can still be accessed by full path
let query = query_parser.parse_query("attributes.event_type:holiday-sale")?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
let query = query_parser
.parse_query("event_type:click AND cart.product_id:133")
.unwrap();
let hits = searcher.search(&*query, &TopDocs::with_limit(2)).unwrap();
assert_eq!(hits.len(), 1);
}
Ok(())
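A minimal usage sketch following the example above (it assumes the same `event_type` and json `attributes` fields, index, and searcher, inside the example's `main` returning `tantivy::Result<()>`): sub-fields of a json field can always be addressed through their full dotted path, independently of which fields are registered as query-parser defaults.

    // Only `event_type` is a default field here, so the json sub-field is spelled out in full.
    let query_parser = QueryParser::for_index(&index, vec![event_type]);
    let query = query_parser.parse_query("attributes.target:submit-button")?;
    let count = searcher.search(&*query, &Count)?;
    // One document in the example carries attributes.target = "submit-button".
    assert_eq!(count, 1);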

View File

@@ -1,6 +1,6 @@
[package]
name = "fastfield_codecs"
version = "0.2.0"
version = "0.1.0"
authors = ["Pascal Seitz <pascal@quickwit.io>"]
license = "MIT"
edition = "2018"
@@ -9,8 +9,8 @@ description = "Fast field codecs used by tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
common = { version = "0.3", path = "../common/", package = "tantivy-common" }
tantivy-bitpacker = { version="0.2", path = "../bitpacker/" }
common = { version = "0.2", path = "../common/", package = "tantivy-common" }
tantivy-bitpacker = { version="0.1.1", path = "../bitpacker/" }
prettytable-rs = {version="0.8.0", optional= true}
rand = {version="0.8.3", optional= true}

View File

@@ -1,7 +1,7 @@
[package]
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
name = "ownedbytes"
version = "0.3.0"
version = "0.2.0"
edition = "2018"
description = "Expose data as static slice"
license = "MIT"

View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy-query-grammar"
version = "0.18.0"
version = "0.15.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]

View File

@@ -18,7 +18,7 @@ use crate::Occur;
const SPECIAL_CHARS: &[char] = &[
'+', '^', '`', ':', '{', '}', '"', '[', ']', '(', ')', '~', '!', '\\', '*', ' ',
];
const ESCAPED_SPECIAL_CHARS_PATTERN: &str = r#"\\(\+|\^|`|:|\{|\}|"|\[|\]|\(|\)|\~|!|\\|\*|\s)"#;
const ESCAPED_SPECIAL_CHARS_PATTERN: &str = r#"\\(\+|\^|`|:|\{|\}|"|\[|\]|\(|\)|\~|!|\\|\*| )"#;
/// Parses a field_name
/// A field name must have at least one character and be followed by a colon.
@@ -34,8 +34,7 @@ fn field_name<'a>() -> impl Parser<&'a str, Output = String> {
take_while(|c| !SPECIAL_CHARS.contains(&c)),
),
'\\',
satisfy(|_| true), /* if the next character is not a special char, the \ will be treated
* as the \ character. */
satisfy(|c| SPECIAL_CHARS.contains(&c)),
))
.skip(char(':'))
.map(|s| ESCAPED_SPECIAL_CHARS_RE.replace_all(&s, "$1").to_string())
@@ -517,27 +516,15 @@ mod test {
}
#[test]
fn test_field_name() {
fn test_field_name() -> TestParseResult {
assert_eq!(
super::field_name().parse(".my.field.name:a"),
Ok((".my.field.name".to_string(), "a"))
);
assert_eq!(
super::field_name().parse(r#"my\ field:a"#),
Ok(("my field".to_string(), "a"))
);
assert_eq!(
super::field_name().parse(r#"にんじん:a"#),
Ok(("にんじん".to_string(), "a"))
);
assert_eq!(
super::field_name().parse("my\\ field\\ name:a"),
Ok(("my field name".to_string(), "a"))
);
assert_eq!(
super::field_name().parse(r#"my\field:a"#),
Ok((r#"my\field"#.to_string(), "a"))
);
assert!(super::field_name().parse("my field:a").is_err());
assert_eq!(
super::field_name().parse("\\(1\\+1\\):2"),
@@ -547,21 +534,14 @@ mod test {
super::field_name().parse("my_field_name:a"),
Ok(("my_field_name".to_string(), "a"))
);
assert_eq!(
super::field_name().parse("myfield.b:hello").unwrap(),
("myfield.b".to_string(), "hello")
);
assert_eq!(
super::field_name().parse(r#"myfield\.b:hello"#).unwrap(),
(r#"myfield\.b"#.to_string(), "hello")
);
assert!(super::field_name().parse("my_field_name").is_err());
assert!(super::field_name().parse(":a").is_err());
assert!(super::field_name().parse("-my_field:a").is_err());
assert_eq!(
super::field_name().parse("_my_field:a"),
Ok(("_my_field".to_string(), "a"))
super::field_name().parse("_my_field:a")?,
("_my_field".to_string(), "a")
);
Ok(())
}
#[test]

View File

@@ -230,7 +230,8 @@ pub enum BucketResult {
impl BucketResult {
pub(crate) fn empty_from_req(req: &BucketAggregationInternal) -> crate::Result<Self> {
let empty_bucket = IntermediateBucketResult::empty_from_req(&req.bucket_agg);
BucketResult::from_intermediate_and_req(empty_bucket, req)
Ok(BucketResult::from_intermediate_and_req(empty_bucket, req)?)
}
fn from_intermediate_and_req(

View File

@@ -1364,29 +1364,4 @@ mod tests {
Ok(())
}
#[test]
fn histogram_invalid_request() -> crate::Result<()> {
let index = get_test_index_2_segments(true)?;
let agg_req: Aggregations = vec![(
"histogram".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Histogram(HistogramAggregation {
field: "score_f64".to_string(),
interval: 0.0,
..Default::default()
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();
let agg_res = exec_request(agg_req, &index);
assert!(agg_res.is_err());
Ok(())
}
}

View File

@@ -81,8 +81,7 @@ pub struct TermsAggregation {
///
/// Should never be smaller than size.
#[serde(skip_serializing_if = "Option::is_none", default)]
#[serde(alias = "shard_size")]
pub split_size: Option<u32>,
pub shard_size: Option<u32>,
/// To get more accurate results, we fetch more than `size` from each segment.
///
@@ -97,11 +96,11 @@ pub struct TermsAggregation {
/// doc_count returned by each shard. It's the sum of the size of the largest bucket on
/// each segment that didn't fit into `shard_size`.
///
/// Defaults to true when ordering by count desc.
/// Defaults to true when ordering by counts desc.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub show_term_doc_count_error: Option<bool>,
/// Filter all terms that are lower than `min_doc_count`. Defaults to 1.
/// Filter all terms than are lower `min_doc_count`. Defaults to 1.
///
/// **Expensive**: When set to 0, this will return all terms in the field.
#[serde(skip_serializing_if = "Option::is_none", default)]
@@ -144,7 +143,7 @@ pub(crate) struct TermsAggregationInternal {
/// Increasing this value will increase the cost for more accuracy.
pub segment_size: u32,
/// Filter all terms that are lower than `min_doc_count`. Defaults to 1.
/// Filter all terms than are lower `min_doc_count`. Defaults to 1.
///
/// *Expensive*: When set to 0, this will return all terms in the field.
pub min_doc_count: u64,
@@ -573,7 +572,7 @@ mod tests {
bucket_agg: BucketAggregationType::Terms(TermsAggregation {
field: "string_id".to_string(),
size: Some(2),
split_size: Some(2),
shard_size: Some(2),
..Default::default()
}),
sub_aggregation: Default::default(),
@@ -1211,51 +1210,6 @@ mod tests {
.unwrap();
assert_eq!(agg_req, agg_req_deser);
let elasticsearch_compatible_json = json!(
{
"term_agg_test":{
"terms": {
"field": "string_id",
"split_size": 2u64,
}
}
});
// test alias shard_size, split_size
let agg_req: Aggregations = vec![(
"term_agg_test".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Terms(TermsAggregation {
field: "string_id".to_string(),
split_size: Some(2),
..Default::default()
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();
let agg_req_deser: Aggregations =
serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap())
.unwrap();
assert_eq!(agg_req, agg_req_deser);
let elasticsearch_compatible_json = json!(
{
"term_agg_test":{
"terms": {
"field": "string_id",
"shard_size": 2u64,
}
}
});
let agg_req_deser: Aggregations =
serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap())
.unwrap();
assert_eq!(agg_req, agg_req_deser);
Ok(())
}
}

View File

@@ -24,9 +24,7 @@ use crate::aggregation::bucket::TermsAggregationInternal;
/// intermediate results.
#[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateAggregationResults {
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) metrics: Option<VecWithNames<IntermediateMetricResult>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) buckets: Option<VecWithNames<IntermediateBucketResult>>,
}

View File

@@ -20,8 +20,7 @@
//!
//! #### Limitations
//!
//! Currently aggregations work only on single value fast fields of type u64, f64, i64 and
//! fast fields on text fields.
//! Currently aggregations work only on single value fast fields of type u64, f64 and i64.
//!
//! # JSON Format
//! Aggregations request and result structures de/serialize into elasticsearch compatible JSON.

View File

@@ -92,7 +92,7 @@ mod histogram_collector;
pub use histogram_collector::HistogramCollector;
mod multi_collector;
pub use self::multi_collector::{FruitHandle, MultiCollector, MultiFruit};
pub use self::multi_collector::MultiCollector;
mod top_collector;

View File

@@ -5,7 +5,6 @@ use super::{Collector, SegmentCollector};
use crate::collector::Fruit;
use crate::{DocId, Score, SegmentOrdinal, SegmentReader, TantivyError};
/// MultiFruit keeps Fruits from every nested Collector
pub struct MultiFruit {
sub_fruits: Vec<Option<Box<dyn Fruit>>>,
}
@@ -80,17 +79,12 @@ impl<TSegmentCollector: SegmentCollector> BoxableSegmentCollector
}
}
/// FruitHandle stores reference to the corresponding collector inside MultiCollector
pub struct FruitHandle<TFruit: Fruit> {
pos: usize,
_phantom: PhantomData<TFruit>,
}
impl<TFruit: Fruit> FruitHandle<TFruit> {
/// Extract a typed fruit off a multifruit.
///
/// This function involves downcasting and can panic if the multifruit was
/// created using faulty code.
pub fn extract(self, fruits: &mut MultiFruit) -> TFruit {
let boxed_fruit = fruits.sub_fruits[self.pos].take().expect("");
*boxed_fruit
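A hedged usage sketch of the `MultiCollector` / `FruitHandle` pair documented above (the `searcher` and `query` bindings are illustrative; `add_collector` is the existing `MultiCollector` API):

    let mut collectors = MultiCollector::new();
    let top_docs_handle = collectors.add_collector(TopDocs::with_limit(2));
    let count_handle = collectors.add_collector(Count);
    let mut multi_fruit = searcher.search(&query, &collectors)?;
    // Each handle extracts its typed fruit back out of the shared MultiFruit.
    let top_docs = top_docs_handle.extract(&mut multi_fruit);
    let count = count_handle.extract(&mut multi_fruit);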

View File

@@ -1,7 +1,6 @@
use crossbeam::channel;
use rayon::{ThreadPool, ThreadPoolBuilder};
use crate::TantivyError;
/// Search executor that runs search requests either single-threaded or on a thread pool.
///
/// We don't expose Rayon thread pool directly here for several reasons.
@@ -48,19 +47,16 @@ impl Executor {
match self {
Executor::SingleThread => args.map(f).collect::<crate::Result<_>>(),
Executor::ThreadPool(pool) => {
let args: Vec<A> = args.collect();
let num_fruits = args.len();
let args_with_indices: Vec<(usize, A)> = args.enumerate().collect();
let num_fruits = args_with_indices.len();
let fruit_receiver = {
let (fruit_sender, fruit_receiver) = crossbeam_channel::unbounded();
let (fruit_sender, fruit_receiver) = channel::unbounded();
pool.scope(|scope| {
for (idx, arg) in args.into_iter().enumerate() {
// We name references for f and fruit_sender_ref because we do not
// want these two to be moved into the closure.
let f_ref = &f;
let fruit_sender_ref = &fruit_sender;
scope.spawn(move |_| {
let fruit = f_ref(arg);
if let Err(err) = fruit_sender_ref.send((idx, fruit)) {
for arg_with_idx in args_with_indices {
scope.spawn(|_| {
let (idx, arg) = arg_with_idx;
let fruit = f(arg);
if let Err(err) = fruit_sender.send((idx, fruit)) {
error!(
"Failed to send search task. It probably means all search \
threads have panicked. {:?}",
@@ -75,19 +71,18 @@ impl Executor {
// This is important as it makes it possible for the fruit_receiver iteration to
// terminate.
};
let mut result_placeholders: Vec<Option<R>> =
std::iter::repeat_with(|| None).take(num_fruits).collect();
// This is lame, but safe.
let mut results_with_position = Vec::with_capacity(num_fruits);
for (pos, fruit_res) in fruit_receiver {
let fruit = fruit_res?;
result_placeholders[pos] = Some(fruit);
results_with_position.push((pos, fruit));
}
let results: Vec<R> = result_placeholders.into_iter().flatten().collect();
if results.len() != num_fruits {
return Err(TantivyError::InternalError(
"One of the mapped execution failed.".to_string(),
));
}
Ok(results)
results_with_position.sort_by_key(|(pos, _)| *pos);
assert_eq!(results_with_position.len(), num_fruits);
Ok(results_with_position
.into_iter()
.map(|(_, fruit)| fruit)
.collect::<Vec<_>>())
}
}
}
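A standalone sketch of the order-preserving pattern both versions above implement: each task is tagged with its input index, results are gathered over a channel in completion order, and are put back into input order before being returned. Names, bounds, and error handling are illustrative, not tantivy's API.

    use crossbeam::channel;

    fn ordered_parallel_map<A: Send, R: Send, F: Fn(A) -> R + Sync>(
        pool: &rayon::ThreadPool,
        args: Vec<A>,
        f: F,
    ) -> Vec<R> {
        let num_tasks = args.len();
        let (sender, receiver) = channel::unbounded();
        pool.scope(|scope| {
            for (idx, arg) in args.into_iter().enumerate() {
                let sender = sender.clone();
                let f = &f;
                scope.spawn(move |_| {
                    // Tag every result with the index of its input.
                    let _ = sender.send((idx, f(arg)));
                });
            }
            // Drop the original sender so only the per-task clones remain;
            // once they are gone the receive loop below terminates.
            drop(sender);
        });
        let mut slots: Vec<Option<R>> = std::iter::repeat_with(|| None).take(num_tasks).collect();
        for (idx, result) in receiver {
            slots[idx] = Some(result);
        }
        slots.into_iter().flatten().collect()
    }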

View File

@@ -74,7 +74,6 @@ fn load_metas(
pub struct IndexBuilder {
schema: Option<Schema>,
index_settings: IndexSettings,
tokenizer_manager: TokenizerManager,
}
impl Default for IndexBuilder {
fn default() -> Self {
@@ -87,7 +86,6 @@ impl IndexBuilder {
Self {
schema: None,
index_settings: IndexSettings::default(),
tokenizer_manager: TokenizerManager::default(),
}
}
@@ -105,12 +103,6 @@ impl IndexBuilder {
self
}
/// Set the tokenizers .
pub fn tokenizers(mut self, tokenizers: TokenizerManager) -> Self {
self.tokenizer_manager = tokenizers;
self
}
/// Creates a new index using the `RAMDirectory`.
///
/// The index will be allocated in anonymous memory.
@@ -162,8 +154,7 @@ impl IndexBuilder {
if !Index::exists(&*dir)? {
return self.create(dir);
}
let mut index = Index::open(dir)?;
index.set_tokenizers(self.tokenizer_manager.clone());
let index = Index::open(dir)?;
if index.schema() == self.get_expect_schema()? {
Ok(index)
} else {
@@ -185,8 +176,7 @@ impl IndexBuilder {
)?;
let mut metas = IndexMeta::with_schema(self.get_expect_schema()?);
metas.index_settings = self.index_settings;
let mut index = Index::open_from_metas(directory, &metas, SegmentMetaInventory::default());
index.set_tokenizers(self.tokenizer_manager);
let index = Index::open_from_metas(directory, &metas, SegmentMetaInventory::default());
Ok(index)
}
}
@@ -314,11 +304,6 @@ impl Index {
}
}
/// Setter for the tokenizer manager.
pub fn set_tokenizers(&mut self, tokenizers: TokenizerManager) {
self.tokenizers = tokenizers;
}
/// Accessor for the tokenizer manager.
pub fn tokenizers(&self) -> &TokenizerManager {
&self.tokenizers
@@ -329,31 +314,20 @@ impl Index {
let field_entry = self.schema.get_field_entry(field);
let field_type = field_entry.field_type();
let tokenizer_manager: &TokenizerManager = self.tokenizers();
let indexing_options_opt = match field_type {
FieldType::JsonObject(options) => options.get_text_indexing_options(),
FieldType::Str(options) => options.get_indexing_options(),
_ => {
return Err(TantivyError::SchemaError(format!(
"{:?} is not a text field.",
field_entry.name()
)))
}
let tokenizer_name_opt: Option<TextAnalyzer> = match field_type {
FieldType::Str(text_options) => text_options
.get_indexing_options()
.map(|text_indexing_options| text_indexing_options.tokenizer().to_string())
.and_then(|tokenizer_name| tokenizer_manager.get(&tokenizer_name)),
_ => None,
};
let indexing_options = indexing_options_opt.ok_or_else(|| {
TantivyError::InvalidArgument(format!(
"No indexing options set for field {:?}",
field_entry
))
})?;
tokenizer_manager
.get(indexing_options.tokenizer())
.ok_or_else(|| {
TantivyError::InvalidArgument(format!(
"No Tokenizer found for field {:?}",
field_entry
))
})
match tokenizer_name_opt {
Some(tokenizer) => Ok(tokenizer),
None => Err(TantivyError::SchemaError(format!(
"{:?} is not a text field.",
field_entry.name()
))),
}
}
/// Create a default `IndexReader` for the given index.
@@ -583,8 +557,7 @@ impl fmt::Debug for Index {
mod tests {
use crate::directory::{RamDirectory, WatchCallback};
use crate::schema::{Field, Schema, INDEXED, TEXT};
use crate::tokenizer::TokenizerManager;
use crate::{Directory, Index, IndexBuilder, IndexReader, IndexSettings, ReloadPolicy};
use crate::{Directory, Index, IndexReader, IndexSettings, ReloadPolicy};
#[test]
fn test_indexer_for_field() {
@@ -600,21 +573,6 @@ mod tests {
);
}
#[test]
fn test_set_tokenizer_manager() {
let mut schema_builder = Schema::builder();
schema_builder.add_u64_field("num_likes", INDEXED);
schema_builder.add_text_field("body", TEXT);
let schema = schema_builder.build();
let index = IndexBuilder::new()
// set empty tokenizer manager
.tokenizers(TokenizerManager::new())
.schema(schema)
.create_in_ram()
.unwrap();
assert!(index.tokenizers().get("raw").is_none());
}
#[test]
fn test_index_exists() {
let directory: Box<dyn Directory> = Box::new(RamDirectory::create());
@@ -744,7 +702,7 @@ mod tests {
.try_into()?;
assert_eq!(reader.searcher().num_docs(), 0);
writer.add_document(doc!(field=>1u64))?;
let (sender, receiver) = crossbeam_channel::unbounded();
let (sender, receiver) = crossbeam::channel::unbounded();
let _handle = index.directory_mut().watch(WatchCallback::new(move || {
let _ = sender.send(());
}));
@@ -779,7 +737,7 @@ mod tests {
reader: &IndexReader,
) -> crate::Result<()> {
let mut reader_index = reader.index();
let (sender, receiver) = crossbeam_channel::unbounded();
let (sender, receiver) = crossbeam::channel::unbounded();
let _watch_handle = reader_index
.directory_mut()
.watch(WatchCallback::new(move || {

View File

@@ -239,7 +239,7 @@ impl InnerSegmentMeta {
///
/// Contains settings which are applied on the whole
/// index, like presort documents.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
#[derive(Clone, Debug, Default, Serialize, Deserialize, Eq, PartialEq)]
pub struct IndexSettings {
/// Sorts the documents by information
/// provided in `IndexSortByField`
@@ -248,26 +248,7 @@ pub struct IndexSettings {
/// The `Compressor` used to compress the doc store.
#[serde(default)]
pub docstore_compression: Compressor,
#[serde(default = "default_docstore_blocksize")]
/// The size of each block that will be compressed and written to disk
pub docstore_blocksize: usize,
}
/// Must be a function to be compatible with serde defaults
fn default_docstore_blocksize() -> usize {
16_384
}
impl Default for IndexSettings {
fn default() -> Self {
Self {
sort_by_field: None,
docstore_compression: Compressor::default(),
docstore_blocksize: default_docstore_blocksize(),
}
}
}
/// Settings to presort the documents in an index
///
/// Presorting documents can greatly performance
@@ -420,7 +401,7 @@ mod tests {
let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
assert_eq!(
json,
r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"lz4","docstore_blocksize":16384},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"lz4"},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
);
let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap();
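A hedged construction sketch for the settings serialized in the test above, assuming the struct on this branch (two fields, no `docstore_blocksize`); `schema` is assumed to be built elsewhere and the snippet lives in a function returning `tantivy::Result<()>`.

    use tantivy::store::Compressor;
    use tantivy::{IndexBuilder, IndexSettings, IndexSortByField, Order};

    let settings = IndexSettings {
        sort_by_field: Some(IndexSortByField {
            field: "text".to_string(),
            order: Order::Asc,
        }),
        docstore_compression: Compressor::Lz4,
    };
    let index = IndexBuilder::new()
        .schema(schema)
        .settings(settings)
        .create_in_ram()?;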

View File

@@ -110,7 +110,7 @@ mod tests {
let tmp_file = tmp_dir.path().join("watched.txt");
let counter: Arc<AtomicUsize> = Default::default();
let (tx, rx) = crossbeam_channel::unbounded();
let (tx, rx) = crossbeam::channel::unbounded();
let timeout = Duration::from_millis(100);
let watcher = FileWatcher::new(&tmp_file);
@@ -153,7 +153,7 @@ mod tests {
let tmp_file = tmp_dir.path().join("watched.txt");
let counter: Arc<AtomicUsize> = Default::default();
let (tx, rx) = crossbeam_channel::unbounded();
let (tx, rx) = crossbeam::channel::unbounded();
let timeout = Duration::from_millis(100);
let watcher = FileWatcher::new(&tmp_file);

View File

@@ -181,7 +181,7 @@ fn test_directory_delete(directory: &dyn Directory) -> crate::Result<()> {
fn test_watch(directory: &dyn Directory) {
let counter: Arc<AtomicUsize> = Default::default();
let (tx, rx) = crossbeam_channel::unbounded();
let (tx, rx) = crossbeam::channel::unbounded();
let timeout = Duration::from_millis(500);
let handle = directory

View File

@@ -300,7 +300,7 @@ impl IntFastFieldWriter {
/// If the document has more than one value for the given field,
/// only the first one is taken in account.
///
/// Values on text fast fields are skipped.
/// Values for string fast fields are skipped.
pub fn add_document(&mut self, doc: &Document) {
match doc.get_first(self.field) {
Some(v) => {

View File

@@ -4,6 +4,7 @@ use std::thread;
use std::thread::JoinHandle;
use common::BitSet;
use crossbeam::channel;
use smallvec::smallvec;
use super::operation::{AddOperation, UserOperation};
@@ -288,7 +289,7 @@ impl IndexWriter {
return Err(TantivyError::InvalidArgument(err_msg));
}
let (document_sender, document_receiver): (AddBatchSender, AddBatchReceiver) =
crossbeam_channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS);
channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS);
let delete_queue = DeleteQueue::new();
@@ -325,7 +326,7 @@ impl IndexWriter {
}
fn drop_sender(&mut self) {
let (sender, _receiver) = crossbeam_channel::bounded(1);
let (sender, _receiver) = channel::bounded(1);
self.operation_sender = sender;
}
@@ -531,7 +532,7 @@ impl IndexWriter {
/// Returns the former segment_ready channel.
fn recreate_document_channel(&mut self) {
let (document_sender, document_receiver): (AddBatchSender, AddBatchReceiver) =
crossbeam_channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS);
channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS);
self.operation_sender = document_sender;
self.index_writer_status = IndexWriterStatus::from(document_receiver);
}

View File

@@ -92,7 +92,7 @@ impl Drop for IndexWriterBomb {
mod tests {
use std::mem;
use crossbeam_channel as channel;
use crossbeam::channel;
use super::IndexWriterStatus;

View File

@@ -4,7 +4,7 @@ use murmurhash32::murmurhash2;
use crate::fastfield::FastValue;
use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
use crate::schema::term::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP};
use crate::schema::{Field, Type};
use crate::schema::Type;
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{OffsetDateTime, UtcOffset};
use crate::tokenizer::TextAnalyzer;
@@ -57,7 +57,7 @@ struct IndexingPositionsPerPath {
impl IndexingPositionsPerPath {
fn get_position(&mut self, term: &Term) -> &mut IndexingPosition {
self.positions_per_path
.entry(murmurhash2(term.as_slice()))
.entry(murmurhash2(term.value_bytes()))
.or_insert_with(Default::default)
}
}
@@ -199,81 +199,16 @@ fn infer_type_from_str(text: &str) -> TextOrDateTime {
}
}
// Tries to infer a JSON type from a string
pub(crate) fn convert_to_fast_value_and_get_term(
json_term_writer: &mut JsonTermWriter,
phrase: &str,
) -> Option<Term> {
if let Ok(dt) = OffsetDateTime::parse(phrase, &Rfc3339) {
let dt_utc = dt.to_offset(UtcOffset::UTC);
return Some(set_fastvalue_and_get_term(
json_term_writer,
DateTime::from_utc(dt_utc),
));
}
if let Ok(u64_val) = str::parse::<u64>(phrase) {
return Some(set_fastvalue_and_get_term(json_term_writer, u64_val));
}
if let Ok(i64_val) = str::parse::<i64>(phrase) {
return Some(set_fastvalue_and_get_term(json_term_writer, i64_val));
}
if let Ok(f64_val) = str::parse::<f64>(phrase) {
return Some(set_fastvalue_and_get_term(json_term_writer, f64_val));
}
None
}
// helper function to generate a Term from a json fastvalue
pub(crate) fn set_fastvalue_and_get_term<T: FastValue>(
json_term_writer: &mut JsonTermWriter,
value: T,
) -> Term {
json_term_writer.set_fast_value(value);
json_term_writer.term().clone()
}
// helper function to generate a list of terms with their positions from a textual json value
pub(crate) fn set_string_and_get_terms(
json_term_writer: &mut JsonTermWriter,
value: &str,
text_analyzer: &TextAnalyzer,
) -> Vec<(usize, Term)> {
let mut positions_and_terms = Vec::<(usize, Term)>::new();
json_term_writer.close_path_and_set_type(Type::Str);
let term_num_bytes = json_term_writer.term_buffer.as_slice().len();
let mut token_stream = text_analyzer.token_stream(value);
token_stream.process(&mut |token| {
json_term_writer.term_buffer.truncate(term_num_bytes);
json_term_writer
.term_buffer
.append_bytes(token.text.as_bytes());
positions_and_terms.push((token.position, json_term_writer.term().clone()));
});
positions_and_terms
}
pub struct JsonTermWriter<'a> {
term_buffer: &'a mut Term,
path_stack: Vec<usize>,
}
impl<'a> JsonTermWriter<'a> {
pub fn from_field_and_json_path(
field: Field,
json_path: &str,
term_buffer: &'a mut Term,
) -> Self {
term_buffer.set_field(Type::Json, field);
let mut json_term_writer = Self::wrap(term_buffer);
for segment in json_path.split('.') {
json_term_writer.push_path_segment(segment);
}
json_term_writer
}
pub fn wrap(term_buffer: &'a mut Term) -> Self {
term_buffer.clear_with_type(Type::Json);
let mut path_stack = Vec::with_capacity(10);
path_stack.push(5);
path_stack.push(5); // magic number?
Self {
term_buffer,
path_stack,
@@ -315,8 +250,8 @@ impl<'a> JsonTermWriter<'a> {
/// Returns the json path of the term being currently built.
#[cfg(test)]
pub(crate) fn path(&self) -> &[u8] {
let end_of_path = self.path_stack.last().cloned().unwrap_or(6);
&self.term().as_slice()[5..end_of_path - 1]
let end_of_path = self.path_stack.last().cloned().unwrap_or(6); // TODO remove magic number
&self.term().value_bytes()[..end_of_path - 1]
}
pub fn set_fast_value<T: FastValue>(&mut self, val: T) {
@@ -386,10 +321,7 @@ mod tests {
let mut json_writer = JsonTermWriter::wrap(&mut term);
json_writer.push_path_segment("color");
json_writer.set_str("red");
assert_eq!(
json_writer.term().as_slice(),
b"\x00\x00\x00\x01jcolor\x00sred"
)
assert_eq!(json_writer.term().value_bytes(), b"color\x00sred")
}
#[test]
@@ -401,8 +333,8 @@ mod tests {
json_writer.push_path_segment("color");
json_writer.set_fast_value(-4i64);
assert_eq!(
json_writer.term().as_slice(),
b"\x00\x00\x00\x01jcolor\x00i\x7f\xff\xff\xff\xff\xff\xff\xfc"
json_writer.term().value_bytes(),
b"color\x00i\x7f\xff\xff\xff\xff\xff\xff\xfc"
)
}
@@ -415,8 +347,8 @@ mod tests {
json_writer.push_path_segment("color");
json_writer.set_fast_value(4u64);
assert_eq!(
json_writer.term().as_slice(),
b"\x00\x00\x00\x01jcolor\x00u\x00\x00\x00\x00\x00\x00\x00\x04"
json_writer.term().value_bytes(),
b"color\x00u\x00\x00\x00\x00\x00\x00\x00\x04"
)
}
@@ -429,8 +361,8 @@ mod tests {
json_writer.push_path_segment("color");
json_writer.set_fast_value(4.0f64);
assert_eq!(
json_writer.term().as_slice(),
b"\x00\x00\x00\x01jcolor\x00f\xc0\x10\x00\x00\x00\x00\x00\x00"
json_writer.term().value_bytes(),
b"color\x00f\xc0\x10\x00\x00\x00\x00\x00\x00"
)
}
@@ -445,8 +377,8 @@ mod tests {
json_writer.push_path_segment("color");
json_writer.set_str("red");
assert_eq!(
json_writer.term().as_slice(),
b"\x00\x00\x00\x01jattribute\x01color\x00sred"
json_writer.term().value_bytes(),
b"attribute\x01color\x00sred"
)
}
@@ -460,10 +392,7 @@ mod tests {
json_writer.push_path_segment("hue");
json_writer.pop_path_segment();
json_writer.set_str("red");
assert_eq!(
json_writer.term().as_slice(),
b"\x00\x00\x00\x01jcolor\x00sred"
)
assert_eq!(json_writer.term().value_bytes(), b"color\x00sred")
}
#[test]
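An illustration of the value-byte layout asserted in the tests above (not a tantivy API): json path segments are joined by the 0x01 segment separator, terminated by the 0x00 end-of-path byte, then a one-byte type tag ('s', 'u', 'i', 'f', ...) followed by the encoded value. On this branch the field id and type prefix are no longer part of `value_bytes()`.

    fn json_value_bytes(path: &[&str], type_tag: u8, value: &[u8]) -> Vec<u8> {
        let mut bytes = Vec::new();
        for (i, segment) in path.iter().enumerate() {
            if i > 0 {
                bytes.push(0x01); // JSON_PATH_SEGMENT_SEP
            }
            bytes.extend_from_slice(segment.as_bytes());
        }
        bytes.push(0x00); // JSON_END_OF_PATH
        bytes.push(type_tag);
        bytes.extend_from_slice(value);
        bytes
    }

    // Matches the `attribute.color` test above:
    assert_eq!(
        json_value_bytes(&["attribute", "color"], b's', b"red").as_slice(),
        b"attribute\x01color\x00sred"
    );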

View File

@@ -21,13 +21,11 @@ pub mod segment_updater;
mod segment_writer;
mod stamper;
use crossbeam_channel as channel;
use crossbeam::channel;
use smallvec::SmallVec;
pub use self::index_writer::IndexWriter;
pub(crate) use self::json_term_writer::{
convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter,
};
pub(crate) use self::json_term_writer::JsonTermWriter;
pub use self::log_merge_policy::LogMergePolicy;
pub use self::merge_operation::MergeOperation;
pub use self::merge_policy::{MergeCandidate, MergePolicy, NoMergePolicy};

View File

@@ -39,10 +39,9 @@ impl SegmentSerializer {
let postings_serializer = InvertedIndexSerializer::open(&mut segment)?;
let compressor = segment.index().settings().docstore_compression;
let blocksize = segment.index().settings().docstore_blocksize;
Ok(SegmentSerializer {
segment,
store_writer: StoreWriter::new(store_write, compressor, blocksize),
store_writer: StoreWriter::new(store_write, compressor),
fast_field_serializer,
fieldnorms_serializer: Some(fieldnorms_serializer),
postings_serializer,

View File

@@ -1,5 +1,6 @@
use std::borrow::BorrowMut;
use std::collections::HashSet;
use std::io;
use std::io::Write;
use std::ops::Deref;
use std::path::PathBuf;
@@ -26,7 +27,7 @@ use crate::indexer::{
SegmentSerializer,
};
use crate::schema::Schema;
use crate::{FutureResult, Opstamp};
use crate::{FutureResult, Opstamp, TantivyError};
const NUM_MERGE_THREADS: usize = 4;
@@ -72,12 +73,10 @@ fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate::Result<()>
let mut buffer = serde_json::to_vec_pretty(metas)?;
// Just adding a new line at the end of the buffer.
writeln!(&mut buffer)?;
fail_point!("save_metas", |msg| Err(crate::TantivyError::from(
std::io::Error::new(
std::io::ErrorKind::Other,
msg.unwrap_or_else(|| "Undefined".to_string())
)
)));
fail_point!("save_metas", |msg| Err(TantivyError::from(io::Error::new(
io::ErrorKind::Other,
msg.unwrap_or_else(|| "Undefined".to_string())
))));
directory.sync_directory()?;
directory.atomic_write(&META_FILEPATH, &buffer[..])?;
debug!("Saved metas {:?}", serde_json::to_string_pretty(&metas));

View File

@@ -6,8 +6,7 @@ use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
use crate::indexer::json_term_writer::index_json_values;
use crate::indexer::segment_serializer::SegmentSerializer;
use crate::postings::{
compute_table_size, serialize_postings, IndexingContext, IndexingPosition,
PerFieldPostingsWriter, PostingsWriter,
serialize_postings, IndexingContext, IndexingPosition, PerFieldPostingsWriter, PostingsWriter,
};
use crate::schema::{FieldEntry, FieldType, FieldValue, Schema, Term, Value};
use crate::store::{StoreReader, StoreWriter};
@@ -16,25 +15,6 @@ use crate::tokenizer::{
};
use crate::{DocId, Document, Opstamp, SegmentComponent};
/// Computes the initial size of the hash table.
///
/// Returns the recommended initial table size as a power of 2.
///
/// Note this is a very dumb way to compute log2, but it is easier to proofread that way.
fn compute_initial_table_size(per_thread_memory_budget: usize) -> crate::Result<usize> {
let table_memory_upper_bound = per_thread_memory_budget / 3;
(10..20) // We cap it at 2^19 = 512K capacity.
.map(|power| 1 << power)
.take_while(|capacity| compute_table_size(*capacity) < table_memory_upper_bound)
.last()
.ok_or_else(|| {
crate::TantivyError::InvalidArgument(format!(
"per thread memory budget (={per_thread_memory_budget}) is too small. Raise the \
memory budget or lower the number of threads."
))
})
}
fn remap_doc_opstamps(
opstamps: Vec<Opstamp>,
doc_id_mapping_opt: Option<&DocIdMapping>,
@@ -78,12 +58,11 @@ impl SegmentWriter {
/// - segment: The segment being written
/// - schema
pub fn for_segment(
memory_budget_in_bytes: usize,
_memory_budget_in_bytes: usize,
segment: Segment,
schema: Schema,
) -> crate::Result<SegmentWriter> {
let tokenizer_manager = segment.index().tokenizers().clone();
let table_size = compute_initial_table_size(memory_budget_in_bytes)?;
let segment_serializer = SegmentSerializer::for_segment(segment, false)?;
let per_field_postings_writers = PerFieldPostingsWriter::for_schema(&schema);
let per_field_text_analyzers = schema
@@ -106,7 +85,7 @@ impl SegmentWriter {
.collect();
Ok(SegmentWriter {
max_doc: 0,
ctx: IndexingContext::new(table_size),
ctx: IndexingContext::new(),
per_field_postings_writers,
fieldnorms_writer: FieldNormsWriter::for_schema(&schema),
segment_serializer,
@@ -149,6 +128,7 @@ impl SegmentWriter {
pub fn mem_usage(&self) -> usize {
self.ctx.mem_usage()
+ self.fieldnorms_writer.mem_usage()
+ self.per_field_postings_writers.mem_usage()
+ self.fast_field_writers.mem_usage()
+ self.segment_serializer.mem_usage()
}
@@ -223,7 +203,7 @@ impl SegmentWriter {
let mut indexing_position = IndexingPosition::default();
for mut token_stream in token_streams {
assert_eq!(term_buffer.as_slice().len(), 5);
// assert_eq!(term_buffer.as_slice().len(), 5);
postings_writer.index_text(
doc_id,
&mut *token_stream,
@@ -372,10 +352,9 @@ fn remap_and_write(
.segment_mut()
.open_write(SegmentComponent::Store)?;
let compressor = serializer.segment().index().settings().docstore_compression;
let block_size = serializer.segment().index().settings().docstore_blocksize;
let old_store_writer = std::mem::replace(
&mut serializer.store_writer,
StoreWriter::new(store_write, compressor, block_size),
StoreWriter::new(store_write, compressor),
);
old_store_writer.close()?;
let store_read = StoreReader::open(
@@ -419,7 +398,6 @@ pub fn prepare_doc_for_store(doc: Document, schema: &Schema) -> Document {
#[cfg(test)]
mod tests {
use super::compute_initial_table_size;
use crate::collector::Count;
use crate::indexer::json_term_writer::JsonTermWriter;
use crate::postings::TermInfo;
@@ -430,15 +408,6 @@ mod tests {
use crate::tokenizer::{PreTokenizedString, Token};
use crate::{DateTime, DocAddress, DocSet, Document, Index, Postings, Term, TERMINATED};
#[test]
fn test_hashmap_size() {
assert_eq!(compute_initial_table_size(100_000).unwrap(), 1 << 11);
assert_eq!(compute_initial_table_size(1_000_000).unwrap(), 1 << 14);
assert_eq!(compute_initial_table_size(10_000_000).unwrap(), 1 << 17);
assert_eq!(compute_initial_table_size(1_000_000_000).unwrap(), 1 << 19);
assert_eq!(compute_initial_table_size(4_000_000_000).unwrap(), 1 << 19);
}
#[test]
fn test_prepare_for_store() {
let mut schema_builder = Schema::builder();

View File

@@ -1,27 +1,24 @@
use crate::postings::stacker::{MemoryArena, TermHashMap};
use crate::postings::stacker::MemoryArena;
/// IndexingContext contains all of the transient memory arenas
/// required for building the inverted index.
pub(crate) struct IndexingContext {
/// The term index is an adhoc hashmap,
/// itself backed by a dedicated memory arena.
pub term_index: TermHashMap,
/// Arena is a memory arena that stores posting lists / term frequencies / positions.
pub arena: MemoryArena,
pub arena_terms: MemoryArena,
}
impl IndexingContext {
/// Create a new IndexingContext given the size of the term hash map.
pub(crate) fn new(table_size: usize) -> IndexingContext {
let term_index = TermHashMap::new(table_size);
pub(crate) fn new() -> IndexingContext {
IndexingContext {
arena: MemoryArena::new(),
term_index,
arena_terms: MemoryArena::new(),
}
}
/// Returns the memory usage for the inverted index memory arenas, in bytes.
pub(crate) fn mem_usage(&self) -> usize {
self.term_index.mem_usage() + self.arena.mem_usage()
self.arena.mem_usage() + self.arena_terms.mem_usage()
}
}

View File

@@ -1,5 +1,6 @@
use std::io;
use super::stacker::TermHashMap;
use crate::fastfield::MultiValuedFastFieldWriter;
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::postings_writer::SpecializedPostingsWriter;
@@ -26,6 +27,14 @@ impl<Rec: Recorder> From<JsonPostingsWriter<Rec>> for Box<dyn PostingsWriter> {
}
impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
fn mem_usage(&self) -> usize {
self.str_posting_writer.mem_usage() + self.non_str_posting_writer.mem_usage()
}
fn term_map(&self) -> &TermHashMap {
self.str_posting_writer.term_map()
}
fn subscribe(
&mut self,
doc: crate::DocId,
@@ -74,6 +83,7 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
doc_id_map,
&mut buffer_lender,
ctx,
&self.str_posting_writer.term_map,
serializer,
)?;
} else {
@@ -83,6 +93,7 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
doc_id_map,
&mut buffer_lender,
ctx,
&self.str_posting_writer.term_map,
serializer,
)?;
}

View File

@@ -26,7 +26,6 @@ pub(crate) use self::postings_writer::{serialize_postings, IndexingPosition, Pos
pub use self::segment_postings::SegmentPostings;
pub use self::serializer::{FieldSerializer, InvertedIndexSerializer};
pub(crate) use self::skip::{BlockInfo, SkipReader};
pub(crate) use self::stacker::compute_table_size;
pub use self::term_info::TermInfo;
pub(crate) type UnorderedTermId = u64;

View File

@@ -10,9 +10,10 @@ pub(crate) struct PerFieldPostingsWriter {
impl PerFieldPostingsWriter {
pub fn for_schema(schema: &Schema) -> Self {
let num_fields = schema.num_fields();
let per_field_postings_writers = schema
.fields()
.map(|(_, field_entry)| posting_writer_from_field_entry(field_entry))
.map(|(_, field_entry)| posting_writer_from_field_entry(field_entry, num_fields))
.collect();
PerFieldPostingsWriter {
per_field_postings_writers,
@@ -26,9 +27,19 @@ impl PerFieldPostingsWriter {
pub(crate) fn get_for_field_mut(&mut self, field: Field) -> &mut dyn PostingsWriter {
self.per_field_postings_writers[field.field_id() as usize].as_mut()
}
pub(crate) fn mem_usage(&self) -> usize {
self.per_field_postings_writers
.iter()
.map(|postings_writer| postings_writer.mem_usage())
.sum()
}
}
fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> Box<dyn PostingsWriter> {
fn posting_writer_from_field_entry(
field_entry: &FieldEntry,
_num_fields: usize,
) -> Box<dyn PostingsWriter> {
match *field_entry.field_type() {
FieldType::Str(ref text_options) => text_options
.get_indexing_options()

View File

@@ -1,11 +1,10 @@
use std::collections::HashMap;
use std::io;
use std::marker::PhantomData;
use std::ops::Range;
use fnv::FnvHashMap;
use super::stacker::Addr;
use super::stacker::{Addr, TermHashMap};
use crate::fastfield::MultiValuedFastFieldWriter;
use crate::fieldnorm::FieldNormReaders;
use crate::indexer::doc_id_mapping::DocIdMapping;
@@ -21,31 +20,6 @@ use crate::DocId;
const POSITION_GAP: u32 = 1;
fn make_field_partition(
term_offsets: &[(Term<&[u8]>, Addr, UnorderedTermId)],
) -> Vec<(Field, Range<usize>)> {
let term_offsets_it = term_offsets
.iter()
.map(|(term, _, _)| term.field())
.enumerate();
let mut prev_field_opt = None;
let mut fields = vec![];
let mut offsets = vec![];
for (offset, field) in term_offsets_it {
if Some(field) != prev_field_opt {
prev_field_opt = Some(field);
fields.push(field);
offsets.push(offset);
}
}
offsets.push(term_offsets.len());
let mut field_offsets = vec![];
for i in 0..fields.len() {
field_offsets.push((fields[i], offsets[i]..offsets[i + 1]));
}
field_offsets
}
/// Serialize the inverted index.
/// It pushes all term, one field at a time, towards the
/// postings serializer.
@@ -57,23 +31,23 @@ pub(crate) fn serialize_postings(
schema: &Schema,
serializer: &mut InvertedIndexSerializer,
) -> crate::Result<HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>>> {
let mut term_offsets: Vec<(Term<&[u8]>, Addr, UnorderedTermId)> =
Vec::with_capacity(ctx.term_index.len());
term_offsets.extend(ctx.term_index.iter());
term_offsets.sort_unstable_by_key(|(k, _, _)| k.clone());
let mut unordered_term_mappings: HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>> =
HashMap::new();
let field_offsets = make_field_partition(&term_offsets);
for (field, byte_offsets) in field_offsets {
for (field, _) in schema.fields() {
let postings_writer = per_field_postings_writers.get_for_field(field);
let mut term_offsets: Vec<(Term<&[u8]>, Addr, UnorderedTermId)> =
Vec::with_capacity(postings_writer.term_map().len());
term_offsets.extend(postings_writer.term_map().iter(&ctx.arena_terms));
term_offsets.sort_unstable_by_key(|(k, _, _)| k.clone());
let field_entry = schema.get_field_entry(field);
match *field_entry.field_type() {
FieldType::Str(_) | FieldType::Facet(_) => {
// populating the (unordered term ord) -> (ordered term ord) mapping
// for the field.
let unordered_term_ids = term_offsets[byte_offsets.clone()]
.iter()
.map(|&(_, _, bucket)| bucket);
let unordered_term_ids = term_offsets.iter().map(|&(_, _, bucket)| bucket);
let mapping: FnvHashMap<UnorderedTermId, TermOrdinal> = unordered_term_ids
.enumerate()
.map(|(term_ord, unord_term_id)| {
@@ -87,16 +61,10 @@ pub(crate) fn serialize_postings(
FieldType::JsonObject(_) => {}
}
let postings_writer = per_field_postings_writers.get_for_field(field);
let fieldnorm_reader = fieldnorm_readers.get_field(field)?;
let mut field_serializer =
serializer.new_field(field, postings_writer.total_num_tokens(), fieldnorm_reader)?;
postings_writer.serialize(
&term_offsets[byte_offsets],
doc_id_map,
&ctx,
&mut field_serializer,
)?;
postings_writer.serialize(&term_offsets, doc_id_map, &ctx, &mut field_serializer)?;
field_serializer.close()?;
}
Ok(unordered_term_mappings)
@@ -128,6 +96,10 @@ pub(crate) trait PostingsWriter {
ctx: &mut IndexingContext,
) -> UnorderedTermId;
fn mem_usage(&self) -> usize;
fn term_map(&self) -> &TermHashMap;
/// Serializes the postings on disk.
/// The actual serialization format is handled by the `PostingsSerializer`.
fn serialize(
@@ -148,7 +120,7 @@ pub(crate) trait PostingsWriter {
indexing_position: &mut IndexingPosition,
mut term_id_fast_field_writer_opt: Option<&mut MultiValuedFastFieldWriter>,
) {
let end_of_path_idx = term_buffer.as_slice().len();
let end_of_path_idx = term_buffer.value_bytes().len();
let mut num_tokens = 0;
let mut end_position = 0;
token_stream.process(&mut |token: &Token| {
@@ -188,6 +160,7 @@ pub(crate) trait PostingsWriter {
pub(crate) struct SpecializedPostingsWriter<Rec: Recorder> {
total_num_tokens: u64,
_recorder_type: PhantomData<Rec>,
pub(crate) term_map: TermHashMap,
}
impl<Rec: Recorder> From<SpecializedPostingsWriter<Rec>> for Box<dyn PostingsWriter> {
@@ -206,9 +179,10 @@ impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {
doc_id_map: Option<&DocIdMapping>,
buffer_lender: &mut BufferLender,
ctx: &IndexingContext,
term_index: &TermHashMap,
serializer: &mut FieldSerializer,
) -> io::Result<()> {
let recorder: Rec = ctx.term_index.read(addr);
let recorder: Rec = term_index.read(addr, &ctx.arena_terms);
let term_doc_freq = recorder.term_doc_freq().unwrap_or(0u32);
serializer.new_term(term.value_bytes(), term_doc_freq)?;
recorder.serialize(&ctx.arena, doc_id_map, serializer, buffer_lender);
@@ -218,6 +192,14 @@ impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {
}
impl<Rec: Recorder> PostingsWriter for SpecializedPostingsWriter<Rec> {
fn mem_usage(&self) -> usize {
self.term_map.mem_usage()
}
fn term_map(&self) -> &TermHashMap {
&self.term_map
}
fn subscribe(
&mut self,
doc: DocId,
@@ -225,25 +207,30 @@ impl<Rec: Recorder> PostingsWriter for SpecializedPostingsWriter<Rec> {
term: &Term,
ctx: &mut IndexingContext,
) -> UnorderedTermId {
debug_assert!(term.as_slice().len() >= 4);
//debug_assert!(term.value_bytes().len() >= 1);
self.total_num_tokens += 1;
let (term_index, arena) = (&mut ctx.term_index, &mut ctx.arena);
term_index.mutate_or_create(term.as_slice(), |opt_recorder: Option<Rec>| {
if let Some(mut recorder) = opt_recorder {
let current_doc = recorder.current_doc();
if current_doc != doc {
recorder.close_doc(arena);
let arena = &mut ctx.arena;
let arena_terms = &mut ctx.arena_terms;
self.term_map.mutate_or_create(
term.value_bytes(),
arena_terms,
|opt_recorder: Option<Rec>| {
if let Some(mut recorder) = opt_recorder {
let current_doc = recorder.current_doc();
if current_doc != doc {
recorder.close_doc(arena);
recorder.new_doc(doc, arena);
}
recorder.record_position(position, arena);
recorder
} else {
let mut recorder = Rec::default();
recorder.new_doc(doc, arena);
recorder.record_position(position, arena);
recorder
}
recorder.record_position(position, arena);
recorder
} else {
let mut recorder = Rec::default();
recorder.new_doc(doc, arena);
recorder.record_position(position, arena);
recorder
}
}) as UnorderedTermId
},
) as UnorderedTermId
}
fn serialize(
@@ -255,7 +242,15 @@ impl<Rec: Recorder> PostingsWriter for SpecializedPostingsWriter<Rec> {
) -> io::Result<()> {
let mut buffer_lender = BufferLender::default();
for (term, addr, _) in term_addrs {
Self::serialize_one_term(term, *addr, doc_id_map, &mut buffer_lender, ctx, serializer)?;
Self::serialize_one_term(
term,
*addr,
doc_id_map,
&mut buffer_lender,
ctx,
&self.term_map,
serializer,
)?;
}
Ok(())
}

View File

@@ -46,6 +46,7 @@ impl Addr {
}
/// Returns the `Addr` object for `addr + offset`
#[inline]
pub fn offset(self, offset: u32) -> Addr {
Addr(self.0.wrapping_add(offset))
}
@@ -54,20 +55,24 @@ impl Addr {
Addr((page_id << NUM_BITS_PAGE_ADDR | local_addr) as u32)
}
#[inline]
fn page_id(self) -> usize {
(self.0 as usize) >> NUM_BITS_PAGE_ADDR
}
#[inline]
fn page_local_addr(self) -> usize {
(self.0 as usize) & (PAGE_SIZE - 1)
}
/// Returns true if and only if the `Addr` is null.
#[inline]
pub fn is_null(self) -> bool {
self.0 == u32::max_value()
}
}
#[inline]
pub fn store<Item: Copy + 'static>(dest: &mut [u8], val: Item) {
assert_eq!(dest.len(), std::mem::size_of::<Item>());
unsafe {
@@ -75,6 +80,7 @@ pub fn store<Item: Copy + 'static>(dest: &mut [u8], val: Item) {
}
}
#[inline]
pub fn load<Item: Copy + 'static>(data: &[u8]) -> Item {
assert_eq!(data.len(), std::mem::size_of::<Item>());
unsafe { ptr::read_unaligned(data.as_ptr() as *const Item) }
@@ -110,6 +116,7 @@ impl MemoryArena {
self.pages.len() * PAGE_SIZE
}
#[inline]
pub fn write_at<Item: Copy + 'static>(&mut self, addr: Addr, val: Item) {
let dest = self.slice_mut(addr, std::mem::size_of::<Item>());
store(dest, val);
@@ -120,6 +127,7 @@ impl MemoryArena {
/// # Panics
///
/// If the address is erroneous
#[inline]
pub fn read<Item: Copy + 'static>(&self, addr: Addr) -> Item {
load(self.slice(addr, mem::size_of::<Item>()))
}
@@ -128,6 +136,7 @@ impl MemoryArena {
self.pages[addr.page_id()].slice(addr.page_local_addr(), len)
}
#[inline]
pub fn slice_from(&self, addr: Addr) -> &[u8] {
self.pages[addr.page_id()].slice_from(addr.page_local_addr())
}
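A self-contained sketch of the paged addressing used by `Addr` above; the constant values are assumed for illustration only (the real `NUM_BITS_PAGE_ADDR` / `PAGE_SIZE` live in memory_arena.rs and are what the "smaller page size" commit tunes).

    const NUM_BITS_PAGE_ADDR: usize = 20; // assumed for the sketch
    const PAGE_SIZE: usize = 1 << NUM_BITS_PAGE_ADDR;

    // An Addr packs (page_id, offset within the page) into a single u32.
    fn split_addr(addr: u32) -> (usize, usize) {
        let page_id = (addr as usize) >> NUM_BITS_PAGE_ADDR;
        let page_local_addr = (addr as usize) & (PAGE_SIZE - 1);
        (page_id, page_local_addr)
    }

    fn pack_addr(page_id: usize, local_addr: usize) -> u32 {
        ((page_id << NUM_BITS_PAGE_ADDR) | local_addr) as u32
    }

    // Round-trip example: split_addr(pack_addr(3, 42)) == (3, 42)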

View File

@@ -4,4 +4,4 @@ mod term_hashmap;
pub(crate) use self::expull::ExpUnrolledLinkedList;
pub(crate) use self::memory_arena::{Addr, MemoryArena};
pub(crate) use self::term_hashmap::{compute_table_size, TermHashMap};
pub(crate) use self::term_hashmap::TermHashMap;

View File

@@ -1,6 +1,6 @@
use std::convert::TryInto;
use std::{iter, mem, slice};
use byteorder::{ByteOrder, NativeEndian};
use murmurhash32::murmurhash2;
use super::{Addr, MemoryArena};
@@ -8,13 +8,6 @@ use crate::postings::stacker::memory_arena::store;
use crate::postings::UnorderedTermId;
use crate::Term;
/// Returns the actual memory size in bytes
/// required to create a table with a given capacity.
/// required to create a table of size
pub(crate) fn compute_table_size(capacity: usize) -> usize {
capacity * mem::size_of::<KeyValue>()
}
/// `KeyValue` is the item stored in the hash table.
/// The key is actually a `BytesRef` object stored in an external memory arena.
/// The `value_addr` also points to an address in the memory arena.
@@ -36,6 +29,7 @@ impl Default for KeyValue {
}
impl KeyValue {
#[inline]
fn is_empty(self) -> bool {
self.key_value_addr.is_null()
}
@@ -51,12 +45,17 @@ impl KeyValue {
/// or copying the key as long as there is no insert.
pub struct TermHashMap {
table: Box<[KeyValue]>,
memory_arena: MemoryArena,
mask: usize,
occupied: Vec<usize>,
len: usize,
}
impl Default for TermHashMap {
fn default() -> Self {
Self::new(1 << 10)
}
}
struct QuadraticProbing {
hash: usize,
i: usize,
@@ -75,18 +74,21 @@ impl QuadraticProbing {
}
}
pub struct Iter<'a> {
pub struct Iter<'a, 'm> {
hashmap: &'a TermHashMap,
memory_arena: &'m MemoryArena,
inner: slice::Iter<'a, usize>,
}
impl<'a> Iterator for Iter<'a> {
type Item = (Term<&'a [u8]>, Addr, UnorderedTermId);
impl<'a, 'm> Iterator for Iter<'a, 'm> {
type Item = (Term<&'m [u8]>, Addr, UnorderedTermId);
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().cloned().map(move |bucket: usize| {
let kv = self.hashmap.table[bucket];
let (key, offset): (&'a [u8], Addr) = self.hashmap.get_key_value(kv.key_value_addr);
let (key, offset): (&'m [u8], Addr) = self
.hashmap
.get_key_value(kv.key_value_addr, self.memory_arena);
(Term::wrap(key), offset, kv.unordered_term_id)
})
}
@@ -106,21 +108,19 @@ impl TermHashMap {
pub(crate) fn new(table_size: usize) -> TermHashMap {
assert!(table_size > 0);
let table_size_power_of_2 = compute_previous_power_of_two(table_size);
let memory_arena = MemoryArena::new();
let table: Vec<KeyValue> = iter::repeat(KeyValue::default())
.take(table_size_power_of_2)
.collect();
TermHashMap {
table: table.into_boxed_slice(),
memory_arena,
mask: table_size_power_of_2 - 1,
occupied: Vec::with_capacity(table_size_power_of_2 / 2),
len: 0,
}
}
pub fn read<Item: Copy + 'static>(&self, addr: Addr) -> Item {
self.memory_arena.read(addr)
pub fn read<Item: Copy + 'static>(&self, addr: Addr, memory_arena: &MemoryArena) -> Item {
memory_arena.read(addr)
}
fn probe(&self, hash: u32) -> QuadraticProbing {
@@ -129,6 +129,8 @@ impl TermHashMap {
pub fn mem_usage(&self) -> usize {
self.table.len() * mem::size_of::<KeyValue>()
+ self.occupied.len()
* std::mem::size_of_val(&self.occupied.get(0).cloned().unwrap_or_default())
}
fn is_saturated(&self) -> bool {
@@ -136,16 +138,22 @@ impl TermHashMap {
}
#[inline]
fn get_key_value(&self, addr: Addr) -> (&[u8], Addr) {
let data = self.memory_arena.slice_from(addr);
let key_bytes_len = NativeEndian::read_u16(data) as usize;
let key_bytes: &[u8] = &data[2..][..key_bytes_len];
fn get_key_value<'m>(&self, addr: Addr, memory_arena: &'m MemoryArena) -> (&'m [u8], Addr) {
let data = memory_arena.slice_from(addr);
let (key_bytes_len_enc, data) = data.split_at(2);
let key_bytes_len: u16 = u16::from_ne_bytes(key_bytes_len_enc.try_into().unwrap());
let key_bytes: &[u8] = &data[..key_bytes_len as usize];
(key_bytes, addr.offset(2u32 + key_bytes_len as u32))
}
#[inline]
fn get_value_addr_if_key_match(&self, target_key: &[u8], addr: Addr) -> Option<Addr> {
let (stored_key, value_addr) = self.get_key_value(addr);
fn get_value_addr_if_key_match(
&self,
target_key: &[u8],
addr: Addr,
memory_arena: &mut MemoryArena,
) -> Option<Addr> {
let (stored_key, value_addr) = self.get_key_value(addr, memory_arena);
if stored_key == target_key {
Some(value_addr)
} else {
@@ -169,10 +177,11 @@ impl TermHashMap {
self.len
}
pub fn iter(&self) -> Iter<'_> {
pub fn iter<'a, 'm>(&'a self, memory_arena: &'m MemoryArena) -> Iter<'a, 'm> {
Iter {
inner: self.occupied.iter(),
hashmap: self,
memory_arena,
}
}
@@ -209,6 +218,7 @@ impl TermHashMap {
pub fn mutate_or_create<V, TMutator>(
&mut self,
key: &[u8],
memory_arena: &mut MemoryArena,
mut updater: TMutator,
) -> UnorderedTermId
where
@@ -219,28 +229,33 @@ impl TermHashMap {
self.resize();
}
let hash = murmurhash2(key);
let mut probe = self.probe(hash);
loop {
let bucket = probe.next_probe();
let kv: KeyValue = self.table[bucket];
if kv.is_empty() {
// The key does not exist yet.
let val = updater(None);
let num_bytes = std::mem::size_of::<u16>() + key.len() + std::mem::size_of::<V>();
let key_addr = self.memory_arena.allocate_space(num_bytes);
let key_addr = memory_arena.allocate_space(num_bytes);
{
let data = self.memory_arena.slice_mut(key_addr, num_bytes);
NativeEndian::write_u16(data, key.len() as u16);
let stop = 2 + key.len();
data[2..stop].copy_from_slice(key);
let data = memory_arena.slice_mut(key_addr, num_bytes);
let (key_len, data) = data.split_at_mut(2);
key_len.copy_from_slice(&(key.len() as u16).to_le_bytes());
let stop = key.len();
data[..key.len()].copy_from_slice(key);
store(&mut data[stop..], val);
}
return self.set_bucket(hash, key_addr, bucket);
} else if kv.hash == hash {
if let Some(val_addr) = self.get_value_addr_if_key_match(key, kv.key_value_addr) {
let v = self.memory_arena.read(val_addr);
if let Some(val_addr) =
self.get_value_addr_if_key_match(key, kv.key_value_addr, memory_arena)
{
let v = memory_arena.read(val_addr);
let new_v = updater(Some(v));
self.memory_arena.write_at(val_addr, new_v);
memory_arena.write_at(val_addr, new_v);
return kv.unordered_term_id;
}
}
@@ -254,26 +269,28 @@ mod tests {
use std::collections::HashMap;
use super::{compute_previous_power_of_two, TermHashMap};
use crate::postings::stacker::MemoryArena;
#[test]
fn test_hash_map() {
let mut arena = MemoryArena::new();
let mut hash_map: TermHashMap = TermHashMap::new(1 << 18);
hash_map.mutate_or_create(b"abc", |opt_val: Option<u32>| {
hash_map.mutate_or_create(b"abc", &mut arena, |opt_val: Option<u32>| {
assert_eq!(opt_val, None);
3u32
});
hash_map.mutate_or_create(b"abcd", |opt_val: Option<u32>| {
hash_map.mutate_or_create(b"abcd", &mut arena, |opt_val: Option<u32>| {
assert_eq!(opt_val, None);
4u32
});
hash_map.mutate_or_create(b"abc", |opt_val: Option<u32>| {
hash_map.mutate_or_create(b"abc", &mut arena, |opt_val: Option<u32>| {
assert_eq!(opt_val, Some(3u32));
5u32
});
let mut vanilla_hash_map = HashMap::new();
let iter_values = hash_map.iter();
let iter_values = hash_map.iter(&arena);
for (key, addr, _) in iter_values {
let val: u32 = hash_map.memory_arena.read(addr);
let val: u32 = arena.read(addr);
vanilla_hash_map.insert(key.to_owned(), val);
}
assert_eq!(vanilla_hash_map.len(), 2);
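With this refactor the `MemoryArena` is owned by the caller rather than by each `TermHashMap`, so inserts, reads and iteration all thread the arena through explicitly. A minimal sketch of the new calling convention, mirroring the test above (this is internal, pub(crate) API, so it only compiles inside the crate):

let mut arena = MemoryArena::new();
let mut map = TermHashMap::new(1 << 10);

// Insert or bump a counter for a term; the arena must now be passed in.
let _term_id = map.mutate_or_create(b"hello", &mut arena, |opt_count: Option<u32>| {
    opt_count.map(|c| c + 1).unwrap_or(1u32)
});

// Iteration also borrows the arena to resolve keys and value addresses.
for (term, addr, _unordered_term_id) in map.iter(&arena) {
    let count: u32 = arena.read(addr);
    let _ = (term, count);
}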

View File

@@ -184,66 +184,6 @@ fn intersection_with_slop(left: &mut [u32], right: &[u32], slop: u32) -> usize {
count
}
fn intersection_count_with_slop(left: &[u32], right: &[u32], slop: u32) -> usize {
let mut left_index = 0;
let mut right_index = 0;
let mut count = 0;
let left_len = left.len();
let right_len = right.len();
while left_index < left_len && right_index < right_len {
let left_val = left[left_index];
let right_val = right[right_index];
let right_slop = if right_val >= slop {
right_val - slop
} else {
0
};
if left_val < right_slop {
left_index += 1;
} else if right_slop <= left_val && left_val <= right_val {
while left_index + 1 < left_len {
let next_left_val = left[left_index + 1];
if next_left_val > right_val {
break;
}
left_index += 1;
}
count += 1;
left_index += 1;
right_index += 1;
} else if left_val > right_val {
right_index += 1;
}
}
count
}
fn intersection_exists_with_slop(left: &[u32], right: &[u32], slop: u32) -> bool {
let mut left_index = 0;
let mut right_index = 0;
let left_len = left.len();
let right_len = right.len();
while left_index < left_len && right_index < right_len {
let left_val = left[left_index];
let right_val = right[right_index];
let right_slop = if right_val >= slop {
right_val - slop
} else {
0
};
if left_val < right_slop {
left_index += 1;
} else if right_slop <= left_val && left_val <= right_val {
return true;
} else if left_val > right_val {
right_index += 1;
}
}
false
}
impl<TPostings: Postings> PhraseScorer<TPostings> {
pub fn new(
term_postings: Vec<(usize, TPostings)>,
@@ -297,25 +237,11 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
fn phrase_exists(&mut self) -> bool {
let intersection_len = self.compute_phrase_match();
if self.has_slop() {
return intersection_exists_with_slop(
&self.left[..intersection_len],
&self.right[..],
self.slop,
);
}
intersection_exists(&self.left[..intersection_len], &self.right[..])
}
fn compute_phrase_count(&mut self) -> u32 {
let intersection_len = self.compute_phrase_match();
if self.has_slop() {
return intersection_count_with_slop(
&self.left[..intersection_len],
&self.right[..],
self.slop,
) as u32;
}
intersection_count(&self.left[..intersection_len], &self.right[..]) as u32
}
@@ -326,7 +252,12 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
.positions(&mut self.left);
}
let mut intersection_len = self.left.len();
for i in 1..self.num_terms - 1 {
let end_term = if self.has_slop() {
self.num_terms
} else {
self.num_terms - 1
};
for i in 1..end_term {
{
self.intersection_docset
.docset_mut_specialized(i)

View File

@@ -1,4 +1,4 @@
use std::collections::HashMap;
use std::collections::{BTreeSet, HashMap};
use std::num::{ParseFloatError, ParseIntError};
use std::ops::Bound;
use std::str::FromStr;
@@ -7,9 +7,7 @@ use tantivy_query_grammar::{UserInputAst, UserInputBound, UserInputLeaf, UserInp
use super::logical_ast::*;
use crate::core::Index;
use crate::indexer::{
convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter,
};
use crate::indexer::JsonTermWriter;
use crate::query::{
AllQuery, BooleanQuery, BoostQuery, EmptyQuery, Occur, PhraseQuery, Query, RangeQuery,
TermQuery,
@@ -18,7 +16,7 @@ use crate::schema::{
Facet, FacetParseError, Field, FieldType, IndexRecordOption, Schema, Term, Type,
};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::time::{OffsetDateTime, UtcOffset};
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
use crate::{DateTime, Score};
@@ -32,7 +30,7 @@ pub enum QueryParserError {
#[error("Unsupported query: {0}")]
UnsupportedQuery(String),
/// The query references a field that is not in the schema
#[error("Field does not exists: '{0}'")]
#[error("Field does not exists: '{0:?}'")]
FieldDoesNotExist(String),
/// The query contains a term for a `u64` or `i64`-field, but the value
/// is neither.
@@ -55,11 +53,11 @@ pub enum QueryParserError {
NoDefaultFieldDeclared,
/// The field searched for is not declared
/// as indexed in the schema.
#[error("The field '{0}' is not declared as indexed")]
#[error("The field '{0:?}' is not declared as indexed")]
FieldNotIndexed(String),
/// A phrase query was requested for a field that does not
/// have any positions indexed.
#[error("The field '{0}' does not have positions indexed")]
#[error("The field '{0:?}' does not have positions indexed")]
FieldDoesNotHavePositionsIndexed(String),
/// The tokenizer for the given field is unknown
/// The two argument strings are the name of the field, the name of the tokenizer
@@ -171,7 +169,7 @@ pub struct QueryParser {
conjunction_by_default: bool,
tokenizer_manager: TokenizerManager,
boost: HashMap<Field, Score>,
field_names: HashMap<String, Field>,
field_names: BTreeSet<String>,
}
fn all_negative(ast: &LogicalAst) -> bool {
@@ -184,31 +182,6 @@ fn all_negative(ast: &LogicalAst) -> bool {
}
}
// Returns the positions (in byte offsets) of the unescaped '.'s in the `field_path`.
//
// This function operates directly on bytes (as opposed to codepoints), relying
// on an encoding property of utf-8 for its correctness.
fn locate_splitting_dots(field_path: &str) -> Vec<usize> {
let mut splitting_dots_pos = Vec::new();
let mut escape_state = false;
for (pos, b) in field_path.bytes().enumerate() {
if escape_state {
escape_state = false;
continue;
}
match b {
b'\\' => {
escape_state = true;
}
b'.' => {
splitting_dots_pos.push(pos);
}
_ => {}
}
}
splitting_dots_pos
}
impl QueryParser {
/// Creates a `QueryParser`, given
/// * schema - index Schema
@@ -220,7 +193,7 @@ impl QueryParser {
) -> QueryParser {
let field_names = schema
.fields()
.map(|(field, field_entry)| (field_entry.name().to_string(), field))
.map(|(_, field_entry)| field_entry.name().to_string())
.collect();
QueryParser {
schema,
@@ -234,18 +207,25 @@ impl QueryParser {
// Splits a full_path as written in a query, into a field name and a
// json path.
pub(crate) fn split_full_path<'a>(&self, full_path: &'a str) -> Option<(Field, &'a str)> {
if let Some(field) = self.field_names.get(full_path) {
return Some((*field, ""));
pub(crate) fn split_full_path<'a>(&self, full_path: &'a str) -> (&'a str, &'a str) {
if full_path.is_empty() {
return ("", "");
}
let mut splitting_period_pos: Vec<usize> = locate_splitting_dots(full_path);
while let Some(pos) = splitting_period_pos.pop() {
let (prefix, suffix) = full_path.split_at(pos);
if let Some(field) = self.field_names.get(prefix) {
return Some((*field, &suffix[1..]));
if self.field_names.contains(full_path) {
return (full_path, "");
}
let mut result = ("", full_path);
let mut cursor = 0;
while let Some(pos) = full_path[cursor..].find('.') {
cursor += pos;
let prefix = &full_path[..cursor];
let suffix = &full_path[cursor + 1..];
if self.field_names.contains(prefix) {
result = (prefix, suffix);
}
cursor += 1;
}
None
result
}
/// Creates a `QueryParser`, given
@@ -298,6 +278,12 @@ impl QueryParser {
self.compute_logical_ast(user_input_ast)
}
fn resolve_field_name(&self, field_name: &str) -> Result<Field, QueryParserError> {
self.schema
.get_field(field_name)
.ok_or_else(|| QueryParserError::FieldDoesNotExist(String::from(field_name)))
}
fn compute_logical_ast(
&self,
user_input_ast: UserInputAst,
@@ -404,12 +390,6 @@ impl QueryParser {
if !field_type.is_indexed() {
return Err(QueryParserError::FieldNotIndexed(field_name.to_string()));
}
if field_type.value_type() != Type::Json && !json_path.is_empty() {
let field_name = self.schema.get_field_name(field);
return Err(QueryParserError::FieldDoesNotExist(format!(
"{field_name}.{json_path}"
)));
}
match *field_type {
FieldType::U64(_) => {
let val: u64 = u64::from_str(phrase)?;
@@ -551,56 +531,37 @@ impl QueryParser {
})
}
/// Given a literal, returns the list of terms that should be searched.
///
/// The terms are identified by a triplet:
/// - tantivy field
/// - field_path: tantivy has JSON fields. It is possible to target a member of a JSON
/// object by naturally extending the json field name with a "." separated field_path
/// - field_phrase: the phrase that is being searched.
///
/// The literal identifies the targeted field by a so-called *full field path*,
/// specified before the ":". (e.g. identity.username:fulmicoton).
///
/// The way we split the full field path into (field_name, field_path) can be ambiguous,
/// because field_names can contain "." themselves.
/// For instance, if a field is named `one.two` and another one is named `one`,
/// should `one.two:three` target `one.two` with field path `` or `one` with
/// the field path `two`.
///
/// In this case, tantivy just picks the solution with the longest field name.
///
/// Quirk: As a hack for quickwit, we do not split over a dot that appears escaped '\.'.
fn compute_path_triplets_for_literal<'a>(
fn compute_path_triplet_for_literal<'a>(
&self,
literal: &'a UserInputLiteral,
) -> Result<Vec<(Field, &'a str, &'a str)>, QueryParserError> {
let full_path = if let Some(full_path) = &literal.field_name {
full_path
} else {
// The user did not specify any path...
// We simply target default fields.
if self.default_fields.is_empty() {
return Err(QueryParserError::NoDefaultFieldDeclared);
match &literal.field_name {
Some(ref full_path) => {
// We need to add terms associated to json default fields.
let (field_name, path) = self.split_full_path(full_path);
if let Ok(field) = self.resolve_field_name(field_name) {
return Ok(vec![(field, path, literal.phrase.as_str())]);
}
let triplets: Vec<(Field, &str, &str)> = self
.default_indexed_json_fields()
.map(|json_field| (json_field, full_path.as_str(), literal.phrase.as_str()))
.collect();
if triplets.is_empty() {
return Err(QueryParserError::FieldDoesNotExist(full_path.to_string()));
}
Ok(triplets)
}
None => {
if self.default_fields.is_empty() {
return Err(QueryParserError::NoDefaultFieldDeclared);
}
Ok(self
.default_fields
.iter()
.map(|default_field| (*default_field, "", literal.phrase.as_str()))
.collect::<Vec<(Field, &str, &str)>>())
}
return Ok(self
.default_fields
.iter()
.map(|default_field| (*default_field, "", literal.phrase.as_str()))
.collect::<Vec<(Field, &str, &str)>>());
};
if let Some((field, path)) = self.split_full_path(full_path) {
return Ok(vec![(field, path, literal.phrase.as_str())]);
}
// We need to add terms associated to json default fields.
let triplets: Vec<(Field, &str, &str)> = self
.default_indexed_json_fields()
.map(|json_field| (json_field, full_path.as_str(), literal.phrase.as_str()))
.collect();
if triplets.is_empty() {
return Err(QueryParserError::FieldDoesNotExist(full_path.to_string()));
}
Ok(triplets)
}
fn compute_logical_ast_from_leaf(
@@ -610,7 +571,7 @@ impl QueryParser {
match leaf {
UserInputLeaf::Literal(literal) => {
let term_phrases: Vec<(Field, &str, &str)> =
self.compute_path_triplets_for_literal(&literal)?;
self.compute_path_triplet_for_literal(&literal)?;
let mut asts: Vec<LogicalAst> = Vec::new();
for (field, json_path, phrase) in term_phrases {
for ast in self.compute_logical_ast_for_leaf(field, json_path, phrase)? {
@@ -637,9 +598,8 @@ impl QueryParser {
"Range query need to target a specific field.".to_string(),
)
})?;
let (field, json_path) = self
.split_full_path(&full_path)
.ok_or_else(|| QueryParserError::FieldDoesNotExist(full_path.clone()))?;
let (field_name, json_path) = self.split_full_path(&full_path);
let field = self.resolve_field_name(field_name)?;
let field_entry = self.schema.get_field_entry(field);
let value_type = field_entry.field_type().value_type();
let logical_ast = LogicalAst::Leaf(Box::new(LogicalLiteral::Range {
@@ -700,6 +660,30 @@ fn generate_literals_for_str(
Ok(Some(LogicalLiteral::Phrase(terms)))
}
enum NumValue {
U64(u64),
I64(i64),
F64(f64),
DateTime(OffsetDateTime),
}
fn infer_type_num(phrase: &str) -> Option<NumValue> {
if let Ok(dt) = OffsetDateTime::parse(phrase, &Rfc3339) {
let dt_utc = dt.to_offset(UtcOffset::UTC);
return Some(NumValue::DateTime(dt_utc));
}
if let Ok(u64_val) = str::parse::<u64>(phrase) {
return Some(NumValue::U64(u64_val));
}
if let Ok(i64_val) = str::parse::<i64>(phrase) {
return Some(NumValue::I64(i64_val));
}
if let Ok(f64_val) = str::parse::<f64>(phrase) {
return Some(NumValue::F64(f64_val));
}
None
}
fn generate_literals_for_json_object(
field_name: &str,
field: Field,
@@ -710,13 +694,38 @@ fn generate_literals_for_json_object(
) -> Result<Vec<LogicalLiteral>, QueryParserError> {
let mut logical_literals = Vec::new();
let mut term = Term::new();
let mut json_term_writer =
JsonTermWriter::from_field_and_json_path(field, json_path, &mut term);
if let Some(term) = convert_to_fast_value_and_get_term(&mut json_term_writer, phrase) {
logical_literals.push(LogicalLiteral::Term(term));
term.set_field(Type::Json, field);
let mut json_term_writer = JsonTermWriter::wrap(&mut term);
for segment in json_path.split('.') {
json_term_writer.push_path_segment(segment);
}
let terms = set_string_and_get_terms(&mut json_term_writer, phrase, text_analyzer);
if let Some(num_value) = infer_type_num(phrase) {
match num_value {
NumValue::U64(u64_val) => {
json_term_writer.set_fast_value(u64_val);
}
NumValue::I64(i64_val) => {
json_term_writer.set_fast_value(i64_val);
}
NumValue::F64(f64_val) => {
json_term_writer.set_fast_value(f64_val);
}
NumValue::DateTime(dt_val) => {
json_term_writer.set_fast_value(DateTime::from_utc(dt_val));
}
}
logical_literals.push(LogicalLiteral::Term(json_term_writer.term().clone()));
}
json_term_writer.close_path_and_set_type(Type::Str);
drop(json_term_writer);
let term_num_bytes = term.value_bytes().len();
let mut token_stream = text_analyzer.token_stream(phrase);
let mut terms: Vec<(usize, Term)> = Vec::new();
token_stream.process(&mut |token| {
term.truncate(term_num_bytes);
term.append_bytes(token.text.as_bytes());
terms.push((token.position, term.clone()));
});
if terms.len() <= 1 {
for (_, term) in terms {
logical_literals.push(LogicalLiteral::Term(term));
@@ -1390,56 +1399,29 @@ mod test {
}
}
#[test]
fn test_escaped_field() {
let mut schema_builder = Schema::builder();
schema_builder.add_text_field(r#"a\.b"#, STRING);
let schema = schema_builder.build();
let query_parser = QueryParser::new(schema, Vec::new(), TokenizerManager::default());
let query = query_parser.parse_query(r#"a\.b:hello"#).unwrap();
assert_eq!(
format!("{:?}", query),
"TermQuery(Term(type=Str, field=0, \"hello\"))"
);
}
#[test]
fn test_split_full_path() {
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("second", STRING);
schema_builder.add_text_field("first", STRING);
schema_builder.add_text_field("first.toto", STRING);
schema_builder.add_text_field("first.toto.titi", STRING);
schema_builder.add_text_field("third.a.b.c", STRING);
let schema = schema_builder.build();
let query_parser =
QueryParser::new(schema.clone(), Vec::new(), TokenizerManager::default());
let query_parser = QueryParser::new(schema, Vec::new(), TokenizerManager::default());
assert_eq!(
query_parser.split_full_path("first.toto"),
Some((schema.get_field("first.toto").unwrap(), ""))
);
assert_eq!(
query_parser.split_full_path("first.toto.bubu"),
Some((schema.get_field("first.toto").unwrap(), "bubu"))
);
assert_eq!(
query_parser.split_full_path("first.toto.titi"),
Some((schema.get_field("first.toto.titi").unwrap(), ""))
("first.toto", "")
);
assert_eq!(
query_parser.split_full_path("first.titi"),
Some((schema.get_field("first").unwrap(), "titi"))
("first", "titi")
);
assert_eq!(query_parser.split_full_path("third"), None);
assert_eq!(query_parser.split_full_path("hello.toto"), None);
assert_eq!(query_parser.split_full_path(""), None);
assert_eq!(query_parser.split_full_path("firsty"), None);
}
#[test]
fn test_locate_splitting_dots() {
assert_eq!(&super::locate_splitting_dots("a.b.c"), &[1, 3]);
assert_eq!(&super::locate_splitting_dots(r#"a\.b.c"#), &[4]);
assert_eq!(&super::locate_splitting_dots(r#"a\..b.c"#), &[3, 5]);
assert_eq!(query_parser.split_full_path("third"), ("", "third"));
assert_eq!(
query_parser.split_full_path("hello.toto"),
("", "hello.toto")
);
assert_eq!(query_parser.split_full_path(""), ("", ""));
assert_eq!(query_parser.split_full_path("firsty"), ("", "firsty"));
}
}
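When a query value targets a JSON field, the parser first tries to interpret the phrase as a typed value before falling back to text tokens. A condensed, std-only sketch of that inference order (the code above additionally tries an RFC 3339 date via the `time` crate before the numeric types):

#[derive(Debug, PartialEq)]
enum Inferred {
    U64(u64),
    I64(i64),
    F64(f64),
    Text,
}

// Mirrors the fallthrough in `infer_type_num`: unsigned, then signed, then float.
fn infer(phrase: &str) -> Inferred {
    if let Ok(v) = phrase.parse::<u64>() {
        return Inferred::U64(v);
    }
    if let Ok(v) = phrase.parse::<i64>() {
        return Inferred::I64(v);
    }
    if let Ok(v) = phrase.parse::<f64>() {
        return Inferred::F64(v);
    }
    Inferred::Text
}

fn main() {
    assert_eq!(infer("983"), Inferred::U64(983));
    assert_eq!(infer("-7"), Inferred::I64(-7));
    assert_eq!(infer("2.5"), Inferred::F64(2.5));
    assert_eq!(infer("red"), Inferred::Text);
}

For a literal such as `attributes.count:4` (a hypothetical field path), `generate_literals_for_json_object` therefore emits both a fast-value term (u64 4) and a text term ("4"), so either representation stored in the JSON field can match.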

View File

@@ -2,7 +2,7 @@ use std::ops::{Deref, DerefMut};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use crossbeam_channel::{unbounded, Receiver, RecvError, Sender};
use crossbeam::channel::{unbounded, Receiver, RecvError, Sender};
pub struct GenerationItem<T> {
generation: usize,
@@ -197,7 +197,7 @@ mod tests {
use std::{iter, mem};
use crossbeam_channel as channel;
use crossbeam::channel;
use super::{Pool, Queue};

View File

@@ -147,7 +147,7 @@ impl WarmingStateInner {
/// Every [GC_INTERVAL], attempt to GC, with panics caught and logged using
/// [std::panic::catch_unwind].
fn gc_loop(inner: Weak<Mutex<WarmingStateInner>>) {
for _ in crossbeam_channel::tick(GC_INTERVAL) {
for _ in crossbeam::channel::tick(GC_INTERVAL) {
if let Some(inner) = inner.upgrade() {
// rely on deterministic gc in tests
#[cfg(not(test))]

View File

@@ -213,8 +213,6 @@ impl BinarySerializable for Document {
#[cfg(test)]
mod tests {
use common::BinarySerializable;
use crate::schema::*;
#[test]
@@ -225,22 +223,4 @@ mod tests {
doc.add_text(text_field, "My title");
assert_eq!(doc.field_values().len(), 1);
}
#[test]
fn test_doc_serialization_issue() {
let mut doc = Document::default();
doc.add_json_object(
Field::from_field_id(0),
serde_json::json!({"key": 2u64})
.as_object()
.unwrap()
.clone(),
);
doc.add_text(Field::from_field_id(1), "hello");
assert_eq!(doc.field_values().len(), 2);
let mut payload: Vec<u8> = Vec::new();
doc.serialize(&mut payload).unwrap();
assert_eq!(payload.len(), 26);
Document::deserialize(&mut &payload[..]).unwrap();
}
}

View File

@@ -17,7 +17,7 @@ use crate::DateTime;
///
/// - <value> is, if this is not the json term, a binary representation specific to the type.
/// If it is a JSON term, then it is prepended with the path that leads to this leaf value.
const FAST_VALUE_TERM_LEN: usize = 4 + 1 + 8;
const FAST_VALUE_TERM_LEN: usize = 8;
/// Separates the different segments of
/// the json path.
@@ -33,22 +33,33 @@ pub const JSON_END_OF_PATH: u8 = 0u8;
///
/// It actually wraps a `Vec<u8>`.
#[derive(Clone)]
pub struct Term<B = Vec<u8>>(B)
where B: AsRef<[u8]>;
pub struct Term<B = Vec<u8>> {
data: B,
field: Field,
field_type: Type,
}
impl AsMut<Vec<u8>> for Term {
fn as_mut(&mut self) -> &mut Vec<u8> {
&mut self.0
&mut self.data
}
}
impl Term {
pub(crate) fn new() -> Term {
Term(Vec::with_capacity(100))
Self::with_capacity(32)
}
pub(crate) fn with_capacity(cap: usize) -> Term {
Term {
data: Vec::with_capacity(cap),
field: Field::from_field_id(0),
field_type: Type::Str,
}
}
fn from_fast_value<T: FastValue>(field: Field, val: &T) -> Term {
let mut term = Term(vec![0u8; FAST_VALUE_TERM_LEN]);
let mut term = Term::with_capacity(FAST_VALUE_TERM_LEN);
term.set_field(T::to_type(), field);
term.set_u64(val.to_u64());
term
@@ -86,9 +97,9 @@ impl Term {
}
fn create_bytes_term(typ: Type, field: Field, bytes: &[u8]) -> Term {
let mut term = Term(vec![0u8; 5 + bytes.len()]);
let mut term = Term::with_capacity(bytes.len());
term.set_field(typ, field);
term.0.extend_from_slice(bytes);
term.data.extend_from_slice(bytes);
term
}
@@ -98,10 +109,9 @@ impl Term {
}
pub(crate) fn set_field(&mut self, typ: Type, field: Field) {
self.0.clear();
self.0
.extend_from_slice(field.field_id().to_be_bytes().as_ref());
self.0.push(typ.to_code());
self.field = field;
self.field_type = typ;
self.data.clear();
}
/// Sets a u64 value in the term.
@@ -112,11 +122,9 @@ impl Term {
/// the natural order of the values.
pub fn set_u64(&mut self, val: u64) {
self.set_fast_value(val);
self.set_bytes(val.to_be_bytes().as_ref());
}
fn set_fast_value<T: FastValue>(&mut self, val: T) {
self.0.resize(FAST_VALUE_TERM_LEN, 0u8);
self.set_bytes(val.to_u64().to_be_bytes().as_ref());
}
@@ -137,8 +145,8 @@ impl Term {
/// Sets the value of a `Bytes` field.
pub fn set_bytes(&mut self, bytes: &[u8]) {
self.0.resize(5, 0u8);
self.0.extend(bytes);
self.data.clear();
self.data.extend(bytes);
}
/// Set the texts only, keeping the field untouched.
@@ -148,18 +156,18 @@ impl Term {
/// Removes the value_bytes and sets the type code.
pub fn clear_with_type(&mut self, typ: Type) {
self.truncate(5);
self.0[4] = typ.to_code();
self.data.clear();
self.field_type = typ;
}
/// Truncate the term right after the field and the type code.
pub fn truncate(&mut self, len: usize) {
self.0.truncate(len);
self.data.truncate(len);
}
/// Appends bytes at the end of the term.
pub fn append_bytes(&mut self, bytes: &[u8]) {
self.0.extend_from_slice(bytes);
self.data.extend_from_slice(bytes);
}
}
@@ -167,7 +175,7 @@ impl<B> Ord for Term<B>
where B: AsRef<[u8]>
{
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.as_slice().cmp(other.as_slice())
self.value_bytes().cmp(other.value_bytes())
}
}
@@ -183,7 +191,7 @@ impl<B> PartialEq for Term<B>
where B: AsRef<[u8]>
{
fn eq(&self, other: &Self) -> bool {
self.as_slice() == other.as_slice()
self.value_bytes() == other.value_bytes()
}
}
@@ -193,7 +201,7 @@ impl<B> Hash for Term<B>
where B: AsRef<[u8]>
{
fn hash<H: Hasher>(&self, state: &mut H) {
self.0.as_ref().hash(state)
self.data.as_ref().hash(state)
}
}
@@ -202,14 +210,15 @@ where B: AsRef<[u8]>
{
/// Wraps an object holding bytes
pub fn wrap(data: B) -> Term<B> {
Term(data)
Term {
data,
field: Field::from_field_id(0),
field_type: Type::Str,
}
}
fn typ_code(&self) -> u8 {
*self
.as_slice()
.get(4)
.expect("the byte representation is too short")
self.field_type as u8
}
/// Return the type of the term.
@@ -219,55 +228,7 @@ where B: AsRef<[u8]>
/// Returns the field.
pub fn field(&self) -> Field {
let mut field_id_bytes = [0u8; 4];
field_id_bytes.copy_from_slice(&self.0.as_ref()[..4]);
Field::from_field_id(u32::from_be_bytes(field_id_bytes))
}
/// Returns the `u64` value stored in a term.
///
/// Returns None if the term is not of the u64 type, or if the term byte representation
/// is invalid.
pub fn as_u64(&self) -> Option<u64> {
self.get_fast_type::<u64>()
}
fn get_fast_type<T: FastValue>(&self) -> Option<T> {
if self.typ() != T::to_type() {
return None;
}
let mut value_bytes = [0u8; 8];
let bytes = self.value_bytes();
if bytes.len() != 8 {
return None;
}
value_bytes.copy_from_slice(self.value_bytes());
let value_u64 = u64::from_be_bytes(value_bytes);
Some(FastValue::from_u64(value_u64))
}
/// Returns the `i64` value stored in a term.
///
/// Returns None if the term is not of the i64 type, or if the term byte representation
/// is invalid.
pub fn as_i64(&self) -> Option<i64> {
self.get_fast_type::<i64>()
}
/// Returns the `f64` value stored in a term.
///
/// Returns None if the term is not of the f64 type, or if the term byte representation
/// is invalid.
pub fn as_f64(&self) -> Option<f64> {
self.get_fast_type::<f64>()
}
/// Returns the `Date` value stored in a term.
///
/// Returns None if the term is not of the Date type, or if the term byte representation
/// is invalid.
pub fn as_date(&self) -> Option<DateTime> {
self.get_fast_type::<DateTime>()
self.field
}
/// Returns the text associated with the term.
@@ -275,43 +236,12 @@ where B: AsRef<[u8]>
/// Returns None if the field is not of string type
/// or if the bytes are not valid utf-8.
pub fn as_str(&self) -> Option<&str> {
if self.as_slice().len() < 5 {
return None;
}
if self.typ() != Type::Str {
return None;
}
str::from_utf8(self.value_bytes()).ok()
}
/// Returns the facet associated with the term.
///
/// Returns None if the field is not of facet type
/// or if the bytes are not valid utf-8.
pub fn as_facet(&self) -> Option<Facet> {
if self.as_slice().len() < 5 {
return None;
}
if self.typ() != Type::Facet {
return None;
}
let facet_encode_str = str::from_utf8(self.value_bytes()).ok()?;
Some(Facet::from_encoded_string(facet_encode_str.to_string()))
}
/// Returns the bytes associated with the term.
///
/// Returns None if the field is not of bytes type.
pub fn as_bytes(&self) -> Option<&[u8]> {
if self.as_slice().len() < 5 {
return None;
}
if self.typ() != Type::Bytes {
return None;
}
Some(self.value_bytes())
}
/// Returns the serialized value of the term.
/// (this does not include the field.)
///
@@ -319,15 +249,7 @@ where B: AsRef<[u8]>
/// If the term is a u64, its value is encoded in
/// big-endian byte order (see `to_be_bytes` above).
pub fn value_bytes(&self) -> &[u8] {
&self.0.as_ref()[5..]
}
/// Returns the underlying `&[u8]`.
///
/// Do NOT rely on this byte representation in the index.
/// This value is likely to change in the future.
pub(crate) fn as_slice(&self) -> &[u8] {
self.0.as_ref()
&self.data.as_ref()
}
}
@@ -434,7 +356,6 @@ mod tests {
let term = Term::from_field_u64(count_field, 983u64);
assert_eq!(term.field(), count_field);
assert_eq!(term.typ(), Type::U64);
assert_eq!(term.as_slice().len(), super::FAST_VALUE_TERM_LEN);
assert_eq!(term.as_u64(), Some(983u64))
assert_eq!(term.value_bytes().len(), super::FAST_VALUE_TERM_LEN);
}
}
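The change above moves the field id and the type code out of the serialized term bytes and into plain struct members, which is why `FAST_VALUE_TERM_LEN` shrinks from 13 to 8. A hedged illustration of the two layouts for a u64 fast value (the exact type-code byte is an assumption made for the sake of the example):

fn main() {
    let field_id: u32 = 1;
    let type_code: u8 = b'u'; // assumed code for Type::U64, illustration only
    let value: u64 = 983;

    // Old layout: <field id, 4 bytes BE><type code, 1 byte><value, 8 bytes BE>
    let mut old_term_bytes = Vec::new();
    old_term_bytes.extend_from_slice(&field_id.to_be_bytes());
    old_term_bytes.push(type_code);
    old_term_bytes.extend_from_slice(&value.to_be_bytes());
    assert_eq!(old_term_bytes.len(), 4 + 1 + 8);

    // New layout: only the big-endian value bytes live in the term buffer;
    // the field and the type travel alongside as struct members.
    let new_value_bytes = value.to_be_bytes().to_vec();
    assert_eq!(new_value_bytes.len(), 8);
}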

View File

@@ -42,11 +42,6 @@ impl TextOptions {
/// Text fast fields will have the term ids stored in the fast field.
/// The fast field will be a multivalued fast field.
///
/// The effective cardinality depends on the tokenizer. When creating fast fields on text
/// fields, it is recommended to use the "raw" tokenizer, since it will store the original text
/// unchanged. The "default" tokenizer will store the terms in lower case, and this will be
/// reflected in the dictionary.
///
/// The original text can be retrieved via `ord_to_term` from the dictionary.
#[must_use]
pub fn set_fast(mut self) -> TextOptions {
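Following the recommendation in the doc comment above, a hedged sketch of declaring a text fast field with the "raw" tokenizer so that the dictionary keeps the original text (builder methods assumed from the public schema API):

use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions};

fn category_schema() -> Schema {
    let mut schema_builder = Schema::builder();
    let opts = TextOptions::default()
        .set_fast()
        .set_indexing_options(
            TextFieldIndexing::default()
                .set_tokenizer("raw")
                .set_index_option(IndexRecordOption::Basic),
        );
    schema_builder.add_text_field("category", opts);
    schema_builder.build()
}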

View File

@@ -388,16 +388,8 @@ mod binary_serialize {
}
}
JSON_OBJ_CODE => {
// As explained in
// https://docs.serde.rs/serde_json/fn.from_reader.html
//
// `T::from_reader(..)` expects EOF after reading the object,
// which is not what we want here.
//
// For this reason we need to create our own `Deserializer`.
let mut de = serde_json::Deserializer::from_reader(reader);
let json_map = <serde_json::Map::<String, serde_json::Value> as serde::Deserialize>::deserialize(&mut de)?;
Ok(Value::JsonObject(json_map))
let map = serde_json::from_reader(reader)?;
Ok(Value::JsonObject(map))
}
_ => Err(io::Error::new(
io::ErrorKind::InvalidData,

View File

@@ -1,50 +0,0 @@
use std::io;
use zstd::bulk::{compress_to_buffer, decompress_to_buffer};
use zstd::DEFAULT_COMPRESSION_LEVEL;
#[inline]
pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
let count_size = std::mem::size_of::<u32>();
let max_size = zstd::zstd_safe::compress_bound(uncompressed.len()) + count_size;
compressed.clear();
compressed.resize(max_size, 0);
let compressed_size = compress_to_buffer(
uncompressed,
&mut compressed[count_size..],
DEFAULT_COMPRESSION_LEVEL,
)?;
compressed[0..count_size].copy_from_slice(&(uncompressed.len() as u32).to_le_bytes());
compressed.resize(compressed_size + count_size, 0);
Ok(())
}
#[inline]
pub fn decompress(compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> {
let count_size = std::mem::size_of::<u32>();
let uncompressed_size = u32::from_le_bytes(
compressed
.get(..count_size)
.ok_or(io::ErrorKind::InvalidData)?
.try_into()
.unwrap(),
) as usize;
decompressed.clear();
decompressed.resize(uncompressed_size, 0);
let decompressed_size = decompress_to_buffer(&compressed[count_size..], decompressed)?;
if decompressed_size != uncompressed_size {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"doc store block not completely decompressed, data corruption".to_string(),
));
}
Ok(())
}

View File

@@ -26,9 +26,6 @@ pub enum Compressor {
#[serde(rename = "snappy")]
/// Use the snap compressor
Snappy,
#[serde(rename = "zstd")]
/// Use the zstd compressor
Zstd,
}
impl Default for Compressor {
@@ -39,8 +36,6 @@ impl Default for Compressor {
Compressor::Brotli
} else if cfg!(feature = "snappy-compression") {
Compressor::Snappy
} else if cfg!(feature = "zstd-compression") {
Compressor::Zstd
} else {
Compressor::None
}
@@ -54,7 +49,6 @@ impl Compressor {
1 => Compressor::Lz4,
2 => Compressor::Brotli,
3 => Compressor::Snappy,
4 => Compressor::Zstd,
_ => panic!("unknown compressor id {:?}", id),
}
}
@@ -64,7 +58,6 @@ impl Compressor {
Self::Lz4 => 1,
Self::Brotli => 2,
Self::Snappy => 3,
Self::Zstd => 4,
}
}
#[inline]
@@ -105,16 +98,6 @@ impl Compressor {
panic!("snappy-compression feature flag not activated");
}
}
Self::Zstd => {
#[cfg(feature = "zstd-compression")]
{
super::compression_zstd_block::compress(uncompressed, compressed)
}
#[cfg(not(feature = "zstd-compression"))]
{
panic!("zstd-compression feature flag not activated");
}
}
}
}
@@ -160,16 +143,6 @@ impl Compressor {
panic!("snappy-compression feature flag not activated");
}
}
Self::Zstd => {
#[cfg(feature = "zstd-compression")]
{
super::compression_zstd_block::decompress(compressed, decompressed)
}
#[cfg(not(feature = "zstd-compression"))]
{
panic!("zstd-compression feature flag not activated");
}
}
}
}
}

View File

@@ -50,9 +50,6 @@ mod compression_brotli;
#[cfg(feature = "snappy-compression")]
mod compression_snap;
#[cfg(feature = "zstd-compression")]
mod compression_zstd_block;
#[cfg(test)]
pub mod tests {
@@ -72,13 +69,10 @@ pub mod tests {
sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt \
mollit anim id est laborum.";
const BLOCK_SIZE: usize = 16_384;
pub fn write_lorem_ipsum_store(
writer: WritePtr,
num_docs: usize,
compressor: Compressor,
blocksize: usize,
) -> Schema {
let mut schema_builder = Schema::builder();
let field_body = schema_builder.add_text_field("body", TextOptions::default().set_stored());
@@ -86,7 +80,7 @@ pub mod tests {
schema_builder.add_text_field("title", TextOptions::default().set_stored());
let schema = schema_builder.build();
{
let mut store_writer = StoreWriter::new(writer, compressor, blocksize);
let mut store_writer = StoreWriter::new(writer, compressor);
for i in 0..num_docs {
let mut doc = Document::default();
doc.add_field_value(field_body, LOREM.to_string());
@@ -109,7 +103,7 @@ pub mod tests {
let path = Path::new("store");
let directory = RamDirectory::create();
let store_wrt = directory.open_write(path)?;
let schema = write_lorem_ipsum_store(store_wrt, NUM_DOCS, Compressor::Lz4, BLOCK_SIZE);
let schema = write_lorem_ipsum_store(store_wrt, NUM_DOCS, Compressor::Lz4);
let field_title = schema.get_field("title").unwrap();
let store_file = directory.open_read(path)?;
let store = StoreReader::open(store_file)?;
@@ -145,11 +139,11 @@ pub mod tests {
Ok(())
}
fn test_store(compressor: Compressor, blocksize: usize) -> crate::Result<()> {
fn test_store(compressor: Compressor) -> crate::Result<()> {
let path = Path::new("store");
let directory = RamDirectory::create();
let store_wrt = directory.open_write(path)?;
let schema = write_lorem_ipsum_store(store_wrt, NUM_DOCS, compressor, blocksize);
let schema = write_lorem_ipsum_store(store_wrt, NUM_DOCS, compressor);
let field_title = schema.get_field("title").unwrap();
let store_file = directory.open_read(path)?;
let store = StoreReader::open(store_file)?;
@@ -175,28 +169,22 @@ pub mod tests {
#[test]
fn test_store_noop() -> crate::Result<()> {
test_store(Compressor::None, BLOCK_SIZE)
test_store(Compressor::None)
}
#[cfg(feature = "lz4-compression")]
#[test]
fn test_store_lz4_block() -> crate::Result<()> {
test_store(Compressor::Lz4, BLOCK_SIZE)
test_store(Compressor::Lz4)
}
#[cfg(feature = "snappy-compression")]
#[test]
fn test_store_snap() -> crate::Result<()> {
test_store(Compressor::Snappy, BLOCK_SIZE)
test_store(Compressor::Snappy)
}
#[cfg(feature = "brotli-compression")]
#[test]
fn test_store_brotli() -> crate::Result<()> {
test_store(Compressor::Brotli, BLOCK_SIZE)
}
#[cfg(feature = "zstd-compression")]
#[test]
fn test_store_zstd() -> crate::Result<()> {
test_store(Compressor::Zstd, BLOCK_SIZE)
test_store(Compressor::Brotli)
}
#[test]
@@ -360,7 +348,6 @@ mod bench {
directory.open_write(path).unwrap(),
1_000,
Compressor::default(),
16_384,
);
directory.delete(path).unwrap();
});
@@ -374,7 +361,6 @@ mod bench {
directory.open_write(path).unwrap(),
1_000,
Compressor::default(),
16_384,
);
let store_file = directory.open_read(path).unwrap();
let store = StoreReader::open(store_file).unwrap();

View File

@@ -304,8 +304,6 @@ mod tests {
use crate::store::tests::write_lorem_ipsum_store;
use crate::Directory;
const BLOCK_SIZE: usize = 16_384;
fn get_text_field<'a>(doc: &'a Document, field: &'a Field) -> Option<&'a str> {
doc.get_first(*field).and_then(|f| f.as_text())
}
@@ -315,7 +313,7 @@ mod tests {
let directory = RamDirectory::create();
let path = Path::new("store");
let writer = directory.open_write(path)?;
let schema = write_lorem_ipsum_store(writer, 500, Compressor::default(), BLOCK_SIZE);
let schema = write_lorem_ipsum_store(writer, 500, Compressor::default());
let title = schema.get_field("title").unwrap();
let store_file = directory.open_read(path)?;
let store = StoreReader::open(store_file)?;

View File

@@ -11,6 +11,8 @@ use crate::schema::Document;
use crate::store::index::Checkpoint;
use crate::DocId;
const BLOCK_SIZE: usize = 16_384;
/// Write tantivy's [`Store`](./index.html)
///
/// Contrary to the other components of `tantivy`,
@@ -20,7 +22,6 @@ use crate::DocId;
/// The skip list index on the other hand, is built in memory.
pub struct StoreWriter {
compressor: Compressor,
block_size: usize,
doc: DocId,
first_doc_in_block: DocId,
offset_index_writer: SkipIndexBuilder,
@@ -34,10 +35,9 @@ impl StoreWriter {
///
/// The store writer writes blocks on disk as
/// documents are added.
pub fn new(writer: WritePtr, compressor: Compressor, block_size: usize) -> StoreWriter {
pub fn new(writer: WritePtr, compressor: Compressor) -> StoreWriter {
StoreWriter {
compressor,
block_size,
doc: 0,
first_doc_in_block: 0,
offset_index_writer: SkipIndexBuilder::new(),
@@ -65,7 +65,7 @@ impl StoreWriter {
VInt(doc_num_bytes as u64).serialize(&mut self.current_block)?;
self.current_block.write_all(serialized_document)?;
self.doc += 1;
if self.current_block.len() > self.block_size {
if self.current_block.len() > BLOCK_SIZE {
self.write_and_compress_block()?;
}
Ok(())
@@ -86,7 +86,7 @@ impl StoreWriter {
self.current_block
.write_all(&self.intermediary_buffer[..])?;
self.doc += 1;
if self.current_block.len() > self.block_size {
if self.current_block.len() > BLOCK_SIZE {
self.write_and_compress_block()?;
}
Ok(())

View File

@@ -28,6 +28,7 @@ use fst_termdict as termdict;
mod sstable_termdict;
#[cfg(feature = "quickwit")]
use sstable_termdict as termdict;
use tantivy_fst::automaton::AlwaysMatch;
#[cfg(test)]
mod tests;
@@ -35,4 +36,24 @@ mod tests;
/// Position of the term in the sorted list of terms.
pub type TermOrdinal = u64;
pub use self::termdict::{TermDictionary, TermDictionaryBuilder, TermMerger, TermStreamer};
/// The term dictionary contains all of the terms in
/// a tantivy index, in sorted order.
pub type TermDictionary = self::termdict::TermDictionary;
/// Builder for the new term dictionary.
///
/// Inserting must be done in the order of the `keys`.
pub type TermDictionaryBuilder<W> = self::termdict::TermDictionaryBuilder<W>;
/// Given a list of sorted term streams,
/// returns an iterator over sorted unique terms.
///
/// The item yielded is actually a pair with
/// - the term
/// - a slice with the ordinal of the segments containing
/// the terms.
pub type TermMerger<'a> = self::termdict::TermMerger<'a>;
/// `TermStreamer` acts as a cursor over a range of terms of a segment.
/// Terms are guaranteed to be sorted.
pub type TermStreamer<'a, A = AlwaysMatch> = self::termdict::TermStreamer<'a, A>;

View File

@@ -145,12 +145,6 @@ where
}
pub fn write_key(&mut self, key: &[u8]) {
// If this is the first key in the block, we use it to
// shorten the last term in the last block.
if self.first_ordinal_of_the_block == self.num_terms {
self.index_builder
.shorten_last_block_key_given_next_key(key);
}
let keep_len = common_prefix_len(&self.previous_key, key);
let add_len = key.len() - keep_len;
let increasing_keys = add_len > 0 && (self.previous_key.len() == keep_len)
@@ -279,12 +273,11 @@ mod test {
33u8, 18u8, 19u8, // keep 1 push 1 | 20
17u8, 20u8, 0u8, 0u8, 0u8, 0u8, // no more blocks
// index
161, 102, 98, 108, 111, 99, 107, 115, 129, 162, 115, 108, 97, 115, 116, 95, 107,
101, 121, 95, 111, 114, 95, 103, 114, 101, 97, 116, 101, 114, 130, 17, 20, 106, 98,
108, 111, 99, 107, 95, 97, 100, 100, 114, 162, 106, 98, 121, 116, 101, 95, 114, 97,
110, 103, 101, 162, 101, 115, 116, 97, 114, 116, 0, 99, 101, 110, 100, 11, 109,
102, 105, 114, 115, 116, 95, 111, 114, 100, 105, 110, 97, 108, 0, 15, 0, 0, 0, 0,
0, 0, 0, // offset for the index
161, 102, 98, 108, 111, 99, 107, 115, 129, 162, 104, 108, 97, 115, 116, 95, 107,
101, 121, 130, 17, 20, 106, 98, 108, 111, 99, 107, 95, 97, 100, 100, 114, 162, 106,
98, 121, 116, 101, 95, 114, 97, 110, 103, 101, 162, 101, 115, 116, 97, 114, 116, 0,
99, 101, 110, 100, 11, 109, 102, 105, 114, 115, 116, 95, 111, 114, 100, 105, 110,
97, 108, 0, 15, 0, 0, 0, 0, 0, 0, 0, // offset for the index
3u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8 // num terms
]
);

View File

@@ -4,7 +4,6 @@ use std::ops::Range;
use serde::{Deserialize, Serialize};
use crate::error::DataCorruption;
use crate::termdict::sstable_termdict::sstable::common_prefix_len;
#[derive(Default, Debug, Serialize, Deserialize)]
pub struct SSTableIndex {
@@ -20,7 +19,7 @@ impl SSTableIndex {
pub fn search(&self, key: &[u8]) -> Option<BlockAddr> {
self.blocks
.iter()
.find(|block| &block.last_key_or_greater[..] >= key)
.find(|block| &block.last_key[..] >= key)
.map(|block| block.block_addr.clone())
}
}
@@ -33,10 +32,7 @@ pub struct BlockAddr {
#[derive(Debug, Serialize, Deserialize)]
struct BlockMeta {
/// Any byte string that is lexicographically greater or equal to
/// the last key in the block,
/// and yet strictly smaller than the first key in the next block.
pub last_key_or_greater: Vec<u8>,
pub last_key: Vec<u8>,
pub block_addr: BlockAddr,
}
@@ -45,39 +41,10 @@ pub struct SSTableIndexBuilder {
index: SSTableIndex,
}
/// Given that left < right,
/// mutates `left` into a shorter byte string `left'` that
/// matches `left <= left' < right`.
fn find_shorter_str_in_between(left: &mut Vec<u8>, right: &[u8]) {
assert!(&left[..] < right);
let common_len = common_prefix_len(&left, right);
if left.len() == common_len {
return;
}
// It is possible to make it one character shorter in some cases,
// but it is not worth the extra complexity.
for pos in (common_len + 1)..left.len() {
if left[pos] != u8::MAX {
left[pos] += 1;
left.truncate(pos + 1);
return;
}
}
}
impl SSTableIndexBuilder {
/// In order to make the index as light as possible, we
/// try to find a shorter alternative to the last key of the last block
/// that is still smaller than the next key.
pub(crate) fn shorten_last_block_key_given_next_key(&mut self, next_key: &[u8]) {
if let Some(last_block) = self.index.blocks.last_mut() {
find_shorter_str_in_between(&mut last_block.last_key_or_greater, next_key);
}
}
pub fn add_block(&mut self, last_key: &[u8], byte_range: Range<usize>, first_ordinal: u64) {
self.index.blocks.push(BlockMeta {
last_key_or_greater: last_key.to_vec(),
last_key: last_key.to_vec(),
block_addr: BlockAddr {
byte_range,
first_ordinal,
@@ -130,35 +97,4 @@ mod tests {
"Data corruption: SSTable index is corrupted."
);
}
#[track_caller]
fn test_find_shorter_str_in_between_aux(left: &[u8], right: &[u8]) {
let mut left_buf = left.to_vec();
super::find_shorter_str_in_between(&mut left_buf, right);
assert!(left_buf.len() <= left.len());
assert!(left <= &left_buf);
assert!(&left_buf[..] < &right);
}
#[test]
fn test_find_shorter_str_in_between() {
test_find_shorter_str_in_between_aux(b"", b"hello");
test_find_shorter_str_in_between_aux(b"abc", b"abcd");
test_find_shorter_str_in_between_aux(b"abcd", b"abd");
test_find_shorter_str_in_between_aux(&[0, 0, 0], &[1]);
test_find_shorter_str_in_between_aux(&[0, 0, 0], &[0, 0, 1]);
test_find_shorter_str_in_between_aux(&[0, 0, 255, 255, 255, 0u8], &[0, 1]);
}
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(100))]
#[test]
fn test_proptest_find_shorter_str(left in any::<Vec<u8>>(), right in any::<Vec<u8>>()) {
if left < right {
test_find_shorter_str_in_between_aux(&left, &right);
}
}
}
}

View File

@@ -25,13 +25,6 @@ pub struct TokenizerManager {
}
impl TokenizerManager {
/// Creates an empty tokenizer manager.
pub fn new() -> Self {
Self {
tokenizers: Arc::new(RwLock::new(HashMap::new())),
}
}
/// Registers a new tokenizer associated with a given name.
pub fn register<T>(&self, tokenizer_name: &str, tokenizer: T)
where TextAnalyzer: From<T> {
@@ -59,7 +52,9 @@ impl Default for TokenizerManager {
/// - en_stem
/// - ja
fn default() -> TokenizerManager {
let manager = TokenizerManager::new();
let manager = TokenizerManager {
tokenizers: Arc::new(RwLock::new(HashMap::new())),
};
manager.register("raw", RawTokenizer);
manager.register(
"default",