update basic_search example

chore: fix build as the rev is gone (#2417)
style: simplify strings with string interpolation (#2412)
2026-06-21 18:00:42 +00:00 · 2024-05-30 21:56:22 +08:00 · 2024-05-29 09:49:16 +08:00 · 2024-05-27 09:16:47 +02:00 · 2024-05-27 07:33:50 +02:00 · 2024-05-22 10:10:55 +09:00
72 changed files with 1884 additions and 1515 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.22.0"
+version = "0.23.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -15,12 +15,16 @@ rust-version = "1.63"
 exclude = ["benches/*.json", "benches/*.txt"]

 [dependencies]
-oneshot = "0.1.5"
+# Switch back to the non-forked oneshot crate once https://github.com/faern/oneshot/pull/35 is merged
+oneshot = { git = "https://github.com/fulmicoton/oneshot.git", rev = "b208f49" }
 base64 = "0.22.0"
 byteorder = "1.4.3"
 crc32fast = "1.3.2"
 once_cell = "1.10.0"
-regex = { version = "1.5.5", default-features = false, features = ["std", "unicode"] }
+regex = { version = "1.5.5", default-features = false, features = [
+    "std",
+    "unicode",
+] }
 aho-corasick = "1.0"
 tantivy-fst = "0.5"
 memmap2 = { version = "0.9.0", optional = true }
@@ -30,14 +34,15 @@ tempfile = { version = "3.3.0", optional = true }
 log = "0.4.16"
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = "1.0.79"
-num_cpus = "1.13.1"
 fs4 = { version = "0.8.0", optional = true }
 levenshtein_automata = "0.2.1"
 uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
 rust-stemmers = "1.2.0"
 downcast-rs = "1.2.0"
-bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker4x"] }
+bitpacking = { version = "0.9.2", default-features = false, features = [
+    "bitpacker4x",
+] }
 census = "0.4.2"
 rustc-hash = "1.1.0"
 thiserror = "1.0.30"
@@ -48,26 +53,26 @@ smallvec = "1.8.0"
 rayon = "1.5.2"
 lru = "0.12.0"
 fastdivide = "0.4.0"
-itertools = "0.12.0"
+itertools = "0.13.0"
 measure_time = "0.8.2"
 arc-swap = "1.5.0"

-columnar = { version= "0.3", path="./columnar", package ="tantivy-columnar" }
-sstable = { version= "0.3", path="./sstable", package ="tantivy-sstable", optional = true }
-stacker = { version= "0.3", path="./stacker", package ="tantivy-stacker" }
-query-grammar = { version= "0.22.0", path="./query-grammar", package = "tantivy-query-grammar" }
-tantivy-bitpacker = { version= "0.6", path="./bitpacker" }
-common = { version= "0.7", path = "./common/", package = "tantivy-common" }
-tokenizer-api = { version= "0.3", path="./tokenizer-api", package="tantivy-tokenizer-api" }
+columnar = { version = "0.3", path = "./columnar", package = "tantivy-columnar" }
+sstable = { version = "0.3", path = "./sstable", package = "tantivy-sstable", optional = true }
+stacker = { version = "0.3", path = "./stacker", package = "tantivy-stacker" }
+query-grammar = { version = "0.22.0", path = "./query-grammar", package = "tantivy-query-grammar" }
+tantivy-bitpacker = { version = "0.6", path = "./bitpacker" }
+common = { version = "0.7", path = "./common/", package = "tantivy-common" }
+tokenizer-api = { version = "0.3", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
 sketches-ddsketch = { version = "0.2.1", features = ["use_serde"] }
 futures-util = { version = "0.3.28", optional = true }
 fnv = "1.0.7"
-mediumvec = "1.3.0"

 [target.'cfg(windows)'.dependencies]
 winapi = "0.3.9"

 [dev-dependencies]
+binggan = "0.8.0"
 rand = "0.8.5"
 maplit = "1.0.2"
 matches = "0.1.9"
@@ -82,7 +87,6 @@ time = { version = "0.3.10", features = ["serde-well-known", "macros"] }
 postcard = { version = "1.0.4", features = [
  "use-std",
 ], default-features = false }
-peakmem-alloc = "0.3.0"

 [target.'cfg(not(windows))'.dev-dependencies]
 criterion = { version = "0.5", default-features = false }
@@ -114,17 +118,26 @@ lz4-compression = ["lz4_flex"]
 zstd-compression = ["zstd"]

 failpoints = ["fail", "fail/failpoints"]
-unstable = [] # useful for benches.
+unstable = []                            # useful for benches.

 quickwit = ["sstable", "futures-util"]

-# Compares only the hash of a string when indexing data. 
+# Compares only the hash of a string when indexing data.
 # Increases indexing speed, but may lead to extremely rare missing terms, when there's a hash collision.
 # Uses 64bit ahash.
 compare_hash_only = ["stacker/compare_hash_only"]

 [workspace]
-members = ["query-grammar", "bitpacker", "common", "ownedbytes", "stacker", "sstable", "tokenizer-api", "columnar"]
+members = [
+    "query-grammar",
+    "bitpacker",
+    "common",
+    "ownedbytes",
+    "stacker",
+    "sstable",
+    "tokenizer-api",
+    "columnar",
+]

 # Following the "fail" crate best practises, we isolate
 # tests that define specific behavior in fail check points
@@ -145,3 +158,7 @@ harness = false
 [[bench]]
 name = "index-bench"
 harness = false
+
+[[bench]]
+name = "agg_bench"
+harness = false
--- a/benches/agg_bench.rs
+++ b/benches/agg_bench.rs
@@ -0,0 +1,413 @@
+use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
+use rand::prelude::SliceRandom;
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+use rand_distr::Distribution;
+use serde_json::json;
+use tantivy::aggregation::agg_req::Aggregations;
+use tantivy::aggregation::AggregationCollector;
+use tantivy::query::{AllQuery, TermQuery};
+use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
+use tantivy::{doc, Index, Term};
+
+#[global_allocator]
+pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
+
+/// Mini macro to register a function under its own name: `register!(runner, average_u64)` expands to
+/// `runner.register("average_u64", move |index| average_u64(index));`
+macro_rules! register {
+    ($runner:expr, $func:ident) => {
+        $runner.register(stringify!($func), move |index| $func(index))
+    };
+}
+
+fn main() {
+    let inputs = vec![
+        ("full", get_test_index_bench(Cardinality::Full).unwrap()),
+        (
+            "dense",
+            get_test_index_bench(Cardinality::OptionalDense).unwrap(),
+        ),
+        (
+            "sparse",
+            get_test_index_bench(Cardinality::OptionalSparse).unwrap(),
+        ),
+        (
+            "multivalue",
+            get_test_index_bench(Cardinality::Multivalued).unwrap(),
+        ),
+    ];
+
+    bench_agg(InputGroup::new_with_inputs(inputs));
+}
+
+fn bench_agg(mut group: InputGroup<Index>) {
+    group.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.
+    register!(group, average_u64);
+    register!(group, average_f64);
+    register!(group, average_f64_u64);
+    register!(group, stats_f64);
+    register!(group, percentiles_f64);
+    register!(group, terms_few);
+    register!(group, terms_many);
+    register!(group, terms_many_order_by_term);
+    register!(group, terms_many_with_top_hits);
+    register!(group, terms_many_with_avg_sub_agg);
+    register!(group, terms_many_json_mixed_type_with_sub_agg_card);
+    register!(group, range_agg);
+    register!(group, range_agg_with_avg_sub_agg);
+    register!(group, range_agg_with_term_agg_few);
+    register!(group, range_agg_with_term_agg_many);
+    register!(group, histogram);
+    register!(group, histogram_hard_bounds);
+    register!(group, histogram_with_avg_sub_agg);
+    register!(group, avg_and_range_with_avg_sub_agg);
+
+    group.run();
+}
+
+fn exec_term_with_agg(index: &Index, agg_req: serde_json::Value) {
+    let agg_req: Aggregations = serde_json::from_value(agg_req).unwrap();
+
+    let reader = index.reader().unwrap();
+    let text_field = reader.searcher().schema().get_field("text").unwrap();
+    let term_query = TermQuery::new(
+        Term::from_field_text(text_field, "cool"),
+        IndexRecordOption::Basic,
+    );
+    let collector = get_collector(agg_req);
+    let searcher = reader.searcher();
+    black_box(searcher.search(&term_query, &collector).unwrap());
+}
+
+fn average_u64(index: &Index) {
+    let agg_req = json!({
+        "average": { "avg": { "field": "score", } }
+    });
+    exec_term_with_agg(index, agg_req)
+}
+fn average_f64(index: &Index) {
+    let agg_req = json!({
+        "average": { "avg": { "field": "score_f64", } }
+    });
+    exec_term_with_agg(index, agg_req)
+}
+fn average_f64_u64(index: &Index) {
+    let agg_req = json!({
+        "average_f64": { "avg": { "field": "score_f64" } },
+        "average": { "avg": { "field": "score" } },
+    });
+    exec_term_with_agg(index, agg_req)
+}
+fn stats_f64(index: &Index) {
+    let agg_req = json!({
+        "average_f64": { "stats": { "field": "score_f64", } }
+    });
+    exec_term_with_agg(index, agg_req)
+}
+
+fn percentiles_f64(index: &Index) {
+    let agg_req = json!({
+      "mypercentiles": {
+        "percentiles": {
+          "field": "score_f64",
+          "percents": [ 95, 99, 99.9 ]
+        }
+      }
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_few(index: &Index) {
+    let agg_req = json!({
+        "my_texts": { "terms": { "field": "text_few_terms" } },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many(index: &Index) {
+    let agg_req = json!({
+        "my_texts": { "terms": { "field": "text_many_terms" } },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_order_by_term(index: &Index) {
+    let agg_req = json!({
+        "my_texts": { "terms": { "field": "text_many_terms", "order": { "_key": "desc" } } },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_with_top_hits(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "text_many_terms" },
+            "aggs": {
+                "top_hits": { "top_hits":
+                    {
+                        "sort": [
+                            { "score": "desc" }
+                        ],
+                        "size": 2,
+                        "doc_value_fields": ["score_f64"]
+                    }
+                }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "text_many_terms" },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_json_mixed_type_with_sub_agg_card(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "json.mixed_type" },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+
+fn execute_agg(index: &Index, agg_req: serde_json::Value) {
+    let agg_req: Aggregations = serde_json::from_value(agg_req).unwrap();
+    let collector = get_collector(agg_req);
+
+    let reader = index.reader().unwrap();
+    let searcher = reader.searcher();
+    black_box(searcher.search(&AllQuery, &collector).unwrap());
+}
+fn range_agg(index: &Index) {
+    let agg_req = json!({
+        "range_f64": { "range": { "field": "score_f64", "ranges": [
+            { "from": 3, "to": 7000 },
+            { "from": 7000, "to": 20000 },
+            { "from": 20000, "to": 30000 },
+            { "from": 30000, "to": 40000 },
+            { "from": 40000, "to": 50000 },
+            { "from": 50000, "to": 60000 }
+        ] } },
+    });
+    execute_agg(index, agg_req);
+}
+fn range_agg_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 30000 },
+                    { "from": 30000, "to": 40000 },
+                    { "from": 40000, "to": 50000 },
+                    { "from": 50000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+
+fn range_agg_with_term_agg_few(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 30000 },
+                    { "from": 30000, "to": 40000 },
+                    { "from": 40000, "to": 50000 },
+                    { "from": 50000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "my_texts": { "terms": { "field": "text_few_terms" } },
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn range_agg_with_term_agg_many(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 30000 },
+                    { "from": 30000, "to": 40000 },
+                    { "from": 40000, "to": 50000 },
+                    { "from": 50000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "my_texts": { "terms": { "field": "text_many_terms" } },
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn histogram(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "histogram": {
+                "field": "score_f64",
+                "interval": 100 // 1000 buckets
+            },
+        }
+    });
+    execute_agg(index, agg_req);
+}
+fn histogram_hard_bounds(index: &Index) {
+    let agg_req = json!({
+        "rangef64": { "histogram": { "field": "score_f64", "interval": 100, "hard_bounds": { "min": 1000, "max": 300000 } } },
+    });
+    execute_agg(index, agg_req);
+}
+fn histogram_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "histogram": { "field": "score_f64", "interval": 100 },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        }
+    });
+    execute_agg(index, agg_req);
+}
+fn avg_and_range_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "average_in_range": { "avg": { "field": "score" } }
+            }
+        },
+        "average": { "avg": { "field": "score" } }
+    });
+    execute_agg(index, agg_req);
+}
+
+#[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
+enum Cardinality {
+    /// All documents contain exactly one value.
+    /// `Full` is the default for auto-detecting the Cardinality, since it is the most strict.
+    #[default]
+    Full = 0,
+    /// All documents contain at most one value.
+    OptionalDense = 1,
+    /// All documents may contain any number of values.
+    Multivalued = 2,
+    /// Only about 1 in 20 documents has a value.
+    OptionalSparse = 3,
+}
+
+fn get_collector(agg_req: Aggregations) -> AggregationCollector {
+    AggregationCollector::from_aggs(agg_req, Default::default())
+}
+
+fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
+    let mut schema_builder = Schema::builder();
+    let text_fieldtype = tantivy::schema::TextOptions::default()
+        .set_indexing_options(
+            TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
+        )
+        .set_stored();
+    let text_field = schema_builder.add_text_field("text", text_fieldtype);
+    let json_field = schema_builder.add_json_field("json", FAST);
+    let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
+    let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
+    let score_fieldtype = tantivy::schema::NumericOptions::default().set_fast();
+    let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
+    let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
+    let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
+    let index = Index::create_from_tempdir(schema_builder.build())?;
+    let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
+
+    let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
+
+    let many_terms_data = (0..150_000)
+        .map(|num| format!("author{num}"))
+        .collect::<Vec<_>>();
+    {
+        let mut rng = StdRng::from_seed([1u8; 32]);
+        let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
+        // To make the different test cases comparable we just change one doc to force the
+        // cardinality
+        if cardinality == Cardinality::OptionalDense {
+            index_writer.add_document(doc!())?;
+        }
+        if cardinality == Cardinality::Multivalued {
+            index_writer.add_document(doc!(
+                json_field => json!({"mixed_type": 10.0}),
+                json_field => json!({"mixed_type": 10.0}),
+                text_field => "cool",
+                text_field => "cool",
+                text_field_many_terms => "cool",
+                text_field_many_terms => "cool",
+                text_field_few_terms => "cool",
+                text_field_few_terms => "cool",
+                score_field => 1u64,
+                score_field => 1u64,
+                score_field_f64 => lg_norm.sample(&mut rng),
+                score_field_f64 => lg_norm.sample(&mut rng),
+                score_field_i64 => 1i64,
+                score_field_i64 => 1i64,
+            ))?;
+        }
+        let mut doc_with_value = 1_000_000;
+        if cardinality == Cardinality::OptionalSparse {
+            doc_with_value /= 20;
+        }
+        let _val_max = 1_000_000.0;
+        for _ in 0..doc_with_value {
+            let val: f64 = rng.gen_range(0.0..1_000_000.0);
+            let json = if rng.gen_bool(0.1) {
+                // 10% are numeric values
+                json!({ "mixed_type": val })
+            } else {
+                json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
+            };
+            index_writer.add_document(doc!(
+                text_field => "cool",
+                json_field => json,
+                text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
+                text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
+                score_field => val as u64,
+                score_field_f64 => lg_norm.sample(&mut rng),
+                score_field_i64 => val as i64,
+            ))?;
+            if cardinality == Cardinality::OptionalSparse {
+                for _ in 0..20 {
+                    index_writer.add_document(doc!(text_field => "cool"))?;
+                }
+            }
+        }
+        // writing the segment
+        index_writer.commit()?;
+    }
+
+    Ok(index)
+}
--- a/benches/index-bench.rs
+++ b/benches/index-bench.rs
@@ -18,7 +18,7 @@ fn benchmark(
        benchmark_dynamic_json(b, input, schema, commit, parse_json)
    } else {
        _benchmark(b, input, schema, commit, parse_json, |schema, doc_json| {
-            TantivyDocument::parse_json(&schema, doc_json).unwrap()
+            TantivyDocument::parse_json(schema, doc_json).unwrap()
        })
    }
 }
@@ -90,8 +90,7 @@ fn benchmark_dynamic_json(
 ) {
    let json_field = schema.get_field("json").unwrap();
    _benchmark(b, input, schema, commit, parse_json, |_schema, doc_json| {
-        let json_val: serde_json::Map<String, serde_json::Value> =
-            serde_json::from_str(doc_json).unwrap();
+        let json_val: serde_json::Value = serde_json::from_str(doc_json).unwrap();
        tantivy::doc!(json_field=>json_val)
    })
 }
@@ -138,15 +137,16 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
    for (prefix, schema, is_dynamic) in benches {
        for commit in [false, true] {
            let suffix = if commit { "with-commit" } else { "no-commit" };
-            for parse_json in [false] {
+            {
+                let parse_json = false;
                // for parse_json in [false, true] {
                let suffix = if parse_json {
-                    format!("{}-with-json-parsing", suffix)
+                    format!("{suffix}-with-json-parsing")
                } else {
-                    format!("{}", suffix)
+                    suffix.to_string()
                };

-                let bench_name = format!("{}{}", prefix, suffix);
+                let bench_name = format!("{prefix}{suffix}");
                group.bench_function(bench_name, |b| {
                    benchmark(b, HDFS_LOGS, schema.clone(), commit, parse_json, is_dynamic)
                });
--- a/columnar/Cargo.toml
+++ b/columnar/Cargo.toml
@@ -9,7 +9,7 @@ description = "column oriented storage for tantivy"
 categories = ["database-implementations", "data-structures", "compression"]

 [dependencies]
-itertools = "0.12.0"
+itertools = "0.13.0"
 fastdivide = "0.4.0"

 stacker = { version= "0.3", path = "../stacker", package="tantivy-stacker"}
--- a/columnar/src/columnar/writer/mod.rs
+++ b/columnar/src/columnar/writer/mod.rs
@@ -59,22 +59,6 @@ pub struct ColumnarWriter {
    buffers: SpareBuffers,
 }

-#[inline]
-fn mutate_or_create_column<V, TMutator>(
-    arena_hash_map: &mut ArenaHashMap,
-    column_name: &str,
-    updater: TMutator,
-) where
-    V: Copy + 'static,
-    TMutator: FnMut(Option<V>) -> V,
-{
-    assert!(
-        !column_name.as_bytes().contains(&0u8),
-        "key may not contain the 0 byte"
-    );
-    arena_hash_map.mutate_or_create(column_name.as_bytes(), updater);
-}
-
 impl ColumnarWriter {
    pub fn mem_usage(&self) -> usize {
        self.arena.mem_usage()
@@ -175,9 +159,8 @@ impl ColumnarWriter {
                    },
                    &mut self.dictionaries,
                );
-                mutate_or_create_column(
-                    hash_map,
-                    column_name,
+                hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<StrOrBytesColumnWriter>| {
                        let mut column_writer = if let Some(column_writer) = column_opt {
                            column_writer
@@ -192,24 +175,21 @@ impl ColumnarWriter {
                );
            }
            ColumnType::Bool => {
-                mutate_or_create_column(
-                    &mut self.bool_field_hash_map,
-                    column_name,
+                self.bool_field_hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
                );
            }
            ColumnType::DateTime => {
-                mutate_or_create_column(
-                    &mut self.datetime_field_hash_map,
-                    column_name,
+                self.datetime_field_hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
                );
            }
            ColumnType::I64 | ColumnType::F64 | ColumnType::U64 => {
                let numerical_type = column_type.numerical_type().unwrap();
-                mutate_or_create_column(
-                    &mut self.numerical_field_hash_map,
-                    column_name,
+                self.numerical_field_hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<NumericalColumnWriter>| {
                        let mut column: NumericalColumnWriter = column_opt.unwrap_or_default();
                        column.force_numerical_type(numerical_type);
@@ -217,9 +197,8 @@ impl ColumnarWriter {
                    },
                );
            }
-            ColumnType::IpAddr => mutate_or_create_column(
-                &mut self.ip_addr_field_hash_map,
-                column_name,
+            ColumnType::IpAddr => self.ip_addr_field_hash_map.mutate_or_create(
+                column_name.as_bytes(),
                |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
            ),
        }
@@ -232,9 +211,8 @@ impl ColumnarWriter {
        numerical_value: T,
    ) {
        let (hash_map, arena) = (&mut self.numerical_field_hash_map, &mut self.arena);
-        mutate_or_create_column(
-            hash_map,
-            column_name,
+        hash_map.mutate_or_create(
+            column_name.as_bytes(),
            |column_opt: Option<NumericalColumnWriter>| {
                let mut column: NumericalColumnWriter = column_opt.unwrap_or_default();
                column.record_numerical_value(doc, numerical_value.into(), arena);
@@ -244,10 +222,6 @@ impl ColumnarWriter {
    }

    pub fn record_ip_addr(&mut self, doc: RowId, column_name: &str, ip_addr: Ipv6Addr) {
-        assert!(
-            !column_name.as_bytes().contains(&0u8),
-            "key may not contain the 0 byte"
-        );
        let (hash_map, arena) = (&mut self.ip_addr_field_hash_map, &mut self.arena);
        hash_map.mutate_or_create(
            column_name.as_bytes(),
@@ -261,24 +235,30 @@ impl ColumnarWriter {

    pub fn record_bool(&mut self, doc: RowId, column_name: &str, val: bool) {
        let (hash_map, arena) = (&mut self.bool_field_hash_map, &mut self.arena);
-        mutate_or_create_column(hash_map, column_name, |column_opt: Option<ColumnWriter>| {
-            let mut column: ColumnWriter = column_opt.unwrap_or_default();
-            column.record(doc, val, arena);
-            column
-        });
+        hash_map.mutate_or_create(
+            column_name.as_bytes(),
+            |column_opt: Option<ColumnWriter>| {
+                let mut column: ColumnWriter = column_opt.unwrap_or_default();
+                column.record(doc, val, arena);
+                column
+            },
+        );
    }

    pub fn record_datetime(&mut self, doc: RowId, column_name: &str, datetime: common::DateTime) {
        let (hash_map, arena) = (&mut self.datetime_field_hash_map, &mut self.arena);
-        mutate_or_create_column(hash_map, column_name, |column_opt: Option<ColumnWriter>| {
-            let mut column: ColumnWriter = column_opt.unwrap_or_default();
-            column.record(
-                doc,
-                NumericalValue::I64(datetime.into_timestamp_nanos()),
-                arena,
-            );
-            column
-        });
+        hash_map.mutate_or_create(
+            column_name.as_bytes(),
+            |column_opt: Option<ColumnWriter>| {
+                let mut column: ColumnWriter = column_opt.unwrap_or_default();
+                column.record(
+                    doc,
+                    NumericalValue::I64(datetime.into_timestamp_nanos()),
+                    arena,
+                );
+                column
+            },
+        );
    }

    pub fn record_str(&mut self, doc: RowId, column_name: &str, value: &str) {
@@ -303,10 +283,6 @@ impl ColumnarWriter {
    }

    pub fn record_bytes(&mut self, doc: RowId, column_name: &str, value: &[u8]) {
-        assert!(
-            !column_name.as_bytes().contains(&0u8),
-            "key may not contain the 0 byte"
-        );
        let (hash_map, arena, dictionaries) = (
            &mut self.bytes_field_hash_map,
            &mut self.arena,
--- a/common/src/vint.rs
+++ b/common/src/vint.rs
@@ -151,7 +151,7 @@ pub fn read_u32_vint_no_advance(data: &[u8]) -> (u32, usize) {
    (result, vlen)
 }
 /// Write a `u32` as a vint payload.
-pub fn write_u32_vint<W: io::Write>(val: u32, writer: &mut W) -> io::Result<()> {
+pub fn write_u32_vint<W: io::Write + ?Sized>(val: u32, writer: &mut W) -> io::Result<()> {
    let mut buf = [0u8; 8];
    let data = serialize_vint_u32(val, &mut buf);
    writer.write_all(data)
--- a/examples/basic_search.rs
+++ b/examples/basic_search.rs
@@ -19,13 +19,14 @@ use tantivy::{doc, Index, IndexWriter, ReloadPolicy};
 use tempfile::TempDir;

 fn main() -> tantivy::Result<()> {
-    // Let's create a temporary directory for the
-    // sake of this example
+    // Normally you would use `MmapDirectory` instead to persist data on disk.
+    // https://docs.rs/tantivy/latest/tantivy/directory/struct.MmapDirectory.html
+    // But for this example, we will use a temporary directory `TempDir`.
    let index_path = TempDir::new()?;

    // # Defining the schema
    //
-    // The Tantivy index requires a very strict schema.
+    // The Tantivy index requires a schema.
    // The schema declares which fields are in the index,
    // and for each field, its type and "the way it should
    // be indexed".
--- a/examples/custom_collector.rs
+++ b/examples/custom_collector.rs
@@ -11,9 +11,10 @@ use columnar::Column;
 // ---
 // Importing tantivy...
 use tantivy::collector::{Collector, SegmentCollector};
+use tantivy::index::SegmentReader;
 use tantivy::query::QueryParser;
 use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
-use tantivy::{doc, Index, IndexWriter, Score, SegmentReader};
+use tantivy::{doc, Index, IndexWriter, Score};

 #[derive(Default)]
 struct Stats {
--- a/examples/date_time_field.rs
+++ b/examples/date_time_field.rs
@@ -4,7 +4,7 @@

 use tantivy::collector::TopDocs;
 use tantivy::query::QueryParser;
-use tantivy::schema::{DateOptions, Document, OwnedValue, Schema, INDEXED, STORED, STRING};
+use tantivy::schema::{DateOptions, Document, Schema, Value, INDEXED, STORED, STRING};
 use tantivy::{Index, IndexWriter, TantivyDocument};

 fn main() -> tantivy::Result<()> {
@@ -13,7 +13,7 @@ fn main() -> tantivy::Result<()> {
    let opts = DateOptions::from(INDEXED)
        .set_stored()
        .set_fast()
-        .set_precision(tantivy::DateTimePrecision::Seconds);
+        .set_precision(tantivy::schema::DateTimePrecision::Seconds);
    // Add `occurred_at` date field type
    let occurred_at = schema_builder.add_date_field("occurred_at", opts);
    let event_type = schema_builder.add_text_field("event", STRING | STORED);
@@ -61,10 +61,12 @@ fn main() -> tantivy::Result<()> {
        assert_eq!(count_docs.len(), 1);
        for (_score, doc_address) in count_docs {
            let retrieved_doc = searcher.doc::<TantivyDocument>(doc_address)?;
-            assert!(matches!(
-                retrieved_doc.get_first(occurred_at),
-                Some(OwnedValue::Date(_))
-            ));
+            assert!(retrieved_doc
+                .get_first(occurred_at)
+                .unwrap()
+                .as_value()
+                .as_datetime()
+                .is_some(),);
            assert_eq!(
                retrieved_doc.to_json(&schema),
                r#"{"event":["comment"],"occurred_at":["2022-06-22T13:00:00.22Z"]}"#
--- a/examples/doc_mem.rs
+++ b/examples/doc_mem.rs
@@ -1,335 +0,0 @@
-#![allow(unused_imports)]
-#![allow(dead_code)]
-use std::alloc::System;
-use std::env::args;
-use std::net::Ipv6Addr;
-
-use columnar::{MonotonicallyMappableToU128, MonotonicallyMappableToU64};
-use common::{BinarySerializable, CountingWriter, DateTime, FixedSize};
-use peakmem_alloc::*;
-use tantivy::schema::{Field, FieldValue, OwnedValue, FAST, INDEXED, STRING, TEXT};
-use tantivy::tokenizer::PreTokenizedString;
-use tantivy::{doc, TantivyDocument};
-
-const GH_LOGS: &str = include_str!("../benches/gh.json");
-const HDFS_LOGS: &str = include_str!("../benches/hdfs.json");
-
-#[global_allocator]
-static GLOBAL: &PeakMemAlloc<System> = &INSTRUMENTED_SYSTEM;
-
-fn main() {
-    dbg!(std::mem::size_of::<TantivyDocument>());
-    dbg!(std::mem::size_of::<DocContainerRef>());
-    dbg!(std::mem::size_of::<OwnedValue>());
-    dbg!(std::mem::size_of::<OwnedValueMedVec>());
-    dbg!(std::mem::size_of::<ValueContainerRef>());
-    dbg!(std::mem::size_of::<mediumvec::vec32::Vec32::<u8>>());
-
-    let filter = args().nth(1);
-    measure_fn(
-        test_hdfs::<TantivyDocument>,
-        "hdfs TantivyDocument",
-        &filter,
-    );
-    measure_fn(
-        test_hdfs::<TantivyDocumentMedVec>,
-        "hdfs TantivyDocumentMedVec",
-        &filter,
-    );
-    measure_fn(
-        test_hdfs::<DocContainerRef>,
-        "hdfs DocContainerRef",
-        &filter,
-    );
-    measure_fn(test_gh::<TantivyDocument>, "gh TantivyDocument", &filter);
-    measure_fn(
-        test_gh::<TantivyDocumentMedVec>,
-        "gh TantivyDocumentMedVec",
-        &filter,
-    );
-    measure_fn(test_gh::<DocContainerRef>, "gh DocContainerRef", &filter);
-}
-fn measure_fn<F: FnOnce()>(f: F, name: &str, filter: &Option<std::string::String>) {
-    if let Some(filter) = filter {
-        if !name.contains(filter) {
-            return;
-        }
-    }
-    GLOBAL.reset_peak_memory();
-    f();
-    println!("Peak Memory {} : {:#?}", GLOBAL.get_peak_memory(), name);
-}
-fn test_hdfs<T: From<TantivyDocument>>() {
-    let schema = {
-        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
-        schema_builder.add_u64_field("timestamp", INDEXED);
-        schema_builder.add_text_field("body", TEXT);
-        schema_builder.add_text_field("severity", STRING);
-        schema_builder.build()
-    };
-    let mut docs: Vec<T> = Vec::with_capacity(HDFS_LOGS.lines().count());
-    for doc_json in HDFS_LOGS.lines() {
-        let doc = TantivyDocument::parse_json(&schema, doc_json)
-            .unwrap()
-            .into();
-        docs.push(doc);
-    }
-}
-
-fn test_gh<T: From<TantivyDocument>>() {
-    let schema = {
-        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
-        schema_builder.add_json_field("json", FAST);
-        schema_builder.build()
-    };
-    let mut docs: Vec<T> = Vec::with_capacity(GH_LOGS.lines().count());
-    for doc_json in GH_LOGS.lines() {
-        let json_field = schema.get_field("json").unwrap();
-
-        let json_val: serde_json::Map<String, serde_json::Value> =
-            serde_json::from_str(doc_json).unwrap();
-        let doc = tantivy::doc!(json_field=>json_val).into();
-        docs.push(doc);
-    }
-}
-
-#[derive(Clone, Debug, Default)]
-#[allow(dead_code)]
-pub struct TantivyDocumentMedVec {
-    field_values: mediumvec::Vec32<FieldValueMedVec>,
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub struct FieldValueMedVec {
-    pub field: Field,
-    pub value: OwnedValueMedVec,
-}
-
-/// This is a owned variant of `Value`, that can be passed around without lifetimes.
-/// Represents the value of a any field.
-/// It is an enum over all over all of the possible field type.
-#[derive(Debug, Clone, PartialEq)]
-pub enum OwnedValueMedVec {
-    /// A null value.
-    Null,
-    /// The str type is used for any text information.
-    Str(mediumvec::vec32::Vec32<u8>),
-    /// Unsigned 64-bits Integer `u64`
-    U64(u64),
-    /// Signed 64-bits Integer `i64`
-    I64(i64),
-    /// 64-bits Float `f64`
-    F64(f64),
-    /// Bool value
-    Bool(bool),
-    /// Date/time with nanoseconds precision
-    Date(DateTime),
-    Array(mediumvec::vec32::Vec32<Self>),
-    /// Dynamic object value.
-    Object(mediumvec::vec32::Vec32<(String, Self)>),
-    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
-    IpAddr(Ipv6Addr),
-    /// Pre-tokenized str type,
-    PreTokStr(Box<PreTokenizedString>),
-    /// Arbitrarily sized byte array
-    Bytes(mediumvec::vec32::Vec32<u8>),
-}
-
-impl From<TantivyDocument> for TantivyDocumentMedVec {
-    fn from(doc: TantivyDocument) -> Self {
-        let field_values = doc
-            .into_iter()
-            .map(|fv| FieldValueMedVec {
-                field: fv.field,
-                value: fv.value.into(),
-            })
-            .collect();
-        TantivyDocumentMedVec { field_values }
-    }
-}
-impl From<OwnedValue> for OwnedValueMedVec {
-    fn from(value: OwnedValue) -> Self {
-        match value {
-            OwnedValue::Null => OwnedValueMedVec::Null,
-            OwnedValue::Str(s) => {
-                let bytes = s.into_bytes();
-                let vec = mediumvec::vec32::Vec32::from_vec(bytes);
-                OwnedValueMedVec::Str(vec)
-            }
-            OwnedValue::U64(u) => OwnedValueMedVec::U64(u),
-            OwnedValue::I64(i) => OwnedValueMedVec::I64(i),
-            OwnedValue::F64(f) => OwnedValueMedVec::F64(f),
-            OwnedValue::Bool(b) => OwnedValueMedVec::Bool(b),
-            OwnedValue::Date(d) => OwnedValueMedVec::Date(d),
-            OwnedValue::Array(arr) => {
-                let arr = arr.into_iter().map(|v| v.into()).collect();
-                OwnedValueMedVec::Array(arr)
-            }
-            OwnedValue::Object(obj) => {
-                let obj = obj.into_iter().map(|(k, v)| (k, v.into())).collect();
-                OwnedValueMedVec::Object(obj)
-            }
-            OwnedValue::IpAddr(ip) => OwnedValueMedVec::IpAddr(ip),
-            _ => panic!("Unsupported value type {:?}", value),
-        }
-    }
-}
-
-#[repr(packed)]
-pub struct FieldValueContainerRef {
-    pub field: u16,
-    pub value: ValueContainerRef,
-}
-
-#[repr(packed)]
-struct DocContainerRef {
-    container: OwnedValueRefContainer,
-    field_values: mediumvec::Vec32<FieldValueContainerRef>,
-}
-
-#[derive(Default)]
-struct OwnedValueRefContainer {
-    nodes: mediumvec::Vec32<ValueContainerRef>,
-    node_data: mediumvec::Vec32<u8>,
-}
-impl OwnedValueRefContainer {
-    fn shrink_to_fit(&mut self) {
-        self.nodes.shrink_to_fit();
-        self.node_data.shrink_to_fit();
-    }
-}
-
-impl From<TantivyDocument> for DocContainerRef {
-    fn from(doc: TantivyDocument) -> Self {
-        let mut container = OwnedValueRefContainer::default();
-        let field_values = doc
-            .into_iter()
-            .map(|fv| FieldValueContainerRef {
-                field: fv.field.field_id().try_into().unwrap(),
-                value: container.add_value(fv.value),
-            })
-            .collect();
-        container.shrink_to_fit();
-        Self {
-            field_values,
-            container,
-        }
-    }
-}
-
-// References to positions in two array, one for the OwnedValueRef and the other for the encoded
-// bytes
-#[derive(Debug, Clone, PartialEq)]
-pub enum ValueContainerRef {
-    /// A null value.
-    Null,
-    /// The str type is used for any text information.
-    Str(u32),
-    /// Unsigned 64-bits Integer `u64`
-    U64(u32), // position of the serialized 8 bytes in the data array
-    /// Signed 64-bits Integer `i64`
-    I64(u32), // position of the serialized 8 bytes in the data array
-    /// 64-bits Float `f64`
-    F64(u32), // position of the serialized 8 bytes in the data array
-    /// Bool value
-    Bool(bool), // inlined bool
-    /// Date/time with nanoseconds precision
-    Date(u32), // position of the serialized 8 byte in the data array
-    Array(NodeAddress),
-    /// Dynamic object value.
-    Object(NodeAddress),
-    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
-    IpAddr(u32), // position of the serialized 16 bytes in the data array
-    /// Arbitrarily sized byte array
-    Bytes(u32),
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub struct NodeAddress {
-    pos: u32,
-    num_nodes: u32,
-}
-
-impl OwnedValueRefContainer {
-    pub fn add_value(&mut self, value: OwnedValue) -> ValueContainerRef {
-        match value {
-            OwnedValue::Null => ValueContainerRef::Null,
-            OwnedValue::U64(num) => ValueContainerRef::U64(write_into(&mut self.node_data, num)),
-            OwnedValue::I64(num) => ValueContainerRef::I64(write_into(&mut self.node_data, num)),
-            OwnedValue::F64(num) => ValueContainerRef::F64(write_into(&mut self.node_data, num)),
-            OwnedValue::Bool(b) => ValueContainerRef::Bool(b),
-            OwnedValue::Date(date) => ValueContainerRef::Date(write_into(
-                &mut self.node_data,
-                date.into_timestamp_nanos(),
-            )),
-            OwnedValue::Str(bytes) => {
-                ValueContainerRef::Str(write_into(&mut self.node_data, bytes))
-            }
-            OwnedValue::Bytes(bytes) => {
-                ValueContainerRef::Bytes(write_into(&mut self.node_data, bytes))
-            }
-            OwnedValue::Array(elements) => {
-                let pos = self.nodes.len() as u32;
-                let len = elements.len() as u32;
-                for elem in elements {
-                    let ref_elem = self.add_value(elem);
-                    self.nodes.push(ref_elem);
-                }
-                ValueContainerRef::Array(NodeAddress {
-                    pos,
-                    num_nodes: len,
-                })
-            }
-            OwnedValue::Object(entries) => {
-                let pos = self.nodes.len() as u32;
-                let len = entries.len() as u32;
-                for (key, value) in entries {
-                    let ref_key = self.add_value(OwnedValue::Str(key));
-                    let ref_value = self.add_value(value);
-                    self.nodes.push(ref_key);
-                    self.nodes.push(ref_value);
-                }
-                ValueContainerRef::Object(NodeAddress {
-                    pos,
-                    num_nodes: len,
-                })
-            }
-            OwnedValue::IpAddr(num) => {
-                ValueContainerRef::IpAddr(write_into(&mut self.node_data, num.to_u128()))
-            }
-            OwnedValue::PreTokStr(_) => todo!(),
-            OwnedValue::Facet(_) => todo!(),
-        }
-    }
-}
-
-fn write_into<T: BinarySerializable>(data: &mut mediumvec::Vec32<u8>, value: T) -> u32 {
-    let pos = data.len() as u32;
-    data.as_vec(|vec| value.serialize(vec).unwrap());
-    pos
-}
-
-fn write_into_2<T: BinarySerializable>(data: &mut mediumvec::Vec32<u8>, value: T) -> NodeAddress {
-    let pos = data.len() as u32;
-    let mut len = 0;
-    data.as_vec(|vec| {
-        let mut wrt = CountingWriter::wrap(vec);
-        value.serialize(&mut wrt).unwrap();
-        len = wrt.written_bytes() as u32;
-    });
-    NodeAddress {
-        pos,
-        num_nodes: len,
-    }
-}
-
-// impl From<ContainerDocRef> for TantivyDocument {
-// fn from(doc: ContainerDocRef) -> Self {
-// let mut doc2 = TantivyDocument::new();
-// for fv in doc.field_values {
-// let field = Field::from_field_id(fv.field as u32);
-// let value = doc.container.get_value(fv.value);
-// doc2.add(FieldValue::new(field, value));
-//}
-// doc2
-//}
--- a/examples/faceted_search_with_tweaked_score.rs
+++ b/examples/faceted_search_with_tweaked_score.rs
@@ -51,7 +51,7 @@ fn main() -> tantivy::Result<()> {
    let reader = index.reader()?;
    let searcher = reader.searcher();
    {
-        let facets = vec![
+        let facets = [
            Facet::from("/ingredient/egg"),
            Facet::from("/ingredient/oil"),
            Facet::from("/ingredient/garlic"),
@@ -94,9 +94,8 @@ fn main() -> tantivy::Result<()> {
                    .doc::<TantivyDocument>(*doc_id)
                    .unwrap()
                    .get_first(title)
-                    .and_then(|v| v.as_str())
+                    .and_then(|v| v.as_str().map(|el| el.to_string()))
                    .unwrap()
-                    .to_owned()
            })
            .collect();
        assert_eq!(titles, vec!["Fried egg", "Egg rolls"]);
--- a/examples/index_from_multiple_threads.rs
+++ b/examples/index_from_multiple_threads.rs
@@ -61,7 +61,7 @@ fn main() -> tantivy::Result<()> {
                        debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
                        limbs and branches that arch over the pool"
                    ))?;
-            println!("add doc {} from thread 1 - opstamp {}", i, opstamp);
+            println!("add doc {i} from thread 1 - opstamp {opstamp}");
            thread::sleep(Duration::from_millis(20));
        }
        Result::<(), TantivyError>::Ok(())
@@ -82,7 +82,7 @@ fn main() -> tantivy::Result<()> {
                    body => "Some great book description..."
                ))?
            };
-            println!("add doc {} from thread 2 - opstamp {}", i, opstamp);
+            println!("add doc {i} from thread 2 - opstamp {opstamp}");
            thread::sleep(Duration::from_millis(10));
        }
        Result::<(), TantivyError>::Ok(())
--- a/examples/iterating_docs_and_positions.rs
+++ b/examples/iterating_docs_and_positions.rs
@@ -7,10 +7,11 @@
 // the list of documents containing a term, getting
 // its term frequency, and accessing its positions.

+use tantivy::postings::Postings;
 // ---
 // Importing tantivy...
 use tantivy::schema::*;
-use tantivy::{doc, DocSet, Index, IndexWriter, Postings, TERMINATED};
+use tantivy::{doc, DocSet, Index, IndexWriter, TERMINATED};

 fn main() -> tantivy::Result<()> {
    // We first create a schema for the sake of the
--- a/examples/warmer.rs
+++ b/examples/warmer.rs
@@ -3,10 +3,11 @@ use std::collections::{HashMap, HashSet};
 use std::sync::{Arc, RwLock, Weak};

 use tantivy::collector::TopDocs;
+use tantivy::index::SegmentId;
 use tantivy::query::QueryParser;
 use tantivy::schema::{Schema, FAST, TEXT};
 use tantivy::{
-    doc, DocAddress, DocId, Index, IndexWriter, Opstamp, Searcher, SearcherGeneration, SegmentId,
+    doc, DocAddress, DocId, Index, IndexWriter, Opstamp, Searcher, SearcherGeneration,
    SegmentReader, Warmer,
 };

--- a/src/aggregation/agg_bench.rs
+++ b/src/aggregation/agg_bench.rs
@@ -1,585 +0,0 @@
-#[cfg(all(test, feature = "unstable"))]
-mod bench {
-
-    use rand::prelude::SliceRandom;
-    use rand::rngs::StdRng;
-    use rand::{Rng, SeedableRng};
-    use rand_distr::Distribution;
-    use serde_json::json;
-    use test::{self, Bencher};
-
-    use crate::aggregation::agg_req::Aggregations;
-    use crate::aggregation::AggregationCollector;
-    use crate::query::{AllQuery, TermQuery};
-    use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
-    use crate::{Index, Term};
-
-    #[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
-    enum Cardinality {
-        /// All documents contain exactly one value.
-        /// `Full` is the default for auto-detecting the Cardinality, since it is the most strict.
-        #[default]
-        Full = 0,
-        /// All documents contain at most one value.
-        Optional = 1,
-        /// All documents may contain any number of values.
-        Multivalued = 2,
-        /// 1 / 20 documents has a value
-        Sparse = 3,
-    }
-
-    fn get_collector(agg_req: Aggregations) -> AggregationCollector {
-        AggregationCollector::from_aggs(agg_req, Default::default())
-    }
-
-    fn get_test_index_bench(cardinality: Cardinality) -> crate::Result<Index> {
-        let mut schema_builder = Schema::builder();
-        let text_fieldtype = crate::schema::TextOptions::default()
-            .set_indexing_options(
-                TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
-            )
-            .set_stored();
-        let text_field = schema_builder.add_text_field("text", text_fieldtype);
-        let json_field = schema_builder.add_json_field("json", FAST);
-        let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
-        let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
-        let score_fieldtype = crate::schema::NumericOptions::default().set_fast();
-        let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
-        let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
-        let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
-        let index = Index::create_from_tempdir(schema_builder.build())?;
-        let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
-
-        let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
-
-        let many_terms_data = (0..150_000)
-            .map(|num| format!("author{}", num))
-            .collect::<Vec<_>>();
-        {
-            let mut rng = StdRng::from_seed([1u8; 32]);
-            let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
-            // To make the different test cases comparable we just change one doc to force the
-            // cardinality
-            if cardinality == Cardinality::Optional {
-                index_writer.add_document(doc!())?;
-            }
-            if cardinality == Cardinality::Multivalued {
-                index_writer.add_document(doc!(
-                    json_field => json!({"mixed_type": 10.0}),
-                    json_field => json!({"mixed_type": 10.0}),
-                    text_field => "cool",
-                    text_field => "cool",
-                    text_field_many_terms => "cool",
-                    text_field_many_terms => "cool",
-                    text_field_few_terms => "cool",
-                    text_field_few_terms => "cool",
-                    score_field => 1u64,
-                    score_field => 1u64,
-                    score_field_f64 => lg_norm.sample(&mut rng),
-                    score_field_f64 => lg_norm.sample(&mut rng),
-                    score_field_i64 => 1i64,
-                    score_field_i64 => 1i64,
-                ))?;
-            }
-            let mut doc_with_value = 1_000_000;
-            if cardinality == Cardinality::Sparse {
-                doc_with_value /= 20;
-            }
-            let _val_max = 1_000_000.0;
-            for _ in 0..doc_with_value {
-                let val: f64 = rng.gen_range(0.0..1_000_000.0);
-                let json = if rng.gen_bool(0.1) {
-                    // 10% are numeric values
-                    json!({ "mixed_type": val })
-                } else {
-                    json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
-                };
-                index_writer.add_document(doc!(
-                    text_field => "cool",
-                    json_field => json,
-                    text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
-                    text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
-                    score_field => val as u64,
-                    score_field_f64 => lg_norm.sample(&mut rng),
-                    score_field_i64 => val as i64,
-                ))?;
-                if cardinality == Cardinality::Sparse {
-                    for _ in 0..20 {
-                        index_writer.add_document(doc!(text_field => "cool"))?;
-                    }
-                }
-            }
-            // writing the segment
-            index_writer.commit()?;
-        }
-
-        Ok(index)
-    }
-
-    use paste::paste;
-    #[macro_export]
-    macro_rules! bench_all_cardinalities {
-        (  $x:ident ) => {
-            paste! {
-                #[bench]
-                fn $x(b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Full)
-                }
-
-                #[bench]
-                fn [<$x _opt>](b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Optional)
-                }
-
-                #[bench]
-                fn [<$x _multi>](b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Multivalued)
-                }
-
-                #[bench]
-                fn [<$x _sparse>](b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Sparse)
-                }
-
-            }
-        };
-    }
-
-    bench_all_cardinalities!(bench_aggregation_average_u64);
-
-    fn bench_aggregation_average_u64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average": { "avg": { "field": "score", } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_stats_f64);
-
-    fn bench_aggregation_stats_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average_f64": { "stats": { "field": "score_f64", } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_average_f64);
-
-    fn bench_aggregation_average_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average_f64": { "avg": { "field": "score_f64", } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_percentiles_f64);
-
-    fn bench_aggregation_percentiles_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_str = r#"
-            {
-              "mypercentiles": {
-                "percentiles": {
-                  "field": "score_f64",
-                  "percents": [ 95, 99, 99.9 ]
-                }
-              }
-            } "#;
-            let agg_req_1: Aggregations = serde_json::from_str(agg_req_str).unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_average_u64_and_f64);
-
-    fn bench_aggregation_average_u64_and_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average_f64": { "avg": { "field": "score_f64" } },
-                "average": { "avg": { "field": "score" } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_few);
-
-    fn bench_aggregation_terms_few_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": { "terms": { "field": "text_few_terms" } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many_with_top_hits_agg);
-
-    fn bench_aggregation_terms_many_with_top_hits_agg_card(
-        b: &mut Bencher,
-        cardinality: Cardinality,
-    ) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": {
-                    "terms": { "field": "text_many_terms" },
-                    "aggs": {
-                        "top_hits": { "top_hits":
-                            {
-                                "sort": [
-                                    { "score": "desc" }
-                                ],
-                                "size": 2,
-                                "doc_value_fields": ["score_f64"]
-                            }
-                        }
-                    }
-                },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many_with_sub_agg);
-
-    fn bench_aggregation_terms_many_with_sub_agg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": {
-                    "terms": { "field": "text_many_terms" },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many_json_mixed_type_with_sub_agg);
-
-    fn bench_aggregation_terms_many_json_mixed_type_with_sub_agg_card(
-        b: &mut Bencher,
-        cardinality: Cardinality,
-    ) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": {
-                    "terms": { "field": "json.mixed_type" },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many2);
-
-    fn bench_aggregation_terms_many2_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": { "terms": { "field": "text_many_terms" } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many_order_by_term);
-
-    fn bench_aggregation_terms_many_order_by_term_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": { "terms": { "field": "text_many_terms", "order": { "_key": "desc" } } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_range_only);
-
-    fn bench_aggregation_range_only_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "range_f64": { "range": { "field": "score_f64", "ranges": [
-                    { "from": 3, "to": 7000 },
-                    { "from": 7000, "to": 20000 },
-                    { "from": 20000, "to": 30000 },
-                    { "from": 30000, "to": 40000 },
-                    { "from": 40000, "to": 50000 },
-                    { "from": 50000, "to": 60000 }
-                ] } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_range_with_avg);
-
-    fn bench_aggregation_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "range": {
-                        "field": "score_f64",
-                        "ranges": [
-                            { "from": 3, "to": 7000 },
-                            { "from": 7000, "to": 20000 },
-                            { "from": 20000, "to": 30000 },
-                            { "from": 30000, "to": 40000 },
-                            { "from": 40000, "to": 50000 },
-                            { "from": 50000, "to": 60000 }
-                        ]
-                    },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    // hard bounds has a different algorithm, because it actually limits collection range
-    //
-    bench_all_cardinalities!(bench_aggregation_histogram_only_hard_bounds);
-
-    fn bench_aggregation_histogram_only_hard_bounds_card(
-        b: &mut Bencher,
-        cardinality: Cardinality,
-    ) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": { "histogram": { "field": "score_f64", "interval": 100, "hard_bounds": { "min": 1000, "max": 300000 } } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_histogram_with_avg);
-
-    fn bench_aggregation_histogram_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "histogram": { "field": "score_f64", "interval": 100 },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_histogram_only);
-
-    fn bench_aggregation_histogram_only_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "histogram": {
-                        "field": "score_f64",
-                        "interval": 100 // 1000 buckets
-                    },
-                }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_avg_and_range_with_avg);
-
-    fn bench_aggregation_avg_and_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "range": {
-                        "field": "score_f64",
-                        "ranges": [
-                            { "from": 3, "to": 7000 },
-                            { "from": 7000, "to": 20000 },
-                            { "from": 20000, "to": 60000 }
-                        ]
-                    },
-                    "aggs": {
-                        "average_in_range": { "avg": { "field": "score" } }
-                    }
-                },
-                "average": { "avg": { "field": "score" } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-}
--- a/src/aggregation/agg_limits.rs
+++ b/src/aggregation/agg_limits.rs
@@ -81,10 +81,11 @@ impl AggregationLimits {
        }
    }

-    pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
-        self.memory_consumption
-            .fetch_add(num_bytes, Ordering::Relaxed);
-        validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
+    pub(crate) fn add_memory_consumed(&self, add_num_bytes: u64) -> crate::Result<()> {
+        let prev_value = self
+            .memory_consumption
+            .fetch_add(add_num_bytes, Ordering::Relaxed);
+        validate_memory_consumption(prev_value + add_num_bytes, self.memory_limit)?;
        Ok(())
    }

@@ -94,11 +95,11 @@ impl AggregationLimits {
 }

 fn validate_memory_consumption(
-    memory_consumption: &AtomicU64,
+    memory_consumption: u64,
    memory_limit: ByteCount,
 ) -> Result<(), AggregationError> {
    // Load the estimated memory consumed by the aggregations
-    let memory_consumed: ByteCount = memory_consumption.load(Ordering::Relaxed).into();
+    let memory_consumed: ByteCount = memory_consumption.into();
    if memory_consumed > memory_limit {
        return Err(AggregationError::MemoryExceeded {
            limit: memory_limit,
@@ -118,10 +119,11 @@ pub struct ResourceLimitGuard {
 }

 impl ResourceLimitGuard {
-    pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
-        self.memory_consumption
-            .fetch_add(num_bytes, Ordering::Relaxed);
-        validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
+    pub(crate) fn add_memory_consumed(&self, add_num_bytes: u64) -> crate::Result<()> {
+        let prev_value = self
+            .memory_consumption
+            .fetch_add(add_num_bytes, Ordering::Relaxed);
+        validate_memory_consumption(prev_value + add_num_bytes, self.memory_limit)?;
        Ok(())
    }
 }
--- a/src/aggregation/agg_req_with_accessor.rs
+++ b/src/aggregation/agg_req_with_accessor.rs
@@ -17,7 +17,8 @@ use super::metric::{
 use super::segment_agg_result::AggregationLimits;
 use super::VecWithNames;
 use crate::aggregation::{f64_to_fastfield_u64, Key};
-use crate::{SegmentOrdinal, SegmentReader};
+use crate::index::SegmentReader;
+use crate::SegmentOrdinal;

 #[derive(Default)]
 pub(crate) struct AggregationsWithAccessor {
@@ -334,8 +335,8 @@ fn get_missing_val(
        }
        _ => {
            return Err(crate::TantivyError::InvalidArgument(format!(
-                "Missing value {:?} for field {} is not supported for column type {:?}",
-                missing, field_name, column_type
+                "Missing value {missing:?} for field {field_name} is not supported for column \
+                 type {column_type:?}"
            )));
        }
    };
@@ -402,7 +403,7 @@ fn get_dynamic_columns(
        .iter()
        .map(|h| h.open())
        .collect::<io::Result<_>>()?;
-    assert!(!ff_fields.is_empty(), "field {} not found", field_name);
+    assert!(!ff_fields.is_empty(), "field {field_name} not found");
    Ok(cols)
 }

--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -331,9 +331,11 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
        }

        let mem_delta = self.get_memory_consumption() - mem_pre;
-        bucket_agg_accessor
-            .limits
-            .add_memory_consumed(mem_delta as u64)?;
+        if mem_delta > 0 {
+            bucket_agg_accessor
+                .limits
+                .add_memory_consumed(mem_delta as u64)?;
+        }

        Ok(())
    }
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -324,9 +324,11 @@ impl SegmentAggregationCollector for SegmentTermCollector {
        }

        let mem_delta = self.get_memory_consumption() - mem_pre;
-        bucket_agg_accessor
-            .limits
-            .add_memory_consumed(mem_delta as u64)?;
+        if mem_delta > 0 {
+            bucket_agg_accessor
+                .limits
+                .add_memory_consumed(mem_delta as u64)?;
+        }

        Ok(())
    }
@@ -355,8 +357,7 @@ impl SegmentTermCollector {
    ) -> crate::Result<Self> {
        if field_type == ColumnType::Bytes {
            return Err(TantivyError::InvalidArgument(format!(
-                "terms aggregation is not supported for column type {:?}",
-                field_type
+                "terms aggregation is not supported for column type {field_type:?}"
            )));
        }
        let term_buckets = TermBuckets::default();
--- a/src/aggregation/collector.rs
+++ b/src/aggregation/collector.rs
@@ -8,7 +8,8 @@ use super::segment_agg_result::{
 };
 use crate::aggregation::agg_req_with_accessor::get_aggs_with_segment_accessor_and_validate;
 use crate::collector::{Collector, SegmentCollector};
-use crate::{DocId, SegmentOrdinal, SegmentReader, TantivyError};
+use crate::index::SegmentReader;
+use crate::{DocId, SegmentOrdinal, TantivyError};

 /// The default max bucket count, before the aggregation fails.
 pub const DEFAULT_BUCKET_LIMIT: u32 = 65000;
--- a/src/aggregation/metric/top_hits.rs
+++ b/src/aggregation/metric/top_hits.rs
@@ -131,8 +131,8 @@ impl<'de> Deserialize<'de> for KeyOrder {
        ))?;
        if key_order.next().is_some() {
            return Err(serde::de::Error::custom(format!(
-                "Expected exactly one key-value pair in sort parameter of top_hits, found {:?}",
-                key_order
+                "Expected exactly one key-value pair in sort parameter of top_hits, found \
+                 {key_order:?}"
            )));
        }
        Ok(Self { field, order })
@@ -144,27 +144,22 @@ fn globbed_string_to_regex(glob: &str) -> Result<Regex, crate::TantivyError> {
    // Replace `*` glob with `.*` regex
    let sanitized = format!("^{}$", regex::escape(glob).replace(r"\*", ".*"));
    Regex::new(&sanitized.replace('*', ".*")).map_err(|e| {
-        crate::TantivyError::SchemaError(format!(
-            "Invalid regex '{}' in docvalue_fields: {}",
-            glob, e
-        ))
+        crate::TantivyError::SchemaError(format!("Invalid regex '{glob}' in docvalue_fields: {e}"))
    })
 }

 fn use_doc_value_fields_err(parameter: &str) -> crate::Result<()> {
    Err(crate::TantivyError::AggregationError(
        AggregationError::InvalidRequest(format!(
-            "The `{}` parameter is not supported, only `docvalue_fields` is supported in \
-             `top_hits` aggregation",
-            parameter
+            "The `{parameter}` parameter is not supported, only `docvalue_fields` is supported in \
+             `top_hits` aggregation"
        )),
    ))
 }
 fn unsupported_err(parameter: &str) -> crate::Result<()> {
    Err(crate::TantivyError::AggregationError(
        AggregationError::InvalidRequest(format!(
-            "The `{}` parameter is not supported in the `top_hits` aggregation",
-            parameter
+            "The `{parameter}` parameter is not supported in the `top_hits` aggregation"
        )),
    ))
 }
@@ -217,8 +212,7 @@ impl TopHitsAggregation {
                    .collect::<Vec<_>>();
                assert!(
                    !fields.is_empty(),
-                    "No fields matched the glob '{}' in docvalue_fields",
-                    field
+                    "No fields matched the glob '{field}' in docvalue_fields"
                );
                Ok(fields)
            })
@@ -254,7 +248,7 @@ impl TopHitsAggregation {
            .map(|field| {
                let accessors = accessors
                    .get(field)
-                    .unwrap_or_else(|| panic!("field '{}' not found in accessors", field));
+                    .unwrap_or_else(|| panic!("field '{field}' not found in accessors"));

                let values: Vec<FastFieldValue> = accessors
                    .iter()
--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -143,8 +143,6 @@ use std::fmt::Display;
 #[cfg(test)]
 mod agg_tests;

-mod agg_bench;
-
 use core::fmt;

 pub use agg_limits::AggregationLimits;
@@ -160,15 +158,14 @@ use serde::de::{self, Visitor};
 use serde::{Deserialize, Deserializer, Serialize};

 fn parse_str_into_f64<E: de::Error>(value: &str) -> Result<f64, E> {
-    let parsed = value.parse::<f64>().map_err(|_err| {
-        de::Error::custom(format!("Failed to parse f64 from string: {:?}", value))
-    })?;
+    let parsed = value
+        .parse::<f64>()
+        .map_err(|_err| de::Error::custom(format!("Failed to parse f64 from string: {value:?}")))?;

    // Check if the parsed value is NaN or infinity
    if parsed.is_nan() || parsed.is_infinite() {
        Err(de::Error::custom(format!(
-            "Value is not a valid f64 (NaN or Infinity): {:?}",
-            value
+            "Value is not a valid f64 (NaN or Infinity): {value:?}"
        )))
    } else {
        Ok(parsed)
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -598,7 +598,7 @@ mod tests {
                let mid = n % 4;
                n /= 4;
                let leaf = n % 5;
-                Facet::from(&format!("/top{}/mid{}/leaf{}", top, mid, leaf))
+                Facet::from(&format!("/top{top}/mid{mid}/leaf{leaf}"))
            })
            .collect();
        for i in 0..num_facets * 10 {
@@ -737,7 +737,7 @@ mod tests {
            vec![("a", 10), ("b", 100), ("c", 7), ("d", 12), ("e", 21)]
                .into_iter()
                .flat_map(|(c, count)| {
-                    let facet = Facet::from(&format!("/facet/{}", c));
+                    let facet = Facet::from(&format!("/facet/{c}"));
                    let doc = doc!(facet_field => facet);
                    iter::repeat(doc).take(count)
                })
@@ -785,7 +785,7 @@ mod tests {
        let docs: Vec<TantivyDocument> = vec![("b", 2), ("a", 2), ("c", 4)]
            .into_iter()
            .flat_map(|(c, count)| {
-                let facet = Facet::from(&format!("/facet/{}", c));
+                let facet = Facet::from(&format!("/facet/{c}"));
                let doc = doc!(facet_field => facet);
                iter::repeat(doc).take(count)
            })
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -4,7 +4,8 @@ use std::marker::PhantomData;
 use serde::{Deserialize, Serialize};

 use super::top_score_collector::TopNComputer;
-use crate::{DocAddress, DocId, SegmentOrdinal, SegmentReader};
+use crate::index::SegmentReader;
+use crate::{DocAddress, DocId, SegmentOrdinal};

 /// Contains a feature (field, score, etc.) of a document along with the document address.
 ///
--- a/src/core/executor.rs
+++ b/src/core/executor.rs
@@ -1,19 +1,25 @@
-use rayon::{ThreadPool, ThreadPoolBuilder};
+use std::sync::Arc;
+
+#[cfg(feature = "quickwit")]
+use futures_util::{future::Either, FutureExt};

 use crate::TantivyError;

-/// Search executor whether search request are single thread or multithread.
-///
-/// We don't expose Rayon thread pool directly here for several reasons.
-///
-/// First dependency hell. It is not a good idea to expose the
-/// API of a dependency, knowing it might conflict with a different version
-/// used by the client. Second, we may stop using rayon in the future.
+/// Executor makes it possible to run tasks in a single thread or
+/// in a thread pool.
+#[derive(Clone)]
 pub enum Executor {
    /// Single thread variant of an Executor
    SingleThread,
    /// Thread pool variant of an Executor
-    ThreadPool(ThreadPool),
+    ThreadPool(Arc<rayon::ThreadPool>),
+}
+
+#[cfg(feature = "quickwit")]
+impl From<Arc<rayon::ThreadPool>> for Executor {
+    fn from(thread_pool: Arc<rayon::ThreadPool>) -> Self {
+        Executor::ThreadPool(thread_pool)
+    }
 }

 impl Executor {
@@ -24,11 +30,11 @@ impl Executor {

    /// Creates an Executor that dispatches the tasks in a thread pool.
    pub fn multi_thread(num_threads: usize, prefix: &'static str) -> crate::Result<Executor> {
-        let pool = ThreadPoolBuilder::new()
+        let pool = rayon::ThreadPoolBuilder::new()
            .num_threads(num_threads)
            .thread_name(move |num| format!("{prefix}{num}"))
            .build()?;
-        Ok(Executor::ThreadPool(pool))
+        Ok(Executor::ThreadPool(Arc::new(pool)))
    }

    /// Perform a map in the thread pool.
@@ -91,11 +97,36 @@ impl Executor {
            }
        }
    }
+
+    /// Spawn a task on the pool, returning a future completing on task success.
+    ///
+    /// If the task panics, returns `Err(())`.
+    #[cfg(feature = "quickwit")]
+    pub fn spawn_blocking<T: Send + 'static>(
+        &self,
+        cpu_intensive_task: impl FnOnce() -> T + Send + 'static,
+    ) -> impl std::future::Future<Output = Result<T, ()>> {
+        match self {
+            Executor::SingleThread => Either::Left(std::future::ready(Ok(cpu_intensive_task()))),
+            Executor::ThreadPool(pool) => {
+                let (sender, receiver) = oneshot::channel();
+                pool.spawn(|| {
+                    if sender.is_closed() {
+                        return;
+                    }
+                    let task_result = cpu_intensive_task();
+                    let _ = sender.send(task_result);
+                });
+
+                let res = receiver.map(|res| res.map_err(|_| ()));
+                Either::Right(res)
+            }
+        }
+    }
 }

 #[cfg(test)]
 mod tests {
-
    use super::Executor;

    #[test]
@@ -147,4 +178,62 @@ mod tests {
            assert_eq!(result[i], i * 2);
        }
    }
+
+    #[cfg(feature = "quickwit")]
+    #[test]
+    fn test_cancel_cpu_intensive_tasks() {
+        use std::sync::atomic::{AtomicU64, Ordering};
+        use std::sync::Arc;
+
+        let counter: Arc<AtomicU64> = Default::default();
+
+        let other_counter: Arc<AtomicU64> = Default::default();
+
+        let mut futures = Vec::new();
+        let mut other_futures = Vec::new();
+
+        let (tx, rx) = crossbeam_channel::bounded::<()>(0);
+        let rx = Arc::new(rx);
+        let executor = Executor::multi_thread(3, "search-test").unwrap();
+        for _ in 0..1000 {
+            let counter_clone: Arc<AtomicU64> = counter.clone();
+            let other_counter_clone: Arc<AtomicU64> = other_counter.clone();
+
+            let rx_clone = rx.clone();
+            let rx_clone2 = rx.clone();
+            let fut = executor.spawn_blocking(move || {
+                counter_clone.fetch_add(1, Ordering::SeqCst);
+                let () = rx_clone.recv().unwrap();
+            });
+            futures.push(fut);
+            let other_fut = executor.spawn_blocking(move || {
+                other_counter_clone.fetch_add(1, Ordering::SeqCst);
+                let () = rx_clone2.recv().unwrap();
+            });
+            other_futures.push(other_fut);
+        }
+
+        // We execute 100 futures.
+        for _ in 0..100 {
+            tx.send(()).unwrap();
+        }
+
+        let counter_val = counter.load(Ordering::SeqCst);
+        let other_counter_val = other_counter.load(Ordering::SeqCst);
+        assert!(counter_val >= 30);
+        assert!(other_counter_val >= 30);
+
+        drop(other_futures);
+
+        // We execute 100 more futures, now that `other_futures` has been dropped.
+        for _ in 0..100 {
+            tx.send(()).unwrap();
+        }
+
+        let counter_val2 = counter.load(Ordering::SeqCst);
+        assert!(counter_val2 >= counter_val + 100 - 6);
+
+        let other_counter_val2 = other_counter.load(Ordering::SeqCst);
+        assert!(other_counter_val2 <= other_counter_val + 6);
+    }
 }
--- a/src/core/json_utils.rs
+++ b/src/core/json_utils.rs
@@ -31,7 +31,7 @@ use crate::{DateTime, DocId, Term};
 /// position 1.
 /// As a result, with lemmatization, "The Smiths" will match our object.
 ///
-/// Worse, if a same term is appears in the second object, a non increasing value would be pushed
+/// Worse, if a same term appears in the second object, a non increasing value would be pushed
 /// to the position recorder probably provoking a panic.
 ///
 /// This problem is solved for regular multivalued object by offsetting the position
@@ -50,7 +50,7 @@ use crate::{DateTime, DocId, Term};
 /// We can therefore afford working with a map that is not imperfect. It is fine if several
 /// path map to the same index position as long as the probability is relatively low.
 #[derive(Default)]
-struct IndexingPositionsPerPath {
+pub(crate) struct IndexingPositionsPerPath {
    positions_per_path: FxHashMap<u32, IndexingPosition>,
 }

@@ -58,6 +58,9 @@ impl IndexingPositionsPerPath {
    fn get_position_from_id(&mut self, id: u32) -> &mut IndexingPosition {
        self.positions_per_path.entry(id).or_default()
    }
+    pub fn clear(&mut self) {
+        self.positions_per_path.clear();
+    }
 }

 /// Convert JSON_PATH_SEGMENT_SEP to a dot.
@@ -68,36 +71,6 @@ pub fn json_path_sep_to_dot(path: &mut str) {
    }
 }

-#[allow(clippy::too_many_arguments)]
-pub(crate) fn index_json_values<'a, V: Value<'a>>(
-    doc: DocId,
-    json_visitors: impl Iterator<Item = crate::Result<V::ObjectIter>>,
-    text_analyzer: &mut TextAnalyzer,
-    expand_dots_enabled: bool,
-    term_buffer: &mut Term,
-    postings_writer: &mut dyn PostingsWriter,
-    json_path_writer: &mut JsonPathWriter,
-    ctx: &mut IndexingContext,
-) -> crate::Result<()> {
-    json_path_writer.clear();
-    json_path_writer.set_expand_dots(expand_dots_enabled);
-    let mut positions_per_path: IndexingPositionsPerPath = Default::default();
-    for json_visitor_res in json_visitors {
-        let json_visitor = json_visitor_res?;
-        index_json_object::<V>(
-            doc,
-            json_visitor,
-            text_analyzer,
-            term_buffer,
-            json_path_writer,
-            postings_writer,
-            ctx,
-            &mut positions_per_path,
-        );
-    }
-    Ok(())
-}
-
 #[allow(clippy::too_many_arguments)]
 fn index_json_object<'a, V: Value<'a>>(
    doc: DocId,
@@ -126,7 +99,7 @@ fn index_json_object<'a, V: Value<'a>>(
 }

 #[allow(clippy::too_many_arguments)]
-fn index_json_value<'a, V: Value<'a>>(
+pub(crate) fn index_json_value<'a, V: Value<'a>>(
    doc: DocId,
    json_value: V,
    text_analyzer: &mut TextAnalyzer,
@@ -166,12 +139,18 @@ fn index_json_value<'a, V: Value<'a>>(
                );
            }
            ReferenceValueLeaf::U64(val) => {
+                // try to parse to i64, since when querying we will apply the same logic and prefer
+                // i64 values
                set_path_id(
                    term_buffer,
                    ctx.path_to_unordered_id
                        .get_or_allocate_unordered_id(json_path_writer.as_str()),
                );
-                term_buffer.append_type_and_fast_value(val);
+                if let Ok(i64_val) = val.try_into() {
+                    term_buffer.append_type_and_fast_value::<i64>(i64_val);
+                } else {
+                    term_buffer.append_type_and_fast_value(val);
+                }
                postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
            }
            ReferenceValueLeaf::I64(val) => {
@@ -257,10 +236,7 @@ fn index_json_value<'a, V: Value<'a>>(
 /// Tries to infer a JSON type from a string and append it to the term.
 ///
 /// The term must be json + JSON path.
-pub(crate) fn convert_to_fast_value_and_append_to_json_term(
-    mut term: Term,
-    phrase: &str,
-) -> Option<Term> {
+pub fn convert_to_fast_value_and_append_to_json_term(mut term: Term, phrase: &str) -> Option<Term> {
    assert_eq!(
        term.value()
            .as_json_value_bytes()
@@ -362,14 +338,14 @@ mod tests {
        let mut term = Term::from_field_json_path(field, "attributes.color", false);
        term.append_type_and_str("red");
        assert_eq!(
-            format!("{:?}", term),
+            format!("{term:?}"),
            "Term(field=1, type=Json, path=attributes.color, type=Str, \"red\")"
        );

        let mut term = Term::from_field_json_path(field, "attributes.dimensions.width", false);
        term.append_type_and_fast_value(400i64);
        assert_eq!(
-            format!("{:?}", term),
+            format!("{term:?}"),
            "Term(field=1, type=Json, path=attributes.dimensions.width, type=I64, 400)"
        );
    }
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -4,13 +4,13 @@ use std::{fmt, io};

 use crate::collector::Collector;
 use crate::core::Executor;
-use crate::index::SegmentReader;
+use crate::index::{SegmentId, SegmentReader};
 use crate::query::{Bm25StatisticsProvider, EnableScoring, Query};
 use crate::schema::document::DocumentDeserialize;
 use crate::schema::{Schema, Term};
 use crate::space_usage::SearcherSpaceUsage;
 use crate::store::{CacheStats, StoreReader};
-use crate::{DocAddress, Index, Opstamp, SegmentId, TrackedObject};
+use crate::{DocAddress, Index, Opstamp, TrackedObject};

 /// Identifies the searcher generation accessed by a [`Searcher`].
 ///
@@ -109,8 +109,9 @@ impl Searcher {
        &self,
        doc_address: DocAddress,
    ) -> crate::Result<D> {
+        let executor = self.inner.index.search_executor();
        let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
-        store_reader.get_async(doc_address.doc_id).await
+        store_reader.get_async(doc_address.doc_id, executor).await
    }

    /// Access the schema associated with the index of this searcher.
--- a/src/core/tests.rs
+++ b/src/core/tests.rs
@@ -1,12 +1,14 @@
 use crate::collector::Count;
 use crate::directory::{RamDirectory, WatchCallback};
+use crate::index::SegmentId;
 use crate::indexer::{LogMergePolicy, NoMergePolicy};
+use crate::postings::Postings;
 use crate::query::TermQuery;
 use crate::schema::{Field, IndexRecordOption, Schema, INDEXED, STRING, TEXT};
 use crate::tokenizer::TokenizerManager;
 use crate::{
-    Directory, DocSet, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, Postings,
-    ReloadPolicy, SegmentId, TantivyDocument, Term,
+    Directory, DocSet, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, ReloadPolicy,
+    TantivyDocument, Term,
 };

 #[test]
@@ -417,7 +419,7 @@ fn test_non_text_json_term_freq() {
    let inv_idx = segment_reader.inverted_index(field).unwrap();

    let mut term = Term::from_field_json_path(field, "tenant_id", false);
-    term.append_type_and_fast_value(75u64);
+    term.append_type_and_fast_value(75i64);

    let postings = inv_idx
        .read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
@@ -451,7 +453,7 @@ fn test_non_text_json_term_freq_bitpacked() {
    let inv_idx = segment_reader.inverted_index(field).unwrap();

    let mut term = Term::from_field_json_path(field, "tenant_id", false);
-    term.append_type_and_fast_value(75u64);
+    term.append_type_and_fast_value(75i64);

    let mut postings = inv_idx
        .read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
--- a/src/directory/mmap_directory.rs
+++ b/src/directory/mmap_directory.rs
@@ -566,7 +566,7 @@ mod tests {
        let mmap_directory = MmapDirectory::create_from_tempdir().unwrap();
        let num_paths = 10;
        let paths: Vec<PathBuf> = (0..num_paths)
-            .map(|i| PathBuf::from(&*format!("file_{}", i)))
+            .map(|i| PathBuf::from(&*format!("file_{i}")))
            .collect();
        {
            for path in &paths {
--- a/src/fastfield/facet_reader.rs
+++ b/src/fastfield/facet_reader.rs
@@ -62,8 +62,7 @@ impl FacetReader {

 #[cfg(test)]
 mod tests {
-    use crate::schema::document::Value;
-    use crate::schema::{Facet, FacetOptions, SchemaBuilder, STORED};
+    use crate::schema::{Facet, FacetOptions, SchemaBuilder, Value, STORED};
    use crate::{DocAddress, Index, IndexWriter, TantivyDocument};

    #[test]
@@ -89,7 +88,9 @@ mod tests {
        let doc = searcher
            .doc::<TantivyDocument>(DocAddress::new(0u32, 0u32))
            .unwrap();
-        let value = doc.get_first(facet_field).and_then(|v| v.as_facet());
+        let value = doc
+            .get_first(facet_field)
+            .and_then(|v| v.as_value().as_facet());
        assert_eq!(value, None);
    }

@@ -146,8 +147,11 @@ mod tests {
        facet_ords.extend(facet_reader.facet_ords(0u32));
        assert_eq!(&facet_ords, &[0u64]);
        let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0u32, 0u32))?;
-        let value: Option<&Facet> = doc.get_first(facet_field).and_then(|v| v.as_facet());
-        assert_eq!(value, Facet::from_text("/a/b").ok().as_ref());
+        let value: Option<Facet> = doc
+            .get_first(facet_field)
+            .and_then(|v| v.as_facet())
+            .map(|facet| Facet::from_encoded_string(facet.to_string()));
+        assert_eq!(value, Facet::from_text("/a/b").ok());
        Ok(())
    }

--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -80,7 +80,7 @@ mod tests {
    use std::path::Path;

    use columnar::StrColumn;
-    use common::{ByteCount, HasLen, TerminatingWrite};
+    use common::{ByteCount, DateTimePrecision, HasLen, TerminatingWrite};
    use once_cell::sync::Lazy;
    use rand::prelude::SliceRandom;
    use rand::rngs::StdRng;
@@ -88,14 +88,15 @@ mod tests {

    use super::*;
    use crate::directory::{Directory, RamDirectory, WritePtr};
+    use crate::index::SegmentId;
    use crate::merge_policy::NoMergePolicy;
    use crate::schema::{
-        Facet, FacetOptions, Field, JsonObjectOptions, Schema, SchemaBuilder, TantivyDocument,
-        TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
+        DateOptions, Facet, FacetOptions, Field, JsonObjectOptions, Schema, SchemaBuilder,
+        TantivyDocument, TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
    };
    use crate::time::OffsetDateTime;
    use crate::tokenizer::{LowerCaser, RawTokenizer, TextAnalyzer, TokenizerManager};
-    use crate::{DateOptions, DateTimePrecision, Index, IndexWriter, SegmentId, SegmentReader};
+    use crate::{Index, IndexWriter, SegmentReader};

    pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
        let mut schema_builder = Schema::builder();
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -1,14 +1,14 @@
 use std::io;

 use columnar::{ColumnarWriter, NumericalValue};
-use common::JsonPathWriter;
+use common::{DateTimePrecision, JsonPathWriter};
 use tokenizer_api::Token;

 use crate::indexer::doc_id_mapping::DocIdMapping;
 use crate::schema::document::{Document, ReferenceValue, ReferenceValueLeaf, Value};
 use crate::schema::{value_type_to_column_type, Field, FieldType, Schema, Type};
 use crate::tokenizer::{TextAnalyzer, TokenizerManager};
-use crate::{DateTimePrecision, DocId, TantivyError};
+use crate::{DocId, TantivyError};

 /// Only index JSON down to a depth of 20.
 /// This is mostly to guard us from a stack overflow triggered by malicious input.
@@ -183,8 +183,7 @@ impl FastFieldsWriter {
                        .record_datetime(doc_id, field_name, truncated_datetime);
                }
                ReferenceValueLeaf::Facet(val) => {
-                    self.columnar_writer
-                        .record_str(doc_id, field_name, val.encoded_str());
+                    self.columnar_writer.record_str(doc_id, field_name, val);
                }
                ReferenceValueLeaf::Bytes(val) => {
                    self.columnar_writer.record_bytes(doc_id, field_name, val);
--- a/src/index/index.rs
+++ b/src/index/index.rs
@@ -3,7 +3,7 @@ use std::fmt;
 #[cfg(feature = "mmap")]
 use std::path::Path;
 use std::path::PathBuf;
-use std::sync::Arc;
+use std::thread::available_parallelism;

 use super::segment::Segment;
 use super::segment_reader::merge_field_meta_data;
@@ -252,9 +252,8 @@ impl IndexBuilder {
                let field_type = entry.field_type().value_type();
                if !supported_field_types.contains(&field_type) {
                    return Err(TantivyError::InvalidArgument(format!(
-                        "Unsupported field type in sort_by_field: {:?}. Supported field types: \
-                         {:?} ",
-                        field_type, supported_field_types,
+                        "Unsupported field type in sort_by_field: {field_type:?}. Supported field \
+                         types: {supported_field_types:?} ",
                    )));
                }
            }
@@ -293,7 +292,7 @@ pub struct Index {
    directory: ManagedDirectory,
    schema: Schema,
    settings: IndexSettings,
-    executor: Arc<Executor>,
+    executor: Executor,
    tokenizers: TokenizerManager,
    fast_field_tokenizers: TokenizerManager,
    inventory: SegmentMetaInventory,
@@ -318,29 +317,25 @@ impl Index {
    ///
    /// By default the executor is single thread, and simply runs in the calling thread.
    pub fn search_executor(&self) -> &Executor {
-        self.executor.as_ref()
+        &self.executor
    }

    /// Replace the default single thread search executor pool
    /// by a thread pool with a given number of threads.
    pub fn set_multithread_executor(&mut self, num_threads: usize) -> crate::Result<()> {
-        self.executor = Arc::new(Executor::multi_thread(num_threads, "tantivy-search-")?);
+        self.executor = Executor::multi_thread(num_threads, "tantivy-search-")?;
        Ok(())
    }

-    /// Custom thread pool by a outer thread pool.
+    /// Replaces the search executor with a caller-supplied one, e.g. one backed by an outer pool.
-    pub fn set_shared_multithread_executor(
-        &mut self,
-        shared_thread_pool: Arc<Executor>,
-    ) -> crate::Result<()> {
-        self.executor = shared_thread_pool.clone();
-        Ok(())
+    pub fn set_executor(&mut self, executor: Executor) {
+        self.executor = executor;
    }

    /// Replace the default single thread search executor pool
    /// by a thread pool with as many threads as there are CPUs on the system.
    pub fn set_default_multithread_executor(&mut self) -> crate::Result<()> {
-        let default_num_threads = num_cpus::get();
+        let default_num_threads = available_parallelism()?.get();
        self.set_multithread_executor(default_num_threads)
    }

@@ -418,7 +413,7 @@ impl Index {
            schema,
            tokenizers: TokenizerManager::default(),
            fast_field_tokenizers: TokenizerManager::default(),
-            executor: Arc::new(Executor::single_thread()),
+            executor: Executor::single_thread(),
            inventory,
        }
    }
@@ -621,7 +616,7 @@ impl Index {
        &self,
        memory_budget_in_bytes: usize,
    ) -> crate::Result<IndexWriter<D>> {
-        let mut num_threads = std::cmp::min(num_cpus::get(), MAX_NUM_THREAD);
+        let mut num_threads = std::cmp::min(available_parallelism()?.get(), MAX_NUM_THREAD);
        let memory_budget_num_bytes_per_thread = memory_budget_in_bytes / num_threads;
        if memory_budget_num_bytes_per_thread < MEMORY_BUDGET_NUM_BYTES_MIN {
            num_threads = (memory_budget_in_bytes / MEMORY_BUDGET_NUM_BYTES_MIN).max(1);
--- a/src/index/mod.rs
+++ b/src/index/mod.rs
@@ -1,5 +1,3 @@
-//! # Index Module
-//!
 //! The `index` module in Tantivy contains core components to read and write indexes.
 //!
 //! It contains `Index` and `Segment`, where a `Index` consists of one or more `Segment`s.
--- a/src/index/segment_reader.rs
+++ b/src/index/segment_reader.rs
@@ -318,14 +318,14 @@ impl SegmentReader {
                        if create_canonical {
                            // Without expand dots enabled dots need to be escaped.
                            let escaped_json_path = json_path.replace('.', "\\.");
-                            let full_path = format!("{}.{}", field_name, escaped_json_path);
+                            let full_path = format!("{field_name}.{escaped_json_path}");
                            let full_path_unescaped = format!("{}.{}", field_name, &json_path);
                            map_to_canonical.insert(full_path_unescaped, full_path.to_string());
                            full_path
                        } else {
                            // With expand dots enabled, we can use '.' instead of '\u{1}'.
                            json_path_sep_to_dot(&mut json_path);
-                            format!("{}.{}", field_name, json_path)
+                            format!("{field_name}.{json_path}")
                        }
                    };
                    indexed_fields.extend(
--- a/src/indexer/delete_queue.rs
+++ b/src/indexer/delete_queue.rs
@@ -246,8 +246,9 @@ impl DeleteCursor {
 mod tests {

    use super::{DeleteOperation, DeleteQueue};
+    use crate::index::SegmentReader;
    use crate::query::{Explanation, Scorer, Weight};
-    use crate::{DocId, Score, SegmentReader};
+    use crate::{DocId, Score};

    struct DummyWeight;
    impl Weight for DummyWeight {
--- a/src/indexer/doc_id_mapping.rs
+++ b/src/indexer/doc_id_mapping.rs
@@ -306,12 +306,10 @@ mod tests_indexsorting {
        let my_string_field = index.schema().get_field("string_field").unwrap();
        let searcher = index.reader()?.searcher();
        {
-            assert_eq!(
-                searcher
-                    .doc::<TantivyDocument>(DocAddress::new(0, 0))?
-                    .get_first(my_string_field),
-                None
-            );
+            assert!(searcher
+                .doc::<TantivyDocument>(DocAddress::new(0, 0))?
+                .get_first(my_string_field)
+                .is_none());
            assert_eq!(
                searcher
                    .doc::<TantivyDocument>(DocAddress::new(0, 3))?
@@ -344,7 +342,7 @@ mod tests_indexsorting {
                Some("blublub")
            );
            let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0, 4))?;
-            assert_eq!(doc.get_first(my_string_field), None);
+            assert!(doc.get_first(my_string_field).is_none());
        }
        // sort by field desc
        let index = create_test_index(
--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -814,10 +814,9 @@ mod tests {
    use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
    use crate::indexer::NoMergePolicy;
    use crate::query::{BooleanQuery, Occur, Query, QueryParser, TermQuery};
-    use crate::schema::document::Value;
    use crate::schema::{
        self, Facet, FacetOptions, IndexRecordOption, IpAddrOptions, NumericOptions, Schema,
-        TextFieldIndexing, TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
+        TextFieldIndexing, TextOptions, Value, FAST, INDEXED, STORED, STRING, TEXT,
    };
    use crate::store::DOCSTORE_CACHE_CAPACITY;
    use crate::{
@@ -1980,7 +1979,13 @@ mod tests {
                .unwrap();
            // test store iterator
            for doc in store_reader.iter::<TantivyDocument>(segment_reader.alive_bitset()) {
-                let id = doc.unwrap().get_first(id_field).unwrap().as_u64().unwrap();
+                let id = doc
+                    .unwrap()
+                    .get_first(id_field)
+                    .unwrap()
+                    .as_value()
+                    .as_u64()
+                    .unwrap();
                assert!(expected_ids_and_num_occurrences.contains_key(&id));
            }
            // test store random access
@@ -2013,7 +2018,7 @@ mod tests {
                    let mut bool2 = doc.get_all(multi_bools);
                    assert_eq!(bool, bool2.next().unwrap().as_bool().unwrap());
                    assert_ne!(bool, bool2.next().unwrap().as_bool().unwrap());
-                    assert_eq!(None, bool2.next())
+                    assert!(bool2.next().is_none())
                }
            }
        }
--- a/src/indexer/log_merge_policy.rs
+++ b/src/indexer/log_merge_policy.rs
@@ -144,9 +144,9 @@ mod tests {
    use once_cell::sync::Lazy;

    use super::*;
-    use crate::index::SegmentMetaInventory;
+    use crate::index::{SegmentId, SegmentMetaInventory};
+    use crate::schema;
    use crate::schema::INDEXED;
-    use crate::{schema, SegmentId};

    static INVENTORY: Lazy<SegmentMetaInventory> = Lazy::new(SegmentMetaInventory::default);

--- a/src/indexer/merge_operation.rs
+++ b/src/indexer/merge_operation.rs
@@ -1,7 +1,8 @@
 use std::collections::HashSet;
 use std::ops::Deref;

-use crate::{Inventory, Opstamp, SegmentId, TrackedObject};
+use crate::index::SegmentId;
+use crate::{Inventory, Opstamp, TrackedObject};

 #[derive(Default)]
 pub(crate) struct MergeOperationInventory(Inventory<InnerMergeOperation>);
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -13,7 +13,7 @@ use crate::docset::{DocSet, TERMINATED};
 use crate::error::DataCorruption;
 use crate::fastfield::{AliveBitSet, FastFieldNotAvailableError};
 use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
-use crate::index::{Segment, SegmentReader};
+use crate::index::{Segment, SegmentComponent, SegmentReader};
 use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping};
 use crate::indexer::SegmentSerializer;
 use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
@@ -21,8 +21,7 @@ use crate::schema::{value_type_to_column_type, Field, FieldType, Schema};
 use crate::store::StoreWriter;
 use crate::termdict::{TermMerger, TermOrdinal};
 use crate::{
-    DocAddress, DocId, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
-    SegmentComponent, SegmentOrdinal,
+    DocAddress, DocId, IndexSettings, IndexSortByField, InvertedIndexReader, Order, SegmentOrdinal,
 };

 /// Segment's max doc must be `< MAX_DOC_LIMIT`.
@@ -794,17 +793,16 @@ mod tests {
        BytesFastFieldTestCollector, FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE,
    };
    use crate::collector::{Count, FacetCollector};
-    use crate::index::Index;
+    use crate::index::{Index, SegmentId};
    use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
-    use crate::schema::document::Value;
    use crate::schema::{
        Facet, FacetOptions, IndexRecordOption, NumericOptions, TantivyDocument, Term,
-        TextFieldIndexing, INDEXED, TEXT,
+        TextFieldIndexing, Value, INDEXED, TEXT,
    };
    use crate::time::OffsetDateTime;
    use crate::{
        assert_nearly_equals, schema, DateTime, DocAddress, DocId, DocSet, IndexSettings,
-        IndexSortByField, IndexWriter, Order, Searcher, SegmentId,
+        IndexSortByField, IndexWriter, Order, Searcher,
    };

    #[test]
@@ -911,15 +909,24 @@ mod tests {
            }
            {
                let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0, 0))?;
-                assert_eq!(doc.get_first(text_field).unwrap().as_str(), Some("af b"));
+                assert_eq!(
+                    doc.get_first(text_field).unwrap().as_value().as_str(),
+                    Some("af b")
+                );
            }
            {
                let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0, 1))?;
-                assert_eq!(doc.get_first(text_field).unwrap().as_str(), Some("a b c"));
+                assert_eq!(
+                    doc.get_first(text_field).unwrap().as_value().as_str(),
+                    Some("a b c")
+                );
            }
            {
                let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0, 2))?;
-                assert_eq!(doc.get_first(text_field).unwrap().as_str(), Some("a b c d"));
+                assert_eq!(
+                    doc.get_first(text_field).unwrap().as_value().as_str(),
+                    Some("a b c d")
+                );
            }
            {
                let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0, 3))?;
--- a/src/indexer/merger_sorted_index_test.rs
+++ b/src/indexer/merger_sorted_index_test.rs
@@ -3,15 +3,15 @@ mod tests {
    use crate::collector::TopDocs;
    use crate::fastfield::AliveBitSet;
    use crate::index::Index;
+    use crate::postings::Postings;
    use crate::query::QueryParser;
-    use crate::schema::document::Value;
    use crate::schema::{
        self, BytesOptions, Facet, FacetOptions, IndexRecordOption, NumericOptions,
-        TextFieldIndexing, TextOptions,
+        TextFieldIndexing, TextOptions, Value,
    };
    use crate::{
-        DocAddress, DocSet, IndexSettings, IndexSortByField, IndexWriter, Order, Postings,
-        TantivyDocument, Term,
+        DocAddress, DocSet, IndexSettings, IndexSortByField, IndexWriter, Order, TantivyDocument,
+        Term,
    };

    fn create_test_index_posting_list_issue(index_settings: Option<IndexSettings>) -> Index {
@@ -280,13 +280,16 @@ mod tests {
                .doc::<TantivyDocument>(DocAddress::new(0, blubber_pos))
                .unwrap();
            assert_eq!(
-                doc.get_first(my_text_field).unwrap().as_str(),
+                doc.get_first(my_text_field).unwrap().as_value().as_str(),
                Some("blubber")
            );
            let doc = searcher
                .doc::<TantivyDocument>(DocAddress::new(0, 0))
                .unwrap();
-            assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(1000));
+            assert_eq!(
+                doc.get_first(int_field).unwrap().as_value().as_u64(),
+                Some(1000)
+            );
        }
    }

--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -182,7 +182,7 @@ mod tests_mmap {
        let index = Index::create_in_ram(schema_builder.build());
        let mut index_writer = index.writer_for_tests().unwrap();
        index_writer
-            .add_document(doc!(field=>json!({format!("{field_name_in}"): "test1"})))
+            .add_document(doc!(field=>json!({format!("{field_name_in}"): "test1", format!("num{field_name_in}"): 10})))
            .unwrap();
        index_writer
            .add_document(doc!(field=>json!({format!("a{field_name_in}"): "test2"})))
@@ -216,7 +216,7 @@ mod tests_mmap {
        let test_query = |query_str: &str| {
            let query = parse_query.parse_query(query_str).unwrap();
            let num_docs = searcher.search(&query, &Count).unwrap();
-            assert_eq!(num_docs, 1, "{}", query_str);
+            assert_eq!(num_docs, 1, "{query_str}");
        };
        test_query(format!("json.{field_name_out}:test1").as_str());
        test_query(format!("json.a{field_name_out}:test2").as_str());
@@ -260,6 +260,64 @@ mod tests_mmap {
            "test6",
        );
        test_agg(format!("json.{field_name_out}a").as_str(), "test7");
+
+        // `.` is stored as `\u{0001}` internally in tantivy
+        let field_name_out_internal = if field_name_out == "." {
+            "\u{0001}"
+        } else {
+            field_name_out
+        };
+
+        let mut fields = reader.searcher().segment_readers()[0]
+            .inverted_index(field)
+            .unwrap()
+            .list_encoded_fields()
+            .unwrap();
+        assert_eq!(fields.len(), 8);
+        fields.sort();
+        let mut expected_fields = vec![
+            (format!("a{field_name_out_internal}"), Type::Str),
+            (format!("a{field_name_out_internal}a"), Type::Str),
+            (
+                format!("a{field_name_out_internal}a{field_name_out_internal}"),
+                Type::Str,
+            ),
+            (
+                format!("a{field_name_out_internal}\u{1}ab{field_name_out_internal}"),
+                Type::Str,
+            ),
+            (
+                format!("a{field_name_out_internal}\u{1}a{field_name_out_internal}"),
+                Type::Str,
+            ),
+            (format!("{field_name_out_internal}a"), Type::Str),
+            (format!("{field_name_out_internal}"), Type::Str),
+            (format!("num{field_name_out_internal}"), Type::I64),
+        ];
+        expected_fields.sort();
+        assert_eq!(fields, expected_fields);
+        // Check columnar reader
+        let mut columns = reader.searcher().segment_readers()[0]
+            .fast_fields()
+            .columnar()
+            .list_columns()
+            .unwrap()
+            .into_iter()
+            .map(|(name, _)| name)
+            .collect::<Vec<_>>();
+        let mut expected_columns = vec![
+            format!("json\u{1}{field_name_out_internal}"),
+            format!("json\u{1}{field_name_out_internal}a"),
+            format!("json\u{1}a{field_name_out_internal}"),
+            format!("json\u{1}a{field_name_out_internal}a"),
+            format!("json\u{1}a{field_name_out_internal}a{field_name_out_internal}"),
+            format!("json\u{1}a{field_name_out_internal}\u{1}ab{field_name_out_internal}"),
+            format!("json\u{1}a{field_name_out_internal}\u{1}a{field_name_out_internal}"),
+            format!("json\u{1}num{field_name_out_internal}"),
+        ];
+        columns.sort();
+        expected_columns.sort();
+        assert_eq!(columns, expected_columns);
    }

    #[test]
@@ -532,10 +590,10 @@ mod tests_mmap {
        let query_parser = QueryParser::for_index(&index, vec![]);
        // Test if field name can be queried
        for (indexed_field, val) in fields_and_vals.iter() {
-            let query_str = &format!("{}:{}", indexed_field, val);
+            let query_str = &format!("{indexed_field}:{val}");
            let query = query_parser.parse_query(query_str).unwrap();
            let count_docs = searcher.search(&*query, &TopDocs::with_limit(2)).unwrap();
-            assert!(!count_docs.is_empty(), "{}:{}", indexed_field, val);
+            assert!(!count_docs.is_empty(), "{indexed_field}:{val}");
        }
        // Test if field name can be used for aggregation
        for (field_name, val) in fields_and_vals.iter() {
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -5,20 +5,20 @@ use tokenizer_api::BoxTokenStream;

 use super::doc_id_mapping::{get_doc_id_mapping_from_field, DocIdMapping};
 use super::operation::AddOperation;
-use crate::core::json_utils::index_json_values;
 use crate::fastfield::FastFieldsWriter;
 use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
-use crate::index::Segment;
+use crate::index::{Segment, SegmentComponent};
 use crate::indexer::segment_serializer::SegmentSerializer;
+use crate::json_utils::{index_json_value, IndexingPositionsPerPath};
 use crate::postings::{
    compute_table_memory_size, serialize_postings, IndexingContext, IndexingPosition,
    PerFieldPostingsWriter, PostingsWriter,
 };
-use crate::schema::document::{Document, ReferenceValue, Value};
+use crate::schema::document::{Document, Value};
 use crate::schema::{FieldEntry, FieldType, Schema, Term, DATE_TIME_PRECISION_INDEXED};
 use crate::store::{StoreReader, StoreWriter};
 use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer};
-use crate::{DocId, Opstamp, SegmentComponent, TantivyError};
+use crate::{DocId, Opstamp, TantivyError};

 /// Computes the initial size of the hash table.
 ///
@@ -68,6 +68,7 @@ pub struct SegmentWriter {
    pub(crate) fast_field_writers: FastFieldsWriter,
    pub(crate) fieldnorms_writer: FieldNormsWriter,
    pub(crate) json_path_writer: JsonPathWriter,
+    pub(crate) json_positions_per_path: IndexingPositionsPerPath,
    pub(crate) doc_opstamps: Vec<Opstamp>,
    per_field_text_analyzers: Vec<TextAnalyzer>,
    term_buffer: Term,
@@ -119,6 +120,7 @@ impl SegmentWriter {
            per_field_postings_writers,
            fieldnorms_writer: FieldNormsWriter::for_schema(&schema),
            json_path_writer: JsonPathWriter::default(),
+            json_positions_per_path: IndexingPositionsPerPath::default(),
            segment_serializer,
            fast_field_writers: FastFieldsWriter::from_schema_and_tokenizer_manager(
                &schema,
@@ -204,8 +206,7 @@ impl SegmentWriter {
                        // Used to help with linting and type checking.
                        let value = value_access as D::Value<'_>;

-                        let facet = value.as_facet().ok_or_else(make_schema_error)?;
-                        let facet_str = facet.encoded_str();
+                        let facet_str = value.as_facet().ok_or_else(make_schema_error)?;
                        let mut facet_tokenizer = facet_tokenizer.token_stream(facet_str);
                        let mut indexing_position = IndexingPosition::default();
                        postings_writer.index_text(
@@ -228,7 +229,7 @@ impl SegmentWriter {
                                &mut self.per_field_text_analyzers[field.field_id() as usize];
                            text_analyzer.token_stream(text)
                        } else if let Some(tok_str) = value.as_pre_tokenized_text() {
-                            BoxTokenStream::new(PreTokenizedStream::from(tok_str.clone()))
+                            BoxTokenStream::new(PreTokenizedStream::from(*tok_str.clone()))
                        } else {
                            continue;
                        };
@@ -342,26 +343,24 @@ impl SegmentWriter {
                FieldType::JsonObject(json_options) => {
                    let text_analyzer =
                        &mut self.per_field_text_analyzers[field.field_id() as usize];
-                    let json_values_it = values.map(|value_access| {
-                        // Used to help with linting and type checking.
-                        let value_access = value_access as D::Value<'_>;
-                        let value = value_access.as_value();

-                        match value {
-                            ReferenceValue::Object(object_iter) => Ok(object_iter),
-                            _ => Err(make_schema_error()),
-                        }
-                    });
-                    index_json_values::<D::Value<'_>>(
-                        doc_id,
-                        json_values_it,
-                        text_analyzer,
-                        json_options.is_expand_dots_enabled(),
-                        term_buffer,
-                        postings_writer,
-                        &mut self.json_path_writer,
-                        ctx,
-                    )?;
+                    self.json_positions_per_path.clear();
+                    self.json_path_writer
+                        .set_expand_dots(json_options.is_expand_dots_enabled());
+                    for json_value in values {
+                        self.json_path_writer.clear();
+
+                        index_json_value(
+                            doc_id,
+                            json_value,
+                            text_analyzer,
+                            term_buffer,
+                            &mut self.json_path_writer,
+                            postings_writer,
+                            ctx,
+                            &mut self.json_positions_per_path,
+                        );
+                    }
                }
                FieldType::IpAddr(_) => {
                    let mut num_vals = 0;
@@ -498,19 +497,19 @@ mod tests {
    use crate::collector::{Count, TopDocs};
    use crate::directory::RamDirectory;
    use crate::fastfield::FastValue;
-    use crate::postings::TermInfo;
+    use crate::postings::{Postings, TermInfo};
    use crate::query::{PhraseQuery, QueryParser};
-    use crate::schema::document::Value;
    use crate::schema::{
-        Document, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, STORED, STRING, TEXT,
+        Document, IndexRecordOption, OwnedValue, Schema, TextFieldIndexing, TextOptions, Value,
+        STORED, STRING, TEXT,
    };
    use crate::store::{Compressor, StoreReader, StoreWriter};
    use crate::time::format_description::well_known::Rfc3339;
    use crate::time::OffsetDateTime;
    use crate::tokenizer::{PreTokenizedString, Token};
    use crate::{
-        DateTime, Directory, DocAddress, DocSet, Index, IndexWriter, Postings, TantivyDocument,
-        Term, TERMINATED,
+        DateTime, Directory, DocAddress, DocSet, Index, IndexWriter, TantivyDocument, Term,
+        TERMINATED,
    };

    #[test]
@@ -555,9 +554,15 @@ mod tests {
        let reader = StoreReader::open(directory.open_read(path).unwrap(), 0).unwrap();
        let doc = reader.get::<TantivyDocument>(0).unwrap();

-        assert_eq!(doc.field_values().len(), 2);
-        assert_eq!(doc.field_values()[0].value().as_str(), Some("A"));
-        assert_eq!(doc.field_values()[1].value().as_str(), Some("title"));
+        assert_eq!(doc.field_values().count(), 2);
+        assert_eq!(
+            doc.get_all(text_field).next().unwrap().as_value().as_str(),
+            Some("A")
+        );
+        assert_eq!(
+            doc.get_all(text_field).nth(1).unwrap().as_value().as_str(),
+            Some("title")
+        );
    }
    #[test]
    fn test_simple_json_indexing() {
@@ -597,12 +602,51 @@ mod tests {
        assert_eq!(score_docs.len(), 2);
    }

+    #[test]
+    fn test_flat_json_indexing() {
+        // The JSON field receives bare top-level values (text, i64, u64) as well as an object.
+        let mut schema_builder = Schema::builder();
+        let json_field = schema_builder.add_json_field("json", STORED | STRING);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema.clone());
+        let mut writer = index.writer_for_tests().unwrap();
+        // One document per shape: a string, an i64, a u64, and finally a regular JSON object.
+        writer.add_document(doc!(json_field=>"b")).unwrap();
+        writer
+            .add_document(doc!(json_field=>OwnedValue::I64(10i64)))
+            .unwrap();
+        writer
+            .add_document(doc!(json_field=>OwnedValue::U64(55u64)))
+            .unwrap();
+        writer
+            .add_document(doc!(json_field=>json!({"my_field": "a"})))
+            .unwrap();
+        writer.commit().unwrap();
+
+        let search_and_expect = |query| {
+            let query_parser = QueryParser::for_index(&index, vec![json_field]);
+            let text_query = query_parser.parse_query(query).unwrap();
+            let score_docs: Vec<(_, DocAddress)> = index
+                .reader()
+                .unwrap()
+                .searcher()
+                .search(&text_query, &TopDocs::with_limit(4))
+                .unwrap();
+            assert_eq!(score_docs.len(), 1); // exactly one of the four documents must match
+        };
+
+        search_and_expect("my_field:a");
+        search_and_expect("b");
+        search_and_expect("10");
+        search_and_expect("55");
+    }
+
    #[test]
    fn test_json_indexing() {
        let mut schema_builder = Schema::builder();
        let json_field = schema_builder.add_json_field("json", STORED | TEXT);
        let schema = schema_builder.build();
-        let json_val: serde_json::Map<String, serde_json::Value> = serde_json::from_str(
+        let json_val: serde_json::Value = serde_json::from_str(
            r#"{
            "toto": "titi",
            "float": -0.2,
@@ -630,14 +674,10 @@ mod tests {
                doc_id: 0u32,
            })
            .unwrap();
-        let serdeser_json_val = serde_json::from_str::<serde_json::Map<String, serde_json::Value>>(
-            &doc.to_json(&schema),
-        )
-        .unwrap()
-        .get("json")
-        .unwrap()[0]
-            .as_object()
+        let serdeser_json_val = serde_json::from_str::<serde_json::Value>(&doc.to_json(&schema))
            .unwrap()
+            .get("json")
+            .unwrap()[0]
            .clone();
        assert_eq!(json_val, serdeser_json_val);
        let segment_reader = searcher.segment_reader(0u32);
@@ -801,7 +841,7 @@ mod tests {
        let mut schema_builder = Schema::builder();
        let json_field = schema_builder.add_json_field("json", STRING);
        let schema = schema_builder.build();
-        let json_val: serde_json::Map<String, serde_json::Value> =
+        let json_val: serde_json::Value =
            serde_json::from_str(r#"{"mykey": "two tokens"}"#).unwrap();
        let doc = doc!(json_field=>json_val);
        let index = Index::create_in_ram(schema);
@@ -841,7 +881,7 @@ mod tests {
        let mut schema_builder = Schema::builder();
        let json_field = schema_builder.add_json_field("json", TEXT);
        let schema = schema_builder.build();
-        let json_val: serde_json::Map<String, serde_json::Value> = serde_json::from_str(
+        let json_val: serde_json::Value = serde_json::from_str(
            r#"{"mykey": [{"field": "hello happy tax payer"}, {"field": "nothello"}]}"#,
        )
        .unwrap();
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -216,11 +216,6 @@ use once_cell::sync::Lazy;
 use serde::{Deserialize, Serialize};

 pub use self::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
-#[deprecated(
-    since = "0.22.0",
-    note = "Will be removed in tantivy 0.23. Use export from snippet module instead"
-)]
-pub use self::snippet::{Snippet, SnippetGenerator};
 #[doc(hidden)]
 pub use crate::core::json_utils;
 pub use crate::core::{Executor, Searcher, SearcherGeneration};
@@ -228,16 +223,10 @@ pub use crate::directory::Directory;
 #[allow(deprecated)] // Remove with index sorting
 pub use crate::index::{
    Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
-    Segment, SegmentComponent, SegmentId, SegmentMeta, SegmentReader,
+    Segment, SegmentMeta, SegmentReader,
 };
-#[deprecated(
-    since = "0.22.0",
-    note = "Will be removed in tantivy 0.23. Use export from indexer module instead"
-)]
-pub use crate::indexer::PreparedCommit;
 pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
-pub use crate::postings::Postings;
-pub use crate::schema::{DateOptions, DateTimePrecision, Document, TantivyDocument, Term};
+pub use crate::schema::{Document, TantivyDocument, Term};

 /// Index format version.
 const INDEX_FORMAT_VERSION: u32 = 6;
@@ -392,9 +381,10 @@ pub mod tests {
    use crate::docset::{DocSet, TERMINATED};
    use crate::index::SegmentReader;
    use crate::merge_policy::NoMergePolicy;
+    use crate::postings::Postings;
    use crate::query::BooleanQuery;
    use crate::schema::*;
-    use crate::{DateTime, DocAddress, Index, IndexWriter, Postings, ReloadPolicy};
+    use crate::{DateTime, DocAddress, Index, IndexWriter, ReloadPolicy};

    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();
@@ -446,7 +436,6 @@ pub mod tests {
    }

    #[test]
-    #[cfg(not(feature = "lz4"))]
    fn test_version_string() {
        use regex::Regex;
        let regex_ptn = Regex::new(
@@ -946,7 +935,7 @@ pub mod tests {
        let mut schema_builder = Schema::builder();
        let json_field = schema_builder.add_json_field("json", STORED | TEXT);
        let schema = schema_builder.build();
-        let json_val: serde_json::Map<String, serde_json::Value> = serde_json::from_str(
+        let json_val: serde_json::Value = serde_json::from_str(
            r#"{
            "signed": 2,
            "float": 2.0,
@@ -1036,13 +1025,16 @@ pub mod tests {
                            text_field => "some other value",
                            other_text_field => "short");
        assert_eq!(document.len(), 3);
-        let values: Vec<&OwnedValue> = document.get_all(text_field).collect();
+        let values: Vec<OwnedValue> = document.get_all(text_field).map(OwnedValue::from).collect();
        assert_eq!(values.len(), 2);
-        assert_eq!(values[0].as_str(), Some("tantivy"));
-        assert_eq!(values[1].as_str(), Some("some other value"));
-        let values: Vec<&OwnedValue> = document.get_all(other_text_field).collect();
+        assert_eq!(values[0].as_ref().as_str(), Some("tantivy"));
+        assert_eq!(values[1].as_ref().as_str(), Some("some other value"));
+        let values: Vec<OwnedValue> = document
+            .get_all(other_text_field)
+            .map(OwnedValue::from)
+            .collect();
        assert_eq!(values.len(), 1);
-        assert_eq!(values[0].as_str(), Some("short"));
+        assert_eq!(values[0].as_ref().as_str(), Some("short"));
    }

    #[test]
@@ -1109,9 +1101,9 @@ pub mod tests {
    #[test]
    fn test_update_via_delete_insert() -> crate::Result<()> {
        use crate::collector::Count;
+        use crate::index::SegmentId;
        use crate::indexer::NoMergePolicy;
        use crate::query::AllQuery;
-        use crate::SegmentId;

        const DOC_COUNT: u64 = 2u64;

--- a/src/macros.rs
+++ b/src/macros.rs
@@ -41,6 +41,7 @@
 /// );
 /// # }
 /// ```
+
 #[macro_export]
 macro_rules! doc(
    () => {
@@ -52,7 +53,7 @@ macro_rules! doc(
        {
            let mut document = $crate::TantivyDocument::default();
            $(
-                document.add_field_value($field, $value);
+                document.add_field_value($field, &$value);
            )*
            document
        }
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -56,7 +56,7 @@ pub struct InvertedIndexSerializer {
 impl InvertedIndexSerializer {
    /// Open a new `InvertedIndexSerializer` for the given segment
    pub fn open(segment: &mut Segment) -> crate::Result<InvertedIndexSerializer> {
-        use crate::SegmentComponent::{Positions, Postings, Terms};
+        use crate::index::SegmentComponent::{Positions, Postings, Terms};
        let inv_index_serializer = InvertedIndexSerializer {
            terms_write: CompositeWrite::wrap(segment.open_write(Terms)?),
            postings_write: CompositeWrite::wrap(segment.open_write(Postings)?),
--- a/src/query/empty_query.rs
+++ b/src/query/empty_query.rs
@@ -1,8 +1,9 @@
 use super::Scorer;
 use crate::docset::TERMINATED;
+use crate::index::SegmentReader;
 use crate::query::explanation::does_not_match;
 use crate::query::{EnableScoring, Explanation, Query, Weight};
-use crate::{DocId, DocSet, Score, Searcher, SegmentReader};
+use crate::{DocId, DocSet, Score, Searcher};

 /// `EmptyQuery` is a dummy `Query` in which no document matches.
 ///
--- a/src/query/fuzzy_query.rs
+++ b/src/query/fuzzy_query.rs
@@ -138,8 +138,7 @@ impl FuzzyTermQuery {
                if json_path_type != Type::Str {
                    return Err(InvalidArgument(format!(
                        "The fuzzy term query requires a string path type for a json term. Found \
-                         {:?}",
-                        json_path_type
+                         {json_path_type:?}"
                    )));
                }
            }
--- a/src/query/more_like_this/more_like_this.rs
+++ b/src/query/more_like_this/more_like_this.rs
@@ -180,7 +180,7 @@ impl MoreLikeThis {
                let facets: Vec<&str> = values
                    .iter()
                    .map(|value| {
-                        value.as_facet().map(|f| f.encoded_str()).ok_or_else(|| {
+                        value.as_facet().ok_or_else(|| {
                            TantivyError::InvalidArgument("invalid field value".to_string())
                        })
                    })
@@ -220,7 +220,7 @@ impl MoreLikeThis {
                        let mut token_stream = tokenizer.token_stream(text);
                        token_stream.process(sink);
                    } else if let Some(tok_str) = value.as_pre_tokenized_text() {
-                        let mut token_stream = PreTokenizedStream::from(tok_str.clone());
+                        let mut token_stream = PreTokenizedStream::from(*tok_str.clone());
                        token_stream.process(sink);
                    }
                }
--- a/src/query/range_query/fast_field_range_query.rs
+++ b/src/query/range_query/fast_field_range_query.rs
@@ -174,7 +174,7 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe
    }

    fn size_hint(&self) -> u32 {
-        0 // heuristic possible by checking number of hits when fetching a block
+        self.column.num_docs()
    }
 }

--- a/src/query/regex_query.rs
+++ b/src/query/regex_query.rs
@@ -185,7 +185,7 @@ mod test {
            Err(crate::TantivyError::InvalidArgument(msg)) => {
                assert!(msg.contains("error: unclosed group"))
            }
-            res => panic!("unexpected result: {:?}", res),
+            res => panic!("unexpected result: {res:?}"),
        }
    }
 }
--- a/src/query/term_query/term_scorer.rs
+++ b/src/query/term_query/term_scorer.rs
@@ -127,6 +127,7 @@ impl Scorer for TermScorer {
 mod tests {
    use proptest::prelude::*;

+    use crate::index::SegmentId;
    use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
    use crate::merge_policy::NoMergePolicy;
    use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
@@ -134,8 +135,7 @@ mod tests {
    use crate::query::{Bm25Weight, EnableScoring, Scorer, TermQuery};
    use crate::schema::{IndexRecordOption, Schema, TEXT};
    use crate::{
-        assert_nearly_equals, DocId, DocSet, Index, IndexWriter, Score, Searcher, SegmentId, Term,
-        TERMINATED,
+        assert_nearly_equals, DocId, DocSet, Index, IndexWriter, Score, Searcher, Term, TERMINATED,
    };

    #[test]
--- a/src/reader/warming.rs
+++ b/src/reader/warming.rs
@@ -179,9 +179,10 @@ mod tests {
    use super::Warmer;
    use crate::core::searcher::SearcherGeneration;
    use crate::directory::RamDirectory;
+    use crate::index::SegmentId;
    use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
    use crate::schema::{Schema, INDEXED};
-    use crate::{Index, IndexSettings, ReloadPolicy, Searcher, SegmentId};
+    use crate::{Index, IndexSettings, ReloadPolicy, Searcher};

    #[derive(Default)]
    struct TestWarmer {
--- a/src/schema/document/de.rs
+++ b/src/schema/document/de.rs
@@ -873,7 +873,7 @@ mod tests {
        );

        let facet = Facet::from_text("/hello/world").unwrap();
-        let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into());
+        let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into());
        let value = deserialize_value(result);
        assert_eq!(value, crate::schema::OwnedValue::Facet(facet));

@@ -881,7 +881,8 @@ mod tests {
            text: "hello, world".to_string(),
            tokens: vec![Token::default(), Token::default()],
        };
-        let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into());
+        let result =
+            serialize_value(ReferenceValueLeaf::PreTokStr(pre_tok_str.clone().into()).into());
        let value = deserialize_value(result);
        assert_eq!(value, crate::schema::OwnedValue::PreTokStr(pre_tok_str));
    }
--- a/src/schema/document/default_document.rs
+++ b/src/schema/document/default_document.rs
@@ -1,93 +1,64 @@
 use std::collections::{BTreeMap, HashMap, HashSet};
+use std::io::{self, Read, Write};
 use std::net::Ipv6Addr;

-use common::DateTime;
+use columnar::MonotonicallyMappableToU128;
+use common::{read_u32_vint_no_advance, serialize_vint_u32, BinarySerializable, DateTime, VInt};
 use serde_json::Map;
+pub use CompactDoc as TantivyDocument;

+use super::{ReferenceValue, ReferenceValueLeaf, Value};
 use crate::schema::document::{
    DeserializeError, Document, DocumentDeserialize, DocumentDeserializer,
 };
 use crate::schema::field_type::ValueParsingError;
-use crate::schema::field_value::FieldValueIter;
-use crate::schema::{Facet, Field, FieldValue, NamedFieldDocument, OwnedValue, Schema};
+use crate::schema::{Facet, Field, NamedFieldDocument, OwnedValue, Schema};
 use crate::tokenizer::PreTokenizedString;

-/// TantivyDocument provides a default implementation of the `Document` trait.
-/// It is the object that can be indexed and then searched for.
-///
-/// Documents are fundamentally a collection of unordered couples `(field, value)`.
-/// In this list, one field may appear more than once.
-#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
-pub struct TantivyDocument {
-    field_values: Vec<FieldValue>,
+#[repr(packed)]
+#[derive(Debug, Clone)]
+/// A field value pair in the compact tantivy document
+struct FieldValueAddr {
+    pub field: u16,
+    pub value_addr: ValueAddr,
 }

-impl Document for TantivyDocument {
-    type Value<'a> = &'a OwnedValue;
-    type FieldsValuesIter<'a> = FieldValueIter<'a>;
+#[derive(Debug, Clone)]
+/// The default document in tantivy. It encodes data in a compact form.
+pub struct CompactDoc {
+    /// `node_data` is a vec of bytes, where each value is serialized into bytes and stored. It
+    /// includes all the data of the document and also metadata like where the nodes are located
+    /// in an object or array.
+    pub node_data: Vec<u8>,
+    /// The root (Field, Value) pairs
+    field_values: Vec<FieldValueAddr>,
+}

-    fn iter_fields_and_values(&self) -> Self::FieldsValuesIter<'_> {
-        FieldValueIter(self.field_values.iter())
+impl Default for CompactDoc {
+    fn default() -> Self {
+        Self::new()
    }
 }

-impl DocumentDeserialize for TantivyDocument {
-    fn deserialize<'de, D>(mut deserializer: D) -> Result<Self, DeserializeError>
-    where D: DocumentDeserializer<'de> {
-        let mut field_values = Vec::with_capacity(deserializer.size_hint());
-
-        while let Some((field, value)) = deserializer.next_field()? {
-            field_values.push(FieldValue::new(field, value));
-        }
-
-        Ok(Self { field_values })
-    }
-}
-
-impl From<Vec<FieldValue>> for TantivyDocument {
-    fn from(field_values: Vec<FieldValue>) -> Self {
-        Self { field_values }
-    }
-}
-
-impl PartialEq for TantivyDocument {
-    fn eq(&self, other: &Self) -> bool {
-        // super slow, but only here for tests
-        let convert_to_comparable_map = |field_values: &[FieldValue]| {
-            let mut field_value_set: HashMap<Field, HashSet<String>> = Default::default();
-            for field_value in field_values.iter() {
-                let value = serde_json::to_string(field_value.value()).unwrap();
-                field_value_set
-                    .entry(field_value.field())
-                    .or_default()
-                    .insert(value);
-            }
-            field_value_set
-        };
-        let self_field_values: HashMap<Field, HashSet<String>> =
-            convert_to_comparable_map(&self.field_values);
-        let other_field_values: HashMap<Field, HashSet<String>> =
-            convert_to_comparable_map(&other.field_values);
-        self_field_values.eq(&other_field_values)
-    }
-}
-
-impl Eq for TantivyDocument {}
-
-impl IntoIterator for TantivyDocument {
-    type Item = FieldValue;
-
-    type IntoIter = std::vec::IntoIter<FieldValue>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.field_values.into_iter()
-    }
-}
-
-impl TantivyDocument {
+impl CompactDoc {
    /// Creates a new, empty document object
-    pub fn new() -> TantivyDocument {
-        TantivyDocument::default()
+    /// The reserved capacity is for the total serialized data
+    pub fn with_capacity(bytes: usize) -> CompactDoc {
+        CompactDoc {
+            node_data: Vec::with_capacity(bytes),
+            field_values: Vec::with_capacity(4),
+        }
+    }
+
+    /// Creates a new, empty document object
+    pub fn new() -> CompactDoc {
+        CompactDoc::with_capacity(1024)
+    }
+
+    /// Shrinks the capacity of the document to fit the data
+    pub fn shrink_to_fit(&mut self) {
+        self.node_data.shrink_to_fit();
+        self.field_values.shrink_to_fit();
    }

    /// Returns the length of the document.
@@ -99,83 +70,111 @@ impl TantivyDocument {
    pub fn add_facet<F>(&mut self, field: Field, path: F)
    where Facet: From<F> {
        let facet = Facet::from(path);
-        let value = OwnedValue::Facet(facet);
-        self.add_field_value(field, value);
+        self.add_leaf_field_value(field, ReferenceValueLeaf::Facet(facet.encoded_str()));
    }

    /// Add a text field.
-    pub fn add_text<S: ToString>(&mut self, field: Field, text: S) {
-        let value = OwnedValue::Str(text.to_string());
-        self.add_field_value(field, value);
+    pub fn add_text<S: AsRef<str>>(&mut self, field: Field, text: S) {
+        self.add_leaf_field_value(field, ReferenceValueLeaf::Str(text.as_ref()));
    }

    /// Add a pre-tokenized text field.
    pub fn add_pre_tokenized_text(&mut self, field: Field, pre_tokenized_text: PreTokenizedString) {
-        self.add_field_value(field, pre_tokenized_text);
+        self.add_leaf_field_value(field, pre_tokenized_text);
    }

    /// Add a u64 field
    pub fn add_u64(&mut self, field: Field, value: u64) {
-        self.add_field_value(field, value);
+        self.add_leaf_field_value(field, value);
    }

    /// Add a IP address field. Internally only Ipv6Addr is used.
    pub fn add_ip_addr(&mut self, field: Field, value: Ipv6Addr) {
-        self.add_field_value(field, value);
+        self.add_leaf_field_value(field, value);
    }

    /// Add a i64 field
    pub fn add_i64(&mut self, field: Field, value: i64) {
-        self.add_field_value(field, value);
+        self.add_leaf_field_value(field, value);
    }

    /// Add a f64 field
    pub fn add_f64(&mut self, field: Field, value: f64) {
-        self.add_field_value(field, value);
+        self.add_leaf_field_value(field, value);
    }

    /// Add a bool field
    pub fn add_bool(&mut self, field: Field, value: bool) {
-        self.add_field_value(field, value);
+        self.add_leaf_field_value(field, value);
    }

    /// Add a date field with unspecified time zone offset
    pub fn add_date(&mut self, field: Field, value: DateTime) {
-        self.add_field_value(field, value);
+        self.add_leaf_field_value(field, value);
    }

    /// Add a bytes field
-    pub fn add_bytes<T: Into<Vec<u8>>>(&mut self, field: Field, value: T) {
-        self.add_field_value(field, value.into());
+    pub fn add_bytes(&mut self, field: Field, value: &[u8]) {
+        self.add_leaf_field_value(field, value);
    }

    /// Add a dynamic object field
    pub fn add_object(&mut self, field: Field, object: BTreeMap<String, OwnedValue>) {
-        self.add_field_value(field, object);
+        self.add_field_value(field, &OwnedValue::from(object));
    }

    /// Add a (field, value) to the document.
-    pub fn add_field_value<T: Into<OwnedValue>>(&mut self, field: Field, typed_val: T) {
+    ///
+    /// `OwnedValue` implements Value, which should be easiest to use, but is not the most
+    /// performant.
+    pub fn add_field_value<'a, V: Value<'a>>(&mut self, field: Field, value: V) {
+        let field_value = FieldValueAddr {
+            field: field
+                .field_id()
+                .try_into()
+                .expect("support only up to u16::MAX field ids"),
+            value_addr: self.add_value(value),
+        };
+        self.field_values.push(field_value);
+    }
+
+    /// Add a (field, leaf value) to the document.
+    /// Leaf values don't have nested values.
+    pub fn add_leaf_field_value<'a, T: Into<ReferenceValueLeaf<'a>>>(
+        &mut self,
+        field: Field,
+        typed_val: T,
+    ) {
        let value = typed_val.into();
-        let field_value = FieldValue { field, value };
+        let field_value = FieldValueAddr {
+            field: field
+                .field_id()
+                .try_into()
+                .expect("support only up to u16::MAX field ids"),
+            value_addr: self.add_value_leaf(value),
+        };
        self.field_values.push(field_value);
    }

    /// field_values accessor
-    pub fn field_values(&self) -> &[FieldValue] {
-        &self.field_values
+    pub fn field_values(&self) -> impl Iterator<Item = (Field, CompactDocValue<'_>)> {
+        self.field_values.iter().map(|field_val| {
+            let field = Field::from_field_id(field_val.field as u32);
+            let val = self.get_compact_doc_value(field_val.value_addr);
+            (field, val)
+        })
    }

-    /// Returns all of the `FieldValue`s associated the given field
-    pub fn get_all(&self, field: Field) -> impl Iterator<Item = &OwnedValue> {
+    /// Returns all of the `CompactDocValue`s associated with the given field
+    pub fn get_all(&self, field: Field) -> impl Iterator<Item = CompactDocValue<'_>> + '_ {
        self.field_values
            .iter()
-            .filter(move |field_value| field_value.field() == field)
-            .map(FieldValue::value)
+            .filter(move |field_value| Field::from_field_id(field_value.field as u32) == field)
+            .map(|val| self.get_compact_doc_value(val.value_addr))
    }

-    /// Returns the first `FieldValue` associated the given field
-    pub fn get_first(&self, field: Field) -> Option<&OwnedValue> {
+    /// Returns the first `CompactDocValue` associated with the given field
+    pub fn get_first(&self, field: Field) -> Option<CompactDocValue<'_>> {
        self.get_all(field).next()
    }

@@ -183,12 +182,12 @@ impl TantivyDocument {
    pub fn convert_named_doc(
        schema: &Schema,
        named_doc: NamedFieldDocument,
-    ) -> Result<TantivyDocument, DocParsingError> {
-        let mut document = TantivyDocument::new();
+    ) -> Result<Self, DocParsingError> {
+        let mut document = Self::new();
        for (field_name, values) in named_doc.0 {
            if let Ok(field) = schema.get_field(&field_name) {
                for value in values {
-                    document.add_field_value(field, value);
+                    document.add_field_value(field, &value);
                }
            }
        }
@@ -196,7 +195,7 @@ impl TantivyDocument {
    }

    /// Build a document object from a json-object.
-    pub fn parse_json(schema: &Schema, doc_json: &str) -> Result<TantivyDocument, DocParsingError> {
+    pub fn parse_json(schema: &Schema, doc_json: &str) -> Result<Self, DocParsingError> {
        let json_obj: Map<String, serde_json::Value> =
            serde_json::from_str(doc_json).map_err(|_| DocParsingError::invalid_json(doc_json))?;
        Self::from_json_object(schema, json_obj)
@@ -206,8 +205,8 @@ impl TantivyDocument {
    pub fn from_json_object(
        schema: &Schema,
        json_obj: Map<String, serde_json::Value>,
-    ) -> Result<TantivyDocument, DocParsingError> {
-        let mut doc = TantivyDocument::default();
+    ) -> Result<Self, DocParsingError> {
+        let mut doc = Self::default();
        for (field_name, json_value) in json_obj {
            if let Ok(field) = schema.get_field(&field_name) {
                let field_entry = schema.get_field_entry(field);
@@ -218,20 +217,482 @@ impl TantivyDocument {
                            let value = field_type
                                .value_from_json(json_item)
                                .map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
-                            doc.add_field_value(field, value);
+                            doc.add_field_value(field, &value);
                        }
                    }
                    _ => {
                        let value = field_type
                            .value_from_json(json_value)
                            .map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
-                        doc.add_field_value(field, value);
+                        doc.add_field_value(field, &value);
                    }
                }
            }
        }
        Ok(doc)
    }
+
+    fn add_value_leaf(&mut self, leaf: ReferenceValueLeaf) -> ValueAddr {
+        let type_id = ValueType::from(&leaf);
+        // Write into `node_data` and return u32 position as its address
+        // Null and bool are inlined into the address
+        let val_addr = match leaf {
+            ReferenceValueLeaf::Null => 0,
+            ReferenceValueLeaf::Str(bytes) => {
+                write_bytes_into(&mut self.node_data, bytes.as_bytes())
+            }
+            ReferenceValueLeaf::Facet(bytes) => {
+                write_bytes_into(&mut self.node_data, bytes.as_bytes())
+            }
+            ReferenceValueLeaf::Bytes(bytes) => write_bytes_into(&mut self.node_data, bytes),
+            ReferenceValueLeaf::U64(num) => write_into(&mut self.node_data, num),
+            ReferenceValueLeaf::I64(num) => write_into(&mut self.node_data, num),
+            ReferenceValueLeaf::F64(num) => write_into(&mut self.node_data, num),
+            ReferenceValueLeaf::Bool(b) => b as u32,
+            ReferenceValueLeaf::Date(date) => {
+                write_into(&mut self.node_data, date.into_timestamp_nanos())
+            }
+            ReferenceValueLeaf::IpAddr(num) => write_into(&mut self.node_data, num.to_u128()),
+            ReferenceValueLeaf::PreTokStr(pre_tok) => write_into(&mut self.node_data, *pre_tok),
+        };
+        ValueAddr { type_id, val_addr }
+    }
+    /// Adds a value to the document and returns its `ValueAddr`
+    fn add_value<'a, V: Value<'a>>(&mut self, value: V) -> ValueAddr {
+        let value = value.as_value();
+        let type_id = ValueType::from(&value);
+        match value {
+            ReferenceValue::Leaf(leaf) => self.add_value_leaf(leaf),
+            ReferenceValue::Array(elements) => {
+                // addresses of the elements in node_data
+                // Reusing a vec would be nicer, but it's not easy because of the recursion
+                // A global vec would work if every writer gets its discriminator
+                let mut addresses = Vec::new();
+                for elem in elements {
+                    let value_addr = self.add_value(elem);
+                    write_into(&mut addresses, value_addr);
+                }
+                ValueAddr {
+                    type_id,
+                    val_addr: write_bytes_into(&mut self.node_data, &addresses),
+                }
+            }
+            ReferenceValue::Object(entries) => {
+                // addresses of the elements in node_data
+                let mut addresses = Vec::new();
+                for (key, value) in entries {
+                    let key_addr = self.add_value_leaf(ReferenceValueLeaf::Str(key));
+                    let value_addr = self.add_value(value);
+                    write_into(&mut addresses, key_addr);
+                    write_into(&mut addresses, value_addr);
+                }
+                ValueAddr {
+                    type_id,
+                    val_addr: write_bytes_into(&mut self.node_data, &addresses),
+                }
+            }
+        }
+    }
+
+    /// Get CompactDocValue for address
+    fn get_compact_doc_value(&self, value_addr: ValueAddr) -> CompactDocValue<'_> {
+        CompactDocValue {
+            container: self,
+            value_addr,
+        }
+    }
+
+    /// get &[u8] reference from node_data
+    fn extract_bytes(&self, addr: Addr) -> &[u8] {
+        binary_deserialize_bytes(self.get_slice(addr))
+    }
+
+    /// get &str reference from node_data
+    fn extract_str(&self, addr: Addr) -> &str {
+        let data = self.extract_bytes(addr);
+        // Utf-8 checks would have a noticeable performance overhead here
+        unsafe { std::str::from_utf8_unchecked(data) }
+    }
+
+    /// deserialize an owned value from node_data
+    fn read_from<T: BinarySerializable>(&self, addr: Addr) -> io::Result<T> {
+        let data_slice = &self.node_data[addr as usize..];
+        let mut cursor = std::io::Cursor::new(data_slice);
+        T::deserialize(&mut cursor)
+    }
+
+    /// get slice from address. The returned slice is open ended
+    fn get_slice(&self, addr: Addr) -> &[u8] {
+        &self.node_data[addr as usize..]
+    }
+}
+
+/// BinarySerializable alternative to read references
+fn binary_deserialize_bytes(data: &[u8]) -> &[u8] {
+    let (len, bytes_read) = read_u32_vint_no_advance(data);
+    &data[bytes_read..bytes_read + len as usize]
+}
+
+/// Write bytes and return the position of the written data.
+///
+/// BinarySerializable alternative to write references
+fn write_bytes_into(vec: &mut Vec<u8>, data: &[u8]) -> u32 {
+    let pos = vec.len() as u32;
+    let mut buf = [0u8; 8];
+    let len_vint_bytes = serialize_vint_u32(data.len() as u32, &mut buf);
+    vec.extend_from_slice(len_vint_bytes);
+    vec.extend_from_slice(data);
+    pos
+}
+
+/// Serialize and return the position
+fn write_into<T: BinarySerializable>(vec: &mut Vec<u8>, value: T) -> u32 {
+    let pos = vec.len() as u32;
+    value.serialize(vec).unwrap();
+    pos
+}
+
+impl PartialEq for CompactDoc {
+    fn eq(&self, other: &Self) -> bool {
+        // super slow, but only here for tests
+        let convert_to_comparable_map = |doc: &CompactDoc| {
+            let mut field_value_set: HashMap<Field, HashSet<String>> = Default::default();
+            for field_value in doc.field_values.iter() {
+                let value: OwnedValue = doc.get_compact_doc_value(field_value.value_addr).into();
+                let value = serde_json::to_string(&value).unwrap();
+                field_value_set
+                    .entry(Field::from_field_id(field_value.field as u32))
+                    .or_default()
+                    .insert(value);
+            }
+            field_value_set
+        };
+        let self_field_values: HashMap<Field, HashSet<String>> = convert_to_comparable_map(self);
+        let other_field_values: HashMap<Field, HashSet<String>> = convert_to_comparable_map(other);
+        self_field_values.eq(&other_field_values)
+    }
+}
+
+impl Eq for CompactDoc {}
+
+impl DocumentDeserialize for CompactDoc {
+    fn deserialize<'de, D>(mut deserializer: D) -> Result<Self, DeserializeError>
+    where D: DocumentDeserializer<'de> {
+        let mut doc = CompactDoc::default();
+        // TODO: Deserializing into OwnedValue is wasteful. The deserializer should be able to work
+        // on slices and referenced data.
+        while let Some((field, value)) = deserializer.next_field::<OwnedValue>()? {
+            doc.add_field_value(field, &value);
+        }
+        Ok(doc)
+    }
+}
+
+/// A value of Compact Doc needs a reference to the container to extract its payload
+#[derive(Debug, Clone, Copy)]
+pub struct CompactDocValue<'a> {
+    container: &'a CompactDoc,
+    value_addr: ValueAddr,
+}
+impl PartialEq for CompactDocValue<'_> {
+    fn eq(&self, other: &Self) -> bool {
+        let value1: OwnedValue = (*self).into();
+        let value2: OwnedValue = (*other).into();
+        value1 == value2
+    }
+}
+impl<'a> From<CompactDocValue<'a>> for OwnedValue {
+    fn from(value: CompactDocValue) -> Self {
+        value.as_value().into()
+    }
+}
+impl<'a> Value<'a> for CompactDocValue<'a> {
+    type ArrayIter = CompactDocArrayIter<'a>;
+
+    type ObjectIter = CompactDocObjectIter<'a>;
+
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        self.get_ref_value().unwrap()
+    }
+}
+impl<'a> CompactDocValue<'a> {
+    fn get_ref_value(&self) -> io::Result<ReferenceValue<'a, CompactDocValue<'a>>> {
+        let addr = self.value_addr.val_addr;
+        match self.value_addr.type_id {
+            ValueType::Null => Ok(ReferenceValueLeaf::Null.into()),
+            ValueType::Str => {
+                let str_ref = self.container.extract_str(addr);
+                Ok(ReferenceValueLeaf::Str(str_ref).into())
+            }
+            ValueType::Facet => {
+                let str_ref = self.container.extract_str(addr);
+                Ok(ReferenceValueLeaf::Facet(str_ref).into())
+            }
+            ValueType::Bytes => {
+                let data = self.container.extract_bytes(addr);
+                Ok(ReferenceValueLeaf::Bytes(data).into())
+            }
+            ValueType::U64 => self
+                .container
+                .read_from::<u64>(addr)
+                .map(ReferenceValueLeaf::U64)
+                .map(Into::into),
+            ValueType::I64 => self
+                .container
+                .read_from::<i64>(addr)
+                .map(ReferenceValueLeaf::I64)
+                .map(Into::into),
+            ValueType::F64 => self
+                .container
+                .read_from::<f64>(addr)
+                .map(ReferenceValueLeaf::F64)
+                .map(Into::into),
+            ValueType::Bool => Ok(ReferenceValueLeaf::Bool(addr != 0).into()),
+            ValueType::Date => self
+                .container
+                .read_from::<i64>(addr)
+                .map(|ts| ReferenceValueLeaf::Date(DateTime::from_timestamp_nanos(ts)))
+                .map(Into::into),
+            ValueType::IpAddr => self
+                .container
+                .read_from::<u128>(addr)
+                .map(|num| ReferenceValueLeaf::IpAddr(Ipv6Addr::from_u128(num)))
+                .map(Into::into),
+            ValueType::PreTokStr => self
+                .container
+                .read_from::<PreTokenizedString>(addr)
+                .map(Into::into)
+                .map(ReferenceValueLeaf::PreTokStr)
+                .map(Into::into),
+            ValueType::Object => Ok(ReferenceValue::Object(CompactDocObjectIter::new(
+                self.container,
+                addr,
+            )?)),
+            ValueType::Array => Ok(ReferenceValue::Array(CompactDocArrayIter::new(
+                self.container,
+                addr,
+            )?)),
+        }
+    }
+}
+
+/// The address in the vec
+type Addr = u32;
+
+#[derive(Clone, Copy, Default)]
+#[repr(packed)]
+/// The value type and the address to its payload in the container.
+struct ValueAddr {
+    type_id: ValueType,
+    /// This is the address to the value in the vec, except for bool and null, which are inlined
+    val_addr: Addr,
+}
+impl BinarySerializable for ValueAddr {
+    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+        self.type_id.serialize(writer)?;
+        VInt(self.val_addr as u64).serialize(writer)
+    }
+
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+        let type_id = ValueType::deserialize(reader)?;
+        let val_addr = VInt::deserialize(reader)?.0 as u32;
+        Ok(ValueAddr { type_id, val_addr })
+    }
+}
+impl std::fmt::Debug for ValueAddr {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let val_addr = self.val_addr;
+        f.write_fmt(format_args!("{:?} at {:?}", self.type_id, val_addr))
+    }
+}
+
+/// An enum representing the type of a value for tantivy to index.
+///
+/// Any changes need to be reflected in `BinarySerializable` for `ValueType`
+///
+/// We can't use [schema::Type] or [columnar::ColumnType] here, because they are missing
+/// some items like Array and PreTokStr.
+#[derive(Default, Clone, Copy, Debug, PartialEq)]
+#[repr(u8)]
+pub enum ValueType {
+    /// A null value.
+    #[default]
+    Null = 0,
+    /// The str type is used for any text information.
+    Str = 1,
+    /// Unsigned 64-bits Integer `u64`
+    U64 = 2,
+    /// Signed 64-bits Integer `i64`
+    I64 = 3,
+    /// 64-bits Float `f64`
+    F64 = 4,
+    /// Date/time with nanoseconds precision
+    Date = 5,
+    /// Facet
+    Facet = 6,
+    /// Arbitrarily sized byte array
+    Bytes = 7,
+    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
+    IpAddr = 8,
+    /// Bool value
+    Bool = 9,
+    /// Pre-tokenized str type,
+    PreTokStr = 10,
+    /// Object
+    Object = 11,
+    /// Array
+    Array = 12,
+}
+
+impl BinarySerializable for ValueType {
+    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+        (*self as u8).serialize(writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+        let num = u8::deserialize(reader)?;
+        let type_id = if (0..=12).contains(&num) {
+            unsafe { std::mem::transmute(num) }
+        } else {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                format!("Invalid value type id: {num}"),
+            ));
+        };
+        Ok(type_id)
+    }
+}
+
+impl<'a, V: Value<'a>> From<&ReferenceValue<'a, V>> for ValueType {
+    fn from(value: &ReferenceValue<'a, V>) -> Self {
+        match value {
+            ReferenceValue::Leaf(leaf) => leaf.into(),
+            ReferenceValue::Array(_) => ValueType::Array,
+            ReferenceValue::Object(_) => ValueType::Object,
+        }
+    }
+}
+impl<'a> From<&ReferenceValueLeaf<'a>> for ValueType {
+    fn from(value: &ReferenceValueLeaf<'a>) -> Self {
+        match value {
+            ReferenceValueLeaf::Null => ValueType::Null,
+            ReferenceValueLeaf::Str(_) => ValueType::Str,
+            ReferenceValueLeaf::U64(_) => ValueType::U64,
+            ReferenceValueLeaf::I64(_) => ValueType::I64,
+            ReferenceValueLeaf::F64(_) => ValueType::F64,
+            ReferenceValueLeaf::Bool(_) => ValueType::Bool,
+            ReferenceValueLeaf::Date(_) => ValueType::Date,
+            ReferenceValueLeaf::IpAddr(_) => ValueType::IpAddr,
+            ReferenceValueLeaf::PreTokStr(_) => ValueType::PreTokStr,
+            ReferenceValueLeaf::Facet(_) => ValueType::Facet,
+            ReferenceValueLeaf::Bytes(_) => ValueType::Bytes,
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+/// The Iterator for the object values in the compact document
+pub struct CompactDocObjectIter<'a> {
+    container: &'a CompactDoc,
+    node_addresses_slice: &'a [u8],
+}
+
+impl<'a> CompactDocObjectIter<'a> {
+    fn new(container: &'a CompactDoc, addr: Addr) -> io::Result<Self> {
+        // Objects are alternating key/value `ValueAddr`s serialized into bytes
+        let node_addresses_slice = container.extract_bytes(addr);
+        Ok(Self {
+            container,
+            node_addresses_slice,
+        })
+    }
+}
+
+impl<'a> Iterator for CompactDocObjectIter<'a> {
+    type Item = (&'a str, CompactDocValue<'a>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.node_addresses_slice.is_empty() {
+            return None;
+        }
+        let key_addr = ValueAddr::deserialize(&mut self.node_addresses_slice).ok()?;
+        let key = self.container.extract_str(key_addr.val_addr);
+        let value = ValueAddr::deserialize(&mut self.node_addresses_slice).ok()?;
+        let value = CompactDocValue {
+            container: self.container,
+            value_addr: value,
+        };
+        Some((key, value))
+    }
+}
+
+#[derive(Debug, Clone)]
+/// The Iterator for the array values in the compact document
+pub struct CompactDocArrayIter<'a> {
+    container: &'a CompactDoc,
+    node_addresses_slice: &'a [u8],
+}
+
+impl<'a> CompactDocArrayIter<'a> {
+    fn new(container: &'a CompactDoc, addr: Addr) -> io::Result<Self> {
+        // Arrays are &[ValueAddr] serialized into bytes
+        let node_addresses_slice = container.extract_bytes(addr);
+        Ok(Self {
+            container,
+            node_addresses_slice,
+        })
+    }
+}
+
+impl<'a> Iterator for CompactDocArrayIter<'a> {
+    type Item = CompactDocValue<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.node_addresses_slice.is_empty() {
+            return None;
+        }
+        let value = ValueAddr::deserialize(&mut self.node_addresses_slice).ok()?;
+        let value = CompactDocValue {
+            container: self.container,
+            value_addr: value,
+        };
+        Some(value)
+    }
+}
+
+impl Document for CompactDoc {
+    type Value<'a> = CompactDocValue<'a>;
+    type FieldsValuesIter<'a> = FieldValueIterRef<'a>;
+
+    fn iter_fields_and_values(&self) -> Self::FieldsValuesIter<'_> {
+        FieldValueIterRef {
+            slice: self.field_values.iter(),
+            container: self,
+        }
+    }
+}
+
+/// A helper wrapper for creating an iterator over the field values
+pub struct FieldValueIterRef<'a> {
+    slice: std::slice::Iter<'a, FieldValueAddr>,
+    container: &'a CompactDoc,
+}
+
+impl<'a> Iterator for FieldValueIterRef<'a> {
+    type Item = (Field, CompactDocValue<'a>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.slice.next().map(|field_value| {
+            (
+                Field::from_field_id(field_value.field as u32),
+                CompactDocValue::<'a> {
+                    container: self.container,
+                    value_addr: field_value.value_addr,
+                },
+            )
+        })
+    }
 }

 /// Error that may happen when deserializing
@@ -264,7 +725,40 @@ mod tests {
        let text_field = schema_builder.add_text_field("title", TEXT);
        let mut doc = TantivyDocument::default();
        doc.add_text(text_field, "My title");
-        assert_eq!(doc.field_values().len(), 1);
+        assert_eq!(doc.field_values().count(), 1);
+
+        let schema = schema_builder.build();
+        let _val = doc.get_first(text_field).unwrap();
+        let _json = doc.to_named_doc(&schema);
+    }
+
+    #[test]
+    fn test_json_value() {
+        let json_str = r#"{ 
+            "toto": "titi",
+            "float": -0.2,
+            "bool": true,
+            "unsigned": 1,
+            "signed": -2,
+            "complexobject": {
+                "field.with.dot": 1
+            },
+            "date": "1985-04-12T23:20:50.52Z",
+            "my_arr": [2, 3, {"my_key": "two tokens"}, 4, {"nested_array": [2, 5, 6, [7, 8, {"a": [{"d": {"e":[99]}}, 9000]}, 9, 10], [5, 5]]}]
+        }"#;
+        let json_val: std::collections::BTreeMap<String, OwnedValue> =
+            serde_json::from_str(json_str).unwrap();
+
+        let mut schema_builder = Schema::builder();
+        let json_field = schema_builder.add_json_field("json", TEXT);
+        let mut doc = TantivyDocument::default();
+        doc.add_object(json_field, json_val);
+
+        let schema = schema_builder.build();
+        let json = doc.to_json(&schema);
+        let actual_json: serde_json::Value = serde_json::from_str(&json).unwrap();
+        let expected_json: serde_json::Value = serde_json::from_str(json_str).unwrap();
+        assert_eq!(actual_json["json"][0], expected_json);
    }

    // TODO: Should this be re-added with the serialize method
--- a/src/schema/document/existing_type_impls.rs
+++ b/src/schema/document/existing_type_impls.rs
@@ -5,21 +5,39 @@
 //! and don't care about some of the more specialised types or only want to customise
 //! part of the document structure.
 use std::collections::{btree_map, hash_map, BTreeMap, HashMap};
+use std::iter::Empty;
+use std::net::Ipv6Addr;

+use common::DateTime;
 use serde_json::Number;
+use time::format_description::well_known::Rfc3339;
+use time::OffsetDateTime;

+use super::facet::Facet;
 use super::ReferenceValueLeaf;
 use crate::schema::document::{
    ArrayAccess, DeserializeError, Document, DocumentDeserialize, DocumentDeserializer,
    ObjectAccess, ReferenceValue, Value, ValueDeserialize, ValueDeserializer, ValueVisitor,
 };
 use crate::schema::Field;
+use crate::tokenizer::PreTokenizedString;

 // Serde compatibility support.
+pub fn can_be_rfc3339_date_time(text: &str) -> bool {
+    if let Some(&first_byte) = text.as_bytes().first() {
+        if first_byte.is_ascii_digit() {
+            return true;
+        }
+    }
+
+    false
+}
+
 impl<'a> Value<'a> for &'a serde_json::Value {
    type ArrayIter = std::slice::Iter<'a, serde_json::Value>;
    type ObjectIter = JsonObjectIter<'a>;

+    #[inline]
    fn as_value(&self) -> ReferenceValue<'a, Self> {
        match self {
            serde_json::Value::Null => ReferenceValueLeaf::Null.into(),
@@ -35,7 +53,19 @@ impl<'a> Value<'a> for &'a serde_json::Value {
                    panic!("Unsupported serde_json number {number}");
                }
            }
-            serde_json::Value::String(val) => ReferenceValueLeaf::Str(val).into(),
+            serde_json::Value::String(text) => {
+                if can_be_rfc3339_date_time(text) {
+                    match OffsetDateTime::parse(text, &Rfc3339) {
+                        Ok(dt) => {
+                            let dt_utc = dt.to_offset(time::UtcOffset::UTC);
+                            ReferenceValueLeaf::Date(DateTime::from_utc(dt_utc)).into()
+                        }
+                        Err(_) => ReferenceValueLeaf::Str(text).into(),
+                    }
+                } else {
+                    ReferenceValueLeaf::Str(text).into()
+                }
+            }
            serde_json::Value::Array(elements) => ReferenceValue::Array(elements.iter()),
            serde_json::Value::Object(object) => {
                ReferenceValue::Object(JsonObjectIter(object.iter()))
@@ -44,6 +74,126 @@ impl<'a> Value<'a> for &'a serde_json::Value {
    }
 }

+impl<'a> Value<'a> for &'a String {
+    type ArrayIter = Empty<&'a String>;
+    type ObjectIter = Empty<(&'a str, &'a String)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::Str(self))
+    }
+}
+
+impl<'a> Value<'a> for &'a Facet {
+    type ArrayIter = Empty<&'a Facet>;
+    type ObjectIter = Empty<(&'a str, &'a Facet)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::Facet(self.encoded_str()))
+    }
+}
+
+impl<'a> Value<'a> for &'a u64 {
+    type ArrayIter = Empty<&'a u64>;
+    type ObjectIter = Empty<(&'a str, &'a u64)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::U64(**self))
+    }
+}
+
+impl<'a> Value<'a> for &'a i64 {
+    type ArrayIter = Empty<&'a i64>;
+    type ObjectIter = Empty<(&'a str, &'a i64)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::I64(**self))
+    }
+}
+impl<'a> Value<'a> for &'a f64 {
+    type ArrayIter = Empty<&'a f64>;
+    type ObjectIter = Empty<(&'a str, &'a f64)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::F64(**self))
+    }
+}
+impl<'a> Value<'a> for &'a bool {
+    type ArrayIter = Empty<&'a bool>;
+    type ObjectIter = Empty<(&'a str, &'a bool)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::Bool(**self))
+    }
+}
+impl<'a> Value<'a> for &'a str {
+    type ArrayIter = Empty<&'a str>;
+    type ObjectIter = Empty<(&'a str, &'a str)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::Str(self))
+    }
+}
+impl<'a> Value<'a> for &'a &'a str {
+    type ArrayIter = Empty<&'a &'a str>;
+    type ObjectIter = Empty<(&'a str, &'a &'a str)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::Str(self))
+    }
+}
+
+impl<'a> Value<'a> for &'a [u8] {
+    type ArrayIter = Empty<&'a [u8]>;
+    type ObjectIter = Empty<(&'a str, &'a [u8])>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::Bytes(self))
+    }
+}
+
+impl<'a> Value<'a> for &'a &'a [u8] {
+    type ArrayIter = Empty<&'a &'a [u8]>;
+    type ObjectIter = Empty<(&'a str, &'a &'a [u8])>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::Bytes(self))
+    }
+}
+
+impl<'a> Value<'a> for &'a Vec<u8> {
+    type ArrayIter = Empty<&'a Vec<u8>>;
+    type ObjectIter = Empty<(&'a str, &'a Vec<u8>)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::Bytes(self))
+    }
+}
+
+impl<'a> Value<'a> for &'a DateTime {
+    type ArrayIter = Empty<&'a DateTime>;
+    type ObjectIter = Empty<(&'a str, &'a DateTime)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::Date(**self))
+    }
+}
+impl<'a> Value<'a> for &'a Ipv6Addr {
+    type ArrayIter = Empty<&'a Ipv6Addr>;
+    type ObjectIter = Empty<(&'a str, &'a Ipv6Addr)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::IpAddr(**self))
+    }
+}
+impl<'a> Value<'a> for &'a PreTokenizedString {
+    type ArrayIter = Empty<&'a PreTokenizedString>;
+    type ObjectIter = Empty<(&'a str, &'a PreTokenizedString)>;
+    #[inline]
+    fn as_value(&self) -> ReferenceValue<'a, Self> {
+        ReferenceValue::Leaf(ReferenceValueLeaf::PreTokStr(Box::new((*self).clone())))
+    }
+}
+
 impl ValueDeserialize for serde_json::Value {
    fn deserialize<'de, D>(deserializer: D) -> Result<Self, DeserializeError>
    where D: ValueDeserializer<'de> {
--- a/src/schema/document/mod.rs
+++ b/src/schema/document/mod.rs
@@ -172,7 +172,9 @@ pub use self::de::{
    ArrayAccess, DeserializeError, DocumentDeserialize, DocumentDeserializer, ObjectAccess,
    ValueDeserialize, ValueDeserializer, ValueType, ValueVisitor,
 };
-pub use self::default_document::{DocParsingError, TantivyDocument};
+pub use self::default_document::{
+    CompactDocArrayIter, CompactDocObjectIter, CompactDocValue, DocParsingError, TantivyDocument,
+};
 pub use self::owned_value::OwnedValue;
 pub(crate) use self::se::BinaryDocumentSerializer;
 pub use self::value::{ReferenceValue, ReferenceValueLeaf, Value};
@@ -233,7 +235,7 @@ pub trait Document: Send + Sync + 'static {
            let field_name = schema.get_field_name(field);
            let values: Vec<OwnedValue> = field_values
                .into_iter()
-                .map(|val| val.as_value().into())
+                .map(|val| OwnedValue::from(val.as_value()))
                .collect();
            field_map.insert(field_name.to_string(), values);
        }
--- a/src/schema/document/owned_value.rs
+++ b/src/schema/document/owned_value.rs
@@ -8,6 +8,7 @@ use serde::de::{MapAccess, SeqAccess};
 use time::format_description::well_known::Rfc3339;
 use time::OffsetDateTime;

+use super::existing_type_impls::can_be_rfc3339_date_time;
 use super::ReferenceValueLeaf;
 use crate::schema::document::{
    ArrayAccess, DeserializeError, ObjectAccess, ReferenceValue, Value, ValueDeserialize,
@@ -65,13 +66,13 @@ impl<'a> Value<'a> for &'a OwnedValue {
        match self {
            OwnedValue::Null => ReferenceValueLeaf::Null.into(),
            OwnedValue::Str(val) => ReferenceValueLeaf::Str(val).into(),
-            OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val).into(),
+            OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val.clone().into()).into(),
            OwnedValue::U64(val) => ReferenceValueLeaf::U64(*val).into(),
            OwnedValue::I64(val) => ReferenceValueLeaf::I64(*val).into(),
            OwnedValue::F64(val) => ReferenceValueLeaf::F64(*val).into(),
            OwnedValue::Bool(val) => ReferenceValueLeaf::Bool(*val).into(),
            OwnedValue::Date(val) => ReferenceValueLeaf::Date(*val).into(),
-            OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val).into(),
+            OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val.encoded_str()).into(),
            OwnedValue::Bytes(val) => ReferenceValueLeaf::Bytes(val).into(),
            OwnedValue::IpAddr(val) => ReferenceValueLeaf::IpAddr(*val).into(),
            OwnedValue::Array(array) => ReferenceValue::Array(array.iter()),
@@ -183,7 +184,7 @@ impl serde::Serialize for OwnedValue {
            OwnedValue::Bytes(ref bytes) => serializer.serialize_str(&BASE64.encode(bytes)),
            OwnedValue::Object(ref obj) => {
                let mut map = serializer.serialize_map(Some(obj.len()))?;
-                for &(ref k, ref v) in obj {
+                for (k, v) in obj {
                    map.serialize_entry(k, v)?;
                }
                map.end()
@@ -277,11 +278,13 @@ impl<'a, V: Value<'a>> From<ReferenceValue<'a, V>> for OwnedValue {
                ReferenceValueLeaf::I64(val) => OwnedValue::I64(val),
                ReferenceValueLeaf::F64(val) => OwnedValue::F64(val),
                ReferenceValueLeaf::Date(val) => OwnedValue::Date(val),
-                ReferenceValueLeaf::Facet(val) => OwnedValue::Facet(val.clone()),
+                ReferenceValueLeaf::Facet(val) => {
+                    OwnedValue::Facet(Facet::from_encoded_string(val.to_string()))
+                }
                ReferenceValueLeaf::Bytes(val) => OwnedValue::Bytes(val.to_vec()),
                ReferenceValueLeaf::IpAddr(val) => OwnedValue::IpAddr(val),
                ReferenceValueLeaf::Bool(val) => OwnedValue::Bool(val),
-                ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(val.clone()),
+                ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(*val.clone()),
            },
            ReferenceValue::Array(val) => {
                OwnedValue::Array(val.map(|v| v.as_value().into()).collect())
@@ -373,16 +376,6 @@ impl From<BTreeMap<String, OwnedValue>> for OwnedValue {
    }
 }

-fn can_be_rfc3339_date_time(text: &str) -> bool {
-    if let Some(&first_byte) = text.as_bytes().first() {
-        if first_byte.is_ascii_digit() {
-            return true;
-        }
-    }
-
-    false
-}
-
 impl From<serde_json::Value> for OwnedValue {
    fn from(value: serde_json::Value) -> Self {
        match value {
@@ -470,6 +463,7 @@ mod tests {
        let mut doc = TantivyDocument::default();
        doc.add_bytes(bytes_field, "".as_bytes());
        let json_string = doc.to_json(&schema);
+
        assert_eq!(json_string, r#"{"my_bytes":[""]}"#);
    }

--- a/src/schema/document/se.rs
+++ b/src/schema/document/se.rs
@@ -25,6 +25,7 @@ where W: Write

    /// Attempts to serialize a given document and write the output
    /// to the writer.
+    #[inline]
    pub(crate) fn serialize_doc<D>(&mut self, doc: &D) -> io::Result<()>
    where D: Document {
        let stored_field_values = || {
@@ -57,9 +58,8 @@ where W: Write
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
-                    "Unexpected number of entries written to serializer, expected {} entries, got \
-                     {} entries",
-                    num_field_values, actual_length,
+                    "Unexpected number of entries written to serializer, expected \
+                     {num_field_values} entries, got {actual_length} entries",
                ),
            ));
        }
@@ -121,7 +121,7 @@ where W: Write
                ReferenceValueLeaf::Facet(val) => {
                    self.write_type_code(type_codes::HIERARCHICAL_FACET_CODE)?;

-                    val.serialize(self.writer)
+                    Cow::Borrowed(val).serialize(self.writer)
                }
                ReferenceValueLeaf::Bytes(val) => {
                    self.write_type_code(type_codes::BYTES_CODE)?;
@@ -428,7 +428,7 @@ mod tests {
        );

        let facet = Facet::from_text("/hello/world").unwrap();
-        let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into());
+        let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into());
        let expected = binary_repr!(
            type_codes::HIERARCHICAL_FACET_CODE => Facet::from_text("/hello/world").unwrap(),
        );
@@ -441,7 +441,8 @@ mod tests {
            text: "hello, world".to_string(),
            tokens: vec![Token::default(), Token::default()],
        };
-        let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into());
+        let result =
+            serialize_value(ReferenceValueLeaf::PreTokStr(pre_tok_str.clone().into()).into());
        let expected = binary_repr!(
            type_codes::EXT_CODE, type_codes::TOK_STR_EXT_CODE => pre_tok_str,
        );
@@ -678,6 +679,7 @@ mod tests {
        );
    }

+    #[inline]
    fn serialize_doc<D: Document>(doc: &D, schema: &Schema) -> Vec<u8> {
        let mut writer = Vec::new();

--- a/src/schema/document/value.rs
+++ b/src/schema/document/value.rs
@@ -3,7 +3,6 @@ use std::net::Ipv6Addr;

 use common::DateTime;

-use crate::schema::Facet;
 use crate::tokenizer::PreTokenizedString;

 /// A single field value.
@@ -28,7 +27,7 @@ pub trait Value<'a>: Send + Sync + Debug {
    }

    #[inline]
-    /// If the Value is a String, returns the associated str. Returns None otherwise.
+    /// If the Value is a leaf, returns the associated leaf. Returns None otherwise.
    fn as_leaf(&self) -> Option<ReferenceValueLeaf<'a>> {
        if let ReferenceValue::Leaf(val) = self.as_value() {
            Some(val)
@@ -82,8 +81,9 @@ pub trait Value<'a>: Send + Sync + Debug {
    #[inline]
    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
    /// otherwise.
-    fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
-        self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text())
+    fn as_pre_tokenized_text(&self) -> Option<Box<PreTokenizedString>> {
+        self.as_leaf()
+            .and_then(|leaf| leaf.into_pre_tokenized_text())
    }

    #[inline]
@@ -94,7 +94,7 @@ pub trait Value<'a>: Send + Sync + Debug {

    #[inline]
    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
-    fn as_facet(&self) -> Option<&'a Facet> {
+    fn as_facet(&self) -> Option<&'a str> {
        self.as_leaf().and_then(|leaf| leaf.as_facet())
    }

@@ -132,7 +132,7 @@ pub trait Value<'a>: Send + Sync + Debug {
 }

 /// A enum representing a leaf value for tantivy to index.
-#[derive(Clone, Copy, Debug, PartialEq)]
+#[derive(Clone, Debug, PartialEq)]
 pub enum ReferenceValueLeaf<'a> {
    /// A null value.
    Null,
@@ -146,8 +146,9 @@ pub enum ReferenceValueLeaf<'a> {
    F64(f64),
    /// Date/time with nanoseconds precision
    Date(DateTime),
-    /// Facet
-    Facet(&'a Facet),
+    /// Facet string needs to match the format of
+    /// [Facet::encoded_str](crate::schema::Facet::encoded_str).
+    Facet(&'a str),
    /// Arbitrarily sized byte array
    Bytes(&'a [u8]),
    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
@@ -155,7 +156,70 @@ pub enum ReferenceValueLeaf<'a> {
    /// Bool value
    Bool(bool),
    /// Pre-tokenized str type,
-    PreTokStr(&'a PreTokenizedString),
+    PreTokStr(Box<PreTokenizedString>),
+}
+
+impl From<u64> for ReferenceValueLeaf<'_> {
+    #[inline]
+    fn from(value: u64) -> Self {
+        ReferenceValueLeaf::U64(value)
+    }
+}
+
+impl From<i64> for ReferenceValueLeaf<'_> {
+    #[inline]
+    fn from(value: i64) -> Self {
+        ReferenceValueLeaf::I64(value)
+    }
+}
+
+impl From<f64> for ReferenceValueLeaf<'_> {
+    #[inline]
+    fn from(value: f64) -> Self {
+        ReferenceValueLeaf::F64(value)
+    }
+}
+
+impl From<bool> for ReferenceValueLeaf<'_> {
+    #[inline]
+    fn from(value: bool) -> Self {
+        ReferenceValueLeaf::Bool(value)
+    }
+}
+
+impl<'a> From<&'a str> for ReferenceValueLeaf<'a> {
+    #[inline]
+    fn from(value: &'a str) -> Self {
+        ReferenceValueLeaf::Str(value)
+    }
+}
+
+impl<'a> From<&'a [u8]> for ReferenceValueLeaf<'a> {
+    #[inline]
+    fn from(value: &'a [u8]) -> Self {
+        ReferenceValueLeaf::Bytes(value)
+    }
+}
+
+impl From<DateTime> for ReferenceValueLeaf<'_> {
+    #[inline]
+    fn from(value: DateTime) -> Self {
+        ReferenceValueLeaf::Date(value)
+    }
+}
+
+impl From<Ipv6Addr> for ReferenceValueLeaf<'_> {
+    #[inline]
+    fn from(value: Ipv6Addr) -> Self {
+        ReferenceValueLeaf::IpAddr(value)
+    }
+}
+
+impl From<PreTokenizedString> for ReferenceValueLeaf<'_> {
+    #[inline]
+    fn from(val: PreTokenizedString) -> Self {
+        ReferenceValueLeaf::PreTokStr(Box::new(val))
+    }
 }

 impl<'a, T: Value<'a> + ?Sized> From<ReferenceValueLeaf<'a>> for ReferenceValue<'a, T> {
@@ -259,9 +323,9 @@ impl<'a> ReferenceValueLeaf<'a> {
    }

    #[inline]
-    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
-    /// otherwise.
-    pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
+    /// If the Value is a pre-tokenized string, consumes it and returns the string.
+    /// Returns None otherwise.
+    pub fn into_pre_tokenized_text(self) -> Option<Box<PreTokenizedString>> {
        if let Self::PreTokStr(val) = self {
            Some(val)
        } else {
@@ -281,7 +345,7 @@ impl<'a> ReferenceValueLeaf<'a> {

    #[inline]
    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
-    pub fn as_facet(&self) -> Option<&'a Facet> {
+    pub fn as_facet(&self) -> Option<&'a str> {
        if let Self::Facet(val) = self {
            Some(val)
        } else {
@@ -322,6 +386,16 @@ where V: Value<'a>
        }
    }

+    #[inline]
+    /// If the Value is a leaf, consume it and return the leaf. Returns None otherwise.
+    pub fn into_leaf(self) -> Option<ReferenceValueLeaf<'a>> {
+        if let Self::Leaf(val) = self {
+            Some(val)
+        } else {
+            None
+        }
+    }
+
    #[inline]
    /// If the Value is a String, returns the associated str. Returns None otherwise.
    pub fn as_str(&self) -> Option<&'a str> {
@@ -365,10 +439,11 @@ where V: Value<'a>
    }

    #[inline]
-    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
-    /// otherwise.
-    pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
-        self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text())
+    /// If the Value is a pre-tokenized string, consumes it and returns the string.
+    /// Returns None otherwise.
+    pub fn into_pre_tokenized_text(self) -> Option<Box<PreTokenizedString>> {
+        self.into_leaf()
+            .and_then(|leaf| leaf.into_pre_tokenized_text())
    }

    #[inline]
@@ -379,7 +454,7 @@ where V: Value<'a>

    #[inline]
    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
-    pub fn as_facet(&self) -> Option<&'a Facet> {
+    pub fn as_facet(&self) -> Option<&'a str> {
        self.as_leaf().and_then(|leaf| leaf.as_facet())
    }

--- a/src/schema/field_type.rs
+++ b/src/schema/field_type.rs
@@ -568,21 +568,21 @@ mod tests {
        let schema = schema_builder.build();
        let doc = TantivyDocument::parse_json(&schema, r#"{"id": 100}"#).unwrap();
        assert_eq!(
-            &OwnedValue::Str("100".to_string()),
-            doc.get_first(text_field).unwrap()
+            OwnedValue::Str("100".to_string()),
+            doc.get_first(text_field).unwrap().into()
        );

        let doc = TantivyDocument::parse_json(&schema, r#"{"id": true}"#).unwrap();
        assert_eq!(
-            &OwnedValue::Str("true".to_string()),
-            doc.get_first(text_field).unwrap()
+            OwnedValue::Str("true".to_string()),
+            doc.get_first(text_field).unwrap().into()
        );

        // Not sure if this null coercion is the best approach
        let doc = TantivyDocument::parse_json(&schema, r#"{"id": null}"#).unwrap();
        assert_eq!(
-            &OwnedValue::Str("null".to_string()),
-            doc.get_first(text_field).unwrap()
+            OwnedValue::Str("null".to_string()),
+            doc.get_first(text_field).unwrap().into()
        );
    }

@@ -595,9 +595,18 @@ mod tests {
        let schema = schema_builder.build();
        let doc_json = r#"{"i64": "100", "u64": "100", "f64": "100"}"#;
        let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
-        assert_eq!(&OwnedValue::I64(100), doc.get_first(i64_field).unwrap());
-        assert_eq!(&OwnedValue::U64(100), doc.get_first(u64_field).unwrap());
-        assert_eq!(&OwnedValue::F64(100.0), doc.get_first(f64_field).unwrap());
+        assert_eq!(
+            OwnedValue::I64(100),
+            doc.get_first(i64_field).unwrap().into()
+        );
+        assert_eq!(
+            OwnedValue::U64(100),
+            doc.get_first(u64_field).unwrap().into()
+        );
+        assert_eq!(
+            OwnedValue::F64(100.0),
+            doc.get_first(f64_field).unwrap().into()
+        );
    }

    #[test]
@@ -607,11 +616,17 @@ mod tests {
        let schema = schema_builder.build();
        let doc_json = r#"{"bool": "true"}"#;
        let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
-        assert_eq!(&OwnedValue::Bool(true), doc.get_first(bool_field).unwrap());
+        assert_eq!(
+            OwnedValue::Bool(true),
+            doc.get_first(bool_field).unwrap().into()
+        );

        let doc_json = r#"{"bool": "false"}"#;
        let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
-        assert_eq!(&OwnedValue::Bool(false), doc.get_first(bool_field).unwrap());
+        assert_eq!(
+            OwnedValue::Bool(false),
+            doc.get_first(bool_field).unwrap().into()
+        );
    }

    #[test]
@@ -644,7 +659,7 @@ mod tests {
        let schema = schema_builder.build();
        let doc_json = r#"{"date": "2019-10-12T07:20:50.52+02:00"}"#;
        let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
-        let date = doc.get_first(date_field).unwrap();
+        let date = OwnedValue::from(doc.get_first(date_field).unwrap());
        // Time zone is converted to UTC
        assert_eq!("Date(2019-10-12T05:20:50.52Z)", format!("{date:?}"));
    }
--- a/src/schema/field_value.rs
+++ b/src/schema/field_value.rs
@@ -1,46 +0,0 @@
-use crate::schema::{Field, OwnedValue};
-
-/// `FieldValue` holds together a `Field` and its `Value`.
-#[allow(missing_docs)]
-#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-pub struct FieldValue {
-    pub field: Field,
-    pub value: OwnedValue,
-}
-
-impl FieldValue {
-    /// Constructor
-    pub fn new(field: Field, value: OwnedValue) -> FieldValue {
-        FieldValue { field, value }
-    }
-
-    /// Field accessor
-    pub fn field(&self) -> Field {
-        self.field
-    }
-
-    /// Value accessor
-    pub fn value(&self) -> &OwnedValue {
-        &self.value
-    }
-}
-
-impl From<FieldValue> for OwnedValue {
-    fn from(field_value: FieldValue) -> Self {
-        field_value.value
-    }
-}
-
-/// A helper wrapper for creating standard iterators
-/// out of the fields iterator trait.
-pub struct FieldValueIter<'a>(pub(crate) std::slice::Iter<'a, FieldValue>);
-
-impl<'a> Iterator for FieldValueIter<'a> {
-    type Item = (Field, &'a OwnedValue);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.0
-            .next()
-            .map(|field_value| (field_value.field, &field_value.value))
-    }
-}
--- a/src/schema/flags.rs
+++ b/src/schema/flags.rs
@@ -1,7 +1,6 @@
 use std::ops::BitOr;

-use crate::schema::{NumericOptions, TextOptions};
-use crate::DateOptions;
+use crate::schema::{DateOptions, NumericOptions, TextOptions};

 #[derive(Clone)]
 pub struct StoredFlag;
--- a/src/schema/mod.rs
+++ b/src/schema/mod.rs
@@ -114,7 +114,6 @@ pub(crate) mod term;

 mod field_entry;
 mod field_type;
-mod field_value;

 mod bytes_options;
 mod date_time_options;
@@ -138,7 +137,6 @@ pub use self::facet_options::FacetOptions;
 pub use self::field::Field;
 pub use self::field_entry::FieldEntry;
 pub use self::field_type::{FieldType, Type};
-pub use self::field_value::FieldValue;
 pub use self::flags::{COERCE, FAST, INDEXED, STORED};
 pub use self::index_record_option::IndexRecordOption;
 pub use self::ip_options::{IntoIpv6Addr, IpAddrOptions};
--- a/src/schema/schema.rs
+++ b/src/schema/schema.rs
@@ -645,15 +645,15 @@ mod tests {
        let doc =
            TantivyDocument::convert_named_doc(&schema, NamedFieldDocument(named_doc_map)).unwrap();
        assert_eq!(
-            doc.get_all(title).collect::<Vec<_>>(),
+            doc.get_all(title).map(OwnedValue::from).collect::<Vec<_>>(),
            vec![
-                &OwnedValue::from("title1".to_string()),
-                &OwnedValue::from("title2".to_string())
+                OwnedValue::from("title1".to_string()),
+                OwnedValue::from("title2".to_string())
            ]
        );
        assert_eq!(
-            doc.get_all(val).collect::<Vec<_>>(),
-            vec![&OwnedValue::from(14u64), &OwnedValue::from(-1i64)]
+            doc.get_all(val).map(OwnedValue::from).collect::<Vec<_>>(),
+            vec![OwnedValue::from(14u64), OwnedValue::from(-1i64)]
        );
    }

@@ -682,7 +682,7 @@ mod tests {
        let schema = schema_builder.build();
        {
            let doc = TantivyDocument::parse_json(&schema, "{}").unwrap();
-            assert!(doc.field_values().is_empty());
+            assert!(doc.field_values().next().is_none());
        }
        {
            let doc = TantivyDocument::parse_json(
--- a/src/space_usage/mod.rs
+++ b/src/space_usage/mod.rs
@@ -12,8 +12,8 @@ use std::collections::HashMap;
 use common::ByteCount;
 use serde::{Deserialize, Serialize};

+use crate::index::SegmentComponent;
 use crate::schema::Field;
-use crate::SegmentComponent;

 /// Enum containing any of the possible space usage results for segment components.
 pub enum ComponentSpaceUsage {
@@ -115,7 +115,7 @@ impl SegmentSpaceUsage {
    /// Use the components directly if this is somehow in performance critical code.
    pub fn component(&self, component: SegmentComponent) -> ComponentSpaceUsage {
        use self::ComponentSpaceUsage::*;
-        use crate::SegmentComponent::*;
+        use crate::index::SegmentComponent::*;
        match component {
            Postings => PerField(self.postings().clone()),
            Positions => PerField(self.positions().clone()),
--- a/src/store/mod.rs
+++ b/src/store/mod.rs
@@ -59,9 +59,8 @@ pub mod tests {
    use super::*;
    use crate::directory::{Directory, RamDirectory, WritePtr};
    use crate::fastfield::AliveBitSet;
-    use crate::schema::document::Value;
    use crate::schema::{
-        self, Schema, TantivyDocument, TextFieldIndexing, TextOptions, STORED, TEXT,
+        self, Schema, TantivyDocument, TextFieldIndexing, TextOptions, Value, STORED, TEXT,
    };
    use crate::{Index, IndexWriter, Term};

@@ -92,8 +91,8 @@ pub mod tests {
                StoreWriter::new(writer, compressor, blocksize, separate_thread).unwrap();
            for i in 0..num_docs {
                let mut doc = TantivyDocument::default();
-                doc.add_field_value(field_body, LOREM.to_string());
-                doc.add_field_value(field_title, format!("Doc {i}"));
+                doc.add_text(field_body, LOREM);
+                doc.add_text(field_title, format!("Doc {i}"));
                store_writer.store(&doc, &schema).unwrap();
            }
            store_writer.close().unwrap();
@@ -119,10 +118,11 @@ pub mod tests {
        let store = StoreReader::open(store_file, 10)?;
        for i in 0..NUM_DOCS as u32 {
            assert_eq!(
-                *store
+                store
                    .get::<TantivyDocument>(i)?
                    .get_first(field_title)
                    .unwrap()
+                    .as_value()
                    .as_str()
                    .unwrap(),
                format!("Doc {i}")
@@ -131,7 +131,13 @@ pub mod tests {

        for doc in store.iter::<TantivyDocument>(Some(&alive_bitset)) {
            let doc = doc?;
-            let title_content = doc.get_first(field_title).unwrap().as_str().unwrap();
+            let title_content = doc
+                .get_first(field_title)
+                .unwrap()
+                .as_value()
+                .as_str()
+                .unwrap()
+                .to_string();
            if !title_content.starts_with("Doc ") {
                panic!("unexpected title_content {title_content}");
            }
--- a/src/store/reader.rs
+++ b/src/store/reader.rs
@@ -18,6 +18,8 @@ use crate::schema::document::{BinaryDocumentDeserializer, DocumentDeserialize};
 use crate::space_usage::StoreSpaceUsage;
 use crate::store::index::Checkpoint;
 use crate::DocId;
+#[cfg(feature = "quickwit")]
+use crate::Executor;

 pub(crate) const DOCSTORE_CACHE_CAPACITY: usize = 100;

@@ -341,7 +343,11 @@ impl StoreReader {
    /// In most cases use [`get_async`](Self::get_async)
    ///
    /// Loads and decompresses a block asynchronously.
-    async fn read_block_async(&self, checkpoint: &Checkpoint) -> io::Result<Block> {
+    async fn read_block_async(
+        &self,
+        checkpoint: &Checkpoint,
+        executor: &Executor,
+    ) -> io::Result<Block> {
        let cache_key = checkpoint.byte_range.start;
        if let Some(block) = self.cache.get_from_cache(checkpoint.byte_range.start) {
            return Ok(block);
@@ -353,8 +359,12 @@ impl StoreReader {
            .read_bytes_async()
            .await?;

-        let decompressed_block =
-            OwnedBytes::new(self.decompressor.decompress(compressed_block.as_ref())?);
+        let decompressor = self.decompressor;
+        let maybe_decompressed_block = executor
+            .spawn_blocking(move || decompressor.decompress(compressed_block.as_ref()))
+            .await
+            .expect("decompression panicked");
+        let decompressed_block = OwnedBytes::new(maybe_decompressed_block?);

        self.cache
            .put_into_cache(cache_key, decompressed_block.clone());
@@ -363,15 +373,23 @@ impl StoreReader {
    }

    /// Reads raw bytes of a given document asynchronously.
-    pub async fn get_document_bytes_async(&self, doc_id: DocId) -> crate::Result<OwnedBytes> {
+    pub async fn get_document_bytes_async(
+        &self,
+        doc_id: DocId,
+        executor: &Executor,
+    ) -> crate::Result<OwnedBytes> {
        let checkpoint = self.block_checkpoint(doc_id)?;
-        let block = self.read_block_async(&checkpoint).await?;
+        let block = self.read_block_async(&checkpoint, executor).await?;
        Self::get_document_bytes_from_block(block, doc_id, &checkpoint)
    }

    /// Fetches a document asynchronously. Async version of [`get`](Self::get).
-    pub async fn get_async<D: DocumentDeserialize>(&self, doc_id: DocId) -> crate::Result<D> {
-        let mut doc_bytes = self.get_document_bytes_async(doc_id).await?;
+    pub async fn get_async<D: DocumentDeserialize>(
+        &self,
+        doc_id: DocId,
+        executor: &Executor,
+    ) -> crate::Result<D> {
+        let mut doc_bytes = self.get_document_bytes_async(doc_id, executor).await?;

        let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
            .map_err(crate::TantivyError::from)?;
@@ -385,8 +403,7 @@ mod tests {

    use super::*;
    use crate::directory::RamDirectory;
-    use crate::schema::document::Value;
-    use crate::schema::{Field, TantivyDocument};
+    use crate::schema::{Field, TantivyDocument, Value};
    use crate::store::tests::write_lorem_ipsum_store;
    use crate::store::Compressor;
    use crate::Directory;
@@ -394,7 +411,7 @@ mod tests {
    const BLOCK_SIZE: usize = 16_384;

    fn get_text_field<'a>(doc: &'a TantivyDocument, field: &'a Field) -> Option<&'a str> {
-        doc.get_first(*field).and_then(|f| f.as_str())
+        doc.get_first(*field).and_then(|f| f.as_value().as_str())
    }

    #[test]
--- a/src/termdict/fst_termdict/termdict.rs
+++ b/src/termdict/fst_termdict/termdict.rs
@@ -93,7 +93,7 @@ fn open_fst_index(fst_file: FileSlice) -> io::Result<tantivy_fst::Map<OwnedBytes
    let fst = Fst::new(bytes).map_err(|err| {
        io::Error::new(
            io::ErrorKind::InvalidData,
-            format!("Fst data is corrupted: {:?}", err),
+            format!("Fst data is corrupted: {err:?}"),
        )
    })?;
    Ok(tantivy_fst::Map::from(fst))
--- a/src/termdict/tests.rs
+++ b/src/termdict/tests.rs
@@ -95,7 +95,7 @@ fn test_term_dictionary_simple() -> crate::Result<()> {
 #[test]
 fn test_term_dictionary_stream() -> crate::Result<()> {
    let ids: Vec<_> = (0u32..10_000u32)
-        .map(|i| (format!("doc{:0>6}", i), i))
+        .map(|i| (format!("doc{i:0>6}"), i))
        .collect();
    let buffer: Vec<u8> = {
        let mut term_dictionary_builder = TermDictionaryBuilder::create(vec![]).unwrap();
@@ -156,7 +156,7 @@ fn test_stream_high_range_prefix_suffix() -> crate::Result<()> {
 #[test]
 fn test_stream_range() -> crate::Result<()> {
    let ids: Vec<_> = (0u32..10_000u32)
-        .map(|i| (format!("doc{:0>6}", i), i))
+        .map(|i| (format!("doc{i:0>6}"), i))
        .collect();
    let buffer: Vec<u8> = {
        let mut term_dictionary_builder = TermDictionaryBuilder::create(vec![]).unwrap();
--- a/src/tokenizer/facet_tokenizer.rs
+++ b/src/tokenizer/facet_tokenizer.rs
@@ -96,7 +96,7 @@ mod tests {
        {
            let mut add_token = |token: &Token| {
                let facet = Facet::from_encoded(token.text.as_bytes().to_owned()).unwrap();
-                tokens.push(format!("{}", facet));
+                tokens.push(format!("{facet}"));
            };
            FacetTokenizer::default()
                .token_stream(facet.encoded_str())
@@ -116,7 +116,7 @@ mod tests {
        {
            let mut add_token = |token: &Token| {
                let facet = Facet::from_encoded(token.text.as_bytes().to_owned()).unwrap(); // ok test
-                tokens.push(format!("{}", facet));
+                tokens.push(format!("{facet}"));
            };
            FacetTokenizer::default()
                .token_stream(facet.encoded_str()) // ok test
Author	SHA1	Message	Date
Pascal Seitz	f5a716e827	update basic_search example	2024-05-30 21:56:22 +08:00
Meng Zhang	4143d31865	chore: fix build as the rev is gone (#2417 )	2024-05-29 09:49:16 +08:00
Hamir Mahal	0c634adbe1	style: simplify strings with string interpolation (#2412 ) * style: simplify strings with string interpolation * fix: formatting	2024-05-27 09:16:47 +02:00
PSeitz	2e3641c2ae	return CompactDocValue instead of trait (#2410 ) The CompactDocValue is easier to handle than the trait in some cases like comparison and conversion	2024-05-27 07:33:50 +02:00
Paul Masurel	b806122c81	Fixing flaky test (#2407 )	2024-05-22 10:10:55 +09:00
PSeitz	e1679f3fb9	compact doc (#2402 ) * compact doc * add any value type * pass references when building CompactDoc * remove OwnedValue from API * clippy * clippy * fail on large documents * fmt * cleanup * cleanup * implement Value for different types fix serde_json date Value implementation * fmt * cleanup * fmt * cleanup * store positions instead of pos+len * remove nodes array * remove mediumvec * cleanup * infallible serialize into vec * remove positions indirection * remove 24MB limitation in document use u32 for Addr Remove the 3 byte addressing limitation and use VInt instead * cleanup * extend test * cleanup, add comments * rename, remove pub	2024-05-21 10:16:08 +02:00
dependabot[bot]	5a80420b10	--- (#2406 ) updated-dependencies: - dependency-name: binggan dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-05-21 04:36:32 +02:00
dependabot[bot]	aa26ff5029	Update binggan requirement from 0.6.2 to 0.7.0 (#2401 ) --- updated-dependencies: - dependency-name: binggan dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-05-17 02:53:25 +02:00
dependabot[bot]	e197b59258	Update itertools requirement from 0.12.0 to 0.13.0 (#2400 ) Updates the requirements on [itertools](https://github.com/rust-itertools/itertools) to permit the latest version. - [Changelog](https://github.com/rust-itertools/itertools/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-itertools/itertools/compare/v0.12.0...v0.13.0) --- updated-dependencies: - dependency-name: itertools dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-05-17 02:53:02 +02:00
PSeitz	5b7cca13e5	lower contention on AggregationLimits (#2394 ) PR https://github.com/quickwit-oss/quickwit/pull/4962 fixes an issue where the AggregationLimits are not passed correctly. Since the AggregationLimits are shared properly we run into contention issues. This PR includes some straightforward improvement to reduce contention, by only calling if the memory changed and avoiding the second read. We probably need some sharding with multiple counters or local caching before updating the global after some threshold.	2024-05-15 12:25:40 +02:00
dependabot[bot]	a79590477e	Update binggan requirement from 0.5.2 to 0.6.2 (#2399 ) --- updated-dependencies: - dependency-name: binggan dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-05-15 05:40:37 +02:00
Paul Masurel	6181c1eb5e	Small changes in the Executor API. (#2391 ) Warning, this change is mildly not backward compatible so I bumped tantivy's version.	2024-05-10 17:19:12 +09:00
Adam Reichold	1ee5f90761	Give allocation control to the caller instead of force a clone (#2389 ) Achieved by moving the boxes out of the temporary reference wrappers which are cloneable themselves, i.e. if required the caller can clone them already or consume them to reuse existing allocations.	2024-05-09 16:01:13 +09:00
PSeitz	71f3b4e4e3	fix ReferenceValue API flaw (#2372 ) * fix ReferenceValue API flaw Remove `Facet` and `TokenizedString` values from the `ReferenceValue` API, as this requires the trait value to have them stored somewhere. Since `TokenizedString` is quite niche, I just copy it into a Box, instead of designing a reference API around it. * fix comment link	2024-05-09 06:14:42 +02:00
trinity-1686a	8cd7ddc535	run block decompression from executor (#2386 ) * run block decompression from executor * add a wrapper with is_closed to oneshot channel * add cancelation test to Executor::spawn_blocking	2024-05-08 12:22:44 +02:00
Paul Masurel	2b76335a95	Removed usage of num_cpus (#2387 ) * Removed usage of num_cpus * handling error	2024-05-08 13:32:52 +09:00
PSeitz	c6b213d8f0	use bingang for agg benchmark (#2378 ) * use bingang for agg benchmark use bingang for agg benchmark, which includes memory consumption Output: ``` full histogram Memory: 15.8 KB Avg: 10.9322ms (+5.44%) Median: 10.8790ms (+9.28%) Min: 10.7470ms Max: 11.3263ms histogram_hard_bounds Memory: 15.5 KB Avg: 5.1939ms (+6.61%) Median: 5.1722ms (+10.98%) Min: 5.0432ms Max: 5.3910ms histogram_with_avg_sub_agg Memory: 48.7 KB Avg: 23.8165ms (+4.57%) Median: 23.7264ms (+10.06%) Min: 23.4995ms Max: 24.8107ms dense histogram Memory: 17.3 KB Avg: 15.6810ms (-8.54%) Median: 15.6174ms (-8.89%) Min: 15.4953ms Max: 16.0702ms histogram_hard_bounds Memory: 15.4 KB Avg: 10.0720ms (-7.33%) Median: 10.0572ms (-7.06%) Min: 9.8500ms Max: 10.4819ms histogram_with_avg_sub_agg Memory: 50.1 KB Avg: 33.0993ms (-7.04%) Median: 32.9499ms (-6.86%) Min: 32.8284ms Max: 34.0529ms sparse histogram Memory: 16.3 KB Avg: 19.2325ms (-0.44%) Median: 19.1211ms (-1.26%) Min: 19.0348ms Max: 19.7902ms histogram_hard_bounds Memory: 16.1 KB Avg: 18.5179ms (-0.61%) Median: 18.4552ms (-0.90%) Min: 18.3799ms Max: 19.0535ms histogram_with_avg_sub_agg Memory: 34.7 KB Avg: 21.2589ms (-0.69%) Median: 21.1867ms (-1.05%) Min: 21.0342ms Max: 21.9900ms ``` * add more bench with term as sub agg	2024-05-07 11:29:49 +02:00
PSeitz	eea70030bf	cleanup top level exports (#2382 ) remove some top level exports	2024-05-07 09:59:41 +02:00
PSeitz	92b5526310	allow more JSON values, fix i64 special case (#2383 ) This changes three things: - Reuse positions_per_path hashmap instead of allocating one per indexed JSON value - Try to cast u64 values to i64 to streamline with search behaviour - Allow top level json values to be of any type, instead of limiting it to JSON objects. Remove special JSON object handling method. TODO: We probably should also try to check f64 to i64 and u64 when indexing, as values may get converted to f64 by the JSON parser	2024-05-01 12:08:12 +02:00
PSeitz	99a59ad37e	remove zero byte check (#2379 ) remove zero byte checks in columnar. zero bytes are converted during serialization now. unify code paths extend test for expected column names	2024-04-26 06:03:28 +02:00
trinity-1686a	6a66a71cbb	modify fastfield range query heuristic (#2375 )	2024-04-25 10:06:11 +02:00
PSeitz	ff40764204	make convert_to_fast_value_and_append_to_json_term pub (#2370 ) * make convert_to_fast_value_and_append_to_json_term pub * clippy	2024-04-23 04:05:41 +02:00