oneshot 0.1.7

Now that the is_closed change has been merged upstream, we can rely on that. This commit is a "hotfix" because we don't want to rely on some of the commits in main just yet.
lower contention on AggregationLimits (#2394)
2026-02-21 23:30:37 +00:00 · 2024-05-31 07:54:56 +04:00 · 2024-05-15 12:25:40 +02:00 · 2024-05-15 05:40:37 +02:00 · 2024-05-10 17:19:12 +09:00 · 2024-05-09 16:01:13 +09:00
46 changed files with 861 additions and 1205 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.22.0"
+version = "0.23.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -15,12 +15,16 @@ rust-version = "1.63"
 exclude = ["benches/*.json", "benches/*.txt"]

 [dependencies]
-oneshot = "0.1.5"
+# Switch back to the non-forked oneshot crate once https://github.com/faern/oneshot/pull/35 is merged
+oneshot = "0.1.7"
 base64 = "0.22.0"
 byteorder = "1.4.3"
 crc32fast = "1.3.2"
 once_cell = "1.10.0"
-regex = { version = "1.5.5", default-features = false, features = ["std", "unicode"] }
+regex = { version = "1.5.5", default-features = false, features = [
+    "std",
+    "unicode",
+] }
 aho-corasick = "1.0"
 tantivy-fst = "0.5"
 memmap2 = { version = "0.9.0", optional = true }
@@ -30,14 +34,15 @@ tempfile = { version = "3.3.0", optional = true }
 log = "0.4.16"
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = "1.0.79"
-num_cpus = "1.13.1"
 fs4 = { version = "0.8.0", optional = true }
 levenshtein_automata = "0.2.1"
 uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
 rust-stemmers = "1.2.0"
 downcast-rs = "1.2.0"
-bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker4x"] }
+bitpacking = { version = "0.9.2", default-features = false, features = [
+    "bitpacker4x",
+] }
 census = "0.4.2"
 rustc-hash = "1.1.0"
 thiserror = "1.0.30"
@@ -52,22 +57,22 @@ itertools = "0.12.0"
 measure_time = "0.8.2"
 arc-swap = "1.5.0"

-columnar = { version= "0.3", path="./columnar", package ="tantivy-columnar" }
-sstable = { version= "0.3", path="./sstable", package ="tantivy-sstable", optional = true }
-stacker = { version= "0.3", path="./stacker", package ="tantivy-stacker" }
-query-grammar = { version= "0.22.0", path="./query-grammar", package = "tantivy-query-grammar" }
-tantivy-bitpacker = { version= "0.6", path="./bitpacker" }
-common = { version= "0.7", path = "./common/", package = "tantivy-common" }
-tokenizer-api = { version= "0.3", path="./tokenizer-api", package="tantivy-tokenizer-api" }
+columnar = { version = "0.3", path = "./columnar", package = "tantivy-columnar" }
+sstable = { version = "0.3", path = "./sstable", package = "tantivy-sstable", optional = true }
+stacker = { version = "0.3", path = "./stacker", package = "tantivy-stacker" }
+query-grammar = { version = "0.22.0", path = "./query-grammar", package = "tantivy-query-grammar" }
+tantivy-bitpacker = { version = "0.6", path = "./bitpacker" }
+common = { version = "0.7", path = "./common/", package = "tantivy-common" }
+tokenizer-api = { version = "0.3", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
 sketches-ddsketch = { version = "0.2.1", features = ["use_serde"] }
 futures-util = { version = "0.3.28", optional = true }
 fnv = "1.0.7"
-mediumvec = "1.3.0"

 [target.'cfg(windows)'.dependencies]
 winapi = "0.3.9"

 [dev-dependencies]
+binggan = "0.6.2"
 rand = "0.8.5"
 maplit = "1.0.2"
 matches = "0.1.9"
@@ -82,7 +87,6 @@ time = { version = "0.3.10", features = ["serde-well-known", "macros"] }
 postcard = { version = "1.0.4", features = [
  "use-std",
 ], default-features = false }
-peakmem-alloc = "0.3.0"

 [target.'cfg(not(windows))'.dev-dependencies]
 criterion = { version = "0.5", default-features = false }
@@ -114,17 +118,26 @@ lz4-compression = ["lz4_flex"]
 zstd-compression = ["zstd"]

 failpoints = ["fail", "fail/failpoints"]
-unstable = [] # useful for benches.
+unstable = []                            # useful for benches.

 quickwit = ["sstable", "futures-util"]

-# Compares only the hash of a string when indexing data. 
+# Compares only the hash of a string when indexing data.
 # Increases indexing speed, but may lead to extremely rare missing terms, when there's a hash collision.
 # Uses 64bit ahash.
 compare_hash_only = ["stacker/compare_hash_only"]

 [workspace]
-members = ["query-grammar", "bitpacker", "common", "ownedbytes", "stacker", "sstable", "tokenizer-api", "columnar"]
+members = [
+    "query-grammar",
+    "bitpacker",
+    "common",
+    "ownedbytes",
+    "stacker",
+    "sstable",
+    "tokenizer-api",
+    "columnar",
+]

 # Following the "fail" crate best practises, we isolate
 # tests that define specific behavior in fail check points
@@ -145,3 +158,7 @@ harness = false
 [[bench]]
 name = "index-bench"
 harness = false
+
+[[bench]]
+name = "agg_bench"
+harness = false
--- a/benches/agg_bench.rs
+++ b/benches/agg_bench.rs
@@ -0,0 +1,413 @@
+use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
+use rand::prelude::SliceRandom;
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+use rand_distr::Distribution;
+use serde_json::json;
+use tantivy::aggregation::agg_req::Aggregations;
+use tantivy::aggregation::AggregationCollector;
+use tantivy::query::{AllQuery, TermQuery};
+use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
+use tantivy::{doc, Index, Term};
+
+#[global_allocator]
+pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
+
+/// Mini macro to register a function via its name
+/// runner.register("average_u64", move |index| average_u64(index));
+macro_rules! register {
+    ($runner:expr, $func:ident) => {
+        $runner.register(stringify!($func), move |index| $func(index))
+    };
+}
+
+fn main() {
+    let inputs = vec![
+        ("full", get_test_index_bench(Cardinality::Full).unwrap()),
+        (
+            "dense",
+            get_test_index_bench(Cardinality::OptionalDense).unwrap(),
+        ),
+        (
+            "sparse",
+            get_test_index_bench(Cardinality::OptionalSparse).unwrap(),
+        ),
+        (
+            "multivalue",
+            get_test_index_bench(Cardinality::Multivalued).unwrap(),
+        ),
+    ];
+
+    bench_agg(InputGroup::new_with_inputs(inputs));
+}
+
+fn bench_agg(mut group: InputGroup<Index>) {
+    group.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.
+    register!(group, average_u64);
+    register!(group, average_f64);
+    register!(group, average_f64_u64);
+    register!(group, stats_f64);
+    register!(group, percentiles_f64);
+    register!(group, terms_few);
+    register!(group, terms_many);
+    register!(group, terms_many_order_by_term);
+    register!(group, terms_many_with_top_hits);
+    register!(group, terms_many_with_avg_sub_agg);
+    register!(group, terms_many_json_mixed_type_with_sub_agg_card);
+    register!(group, range_agg);
+    register!(group, range_agg_with_avg_sub_agg);
+    register!(group, range_agg_with_term_agg_few);
+    register!(group, range_agg_with_term_agg_many);
+    register!(group, histogram);
+    register!(group, histogram_hard_bounds);
+    register!(group, histogram_with_avg_sub_agg);
+    register!(group, avg_and_range_with_avg_sub_agg);
+
+    group.run();
+}
+
+fn exec_term_with_agg(index: &Index, agg_req: serde_json::Value) {
+    let agg_req: Aggregations = serde_json::from_value(agg_req).unwrap();
+
+    let reader = index.reader().unwrap();
+    let text_field = reader.searcher().schema().get_field("text").unwrap();
+    let term_query = TermQuery::new(
+        Term::from_field_text(text_field, "cool"),
+        IndexRecordOption::Basic,
+    );
+    let collector = get_collector(agg_req);
+    let searcher = reader.searcher();
+    black_box(searcher.search(&term_query, &collector).unwrap());
+}
+
+fn average_u64(index: &Index) {
+    let agg_req = json!({
+        "average": { "avg": { "field": "score", } }
+    });
+    exec_term_with_agg(index, agg_req)
+}
+fn average_f64(index: &Index) {
+    let agg_req = json!({
+        "average": { "avg": { "field": "score_f64", } }
+    });
+    exec_term_with_agg(index, agg_req)
+}
+fn average_f64_u64(index: &Index) {
+    let agg_req = json!({
+        "average_f64": { "avg": { "field": "score_f64" } },
+        "average": { "avg": { "field": "score" } },
+    });
+    exec_term_with_agg(index, agg_req)
+}
+fn stats_f64(index: &Index) {
+    let agg_req = json!({
+        "average_f64": { "stats": { "field": "score_f64", } }
+    });
+    exec_term_with_agg(index, agg_req)
+}
+
+fn percentiles_f64(index: &Index) {
+    let agg_req = json!({
+      "mypercentiles": {
+        "percentiles": {
+          "field": "score_f64",
+          "percents": [ 95, 99, 99.9 ]
+        }
+      }
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_few(index: &Index) {
+    let agg_req = json!({
+        "my_texts": { "terms": { "field": "text_few_terms" } },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many(index: &Index) {
+    let agg_req = json!({
+        "my_texts": { "terms": { "field": "text_many_terms" } },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_order_by_term(index: &Index) {
+    let agg_req = json!({
+        "my_texts": { "terms": { "field": "text_many_terms", "order": { "_key": "desc" } } },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_with_top_hits(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "text_many_terms" },
+            "aggs": {
+                "top_hits": { "top_hits":
+                    {
+                        "sort": [
+                            { "score": "desc" }
+                        ],
+                        "size": 2,
+                        "doc_value_fields": ["score_f64"]
+                    }
+                }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "text_many_terms" },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_json_mixed_type_with_sub_agg_card(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "json.mixed_type" },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+
+fn execute_agg(index: &Index, agg_req: serde_json::Value) {
+    let agg_req: Aggregations = serde_json::from_value(agg_req).unwrap();
+    let collector = get_collector(agg_req);
+
+    let reader = index.reader().unwrap();
+    let searcher = reader.searcher();
+    black_box(searcher.search(&AllQuery, &collector).unwrap());
+}
+fn range_agg(index: &Index) {
+    let agg_req = json!({
+        "range_f64": { "range": { "field": "score_f64", "ranges": [
+            { "from": 3, "to": 7000 },
+            { "from": 7000, "to": 20000 },
+            { "from": 20000, "to": 30000 },
+            { "from": 30000, "to": 40000 },
+            { "from": 40000, "to": 50000 },
+            { "from": 50000, "to": 60000 }
+        ] } },
+    });
+    execute_agg(index, agg_req);
+}
+fn range_agg_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 30000 },
+                    { "from": 30000, "to": 40000 },
+                    { "from": 40000, "to": 50000 },
+                    { "from": 50000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+
+fn range_agg_with_term_agg_few(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 30000 },
+                    { "from": 30000, "to": 40000 },
+                    { "from": 40000, "to": 50000 },
+                    { "from": 50000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "my_texts": { "terms": { "field": "text_few_terms" } },
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn range_agg_with_term_agg_many(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 30000 },
+                    { "from": 30000, "to": 40000 },
+                    { "from": 40000, "to": 50000 },
+                    { "from": 50000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "my_texts": { "terms": { "field": "text_many_terms" } },
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn histogram(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "histogram": {
+                "field": "score_f64",
+                "interval": 100 // 1000 buckets
+            },
+        }
+    });
+    execute_agg(index, agg_req);
+}
+fn histogram_hard_bounds(index: &Index) {
+    let agg_req = json!({
+        "rangef64": { "histogram": { "field": "score_f64", "interval": 100, "hard_bounds": { "min": 1000, "max": 300000 } } },
+    });
+    execute_agg(index, agg_req);
+}
+fn histogram_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "histogram": { "field": "score_f64", "interval": 100 },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        }
+    });
+    execute_agg(index, agg_req);
+}
+fn avg_and_range_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "average_in_range": { "avg": { "field": "score" } }
+            }
+        },
+        "average": { "avg": { "field": "score" } }
+    });
+    execute_agg(index, agg_req);
+}
+
+#[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
+enum Cardinality {
+    /// All documents contain exactly one value.
+    /// `Full` is the default for auto-detecting the Cardinality, since it is the most strict.
+    #[default]
+    Full = 0,
+    /// All documents contain at most one value.
+    OptionalDense = 1,
+    /// All documents may contain any number of values.
+    Multivalued = 2,
+    /// 1 / 20 documents has a value
+    OptionalSparse = 3,
+}
+
+fn get_collector(agg_req: Aggregations) -> AggregationCollector {
+    AggregationCollector::from_aggs(agg_req, Default::default())
+}
+
+fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
+    let mut schema_builder = Schema::builder();
+    let text_fieldtype = tantivy::schema::TextOptions::default()
+        .set_indexing_options(
+            TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
+        )
+        .set_stored();
+    let text_field = schema_builder.add_text_field("text", text_fieldtype);
+    let json_field = schema_builder.add_json_field("json", FAST);
+    let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
+    let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
+    let score_fieldtype = tantivy::schema::NumericOptions::default().set_fast();
+    let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
+    let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
+    let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
+    let index = Index::create_from_tempdir(schema_builder.build())?;
+    let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
+
+    let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
+
+    let many_terms_data = (0..150_000)
+        .map(|num| format!("author{}", num))
+        .collect::<Vec<_>>();
+    {
+        let mut rng = StdRng::from_seed([1u8; 32]);
+        let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
+        // To make the different test cases comparable we just change one doc to force the
+        // cardinality
+        if cardinality == Cardinality::OptionalDense {
+            index_writer.add_document(doc!())?;
+        }
+        if cardinality == Cardinality::Multivalued {
+            index_writer.add_document(doc!(
+                json_field => json!({"mixed_type": 10.0}),
+                json_field => json!({"mixed_type": 10.0}),
+                text_field => "cool",
+                text_field => "cool",
+                text_field_many_terms => "cool",
+                text_field_many_terms => "cool",
+                text_field_few_terms => "cool",
+                text_field_few_terms => "cool",
+                score_field => 1u64,
+                score_field => 1u64,
+                score_field_f64 => lg_norm.sample(&mut rng),
+                score_field_f64 => lg_norm.sample(&mut rng),
+                score_field_i64 => 1i64,
+                score_field_i64 => 1i64,
+            ))?;
+        }
+        let mut doc_with_value = 1_000_000;
+        if cardinality == Cardinality::OptionalSparse {
+            doc_with_value /= 20;
+        }
+        let _val_max = 1_000_000.0;
+        for _ in 0..doc_with_value {
+            let val: f64 = rng.gen_range(0.0..1_000_000.0);
+            let json = if rng.gen_bool(0.1) {
+                // 10% are numeric values
+                json!({ "mixed_type": val })
+            } else {
+                json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
+            };
+            index_writer.add_document(doc!(
+                text_field => "cool",
+                json_field => json,
+                text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
+                text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
+                score_field => val as u64,
+                score_field_f64 => lg_norm.sample(&mut rng),
+                score_field_i64 => val as i64,
+            ))?;
+            if cardinality == Cardinality::OptionalSparse {
+                for _ in 0..20 {
+                    index_writer.add_document(doc!(text_field => "cool"))?;
+                }
+            }
+        }
+        // writing the segment
+        index_writer.commit()?;
+    }
+
+    Ok(index)
+}
--- a/columnar/src/columnar/writer/mod.rs
+++ b/columnar/src/columnar/writer/mod.rs
@@ -59,22 +59,6 @@ pub struct ColumnarWriter {
    buffers: SpareBuffers,
 }

-#[inline]
-fn mutate_or_create_column<V, TMutator>(
-    arena_hash_map: &mut ArenaHashMap,
-    column_name: &str,
-    updater: TMutator,
-) where
-    V: Copy + 'static,
-    TMutator: FnMut(Option<V>) -> V,
-{
-    assert!(
-        !column_name.as_bytes().contains(&0u8),
-        "key may not contain the 0 byte"
-    );
-    arena_hash_map.mutate_or_create(column_name.as_bytes(), updater);
-}
-
 impl ColumnarWriter {
    pub fn mem_usage(&self) -> usize {
        self.arena.mem_usage()
@@ -175,9 +159,8 @@ impl ColumnarWriter {
                    },
                    &mut self.dictionaries,
                );
-                mutate_or_create_column(
-                    hash_map,
-                    column_name,
+                hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<StrOrBytesColumnWriter>| {
                        let mut column_writer = if let Some(column_writer) = column_opt {
                            column_writer
@@ -192,24 +175,21 @@ impl ColumnarWriter {
                );
            }
            ColumnType::Bool => {
-                mutate_or_create_column(
-                    &mut self.bool_field_hash_map,
-                    column_name,
+                self.bool_field_hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
                );
            }
            ColumnType::DateTime => {
-                mutate_or_create_column(
-                    &mut self.datetime_field_hash_map,
-                    column_name,
+                self.datetime_field_hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
                );
            }
            ColumnType::I64 | ColumnType::F64 | ColumnType::U64 => {
                let numerical_type = column_type.numerical_type().unwrap();
-                mutate_or_create_column(
-                    &mut self.numerical_field_hash_map,
-                    column_name,
+                self.numerical_field_hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<NumericalColumnWriter>| {
                        let mut column: NumericalColumnWriter = column_opt.unwrap_or_default();
                        column.force_numerical_type(numerical_type);
@@ -217,9 +197,8 @@ impl ColumnarWriter {
                    },
                );
            }
-            ColumnType::IpAddr => mutate_or_create_column(
-                &mut self.ip_addr_field_hash_map,
-                column_name,
+            ColumnType::IpAddr => self.ip_addr_field_hash_map.mutate_or_create(
+                column_name.as_bytes(),
                |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
            ),
        }
@@ -232,9 +211,8 @@ impl ColumnarWriter {
        numerical_value: T,
    ) {
        let (hash_map, arena) = (&mut self.numerical_field_hash_map, &mut self.arena);
-        mutate_or_create_column(
-            hash_map,
-            column_name,
+        hash_map.mutate_or_create(
+            column_name.as_bytes(),
            |column_opt: Option<NumericalColumnWriter>| {
                let mut column: NumericalColumnWriter = column_opt.unwrap_or_default();
                column.record_numerical_value(doc, numerical_value.into(), arena);
@@ -244,10 +222,6 @@ impl ColumnarWriter {
    }

    pub fn record_ip_addr(&mut self, doc: RowId, column_name: &str, ip_addr: Ipv6Addr) {
-        assert!(
-            !column_name.as_bytes().contains(&0u8),
-            "key may not contain the 0 byte"
-        );
        let (hash_map, arena) = (&mut self.ip_addr_field_hash_map, &mut self.arena);
        hash_map.mutate_or_create(
            column_name.as_bytes(),
@@ -261,24 +235,30 @@ impl ColumnarWriter {

    pub fn record_bool(&mut self, doc: RowId, column_name: &str, val: bool) {
        let (hash_map, arena) = (&mut self.bool_field_hash_map, &mut self.arena);
-        mutate_or_create_column(hash_map, column_name, |column_opt: Option<ColumnWriter>| {
-            let mut column: ColumnWriter = column_opt.unwrap_or_default();
-            column.record(doc, val, arena);
-            column
-        });
+        hash_map.mutate_or_create(
+            column_name.as_bytes(),
+            |column_opt: Option<ColumnWriter>| {
+                let mut column: ColumnWriter = column_opt.unwrap_or_default();
+                column.record(doc, val, arena);
+                column
+            },
+        );
    }

    pub fn record_datetime(&mut self, doc: RowId, column_name: &str, datetime: common::DateTime) {
        let (hash_map, arena) = (&mut self.datetime_field_hash_map, &mut self.arena);
-        mutate_or_create_column(hash_map, column_name, |column_opt: Option<ColumnWriter>| {
-            let mut column: ColumnWriter = column_opt.unwrap_or_default();
-            column.record(
-                doc,
-                NumericalValue::I64(datetime.into_timestamp_nanos()),
-                arena,
-            );
-            column
-        });
+        hash_map.mutate_or_create(
+            column_name.as_bytes(),
+            |column_opt: Option<ColumnWriter>| {
+                let mut column: ColumnWriter = column_opt.unwrap_or_default();
+                column.record(
+                    doc,
+                    NumericalValue::I64(datetime.into_timestamp_nanos()),
+                    arena,
+                );
+                column
+            },
+        );
    }

    pub fn record_str(&mut self, doc: RowId, column_name: &str, value: &str) {
@@ -303,10 +283,6 @@ impl ColumnarWriter {
    }

    pub fn record_bytes(&mut self, doc: RowId, column_name: &str, value: &[u8]) {
-        assert!(
-            !column_name.as_bytes().contains(&0u8),
-            "key may not contain the 0 byte"
-        );
        let (hash_map, arena, dictionaries) = (
            &mut self.bytes_field_hash_map,
            &mut self.arena,
--- a/examples/custom_collector.rs
+++ b/examples/custom_collector.rs
@@ -11,9 +11,10 @@ use columnar::Column;
 // ---
 // Importing tantivy...
 use tantivy::collector::{Collector, SegmentCollector};
+use tantivy::index::SegmentReader;
 use tantivy::query::QueryParser;
 use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
-use tantivy::{doc, Index, IndexWriter, Score, SegmentReader};
+use tantivy::{doc, Index, IndexWriter, Score};

 #[derive(Default)]
 struct Stats {
--- a/examples/date_time_field.rs
+++ b/examples/date_time_field.rs
@@ -13,7 +13,7 @@ fn main() -> tantivy::Result<()> {
    let opts = DateOptions::from(INDEXED)
        .set_stored()
        .set_fast()
-        .set_precision(tantivy::DateTimePrecision::Seconds);
+        .set_precision(tantivy::schema::DateTimePrecision::Seconds);
    // Add `occurred_at` date field type
    let occurred_at = schema_builder.add_date_field("occurred_at", opts);
    let event_type = schema_builder.add_text_field("event", STRING | STORED);
--- a/examples/doc_mem.rs
+++ b/examples/doc_mem.rs
@@ -1,335 +0,0 @@
-#![allow(unused_imports)]
-#![allow(dead_code)]
-use std::alloc::System;
-use std::env::args;
-use std::net::Ipv6Addr;
-
-use columnar::{MonotonicallyMappableToU128, MonotonicallyMappableToU64};
-use common::{BinarySerializable, CountingWriter, DateTime, FixedSize};
-use peakmem_alloc::*;
-use tantivy::schema::{Field, FieldValue, OwnedValue, FAST, INDEXED, STRING, TEXT};
-use tantivy::tokenizer::PreTokenizedString;
-use tantivy::{doc, TantivyDocument};
-
-const GH_LOGS: &str = include_str!("../benches/gh.json");
-const HDFS_LOGS: &str = include_str!("../benches/hdfs.json");
-
-#[global_allocator]
-static GLOBAL: &PeakMemAlloc<System> = &INSTRUMENTED_SYSTEM;
-
-fn main() {
-    dbg!(std::mem::size_of::<TantivyDocument>());
-    dbg!(std::mem::size_of::<DocContainerRef>());
-    dbg!(std::mem::size_of::<OwnedValue>());
-    dbg!(std::mem::size_of::<OwnedValueMedVec>());
-    dbg!(std::mem::size_of::<ValueContainerRef>());
-    dbg!(std::mem::size_of::<mediumvec::vec32::Vec32::<u8>>());
-
-    let filter = args().nth(1);
-    measure_fn(
-        test_hdfs::<TantivyDocument>,
-        "hdfs TantivyDocument",
-        &filter,
-    );
-    measure_fn(
-        test_hdfs::<TantivyDocumentMedVec>,
-        "hdfs TantivyDocumentMedVec",
-        &filter,
-    );
-    measure_fn(
-        test_hdfs::<DocContainerRef>,
-        "hdfs DocContainerRef",
-        &filter,
-    );
-    measure_fn(test_gh::<TantivyDocument>, "gh TantivyDocument", &filter);
-    measure_fn(
-        test_gh::<TantivyDocumentMedVec>,
-        "gh TantivyDocumentMedVec",
-        &filter,
-    );
-    measure_fn(test_gh::<DocContainerRef>, "gh DocContainerRef", &filter);
-}
-fn measure_fn<F: FnOnce()>(f: F, name: &str, filter: &Option<std::string::String>) {
-    if let Some(filter) = filter {
-        if !name.contains(filter) {
-            return;
-        }
-    }
-    GLOBAL.reset_peak_memory();
-    f();
-    println!("Peak Memory {} : {:#?}", GLOBAL.get_peak_memory(), name);
-}
-fn test_hdfs<T: From<TantivyDocument>>() {
-    let schema = {
-        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
-        schema_builder.add_u64_field("timestamp", INDEXED);
-        schema_builder.add_text_field("body", TEXT);
-        schema_builder.add_text_field("severity", STRING);
-        schema_builder.build()
-    };
-    let mut docs: Vec<T> = Vec::with_capacity(HDFS_LOGS.lines().count());
-    for doc_json in HDFS_LOGS.lines() {
-        let doc = TantivyDocument::parse_json(&schema, doc_json)
-            .unwrap()
-            .into();
-        docs.push(doc);
-    }
-}
-
-fn test_gh<T: From<TantivyDocument>>() {
-    let schema = {
-        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
-        schema_builder.add_json_field("json", FAST);
-        schema_builder.build()
-    };
-    let mut docs: Vec<T> = Vec::with_capacity(GH_LOGS.lines().count());
-    for doc_json in GH_LOGS.lines() {
-        let json_field = schema.get_field("json").unwrap();
-
-        let json_val: serde_json::Map<String, serde_json::Value> =
-            serde_json::from_str(doc_json).unwrap();
-        let doc = tantivy::doc!(json_field=>json_val).into();
-        docs.push(doc);
-    }
-}
-
-#[derive(Clone, Debug, Default)]
-#[allow(dead_code)]
-pub struct TantivyDocumentMedVec {
-    field_values: mediumvec::Vec32<FieldValueMedVec>,
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub struct FieldValueMedVec {
-    pub field: Field,
-    pub value: OwnedValueMedVec,
-}
-
-/// This is a owned variant of `Value`, that can be passed around without lifetimes.
-/// Represents the value of a any field.
-/// It is an enum over all over all of the possible field type.
-#[derive(Debug, Clone, PartialEq)]
-pub enum OwnedValueMedVec {
-    /// A null value.
-    Null,
-    /// The str type is used for any text information.
-    Str(mediumvec::vec32::Vec32<u8>),
-    /// Unsigned 64-bits Integer `u64`
-    U64(u64),
-    /// Signed 64-bits Integer `i64`
-    I64(i64),
-    /// 64-bits Float `f64`
-    F64(f64),
-    /// Bool value
-    Bool(bool),
-    /// Date/time with nanoseconds precision
-    Date(DateTime),
-    Array(mediumvec::vec32::Vec32<Self>),
-    /// Dynamic object value.
-    Object(mediumvec::vec32::Vec32<(String, Self)>),
-    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
-    IpAddr(Ipv6Addr),
-    /// Pre-tokenized str type,
-    PreTokStr(Box<PreTokenizedString>),
-    /// Arbitrarily sized byte array
-    Bytes(mediumvec::vec32::Vec32<u8>),
-}
-
-impl From<TantivyDocument> for TantivyDocumentMedVec {
-    fn from(doc: TantivyDocument) -> Self {
-        let field_values = doc
-            .into_iter()
-            .map(|fv| FieldValueMedVec {
-                field: fv.field,
-                value: fv.value.into(),
-            })
-            .collect();
-        TantivyDocumentMedVec { field_values }
-    }
-}
-impl From<OwnedValue> for OwnedValueMedVec {
-    fn from(value: OwnedValue) -> Self {
-        match value {
-            OwnedValue::Null => OwnedValueMedVec::Null,
-            OwnedValue::Str(s) => {
-                let bytes = s.into_bytes();
-                let vec = mediumvec::vec32::Vec32::from_vec(bytes);
-                OwnedValueMedVec::Str(vec)
-            }
-            OwnedValue::U64(u) => OwnedValueMedVec::U64(u),
-            OwnedValue::I64(i) => OwnedValueMedVec::I64(i),
-            OwnedValue::F64(f) => OwnedValueMedVec::F64(f),
-            OwnedValue::Bool(b) => OwnedValueMedVec::Bool(b),
-            OwnedValue::Date(d) => OwnedValueMedVec::Date(d),
-            OwnedValue::Array(arr) => {
-                let arr = arr.into_iter().map(|v| v.into()).collect();
-                OwnedValueMedVec::Array(arr)
-            }
-            OwnedValue::Object(obj) => {
-                let obj = obj.into_iter().map(|(k, v)| (k, v.into())).collect();
-                OwnedValueMedVec::Object(obj)
-            }
-            OwnedValue::IpAddr(ip) => OwnedValueMedVec::IpAddr(ip),
-            _ => panic!("Unsupported value type {:?}", value),
-        }
-    }
-}
-
-#[repr(packed)]
-pub struct FieldValueContainerRef {
-    pub field: u16,
-    pub value: ValueContainerRef,
-}
-
-#[repr(packed)]
-struct DocContainerRef {
-    container: OwnedValueRefContainer,
-    field_values: mediumvec::Vec32<FieldValueContainerRef>,
-}
-
-#[derive(Default)]
-struct OwnedValueRefContainer {
-    nodes: mediumvec::Vec32<ValueContainerRef>,
-    node_data: mediumvec::Vec32<u8>,
-}
-impl OwnedValueRefContainer {
-    fn shrink_to_fit(&mut self) {
-        self.nodes.shrink_to_fit();
-        self.node_data.shrink_to_fit();
-    }
-}
-
-impl From<TantivyDocument> for DocContainerRef {
-    fn from(doc: TantivyDocument) -> Self {
-        let mut container = OwnedValueRefContainer::default();
-        let field_values = doc
-            .into_iter()
-            .map(|fv| FieldValueContainerRef {
-                field: fv.field.field_id().try_into().unwrap(),
-                value: container.add_value(fv.value),
-            })
-            .collect();
-        container.shrink_to_fit();
-        Self {
-            field_values,
-            container,
-        }
-    }
-}
-
-// References to positions in two array, one for the OwnedValueRef and the other for the encoded
-// bytes
-#[derive(Debug, Clone, PartialEq)]
-pub enum ValueContainerRef {
-    /// A null value.
-    Null,
-    /// The str type is used for any text information.
-    Str(u32),
-    /// Unsigned 64-bits Integer `u64`
-    U64(u32), // position of the serialized 8 bytes in the data array
-    /// Signed 64-bits Integer `i64`
-    I64(u32), // position of the serialized 8 bytes in the data array
-    /// 64-bits Float `f64`
-    F64(u32), // position of the serialized 8 bytes in the data array
-    /// Bool value
-    Bool(bool), // inlined bool
-    /// Date/time with nanoseconds precision
-    Date(u32), // position of the serialized 8 byte in the data array
-    Array(NodeAddress),
-    /// Dynamic object value.
-    Object(NodeAddress),
-    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
-    IpAddr(u32), // position of the serialized 16 bytes in the data array
-    /// Arbitrarily sized byte array
-    Bytes(u32),
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub struct NodeAddress {
-    pos: u32,
-    num_nodes: u32,
-}
-
-impl OwnedValueRefContainer {
-    pub fn add_value(&mut self, value: OwnedValue) -> ValueContainerRef {
-        match value {
-            OwnedValue::Null => ValueContainerRef::Null,
-            OwnedValue::U64(num) => ValueContainerRef::U64(write_into(&mut self.node_data, num)),
-            OwnedValue::I64(num) => ValueContainerRef::I64(write_into(&mut self.node_data, num)),
-            OwnedValue::F64(num) => ValueContainerRef::F64(write_into(&mut self.node_data, num)),
-            OwnedValue::Bool(b) => ValueContainerRef::Bool(b),
-            OwnedValue::Date(date) => ValueContainerRef::Date(write_into(
-                &mut self.node_data,
-                date.into_timestamp_nanos(),
-            )),
-            OwnedValue::Str(bytes) => {
-                ValueContainerRef::Str(write_into(&mut self.node_data, bytes))
-            }
-            OwnedValue::Bytes(bytes) => {
-                ValueContainerRef::Bytes(write_into(&mut self.node_data, bytes))
-            }
-            OwnedValue::Array(elements) => {
-                let pos = self.nodes.len() as u32;
-                let len = elements.len() as u32;
-                for elem in elements {
-                    let ref_elem = self.add_value(elem);
-                    self.nodes.push(ref_elem);
-                }
-                ValueContainerRef::Array(NodeAddress {
-                    pos,
-                    num_nodes: len,
-                })
-            }
-            OwnedValue::Object(entries) => {
-                let pos = self.nodes.len() as u32;
-                let len = entries.len() as u32;
-                for (key, value) in entries {
-                    let ref_key = self.add_value(OwnedValue::Str(key));
-                    let ref_value = self.add_value(value);
-                    self.nodes.push(ref_key);
-                    self.nodes.push(ref_value);
-                }
-                ValueContainerRef::Object(NodeAddress {
-                    pos,
-                    num_nodes: len,
-                })
-            }
-            OwnedValue::IpAddr(num) => {
-                ValueContainerRef::IpAddr(write_into(&mut self.node_data, num.to_u128()))
-            }
-            OwnedValue::PreTokStr(_) => todo!(),
-            OwnedValue::Facet(_) => todo!(),
-        }
-    }
-}
-
-fn write_into<T: BinarySerializable>(data: &mut mediumvec::Vec32<u8>, value: T) -> u32 {
-    let pos = data.len() as u32;
-    data.as_vec(|vec| value.serialize(vec).unwrap());
-    pos
-}
-
-fn write_into_2<T: BinarySerializable>(data: &mut mediumvec::Vec32<u8>, value: T) -> NodeAddress {
-    let pos = data.len() as u32;
-    let mut len = 0;
-    data.as_vec(|vec| {
-        let mut wrt = CountingWriter::wrap(vec);
-        value.serialize(&mut wrt).unwrap();
-        len = wrt.written_bytes() as u32;
-    });
-    NodeAddress {
-        pos,
-        num_nodes: len,
-    }
-}
-
-// impl From<ContainerDocRef> for TantivyDocument {
-// fn from(doc: ContainerDocRef) -> Self {
-// let mut doc2 = TantivyDocument::new();
-// for fv in doc.field_values {
-// let field = Field::from_field_id(fv.field as u32);
-// let value = doc.container.get_value(fv.value);
-// doc2.add(FieldValue::new(field, value));
-//}
-// doc2
-//}
--- a/examples/iterating_docs_and_positions.rs
+++ b/examples/iterating_docs_and_positions.rs
@@ -7,10 +7,11 @@
 // the list of documents containing a term, getting
 // its term frequency, and accessing its positions.

+use tantivy::postings::Postings;
 // ---
 // Importing tantivy...
 use tantivy::schema::*;
-use tantivy::{doc, DocSet, Index, IndexWriter, Postings, TERMINATED};
+use tantivy::{doc, DocSet, Index, IndexWriter, TERMINATED};

 fn main() -> tantivy::Result<()> {
    // We first create a schema for the sake of the
--- a/examples/warmer.rs
+++ b/examples/warmer.rs
@@ -3,10 +3,11 @@ use std::collections::{HashMap, HashSet};
 use std::sync::{Arc, RwLock, Weak};

 use tantivy::collector::TopDocs;
+use tantivy::index::SegmentId;
 use tantivy::query::QueryParser;
 use tantivy::schema::{Schema, FAST, TEXT};
 use tantivy::{
-    doc, DocAddress, DocId, Index, IndexWriter, Opstamp, Searcher, SearcherGeneration, SegmentId,
+    doc, DocAddress, DocId, Index, IndexWriter, Opstamp, Searcher, SearcherGeneration,
    SegmentReader, Warmer,
 };

--- a/src/aggregation/agg_bench.rs
+++ b/src/aggregation/agg_bench.rs
@@ -1,585 +0,0 @@
-#[cfg(all(test, feature = "unstable"))]
-mod bench {
-
-    use rand::prelude::SliceRandom;
-    use rand::rngs::StdRng;
-    use rand::{Rng, SeedableRng};
-    use rand_distr::Distribution;
-    use serde_json::json;
-    use test::{self, Bencher};
-
-    use crate::aggregation::agg_req::Aggregations;
-    use crate::aggregation::AggregationCollector;
-    use crate::query::{AllQuery, TermQuery};
-    use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
-    use crate::{Index, Term};
-
-    #[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
-    enum Cardinality {
-        /// All documents contain exactly one value.
-        /// `Full` is the default for auto-detecting the Cardinality, since it is the most strict.
-        #[default]
-        Full = 0,
-        /// All documents contain at most one value.
-        Optional = 1,
-        /// All documents may contain any number of values.
-        Multivalued = 2,
-        /// 1 / 20 documents has a value
-        Sparse = 3,
-    }
-
-    fn get_collector(agg_req: Aggregations) -> AggregationCollector {
-        AggregationCollector::from_aggs(agg_req, Default::default())
-    }
-
-    fn get_test_index_bench(cardinality: Cardinality) -> crate::Result<Index> {
-        let mut schema_builder = Schema::builder();
-        let text_fieldtype = crate::schema::TextOptions::default()
-            .set_indexing_options(
-                TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
-            )
-            .set_stored();
-        let text_field = schema_builder.add_text_field("text", text_fieldtype);
-        let json_field = schema_builder.add_json_field("json", FAST);
-        let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
-        let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
-        let score_fieldtype = crate::schema::NumericOptions::default().set_fast();
-        let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
-        let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
-        let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
-        let index = Index::create_from_tempdir(schema_builder.build())?;
-        let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
-
-        let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
-
-        let many_terms_data = (0..150_000)
-            .map(|num| format!("author{}", num))
-            .collect::<Vec<_>>();
-        {
-            let mut rng = StdRng::from_seed([1u8; 32]);
-            let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
-            // To make the different test cases comparable we just change one doc to force the
-            // cardinality
-            if cardinality == Cardinality::Optional {
-                index_writer.add_document(doc!())?;
-            }
-            if cardinality == Cardinality::Multivalued {
-                index_writer.add_document(doc!(
-                    json_field => json!({"mixed_type": 10.0}),
-                    json_field => json!({"mixed_type": 10.0}),
-                    text_field => "cool",
-                    text_field => "cool",
-                    text_field_many_terms => "cool",
-                    text_field_many_terms => "cool",
-                    text_field_few_terms => "cool",
-                    text_field_few_terms => "cool",
-                    score_field => 1u64,
-                    score_field => 1u64,
-                    score_field_f64 => lg_norm.sample(&mut rng),
-                    score_field_f64 => lg_norm.sample(&mut rng),
-                    score_field_i64 => 1i64,
-                    score_field_i64 => 1i64,
-                ))?;
-            }
-            let mut doc_with_value = 1_000_000;
-            if cardinality == Cardinality::Sparse {
-                doc_with_value /= 20;
-            }
-            let _val_max = 1_000_000.0;
-            for _ in 0..doc_with_value {
-                let val: f64 = rng.gen_range(0.0..1_000_000.0);
-                let json = if rng.gen_bool(0.1) {
-                    // 10% are numeric values
-                    json!({ "mixed_type": val })
-                } else {
-                    json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
-                };
-                index_writer.add_document(doc!(
-                    text_field => "cool",
-                    json_field => json,
-                    text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
-                    text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
-                    score_field => val as u64,
-                    score_field_f64 => lg_norm.sample(&mut rng),
-                    score_field_i64 => val as i64,
-                ))?;
-                if cardinality == Cardinality::Sparse {
-                    for _ in 0..20 {
-                        index_writer.add_document(doc!(text_field => "cool"))?;
-                    }
-                }
-            }
-            // writing the segment
-            index_writer.commit()?;
-        }
-
-        Ok(index)
-    }
-
-    use paste::paste;
-    #[macro_export]
-    macro_rules! bench_all_cardinalities {
-        (  $x:ident ) => {
-            paste! {
-                #[bench]
-                fn $x(b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Full)
-                }
-
-                #[bench]
-                fn [<$x _opt>](b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Optional)
-                }
-
-                #[bench]
-                fn [<$x _multi>](b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Multivalued)
-                }
-
-                #[bench]
-                fn [<$x _sparse>](b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Sparse)
-                }
-
-            }
-        };
-    }
-
-    bench_all_cardinalities!(bench_aggregation_average_u64);
-
-    fn bench_aggregation_average_u64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average": { "avg": { "field": "score", } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_stats_f64);
-
-    fn bench_aggregation_stats_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average_f64": { "stats": { "field": "score_f64", } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_average_f64);
-
-    fn bench_aggregation_average_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average_f64": { "avg": { "field": "score_f64", } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_percentiles_f64);
-
-    fn bench_aggregation_percentiles_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_str = r#"
-            {
-              "mypercentiles": {
-                "percentiles": {
-                  "field": "score_f64",
-                  "percents": [ 95, 99, 99.9 ]
-                }
-              }
-            } "#;
-            let agg_req_1: Aggregations = serde_json::from_str(agg_req_str).unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_average_u64_and_f64);
-
-    fn bench_aggregation_average_u64_and_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average_f64": { "avg": { "field": "score_f64" } },
-                "average": { "avg": { "field": "score" } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_few);
-
-    fn bench_aggregation_terms_few_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": { "terms": { "field": "text_few_terms" } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many_with_top_hits_agg);
-
-    fn bench_aggregation_terms_many_with_top_hits_agg_card(
-        b: &mut Bencher,
-        cardinality: Cardinality,
-    ) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": {
-                    "terms": { "field": "text_many_terms" },
-                    "aggs": {
-                        "top_hits": { "top_hits":
-                            {
-                                "sort": [
-                                    { "score": "desc" }
-                                ],
-                                "size": 2,
-                                "doc_value_fields": ["score_f64"]
-                            }
-                        }
-                    }
-                },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many_with_sub_agg);
-
-    fn bench_aggregation_terms_many_with_sub_agg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": {
-                    "terms": { "field": "text_many_terms" },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many_json_mixed_type_with_sub_agg);
-
-    fn bench_aggregation_terms_many_json_mixed_type_with_sub_agg_card(
-        b: &mut Bencher,
-        cardinality: Cardinality,
-    ) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": {
-                    "terms": { "field": "json.mixed_type" },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many2);
-
-    fn bench_aggregation_terms_many2_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": { "terms": { "field": "text_many_terms" } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many_order_by_term);
-
-    fn bench_aggregation_terms_many_order_by_term_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": { "terms": { "field": "text_many_terms", "order": { "_key": "desc" } } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_range_only);
-
-    fn bench_aggregation_range_only_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "range_f64": { "range": { "field": "score_f64", "ranges": [
-                    { "from": 3, "to": 7000 },
-                    { "from": 7000, "to": 20000 },
-                    { "from": 20000, "to": 30000 },
-                    { "from": 30000, "to": 40000 },
-                    { "from": 40000, "to": 50000 },
-                    { "from": 50000, "to": 60000 }
-                ] } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_range_with_avg);
-
-    fn bench_aggregation_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "range": {
-                        "field": "score_f64",
-                        "ranges": [
-                            { "from": 3, "to": 7000 },
-                            { "from": 7000, "to": 20000 },
-                            { "from": 20000, "to": 30000 },
-                            { "from": 30000, "to": 40000 },
-                            { "from": 40000, "to": 50000 },
-                            { "from": 50000, "to": 60000 }
-                        ]
-                    },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    // hard bounds has a different algorithm, because it actually limits collection range
-    //
-    bench_all_cardinalities!(bench_aggregation_histogram_only_hard_bounds);
-
-    fn bench_aggregation_histogram_only_hard_bounds_card(
-        b: &mut Bencher,
-        cardinality: Cardinality,
-    ) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": { "histogram": { "field": "score_f64", "interval": 100, "hard_bounds": { "min": 1000, "max": 300000 } } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_histogram_with_avg);
-
-    fn bench_aggregation_histogram_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "histogram": { "field": "score_f64", "interval": 100 },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_histogram_only);
-
-    fn bench_aggregation_histogram_only_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "histogram": {
-                        "field": "score_f64",
-                        "interval": 100 // 1000 buckets
-                    },
-                }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_avg_and_range_with_avg);
-
-    fn bench_aggregation_avg_and_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "range": {
-                        "field": "score_f64",
-                        "ranges": [
-                            { "from": 3, "to": 7000 },
-                            { "from": 7000, "to": 20000 },
-                            { "from": 20000, "to": 60000 }
-                        ]
-                    },
-                    "aggs": {
-                        "average_in_range": { "avg": { "field": "score" } }
-                    }
-                },
-                "average": { "avg": { "field": "score" } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-}
--- a/src/aggregation/agg_limits.rs
+++ b/src/aggregation/agg_limits.rs
@@ -81,10 +81,11 @@ impl AggregationLimits {
        }
    }

-    pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
-        self.memory_consumption
-            .fetch_add(num_bytes, Ordering::Relaxed);
-        validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
+    pub(crate) fn add_memory_consumed(&self, add_num_bytes: u64) -> crate::Result<()> {
+        let prev_value = self
+            .memory_consumption
+            .fetch_add(add_num_bytes, Ordering::Relaxed);
+        validate_memory_consumption(prev_value + add_num_bytes, self.memory_limit)?;
        Ok(())
    }

@@ -94,11 +95,11 @@ impl AggregationLimits {
 }

 fn validate_memory_consumption(
-    memory_consumption: &AtomicU64,
+    memory_consumption: u64,
    memory_limit: ByteCount,
 ) -> Result<(), AggregationError> {
    // Load the estimated memory consumed by the aggregations
-    let memory_consumed: ByteCount = memory_consumption.load(Ordering::Relaxed).into();
+    let memory_consumed: ByteCount = memory_consumption.into();
    if memory_consumed > memory_limit {
        return Err(AggregationError::MemoryExceeded {
            limit: memory_limit,
@@ -118,10 +119,11 @@ pub struct ResourceLimitGuard {
 }

 impl ResourceLimitGuard {
-    pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
-        self.memory_consumption
-            .fetch_add(num_bytes, Ordering::Relaxed);
-        validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
+    pub(crate) fn add_memory_consumed(&self, add_num_bytes: u64) -> crate::Result<()> {
+        let prev_value = self
+            .memory_consumption
+            .fetch_add(add_num_bytes, Ordering::Relaxed);
+        validate_memory_consumption(prev_value + add_num_bytes, self.memory_limit)?;
        Ok(())
    }
 }
--- a/src/aggregation/agg_req_with_accessor.rs
+++ b/src/aggregation/agg_req_with_accessor.rs
@@ -17,7 +17,8 @@ use super::metric::{
 use super::segment_agg_result::AggregationLimits;
 use super::VecWithNames;
 use crate::aggregation::{f64_to_fastfield_u64, Key};
-use crate::{SegmentOrdinal, SegmentReader};
+use crate::index::SegmentReader;
+use crate::SegmentOrdinal;

 #[derive(Default)]
 pub(crate) struct AggregationsWithAccessor {
--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -331,9 +331,11 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
        }

        let mem_delta = self.get_memory_consumption() - mem_pre;
-        bucket_agg_accessor
-            .limits
-            .add_memory_consumed(mem_delta as u64)?;
+        if mem_delta > 0 {
+            bucket_agg_accessor
+                .limits
+                .add_memory_consumed(mem_delta as u64)?;
+        }

        Ok(())
    }
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -324,9 +324,11 @@ impl SegmentAggregationCollector for SegmentTermCollector {
        }

        let mem_delta = self.get_memory_consumption() - mem_pre;
-        bucket_agg_accessor
-            .limits
-            .add_memory_consumed(mem_delta as u64)?;
+        if mem_delta > 0 {
+            bucket_agg_accessor
+                .limits
+                .add_memory_consumed(mem_delta as u64)?;
+        }

        Ok(())
    }
--- a/src/aggregation/collector.rs
+++ b/src/aggregation/collector.rs
@@ -8,7 +8,8 @@ use super::segment_agg_result::{
 };
 use crate::aggregation::agg_req_with_accessor::get_aggs_with_segment_accessor_and_validate;
 use crate::collector::{Collector, SegmentCollector};
-use crate::{DocId, SegmentOrdinal, SegmentReader, TantivyError};
+use crate::index::SegmentReader;
+use crate::{DocId, SegmentOrdinal, TantivyError};

 /// The default max bucket count, before the aggregation fails.
 pub const DEFAULT_BUCKET_LIMIT: u32 = 65000;
--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -143,8 +143,6 @@ use std::fmt::Display;
 #[cfg(test)]
 mod agg_tests;

-mod agg_bench;
-
 use core::fmt;

 pub use agg_limits::AggregationLimits;
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -4,7 +4,8 @@ use std::marker::PhantomData;
 use serde::{Deserialize, Serialize};

 use super::top_score_collector::TopNComputer;
-use crate::{DocAddress, DocId, SegmentOrdinal, SegmentReader};
+use crate::index::SegmentReader;
+use crate::{DocAddress, DocId, SegmentOrdinal};

 /// Contains a feature (field, score, etc.) of a document along with the document address.
 ///
--- a/src/core/executor.rs
+++ b/src/core/executor.rs
@@ -1,19 +1,25 @@
-use rayon::{ThreadPool, ThreadPoolBuilder};
+use std::sync::Arc;
+
+#[cfg(feature = "quickwit")]
+use futures_util::{future::Either, FutureExt};

 use crate::TantivyError;

-/// Search executor whether search request are single thread or multithread.
-///
-/// We don't expose Rayon thread pool directly here for several reasons.
-///
-/// First dependency hell. It is not a good idea to expose the
-/// API of a dependency, knowing it might conflict with a different version
-/// used by the client. Second, we may stop using rayon in the future.
+/// Executor makes it possible to run tasks in a single thread or
+/// in a thread pool.
+#[derive(Clone)]
 pub enum Executor {
    /// Single thread variant of an Executor
    SingleThread,
    /// Thread pool variant of an Executor
-    ThreadPool(ThreadPool),
+    ThreadPool(Arc<rayon::ThreadPool>),
+}
+
+#[cfg(feature = "quickwit")]
+impl From<Arc<rayon::ThreadPool>> for Executor {
+    fn from(thread_pool: Arc<rayon::ThreadPool>) -> Self {
+        Executor::ThreadPool(thread_pool)
+    }
 }

 impl Executor {
@@ -24,11 +30,11 @@ impl Executor {

    /// Creates an Executor that dispatches the tasks in a thread pool.
    pub fn multi_thread(num_threads: usize, prefix: &'static str) -> crate::Result<Executor> {
-        let pool = ThreadPoolBuilder::new()
+        let pool = rayon::ThreadPoolBuilder::new()
            .num_threads(num_threads)
            .thread_name(move |num| format!("{prefix}{num}"))
            .build()?;
-        Ok(Executor::ThreadPool(pool))
+        Ok(Executor::ThreadPool(Arc::new(pool)))
    }

    /// Perform a map in the thread pool.
@@ -91,11 +97,36 @@ impl Executor {
            }
        }
    }
+
+    /// Spawn a task on the pool, returning a future completing on task success.
+    ///
+    /// If the task panics, returns `Err(())`.
+    #[cfg(feature = "quickwit")]
+    pub fn spawn_blocking<T: Send + 'static>(
+        &self,
+        cpu_intensive_task: impl FnOnce() -> T + Send + 'static,
+    ) -> impl std::future::Future<Output = Result<T, ()>> {
+        match self {
+            Executor::SingleThread => Either::Left(std::future::ready(Ok(cpu_intensive_task()))),
+            Executor::ThreadPool(pool) => {
+                let (sender, receiver) = oneshot::channel();
+                pool.spawn(|| {
+                    if sender.is_closed() {
+                        return;
+                    }
+                    let task_result = cpu_intensive_task();
+                    let _ = sender.send(task_result);
+                });
+
+                let res = receiver.map(|res| res.map_err(|_| ()));
+                Either::Right(res)
+            }
+        }
+    }
 }

 #[cfg(test)]
 mod tests {
-
    use super::Executor;

    #[test]
@@ -147,4 +178,34 @@ mod tests {
            assert_eq!(result[i], i * 2);
        }
    }
+
+    #[cfg(feature = "quickwit")]
+    #[test]
+    fn test_cancel_cpu_intensive_tasks() {
+        use std::sync::atomic::{AtomicU64, Ordering};
+        use std::sync::Arc;
+        use std::time::Duration;
+
+        let counter: Arc<AtomicU64> = Default::default();
+        let mut futures = Vec::new();
+        let executor = Executor::multi_thread(3, "search-test").unwrap();
+        for _ in 0..1_000 {
+            let counter_clone = counter.clone();
+            let fut = executor.spawn_blocking(move || {
+                std::thread::sleep(Duration::from_millis(4));
+                counter_clone.fetch_add(1, Ordering::SeqCst)
+            });
+            futures.push(fut);
+        }
+        std::thread::sleep(Duration::from_millis(5));
+        // The first few tasks should run, but the others should get cancelled.
+        drop(futures);
+        while Arc::strong_count(&counter) > 1 {
+            std::thread::sleep(Duration::from_millis(10));
+        }
+        // With ideal timing, we expect the result to always be 6, but as long as we ran some and
+        // cancelled most, the test is a success.
+        assert!(counter.load(Ordering::SeqCst) > 0);
+        assert!(counter.load(Ordering::SeqCst) < 50);
+    }
 }
--- a/src/core/json_utils.rs
+++ b/src/core/json_utils.rs
@@ -31,7 +31,7 @@ use crate::{DateTime, DocId, Term};
 /// position 1.
 /// As a result, with lemmatization, "The Smiths" will match our object.
 ///
-/// Worse, if a same term is appears in the second object, a non increasing value would be pushed
+/// Worse, if a same term appears in the second object, a non increasing value would be pushed
 /// to the position recorder probably provoking a panic.
 ///
 /// This problem is solved for regular multivalued object by offsetting the position
@@ -50,7 +50,7 @@ use crate::{DateTime, DocId, Term};
 /// We can therefore afford working with a map that is not imperfect. It is fine if several
 /// path map to the same index position as long as the probability is relatively low.
 #[derive(Default)]
-struct IndexingPositionsPerPath {
+pub(crate) struct IndexingPositionsPerPath {
    positions_per_path: FxHashMap<u32, IndexingPosition>,
 }

@@ -58,6 +58,9 @@ impl IndexingPositionsPerPath {
    fn get_position_from_id(&mut self, id: u32) -> &mut IndexingPosition {
        self.positions_per_path.entry(id).or_default()
    }
+    pub fn clear(&mut self) {
+        self.positions_per_path.clear();
+    }
 }

 /// Convert JSON_PATH_SEGMENT_SEP to a dot.
@@ -68,36 +71,6 @@ pub fn json_path_sep_to_dot(path: &mut str) {
    }
 }

-#[allow(clippy::too_many_arguments)]
-pub(crate) fn index_json_values<'a, V: Value<'a>>(
-    doc: DocId,
-    json_visitors: impl Iterator<Item = crate::Result<V::ObjectIter>>,
-    text_analyzer: &mut TextAnalyzer,
-    expand_dots_enabled: bool,
-    term_buffer: &mut Term,
-    postings_writer: &mut dyn PostingsWriter,
-    json_path_writer: &mut JsonPathWriter,
-    ctx: &mut IndexingContext,
-) -> crate::Result<()> {
-    json_path_writer.clear();
-    json_path_writer.set_expand_dots(expand_dots_enabled);
-    let mut positions_per_path: IndexingPositionsPerPath = Default::default();
-    for json_visitor_res in json_visitors {
-        let json_visitor = json_visitor_res?;
-        index_json_object::<V>(
-            doc,
-            json_visitor,
-            text_analyzer,
-            term_buffer,
-            json_path_writer,
-            postings_writer,
-            ctx,
-            &mut positions_per_path,
-        );
-    }
-    Ok(())
-}
-
 #[allow(clippy::too_many_arguments)]
 fn index_json_object<'a, V: Value<'a>>(
    doc: DocId,
@@ -126,7 +99,7 @@ fn index_json_object<'a, V: Value<'a>>(
 }

 #[allow(clippy::too_many_arguments)]
-fn index_json_value<'a, V: Value<'a>>(
+pub(crate) fn index_json_value<'a, V: Value<'a>>(
    doc: DocId,
    json_value: V,
    text_analyzer: &mut TextAnalyzer,
@@ -166,12 +139,18 @@ fn index_json_value<'a, V: Value<'a>>(
                );
            }
            ReferenceValueLeaf::U64(val) => {
+                // Try to convert to i64 first: querying applies the same logic and prefers i64
+                // values, so indexed and query terms agree.
                set_path_id(
                    term_buffer,
                    ctx.path_to_unordered_id
                        .get_or_allocate_unordered_id(json_path_writer.as_str()),
                );
-                term_buffer.append_type_and_fast_value(val);
+                if let Ok(i64_val) = val.try_into() {
+                    term_buffer.append_type_and_fast_value::<i64>(i64_val);
+                } else {
+                    term_buffer.append_type_and_fast_value(val);
+                }
                postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
            }
            ReferenceValueLeaf::I64(val) => {
@@ -257,10 +236,7 @@ fn index_json_value<'a, V: Value<'a>>(
 /// Tries to infer a JSON type from a string and append it to the term.
 ///
 /// The term must be json + JSON path.
-pub(crate) fn convert_to_fast_value_and_append_to_json_term(
-    mut term: Term,
-    phrase: &str,
-) -> Option<Term> {
+pub fn convert_to_fast_value_and_append_to_json_term(mut term: Term, phrase: &str) -> Option<Term> {
    assert_eq!(
        term.value()
            .as_json_value_bytes()
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -4,13 +4,13 @@ use std::{fmt, io};

 use crate::collector::Collector;
 use crate::core::Executor;
-use crate::index::SegmentReader;
+use crate::index::{SegmentId, SegmentReader};
 use crate::query::{Bm25StatisticsProvider, EnableScoring, Query};
 use crate::schema::document::DocumentDeserialize;
 use crate::schema::{Schema, Term};
 use crate::space_usage::SearcherSpaceUsage;
 use crate::store::{CacheStats, StoreReader};
-use crate::{DocAddress, Index, Opstamp, SegmentId, TrackedObject};
+use crate::{DocAddress, Index, Opstamp, TrackedObject};

 /// Identifies the searcher generation accessed by a [`Searcher`].
 ///
@@ -109,8 +109,9 @@ impl Searcher {
        &self,
        doc_address: DocAddress,
    ) -> crate::Result<D> {
+        let executor = self.inner.index.search_executor();
        let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
-        store_reader.get_async(doc_address.doc_id).await
+        store_reader.get_async(doc_address.doc_id, executor).await
    }

    /// Access the schema associated with the index of this searcher.
--- a/src/core/tests.rs
+++ b/src/core/tests.rs
@@ -1,12 +1,14 @@
 use crate::collector::Count;
 use crate::directory::{RamDirectory, WatchCallback};
+use crate::index::SegmentId;
 use crate::indexer::{LogMergePolicy, NoMergePolicy};
+use crate::postings::Postings;
 use crate::query::TermQuery;
 use crate::schema::{Field, IndexRecordOption, Schema, INDEXED, STRING, TEXT};
 use crate::tokenizer::TokenizerManager;
 use crate::{
-    Directory, DocSet, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, Postings,
-    ReloadPolicy, SegmentId, TantivyDocument, Term,
+    Directory, DocSet, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, ReloadPolicy,
+    TantivyDocument, Term,
 };

 #[test]
@@ -417,7 +419,7 @@ fn test_non_text_json_term_freq() {
    let inv_idx = segment_reader.inverted_index(field).unwrap();

    let mut term = Term::from_field_json_path(field, "tenant_id", false);
-    term.append_type_and_fast_value(75u64);
+    term.append_type_and_fast_value(75i64);

    let postings = inv_idx
        .read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
@@ -451,7 +453,7 @@ fn test_non_text_json_term_freq_bitpacked() {
    let inv_idx = segment_reader.inverted_index(field).unwrap();

    let mut term = Term::from_field_json_path(field, "tenant_id", false);
-    term.append_type_and_fast_value(75u64);
+    term.append_type_and_fast_value(75i64);

    let mut postings = inv_idx
        .read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
--- a/src/fastfield/facet_reader.rs
+++ b/src/fastfield/facet_reader.rs
@@ -146,8 +146,11 @@ mod tests {
        facet_ords.extend(facet_reader.facet_ords(0u32));
        assert_eq!(&facet_ords, &[0u64]);
        let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0u32, 0u32))?;
-        let value: Option<&Facet> = doc.get_first(facet_field).and_then(|v| v.as_facet());
-        assert_eq!(value, Facet::from_text("/a/b").ok().as_ref());
+        let value: Option<Facet> = doc
+            .get_first(facet_field)
+            .and_then(|v| v.as_facet())
+            .map(|facet| Facet::from_encoded_string(facet.to_string()));
+        assert_eq!(value, Facet::from_text("/a/b").ok());
        Ok(())
    }

--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -80,7 +80,7 @@ mod tests {
    use std::path::Path;

    use columnar::StrColumn;
-    use common::{ByteCount, HasLen, TerminatingWrite};
+    use common::{ByteCount, DateTimePrecision, HasLen, TerminatingWrite};
    use once_cell::sync::Lazy;
    use rand::prelude::SliceRandom;
    use rand::rngs::StdRng;
@@ -88,14 +88,15 @@ mod tests {

    use super::*;
    use crate::directory::{Directory, RamDirectory, WritePtr};
+    use crate::index::SegmentId;
    use crate::merge_policy::NoMergePolicy;
    use crate::schema::{
-        Facet, FacetOptions, Field, JsonObjectOptions, Schema, SchemaBuilder, TantivyDocument,
-        TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
+        DateOptions, Facet, FacetOptions, Field, JsonObjectOptions, Schema, SchemaBuilder,
+        TantivyDocument, TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
    };
    use crate::time::OffsetDateTime;
    use crate::tokenizer::{LowerCaser, RawTokenizer, TextAnalyzer, TokenizerManager};
-    use crate::{DateOptions, DateTimePrecision, Index, IndexWriter, SegmentId, SegmentReader};
+    use crate::{Index, IndexWriter, SegmentReader};

    pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
        let mut schema_builder = Schema::builder();
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -1,14 +1,14 @@
 use std::io;

 use columnar::{ColumnarWriter, NumericalValue};
-use common::JsonPathWriter;
+use common::{DateTimePrecision, JsonPathWriter};
 use tokenizer_api::Token;

 use crate::indexer::doc_id_mapping::DocIdMapping;
 use crate::schema::document::{Document, ReferenceValue, ReferenceValueLeaf, Value};
 use crate::schema::{value_type_to_column_type, Field, FieldType, Schema, Type};
 use crate::tokenizer::{TextAnalyzer, TokenizerManager};
-use crate::{DateTimePrecision, DocId, TantivyError};
+use crate::{DocId, TantivyError};

 /// Only index JSON down to a depth of 20.
 /// This is mostly to guard us from a stack overflow triggered by malicious input.
@@ -183,8 +183,7 @@ impl FastFieldsWriter {
                        .record_datetime(doc_id, field_name, truncated_datetime);
                }
                ReferenceValueLeaf::Facet(val) => {
-                    self.columnar_writer
-                        .record_str(doc_id, field_name, val.encoded_str());
+                    self.columnar_writer.record_str(doc_id, field_name, val);
                }
                ReferenceValueLeaf::Bytes(val) => {
                    self.columnar_writer.record_bytes(doc_id, field_name, val);
--- a/src/index/index.rs
+++ b/src/index/index.rs
@@ -3,7 +3,7 @@ use std::fmt;
 #[cfg(feature = "mmap")]
 use std::path::Path;
 use std::path::PathBuf;
-use std::sync::Arc;
+use std::thread::available_parallelism;

 use super::segment::Segment;
 use super::segment_reader::merge_field_meta_data;
@@ -293,7 +293,7 @@ pub struct Index {
    directory: ManagedDirectory,
    schema: Schema,
    settings: IndexSettings,
-    executor: Arc<Executor>,
+    executor: Executor,
    tokenizers: TokenizerManager,
    fast_field_tokenizers: TokenizerManager,
    inventory: SegmentMetaInventory,
@@ -318,29 +318,25 @@ impl Index {
    ///
    /// By default the executor is single thread, and simply runs in the calling thread.
    pub fn search_executor(&self) -> &Executor {
-        self.executor.as_ref()
+        &self.executor
    }

    /// Replace the default single thread search executor pool
    /// by a thread pool with a given number of threads.
    pub fn set_multithread_executor(&mut self, num_threads: usize) -> crate::Result<()> {
-        self.executor = Arc::new(Executor::multi_thread(num_threads, "tantivy-search-")?);
+        self.executor = Executor::multi_thread(num_threads, "tantivy-search-")?;
        Ok(())
    }

    /// Custom thread pool by a outer thread pool.
-    pub fn set_shared_multithread_executor(
-        &mut self,
-        shared_thread_pool: Arc<Executor>,
-    ) -> crate::Result<()> {
-        self.executor = shared_thread_pool.clone();
-        Ok(())
+    pub fn set_executor(&mut self, executor: Executor) {
+        self.executor = executor;
    }

    /// Replace the default single thread search executor pool
    /// by a thread pool with as many threads as there are CPUs on the system.
    pub fn set_default_multithread_executor(&mut self) -> crate::Result<()> {
-        let default_num_threads = num_cpus::get();
+        let default_num_threads = available_parallelism()?.get();
        self.set_multithread_executor(default_num_threads)
    }

@@ -418,7 +414,7 @@ impl Index {
            schema,
            tokenizers: TokenizerManager::default(),
            fast_field_tokenizers: TokenizerManager::default(),
-            executor: Arc::new(Executor::single_thread()),
+            executor: Executor::single_thread(),
            inventory,
        }
    }
@@ -621,7 +617,7 @@ impl Index {
        &self,
        memory_budget_in_bytes: usize,
    ) -> crate::Result<IndexWriter<D>> {
-        let mut num_threads = std::cmp::min(num_cpus::get(), MAX_NUM_THREAD);
+        let mut num_threads = std::cmp::min(available_parallelism()?.get(), MAX_NUM_THREAD);
        let memory_budget_num_bytes_per_thread = memory_budget_in_bytes / num_threads;
        if memory_budget_num_bytes_per_thread < MEMORY_BUDGET_NUM_BYTES_MIN {
            num_threads = (memory_budget_in_bytes / MEMORY_BUDGET_NUM_BYTES_MIN).max(1);
--- a/src/index/mod.rs
+++ b/src/index/mod.rs
@@ -1,5 +1,3 @@
-//! # Index Module
-//!
 //! The `index` module in Tantivy contains core components to read and write indexes.
 //!
 //! It contains `Index` and `Segment`, where a `Index` consists of one or more `Segment`s.
--- a/src/indexer/delete_queue.rs
+++ b/src/indexer/delete_queue.rs
@@ -246,8 +246,9 @@ impl DeleteCursor {
 mod tests {

    use super::{DeleteOperation, DeleteQueue};
+    use crate::index::SegmentReader;
    use crate::query::{Explanation, Scorer, Weight};
-    use crate::{DocId, Score, SegmentReader};
+    use crate::{DocId, Score};

    struct DummyWeight;
    impl Weight for DummyWeight {
--- a/src/indexer/log_merge_policy.rs
+++ b/src/indexer/log_merge_policy.rs
@@ -144,9 +144,9 @@ mod tests {
    use once_cell::sync::Lazy;

    use super::*;
-    use crate::index::SegmentMetaInventory;
+    use crate::index::{SegmentId, SegmentMetaInventory};
+    use crate::schema;
    use crate::schema::INDEXED;
-    use crate::{schema, SegmentId};

    static INVENTORY: Lazy<SegmentMetaInventory> = Lazy::new(SegmentMetaInventory::default);

--- a/src/indexer/merge_operation.rs
+++ b/src/indexer/merge_operation.rs
@@ -1,7 +1,8 @@
 use std::collections::HashSet;
 use std::ops::Deref;

-use crate::{Inventory, Opstamp, SegmentId, TrackedObject};
+use crate::index::SegmentId;
+use crate::{Inventory, Opstamp, TrackedObject};

 #[derive(Default)]
 pub(crate) struct MergeOperationInventory(Inventory<InnerMergeOperation>);
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -13,7 +13,7 @@ use crate::docset::{DocSet, TERMINATED};
 use crate::error::DataCorruption;
 use crate::fastfield::{AliveBitSet, FastFieldNotAvailableError};
 use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
-use crate::index::{Segment, SegmentReader};
+use crate::index::{Segment, SegmentComponent, SegmentReader};
 use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping};
 use crate::indexer::SegmentSerializer;
 use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
@@ -21,8 +21,7 @@ use crate::schema::{value_type_to_column_type, Field, FieldType, Schema};
 use crate::store::StoreWriter;
 use crate::termdict::{TermMerger, TermOrdinal};
 use crate::{
-    DocAddress, DocId, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
-    SegmentComponent, SegmentOrdinal,
+    DocAddress, DocId, IndexSettings, IndexSortByField, InvertedIndexReader, Order, SegmentOrdinal,
 };

 /// Segment's max doc must be `< MAX_DOC_LIMIT`.
@@ -794,7 +793,7 @@ mod tests {
        BytesFastFieldTestCollector, FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE,
    };
    use crate::collector::{Count, FacetCollector};
-    use crate::index::Index;
+    use crate::index::{Index, SegmentId};
    use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
    use crate::schema::document::Value;
    use crate::schema::{
@@ -804,7 +803,7 @@ mod tests {
    use crate::time::OffsetDateTime;
    use crate::{
        assert_nearly_equals, schema, DateTime, DocAddress, DocId, DocSet, IndexSettings,
-        IndexSortByField, IndexWriter, Order, Searcher, SegmentId,
+        IndexSortByField, IndexWriter, Order, Searcher,
    };

    #[test]
--- a/src/indexer/merger_sorted_index_test.rs
+++ b/src/indexer/merger_sorted_index_test.rs
@@ -3,6 +3,7 @@ mod tests {
    use crate::collector::TopDocs;
    use crate::fastfield::AliveBitSet;
    use crate::index::Index;
+    use crate::postings::Postings;
    use crate::query::QueryParser;
    use crate::schema::document::Value;
    use crate::schema::{
@@ -10,8 +11,8 @@ mod tests {
        TextFieldIndexing, TextOptions,
    };
    use crate::{
-        DocAddress, DocSet, IndexSettings, IndexSortByField, IndexWriter, Order, Postings,
-        TantivyDocument, Term,
+        DocAddress, DocSet, IndexSettings, IndexSortByField, IndexWriter, Order, TantivyDocument,
+        Term,
    };

    fn create_test_index_posting_list_issue(index_settings: Option<IndexSettings>) -> Index {
--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -182,7 +182,7 @@ mod tests_mmap {
        let index = Index::create_in_ram(schema_builder.build());
        let mut index_writer = index.writer_for_tests().unwrap();
        index_writer
-            .add_document(doc!(field=>json!({format!("{field_name_in}"): "test1"})))
+            .add_document(doc!(field=>json!({format!("{field_name_in}"): "test1", format!("num{field_name_in}"): 10})))
            .unwrap();
        index_writer
            .add_document(doc!(field=>json!({format!("a{field_name_in}"): "test2"})))
@@ -260,6 +260,64 @@ mod tests_mmap {
            "test6",
        );
        test_agg(format!("json.{field_name_out}a").as_str(), "test7");
+
+        // `.` is stored as `\u{0001}` internally in tantivy
+        let field_name_out_internal = if field_name_out == "." {
+            "\u{0001}"
+        } else {
+            field_name_out
+        };
+
+        let mut fields = reader.searcher().segment_readers()[0]
+            .inverted_index(field)
+            .unwrap()
+            .list_encoded_fields()
+            .unwrap();
+        assert_eq!(fields.len(), 8);
+        fields.sort();
+        let mut expected_fields = vec![
+            (format!("a{field_name_out_internal}"), Type::Str),
+            (format!("a{field_name_out_internal}a"), Type::Str),
+            (
+                format!("a{field_name_out_internal}a{field_name_out_internal}"),
+                Type::Str,
+            ),
+            (
+                format!("a{field_name_out_internal}\u{1}ab{field_name_out_internal}"),
+                Type::Str,
+            ),
+            (
+                format!("a{field_name_out_internal}\u{1}a{field_name_out_internal}"),
+                Type::Str,
+            ),
+            (format!("{field_name_out_internal}a"), Type::Str),
+            (format!("{field_name_out_internal}"), Type::Str),
+            (format!("num{field_name_out_internal}"), Type::I64),
+        ];
+        expected_fields.sort();
+        assert_eq!(fields, expected_fields);
+        // Check columnar reader
+        let mut columns = reader.searcher().segment_readers()[0]
+            .fast_fields()
+            .columnar()
+            .list_columns()
+            .unwrap()
+            .into_iter()
+            .map(|(name, _)| name)
+            .collect::<Vec<_>>();
+        let mut expected_columns = vec![
+            format!("json\u{1}{field_name_out_internal}"),
+            format!("json\u{1}{field_name_out_internal}a"),
+            format!("json\u{1}a{field_name_out_internal}"),
+            format!("json\u{1}a{field_name_out_internal}a"),
+            format!("json\u{1}a{field_name_out_internal}a{field_name_out_internal}"),
+            format!("json\u{1}a{field_name_out_internal}\u{1}ab{field_name_out_internal}"),
+            format!("json\u{1}a{field_name_out_internal}\u{1}a{field_name_out_internal}"),
+            format!("json\u{1}num{field_name_out_internal}"),
+        ];
+        columns.sort();
+        expected_columns.sort();
+        assert_eq!(columns, expected_columns);
    }

    #[test]
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -5,20 +5,20 @@ use tokenizer_api::BoxTokenStream;

 use super::doc_id_mapping::{get_doc_id_mapping_from_field, DocIdMapping};
 use super::operation::AddOperation;
-use crate::core::json_utils::index_json_values;
 use crate::fastfield::FastFieldsWriter;
 use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
-use crate::index::Segment;
+use crate::index::{Segment, SegmentComponent};
 use crate::indexer::segment_serializer::SegmentSerializer;
+use crate::json_utils::{index_json_value, IndexingPositionsPerPath};
 use crate::postings::{
    compute_table_memory_size, serialize_postings, IndexingContext, IndexingPosition,
    PerFieldPostingsWriter, PostingsWriter,
 };
-use crate::schema::document::{Document, ReferenceValue, Value};
+use crate::schema::document::{Document, Value};
 use crate::schema::{FieldEntry, FieldType, Schema, Term, DATE_TIME_PRECISION_INDEXED};
 use crate::store::{StoreReader, StoreWriter};
 use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer};
-use crate::{DocId, Opstamp, SegmentComponent, TantivyError};
+use crate::{DocId, Opstamp, TantivyError};

 /// Computes the initial size of the hash table.
 ///
@@ -68,6 +68,7 @@ pub struct SegmentWriter {
    pub(crate) fast_field_writers: FastFieldsWriter,
    pub(crate) fieldnorms_writer: FieldNormsWriter,
    pub(crate) json_path_writer: JsonPathWriter,
+    pub(crate) json_positions_per_path: IndexingPositionsPerPath,
    pub(crate) doc_opstamps: Vec<Opstamp>,
    per_field_text_analyzers: Vec<TextAnalyzer>,
    term_buffer: Term,
@@ -119,6 +120,7 @@ impl SegmentWriter {
            per_field_postings_writers,
            fieldnorms_writer: FieldNormsWriter::for_schema(&schema),
            json_path_writer: JsonPathWriter::default(),
+            json_positions_per_path: IndexingPositionsPerPath::default(),
            segment_serializer,
            fast_field_writers: FastFieldsWriter::from_schema_and_tokenizer_manager(
                &schema,
@@ -204,8 +206,7 @@ impl SegmentWriter {
                        // Used to help with linting and type checking.
                        let value = value_access as D::Value<'_>;

-                        let facet = value.as_facet().ok_or_else(make_schema_error)?;
-                        let facet_str = facet.encoded_str();
+                        let facet_str = value.as_facet().ok_or_else(make_schema_error)?;
                        let mut facet_tokenizer = facet_tokenizer.token_stream(facet_str);
                        let mut indexing_position = IndexingPosition::default();
                        postings_writer.index_text(
@@ -228,7 +229,7 @@ impl SegmentWriter {
                                &mut self.per_field_text_analyzers[field.field_id() as usize];
                            text_analyzer.token_stream(text)
                        } else if let Some(tok_str) = value.as_pre_tokenized_text() {
-                            BoxTokenStream::new(PreTokenizedStream::from(tok_str.clone()))
+                            BoxTokenStream::new(PreTokenizedStream::from(*tok_str.clone()))
                        } else {
                            continue;
                        };
@@ -342,26 +343,24 @@ impl SegmentWriter {
                FieldType::JsonObject(json_options) => {
                    let text_analyzer =
                        &mut self.per_field_text_analyzers[field.field_id() as usize];
-                    let json_values_it = values.map(|value_access| {
-                        // Used to help with linting and type checking.
-                        let value_access = value_access as D::Value<'_>;
-                        let value = value_access.as_value();

-                        match value {
-                            ReferenceValue::Object(object_iter) => Ok(object_iter),
-                            _ => Err(make_schema_error()),
-                        }
-                    });
-                    index_json_values::<D::Value<'_>>(
-                        doc_id,
-                        json_values_it,
-                        text_analyzer,
-                        json_options.is_expand_dots_enabled(),
-                        term_buffer,
-                        postings_writer,
-                        &mut self.json_path_writer,
-                        ctx,
-                    )?;
+                    self.json_positions_per_path.clear();
+                    self.json_path_writer
+                        .set_expand_dots(json_options.is_expand_dots_enabled());
+                    for json_value in values {
+                        self.json_path_writer.clear();
+
+                        index_json_value(
+                            doc_id,
+                            json_value,
+                            text_analyzer,
+                            term_buffer,
+                            &mut self.json_path_writer,
+                            postings_writer,
+                            ctx,
+                            &mut self.json_positions_per_path,
+                        );
+                    }
                }
                FieldType::IpAddr(_) => {
                    let mut num_vals = 0;
@@ -498,19 +497,20 @@ mod tests {
    use crate::collector::{Count, TopDocs};
    use crate::directory::RamDirectory;
    use crate::fastfield::FastValue;
-    use crate::postings::TermInfo;
+    use crate::postings::{Postings, TermInfo};
    use crate::query::{PhraseQuery, QueryParser};
    use crate::schema::document::Value;
    use crate::schema::{
-        Document, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, STORED, STRING, TEXT,
+        Document, IndexRecordOption, OwnedValue, Schema, TextFieldIndexing, TextOptions, STORED,
+        STRING, TEXT,
    };
    use crate::store::{Compressor, StoreReader, StoreWriter};
    use crate::time::format_description::well_known::Rfc3339;
    use crate::time::OffsetDateTime;
    use crate::tokenizer::{PreTokenizedString, Token};
    use crate::{
-        DateTime, Directory, DocAddress, DocSet, Index, IndexWriter, Postings, TantivyDocument,
-        Term, TERMINATED,
+        DateTime, Directory, DocAddress, DocSet, Index, IndexWriter, TantivyDocument, Term,
+        TERMINATED,
    };

    #[test]
@@ -597,6 +597,45 @@ mod tests {
        assert_eq!(score_docs.len(), 2);
    }

+    #[test]
+    fn test_flat_json_indexing() {
+        // A JSON Object that contains mixed values on the first level
+        let mut schema_builder = Schema::builder();
+        let json_field = schema_builder.add_json_field("json", STORED | STRING);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema.clone());
+        let mut writer = index.writer_for_tests().unwrap();
+        // Text, i64, u64
+        writer.add_document(doc!(json_field=>"b")).unwrap();
+        writer
+            .add_document(doc!(json_field=>OwnedValue::I64(10i64)))
+            .unwrap();
+        writer
+            .add_document(doc!(json_field=>OwnedValue::U64(55u64)))
+            .unwrap();
+        writer
+            .add_document(doc!(json_field=>json!({"my_field": "a"})))
+            .unwrap();
+        writer.commit().unwrap();
+
+        let search_and_expect = |query| {
+            let query_parser = QueryParser::for_index(&index, vec![json_field]);
+            let text_query = query_parser.parse_query(query).unwrap();
+            let score_docs: Vec<(_, DocAddress)> = index
+                .reader()
+                .unwrap()
+                .searcher()
+                .search(&text_query, &TopDocs::with_limit(4))
+                .unwrap();
+            assert_eq!(score_docs.len(), 1);
+        };
+
+        search_and_expect("my_field:a");
+        search_and_expect("b");
+        search_and_expect("10");
+        search_and_expect("55");
+    }
+
    #[test]
    fn test_json_indexing() {
        let mut schema_builder = Schema::builder();
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -216,11 +216,6 @@ use once_cell::sync::Lazy;
 use serde::{Deserialize, Serialize};

 pub use self::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
-#[deprecated(
-    since = "0.22.0",
-    note = "Will be removed in tantivy 0.23. Use export from snippet module instead"
-)]
-pub use self::snippet::{Snippet, SnippetGenerator};
 #[doc(hidden)]
 pub use crate::core::json_utils;
 pub use crate::core::{Executor, Searcher, SearcherGeneration};
@@ -228,16 +223,10 @@ pub use crate::directory::Directory;
 #[allow(deprecated)] // Remove with index sorting
 pub use crate::index::{
    Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
-    Segment, SegmentComponent, SegmentId, SegmentMeta, SegmentReader,
+    Segment, SegmentMeta, SegmentReader,
 };
-#[deprecated(
-    since = "0.22.0",
-    note = "Will be removed in tantivy 0.23. Use export from indexer module instead"
-)]
-pub use crate::indexer::PreparedCommit;
 pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
-pub use crate::postings::Postings;
-pub use crate::schema::{DateOptions, DateTimePrecision, Document, TantivyDocument, Term};
+pub use crate::schema::{Document, TantivyDocument, Term};

 /// Index format version.
 const INDEX_FORMAT_VERSION: u32 = 6;
@@ -392,9 +381,10 @@ pub mod tests {
    use crate::docset::{DocSet, TERMINATED};
    use crate::index::SegmentReader;
    use crate::merge_policy::NoMergePolicy;
+    use crate::postings::Postings;
    use crate::query::BooleanQuery;
    use crate::schema::*;
-    use crate::{DateTime, DocAddress, Index, IndexWriter, Postings, ReloadPolicy};
+    use crate::{DateTime, DocAddress, Index, IndexWriter, ReloadPolicy};

    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();
@@ -446,7 +436,6 @@ pub mod tests {
    }

    #[test]
-    #[cfg(not(feature = "lz4"))]
    fn test_version_string() {
        use regex::Regex;
        let regex_ptn = Regex::new(
@@ -1109,9 +1098,9 @@ pub mod tests {
    #[test]
    fn test_update_via_delete_insert() -> crate::Result<()> {
        use crate::collector::Count;
+        use crate::index::SegmentId;
        use crate::indexer::NoMergePolicy;
        use crate::query::AllQuery;
-        use crate::SegmentId;

        const DOC_COUNT: u64 = 2u64;

--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -56,7 +56,7 @@ pub struct InvertedIndexSerializer {
 impl InvertedIndexSerializer {
    /// Open a new `InvertedIndexSerializer` for the given segment
    pub fn open(segment: &mut Segment) -> crate::Result<InvertedIndexSerializer> {
-        use crate::SegmentComponent::{Positions, Postings, Terms};
+        use crate::index::SegmentComponent::{Positions, Postings, Terms};
        let inv_index_serializer = InvertedIndexSerializer {
            terms_write: CompositeWrite::wrap(segment.open_write(Terms)?),
            postings_write: CompositeWrite::wrap(segment.open_write(Postings)?),
--- a/src/query/empty_query.rs
+++ b/src/query/empty_query.rs
@@ -1,8 +1,9 @@
 use super::Scorer;
 use crate::docset::TERMINATED;
+use crate::index::SegmentReader;
 use crate::query::explanation::does_not_match;
 use crate::query::{EnableScoring, Explanation, Query, Weight};
-use crate::{DocId, DocSet, Score, Searcher, SegmentReader};
+use crate::{DocId, DocSet, Score, Searcher};

 /// `EmptyQuery` is a dummy `Query` in which no document matches.
 ///
--- a/src/query/more_like_this/more_like_this.rs
+++ b/src/query/more_like_this/more_like_this.rs
@@ -180,7 +180,7 @@ impl MoreLikeThis {
                let facets: Vec<&str> = values
                    .iter()
                    .map(|value| {
-                        value.as_facet().map(|f| f.encoded_str()).ok_or_else(|| {
+                        value.as_facet().ok_or_else(|| {
                            TantivyError::InvalidArgument("invalid field value".to_string())
                        })
                    })
@@ -220,7 +220,7 @@ impl MoreLikeThis {
                        let mut token_stream = tokenizer.token_stream(text);
                        token_stream.process(sink);
                    } else if let Some(tok_str) = value.as_pre_tokenized_text() {
-                        let mut token_stream = PreTokenizedStream::from(tok_str.clone());
+                        let mut token_stream = PreTokenizedStream::from(*tok_str.clone());
                        token_stream.process(sink);
                    }
                }
--- a/src/query/range_query/fast_field_range_query.rs
+++ b/src/query/range_query/fast_field_range_query.rs
@@ -174,7 +174,7 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe
    }

    fn size_hint(&self) -> u32 {
-        0 // heuristic possible by checking number of hits when fetching a block
+        self.column.num_docs()
    }
 }

--- a/src/query/term_query/term_scorer.rs
+++ b/src/query/term_query/term_scorer.rs
@@ -127,6 +127,7 @@ impl Scorer for TermScorer {
 mod tests {
    use proptest::prelude::*;

+    use crate::index::SegmentId;
    use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
    use crate::merge_policy::NoMergePolicy;
    use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
@@ -134,8 +135,7 @@ mod tests {
    use crate::query::{Bm25Weight, EnableScoring, Scorer, TermQuery};
    use crate::schema::{IndexRecordOption, Schema, TEXT};
    use crate::{
-        assert_nearly_equals, DocId, DocSet, Index, IndexWriter, Score, Searcher, SegmentId, Term,
-        TERMINATED,
+        assert_nearly_equals, DocId, DocSet, Index, IndexWriter, Score, Searcher, Term, TERMINATED,
    };

    #[test]
--- a/src/reader/warming.rs
+++ b/src/reader/warming.rs
@@ -179,9 +179,10 @@ mod tests {
    use super::Warmer;
    use crate::core::searcher::SearcherGeneration;
    use crate::directory::RamDirectory;
+    use crate::index::SegmentId;
    use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
    use crate::schema::{Schema, INDEXED};
-    use crate::{Index, IndexSettings, ReloadPolicy, Searcher, SegmentId};
+    use crate::{Index, IndexSettings, ReloadPolicy, Searcher};

    #[derive(Default)]
    struct TestWarmer {
--- a/src/schema/document/de.rs
+++ b/src/schema/document/de.rs
@@ -873,7 +873,7 @@ mod tests {
        );

        let facet = Facet::from_text("/hello/world").unwrap();
-        let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into());
+        let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into());
        let value = deserialize_value(result);
        assert_eq!(value, crate::schema::OwnedValue::Facet(facet));

@@ -881,7 +881,8 @@ mod tests {
            text: "hello, world".to_string(),
            tokens: vec![Token::default(), Token::default()],
        };
-        let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into());
+        let result =
+            serialize_value(ReferenceValueLeaf::PreTokStr(pre_tok_str.clone().into()).into());
        let value = deserialize_value(result);
        assert_eq!(value, crate::schema::OwnedValue::PreTokStr(pre_tok_str));
    }
--- a/src/schema/document/owned_value.rs
+++ b/src/schema/document/owned_value.rs
@@ -65,13 +65,13 @@ impl<'a> Value<'a> for &'a OwnedValue {
        match self {
            OwnedValue::Null => ReferenceValueLeaf::Null.into(),
            OwnedValue::Str(val) => ReferenceValueLeaf::Str(val).into(),
-            OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val).into(),
+            OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val.clone().into()).into(),
            OwnedValue::U64(val) => ReferenceValueLeaf::U64(*val).into(),
            OwnedValue::I64(val) => ReferenceValueLeaf::I64(*val).into(),
            OwnedValue::F64(val) => ReferenceValueLeaf::F64(*val).into(),
            OwnedValue::Bool(val) => ReferenceValueLeaf::Bool(*val).into(),
            OwnedValue::Date(val) => ReferenceValueLeaf::Date(*val).into(),
-            OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val).into(),
+            OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val.encoded_str()).into(),
            OwnedValue::Bytes(val) => ReferenceValueLeaf::Bytes(val).into(),
            OwnedValue::IpAddr(val) => ReferenceValueLeaf::IpAddr(*val).into(),
            OwnedValue::Array(array) => ReferenceValue::Array(array.iter()),
@@ -183,7 +183,7 @@ impl serde::Serialize for OwnedValue {
            OwnedValue::Bytes(ref bytes) => serializer.serialize_str(&BASE64.encode(bytes)),
            OwnedValue::Object(ref obj) => {
                let mut map = serializer.serialize_map(Some(obj.len()))?;
-                for &(ref k, ref v) in obj {
+                for (k, v) in obj {
                    map.serialize_entry(k, v)?;
                }
                map.end()
@@ -277,11 +277,13 @@ impl<'a, V: Value<'a>> From<ReferenceValue<'a, V>> for OwnedValue {
                ReferenceValueLeaf::I64(val) => OwnedValue::I64(val),
                ReferenceValueLeaf::F64(val) => OwnedValue::F64(val),
                ReferenceValueLeaf::Date(val) => OwnedValue::Date(val),
-                ReferenceValueLeaf::Facet(val) => OwnedValue::Facet(val.clone()),
+                ReferenceValueLeaf::Facet(val) => {
+                    OwnedValue::Facet(Facet::from_encoded_string(val.to_string()))
+                }
                ReferenceValueLeaf::Bytes(val) => OwnedValue::Bytes(val.to_vec()),
                ReferenceValueLeaf::IpAddr(val) => OwnedValue::IpAddr(val),
                ReferenceValueLeaf::Bool(val) => OwnedValue::Bool(val),
-                ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(val.clone()),
+                ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(*val.clone()),
            },
            ReferenceValue::Array(val) => {
                OwnedValue::Array(val.map(|v| v.as_value().into()).collect())
--- a/src/schema/document/se.rs
+++ b/src/schema/document/se.rs
@@ -121,7 +121,7 @@ where W: Write
                ReferenceValueLeaf::Facet(val) => {
                    self.write_type_code(type_codes::HIERARCHICAL_FACET_CODE)?;

-                    val.serialize(self.writer)
+                    Cow::Borrowed(val).serialize(self.writer)
                }
                ReferenceValueLeaf::Bytes(val) => {
                    self.write_type_code(type_codes::BYTES_CODE)?;
@@ -428,7 +428,7 @@ mod tests {
        );

        let facet = Facet::from_text("/hello/world").unwrap();
-        let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into());
+        let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into());
        let expected = binary_repr!(
            type_codes::HIERARCHICAL_FACET_CODE => Facet::from_text("/hello/world").unwrap(),
        );
@@ -441,7 +441,8 @@ mod tests {
            text: "hello, world".to_string(),
            tokens: vec![Token::default(), Token::default()],
        };
-        let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into());
+        let result =
+            serialize_value(ReferenceValueLeaf::PreTokStr(pre_tok_str.clone().into()).into());
        let expected = binary_repr!(
            type_codes::EXT_CODE, type_codes::TOK_STR_EXT_CODE => pre_tok_str,
        );
--- a/src/schema/document/value.rs
+++ b/src/schema/document/value.rs
@@ -3,7 +3,6 @@ use std::net::Ipv6Addr;

 use common::DateTime;

-use crate::schema::Facet;
 use crate::tokenizer::PreTokenizedString;

 /// A single field value.
@@ -28,7 +27,7 @@ pub trait Value<'a>: Send + Sync + Debug {
    }

    #[inline]
-    /// If the Value is a String, returns the associated str. Returns None otherwise.
+    /// If the Value is a leaf, returns the associated leaf. Returns None otherwise.
    fn as_leaf(&self) -> Option<ReferenceValueLeaf<'a>> {
        if let ReferenceValue::Leaf(val) = self.as_value() {
            Some(val)
@@ -82,8 +81,9 @@ pub trait Value<'a>: Send + Sync + Debug {
    #[inline]
    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
    /// otherwise.
-    fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
-        self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text())
+    fn as_pre_tokenized_text(&self) -> Option<Box<PreTokenizedString>> {
+        self.as_leaf()
+            .and_then(|leaf| leaf.into_pre_tokenized_text())
    }

    #[inline]
@@ -94,7 +94,7 @@ pub trait Value<'a>: Send + Sync + Debug {

    #[inline]
    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
-    fn as_facet(&self) -> Option<&'a Facet> {
+    fn as_facet(&self) -> Option<&'a str> {
        self.as_leaf().and_then(|leaf| leaf.as_facet())
    }

@@ -132,7 +132,7 @@ pub trait Value<'a>: Send + Sync + Debug {
 }

 /// A enum representing a leaf value for tantivy to index.
-#[derive(Clone, Copy, Debug, PartialEq)]
+#[derive(Clone, Debug, PartialEq)]
 pub enum ReferenceValueLeaf<'a> {
    /// A null value.
    Null,
@@ -146,8 +146,9 @@ pub enum ReferenceValueLeaf<'a> {
    F64(f64),
    /// Date/time with nanoseconds precision
    Date(DateTime),
-    /// Facet
-    Facet(&'a Facet),
+    /// Facet string needs to match the format of
+    /// [Facet::encoded_str](crate::schema::Facet::encoded_str).
+    Facet(&'a str),
    /// Arbitrarily sized byte array
    Bytes(&'a [u8]),
    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
@@ -155,7 +156,7 @@ pub enum ReferenceValueLeaf<'a> {
    /// Bool value
    Bool(bool),
    /// Pre-tokenized str type,
-    PreTokStr(&'a PreTokenizedString),
+    PreTokStr(Box<PreTokenizedString>),
 }

 impl<'a, T: Value<'a> + ?Sized> From<ReferenceValueLeaf<'a>> for ReferenceValue<'a, T> {
@@ -259,9 +260,9 @@ impl<'a> ReferenceValueLeaf<'a> {
    }

    #[inline]
-    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
-    /// otherwise.
-    pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
+    /// If the Value is a pre-tokenized string, consumes it and returns the string.
+    /// Returns None otherwise.
+    pub fn into_pre_tokenized_text(self) -> Option<Box<PreTokenizedString>> {
        if let Self::PreTokStr(val) = self {
            Some(val)
        } else {
@@ -281,7 +282,7 @@ impl<'a> ReferenceValueLeaf<'a> {

    #[inline]
    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
-    pub fn as_facet(&self) -> Option<&'a Facet> {
+    pub fn as_facet(&self) -> Option<&'a str> {
        if let Self::Facet(val) = self {
            Some(val)
        } else {
@@ -322,6 +323,16 @@ where V: Value<'a>
        }
    }

+    #[inline]
+    /// If the Value is a leaf, consume it and return the leaf. Returns None otherwise.
+    pub fn into_leaf(self) -> Option<ReferenceValueLeaf<'a>> {
+        if let Self::Leaf(val) = self {
+            Some(val)
+        } else {
+            None
+        }
+    }
+
    #[inline]
    /// If the Value is a String, returns the associated str. Returns None otherwise.
    pub fn as_str(&self) -> Option<&'a str> {
@@ -365,10 +376,11 @@ where V: Value<'a>
    }

    #[inline]
-    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
-    /// otherwise.
-    pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
-        self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text())
+    /// If the Value is a pre-tokenized string, consumes it and returns the string.
+    /// Returns None otherwise.
+    pub fn into_pre_tokenized_text(self) -> Option<Box<PreTokenizedString>> {
+        self.into_leaf()
+            .and_then(|leaf| leaf.into_pre_tokenized_text())
    }

    #[inline]
@@ -379,7 +391,7 @@ where V: Value<'a>

    #[inline]
    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
-    pub fn as_facet(&self) -> Option<&'a Facet> {
+    pub fn as_facet(&self) -> Option<&'a str> {
        self.as_leaf().and_then(|leaf| leaf.as_facet())
    }

--- a/src/schema/flags.rs
+++ b/src/schema/flags.rs
@@ -1,7 +1,6 @@
 use std::ops::BitOr;

-use crate::schema::{NumericOptions, TextOptions};
-use crate::DateOptions;
+use crate::schema::{DateOptions, NumericOptions, TextOptions};

 #[derive(Clone)]
 pub struct StoredFlag;
--- a/src/space_usage/mod.rs
+++ b/src/space_usage/mod.rs
@@ -12,8 +12,8 @@ use std::collections::HashMap;
 use common::ByteCount;
 use serde::{Deserialize, Serialize};

+use crate::index::SegmentComponent;
 use crate::schema::Field;
-use crate::SegmentComponent;

 /// Enum containing any of the possible space usage results for segment components.
 pub enum ComponentSpaceUsage {
@@ -115,7 +115,7 @@ impl SegmentSpaceUsage {
    /// Use the components directly if this is somehow in performance critical code.
    pub fn component(&self, component: SegmentComponent) -> ComponentSpaceUsage {
        use self::ComponentSpaceUsage::*;
-        use crate::SegmentComponent::*;
+        use crate::index::SegmentComponent::*;
        match component {
            Postings => PerField(self.postings().clone()),
            Positions => PerField(self.positions().clone()),
--- a/src/store/reader.rs
+++ b/src/store/reader.rs
@@ -18,6 +18,8 @@ use crate::schema::document::{BinaryDocumentDeserializer, DocumentDeserialize};
 use crate::space_usage::StoreSpaceUsage;
 use crate::store::index::Checkpoint;
 use crate::DocId;
+#[cfg(feature = "quickwit")]
+use crate::Executor;

 pub(crate) const DOCSTORE_CACHE_CAPACITY: usize = 100;

@@ -341,7 +343,11 @@ impl StoreReader {
    /// In most cases use [`get_async`](Self::get_async)
    ///
    /// Loads and decompresses a block asynchronously.
-    async fn read_block_async(&self, checkpoint: &Checkpoint) -> io::Result<Block> {
+    async fn read_block_async(
+        &self,
+        checkpoint: &Checkpoint,
+        executor: &Executor,
+    ) -> io::Result<Block> {
        let cache_key = checkpoint.byte_range.start;
        if let Some(block) = self.cache.get_from_cache(checkpoint.byte_range.start) {
            return Ok(block);
@@ -353,8 +359,12 @@ impl StoreReader {
            .read_bytes_async()
            .await?;

-        let decompressed_block =
-            OwnedBytes::new(self.decompressor.decompress(compressed_block.as_ref())?);
+        let decompressor = self.decompressor;
+        let maybe_decompressed_block = executor
+            .spawn_blocking(move || decompressor.decompress(compressed_block.as_ref()))
+            .await
+            .expect("decompression panicked");
+        let decompressed_block = OwnedBytes::new(maybe_decompressed_block?);

        self.cache
            .put_into_cache(cache_key, decompressed_block.clone());
@@ -363,15 +373,23 @@ impl StoreReader {
    }

    /// Reads raw bytes of a given document asynchronously.
-    pub async fn get_document_bytes_async(&self, doc_id: DocId) -> crate::Result<OwnedBytes> {
+    pub async fn get_document_bytes_async(
+        &self,
+        doc_id: DocId,
+        executor: &Executor,
+    ) -> crate::Result<OwnedBytes> {
        let checkpoint = self.block_checkpoint(doc_id)?;
-        let block = self.read_block_async(&checkpoint).await?;
+        let block = self.read_block_async(&checkpoint, executor).await?;
        Self::get_document_bytes_from_block(block, doc_id, &checkpoint)
    }

    /// Fetches a document asynchronously. Async version of [`get`](Self::get).
-    pub async fn get_async<D: DocumentDeserialize>(&self, doc_id: DocId) -> crate::Result<D> {
-        let mut doc_bytes = self.get_document_bytes_async(doc_id).await?;
+    pub async fn get_async<D: DocumentDeserialize>(
+        &self,
+        doc_id: DocId,
+        executor: &Executor,
+    ) -> crate::Result<D> {
+        let mut doc_bytes = self.get_document_bytes_async(doc_id, executor).await?;

        let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
            .map_err(crate::TantivyError::from)?;
Author	SHA1	Message	Date
Paul Masurel	f820d42151	oneshot 0.1.7 Now that the is_closed change has been merged upstream, we can rely on it. This commit is a "hotfix" because we don't want to rely on some of the commits in main just yet.	2024-05-31 07:54:56 +04:00
PSeitz	5b7cca13e5	lower contention on AggregationLimits (#2394) PR https://github.com/quickwit-oss/quickwit/pull/4962 fixes an issue where the AggregationLimits are not passed correctly. Since the AggregationLimits are now shared properly, we run into contention issues. This PR includes some straightforward improvements to reduce contention, by only calling if the memory changed and avoiding the second read. We probably need some sharding with multiple counters or local caching before updating the global after some threshold.	2024-05-15 12:25:40 +02:00
dependabot[bot]	a79590477e	Update binggan requirement from 0.5.2 to 0.6.2 (#2399 ) --- updated-dependencies: - dependency-name: binggan dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-05-15 05:40:37 +02:00
Paul Masurel	6181c1eb5e	Small changes in the Executor API. (#2391 ) Warning, this change is mildly not backward compatible so I bumped tantivy's version.	2024-05-10 17:19:12 +09:00
Adam Reichold	1ee5f90761	Give allocation control to the caller instead of forcing a clone (#2389) Achieved by moving the boxes out of the temporary reference wrappers which are cloneable themselves, i.e. if required the caller can clone them already or consume them to reuse existing allocations.	2024-05-09 16:01:13 +09:00
PSeitz	71f3b4e4e3	fix ReferenceValue API flaw (#2372 ) * fix ReferenceValue API flaw Remove `Facet` and `TokenizedString` values from the `ReferenceValue` API, as this requires the trait value to have them stored somewhere. Since `TokenizedString` is quite niche, I just copy it into a Box, instead of designing a reference API around it. * fix comment link	2024-05-09 06:14:42 +02:00
trinity-1686a	8cd7ddc535	run block decompression from executor (#2386 ) * run block decompression from executor * add a wrapper with is_closed to oneshot channel * add cancelation test to Executor::spawn_blocking	2024-05-08 12:22:44 +02:00
Paul Masurel	2b76335a95	Removed usage of num_cpus (#2387 ) * Removed usage of num_cpus * handling error	2024-05-08 13:32:52 +09:00
PSeitz	c6b213d8f0	use bingang for agg benchmark (#2378 ) * use bingang for agg benchmark use bingang for agg benchmark, which includes memory consumption Output: ``` full histogram Memory: 15.8 KB Avg: 10.9322ms (+5.44%) Median: 10.8790ms (+9.28%) Min: 10.7470ms Max: 11.3263ms histogram_hard_bounds Memory: 15.5 KB Avg: 5.1939ms (+6.61%) Median: 5.1722ms (+10.98%) Min: 5.0432ms Max: 5.3910ms histogram_with_avg_sub_agg Memory: 48.7 KB Avg: 23.8165ms (+4.57%) Median: 23.7264ms (+10.06%) Min: 23.4995ms Max: 24.8107ms dense histogram Memory: 17.3 KB Avg: 15.6810ms (-8.54%) Median: 15.6174ms (-8.89%) Min: 15.4953ms Max: 16.0702ms histogram_hard_bounds Memory: 15.4 KB Avg: 10.0720ms (-7.33%) Median: 10.0572ms (-7.06%) Min: 9.8500ms Max: 10.4819ms histogram_with_avg_sub_agg Memory: 50.1 KB Avg: 33.0993ms (-7.04%) Median: 32.9499ms (-6.86%) Min: 32.8284ms Max: 34.0529ms sparse histogram Memory: 16.3 KB Avg: 19.2325ms (-0.44%) Median: 19.1211ms (-1.26%) Min: 19.0348ms Max: 19.7902ms histogram_hard_bounds Memory: 16.1 KB Avg: 18.5179ms (-0.61%) Median: 18.4552ms (-0.90%) Min: 18.3799ms Max: 19.0535ms histogram_with_avg_sub_agg Memory: 34.7 KB Avg: 21.2589ms (-0.69%) Median: 21.1867ms (-1.05%) Min: 21.0342ms Max: 21.9900ms ``` * add more bench with term as sub agg	2024-05-07 11:29:49 +02:00
PSeitz	eea70030bf	cleanup top level exports (#2382 ) remove some top level exports	2024-05-07 09:59:41 +02:00
PSeitz	92b5526310	allow more JSON values, fix i64 special case (#2383 ) This changes three things: - Reuse positions_per_path hashmap instead of allocating one per indexed JSON value - Try to cast u64 values to i64 to streamline with search behaviour - Allow top level json values to be of any type, instead of limiting it to JSON objects. Remove special JSON object handling method. TODO: We probably should also try to check f64 to i64 and u64 when indexing, as values may get converted to f64 by the JSON parser	2024-05-01 12:08:12 +02:00
PSeitz	99a59ad37e	remove zero byte check (#2379 ) remove zero byte checks in columnar. zero bytes are converted during serialization now. unify code paths extend test for expected column names	2024-04-26 06:03:28 +02:00
trinity-1686a	6a66a71cbb	modify fastfield range query heuristic (#2375 )	2024-04-25 10:06:11 +02:00
PSeitz	ff40764204	make convert_to_fast_value_and_append_to_json_term pub (#2370 ) * make convert_to_fast_value_and_append_to_json_term pub * clippy	2024-04-23 04:05:41 +02:00