oneshot 0.1.7

Now that the `is_closed` change has been merged upstream, we can rely on it. This commit is a "hotfix" because we don't want to rely on some of the commits in main just yet.
lower contention on AggregationLimits (#2394)
2026-01-08 10:02:55 +00:00 · 2024-05-31 07:54:56 +04:00 · 2024-05-15 12:25:40 +02:00 · 2024-05-15 05:40:37 +02:00 · 2024-05-10 17:19:12 +09:00 · 2024-05-09 16:01:13 +09:00
50 changed files with 976 additions and 967 deletions
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -15,11 +15,11 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - name: Install Rust
-        run: rustup toolchain install nightly-2023-09-10 --profile minimal --component llvm-tools-preview
+        run: rustup toolchain install nightly-2024-04-10 --profile minimal --component llvm-tools-preview
      - uses: Swatinem/rust-cache@v2
      - uses: taiki-e/install-action@cargo-llvm-cov
      - name: Generate code coverage
-        run: cargo +nightly-2023-09-10 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
+        run: cargo +nightly-2024-04-10 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        continue-on-error: true
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.22.0"
+version = "0.23.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -15,12 +15,16 @@ rust-version = "1.63"
 exclude = ["benches/*.json", "benches/*.txt"]
 [dependencies]
-oneshot = "0.1.5"
+# Switch back to the non-forked oneshot crate once https://github.com/faern/oneshot/pull/35 is merged
 oneshot = "0.1.7"
 base64 = "0.22.0"
 byteorder = "1.4.3"
 crc32fast = "1.3.2"
 once_cell = "1.10.0"
-regex = { version = "1.5.5", default-features = false, features = ["std", "unicode"] }
+regex = { version = "1.5.5", default-features = false, features = [
    "std",
    "unicode",
 ] }
 aho-corasick = "1.0"
 tantivy-fst = "0.5"
 memmap2 = { version = "0.9.0", optional = true }
@@ -30,14 +34,15 @@ tempfile = { version = "3.3.0", optional = true }
 log = "0.4.16"
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = "1.0.79"
 num_cpus = "1.13.1"
 fs4 = { version = "0.8.0", optional = true }
 levenshtein_automata = "0.2.1"
 uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
 rust-stemmers = "1.2.0"
 downcast-rs = "1.2.0"
-bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker4x"] }
+bitpacking = { version = "0.9.2", default-features = false, features = [
    "bitpacker4x",
 ] }
 census = "0.4.2"
 rustc-hash = "1.1.0"
 thiserror = "1.0.30"
@@ -67,6 +72,7 @@ fnv = "1.0.7"
 winapi = "0.3.9"
 [dev-dependencies]
 binggan = "0.6.2"
 rand = "0.8.5"
 maplit = "1.0.2"
 matches = "0.1.9"
@@ -122,7 +128,16 @@ quickwit = ["sstable", "futures-util"]
 compare_hash_only = ["stacker/compare_hash_only"]
 [workspace]
-members = ["query-grammar", "bitpacker", "common", "ownedbytes", "stacker", "sstable", "tokenizer-api", "columnar"]
+members = [
    "query-grammar",
    "bitpacker",
    "common",
    "ownedbytes",
    "stacker",
    "sstable",
    "tokenizer-api",
    "columnar",
 ]
 # Following the "fail" crate best practices, we isolate
 # tests that define specific behavior in fail check points
@@ -143,3 +158,7 @@ harness = false
 [[bench]]
 name = "index-bench"
 harness = false
 [[bench]]
 name = "agg_bench"
 harness = false
--- a/benches/agg_bench.rs
+++ b/benches/agg_bench.rs
@@ -0,0 +1,413 @@
 use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
 use rand::prelude::SliceRandom;
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use rand_distr::Distribution;
 use serde_json::json;
 use tantivy::aggregation::agg_req::Aggregations;
 use tantivy::aggregation::AggregationCollector;
 use tantivy::query::{AllQuery, TermQuery};
 use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
 use tantivy::{doc, Index, Term};
 #[global_allocator]
 pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
 /// Registers a benchmark function on a runner under the function's own name.
 ///
 /// `register!(runner, average_u64)` expands to
 /// `runner.register("average_u64", move |index| average_u64(index))`.
 macro_rules! register {
    ($runner:expr, $func:ident) => {
        $runner.register(stringify!($func), move |index| $func(index))
    };
 }
 /// Builds one benchmark index per cardinality variant and runs all aggregation benchmarks
 /// against each of them.
 fn main() {
    let variants = [
        ("full", Cardinality::Full),
        ("dense", Cardinality::OptionalDense),
        ("sparse", Cardinality::OptionalSparse),
        ("multivalue", Cardinality::Multivalued),
    ];
    // Indexes are built eagerly, in the order listed above; a build failure panics.
    let inputs: Vec<_> = variants
        .into_iter()
        .map(|(name, cardinality)| (name, get_test_index_bench(cardinality).unwrap()))
        .collect();
    bench_agg(InputGroup::new_with_inputs(inputs));
 }
 /// Registers every aggregation benchmark of this file on `group` and runs them.
 ///
 /// Each benchmark is registered under its function name (see `register!`), in the order
 /// listed below.
 fn bench_agg(mut group: InputGroup<Index>) {
    group.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.
    // Metric aggregations.
    register!(group, average_u64);
    register!(group, average_f64);
    register!(group, average_f64_u64);
    register!(group, stats_f64);
    register!(group, percentiles_f64);
    // Terms aggregations.
    register!(group, terms_few);
    register!(group, terms_many);
    register!(group, terms_many_order_by_term);
    register!(group, terms_many_with_top_hits);
    register!(group, terms_many_with_avg_sub_agg);
    register!(group, terms_many_json_mixed_type_with_sub_agg_card);
    // Range aggregations.
    register!(group, range_agg);
    register!(group, range_agg_with_avg_sub_agg);
    register!(group, range_agg_with_term_agg_few);
    register!(group, range_agg_with_term_agg_many);
    // Histogram aggregations.
    register!(group, histogram);
    register!(group, histogram_hard_bounds);
    register!(group, histogram_with_avg_sub_agg);
    // Mixed request.
    register!(group, avg_and_range_with_avg_sub_agg);
    group.run();
 }
 /// Runs the aggregation request `agg_req` over the documents matching the term `cool`
 /// in the `text` field.
 ///
 /// The search result is passed through `black_box` so the work cannot be optimized away.
 ///
 /// # Panics
 ///
 /// Panics if `agg_req` does not deserialize into `Aggregations`, if the reader cannot be
 /// created, if the schema has no `text` field, or if the search fails.
 fn exec_term_with_agg(index: &Index, agg_req: serde_json::Value) {
    let agg_req: Aggregations = serde_json::from_value(agg_req).unwrap();
    let reader = index.reader().unwrap();
    // Acquire the searcher once and reuse it for both the schema lookup and the search,
    // instead of requesting a second searcher from the reader.
    let searcher = reader.searcher();
    let text_field = searcher.schema().get_field("text").unwrap();
    let term_query = TermQuery::new(
        Term::from_field_text(text_field, "cool"),
        IndexRecordOption::Basic,
    );
    let collector = get_collector(agg_req);
    black_box(searcher.search(&term_query, &collector).unwrap());
 }
 /// Benchmarks an `avg` aggregation on the `score` field over the term-query matches.
 fn average_u64(index: &Index) {
    let request = json!({
        "average": {
            "avg": { "field": "score", }
        }
    });
    exec_term_with_agg(index, request);
 }
 /// Benchmarks an `avg` aggregation on the `score_f64` field over the term-query matches.
 fn average_f64(index: &Index) {
    let request = json!({
        "average": {
            "avg": { "field": "score_f64", }
        }
    });
    exec_term_with_agg(index, request);
 }
 /// Benchmarks two `avg` aggregations in a single request: one on `score_f64` and one on
 /// `score`, over the term-query matches.
 fn average_f64_u64(index: &Index) {
    let request = json!({
        "average_f64": {
            "avg": { "field": "score_f64" }
        },
        "average": {
            "avg": { "field": "score" }
        },
    });
    exec_term_with_agg(index, request);
 }
 /// Benchmarks a `stats` aggregation on the `score_f64` field over the term-query matches.
 fn stats_f64(index: &Index) {
    let request = json!({
        "average_f64": {
            "stats": { "field": "score_f64", }
        }
    });
    exec_term_with_agg(index, request);
 }
 /// Benchmarks a `percentiles` aggregation (95, 99 and 99.9) on `score_f64` over all documents.
 fn percentiles_f64(index: &Index) {
    let percents = json!([95, 99, 99.9]);
    let request = json!({
        "mypercentiles": {
            "percentiles": {
                "field": "score_f64",
                "percents": percents
            }
        }
    });
    execute_agg(index, request);
 }
 /// Benchmarks a `terms` aggregation on the `text_few_terms` field over all documents.
 fn terms_few(index: &Index) {
    let request = json!({
        "my_texts": {
            "terms": { "field": "text_few_terms" }
        },
    });
    execute_agg(index, request);
 }
 /// Benchmarks a `terms` aggregation on the `text_many_terms` field over all documents.
 fn terms_many(index: &Index) {
    let request = json!({
        "my_texts": {
            "terms": { "field": "text_many_terms" }
        },
    });
    execute_agg(index, request);
 }
 /// Benchmarks a `terms` aggregation on `text_many_terms` ordered by key, descending,
 /// over all documents.
 fn terms_many_order_by_term(index: &Index) {
    let request = json!({
        "my_texts": {
            "terms": {
                "field": "text_many_terms",
                "order": { "_key": "desc" }
            }
        },
    });
    execute_agg(index, request);
 }
 /// Benchmarks a `terms` aggregation on `text_many_terms` with a `top_hits` sub-aggregation
 /// (size 2, sorted by `score` descending, returning `score_f64`) over all documents.
 fn terms_many_with_top_hits(index: &Index) {
    let top_hits = json!({
        "sort": [
            { "score": "desc" }
        ],
        "size": 2,
        "doc_value_fields": ["score_f64"]
    });
    let request = json!({
        "my_texts": {
            "terms": { "field": "text_many_terms" },
            "aggs": {
                "top_hits": { "top_hits": top_hits }
            }
        },
    });
    execute_agg(index, request);
 }
 /// Benchmarks a `terms` aggregation on `text_many_terms` with an `avg` sub-aggregation on
 /// `score_f64`, over all documents.
 fn terms_many_with_avg_sub_agg(index: &Index) {
    let sub_aggs = json!({
        "average_f64": { "avg": { "field": "score_f64" } }
    });
    let request = json!({
        "my_texts": {
            "terms": { "field": "text_many_terms" },
            "aggs": sub_aggs
        },
    });
    execute_agg(index, request);
 }
 /// Benchmarks a `terms` aggregation on the JSON path `json.mixed_type` with an `avg`
 /// sub-aggregation on `score_f64`, over all documents.
 fn terms_many_json_mixed_type_with_sub_agg_card(index: &Index) {
    let sub_aggs = json!({
        "average_f64": { "avg": { "field": "score_f64" } }
    });
    let request = json!({
        "my_texts": {
            "terms": { "field": "json.mixed_type" },
            "aggs": sub_aggs
        },
    });
    execute_agg(index, request);
 }
 /// Runs the aggregation request `agg_req` over every document of `index` (`AllQuery`).
 ///
 /// Panics if the request cannot be deserialized, the reader cannot be created, or the
 /// search fails.
 fn execute_agg(index: &Index, agg_req: serde_json::Value) {
    let parsed: Aggregations = serde_json::from_value(agg_req).unwrap();
    let collector = get_collector(parsed);
    let searcher = index.reader().unwrap().searcher();
    let result = searcher.search(&AllQuery, &collector).unwrap();
    black_box(result);
 }
 /// Benchmarks a `range` aggregation with six ranges on `score_f64` over all documents.
 fn range_agg(index: &Index) {
    let ranges = json!([
        { "from": 3, "to": 7000 },
        { "from": 7000, "to": 20000 },
        { "from": 20000, "to": 30000 },
        { "from": 30000, "to": 40000 },
        { "from": 40000, "to": 50000 },
        { "from": 50000, "to": 60000 }
    ]);
    let request = json!({
        "range_f64": {
            "range": { "field": "score_f64", "ranges": ranges }
        },
    });
    execute_agg(index, request);
 }
 /// Benchmarks a six-range `range` aggregation on `score_f64` with an `avg` sub-aggregation
 /// on `score_f64`, over all documents.
 fn range_agg_with_avg_sub_agg(index: &Index) {
    let ranges = json!([
        { "from": 3, "to": 7000 },
        { "from": 7000, "to": 20000 },
        { "from": 20000, "to": 30000 },
        { "from": 30000, "to": 40000 },
        { "from": 40000, "to": 50000 },
        { "from": 50000, "to": 60000 }
    ]);
    let request = json!({
        "rangef64": {
            "range": { "field": "score_f64", "ranges": ranges },
            "aggs": {
                "average_f64": { "avg": { "field": "score_f64" } }
            }
        },
    });
    execute_agg(index, request);
 }
 /// Benchmarks a six-range `range` aggregation on `score_f64` with a `terms` sub-aggregation
 /// on `text_few_terms`, over all documents.
 fn range_agg_with_term_agg_few(index: &Index) {
    let ranges = json!([
        { "from": 3, "to": 7000 },
        { "from": 7000, "to": 20000 },
        { "from": 20000, "to": 30000 },
        { "from": 30000, "to": 40000 },
        { "from": 40000, "to": 50000 },
        { "from": 50000, "to": 60000 }
    ]);
    let request = json!({
        "rangef64": {
            "range": { "field": "score_f64", "ranges": ranges },
            "aggs": {
                "my_texts": { "terms": { "field": "text_few_terms" } },
            }
        },
    });
    execute_agg(index, request);
 }
 /// Benchmarks a six-range `range` aggregation on `score_f64` with a `terms` sub-aggregation
 /// on `text_many_terms`, over all documents.
 fn range_agg_with_term_agg_many(index: &Index) {
    let ranges = json!([
        { "from": 3, "to": 7000 },
        { "from": 7000, "to": 20000 },
        { "from": 20000, "to": 30000 },
        { "from": 30000, "to": 40000 },
        { "from": 40000, "to": 50000 },
        { "from": 50000, "to": 60000 }
    ]);
    let request = json!({
        "rangef64": {
            "range": { "field": "score_f64", "ranges": ranges },
            "aggs": {
                "my_texts": { "terms": { "field": "text_many_terms" } },
            }
        },
    });
    execute_agg(index, request);
 }
 /// Benchmarks a `histogram` aggregation on `score_f64` with an interval of 100, over all
 /// documents.
 fn histogram(index: &Index) {
    // Author's note on the interval: "1000 buckets" (not verified against the generated data).
    let histogram_spec = json!({
        "field": "score_f64",
        "interval": 100
    });
    let request = json!({
        "rangef64": {
            "histogram": histogram_spec,
        }
    });
    execute_agg(index, request);
 }
 /// Benchmarks a `histogram` aggregation on `score_f64` (interval 100) restricted by
 /// `hard_bounds` of 1000..=300000, over all documents.
 fn histogram_hard_bounds(index: &Index) {
    let request = json!({
        "rangef64": {
            "histogram": {
                "field": "score_f64",
                "interval": 100,
                "hard_bounds": { "min": 1000, "max": 300000 }
            }
        },
    });
    execute_agg(index, request);
 }
 /// Benchmarks a `histogram` aggregation on `score_f64` (interval 100) with an `avg`
 /// sub-aggregation on `score_f64`, over all documents.
 fn histogram_with_avg_sub_agg(index: &Index) {
    let sub_aggs = json!({
        "average_f64": { "avg": { "field": "score_f64" } }
    });
    let request = json!({
        "rangef64": {
            "histogram": { "field": "score_f64", "interval": 100 },
            "aggs": sub_aggs
        }
    });
    execute_agg(index, request);
 }
 /// Benchmarks a request combining a top-level `avg` on `score` with a three-range `range`
 /// aggregation on `score_f64` that carries its own `avg` sub-aggregation on `score`,
 /// over all documents.
 fn avg_and_range_with_avg_sub_agg(index: &Index) {
    let ranges = json!([
        { "from": 3, "to": 7000 },
        { "from": 7000, "to": 20000 },
        { "from": 20000, "to": 60000 }
    ]);
    let request = json!({
        "rangef64": {
            "range": { "field": "score_f64", "ranges": ranges },
            "aggs": {
                "average_in_range": { "avg": { "field": "score" } }
            }
        },
        "average": { "avg": { "field": "score" } }
    });
    execute_agg(index, request);
 }
 /// Selects which column cardinality `get_test_index_bench` forces on the generated index.
 #[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
 enum Cardinality {
    /// All documents contain exactly one value.
    /// `Full` is the default for auto-detecting the Cardinality, since it is the most strict.
    #[default]
    Full = 0,
    /// All documents contain at most one value.
    /// The generator forces this by adding a single empty document.
    OptionalDense = 1,
    /// All documents may contain any number of values.
    /// The generator forces this by adding a single document holding every value twice.
    Multivalued = 2,
    /// Only a small fraction of the documents has a value: the generator follows each
    /// document carrying values with 20 documents that only set the `text` field
    /// (i.e. 1 in 21 documents has values).
    OptionalSparse = 3,
 }
 /// Wraps `agg_req` in an `AggregationCollector` using default-constructed limits.
 fn get_collector(agg_req: Aggregations) -> AggregationCollector {
    let limits = Default::default();
    AggregationCollector::from_aggs(agg_req, limits)
 }
 /// Builds the benchmark index in a temporary directory and commits it.
 ///
 /// The schema holds a stored text field (`text`), a fast JSON field (`json`), two fast string
 /// fields (`text_many_terms`, `text_few_terms`) and three fast numeric fields (`score`,
 /// `score_f64`, `score_i64`). Documents are generated from a fixed-seed RNG, so the content is
 /// deterministic for a given `cardinality`.
 ///
 /// `cardinality` selects how the columns are shaped:
 /// - `Full`: 1,000,000 documents, each with exactly one value per field.
 /// - `OptionalDense`: same, plus one empty document.
 /// - `Multivalued`: same, plus one document holding every value twice.
 /// - `OptionalSparse`: 50,000 documents with values, each followed by 20 documents that only
 ///   set `text`.
 ///
 /// # Errors
 ///
 /// Returns an error if the index or writer cannot be created, or if adding a document or
 /// committing fails.
 fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
    let mut schema_builder = Schema::builder();
    let text_fieldtype = tantivy::schema::TextOptions::default()
        .set_indexing_options(
            TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
        )
        .set_stored();
    let text_field = schema_builder.add_text_field("text", text_fieldtype);
    let json_field = schema_builder.add_json_field("json", FAST);
    let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
    let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
    let score_fieldtype = tantivy::schema::NumericOptions::default().set_fast();
    let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
    let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
    let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
    let index = Index::create_from_tempdir(schema_builder.build())?;
    let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
    let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
    let many_terms_data = (0..150_000)
        .map(|num| format!("author{num}"))
        .collect::<Vec<_>>();
    {
        // Fixed seed: every run generates the same documents.
        let mut rng = StdRng::from_seed([1u8; 32]);
        let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
        // To make the different test cases comparable we just change one doc to force the
        // cardinality
        if cardinality == Cardinality::OptionalDense {
            index_writer.add_document(doc!())?;
        }
        if cardinality == Cardinality::Multivalued {
            index_writer.add_document(doc!(
                json_field => json!({"mixed_type": 10.0}),
                json_field => json!({"mixed_type": 10.0}),
                text_field => "cool",
                text_field => "cool",
                text_field_many_terms => "cool",
                text_field_many_terms => "cool",
                text_field_few_terms => "cool",
                text_field_few_terms => "cool",
                score_field => 1u64,
                score_field => 1u64,
                score_field_f64 => lg_norm.sample(&mut rng),
                score_field_f64 => lg_norm.sample(&mut rng),
                score_field_i64 => 1i64,
                score_field_i64 => 1i64,
            ))?;
        }
        // The sparse variant writes 20x fewer documents with values and pads each of them
        // with empty documents below.
        let doc_with_value = if cardinality == Cardinality::OptionalSparse {
            1_000_000 / 20
        } else {
            1_000_000
        };
        // Exclusive upper bound of the generated `score` / `score_i64` values.
        let val_max = 1_000_000.0;
        for _ in 0..doc_with_value {
            // NOTE: the order of the RNG calls below determines the generated data.
            let val: f64 = rng.gen_range(0.0..val_max);
            let json = if rng.gen_bool(0.1) {
                // 10% are numeric values
                json!({ "mixed_type": val })
            } else {
                json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
            };
            index_writer.add_document(doc!(
                text_field => "cool",
                json_field => json,
                text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
                text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
                // `val` lies in [0, val_max), so both truncating casts are lossless in range.
                score_field => val as u64,
                score_field_f64 => lg_norm.sample(&mut rng),
                score_field_i64 => val as i64,
            ))?;
            if cardinality == Cardinality::OptionalSparse {
                for _ in 0..20 {
                    index_writer.add_document(doc!(text_field => "cool"))?;
                }
            }
        }
        // writing the segment
        index_writer.commit()?;
    }
    Ok(index)
 }
--- a/columnar/src/columnar/writer/mod.rs
+++ b/columnar/src/columnar/writer/mod.rs
@@ -59,22 +59,6 @@ pub struct ColumnarWriter {
    buffers: SpareBuffers,
 }
 /// Applies `updater` to the entry stored under `column_name` in `arena_hash_map`,
 /// delegating to `ArenaHashMap::mutate_or_create`.
 ///
 /// # Panics
 ///
 /// Panics if `column_name` contains a `0` byte.
 #[inline]
 fn mutate_or_create_column<V, TMutator>(
    arena_hash_map: &mut ArenaHashMap,
    column_name: &str,
    updater: TMutator,
 ) where
    V: Copy + 'static,
    TMutator: FnMut(Option<V>) -> V,
 {
    let key = column_name.as_bytes();
    assert!(!key.contains(&0u8), "key may not contain the 0 byte");
    arena_hash_map.mutate_or_create(key, updater);
 }
 impl ColumnarWriter {
    pub fn mem_usage(&self) -> usize {
        self.arena.mem_usage()
@@ -175,9 +159,8 @@ impl ColumnarWriter {
                    },
                    &mut self.dictionaries,
                );
-                mutate_or_create_column(
+                hash_map.mutate_or_create(
-                    hash_map,
+                    column_name.as_bytes(),
                    column_name,
                    |column_opt: Option<StrOrBytesColumnWriter>| {
                        let mut column_writer = if let Some(column_writer) = column_opt {
                            column_writer
@@ -192,24 +175,21 @@ impl ColumnarWriter {
                );
            }
            ColumnType::Bool => {
-                mutate_or_create_column(
+                self.bool_field_hash_map.mutate_or_create(
-                    &mut self.bool_field_hash_map,
+                    column_name.as_bytes(),
                    column_name,
                    |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
                );
            }
            ColumnType::DateTime => {
-                mutate_or_create_column(
+                self.datetime_field_hash_map.mutate_or_create(
-                    &mut self.datetime_field_hash_map,
+                    column_name.as_bytes(),
                    column_name,
                    |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
                );
            }
            ColumnType::I64 | ColumnType::F64 | ColumnType::U64 => {
                let numerical_type = column_type.numerical_type().unwrap();
-                mutate_or_create_column(
+                self.numerical_field_hash_map.mutate_or_create(
-                    &mut self.numerical_field_hash_map,
+                    column_name.as_bytes(),
                    column_name,
                    |column_opt: Option<NumericalColumnWriter>| {
                        let mut column: NumericalColumnWriter = column_opt.unwrap_or_default();
                        column.force_numerical_type(numerical_type);
@@ -217,9 +197,8 @@ impl ColumnarWriter {
                    },
                );
            }
-            ColumnType::IpAddr => mutate_or_create_column(
+            ColumnType::IpAddr => self.ip_addr_field_hash_map.mutate_or_create(
-                &mut self.ip_addr_field_hash_map,
+                column_name.as_bytes(),
                column_name,
                |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
            ),
        }
@@ -232,9 +211,8 @@ impl ColumnarWriter {
        numerical_value: T,
    ) {
        let (hash_map, arena) = (&mut self.numerical_field_hash_map, &mut self.arena);
-        mutate_or_create_column(
+        hash_map.mutate_or_create(
-            hash_map,
+            column_name.as_bytes(),
            column_name,
            |column_opt: Option<NumericalColumnWriter>| {
                let mut column: NumericalColumnWriter = column_opt.unwrap_or_default();
                column.record_numerical_value(doc, numerical_value.into(), arena);
@@ -244,10 +222,6 @@ impl ColumnarWriter {
    }
    pub fn record_ip_addr(&mut self, doc: RowId, column_name: &str, ip_addr: Ipv6Addr) {
        assert!(
            !column_name.as_bytes().contains(&0u8),
            "key may not contain the 0 byte"
        );
        let (hash_map, arena) = (&mut self.ip_addr_field_hash_map, &mut self.arena);
        hash_map.mutate_or_create(
            column_name.as_bytes(),
@@ -261,16 +235,21 @@ impl ColumnarWriter {
    pub fn record_bool(&mut self, doc: RowId, column_name: &str, val: bool) {
        let (hash_map, arena) = (&mut self.bool_field_hash_map, &mut self.arena);
-        mutate_or_create_column(hash_map, column_name, |column_opt: Option<ColumnWriter>| {
+        hash_map.mutate_or_create(
            column_name.as_bytes(),
            |column_opt: Option<ColumnWriter>| {
                let mut column: ColumnWriter = column_opt.unwrap_or_default();
                column.record(doc, val, arena);
                column
-        });
+            },
        );
    }
    pub fn record_datetime(&mut self, doc: RowId, column_name: &str, datetime: common::DateTime) {
        let (hash_map, arena) = (&mut self.datetime_field_hash_map, &mut self.arena);
-        mutate_or_create_column(hash_map, column_name, |column_opt: Option<ColumnWriter>| {
+        hash_map.mutate_or_create(
            column_name.as_bytes(),
            |column_opt: Option<ColumnWriter>| {
                let mut column: ColumnWriter = column_opt.unwrap_or_default();
                column.record(
                    doc,
@@ -278,7 +257,8 @@ impl ColumnarWriter {
                    arena,
                );
                column
-        });
+            },
        );
    }
    pub fn record_str(&mut self, doc: RowId, column_name: &str, value: &str) {
@@ -303,10 +283,6 @@ impl ColumnarWriter {
    }
    pub fn record_bytes(&mut self, doc: RowId, column_name: &str, value: &[u8]) {
        assert!(
            !column_name.as_bytes().contains(&0u8),
            "key may not contain the 0 byte"
        );
        let (hash_map, arena, dictionaries) = (
            &mut self.bytes_field_hash_map,
            &mut self.arena,
--- a/examples/custom_collector.rs
+++ b/examples/custom_collector.rs
@@ -11,9 +11,10 @@ use columnar::Column;
 // ---
 // Importing tantivy...
 use tantivy::collector::{Collector, SegmentCollector};
 use tantivy::index::SegmentReader;
 use tantivy::query::QueryParser;
 use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
-use tantivy::{doc, Index, IndexWriter, Score, SegmentReader};
+use tantivy::{doc, Index, IndexWriter, Score};
 #[derive(Default)]
 struct Stats {
--- a/examples/date_time_field.rs
+++ b/examples/date_time_field.rs
@@ -13,7 +13,7 @@ fn main() -> tantivy::Result<()> {
    let opts = DateOptions::from(INDEXED)
        .set_stored()
        .set_fast()
-        .set_precision(tantivy::DateTimePrecision::Seconds);
+        .set_precision(tantivy::schema::DateTimePrecision::Seconds);
    // Add `occurred_at` date field type
    let occurred_at = schema_builder.add_date_field("occurred_at", opts);
    let event_type = schema_builder.add_text_field("event", STRING | STORED);
--- a/examples/iterating_docs_and_positions.rs
+++ b/examples/iterating_docs_and_positions.rs
@@ -7,10 +7,11 @@
 // the list of documents containing a term, getting
 // its term frequency, and accessing its positions.
 use tantivy::postings::Postings;
 // ---
 // Importing tantivy...
 use tantivy::schema::*;
-use tantivy::{doc, DocSet, Index, IndexWriter, Postings, TERMINATED};
+use tantivy::{doc, DocSet, Index, IndexWriter, TERMINATED};
 fn main() -> tantivy::Result<()> {
    // We first create a schema for the sake of the
--- a/examples/warmer.rs
+++ b/examples/warmer.rs
@@ -3,10 +3,11 @@ use std::collections::{HashMap, HashSet};
 use std::sync::{Arc, RwLock, Weak};
 use tantivy::collector::TopDocs;
 use tantivy::index::SegmentId;
 use tantivy::query::QueryParser;
 use tantivy::schema::{Schema, FAST, TEXT};
 use tantivy::{
-    doc, DocAddress, DocId, Index, IndexWriter, Opstamp, Searcher, SearcherGeneration, SegmentId,
+    doc, DocAddress, DocId, Index, IndexWriter, Opstamp, Searcher, SearcherGeneration,
    SegmentReader, Warmer,
 };
--- a/src/aggregation/agg_bench.rs
+++ b/src/aggregation/agg_bench.rs
@@ -1,585 +0,0 @@
 #[cfg(all(test, feature = "unstable"))]
 mod bench {
    use rand::prelude::SliceRandom;
    use rand::rngs::StdRng;
    use rand::{Rng, SeedableRng};
    use rand_distr::Distribution;
    use serde_json::json;
    use test::{self, Bencher};
    use crate::aggregation::agg_req::Aggregations;
    use crate::aggregation::AggregationCollector;
    use crate::query::{AllQuery, TermQuery};
    use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
    use crate::{Index, Term};
    /// Shape of the benchmark index with respect to how many values each document holds.
    #[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
    enum Cardinality {
        /// All documents contain exactly one value.
        /// `Full` is the default for auto-detecting the Cardinality, since it is the most strict.
        #[default]
        Full = 0,
        /// All documents contain at most one value.
        Optional = 1,
        /// All documents may contain any number of values.
        Multivalued = 2,
        /// Roughly 1 in 20 documents has a value: the index builder in this module appends
        /// 20 value-less documents after every document that carries values.
        Sparse = 3,
    }
    /// Builds an `AggregationCollector` for `agg_req`, passing `Default::default()` as the
    /// second argument of `AggregationCollector::from_aggs` (presumably the aggregation
    /// limits — confirm against `from_aggs`).
    fn get_collector(agg_req: Aggregations) -> AggregationCollector {
        AggregationCollector::from_aggs(agg_req, Default::default())
    }
    /// Builds the index used by every benchmark in this module.
    ///
    /// The schema has a stored, frequency-indexed `text` field, a fast `json` field, two fast
    /// `STRING` fields (`text_many_terms`, `text_few_terms`) and three fast numeric fields
    /// (`score`, `score_f64`, `score_i64`). The index is created with
    /// `Index::create_from_tempdir` and written by a one-thread writer in a single commit.
    ///
    /// `cardinality` only perturbs the data slightly so the variants stay comparable:
    /// `Optional` adds one empty document, `Multivalued` adds one document holding every
    /// field twice, and `Sparse` indexes 20x fewer valued documents, each followed by 20
    /// documents that only contain the `text` field.
    ///
    /// Returns an error if index creation, document insertion or the commit fails.
    fn get_test_index_bench(cardinality: Cardinality) -> crate::Result<Index> {
        let mut schema_builder = Schema::builder();
        let text_fieldtype = crate::schema::TextOptions::default()
            .set_indexing_options(
                TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
            )
            .set_stored();
        let text_field = schema_builder.add_text_field("text", text_fieldtype);
        let json_field = schema_builder.add_json_field("json", FAST);
        let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
        let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
        let score_fieldtype = crate::schema::NumericOptions::default().set_fast();
        let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
        let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
        let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
        let index = Index::create_from_tempdir(schema_builder.build())?;
        // Low-cardinality term dictionary (4 distinct values).
        let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
        // Log-normal distribution used to draw the `score_f64` values.
        let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
        // High-cardinality term dictionary: 150_000 distinct `author{n}` strings.
        let many_terms_data = (0..150_000)
            .map(|num| format!("author{}", num))
            .collect::<Vec<_>>();
        {
            // Fixed seed so that every run indexes the same data.
            let mut rng = StdRng::from_seed([1u8; 32]);
            // One indexing thread; 200_000_000 is presumably the memory budget in bytes —
            // TODO confirm against `writer_with_num_threads`.
            let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
            // To make the different test cases comparable we just change one doc to force the
            // cardinality
            if cardinality == Cardinality::Optional {
                index_writer.add_document(doc!())?;
            }
            if cardinality == Cardinality::Multivalued {
                // Every field appears twice in this single document.
                index_writer.add_document(doc!(
                    json_field => json!({"mixed_type": 10.0}),
                    json_field => json!({"mixed_type": 10.0}),
                    text_field => "cool",
                    text_field => "cool",
                    text_field_many_terms => "cool",
                    text_field_many_terms => "cool",
                    text_field_few_terms => "cool",
                    text_field_few_terms => "cool",
                    score_field => 1u64,
                    score_field => 1u64,
                    score_field_f64 => lg_norm.sample(&mut rng),
                    score_field_f64 => lg_norm.sample(&mut rng),
                    score_field_i64 => 1i64,
                    score_field_i64 => 1i64,
                ))?;
            }
            // Number of documents that carry values; reduced 20x for the sparse variant.
            let mut doc_with_value = 1_000_000;
            if cardinality == Cardinality::Sparse {
                doc_with_value /= 20;
            }
            // NOTE(review): unused; the same upper bound is hard-coded in `gen_range` below.
            let _val_max = 1_000_000.0;
            for _ in 0..doc_with_value {
                let val: f64 = rng.gen_range(0.0..1_000_000.0);
                let json = if rng.gen_bool(0.1) {
                    // 10% are numeric values
                    json!({ "mixed_type": val })
                } else {
                    json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
                };
                index_writer.add_document(doc!(
                    text_field => "cool",
                    json_field => json,
                    text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
                    text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
                    score_field => val as u64,
                    score_field_f64 => lg_norm.sample(&mut rng),
                    score_field_i64 => val as i64,
                ))?;
                if cardinality == Cardinality::Sparse {
                    // Filler documents holding only the `text` field, so they still match a
                    // term query on "cool" but have no value in the other fields.
                    for _ in 0..20 {
                        index_writer.add_document(doc!(text_field => "cool"))?;
                    }
                }
            }
            // writing the segment
            index_writer.commit()?;
        }
        Ok(index)
    }
    use paste::paste;
    /// Generates four `#[bench]` functions for the benchmark named `$x`: `$x`, `$x_opt`,
    /// `$x_multi` and `$x_sparse`. Each one forwards to `$x_card` — which the caller must
    /// define — with `Cardinality::Full`, `Optional`, `Multivalued` and `Sparse` respectively.
    /// The derived identifiers are assembled with `paste!`.
    #[macro_export]
    macro_rules! bench_all_cardinalities {
        (  $x:ident ) => {
            paste! {
                #[bench]
                fn $x(b: &mut Bencher) {
                    [<$x _card>](b, Cardinality::Full)
                }
                #[bench]
                fn [<$x _opt>](b: &mut Bencher) {
                    [<$x _card>](b, Cardinality::Optional)
                }
                #[bench]
                fn [<$x _multi>](b: &mut Bencher) {
                    [<$x _card>](b, Cardinality::Multivalued)
                }
                #[bench]
                fn [<$x _sparse>](b: &mut Bencher) {
                    [<$x _card>](b, Cardinality::Sparse)
                }
            }
        };
    }
    bench_all_cardinalities!(bench_aggregation_average_u64);
    /// Benchmarks an `avg` aggregation on the u64 `score` field over the documents matching
    /// the term query `text:"cool"`.
    ///
    /// The index is built once, outside the timed closure; query construction, request
    /// parsing, collector creation and the search itself are all inside `b.iter`.
    fn bench_aggregation_average_u64_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        let text_field = reader.searcher().schema().get_field("text").unwrap();
        b.iter(|| {
            let term_query = TermQuery::new(
                Term::from_field_text(text_field, "cool"),
                IndexRecordOption::Basic,
            );
            let agg_req_1: Aggregations = serde_json::from_value(json!({
                "average": { "avg": { "field": "score", } }
            }))
            .unwrap();
            let collector = get_collector(agg_req_1);
            let searcher = reader.searcher();
            searcher.search(&term_query, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_stats_f64);
    /// Benchmarks a `stats` aggregation on the `score_f64` field over the documents matching
    /// the term query `text:"cool"`.
    ///
    /// Only the index construction is kept outside the timed `b.iter` closure.
    fn bench_aggregation_stats_f64_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        let text_field = reader.searcher().schema().get_field("text").unwrap();
        b.iter(|| {
            let term_query = TermQuery::new(
                Term::from_field_text(text_field, "cool"),
                IndexRecordOption::Basic,
            );
            let agg_req_1: Aggregations = serde_json::from_value(json!({
                "average_f64": { "stats": { "field": "score_f64", } }
            }))
            .unwrap();
            let collector = get_collector(agg_req_1);
            let searcher = reader.searcher();
            searcher.search(&term_query, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_average_f64);
    /// Benchmarks an `avg` aggregation on the `score_f64` field over the documents matching
    /// the term query `text:"cool"`.
    ///
    /// Only the index construction is kept outside the timed `b.iter` closure.
    fn bench_aggregation_average_f64_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        let text_field = reader.searcher().schema().get_field("text").unwrap();
        b.iter(|| {
            let term_query = TermQuery::new(
                Term::from_field_text(text_field, "cool"),
                IndexRecordOption::Basic,
            );
            let agg_req_1: Aggregations = serde_json::from_value(json!({
                "average_f64": { "avg": { "field": "score_f64", } }
            }))
            .unwrap();
            let collector = get_collector(agg_req_1);
            let searcher = reader.searcher();
            searcher.search(&term_query, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_percentiles_f64);
    /// Benchmarks a `percentiles` aggregation (95, 99 and 99.9) on the `score_f64` field
    /// over all documents (`AllQuery`).
    ///
    /// The request is parsed from a JSON string inside the timed `b.iter` closure.
    fn bench_aggregation_percentiles_f64_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req_str = r#"
            {
              "mypercentiles": {
                "percentiles": {
                  "field": "score_f64",
                  "percents": [ 95, 99, 99.9 ]
                }
              }
            } "#;
            let agg_req_1: Aggregations = serde_json::from_str(agg_req_str).unwrap();
            let collector = get_collector(agg_req_1);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_average_u64_and_f64);
    /// Benchmarks two `avg` aggregations in a single request — one on `score_f64`, one on
    /// `score` — over the documents matching the term query `text:"cool"`.
    fn bench_aggregation_average_u64_and_f64_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        let text_field = reader.searcher().schema().get_field("text").unwrap();
        b.iter(|| {
            let term_query = TermQuery::new(
                Term::from_field_text(text_field, "cool"),
                IndexRecordOption::Basic,
            );
            let agg_req_1: Aggregations = serde_json::from_value(json!({
                "average_f64": { "avg": { "field": "score_f64" } },
                "average": { "avg": { "field": "score" } },
            }))
            .unwrap();
            let collector = get_collector(agg_req_1);
            let searcher = reader.searcher();
            searcher.search(&term_query, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_terms_few);
    /// Benchmarks a `terms` aggregation on `text_few_terms` (the field populated from the
    /// four-value term list) over all documents (`AllQuery`).
    fn bench_aggregation_terms_few_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req: Aggregations = serde_json::from_value(json!({
                "my_texts": { "terms": { "field": "text_few_terms" } },
            }))
            .unwrap();
            let collector = get_collector(agg_req);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_terms_many_with_top_hits_agg);
    /// Benchmarks a `terms` aggregation on `text_many_terms` with a `top_hits`
    /// sub-aggregation (sorted by `score` descending, size 2, returning `score_f64`) over
    /// all documents (`AllQuery`).
    fn bench_aggregation_terms_many_with_top_hits_agg_card(
        b: &mut Bencher,
        cardinality: Cardinality,
    ) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req: Aggregations = serde_json::from_value(json!({
                "my_texts": {
                    "terms": { "field": "text_many_terms" },
                    "aggs": {
                        "top_hits": { "top_hits":
                            {
                                "sort": [
                                    { "score": "desc" }
                                ],
                                "size": 2,
                                "doc_value_fields": ["score_f64"]
                            }
                        }
                    }
                },
            }))
            .unwrap();
            let collector = get_collector(agg_req);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_terms_many_with_sub_agg);
    /// Benchmarks a `terms` aggregation on `text_many_terms` with an `avg` sub-aggregation
    /// on `score_f64`, over all documents (`AllQuery`).
    fn bench_aggregation_terms_many_with_sub_agg_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req: Aggregations = serde_json::from_value(json!({
                "my_texts": {
                    "terms": { "field": "text_many_terms" },
                    "aggs": {
                        "average_f64": { "avg": { "field": "score_f64" } }
                    }
                },
            }))
            .unwrap();
            let collector = get_collector(agg_req);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_terms_many_json_mixed_type_with_sub_agg);
    /// Benchmarks a `terms` aggregation on the JSON path `json.mixed_type` — which the index
    /// builder fills with a mix of numbers and strings — with an `avg` sub-aggregation on
    /// `score_f64`, over all documents (`AllQuery`).
    fn bench_aggregation_terms_many_json_mixed_type_with_sub_agg_card(
        b: &mut Bencher,
        cardinality: Cardinality,
    ) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req: Aggregations = serde_json::from_value(json!({
                "my_texts": {
                    "terms": { "field": "json.mixed_type" },
                    "aggs": {
                        "average_f64": { "avg": { "field": "score_f64" } }
                    }
                },
            }))
            .unwrap();
            let collector = get_collector(agg_req);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_terms_many2);
    /// Benchmarks a plain `terms` aggregation on `text_many_terms` (no sub-aggregation, default
    /// ordering) over all documents (`AllQuery`).
    fn bench_aggregation_terms_many2_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req: Aggregations = serde_json::from_value(json!({
                "my_texts": { "terms": { "field": "text_many_terms" } },
            }))
            .unwrap();
            let collector = get_collector(agg_req);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_terms_many_order_by_term);
    /// Benchmarks a `terms` aggregation on `text_many_terms` ordered by key (`_key`)
    /// descending, over all documents (`AllQuery`).
    fn bench_aggregation_terms_many_order_by_term_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req: Aggregations = serde_json::from_value(json!({
                "my_texts": { "terms": { "field": "text_many_terms", "order": { "_key": "desc" } } },
            }))
            .unwrap();
            let collector = get_collector(agg_req);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_range_only);
    /// Benchmarks a `range` aggregation with six contiguous ranges (3 to 60000) on
    /// `score_f64`, without sub-aggregation, over all documents (`AllQuery`).
    fn bench_aggregation_range_only_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req_1: Aggregations = serde_json::from_value(json!({
                "range_f64": { "range": { "field": "score_f64", "ranges": [
                    { "from": 3, "to": 7000 },
                    { "from": 7000, "to": 20000 },
                    { "from": 20000, "to": 30000 },
                    { "from": 30000, "to": 40000 },
                    { "from": 40000, "to": 50000 },
                    { "from": 50000, "to": 60000 }
                ] } },
            }))
            .unwrap();
            let collector = get_collector(agg_req_1);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_range_with_avg);
    /// Benchmarks a `range` aggregation with six contiguous ranges (3 to 60000) on
    /// `score_f64`, each bucket carrying an `avg` sub-aggregation on `score_f64`, over all
    /// documents (`AllQuery`).
    fn bench_aggregation_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req_1: Aggregations = serde_json::from_value(json!({
                "rangef64": {
                    "range": {
                        "field": "score_f64",
                        "ranges": [
                            { "from": 3, "to": 7000 },
                            { "from": 7000, "to": 20000 },
                            { "from": 20000, "to": 30000 },
                            { "from": 30000, "to": 40000 },
                            { "from": 40000, "to": 50000 },
                            { "from": 50000, "to": 60000 }
                        ]
                    },
                    "aggs": {
                        "average_f64": { "avg": { "field": "score_f64" } }
                    }
                },
            }))
            .unwrap();
            let collector = get_collector(agg_req_1);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    // Hard bounds use a different algorithm, because they actually limit the collection
    // range.
    bench_all_cardinalities!(bench_aggregation_histogram_only_hard_bounds);
    /// Benchmarks a `histogram` aggregation on `score_f64` with interval 100 and
    /// `hard_bounds` of 1000..=300000, over all documents (`AllQuery`).
    fn bench_aggregation_histogram_only_hard_bounds_card(
        b: &mut Bencher,
        cardinality: Cardinality,
    ) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req_1: Aggregations = serde_json::from_value(json!({
                "rangef64": { "histogram": { "field": "score_f64", "interval": 100, "hard_bounds": { "min": 1000, "max": 300000 } } },
            }))
            .unwrap();
            let collector = get_collector(agg_req_1);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_histogram_with_avg);
    /// Benchmarks a `histogram` aggregation on `score_f64` with interval 100, each bucket
    /// carrying an `avg` sub-aggregation on `score_f64`, over all documents (`AllQuery`).
    fn bench_aggregation_histogram_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req_1: Aggregations = serde_json::from_value(json!({
                "rangef64": {
                    "histogram": { "field": "score_f64", "interval": 100 },
                    "aggs": {
                        "average_f64": { "avg": { "field": "score_f64" } }
                    }
                }
            }))
            .unwrap();
            let collector = get_collector(agg_req_1);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_histogram_only);
    /// Benchmarks a `histogram` aggregation on `score_f64` with interval 100 and no
    /// sub-aggregation or bounds, over all documents (`AllQuery`).
    fn bench_aggregation_histogram_only_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        b.iter(|| {
            let agg_req_1: Aggregations = serde_json::from_value(json!({
                "rangef64": {
                    "histogram": {
                        "field": "score_f64",
                        "interval": 100 // 1000 buckets
                    },
                }
            }))
            .unwrap();
            let collector = get_collector(agg_req_1);
            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
        });
    }
    bench_all_cardinalities!(bench_aggregation_avg_and_range_with_avg);
    /// Benchmarks a request combining a top-level `avg` on `score` with a three-range `range`
    /// aggregation on `score_f64` whose buckets carry an `avg` sub-aggregation on `score`,
    /// over the documents matching the term query `text:"cool"`.
    fn bench_aggregation_avg_and_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
        let index = get_test_index_bench(cardinality).unwrap();
        let reader = index.reader().unwrap();
        let text_field = reader.searcher().schema().get_field("text").unwrap();
        b.iter(|| {
            let term_query = TermQuery::new(
                Term::from_field_text(text_field, "cool"),
                IndexRecordOption::Basic,
            );
            let agg_req_1: Aggregations = serde_json::from_value(json!({
                "rangef64": {
                    "range": {
                        "field": "score_f64",
                        "ranges": [
                            { "from": 3, "to": 7000 },
                            { "from": 7000, "to": 20000 },
                            { "from": 20000, "to": 60000 }
                        ]
                    },
                    "aggs": {
                        "average_in_range": { "avg": { "field": "score" } }
                    }
                },
                "average": { "avg": { "field": "score" } }
            }))
            .unwrap();
            let collector = get_collector(agg_req_1);
            let searcher = reader.searcher();
            searcher.search(&term_query, &collector).unwrap()
        });
    }
 }
--- a/src/aggregation/agg_limits.rs
+++ b/src/aggregation/agg_limits.rs
@@ -81,10 +81,11 @@ impl AggregationLimits {
        }
    }
-    pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
+    pub(crate) fn add_memory_consumed(&self, add_num_bytes: u64) -> crate::Result<()> {
-        self.memory_consumption
+        let prev_value = self
-            .fetch_add(num_bytes, Ordering::Relaxed);
+            .memory_consumption
-        validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
+            .fetch_add(add_num_bytes, Ordering::Relaxed);
        validate_memory_consumption(prev_value + add_num_bytes, self.memory_limit)?;
        Ok(())
    }
@@ -94,11 +95,11 @@ impl AggregationLimits {
 }
 fn validate_memory_consumption(
-    memory_consumption: &AtomicU64,
+    memory_consumption: u64,
    memory_limit: ByteCount,
 ) -> Result<(), AggregationError> {
    // Load the estimated memory consumed by the aggregations
-    let memory_consumed: ByteCount = memory_consumption.load(Ordering::Relaxed).into();
+    let memory_consumed: ByteCount = memory_consumption.into();
    if memory_consumed > memory_limit {
        return Err(AggregationError::MemoryExceeded {
            limit: memory_limit,
@@ -118,10 +119,11 @@ pub struct ResourceLimitGuard {
 }
 impl ResourceLimitGuard {
-    pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
+    pub(crate) fn add_memory_consumed(&self, add_num_bytes: u64) -> crate::Result<()> {
-        self.memory_consumption
+        let prev_value = self
-            .fetch_add(num_bytes, Ordering::Relaxed);
+            .memory_consumption
-        validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
+            .fetch_add(add_num_bytes, Ordering::Relaxed);
        validate_memory_consumption(prev_value + add_num_bytes, self.memory_limit)?;
        Ok(())
    }
 }
--- a/src/aggregation/agg_req_with_accessor.rs
+++ b/src/aggregation/agg_req_with_accessor.rs
@@ -17,7 +17,8 @@ use super::metric::{
 use super::segment_agg_result::AggregationLimits;
 use super::VecWithNames;
 use crate::aggregation::{f64_to_fastfield_u64, Key};
-use crate::{SegmentOrdinal, SegmentReader};
+use crate::index::SegmentReader;
 use crate::SegmentOrdinal;
 #[derive(Default)]
 pub(crate) struct AggregationsWithAccessor {
--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -331,9 +331,11 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
        }
        let mem_delta = self.get_memory_consumption() - mem_pre;
        if mem_delta > 0 {
            bucket_agg_accessor
                .limits
                .add_memory_consumed(mem_delta as u64)?;
        }
        Ok(())
    }
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -324,9 +324,11 @@ impl SegmentAggregationCollector for SegmentTermCollector {
        }
        let mem_delta = self.get_memory_consumption() - mem_pre;
        if mem_delta > 0 {
            bucket_agg_accessor
                .limits
                .add_memory_consumed(mem_delta as u64)?;
        }
        Ok(())
    }
--- a/src/aggregation/collector.rs
+++ b/src/aggregation/collector.rs
@@ -8,7 +8,8 @@ use super::segment_agg_result::{
 };
 use crate::aggregation::agg_req_with_accessor::get_aggs_with_segment_accessor_and_validate;
 use crate::collector::{Collector, SegmentCollector};
-use crate::{DocId, SegmentOrdinal, SegmentReader, TantivyError};
+use crate::index::SegmentReader;
 use crate::{DocId, SegmentOrdinal, TantivyError};
 /// The default max bucket count, before the aggregation fails.
 pub const DEFAULT_BUCKET_LIMIT: u32 = 65000;
--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -143,8 +143,6 @@ use std::fmt::Display;
 #[cfg(test)]
 mod agg_tests;
 mod agg_bench;
 use core::fmt;
 pub use agg_limits::AggregationLimits;
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -4,7 +4,8 @@ use std::marker::PhantomData;
 use serde::{Deserialize, Serialize};
 use super::top_score_collector::TopNComputer;
-use crate::{DocAddress, DocId, SegmentOrdinal, SegmentReader};
+use crate::index::SegmentReader;
 use crate::{DocAddress, DocId, SegmentOrdinal};
 /// Contains a feature (field, score, etc.) of a document along with the document address.
 ///
--- a/src/core/executor.rs
+++ b/src/core/executor.rs
@@ -1,19 +1,25 @@
-use rayon::{ThreadPool, ThreadPoolBuilder};
+use std::sync::Arc;
 #[cfg(feature = "quickwit")]
 use futures_util::{future::Either, FutureExt};
 use crate::TantivyError;
-/// Search executor whether search request are single thread or multithread.
+/// Executor makes it possible to run tasks in single thread or
-///
+/// in a thread pool.
-/// We don't expose Rayon thread pool directly here for several reasons.
+#[derive(Clone)]
 ///
 /// First dependency hell. It is not a good idea to expose the
 /// API of a dependency, knowing it might conflict with a different version
 /// used by the client. Second, we may stop using rayon in the future.
 pub enum Executor {
    /// Single thread variant of an Executor
    SingleThread,
    /// Thread pool variant of an Executor
-    ThreadPool(ThreadPool),
+    ThreadPool(Arc<rayon::ThreadPool>),
 }
 /// Wraps an already-built, shared rayon thread pool into `Executor::ThreadPool`.
 /// Only compiled when the `quickwit` feature is enabled.
 #[cfg(feature = "quickwit")]
 impl From<Arc<rayon::ThreadPool>> for Executor {
    fn from(thread_pool: Arc<rayon::ThreadPool>) -> Self {
        Executor::ThreadPool(thread_pool)
    }
 }
 impl Executor {
@@ -24,11 +30,11 @@ impl Executor {
    /// Creates an Executor that dispatches the tasks in a thread pool.
    pub fn multi_thread(num_threads: usize, prefix: &'static str) -> crate::Result<Executor> {
-        let pool = ThreadPoolBuilder::new()
+        let pool = rayon::ThreadPoolBuilder::new()
            .num_threads(num_threads)
            .thread_name(move |num| format!("{prefix}{num}"))
            .build()?;
-        Ok(Executor::ThreadPool(pool))
+        Ok(Executor::ThreadPool(Arc::new(pool)))
    }
    /// Perform a map in the thread pool.
@@ -91,11 +97,36 @@ impl Executor {
            }
        }
    }
    /// Runs `cpu_intensive_task` on this executor and returns a future yielding its result.
    ///
    /// * `SingleThread`: the task is executed inline, at call time, and the returned future
    ///   is immediately ready with `Ok(result)`.
    /// * `ThreadPool`: the task is queued on the pool and its result is sent back through a
    ///   oneshot channel. A task whose channel is already closed when a worker picks it up
    ///   (presumably because the returned future was dropped) is skipped without running.
    ///
    /// If the task panics, returns `Err(())`.
    ///
    /// NOTE(review): in the `SingleThread` variant the task runs in the caller, so a panic
    /// would unwind into the caller rather than surface as `Err(())` — confirm intended.
    #[cfg(feature = "quickwit")]
    pub fn spawn_blocking<T: Send + 'static>(
        &self,
        cpu_intensive_task: impl FnOnce() -> T + Send + 'static,
    ) -> impl std::future::Future<Output = Result<T, ()>> {
        match self {
            // Evaluated eagerly: the task has already completed when the future is returned.
            Executor::SingleThread => Either::Left(std::future::ready(Ok(cpu_intensive_task()))),
            Executor::ThreadPool(pool) => {
                let (sender, receiver) = oneshot::channel();
                pool.spawn(|| {
                    // Cancellation check: do not spend CPU on a result nobody can receive.
                    if sender.is_closed() {
                        return;
                    }
                    let task_result = cpu_intensive_task();
                    // A failed send is deliberately ignored.
                    let _ = sender.send(task_result);
                });
                // Collapse the channel's receive error into `()`.
                let res = receiver.map(|res| res.map_err(|_| ()));
                Either::Right(res)
            }
        }
    }
 }
 #[cfg(test)]
 mod tests {
    use super::Executor;
    #[test]
@@ -147,4 +178,34 @@ mod tests {
            assert_eq!(result[i], i * 2);
        }
    }
    // Checks that tasks queued through `spawn_blocking` are skipped once their future has
    // been dropped: 1_000 tasks of 4ms each are queued on a 3-thread pool, the futures are
    // dropped after 5ms, and only a handful of tasks may have incremented the counter.
    #[cfg(feature = "quickwit")]
    #[test]
    fn test_cancel_cpu_intensive_tasks() {
        use std::sync::atomic::{AtomicU64, Ordering};
        use std::sync::Arc;
        use std::time::Duration;
        let counter: Arc<AtomicU64> = Default::default();
        let mut futures = Vec::new();
        let executor = Executor::multi_thread(3, "search-test").unwrap();
        for _ in 0..1_000 {
            let counter_clone = counter.clone();
            let fut = executor.spawn_blocking(move || {
                std::thread::sleep(Duration::from_millis(4));
                counter_clone.fetch_add(1, Ordering::SeqCst)
            });
            futures.push(fut);
        }
        std::thread::sleep(Duration::from_millis(5));
        // The first few tasks (about one or two per worker thread) should run, but the
        // others should get cancelled.
        drop(futures);
        // Each queued closure owns a clone of `counter`; wait until all of them have been
        // run or skipped, i.e. until only our own reference is left.
        while Arc::strong_count(&counter) > 1 {
            std::thread::sleep(Duration::from_millis(10));
        }
        // With ideal timing, we expect the result to always be 6, but as long as we ran some
        // and cancelled most, the test is a success.
        assert!(counter.load(Ordering::SeqCst) > 0);
        assert!(counter.load(Ordering::SeqCst) < 50);
    }
 }
--- a/src/core/json_utils.rs
+++ b/src/core/json_utils.rs
@@ -4,7 +4,7 @@ use rustc_hash::FxHashMap;
 use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
 use crate::schema::document::{ReferenceValue, ReferenceValueLeaf, Value};
-use crate::schema::{Field, Type};
+use crate::schema::Type;
 use crate::time::format_description::well_known::Rfc3339;
 use crate::time::{OffsetDateTime, UtcOffset};
 use crate::tokenizer::TextAnalyzer;
@@ -31,7 +31,7 @@ use crate::{DateTime, DocId, Term};
 /// position 1.
 /// As a result, with lemmatization, "The Smiths" will match our object.
 ///
-/// Worse, if a same term is appears in the second object, a non increasing value would be pushed
+/// Worse, if a same term appears in the second object, a non increasing value would be pushed
 /// to the position recorder probably provoking a panic.
 ///
 /// This problem is solved for regular multivalued object by offsetting the position
@@ -50,7 +50,7 @@ use crate::{DateTime, DocId, Term};
 /// We can therefore afford working with a map that is not perfect. It is fine if several
 /// paths map to the same index position as long as the probability is relatively low.
 #[derive(Default)]
-struct IndexingPositionsPerPath {
+pub(crate) struct IndexingPositionsPerPath {
    positions_per_path: FxHashMap<u32, IndexingPosition>,
 }
@@ -58,6 +58,9 @@ impl IndexingPositionsPerPath {
    /// Returns a mutable reference to the `IndexingPosition` stored for `id`, inserting a
    /// default one first if the map has no entry for that id yet.
    fn get_position_from_id(&mut self, id: u32) -> &mut IndexingPosition {
        self.positions_per_path.entry(id).or_default()
    }
    /// Removes every recorded position, leaving the map empty.
    pub fn clear(&mut self) {
        self.positions_per_path.clear();
    }
 }
 /// Convert JSON_PATH_SEGMENT_SEP to a dot.
@@ -68,36 +71,6 @@ pub fn json_path_sep_to_dot(path: &mut str) {
    }
 }
 /// Indexes every JSON object yielded by `json_visitors` for the document `doc`.
 ///
 /// The path writer is cleared and configured with `expand_dots_enabled` up front, and a single
 /// `IndexingPositionsPerPath` is shared by all the objects handled in this call.
 ///
 /// # Errors
 ///
 /// Returns the first `Err` item produced by `json_visitors`. Objects yielded before that item
 /// have already been handed to `index_json_object`.
 #[allow(clippy::too_many_arguments)]
 pub(crate) fn index_json_values<'a, V: Value<'a>>(
    doc: DocId,
    json_visitors: impl Iterator<Item = crate::Result<V::ObjectIter>>,
    text_analyzer: &mut TextAnalyzer,
    expand_dots_enabled: bool,
    term_buffer: &mut Term,
    postings_writer: &mut dyn PostingsWriter,
    json_path_writer: &mut JsonPathWriter,
    ctx: &mut IndexingContext,
 ) -> crate::Result<()> {
    // Reset the reusable path writer before starting on this document's values.
    json_path_writer.clear();
    json_path_writer.set_expand_dots(expand_dots_enabled);
    // Fresh position map per call; it is shared across all objects of the iterator.
    let mut positions_per_path: IndexingPositionsPerPath = Default::default();
    for json_visitor_res in json_visitors {
        // Stop at the first failing item and propagate its error.
        let json_visitor = json_visitor_res?;
        index_json_object::<V>(
            doc,
            json_visitor,
            text_analyzer,
            term_buffer,
            json_path_writer,
            postings_writer,
            ctx,
            &mut positions_per_path,
        );
    }
    Ok(())
 }
 #[allow(clippy::too_many_arguments)]
 fn index_json_object<'a, V: Value<'a>>(
    doc: DocId,
@@ -126,7 +99,7 @@ fn index_json_object<'a, V: Value<'a>>(
 }
 #[allow(clippy::too_many_arguments)]
-fn index_json_value<'a, V: Value<'a>>(
+pub(crate) fn index_json_value<'a, V: Value<'a>>(
    doc: DocId,
    json_value: V,
    text_analyzer: &mut TextAnalyzer,
@@ -166,12 +139,18 @@ fn index_json_value<'a, V: Value<'a>>(
                );
            }
            ReferenceValueLeaf::U64(val) => {
                // try to parse to i64, since when querying we will apply the same logic and prefer
                // i64 values
                set_path_id(
                    term_buffer,
                    ctx.path_to_unordered_id
                        .get_or_allocate_unordered_id(json_path_writer.as_str()),
                );
                if let Ok(i64_val) = val.try_into() {
                    term_buffer.append_type_and_fast_value::<i64>(i64_val);
                } else {
                    term_buffer.append_type_and_fast_value(val);
                }
                postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
            }
            ReferenceValueLeaf::I64(val) => {
@@ -257,10 +236,7 @@ fn index_json_value<'a, V: Value<'a>>(
 /// Tries to infer a JSON type from a string and append it to the term.
 ///
 /// The term must be json + JSON path.
-pub(crate) fn convert_to_fast_value_and_append_to_json_term(
+pub fn convert_to_fast_value_and_append_to_json_term(mut term: Term, phrase: &str) -> Option<Term> {
    mut term: Term,
    phrase: &str,
 ) -> Option<Term> {
    assert_eq!(
        term.value()
            .as_json_value_bytes()
@@ -349,44 +325,24 @@ pub(crate) fn encode_column_name(
    path.into()
 }
 /// Builds a JSON `Term` on `json_field` that addresses the path made of the `paths` segments.
 ///
 /// Every segment is pushed, in order, into a `JsonPathWriter` created with
 /// `expand_dots_enabled`. The resulting path is then appended as bytes to a term created with
 /// `Type::Json`. No value is appended: callers add the typed value afterwards.
 pub fn term_from_json_paths<'a>(
    json_field: Field,
    paths: impl Iterator<Item = &'a str>,
    expand_dots_enabled: bool,
 ) -> Term {
    let mut json_path = JsonPathWriter::with_expand_dots(expand_dots_enabled);
    for path in paths {
        json_path.push(path);
    }
    // NOTE(review): presumably terminates the path with its end marker -- confirm against
    // `JsonPathWriter::set_end`.
    json_path.set_end();
    let mut term = Term::with_type_and_field(Type::Json, json_field);
    term.append_bytes(json_path.as_str().as_bytes());
    term
 }
 #[cfg(test)]
 mod tests {
    use super::split_json_path;
    use crate::json_utils::term_from_json_paths;
    use crate::schema::Field;
    use crate::Term;
    #[test]
    fn test_json_writer() {
        let field = Field::from_field_id(1);
-        let mut term = term_from_json_paths(field, ["attributes", "color"].into_iter(), false);
+        let mut term = Term::from_field_json_path(field, "attributes.color", false);
        term.append_type_and_str("red");
        assert_eq!(
            format!("{:?}", term),
            "Term(field=1, type=Json, path=attributes.color, type=Str, \"red\")"
        );
-        let mut term = term_from_json_paths(
+        let mut term = Term::from_field_json_path(field, "attributes.dimensions.width", false);
            field,
            ["attributes", "dimensions", "width"].into_iter(),
            false,
        );
        term.append_type_and_fast_value(400i64);
        assert_eq!(
            format!("{:?}", term),
@@ -397,7 +353,7 @@ mod tests {
    #[test]
    fn test_string_term() {
        let field = Field::from_field_id(1);
-        let mut term = term_from_json_paths(field, ["color"].into_iter(), false);
+        let mut term = Term::from_field_json_path(field, "color", false);
        term.append_type_and_str("red");
        assert_eq!(term.serialized_term(), b"\x00\x00\x00\x01jcolor\x00sred")
@@ -406,7 +362,7 @@ mod tests {
    #[test]
    fn test_i64_term() {
        let field = Field::from_field_id(1);
-        let mut term = term_from_json_paths(field, ["color"].into_iter(), false);
+        let mut term = Term::from_field_json_path(field, "color", false);
        term.append_type_and_fast_value(-4i64);
        assert_eq!(
@@ -418,7 +374,7 @@ mod tests {
    #[test]
    fn test_u64_term() {
        let field = Field::from_field_id(1);
-        let mut term = term_from_json_paths(field, ["color"].into_iter(), false);
+        let mut term = Term::from_field_json_path(field, "color", false);
        term.append_type_and_fast_value(4u64);
        assert_eq!(
@@ -430,7 +386,7 @@ mod tests {
    #[test]
    fn test_f64_term() {
        let field = Field::from_field_id(1);
-        let mut term = term_from_json_paths(field, ["color"].into_iter(), false);
+        let mut term = Term::from_field_json_path(field, "color", false);
        term.append_type_and_fast_value(4.0f64);
        assert_eq!(
            term.serialized_term(),
@@ -441,7 +397,7 @@ mod tests {
    #[test]
    fn test_bool_term() {
        let field = Field::from_field_id(1);
-        let mut term = term_from_json_paths(field, ["color"].into_iter(), false);
+        let mut term = Term::from_field_json_path(field, "color", false);
        term.append_type_and_fast_value(true);
        assert_eq!(
            term.serialized_term(),
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -4,13 +4,13 @@ use std::{fmt, io};
 use crate::collector::Collector;
 use crate::core::Executor;
-use crate::index::SegmentReader;
+use crate::index::{SegmentId, SegmentReader};
 use crate::query::{Bm25StatisticsProvider, EnableScoring, Query};
 use crate::schema::document::DocumentDeserialize;
 use crate::schema::{Schema, Term};
 use crate::space_usage::SearcherSpaceUsage;
 use crate::store::{CacheStats, StoreReader};
-use crate::{DocAddress, Index, Opstamp, SegmentId, TrackedObject};
+use crate::{DocAddress, Index, Opstamp, TrackedObject};
 /// Identifies the searcher generation accessed by a [`Searcher`].
 ///
@@ -109,8 +109,9 @@ impl Searcher {
        &self,
        doc_address: DocAddress,
    ) -> crate::Result<D> {
        let executor = self.inner.index.search_executor();
        let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
-        store_reader.get_async(doc_address.doc_id).await
+        store_reader.get_async(doc_address.doc_id, executor).await
    }
    /// Access the schema associated with the index of this searcher.
--- a/src/core/tests.rs
+++ b/src/core/tests.rs
@@ -1,13 +1,14 @@
 use crate::collector::Count;
 use crate::directory::{RamDirectory, WatchCallback};
 use crate::index::SegmentId;
 use crate::indexer::{LogMergePolicy, NoMergePolicy};
-use crate::json_utils::term_from_json_paths;
+use crate::postings::Postings;
 use crate::query::TermQuery;
 use crate::schema::{Field, IndexRecordOption, Schema, INDEXED, STRING, TEXT};
 use crate::tokenizer::TokenizerManager;
 use crate::{
-    Directory, DocSet, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, Postings,
+    Directory, DocSet, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, ReloadPolicy,
-    ReloadPolicy, SegmentId, TantivyDocument, Term,
+    TantivyDocument, Term,
 };
 #[test]
@@ -417,8 +418,8 @@ fn test_non_text_json_term_freq() {
    let segment_reader = searcher.segment_reader(0u32);
    let inv_idx = segment_reader.inverted_index(field).unwrap();
-    let mut term = term_from_json_paths(field, ["tenant_id"].iter().cloned(), false);
+    let mut term = Term::from_field_json_path(field, "tenant_id", false);
-    term.append_type_and_fast_value(75u64);
+    term.append_type_and_fast_value(75i64);
    let postings = inv_idx
        .read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
@@ -451,8 +452,8 @@ fn test_non_text_json_term_freq_bitpacked() {
    let segment_reader = searcher.segment_reader(0u32);
    let inv_idx = segment_reader.inverted_index(field).unwrap();
-    let mut term = term_from_json_paths(field, ["tenant_id"].iter().cloned(), false);
+    let mut term = Term::from_field_json_path(field, "tenant_id", false);
-    term.append_type_and_fast_value(75u64);
+    term.append_type_and_fast_value(75i64);
    let mut postings = inv_idx
        .read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
--- a/src/fastfield/facet_reader.rs
+++ b/src/fastfield/facet_reader.rs
@@ -146,8 +146,11 @@ mod tests {
        facet_ords.extend(facet_reader.facet_ords(0u32));
        assert_eq!(&facet_ords, &[0u64]);
        let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0u32, 0u32))?;
-        let value: Option<&Facet> = doc.get_first(facet_field).and_then(|v| v.as_facet());
+        let value: Option<Facet> = doc
-        assert_eq!(value, Facet::from_text("/a/b").ok().as_ref());
+            .get_first(facet_field)
            .and_then(|v| v.as_facet())
            .map(|facet| Facet::from_encoded_string(facet.to_string()));
        assert_eq!(value, Facet::from_text("/a/b").ok());
        Ok(())
    }
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -80,7 +80,7 @@ mod tests {
    use std::path::Path;
    use columnar::StrColumn;
-    use common::{ByteCount, HasLen, TerminatingWrite};
+    use common::{ByteCount, DateTimePrecision, HasLen, TerminatingWrite};
    use once_cell::sync::Lazy;
    use rand::prelude::SliceRandom;
    use rand::rngs::StdRng;
@@ -88,14 +88,15 @@ mod tests {
    use super::*;
    use crate::directory::{Directory, RamDirectory, WritePtr};
    use crate::index::SegmentId;
    use crate::merge_policy::NoMergePolicy;
    use crate::schema::{
-        Facet, FacetOptions, Field, JsonObjectOptions, Schema, SchemaBuilder, TantivyDocument,
+        DateOptions, Facet, FacetOptions, Field, JsonObjectOptions, Schema, SchemaBuilder,
-        TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
+        TantivyDocument, TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
    };
    use crate::time::OffsetDateTime;
    use crate::tokenizer::{LowerCaser, RawTokenizer, TextAnalyzer, TokenizerManager};
-    use crate::{DateOptions, DateTimePrecision, Index, IndexWriter, SegmentId, SegmentReader};
+    use crate::{Index, IndexWriter, SegmentReader};
    pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
        let mut schema_builder = Schema::builder();
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -1,14 +1,14 @@
 use std::io;
 use columnar::{ColumnarWriter, NumericalValue};
-use common::JsonPathWriter;
+use common::{DateTimePrecision, JsonPathWriter};
 use tokenizer_api::Token;
 use crate::indexer::doc_id_mapping::DocIdMapping;
 use crate::schema::document::{Document, ReferenceValue, ReferenceValueLeaf, Value};
 use crate::schema::{value_type_to_column_type, Field, FieldType, Schema, Type};
 use crate::tokenizer::{TextAnalyzer, TokenizerManager};
-use crate::{DateTimePrecision, DocId, TantivyError};
+use crate::{DocId, TantivyError};
 /// Only index JSON down to a depth of 20.
 /// This is mostly to guard us from a stack overflow triggered by malicious input.
@@ -183,8 +183,7 @@ impl FastFieldsWriter {
                        .record_datetime(doc_id, field_name, truncated_datetime);
                }
                ReferenceValueLeaf::Facet(val) => {
-                    self.columnar_writer
+                    self.columnar_writer.record_str(doc_id, field_name, val);
                        .record_str(doc_id, field_name, val.encoded_str());
                }
                ReferenceValueLeaf::Bytes(val) => {
                    self.columnar_writer.record_bytes(doc_id, field_name, val);
--- a/src/functional_test.rs
+++ b/src/functional_test.rs
@@ -6,6 +6,7 @@ use rand::{thread_rng, Rng};
 use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
 use crate::schema::*;
 #[allow(deprecated)]
 use crate::{doc, schema, Index, IndexSettings, IndexSortByField, IndexWriter, Order, Searcher};
 fn check_index_content(searcher: &Searcher, vals: &[u64]) -> crate::Result<()> {
--- a/src/index/index.rs
+++ b/src/index/index.rs
@@ -3,7 +3,7 @@ use std::fmt;
 #[cfg(feature = "mmap")]
 use std::path::Path;
 use std::path::PathBuf;
-use std::sync::Arc;
+use std::thread::available_parallelism;
 use super::segment::Segment;
 use super::segment_reader::merge_field_meta_data;
@@ -293,7 +293,7 @@ pub struct Index {
    directory: ManagedDirectory,
    schema: Schema,
    settings: IndexSettings,
-    executor: Arc<Executor>,
+    executor: Executor,
    tokenizers: TokenizerManager,
    fast_field_tokenizers: TokenizerManager,
    inventory: SegmentMetaInventory,
@@ -318,29 +318,25 @@ impl Index {
    ///
    /// By default the executor is single-threaded, and simply runs in the calling thread.
    pub fn search_executor(&self) -> &Executor {
-        self.executor.as_ref()
+        &self.executor
    }
    /// Replace the default single thread search executor pool
    /// by a thread pool with a given number of threads.
    pub fn set_multithread_executor(&mut self, num_threads: usize) -> crate::Result<()> {
-        self.executor = Arc::new(Executor::multi_thread(num_threads, "tantivy-search-")?);
+        self.executor = Executor::multi_thread(num_threads, "tantivy-search-")?;
        Ok(())
    }
    /// Replace the search executor with a custom one, e.g. one backed by an outer thread pool.
-    pub fn set_shared_multithread_executor(
+    pub fn set_executor(&mut self, executor: Executor) {
-        &mut self,
+        self.executor = executor;
        shared_thread_pool: Arc<Executor>,
    ) -> crate::Result<()> {
        self.executor = shared_thread_pool.clone();
        Ok(())
    }
    /// Replace the default single thread search executor pool
    /// by a thread pool with as many threads as there are CPUs on the system.
    pub fn set_default_multithread_executor(&mut self) -> crate::Result<()> {
-        let default_num_threads = num_cpus::get();
+        let default_num_threads = available_parallelism()?.get();
        self.set_multithread_executor(default_num_threads)
    }
@@ -418,7 +414,7 @@ impl Index {
            schema,
            tokenizers: TokenizerManager::default(),
            fast_field_tokenizers: TokenizerManager::default(),
-            executor: Arc::new(Executor::single_thread()),
+            executor: Executor::single_thread(),
            inventory,
        }
    }
@@ -621,7 +617,7 @@ impl Index {
        &self,
        memory_budget_in_bytes: usize,
    ) -> crate::Result<IndexWriter<D>> {
-        let mut num_threads = std::cmp::min(num_cpus::get(), MAX_NUM_THREAD);
+        let mut num_threads = std::cmp::min(available_parallelism()?.get(), MAX_NUM_THREAD);
        let memory_budget_num_bytes_per_thread = memory_budget_in_bytes / num_threads;
        if memory_budget_num_bytes_per_thread < MEMORY_BUDGET_NUM_BYTES_MIN {
            num_threads = (memory_budget_in_bytes / MEMORY_BUDGET_NUM_BYTES_MIN).max(1);
--- a/src/index/mod.rs
+++ b/src/index/mod.rs
@@ -1,5 +1,3 @@
 //! # Index Module
 //!
 //! The `index` module in Tantivy contains core components to read and write indexes.
 //!
 //! It contains `Index` and `Segment`, where a `Index` consists of one or more `Segment`s.
--- a/src/indexer/delete_queue.rs
+++ b/src/indexer/delete_queue.rs
@@ -246,8 +246,9 @@ impl DeleteCursor {
 mod tests {
    use super::{DeleteOperation, DeleteQueue};
    use crate::index::SegmentReader;
    use crate::query::{Explanation, Scorer, Weight};
-    use crate::{DocId, Score, SegmentReader};
+    use crate::{DocId, Score};
    struct DummyWeight;
    impl Weight for DummyWeight {
--- a/src/indexer/log_merge_policy.rs
+++ b/src/indexer/log_merge_policy.rs
@@ -144,9 +144,9 @@ mod tests {
    use once_cell::sync::Lazy;
    use super::*;
-    use crate::index::SegmentMetaInventory;
+    use crate::index::{SegmentId, SegmentMetaInventory};
    use crate::schema;
    use crate::schema::INDEXED;
    use crate::{schema, SegmentId};
    static INVENTORY: Lazy<SegmentMetaInventory> = Lazy::new(SegmentMetaInventory::default);
--- a/src/indexer/merge_operation.rs
+++ b/src/indexer/merge_operation.rs
@@ -1,7 +1,8 @@
 use std::collections::HashSet;
 use std::ops::Deref;
-use crate::{Inventory, Opstamp, SegmentId, TrackedObject};
+use crate::index::SegmentId;
 use crate::{Inventory, Opstamp, TrackedObject};
 #[derive(Default)]
 pub(crate) struct MergeOperationInventory(Inventory<InnerMergeOperation>);
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -13,7 +13,7 @@ use crate::docset::{DocSet, TERMINATED};
 use crate::error::DataCorruption;
 use crate::fastfield::{AliveBitSet, FastFieldNotAvailableError};
 use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
-use crate::index::{Segment, SegmentReader};
+use crate::index::{Segment, SegmentComponent, SegmentReader};
 use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping};
 use crate::indexer::SegmentSerializer;
 use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
@@ -21,8 +21,7 @@ use crate::schema::{value_type_to_column_type, Field, FieldType, Schema};
 use crate::store::StoreWriter;
 use crate::termdict::{TermMerger, TermOrdinal};
 use crate::{
-    DocAddress, DocId, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
+    DocAddress, DocId, IndexSettings, IndexSortByField, InvertedIndexReader, Order, SegmentOrdinal,
    SegmentComponent, SegmentOrdinal,
 };
 /// Segment's max doc must be `< MAX_DOC_LIMIT`.
@@ -794,7 +793,7 @@ mod tests {
        BytesFastFieldTestCollector, FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE,
    };
    use crate::collector::{Count, FacetCollector};
-    use crate::index::Index;
+    use crate::index::{Index, SegmentId};
    use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
    use crate::schema::document::Value;
    use crate::schema::{
@@ -804,7 +803,7 @@ mod tests {
    use crate::time::OffsetDateTime;
    use crate::{
        assert_nearly_equals, schema, DateTime, DocAddress, DocId, DocSet, IndexSettings,
-        IndexSortByField, IndexWriter, Order, Searcher, SegmentId,
+        IndexSortByField, IndexWriter, Order, Searcher,
    };
    #[test]
--- a/src/indexer/merger_sorted_index_test.rs
+++ b/src/indexer/merger_sorted_index_test.rs
@@ -3,6 +3,7 @@ mod tests {
    use crate::collector::TopDocs;
    use crate::fastfield::AliveBitSet;
    use crate::index::Index;
    use crate::postings::Postings;
    use crate::query::QueryParser;
    use crate::schema::document::Value;
    use crate::schema::{
@@ -10,8 +11,8 @@ mod tests {
        TextFieldIndexing, TextOptions,
    };
    use crate::{
-        DocAddress, DocSet, IndexSettings, IndexSortByField, IndexWriter, Order, Postings,
+        DocAddress, DocSet, IndexSettings, IndexSortByField, IndexWriter, Order, TantivyDocument,
-        TantivyDocument, Term,
+        Term,
    };
    fn create_test_index_posting_list_issue(index_settings: Option<IndexSettings>) -> Index {
--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -182,7 +182,7 @@ mod tests_mmap {
        let index = Index::create_in_ram(schema_builder.build());
        let mut index_writer = index.writer_for_tests().unwrap();
        index_writer
-            .add_document(doc!(field=>json!({format!("{field_name_in}"): "test1"})))
+            .add_document(doc!(field=>json!({format!("{field_name_in}"): "test1", format!("num{field_name_in}"): 10})))
            .unwrap();
        index_writer
            .add_document(doc!(field=>json!({format!("a{field_name_in}"): "test2"})))
@@ -260,6 +260,64 @@ mod tests_mmap {
            "test6",
        );
        test_agg(format!("json.{field_name_out}a").as_str(), "test7");
        // `.` is stored as `\u{0001}` internally in tantivy
        let field_name_out_internal = if field_name_out == "." {
            "\u{0001}"
        } else {
            field_name_out
        };
        let mut fields = reader.searcher().segment_readers()[0]
            .inverted_index(field)
            .unwrap()
            .list_encoded_fields()
            .unwrap();
        assert_eq!(fields.len(), 8);
        fields.sort();
        let mut expected_fields = vec![
            (format!("a{field_name_out_internal}"), Type::Str),
            (format!("a{field_name_out_internal}a"), Type::Str),
            (
                format!("a{field_name_out_internal}a{field_name_out_internal}"),
                Type::Str,
            ),
            (
                format!("a{field_name_out_internal}\u{1}ab{field_name_out_internal}"),
                Type::Str,
            ),
            (
                format!("a{field_name_out_internal}\u{1}a{field_name_out_internal}"),
                Type::Str,
            ),
            (format!("{field_name_out_internal}a"), Type::Str),
            (format!("{field_name_out_internal}"), Type::Str),
            (format!("num{field_name_out_internal}"), Type::I64),
        ];
        expected_fields.sort();
        assert_eq!(fields, expected_fields);
        // Check columnar reader
        let mut columns = reader.searcher().segment_readers()[0]
            .fast_fields()
            .columnar()
            .list_columns()
            .unwrap()
            .into_iter()
            .map(|(name, _)| name)
            .collect::<Vec<_>>();
        let mut expected_columns = vec![
            format!("json\u{1}{field_name_out_internal}"),
            format!("json\u{1}{field_name_out_internal}a"),
            format!("json\u{1}a{field_name_out_internal}"),
            format!("json\u{1}a{field_name_out_internal}a"),
            format!("json\u{1}a{field_name_out_internal}a{field_name_out_internal}"),
            format!("json\u{1}a{field_name_out_internal}\u{1}ab{field_name_out_internal}"),
            format!("json\u{1}a{field_name_out_internal}\u{1}a{field_name_out_internal}"),
            format!("json\u{1}num{field_name_out_internal}"),
        ];
        columns.sort();
        expected_columns.sort();
        assert_eq!(columns, expected_columns);
    }
    #[test]
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -5,20 +5,20 @@ use tokenizer_api::BoxTokenStream;
 use super::doc_id_mapping::{get_doc_id_mapping_from_field, DocIdMapping};
 use super::operation::AddOperation;
 use crate::core::json_utils::index_json_values;
 use crate::fastfield::FastFieldsWriter;
 use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
-use crate::index::Segment;
+use crate::index::{Segment, SegmentComponent};
 use crate::indexer::segment_serializer::SegmentSerializer;
 use crate::json_utils::{index_json_value, IndexingPositionsPerPath};
 use crate::postings::{
    compute_table_memory_size, serialize_postings, IndexingContext, IndexingPosition,
    PerFieldPostingsWriter, PostingsWriter,
 };
-use crate::schema::document::{Document, ReferenceValue, Value};
+use crate::schema::document::{Document, Value};
 use crate::schema::{FieldEntry, FieldType, Schema, Term, DATE_TIME_PRECISION_INDEXED};
 use crate::store::{StoreReader, StoreWriter};
 use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer};
-use crate::{DocId, Opstamp, SegmentComponent, TantivyError};
+use crate::{DocId, Opstamp, TantivyError};
 /// Computes the initial size of the hash table.
 ///
@@ -68,6 +68,7 @@ pub struct SegmentWriter {
    pub(crate) fast_field_writers: FastFieldsWriter,
    pub(crate) fieldnorms_writer: FieldNormsWriter,
    pub(crate) json_path_writer: JsonPathWriter,
    pub(crate) json_positions_per_path: IndexingPositionsPerPath,
    pub(crate) doc_opstamps: Vec<Opstamp>,
    per_field_text_analyzers: Vec<TextAnalyzer>,
    term_buffer: Term,
@@ -119,6 +120,7 @@ impl SegmentWriter {
            per_field_postings_writers,
            fieldnorms_writer: FieldNormsWriter::for_schema(&schema),
            json_path_writer: JsonPathWriter::default(),
            json_positions_per_path: IndexingPositionsPerPath::default(),
            segment_serializer,
            fast_field_writers: FastFieldsWriter::from_schema_and_tokenizer_manager(
                &schema,
@@ -204,8 +206,7 @@ impl SegmentWriter {
                        // Used to help with linting and type checking.
                        let value = value_access as D::Value<'_>;
-                        let facet = value.as_facet().ok_or_else(make_schema_error)?;
+                        let facet_str = value.as_facet().ok_or_else(make_schema_error)?;
                        let facet_str = facet.encoded_str();
                        let mut facet_tokenizer = facet_tokenizer.token_stream(facet_str);
                        let mut indexing_position = IndexingPosition::default();
                        postings_writer.index_text(
@@ -228,7 +229,7 @@ impl SegmentWriter {
                                &mut self.per_field_text_analyzers[field.field_id() as usize];
                            text_analyzer.token_stream(text)
                        } else if let Some(tok_str) = value.as_pre_tokenized_text() {
-                            BoxTokenStream::new(PreTokenizedStream::from(tok_str.clone()))
+                            BoxTokenStream::new(PreTokenizedStream::from(*tok_str.clone()))
                        } else {
                            continue;
                        };
@@ -342,26 +343,24 @@ impl SegmentWriter {
                FieldType::JsonObject(json_options) => {
                    let text_analyzer =
                        &mut self.per_field_text_analyzers[field.field_id() as usize];
                    let json_values_it = values.map(|value_access| {
                        // Used to help with linting and type checking.
                        let value_access = value_access as D::Value<'_>;
                        let value = value_access.as_value();
-                        match value {
+                    self.json_positions_per_path.clear();
-                            ReferenceValue::Object(object_iter) => Ok(object_iter),
+                    self.json_path_writer
-                            _ => Err(make_schema_error()),
+                        .set_expand_dots(json_options.is_expand_dots_enabled());
-                        }
+                    for json_value in values {
-                    });
+                        self.json_path_writer.clear();
-                    index_json_values::<D::Value<'_>>(
+
                        index_json_value(
                            doc_id,
-                        json_values_it,
+                            json_value,
                            text_analyzer,
                        json_options.is_expand_dots_enabled(),
                            term_buffer,
                        postings_writer,
                            &mut self.json_path_writer,
                            postings_writer,
                            ctx,
-                    )?;
+                            &mut self.json_positions_per_path,
                        );
                    }
                }
                FieldType::IpAddr(_) => {
                    let mut num_vals = 0;
@@ -498,20 +497,20 @@ mod tests {
    use crate::collector::{Count, TopDocs};
    use crate::directory::RamDirectory;
    use crate::fastfield::FastValue;
-    use crate::json_utils::term_from_json_paths;
+    use crate::postings::{Postings, TermInfo};
    use crate::postings::TermInfo;
    use crate::query::{PhraseQuery, QueryParser};
    use crate::schema::document::Value;
    use crate::schema::{
-        Document, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, STORED, STRING, TEXT,
+        Document, IndexRecordOption, OwnedValue, Schema, TextFieldIndexing, TextOptions, STORED,
        STRING, TEXT,
    };
    use crate::store::{Compressor, StoreReader, StoreWriter};
    use crate::time::format_description::well_known::Rfc3339;
    use crate::time::OffsetDateTime;
    use crate::tokenizer::{PreTokenizedString, Token};
    use crate::{
-        DateTime, Directory, DocAddress, DocSet, Index, IndexWriter, Postings, TantivyDocument,
+        DateTime, Directory, DocAddress, DocSet, Index, IndexWriter, TantivyDocument, Term,
-        Term, TERMINATED,
+        TERMINATED,
    };
    #[test]
@@ -598,6 +597,45 @@ mod tests {
        assert_eq!(score_docs.len(), 2);
    }
    // Checks that a JSON field accepts bare leaf values (text, i64, u64) next to a regular
    // object, and that each of the four documents can be found by a query.
    #[test]
    fn test_flat_json_indexing() {
        // A JSON Object that contains mixed values on the first level
        let mut schema_builder = Schema::builder();
        let json_field = schema_builder.add_json_field("json", STORED | STRING);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema.clone());
        let mut writer = index.writer_for_tests().unwrap();
        // Text, i64, u64
        writer.add_document(doc!(json_field=>"b")).unwrap();
        writer
            .add_document(doc!(json_field=>OwnedValue::I64(10i64)))
            .unwrap();
        writer
            .add_document(doc!(json_field=>OwnedValue::U64(55u64)))
            .unwrap();
        // A regular object with a single key, for comparison with the bare values above.
        writer
            .add_document(doc!(json_field=>json!({"my_field": "a"})))
            .unwrap();
        writer.commit().unwrap();
        // Parses `query` with `json_field` as the default field, runs it with a limit of 4
        // (the number of documents added), and asserts that exactly one document matches.
        let search_and_expect = |query| {
            let query_parser = QueryParser::for_index(&index, vec![json_field]);
            let text_query = query_parser.parse_query(query).unwrap();
            let score_docs: Vec<(_, DocAddress)> = index
                .reader()
                .unwrap()
                .searcher()
                .search(&text_query, &TopDocs::with_limit(4))
                .unwrap();
            assert_eq!(score_docs.len(), 1);
        };
        // One query per document added above.
        search_and_expect("my_field:a");
        search_and_expect("b");
        search_and_expect("10");
        search_and_expect("55");
    }
    #[test]
    fn test_json_indexing() {
        let mut schema_builder = Schema::builder();
@@ -647,9 +685,8 @@ mod tests {
        let mut term_stream = term_dict.stream().unwrap();
-        let term_from_path = |paths: &[&str]| -> Term {
+        let term_from_path =
-            term_from_json_paths(json_field, paths.iter().cloned(), false)
+            |path: &str| -> Term { Term::from_field_json_path(json_field, path, false) };
        };
        fn set_fast_val<T: FastValue>(val: T, mut term: Term) -> Term {
            term.append_type_and_fast_value(val);
@@ -660,15 +697,14 @@ mod tests {
            term
        }
-        let term = term_from_path(&["bool"]);
+        let term = term_from_path("bool");
        assert!(term_stream.advance());
        assert_eq!(
            term_stream.key(),
            set_fast_val(true, term).serialized_value_bytes()
        );
-        let term = term_from_path(&["complexobject", "field.with.dot"]);
+        let term = term_from_path("complexobject.field\\.with\\.dot");
        assert!(term_stream.advance());
        assert_eq!(
            term_stream.key(),
@@ -676,7 +712,7 @@ mod tests {
        );
        // Date
-        let term = term_from_path(&["date"]);
+        let term = term_from_path("date");
        assert!(term_stream.advance());
        assert_eq!(
@@ -691,7 +727,7 @@ mod tests {
        );
        // Float
-        let term = term_from_path(&["float"]);
+        let term = term_from_path("float");
        assert!(term_stream.advance());
        assert_eq!(
            term_stream.key(),
@@ -699,21 +735,21 @@ mod tests {
        );
        // Number In Array
-        let term = term_from_path(&["my_arr"]);
+        let term = term_from_path("my_arr");
        assert!(term_stream.advance());
        assert_eq!(
            term_stream.key(),
            set_fast_val(2i64, term).serialized_value_bytes()
        );
-        let term = term_from_path(&["my_arr"]);
+        let term = term_from_path("my_arr");
        assert!(term_stream.advance());
        assert_eq!(
            term_stream.key(),
            set_fast_val(3i64, term).serialized_value_bytes()
        );
-        let term = term_from_path(&["my_arr"]);
+        let term = term_from_path("my_arr");
        assert!(term_stream.advance());
        assert_eq!(
            term_stream.key(),
@@ -721,13 +757,13 @@ mod tests {
        );
        // El in Array
-        let term = term_from_path(&["my_arr", "my_key"]);
+        let term = term_from_path("my_arr.my_key");
        assert!(term_stream.advance());
        assert_eq!(
            term_stream.key(),
            set_str("tokens", term).serialized_value_bytes()
        );
-        let term = term_from_path(&["my_arr", "my_key"]);
+        let term = term_from_path("my_arr.my_key");
        assert!(term_stream.advance());
        assert_eq!(
            term_stream.key(),
@@ -735,21 +771,21 @@ mod tests {
        );
        // Signed
-        let term = term_from_path(&["signed"]);
+        let term = term_from_path("signed");
        assert!(term_stream.advance());
        assert_eq!(
            term_stream.key(),
            set_fast_val(-2i64, term).serialized_value_bytes()
        );
-        let term = term_from_path(&["toto"]);
+        let term = term_from_path("toto");
        assert!(term_stream.advance());
        assert_eq!(
            term_stream.key(),
            set_str("titi", term).serialized_value_bytes()
        );
        // Unsigned
-        let term = term_from_path(&["unsigned"]);
+        let term = term_from_path("unsigned");
        assert!(term_stream.advance());
        assert_eq!(
            term_stream.key(),
@@ -776,7 +812,7 @@ mod tests {
        let searcher = reader.searcher();
        let segment_reader = searcher.segment_reader(0u32);
        let inv_index = segment_reader.inverted_index(json_field).unwrap();
-        let mut term = term_from_json_paths(json_field, ["mykey"].into_iter(), false);
+        let mut term = Term::from_field_json_path(json_field, "mykey", false);
        term.append_type_and_str("token");
        let term_info = inv_index.get_term_info(&term).unwrap().unwrap();
        assert_eq!(
@@ -815,7 +851,7 @@ mod tests {
        let searcher = reader.searcher();
        let segment_reader = searcher.segment_reader(0u32);
        let inv_index = segment_reader.inverted_index(json_field).unwrap();
-        let mut term = term_from_json_paths(json_field, ["mykey"].into_iter(), false);
+        let mut term = Term::from_field_json_path(json_field, "mykey", false);
        term.append_type_and_str("two tokens");
        let term_info = inv_index.get_term_info(&term).unwrap().unwrap();
        assert_eq!(
@@ -856,7 +892,7 @@ mod tests {
        let reader = index.reader().unwrap();
        let searcher = reader.searcher();
-        let term = term_from_json_paths(json_field, ["mykey", "field"].into_iter(), false);
+        let term = Term::from_field_json_path(json_field, "mykey.field", false);
        let mut hello_term = term.clone();
        hello_term.append_type_and_str("hello");
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -216,11 +216,6 @@ use once_cell::sync::Lazy;
 use serde::{Deserialize, Serialize};
 pub use self::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
 #[deprecated(
    since = "0.22.0",
    note = "Will be removed in tantivy 0.23. Use export from snippet module instead"
 )]
 pub use self::snippet::{Snippet, SnippetGenerator};
 #[doc(hidden)]
 pub use crate::core::json_utils;
 pub use crate::core::{Executor, Searcher, SearcherGeneration};
@@ -228,16 +223,10 @@ pub use crate::directory::Directory;
 #[allow(deprecated)] // Remove with index sorting
 pub use crate::index::{
    Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
-    Segment, SegmentComponent, SegmentId, SegmentMeta, SegmentReader,
+    Segment, SegmentMeta, SegmentReader,
 };
 #[deprecated(
    since = "0.22.0",
    note = "Will be removed in tantivy 0.23. Use export from indexer module instead"
 )]
 pub use crate::indexer::PreparedCommit;
 pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
-pub use crate::postings::Postings;
+pub use crate::schema::{Document, TantivyDocument, Term};
 pub use crate::schema::{DateOptions, DateTimePrecision, Document, TantivyDocument, Term};
 /// Index format version.
 const INDEX_FORMAT_VERSION: u32 = 6;
@@ -392,9 +381,10 @@ pub mod tests {
    use crate::docset::{DocSet, TERMINATED};
    use crate::index::SegmentReader;
    use crate::merge_policy::NoMergePolicy;
    use crate::postings::Postings;
    use crate::query::BooleanQuery;
    use crate::schema::*;
-    use crate::{DateTime, DocAddress, Index, IndexWriter, Postings, ReloadPolicy};
+    use crate::{DateTime, DocAddress, Index, IndexWriter, ReloadPolicy};
    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();
@@ -446,7 +436,6 @@ pub mod tests {
    }
    #[test]
    #[cfg(not(feature = "lz4"))]
    fn test_version_string() {
        use regex::Regex;
        let regex_ptn = Regex::new(
@@ -1109,9 +1098,9 @@ pub mod tests {
    #[test]
    fn test_update_via_delete_insert() -> crate::Result<()> {
        use crate::collector::Count;
        use crate::index::SegmentId;
        use crate::indexer::NoMergePolicy;
        use crate::query::AllQuery;
        use crate::SegmentId;
        const DOC_COUNT: u64 = 2u64;
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -56,7 +56,7 @@ pub struct InvertedIndexSerializer {
 impl InvertedIndexSerializer {
    /// Open a new `InvertedIndexSerializer` for the given segment
    pub fn open(segment: &mut Segment) -> crate::Result<InvertedIndexSerializer> {
-        use crate::SegmentComponent::{Positions, Postings, Terms};
+        use crate::index::SegmentComponent::{Positions, Postings, Terms};
        let inv_index_serializer = InvertedIndexSerializer {
            terms_write: CompositeWrite::wrap(segment.open_write(Terms)?),
            postings_write: CompositeWrite::wrap(segment.open_write(Postings)?),
--- a/src/query/empty_query.rs
+++ b/src/query/empty_query.rs
@@ -1,8 +1,9 @@
 use super::Scorer;
 use crate::docset::TERMINATED;
 use crate::index::SegmentReader;
 use crate::query::explanation::does_not_match;
 use crate::query::{EnableScoring, Explanation, Query, Weight};
-use crate::{DocId, DocSet, Score, Searcher, SegmentReader};
+use crate::{DocId, DocSet, Score, Searcher};
 /// `EmptyQuery` is a dummy `Query` in which no document matches.
 ///
--- a/src/query/more_like_this/more_like_this.rs
+++ b/src/query/more_like_this/more_like_this.rs
@@ -180,7 +180,7 @@ impl MoreLikeThis {
                let facets: Vec<&str> = values
                    .iter()
                    .map(|value| {
-                        value.as_facet().map(|f| f.encoded_str()).ok_or_else(|| {
+                        value.as_facet().ok_or_else(|| {
                            TantivyError::InvalidArgument("invalid field value".to_string())
                        })
                    })
@@ -220,7 +220,7 @@ impl MoreLikeThis {
                        let mut token_stream = tokenizer.token_stream(text);
                        token_stream.process(sink);
                    } else if let Some(tok_str) = value.as_pre_tokenized_text() {
-                        let mut token_stream = PreTokenizedStream::from(tok_str.clone());
+                        let mut token_stream = PreTokenizedStream::from(*tok_str.clone());
                        token_stream.process(sink);
                    }
                }
--- a/src/query/query_parser/query_parser.rs
+++ b/src/query/query_parser/query_parser.rs
@@ -11,9 +11,7 @@ use rustc_hash::FxHashMap;
 use super::logical_ast::*;
 use crate::index::Index;
-use crate::json_utils::{
+use crate::json_utils::convert_to_fast_value_and_append_to_json_term;
    convert_to_fast_value_and_append_to_json_term, split_json_path, term_from_json_paths,
 };
 use crate::query::range_query::{is_type_valid_for_fastfield_range_query, RangeQuery};
 use crate::query::{
    AllQuery, BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhrasePrefixQuery,
@@ -966,14 +964,8 @@ fn generate_literals_for_json_object(
    let index_record_option = text_options.index_option();
    let mut logical_literals = Vec::new();
-    let paths = split_json_path(json_path);
+    let get_term_with_path =
-    let get_term_with_path = || {
+        || Term::from_field_json_path(field, json_path, json_options.is_expand_dots_enabled());
        term_from_json_paths(
            field,
            paths.iter().map(|el| el.as_str()),
            json_options.is_expand_dots_enabled(),
        )
    };
    // Try to convert the phrase to a fast value
    if let Some(term) = convert_to_fast_value_and_append_to_json_term(get_term_with_path(), phrase)
--- a/src/query/range_query/fast_field_range_query.rs
+++ b/src/query/range_query/fast_field_range_query.rs
@@ -174,7 +174,7 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe
    }
    fn size_hint(&self) -> u32 {
-        0 // heuristic possible by checking number of hits when fetching a block
+        self.column.num_docs()
    }
 }
--- a/src/query/term_query/term_scorer.rs
+++ b/src/query/term_query/term_scorer.rs
@@ -127,6 +127,7 @@ impl Scorer for TermScorer {
 mod tests {
    use proptest::prelude::*;
    use crate::index::SegmentId;
    use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
    use crate::merge_policy::NoMergePolicy;
    use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
@@ -134,8 +135,7 @@ mod tests {
    use crate::query::{Bm25Weight, EnableScoring, Scorer, TermQuery};
    use crate::schema::{IndexRecordOption, Schema, TEXT};
    use crate::{
-        assert_nearly_equals, DocId, DocSet, Index, IndexWriter, Score, Searcher, SegmentId, Term,
+        assert_nearly_equals, DocId, DocSet, Index, IndexWriter, Score, Searcher, Term, TERMINATED,
        TERMINATED,
    };
    #[test]
--- a/src/reader/warming.rs
+++ b/src/reader/warming.rs
@@ -179,9 +179,10 @@ mod tests {
    use super::Warmer;
    use crate::core::searcher::SearcherGeneration;
    use crate::directory::RamDirectory;
    use crate::index::SegmentId;
    use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
    use crate::schema::{Schema, INDEXED};
-    use crate::{Index, IndexSettings, ReloadPolicy, Searcher, SegmentId};
+    use crate::{Index, IndexSettings, ReloadPolicy, Searcher};
    #[derive(Default)]
    struct TestWarmer {
--- a/src/schema/document/de.rs
+++ b/src/schema/document/de.rs
@@ -873,7 +873,7 @@ mod tests {
        );
        let facet = Facet::from_text("/hello/world").unwrap();
-        let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into());
+        let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into());
        let value = deserialize_value(result);
        assert_eq!(value, crate::schema::OwnedValue::Facet(facet));
@@ -881,7 +881,8 @@ mod tests {
            text: "hello, world".to_string(),
            tokens: vec![Token::default(), Token::default()],
        };
-        let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into());
+        let result =
            serialize_value(ReferenceValueLeaf::PreTokStr(pre_tok_str.clone().into()).into());
        let value = deserialize_value(result);
        assert_eq!(value, crate::schema::OwnedValue::PreTokStr(pre_tok_str));
    }
@@ -960,13 +961,19 @@ mod tests {
            "my-third-key".to_string(),
            crate::schema::OwnedValue::F64(123.0),
        );
-        assert_eq!(value, crate::schema::OwnedValue::Object(expected_object));
+        assert_eq!(
            value,
            crate::schema::OwnedValue::Object(expected_object.into_iter().collect())
        );
        let object = serde_json::Map::new();
        let result = serialize_value(ReferenceValue::Object(JsonObjectIter(object.iter())));
        let value = deserialize_value(result);
        let expected_object = BTreeMap::new();
-        assert_eq!(value, crate::schema::OwnedValue::Object(expected_object));
+        assert_eq!(
            value,
            crate::schema::OwnedValue::Object(expected_object.into_iter().collect())
        );
        let mut object = serde_json::Map::new();
        object.insert("my-first-key".into(), serde_json::Value::Null);
@@ -978,7 +985,10 @@ mod tests {
        expected_object.insert("my-first-key".to_string(), crate::schema::OwnedValue::Null);
        expected_object.insert("my-second-key".to_string(), crate::schema::OwnedValue::Null);
        expected_object.insert("my-third-key".to_string(), crate::schema::OwnedValue::Null);
-        assert_eq!(value, crate::schema::OwnedValue::Object(expected_object));
+        assert_eq!(
            value,
            crate::schema::OwnedValue::Object(expected_object.into_iter().collect())
        );
    }
    #[test]
@@ -1055,7 +1065,10 @@ mod tests {
                .collect(),
            ),
        );
-        assert_eq!(value, crate::schema::OwnedValue::Object(expected_object));
+        assert_eq!(
            value,
            crate::schema::OwnedValue::Object(expected_object.into_iter().collect())
        );
        // Some more extreme nesting that might behave weirdly
        let mut object = serde_json::Map::new();
@@ -1077,6 +1090,9 @@ mod tests {
                OwnedValue::Array(vec![OwnedValue::Null]),
            ])]),
        );
-        assert_eq!(value, OwnedValue::Object(expected_object));
+        assert_eq!(
            value,
            OwnedValue::Object(expected_object.into_iter().collect())
        );
    }
 }
--- a/src/schema/document/mod.rs
+++ b/src/schema/document/mod.rs
@@ -5,22 +5,24 @@
 //! - [Value] which provides tantivy with a way to access the document's values in a common way
 //!   without performing any additional allocations.
 //! - [DocumentDeserialize] which implements the necessary code to deserialize the document from the
-//!   doc store.
+//!   doc store. If you are fine with fetching [TantivyDocument] from the doc store, you can skip
 //!   implementing this trait for your type.
 //!
 //! Tantivy provides a few out-of-box implementations of these core traits to provide
 //! some simple usage if you don't want to implement these traits on a custom type yourself.
 //!
 //! # Out-of-box document implementations
-//! - [Document] the old document type used by Tantivy before the trait based approach was
+//! - [TantivyDocument] the old document type used by Tantivy before the trait based approach was
 //!   implemented. This type is still valid and provides all of the original behaviour you might
 //!   expect.
-//! - `BTreeMap<Field, Value>` a mapping of field_ids to their relevant schema value using a
+//! - `BTreeMap<Field, OwnedValue>` a mapping of field_ids to their relevant schema value using a
 //!   BTreeMap.
-//! - `HashMap<Field, Value>` a mapping of field_ids to their relevant schema value using a HashMap.
+//! - `HashMap<Field, OwnedValue>` a mapping of field_ids to their relevant schema value using a
 //!   HashMap.
 //!
 //! # Implementing your custom documents
 //! Often in larger projects or higher performance applications you want to avoid the extra overhead
-//! of converting your own types to the Tantivy [Document] type, this can often save you a
+//! of converting your own types to the [TantivyDocument] type, this can often save you a
 //! significant amount of time when indexing by avoiding the additional allocations.
 //!
 //! ### Important Note
@@ -46,6 +48,7 @@
 //!
 //! impl Document for MyCustomDocument {
 //!     // The value type produced by the `iter_fields_and_values` iterator.
 //!     // tantivy already implements the Value trait for serde_json::Value.
 //!     type Value<'a> = &'a serde_json::Value;
 //!     // The iterator which is produced by `iter_fields_and_values`.
 //!     // Often this is a simple new-type wrapper unless you like super long generics.
@@ -94,10 +97,11 @@
 //! implementation for.
 //!
 //! ## Implementing custom values
-//! Internally, Tantivy only works with `ReferenceValue` which is an enum that tries to borrow
+//! In order to allow documents to return custom types, they must implement
-//! as much data as it can, in order to allow documents to return custom types, they must implement
+//! the [Value] trait which provides a way for Tantivy to get a `ReferenceValue` that it can then
 //! the `Value` trait which provides a way for Tantivy to get a `ReferenceValue` that it can then
 //! index and store.
 //! Internally, Tantivy only works with `ReferenceValue` which is an enum that tries to borrow
 //! as much data as it can
 //!
 //! Values can just as easily be customised as documents by implementing the `Value` trait.
 //!
@@ -105,9 +109,9 @@
 //! hold references of the data held by the parent [Document] which can then be passed
 //! on to the [ReferenceValue].
 //!
-//! This is why `Value` is implemented for `&'a serde_json::Value` and `&'a
+//! This is why [Value] is implemented for `&'a serde_json::Value` and
-//! tantivy::schema::Value` but not for their owned counterparts, as we cannot satisfy the lifetime
+//! [&'a tantivy::schema::document::OwnedValue](OwnedValue) but not for their owned counterparts, as
-//! bounds necessary when indexing the documents.
+//! we cannot satisfy the lifetime bounds necessary when indexing the documents.
 //!
 //! ### A note about returning values
 //! The custom value type does not have to be the type stored by the document, instead the
--- a/src/schema/document/owned_value.rs
+++ b/src/schema/document/owned_value.rs
@@ -1,4 +1,4 @@
-use std::collections::{btree_map, BTreeMap};
+use std::collections::BTreeMap;
 use std::fmt;
 use std::net::Ipv6Addr;
@@ -45,7 +45,7 @@ pub enum OwnedValue {
    /// A set of values.
    Array(Vec<Self>),
    /// Dynamic object value.
-    Object(BTreeMap<String, Self>),
+    Object(Vec<(String, Self)>),
    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
    IpAddr(Ipv6Addr),
 }
@@ -65,13 +65,13 @@ impl<'a> Value<'a> for &'a OwnedValue {
        match self {
            OwnedValue::Null => ReferenceValueLeaf::Null.into(),
            OwnedValue::Str(val) => ReferenceValueLeaf::Str(val).into(),
-            OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val).into(),
+            OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val.clone().into()).into(),
            OwnedValue::U64(val) => ReferenceValueLeaf::U64(*val).into(),
            OwnedValue::I64(val) => ReferenceValueLeaf::I64(*val).into(),
            OwnedValue::F64(val) => ReferenceValueLeaf::F64(*val).into(),
            OwnedValue::Bool(val) => ReferenceValueLeaf::Bool(*val).into(),
            OwnedValue::Date(val) => ReferenceValueLeaf::Date(*val).into(),
-            OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val).into(),
+            OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val.encoded_str()).into(),
            OwnedValue::Bytes(val) => ReferenceValueLeaf::Bytes(val).into(),
            OwnedValue::IpAddr(val) => ReferenceValueLeaf::IpAddr(*val).into(),
            OwnedValue::Array(array) => ReferenceValue::Array(array.iter()),
@@ -148,10 +148,10 @@ impl ValueDeserialize for OwnedValue {
            fn visit_object<'de, A>(&self, mut access: A) -> Result<Self::Value, DeserializeError>
            where A: ObjectAccess<'de> {
-                let mut elements = BTreeMap::new();
+                let mut elements = Vec::with_capacity(access.size_hint());
                while let Some((key, value)) = access.next_entry()? {
-                    elements.insert(key, value);
+                    elements.push((key, value));
                }
                Ok(OwnedValue::Object(elements))
@@ -167,6 +167,7 @@ impl Eq for OwnedValue {}
 impl serde::Serialize for OwnedValue {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where S: serde::Serializer {
        use serde::ser::SerializeMap;
        match *self {
            OwnedValue::Null => serializer.serialize_unit(),
            OwnedValue::Str(ref v) => serializer.serialize_str(v),
@@ -180,7 +181,13 @@ impl serde::Serialize for OwnedValue {
            }
            OwnedValue::Facet(ref facet) => facet.serialize(serializer),
            OwnedValue::Bytes(ref bytes) => serializer.serialize_str(&BASE64.encode(bytes)),
-            OwnedValue::Object(ref obj) => obj.serialize(serializer),
+            OwnedValue::Object(ref obj) => {
                let mut map = serializer.serialize_map(Some(obj.len()))?;
                for (k, v) in obj {
                    map.serialize_entry(k, v)?;
                }
                map.end()
            }
            OwnedValue::IpAddr(ref ip_v6) => {
                // Ensure IpV4 addresses get serialized as IpV4, but excluding IpV6 loopback.
                if let Some(ip_v4) = ip_v6.to_ipv4_mapped() {
@@ -248,12 +255,10 @@ impl<'de> serde::Deserialize<'de> for OwnedValue {
            fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
            where A: MapAccess<'de> {
-                let mut object = BTreeMap::new();
+                let mut object = map.size_hint().map(Vec::with_capacity).unwrap_or_default();
                while let Some((key, value)) = map.next_entry()? {
-                    object.insert(key, value);
+                    object.push((key, value));
                }
                Ok(OwnedValue::Object(object))
            }
        }
@@ -272,11 +277,13 @@ impl<'a, V: Value<'a>> From<ReferenceValue<'a, V>> for OwnedValue {
                ReferenceValueLeaf::I64(val) => OwnedValue::I64(val),
                ReferenceValueLeaf::F64(val) => OwnedValue::F64(val),
                ReferenceValueLeaf::Date(val) => OwnedValue::Date(val),
-                ReferenceValueLeaf::Facet(val) => OwnedValue::Facet(val.clone()),
+                ReferenceValueLeaf::Facet(val) => {
                    OwnedValue::Facet(Facet::from_encoded_string(val.to_string()))
                }
                ReferenceValueLeaf::Bytes(val) => OwnedValue::Bytes(val.to_vec()),
                ReferenceValueLeaf::IpAddr(val) => OwnedValue::IpAddr(val),
                ReferenceValueLeaf::Bool(val) => OwnedValue::Bool(val),
-                ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(val.clone()),
+                ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(*val.clone()),
            },
            ReferenceValue::Array(val) => {
                OwnedValue::Array(val.map(|v| v.as_value().into()).collect())
@@ -363,7 +370,8 @@ impl From<PreTokenizedString> for OwnedValue {
 impl From<BTreeMap<String, OwnedValue>> for OwnedValue {
    fn from(object: BTreeMap<String, OwnedValue>) -> OwnedValue {
-        OwnedValue::Object(object)
+        let key_values = object.into_iter().collect();
        OwnedValue::Object(key_values)
    }
 }
@@ -417,18 +425,16 @@ impl From<serde_json::Value> for OwnedValue {
 impl From<serde_json::Map<String, serde_json::Value>> for OwnedValue {
    fn from(map: serde_json::Map<String, serde_json::Value>) -> Self {
-        let mut object = BTreeMap::new();
+        let object: Vec<(String, OwnedValue)> = map
-
+            .into_iter()
-        for (key, value) in map {
+            .map(|(key, value)| (key, OwnedValue::from(value)))
-            object.insert(key, OwnedValue::from(value));
+            .collect();
        }
        OwnedValue::Object(object)
    }
 }
 /// A wrapper type for iterating over the key/value entries of an `OwnedValue` object, producing reference values.
-pub struct ObjectMapIter<'a>(btree_map::Iter<'a, String, OwnedValue>);
+pub struct ObjectMapIter<'a>(std::slice::Iter<'a, (String, OwnedValue)>);
 impl<'a> Iterator for ObjectMapIter<'a> {
    type Item = (&'a str, &'a OwnedValue);
--- a/src/schema/document/se.rs
+++ b/src/schema/document/se.rs
@@ -121,7 +121,7 @@ where W: Write
                ReferenceValueLeaf::Facet(val) => {
                    self.write_type_code(type_codes::HIERARCHICAL_FACET_CODE)?;
-                    val.serialize(self.writer)
+                    Cow::Borrowed(val).serialize(self.writer)
                }
                ReferenceValueLeaf::Bytes(val) => {
                    self.write_type_code(type_codes::BYTES_CODE)?;
@@ -428,7 +428,7 @@ mod tests {
        );
        let facet = Facet::from_text("/hello/world").unwrap();
-        let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into());
+        let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into());
        let expected = binary_repr!(
            type_codes::HIERARCHICAL_FACET_CODE => Facet::from_text("/hello/world").unwrap(),
        );
@@ -441,7 +441,8 @@ mod tests {
            text: "hello, world".to_string(),
            tokens: vec![Token::default(), Token::default()],
        };
-        let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into());
+        let result =
            serialize_value(ReferenceValueLeaf::PreTokStr(pre_tok_str.clone().into()).into());
        let expected = binary_repr!(
            type_codes::EXT_CODE, type_codes::TOK_STR_EXT_CODE => pre_tok_str,
        );
--- a/src/schema/document/value.rs
+++ b/src/schema/document/value.rs
@@ -3,7 +3,6 @@ use std::net::Ipv6Addr;
 use common::DateTime;
 use crate::schema::Facet;
 use crate::tokenizer::PreTokenizedString;
 /// A single field value.
@@ -28,7 +27,7 @@ pub trait Value<'a>: Send + Sync + Debug {
    }
    #[inline]
-    /// If the Value is a String, returns the associated str. Returns None otherwise.
+    /// If the Value is a leaf, returns the associated leaf. Returns None otherwise.
    fn as_leaf(&self) -> Option<ReferenceValueLeaf<'a>> {
        if let ReferenceValue::Leaf(val) = self.as_value() {
            Some(val)
@@ -82,8 +81,9 @@ pub trait Value<'a>: Send + Sync + Debug {
    #[inline]
    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
    /// otherwise.
-    fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
+    fn as_pre_tokenized_text(&self) -> Option<Box<PreTokenizedString>> {
-        self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text())
+        self.as_leaf()
            .and_then(|leaf| leaf.into_pre_tokenized_text())
    }
    #[inline]
@@ -94,7 +94,7 @@ pub trait Value<'a>: Send + Sync + Debug {
    #[inline]
    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
-    fn as_facet(&self) -> Option<&'a Facet> {
+    fn as_facet(&self) -> Option<&'a str> {
        self.as_leaf().and_then(|leaf| leaf.as_facet())
    }
@@ -132,7 +132,7 @@ pub trait Value<'a>: Send + Sync + Debug {
 }
 /// An enum representing a leaf value for tantivy to index.
-#[derive(Clone, Copy, Debug, PartialEq)]
+#[derive(Clone, Debug, PartialEq)]
 pub enum ReferenceValueLeaf<'a> {
    /// A null value.
    Null,
@@ -146,8 +146,9 @@ pub enum ReferenceValueLeaf<'a> {
    F64(f64),
    /// Date/time with nanoseconds precision
    Date(DateTime),
-    /// Facet
+    /// Facet string needs to match the format of
-    Facet(&'a Facet),
+    /// [Facet::encoded_str](crate::schema::Facet::encoded_str).
    Facet(&'a str),
    /// Arbitrarily sized byte array
    Bytes(&'a [u8]),
    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
@@ -155,7 +156,7 @@ pub enum ReferenceValueLeaf<'a> {
    /// Bool value
    Bool(bool),
    /// Pre-tokenized str type,
-    PreTokStr(&'a PreTokenizedString),
+    PreTokStr(Box<PreTokenizedString>),
 }
 impl<'a, T: Value<'a> + ?Sized> From<ReferenceValueLeaf<'a>> for ReferenceValue<'a, T> {
@@ -259,9 +260,9 @@ impl<'a> ReferenceValueLeaf<'a> {
    }
    #[inline]
-    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
+    /// If the Value is a pre-tokenized string, consumes it and returns the string.
-    /// otherwise.
+    /// Returns None otherwise.
-    pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
+    pub fn into_pre_tokenized_text(self) -> Option<Box<PreTokenizedString>> {
        if let Self::PreTokStr(val) = self {
            Some(val)
        } else {
@@ -281,7 +282,7 @@ impl<'a> ReferenceValueLeaf<'a> {
    #[inline]
    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
-    pub fn as_facet(&self) -> Option<&'a Facet> {
+    pub fn as_facet(&self) -> Option<&'a str> {
        if let Self::Facet(val) = self {
            Some(val)
        } else {
@@ -322,6 +323,16 @@ where V: Value<'a>
        }
    }
    #[inline]
    /// Consumes the value and hands back its leaf when it is the `Leaf` variant.
    /// Every other variant yields `None`.
    pub fn into_leaf(self) -> Option<ReferenceValueLeaf<'a>> {
        match self {
            Self::Leaf(leaf) => Some(leaf),
            _ => None,
        }
    }
    #[inline]
    /// If the Value is a String, returns the associated str. Returns None otherwise.
    pub fn as_str(&self) -> Option<&'a str> {
@@ -365,10 +376,11 @@ where V: Value<'a>
    }
    #[inline]
-    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
+    /// If the Value is a pre-tokenized string, consumes it and returns the string.
-    /// otherwise.
+    /// Returns None otherwise.
-    pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
+    pub fn into_pre_tokenized_text(self) -> Option<Box<PreTokenizedString>> {
-        self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text())
+        self.into_leaf()
            .and_then(|leaf| leaf.into_pre_tokenized_text())
    }
    #[inline]
@@ -379,7 +391,7 @@ where V: Value<'a>
    #[inline]
    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
-    pub fn as_facet(&self) -> Option<&'a Facet> {
+    pub fn as_facet(&self) -> Option<&'a str> {
        self.as_leaf().and_then(|leaf| leaf.as_facet())
    }
--- a/src/schema/flags.rs
+++ b/src/schema/flags.rs
@@ -1,7 +1,6 @@
 use std::ops::BitOr;
-use crate::schema::{NumericOptions, TextOptions};
+use crate::schema::{DateOptions, NumericOptions, TextOptions};
 use crate::DateOptions;
 #[derive(Clone)]
 pub struct StoredFlag;
--- a/src/schema/term.rs
+++ b/src/schema/term.rs
@@ -4,10 +4,12 @@ use std::{fmt, str};
 use columnar::{MonotonicallyMappableToU128, MonotonicallyMappableToU64};
 use common::json_path_writer::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP_STR};
 use common::JsonPathWriter;
 use super::date_time_options::DATE_TIME_PRECISION_INDEXED;
 use super::Field;
 use crate::fastfield::FastValue;
 use crate::json_utils::split_json_path;
 use crate::schema::{Facet, Type};
 use crate::DateTime;
@@ -33,6 +35,28 @@ impl Term {
        Term(data)
    }
    /// Builds a JSON term for `field` that addresses the value located at `json_path`.
    ///
    /// A json path walks through nested objects, one key per segment:
    /// `k8s.node.id` points at the `5` in `{"k8s": {"node": {"id": 5}}}`.
    ///
    /// A key that itself contains a dot must have that dot escaped with a backslash
    /// unless `expand_dots_enabled` is set: `k8s\.node.id` points at the `5` in
    /// `{"k8s.node": {"id": 5}}`.
    ///
    /// Only the encoded path is appended to the returned term; no typed value is
    /// appended here.
    pub fn from_field_json_path(field: Field, json_path: &str, expand_dots_enabled: bool) -> Term {
        let segments = split_json_path(json_path);
        let mut path_writer = JsonPathWriter::with_expand_dots(expand_dots_enabled);
        for segment in segments {
            path_writer.push(&segment);
        }
        // Terminate the path before its bytes are copied into the term.
        path_writer.set_end();
        let encoded_path = path_writer.as_str().as_bytes();
        let mut json_term = Term::with_type_and_field(Type::Json, field);
        json_term.append_bytes(encoded_path);
        json_term
    }
    pub(crate) fn with_type_and_field(typ: Type, field: Field) -> Term {
        let mut term = Self::with_capacity(8);
        term.set_field_and_type(field, typ);
@@ -165,7 +189,7 @@ impl Term {
    /// This is used in JSON type to append a fast value after the path.
    ///
    /// It will not clear existing bytes.
-    pub(crate) fn append_type_and_fast_value<T: FastValue>(&mut self, val: T) {
+    pub fn append_type_and_fast_value<T: FastValue>(&mut self, val: T) {
        self.0.push(T::to_type().to_code());
        let value = if T::to_type() == Type::Date {
            DateTime::from_u64(val.to_u64())
@@ -181,7 +205,7 @@ impl Term {
    /// This is used in JSON type to append a str after the path.
    ///
    /// It will not clear existing bytes.
-    pub(crate) fn append_type_and_str(&mut self, val: &str) {
+    pub fn append_type_and_str(&mut self, val: &str) {
        self.0.push(Type::Str.to_code());
        self.0.extend(val.as_bytes().as_ref());
    }
--- a/src/space_usage/mod.rs
+++ b/src/space_usage/mod.rs
@@ -12,8 +12,8 @@ use std::collections::HashMap;
 use common::ByteCount;
 use serde::{Deserialize, Serialize};
 use crate::index::SegmentComponent;
 use crate::schema::Field;
 use crate::SegmentComponent;
 /// Enum containing any of the possible space usage results for segment components.
 pub enum ComponentSpaceUsage {
@@ -115,7 +115,7 @@ impl SegmentSpaceUsage {
    /// Use the components directly if this is somehow in performance critical code.
    pub fn component(&self, component: SegmentComponent) -> ComponentSpaceUsage {
        use self::ComponentSpaceUsage::*;
-        use crate::SegmentComponent::*;
+        use crate::index::SegmentComponent::*;
        match component {
            Postings => PerField(self.postings().clone()),
            Positions => PerField(self.positions().clone()),
--- a/src/store/reader.rs
+++ b/src/store/reader.rs
@@ -18,6 +18,8 @@ use crate::schema::document::{BinaryDocumentDeserializer, DocumentDeserialize};
 use crate::space_usage::StoreSpaceUsage;
 use crate::store::index::Checkpoint;
 use crate::DocId;
 #[cfg(feature = "quickwit")]
 use crate::Executor;
 pub(crate) const DOCSTORE_CACHE_CAPACITY: usize = 100;
@@ -341,7 +343,11 @@ impl StoreReader {
    /// In most cases use [`get_async`](Self::get_async)
    ///
    /// Loads and decompresses a block asynchronously.
-    async fn read_block_async(&self, checkpoint: &Checkpoint) -> io::Result<Block> {
+    async fn read_block_async(
        &self,
        checkpoint: &Checkpoint,
        executor: &Executor,
    ) -> io::Result<Block> {
        let cache_key = checkpoint.byte_range.start;
        if let Some(block) = self.cache.get_from_cache(checkpoint.byte_range.start) {
            return Ok(block);
@@ -353,8 +359,12 @@ impl StoreReader {
            .read_bytes_async()
            .await?;
-        let decompressed_block =
+        let decompressor = self.decompressor;
-            OwnedBytes::new(self.decompressor.decompress(compressed_block.as_ref())?);
+        let maybe_decompressed_block = executor
            .spawn_blocking(move || decompressor.decompress(compressed_block.as_ref()))
            .await
            .expect("decompression panicked");
        let decompressed_block = OwnedBytes::new(maybe_decompressed_block?);
        self.cache
            .put_into_cache(cache_key, decompressed_block.clone());
@@ -363,15 +373,23 @@ impl StoreReader {
    }
    /// Reads raw bytes of a given document asynchronously.
-    pub async fn get_document_bytes_async(&self, doc_id: DocId) -> crate::Result<OwnedBytes> {
+    pub async fn get_document_bytes_async(
        &self,
        doc_id: DocId,
        executor: &Executor,
    ) -> crate::Result<OwnedBytes> {
        let checkpoint = self.block_checkpoint(doc_id)?;
-        let block = self.read_block_async(&checkpoint).await?;
+        let block = self.read_block_async(&checkpoint, executor).await?;
        Self::get_document_bytes_from_block(block, doc_id, &checkpoint)
    }
    /// Fetches a document asynchronously. Async version of [`get`](Self::get).
-    pub async fn get_async<D: DocumentDeserialize>(&self, doc_id: DocId) -> crate::Result<D> {
+    pub async fn get_async<D: DocumentDeserialize>(
-        let mut doc_bytes = self.get_document_bytes_async(doc_id).await?;
+        &self,
        doc_id: DocId,
        executor: &Executor,
    ) -> crate::Result<D> {
        let mut doc_bytes = self.get_document_bytes_async(doc_id, executor).await?;
        let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
            .map_err(crate::TantivyError::from)?;
Author	SHA1	Message	Date
Paul Masurel	f820d42151	oneshot 0.1.7 Now that the is_closed change has been merged upstream, we can rely on that. This commit is a "hotfix" because we don't want to rely on some of the commits in main just yet	2024-05-31 07:54:56 +04:00
PSeitz	5b7cca13e5	lower contention on AggregationLimits (#2394 ) PR https://github.com/quickwit-oss/quickwit/pull/4962 fixes an issue where the AggregationLimits are not passed correctly. Since the AggregationLimits are shared properly we run into contention issues. This PR includes some straightforward improvements to reduce contention, by only calling if the memory changed and avoiding the second read. We probably need some sharding with multiple counters or local caching before updating the global after some threshold.	2024-05-15 12:25:40 +02:00
dependabot[bot]	a79590477e	Update binggan requirement from 0.5.2 to 0.6.2 (#2399 ) --- updated-dependencies: - dependency-name: binggan dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-05-15 05:40:37 +02:00
Paul Masurel	6181c1eb5e	Small changes in the Executor API. (#2391 ) Warning, this change is mildly not backward compatible so I bumped tantivy's version.	2024-05-10 17:19:12 +09:00
Adam Reichold	1ee5f90761	Give allocation control to the caller instead of forcing a clone (#2389 ) Achieved by moving the boxes out of the temporary reference wrappers which are cloneable themselves, i.e. if required the caller can clone them already or consume them to reuse existing allocations.	2024-05-09 16:01:13 +09:00
PSeitz	71f3b4e4e3	fix ReferenceValue API flaw (#2372 ) * fix ReferenceValue API flaw Remove `Facet` and `TokenizedString` values from the `ReferenceValue` API, as this requires the trait value to have them stored somewhere. Since `TokenizedString` is quite niche, I just copy it into a Box, instead of designing a reference API around it. * fix comment link	2024-05-09 06:14:42 +02:00
trinity-1686a	8cd7ddc535	run block decompression from executor (#2386 ) * run block decompression from executor * add a wrapper with is_closed to oneshot channel * add cancelation test to Executor::spawn_blocking	2024-05-08 12:22:44 +02:00
Paul Masurel	2b76335a95	Removed usage of num_cpus (#2387 ) * Removed usage of num_cpus * handling error	2024-05-08 13:32:52 +09:00
PSeitz	c6b213d8f0	use binggan for agg benchmark (#2378 ) * use binggan for agg benchmark use binggan for agg benchmark, which includes memory consumption Output: ``` full histogram Memory: 15.8 KB Avg: 10.9322ms (+5.44%) Median: 10.8790ms (+9.28%) Min: 10.7470ms Max: 11.3263ms histogram_hard_bounds Memory: 15.5 KB Avg: 5.1939ms (+6.61%) Median: 5.1722ms (+10.98%) Min: 5.0432ms Max: 5.3910ms histogram_with_avg_sub_agg Memory: 48.7 KB Avg: 23.8165ms (+4.57%) Median: 23.7264ms (+10.06%) Min: 23.4995ms Max: 24.8107ms dense histogram Memory: 17.3 KB Avg: 15.6810ms (-8.54%) Median: 15.6174ms (-8.89%) Min: 15.4953ms Max: 16.0702ms histogram_hard_bounds Memory: 15.4 KB Avg: 10.0720ms (-7.33%) Median: 10.0572ms (-7.06%) Min: 9.8500ms Max: 10.4819ms histogram_with_avg_sub_agg Memory: 50.1 KB Avg: 33.0993ms (-7.04%) Median: 32.9499ms (-6.86%) Min: 32.8284ms Max: 34.0529ms sparse histogram Memory: 16.3 KB Avg: 19.2325ms (-0.44%) Median: 19.1211ms (-1.26%) Min: 19.0348ms Max: 19.7902ms histogram_hard_bounds Memory: 16.1 KB Avg: 18.5179ms (-0.61%) Median: 18.4552ms (-0.90%) Min: 18.3799ms Max: 19.0535ms histogram_with_avg_sub_agg Memory: 34.7 KB Avg: 21.2589ms (-0.69%) Median: 21.1867ms (-1.05%) Min: 21.0342ms Max: 21.9900ms ``` * add more bench with term as sub agg	2024-05-07 11:29:49 +02:00
PSeitz	eea70030bf	cleanup top level exports (#2382 ) remove some top level exports	2024-05-07 09:59:41 +02:00
PSeitz	92b5526310	allow more JSON values, fix i64 special case (#2383 ) This changes three things: - Reuse positions_per_path hashmap instead of allocating one per indexed JSON value - Try to cast u64 values to i64 to streamline with search behaviour - Allow top level json values to be of any type, instead of limiting it to JSON objects. Remove special JSON object handling method. TODO: We probably should also try to check f64 to i64 and u64 when indexing, as values may get converted to f64 by the JSON parser	2024-05-01 12:08:12 +02:00
PSeitz	99a59ad37e	remove zero byte check (#2379 ) remove zero byte checks in columnar. zero bytes are converted during serialization now. unify code paths extend test for expected column names	2024-04-26 06:03:28 +02:00
trinity-1686a	6a66a71cbb	modify fastfield range query heuristic (#2375 )	2024-04-25 10:06:11 +02:00
PSeitz	ff40764204	make convert_to_fast_value_and_append_to_json_term pub (#2370 ) * make convert_to_fast_value_and_append_to_json_term pub * clippy	2024-04-23 04:05:41 +02:00
PSeitz	047da20b5b	add json path constructor to term (#2367 )	2024-04-22 12:23:35 +02:00
PSeitz	1417eaf3a7	fix coverage (#2368 )	2024-04-22 12:23:15 +02:00
PSeitz	4f8493d2de	improve document docs (#2359 )	2024-04-22 12:05:16 +02:00
Paul Masurel	8861366137	Owned value relying on Vec instead of BTreeMap (#2364 ) * Owned value relying on Vec instead of BTreeMap * fmt * fix build * fix serialization --------- Co-authored-by: Pascal Seitz <pascal.seitz@gmail.com>	2024-04-22 09:38:05 +02:00