Compare commits

..

115 Commits

Author SHA1 Message Date
Raphaël Marinier
0890503fc2 Speed up searches by removing repeated memsets coming from vec.resize()
Also, reserve exactly the size needed, which is surprisingly needed to
get the full speedup of ~5% on a good fraction of the queries.
2024-03-12 17:50:23 +01:00
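The memset removed by the commit above is the zero-fill hidden inside `Vec::resize`; reserving the exact capacity once and extending avoids that repeated work. A minimal sketch of the pattern (the function name is ours, not tantivy's actual code):

```rust
// Growing a scratch buffer: `resize` zero-initializes the new tail on every
// call (a hidden memset). Reserving the exact final size once and pushing the
// values avoids both the repeated memsets and over-allocation.
fn fill_exact(values: impl ExactSizeIterator<Item = u32>) -> Vec<u32> {
    let mut buf = Vec::new();
    // reserve exactly what is needed; `reserve` alone may over-allocate
    buf.reserve_exact(values.len());
    buf.extend(values);
    buf
}
```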
trinity-1686a
f6b0cc1aab allow some mixing of occur and bool in strict query parser (#2323)
* allow some mixing of occur and bool in strict query parser

* allow all mixing of binary and occur in strict parser
2024-03-07 15:17:48 +01:00
PSeitz
7e41d31c6e agg: support to deserialize f64 from string (#2311)
* agg: support to deserialize f64 from string

* remove visit_string

* disallow NaN
2024-03-05 05:49:41 +01:00
Adam Reichold
40aa4abfe5 Make FacetCounts defaultable and cloneable. (#2322) 2024-03-05 04:11:11 +01:00
dependabot[bot]
2650317622 Update fs4 requirement from 0.7.0 to 0.8.0 (#2321)
Updates the requirements on [fs4](https://github.com/al8n/fs4-rs) to permit the latest version.
- [Release notes](https://github.com/al8n/fs4-rs/releases)
- [Commits](https://github.com/al8n/fs4-rs/commits)

---
updated-dependencies:
- dependency-name: fs4
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-02-27 03:38:04 +01:00
Paul Masurel
6739357314 Removing split_size and adding split_size and shard_size as segment_size (#2320)
aliases.
2024-02-26 11:35:22 +01:00
PSeitz
d57622d54b support bool type in term aggregation (#2318)
* support bool type in term aggregation

* add Bool to Intermediate Key
2024-02-20 03:22:22 +01:00
PSeitz
f745dbc054 fix Clone for TopNComputer, add top_hits bench (#2315)
* fix Clone for TopNComputer, add top_hits bench

add top_hits agg bench

test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_sub_agg                                            ... bench: 123,475,175 ns/iter (+/- 30,608,889)
test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_sub_agg_multi                                      ... bench: 194,170,414 ns/iter (+/- 36,495,516)
test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_sub_agg_opt                                        ... bench: 179,742,809 ns/iter (+/- 29,976,507)
test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_sub_agg_sparse                                     ... bench:  27,592,534 ns/iter (+/- 2,672,370)
test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_top_hits_agg                                       ... bench: 552,851,227 ns/iter (+/- 71,975,886)
test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_top_hits_agg_multi                                 ... bench: 558,616,384 ns/iter (+/- 100,890,124)
test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_top_hits_agg_opt                                   ... bench: 554,031,368 ns/iter (+/- 165,452,650)
test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_top_hits_agg_sparse                                ... bench:  46,435,919 ns/iter (+/- 13,681,935)

* add comment
2024-02-20 03:22:00 +01:00
PSeitz
79b041f81f clippy (#2314) 2024-02-13 05:56:31 +01:00
PSeitz
0e16ed9ef7 Fix serde for TopNComputer (#2313)
* Fix serde for TopNComputer

The top hits aggregation changed the TopNComputer to be serializable,
but capacity needs to be carried over, as it contains logic which is
checked against when pushing elements (capacity == 0 is not allowed).

* use serde from deser

* remove pub, clippy
2024-02-07 12:52:06 +01:00
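The invariant fixed above can be shown without serde: whatever (de)serialization mechanism is used, `capacity` must survive the round trip, because the push logic rejects `capacity == 0`. A hand-rolled sketch with illustrative names (not tantivy's actual types):

```rust
// Illustrative stand-in for TopNComputer: `capacity` participates in the push
// logic (capacity == 0 is not allowed), so (de)serialization must carry it
// over instead of reconstructing the struct with a default.
#[derive(Clone, Debug, PartialEq)]
struct TopN {
    capacity: usize, // invariant: > 0, checked when pushing
    buffer: Vec<u64>,
}

impl TopN {
    fn new(capacity: usize) -> Self {
        assert!(capacity > 0, "capacity == 0 is not allowed");
        TopN { capacity, buffer: Vec::with_capacity(capacity) }
    }

    // Hand-rolled round trip standing in for serde; the bug fixed above was
    // equivalent to dropping `capacity` on the way through.
    fn to_parts(&self) -> (usize, Vec<u64>) {
        (self.capacity, self.buffer.clone())
    }

    fn from_parts(parts: (usize, Vec<u64>)) -> Self {
        TopN { capacity: parts.0, buffer: parts.1 }
    }
}
```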
mochi
88a3275dbb add shared search executor (#2312) 2024-02-05 09:33:00 +01:00
PSeitz
1223a87eb2 add fuzz test for hashmap (#2310) 2024-01-31 10:30:21 +01:00
PSeitz
48630ceec9 move into new index module (#2259)
move core modules to index module
2024-01-31 10:30:04 +01:00
Adam Reichold
72002e8a89 Make test builds Clippy clean. (#2277) 2024-01-31 02:47:06 +01:00
trinity-1686a
3c9297dd64 report if posting list was actually loaded when warming it up (#2309) 2024-01-29 15:23:16 +01:00
Tushar
0e04ec3136 feat(aggregators/metric): Add a top_hits aggregator (#2198)
* feat(aggregators/metric): Implement a top_hits aggregator

* fix: Expose get_fields

* fix: Serializer for top_hits request

Also removes the extraneous third-party
serialization helper.

* chore: Avert panic on parsing invalid top_hits query

* refactor: Allow multiple field names from aggregations

* perf: Replace binary heap with TopNComputer

* fix: Avoid comparator inversion by ComparableDoc

* fix: Rank missing field values lower than present values

* refactor: Make KeyOrder a struct

* feat: Rough attempt at docvalue_fields

* feat: Complete stab at docvalue_fields

- Rename "SearchResult*" => "Retrieval*"
- Revert Vec => HashMap for aggregation accessors.
- Split accessors for core aggregation and field retrieval.
- Resolve globbed field names in docvalue_fields retrieval.
- Handle strings/bytes and other column types with DynamicColumn

* test(unit): Add tests for top_hits aggregator

* fix: docfield_value field globbing

* test(unit): Include dynamic fields

* fix: Value -> OwnedValue

* fix: Use OwnedValue's native Null variant

* chore: Improve readability of test asserts

* chore: Remove DocAddress from top_hits result

* docs: Update aggregator doc

* revert: accidental doc test

* chore: enable time macros only for tests

* chore: Apply suggestions from review

* chore: Apply suggestions from review

* fix: Retrieve all values for fields

* test(unit): Update for multi-value retrieval

* chore: Assert term existence

* feat: Include all columns for a column name

Since a (name, type) constitutes a unique column.

* fix: Resolve json fields

Introduces a translation step to bridge the difference between
ColumnarReaders null `\0` separated json field keys to the common
`.` separated used by SegmentReader. Although, this should probably
be the default behavior for ColumnarReader's public API perhaps.

* chore: Address review on mutability

* chore: s/segment_id/segment_ordinal instances of SegmentOrdinal

* chore: Revert erroneous grammar change
2024-01-26 16:46:41 +01:00
Paul Masurel
9b7f3a55cf Bumped census version 2024-01-26 19:32:02 +09:00
PSeitz
1dacdb6c85 add histogram agg test on empty index (#2306) 2024-01-23 16:27:34 +01:00
François Massot
30483310ca Minor improvement of README.md (#2305)
* Update README.md

* Remove useless paragraph

* Wording.
2024-01-19 17:46:48 +09:00
Tushar
e1d18b5114 chore: Expose TopDocs::order_by_u64_field again (#2282) 2024-01-18 05:58:24 +01:00
trinity-1686a
108f30ba23 allow newline where we allow space in query parser (#2302)
fix regression from the new parser
2024-01-17 14:38:35 +01:00
PSeitz
5943ee46bd Truncate keys to u16::MAX in term hashmap (#2299)
Truncate keys to u16::MAX instead of, e.g., storing 0 bytes for keys of length u16::MAX + 1.

The term hashmap has a hidden API contract to only accept terms with length up to u16::MAX.
2024-01-11 10:19:12 +01:00
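The guard described above amounts to clamping the key length before it is cast to u16, so an over-long key is truncated rather than having its length wrap to 0. A sketch (illustrative, not tantivy's actual code):

```rust
// Clamp a key to the hashmap's hidden contract of at most u16::MAX bytes.
// Without the clamp, casting the length to u16 would wrap, storing 0 bytes
// for a key of length u16::MAX + 1.
fn truncate_key(key: &[u8]) -> &[u8] {
    let len = key.len().min(u16::MAX as usize);
    &key[..len]
}
```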
PSeitz
f95a76293f add memory arena test (#2298)
* add memory arena test

* add assert

* Update stacker/src/memory_arena.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2024-01-11 07:18:48 +01:00
Paul Masurel
014328e378 Fix bug that can cause get_docids_for_value_range to panic. (#2295)
* Fix bug that can cause `get_docids_for_value_range` to panic.

When `selected_docid_range.end == num_rows`, we would get a panic
as we try to access a non-existing blockmeta.

This PR accepts calls to rank with any value.
For any value above num_rows we simply return non_null_rows.

Fixes #2293

* add tests, merge variables

---------

Co-authored-by: Pascal Seitz <pascal.seitz@gmail.com>
2024-01-09 14:52:20 +01:00
Adam Reichold
53f2fe1fbe Forward regex parser errors to enable understanding their reason. (#2288) 2023-12-22 11:01:10 +01:00
PSeitz
9c75942aaf fix merge panic for JSON fields (#2284)
Root cause was the positions buffer had residue positions from the
previous term, when the terms were alternating between having and not
having positions in JSON (terms have positions, but not numerics).

Fixes #2283
2023-12-21 11:05:34 +01:00
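The root cause above is a reused-buffer bug: a positions buffer kept across terms must be cleared before recording a term that carries no positions. A minimal illustration (types and names are ours, not tantivy's):

```rust
// A scratch buffer reused across terms. Clearing it unconditionally prevents
// residue from the previous term leaking into a term that has no positions
// (e.g. numeric values inside a JSON field, which are unpositioned).
struct PositionRecorder {
    positions: Vec<u32>,
}

impl PositionRecorder {
    fn record_term(&mut self, positions: Option<&[u32]>) -> Vec<u32> {
        // the fix: clear before every term, not only positioned ones
        self.positions.clear();
        if let Some(p) = positions {
            self.positions.extend_from_slice(p);
        }
        self.positions.clone()
    }
}
```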
PSeitz
bff7c58497 improve indexing benchmark (#2275) 2023-12-11 09:04:42 +01:00
trinity-1686a
9ebc5ed053 use fst for sstable index (#2268)
* read path for new fst based index

* implement BlockAddrStoreWriter

* extract slop/derivation computation

* use better linear approximator and allow negative correction to approximator

* document format and reorder some fields

* optimize single block sstable size

* plug backward compat
2023-12-04 15:13:15 +01:00
PSeitz
0b56c88e69 Revert "Preparing for 0.21.2 release." (#2258)
* Revert "Preparing for 0.21.2 release. (#2256)"

This reverts commit 9caab45136.

* bump version to 0.21.1

* set version to 0.22.0-dev
2023-12-01 13:46:12 +01:00
PSeitz
24841f0b2a update bitpacker dep (#2269) 2023-12-01 13:45:52 +01:00
PSeitz
1a9fc10be9 add fields_metadata to SegmentReader, add columnar docs (#2222)
* add fields_metadata to SegmentReader, add columnar docs

* use schema to resolve field, add test

* normalize paths

* merge for FieldsMetadata, add fields_metadata on Index

* Update src/core/segment_reader.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

* merge code paths

* add Hash

* move function outside

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-11-22 12:29:53 +01:00
PSeitz
07573a7f19 update fst (#2267)
update fst to 0.5 (deduplicates regex-syntax in the dep tree)
deps cleanup
2023-11-21 16:06:57 +01:00
BlackHoleFox
daad2dc151 Take string references instead of owned values building Facet paths (#2265) 2023-11-20 09:40:44 +01:00
PSeitz
054f49dc31 support escaped dot, add agg test (#2250)
add agg test for nested JSON
allow escaping of dot
2023-11-20 03:00:57 +01:00
PSeitz
47009ed2d3 remove unused deps (#2264)
found with cargo machete
remove pprof (doesn't work)
2023-11-20 02:59:59 +01:00
PSeitz
0aae31d7d7 reduce number of allocations (#2257)
* reduce number of allocations

Explanation makes up around 50% of all allocations (numbers not perf).
It's created during serialization but not called.

- Make Explanation optional in BM25
- Avoid allocations when using Explanation

* use Cow
2023-11-16 13:47:36 +01:00
Paul Masurel
9caab45136 Preparing for 0.21.2 release. (#2256) 2023-11-15 10:43:36 +09:00
Chris Tam
6d9a7b7eb0 Derive Debug for SchemaBuilder (#2254) 2023-11-15 01:03:44 +01:00
dependabot[bot]
7a2c5804b1 Update itertools requirement from 0.11.0 to 0.12.0 (#2255)
Updates the requirements on [itertools](https://github.com/rust-itertools/itertools) to permit the latest version.
- [Changelog](https://github.com/rust-itertools/itertools/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-itertools/itertools/compare/v0.11.0...v0.12.0)

---
updated-dependencies:
- dependency-name: itertools
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-11-15 01:03:08 +01:00
François Massot
5319977171 Merge pull request #2253 from quickwit-oss/issue/2251-bug-merge-json-object-with-number
Fix bug occurring when merging JSON object indexed with positions.
2023-11-14 17:28:29 +01:00
trinity-1686a
828632e8c4 rustfmt 2023-11-14 15:05:16 +01:00
Paul Masurel
6b59ec6fd5 Fix bug occurring when merging JSON object indexed with positions.
In a JSON object field, the presence of term frequencies depends on the
term: typically, a string indexed with positions will have positions
while numbers won't.

The presence or absence of term freqs for a given term is unfortunately
encoded in a very passive way.

It is given by the presence of extra information in the skip info, or
the lack of term freqs after decoding vint blocks.

Before this change, when writing a segment, we would encode the segment
correctly (without any term freq for numbers in a JSON object field).
However, during a merge, we would get the default term freq=1 value
(the default in the absence of encoded term freqs).

The merger would then proceed and attempt to decode 1 position when
there are in fact none.

This PR requires explicitly telling the posting serializer, for each new
term, whether term frequencies should be serialized.

Closes #2251
2023-11-14 22:41:48 +09:00
PSeitz
b60d862150 docid deltas while indexing (#2249)
* docid deltas while indexing

storing deltas is especially helpful for repetitive data like logs.
In those cases, recording a doc on a term previously cost 4 bytes and
now costs 1 byte.

HDFS Indexing 1.1GB Total memory consumption:
Before:  760 MB
Now:     590 MB

* use scan for delta decoding
2023-11-13 05:14:27 +01:00
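The saving described above comes from delta-encoding monotonically increasing doc ids and writing the gaps as variable-length integers: on repetitive data the gaps are small, so most deltas fit in one byte instead of the four a raw u32 needs. A generic sketch (not tantivy's actual encoder):

```rust
// LEB128-style variable-length encoding: 7 payload bits per byte, high bit
// set on all but the last byte.
fn vint_encode(mut v: u32, out: &mut Vec<u8>) {
    loop {
        let byte = (v & 0x7f) as u8;
        v >>= 7;
        if v == 0 {
            out.push(byte);
            return;
        }
        out.push(byte | 0x80);
    }
}

// Store the gap to the previous doc id rather than the absolute id; small
// gaps (dense, repetitive data) then encode in a single byte.
fn encode_docids_as_deltas(docids: &[u32]) -> Vec<u8> {
    let mut out = Vec::new();
    let mut prev = 0u32;
    for &doc in docids {
        vint_encode(doc - prev, &mut out);
        prev = doc;
    }
    out
}
```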
PSeitz
4837c7811a add missing inlines (#2245) 2023-11-10 08:00:42 +01:00
PSeitz
5a2397d57e add sstable ord_to_term benchmark (#2242) 2023-11-10 07:27:48 +01:00
PSeitz
927b4432c9 Perf: use term hashmap in fastfield (#2243)
* add shared arena hashmap

* bench fastfield indexing

* use shared arena hashmap in columnar

lower minimum resize in hashtable

* clippy

* add comments
2023-11-09 13:44:02 +01:00
trinity-1686a
7a0064db1f bump index version (#2237)
* bump index version

and add constant for lowest supported version

* use range instead of handcoded bounds
2023-11-06 19:02:37 +01:00
PSeitz
2e7327205d fix coverage run (#2232)
The coverage run uses the compare_hash_only feature, which is not
compatible with the test_hashmap_size test.
2023-11-06 11:18:38 +00:00
Paul Masurel
7bc5bf78e2 Fixing functional tests. (#2239) 2023-11-05 18:18:39 +09:00
giovannicuccu
ef603c8c7e rename ReloadPolicy onCommit to onCommitWithDelay (#2235)
* rename ReloadPolicy onCommit to onCommitWithDelay

* fix format issues

---------

Co-authored-by: Giovanni Cuccu <gcuccu@imolainformatica.it>
2023-11-03 12:22:10 +01:00
PSeitz
28dd6b6546 collect json paths in indexing (#2231)
* collect json paths in indexing

* remove unsafe iter_mut_keys
2023-11-01 11:25:17 +01:00
trinity-1686a
1dda2bb537 handle * inside term in query parser (#2228) 2023-10-27 08:57:02 +02:00
PSeitz
bf6544cf28 fix mmap::Advice reexport (#2230) 2023-10-27 14:09:25 +09:00
PSeitz
ccecf946f7 tantivy 0.21.1 (#2227) 2023-10-27 05:01:44 +02:00
PSeitz
19a859d6fd term hashmap remove copy in is_empty, unused unordered_id (#2229) 2023-10-27 05:01:32 +02:00
PSeitz
83af14caa4 Fix range query (#2226)
Fix range query end check in advance
Rename vars to reduce ambiguity
add tests

Fixes #2225
2023-10-25 09:17:31 +02:00
PSeitz
4feeb2323d fix clippy (#2223) 2023-10-24 10:05:22 +02:00
PSeitz
07bf66a197 json path writer (#2224)
* refactor logic to JsonPathWriter

* use in encode_column_name

* add inlines

* move unsafe block
2023-10-24 09:45:50 +02:00
trinity-1686a
0d4589219b encode some part of posting list as -1 instead of direct values (#2185)
* add support for delta-1 encoding posting list

* encode term frequency minus one

* don't emit tf for json integer terms

* make skipreader not pub(crate) mutable
2023-10-20 16:58:26 +02:00
PSeitz
c2b0469180 improve docs, rework exports (#2220)
* rework exports

move snippet and advice
make indexer pub, remove indexer reexports

* add deprecation warning

* add architecture overview
2023-10-18 09:22:24 +02:00
PSeitz
7e1980b218 run coverage only after merge (#2212)
* run coverage only after merge

coverage is quite a slow step in CI. It can be run only after merging

* Apply suggestions from code review

Co-authored-by: Paul Masurel <paul@quickwit.io>

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-10-18 07:19:36 +02:00
PSeitz
ecb9a89a9f add compat mode for JSON (#2219) 2023-10-17 10:00:55 +02:00
PSeitz
5e06e504e6 split into ReferenceValueLeaf (#2217) 2023-10-16 16:31:30 +02:00
PSeitz
182f58cea6 remove Document: DocumentDeserialize dependency (#2211)
* remove Document: DocumentDeserialize dependency

The dependency requires users to implement an API they may not use.

* remove unnecessary Document bounds
2023-10-13 07:59:54 +02:00
dependabot[bot]
337ffadefd Update lru requirement from 0.11.0 to 0.12.0 (#2208)
Updates the requirements on [lru](https://github.com/jeromefroe/lru-rs) to permit the latest version.
- [Changelog](https://github.com/jeromefroe/lru-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/jeromefroe/lru-rs/compare/0.11.0...0.12.0)

---
updated-dependencies:
- dependency-name: lru
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-12 12:09:56 +02:00
dependabot[bot]
22aa4daf19 Update zstd requirement from 0.12 to 0.13 (#2214)
Updates the requirements on [zstd](https://github.com/gyscos/zstd-rs) to permit the latest version.
- [Release notes](https://github.com/gyscos/zstd-rs/releases)
- [Commits](https://github.com/gyscos/zstd-rs/compare/v0.12.0...v0.13.0)

---
updated-dependencies:
- dependency-name: zstd
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-12 04:24:44 +02:00
PSeitz
493f9b2f2a Read list of JSON fields encoded in dictionary (#2184)
* Read list of JSON fields encoded in dictionary

add method to get list of fields on InvertedIndexReader

* add field type
2023-10-09 12:06:22 +02:00
PSeitz
e246e5765d replace ReferenceValue with Self in Value (#2210) 2023-10-06 08:22:15 +02:00
PSeitz
6097235eff fix numeric order, refactor Document (#2209)
fix numeric order to prefer i64
rename and move Document stuff
2023-10-05 16:39:56 +02:00
PSeitz
b700c42246 add AsRef, expose object and array iter on Value (#2207)
add AsRef
expose object and array iter
add to_json on Document
2023-10-05 03:55:35 +02:00
PSeitz
5b1bf1a993 replace Field with field name (#2196) 2023-10-04 06:21:40 +02:00
PSeitz
041d4fced7 move to_named_doc to Document trait (#2205) 2023-10-04 06:03:07 +02:00
dependabot[bot]
166fc15239 Update memmap2 requirement from 0.7.1 to 0.9.0 (#2204)
Updates the requirements on [memmap2](https://github.com/RazrFalcon/memmap2-rs) to permit the latest version.
- [Changelog](https://github.com/RazrFalcon/memmap2-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/RazrFalcon/memmap2-rs/compare/v0.7.1...v0.9.0)

---
updated-dependencies:
- dependency-name: memmap2
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-04 05:00:46 +02:00
PSeitz
514a6e7fef fix bench compile, fix Document reexport (#2203) 2023-10-03 17:28:36 +02:00
dependabot[bot]
82d9127191 Update fs4 requirement from 0.6.3 to 0.7.0 (#2199)
Updates the requirements on [fs4](https://github.com/al8n/fs4-rs) to permit the latest version.
- [Release notes](https://github.com/al8n/fs4-rs/releases)
- [Commits](https://github.com/al8n/fs4-rs/commits/0.7.0)

---
updated-dependencies:
- dependency-name: fs4
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-03 04:43:09 +02:00
PSeitz
03a1f40767 rename DocValue to Value (#2197)
rename DocValue to Value to avoid confusion with lucene DocValues
rename Value to OwnedValue
2023-10-02 17:03:00 +02:00
Harrison Burt
1c7c6fd591 POC: Tantivy documents as a trait (#2071)
* fix windows build (#1)

* Fix windows build

* Add doc traits

* Add field value iter

* Add value and serialization

* Adjust order

* Fix bug

* Correct type

* Fix generic bugs

* Reformat code

* Add generic to index writer which I forgot about

* Fix missing generics on single segment writer

* Add missing type export

* Add default methods for convenience

* Cleanup

* Fix more-like-this query to use standard types

* Update API and fix tests

* Add doc traits

* Add field value iter

* Add value and serialization

* Adjust order

* Fix bug

* Correct type

* Rebase main and fix conflicts

* Reformat code

* Merge upstream

* Fix missing generics on single segment writer

* Add missing type export

* Add default methods for convenience

* Cleanup

* Fix more-like-this query to use standard types

* Update API and fix tests

* Add tokenizer improvements from previous commits

* Add tokenizer improvements from previous commits

* Reformat

* Fix unit tests

* Fix unit tests

* Use enum in changes

* Stage changes

* Add new deserializer logic

* Add serializer integration

* Add document deserializer

* Implement new (de)serialization api for existing types

* Fix bugs and type errors

* Add helper implementations

* Fix errors

* Reformat code

* Add unit tests and some code organisation for serialization

* Add unit tests to deserializer

* Add some small docs

* Add support for deserializing serde values

* Reformat

* Fix typo

* Fix typo

* Change repr of facet

* Remove unused trait methods

* Add child value type

* Resolve comments

* Fix build

* Fix more build errors

* Fix more build errors

* Fix the tests I missed

* Fix examples

* fix numerical order, serialize PreTok Str

* fix coverage

* rename Document to TantivyDocument, rename DocumentAccess to Document

add Binary prefix to binary de/serialization

* fix coverage

---------

Co-authored-by: Pascal Seitz <pascal.seitz@gmail.com>
2023-10-02 10:01:16 +02:00
PSeitz
b525f653c0 replace BinaryHeap for TopN (#2186)
* replace BinaryHeap for TopN

replace BinaryHeap for TopN with variant that selects the median with QuickSort,
which runs in O(n) time.

add merge_fruits fast path

* call truncate unconditionally, extend test

* remove special early exit

* add TODO, fmt

* truncate top n instead median, return vec

* simplify code
2023-09-27 09:25:30 +02:00
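The median-selection idea above can be sketched with the standard library's `select_nth_unstable_by`, which partitions in average O(n) time so only the surviving top-n elements need a final sort (illustrative, not the actual TopNComputer):

```rust
// Return the n largest values in descending order. select_nth_unstable_by
// partitions the slice so the n largest land in the first n slots, replacing
// the per-push bookkeeping of a BinaryHeap with one O(len) pass.
fn top_n(mut values: Vec<u64>, n: usize) -> Vec<u64> {
    if n == 0 {
        return Vec::new();
    }
    if n < values.len() {
        // partition descending around the (n-1)-th element
        values.select_nth_unstable_by(n - 1, |a, b| b.cmp(a));
        values.truncate(n);
    }
    // only the survivors need a full sort
    values.sort_unstable_by(|a, b| b.cmp(a));
    values
}
```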
ethever.eth
90586bc1e2 chore: remove unused Seek impl for Writers (#2187) (#2189)
Co-authored-by: famouscat <onismaa@gmail.com>
2023-09-26 17:03:28 +09:00
PSeitz
832f1633de handle exclusive out of bounds ranges on fastfield range queries (#2174)
closes https://github.com/quickwit-oss/quickwit/issues/3790
2023-09-26 08:00:40 +02:00
PSeitz
38db53c465 make column_index pub (#2181) 2023-09-22 08:06:45 +02:00
PSeitz
34920d31f5 Fix DateHistogram bucket gap (#2183)
* Fix DateHistogram bucket gap

Fixes a computation issue of the number of buckets needed in the
DateHistogram.

This is due to a missing normalization from request values (ms) to fast field
values (ns), when converting an intermediate result to the final result.
This results in a wrong computation by a factor 1_000_000.
The Histogram normalizes values to nanoseconds, to make the user input like
extended_bounds (ms precision) and the values from the fast field (ns precision for date type) compatible.
This normalization happens only for date type fields, as other field types don't have precision settings.
The normalization does not happen because of a missing `column_type`, which is not
correctly passed after merging an empty aggregation (which does not have a `column_type` set) with a regular aggregation.

Another related issue: an empty aggregation, which will not have
`column_type` set, will not convert the result to a human-readable format.

This PR fixes the issue by:
- Limit the allowed field types of DateHistogram to DateType
- Instead of passing the column_type, which is only available on the segment level, we flag the aggregation as `is_date_agg`.
- Fix the merge logic

Add a flag so that normalization happens only once. This is not an issue
currently, but it could easily become one.

closes https://github.com/quickwit-oss/quickwit/issues/3837

* use older nightly for time crate (breaks build)
2023-09-21 10:41:35 +02:00
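The factor of 1_000_000 above is exactly the ratio between the two units involved: request values such as extended_bounds are in milliseconds, while date fast field values are in nanoseconds, and the fix ensures the conversion is applied exactly once. A toy sketch of the guarded conversion (names are ours, not tantivy's):

```rust
// Milliseconds-to-nanoseconds ratio: the factor by which results were off
// when normalization was skipped.
const MS_TO_NS: i64 = 1_000_000;

// The flag mirrors the PR's guard against normalizing twice: apply the
// conversion only if it has not already happened.
fn normalize_ms_to_ns(value: i64, already_normalized: bool) -> i64 {
    if already_normalized {
        value
    } else {
        value * MS_TO_NS
    }
}
```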
trinity-1686a
0241a05b90 add support for exists query syntax in query parser (#2170)
* add support for exists query syntax in query parser

* rustfmt

* make Exists require a field
2023-09-19 11:10:39 +02:00
PSeitz
e125f3b041 fix test (#2178) 2023-09-19 08:21:50 +02:00
PSeitz
c520ac46fc add support for date in term agg (#2172)
support DateTime in TermsAggregation
Format dates with Rfc3339
2023-09-14 09:22:18 +02:00
PSeitz
2d7390341c increase min memory to 15MB for indexing (#2176)
With tantivy 0.20 the minimum memory consumption per SegmentWriter increased to
12MB. 7MB of that is for the different fast field collector types (they could be
created lazily). Increase the minimum memory from 3MB to 15MB.

Change memory variable naming from arena to budget.

closes #2156
2023-09-13 07:38:34 +02:00
dependabot[bot]
03fcdce016 Bump actions/checkout from 3 to 4 (#2171)
Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-09-11 10:47:33 +02:00
Ping Xia
e4e416ac42 extend FuzzyTermQuery to support json field (#2173)
* extend fuzzy search for json field

* comments

* comments

* fmt fix

* comments
2023-09-11 05:59:40 +02:00
Igor Motov
19325132b7 Fast-field based implementation of ExistsQuery (#2160)
Adds an implementation of ExistsQuery that takes advantage of fast fields.

Fixes #2159
2023-09-07 11:51:49 +09:00
Paul Masurel
389d36f760 Added comments 2023-09-04 11:06:56 +09:00
PSeitz
49448b31c6 chore: Release (#2168)
* chore: Release

* update CHANGELOG
2023-09-01 13:58:58 +02:00
PSeitz
ebede0bed7 update CHANGELOG (#2167) 2023-08-31 10:01:44 +02:00
PSeitz
b1d8b072db add missing aggregation part 2 (#2149)
* add missing aggregation part 2

Add missing support for:
- Mixed types columns
- Key of type string on numerical fields

The special aggregation is slower than the integrated one in TermsAggregation and therefore not
chosen by default, although it can cover all use cases.

* simplify, add num_docs to empty
2023-08-31 07:55:33 +02:00
ethever.eth
ee6a7c2bbb fix a small typo (#2165)
Co-authored-by: famouscat <onismaa@gmail.com>
2023-08-30 20:14:26 +02:00
PSeitz
c4e2708901 fix clippy, fmt (#2162) 2023-08-30 08:04:26 +02:00
PSeitz
5c8cfa50eb add missing parameter for percentiles (#2157) 2023-08-29 13:04:24 +02:00
PSeitz
73cb71762f add missing parameter for stats,min,max,count,sum,avg (#2151)
* add missing parameter for stats,min,max,count,sum,avg

add missing parameter for stats,min,max,count,sum,avg
closes #1913
partially #1789

* Apply suggestions from code review

Co-authored-by: Paul Masurel <paul@quickwit.io>

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-08-28 08:59:51 +02:00
Harrison Burt
267dfe58d7 Fix testing on windows (#2155)
* Fix missing trait imports

* Fix building tests on windows

* Revert other PR change
2023-08-27 09:20:44 +09:00
Harrison Burt
131c10d318 Fix missing trait imports (#2154) 2023-08-27 09:20:26 +09:00
Chris Tam
e6cacc40a9 Remove outdated fast field documentation (#2145) 2023-08-24 07:49:49 +02:00
PSeitz
48d4847b38 Improve aggregation error message (#2150)
* Improve aggregation error message

Improve aggregation error message by wrapping the deserialization with a
custom struct. This deserialization variant is slower, since we need to
keep the deserialized data around twice with this approach.
For now the valid variants list is manually updated. This could be
replaced with a proc macro.
closes #2143

* Simpler implementation

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-08-23 20:52:15 +02:00
PSeitz
59460c767f delayed column opening during merge (#2132)
* lazy columnar merge

This is the first part of addressing #3633
Instead of loading all Column into memory for the merge, only the current column_name
group is loaded. This can be done since the sstable streams the columns lexicographically.

* refactor

* add rustdoc

* replace iterator with BTreeMap
2023-08-21 08:55:35 +02:00
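Because the sstable streams columns in lexicographic order, a merge only ever needs the group of columns sharing the current name in memory at a time. A generic sketch of grouping by column name with a `BTreeMap` (illustrative, not tantivy's actual merge code):

```rust
use std::collections::BTreeMap;

// Group (column_name, payload) pairs by name. A BTreeMap keeps the groups in
// lexicographic order, matching the order the sstable streams them in, so a
// merge can process one name group at a time.
fn group_by_column_name(columns: Vec<(String, u32)>) -> BTreeMap<String, Vec<u32>> {
    let mut groups: BTreeMap<String, Vec<u32>> = BTreeMap::new();
    for (name, payload) in columns {
        groups.entry(name).or_default().push(payload);
    }
    groups
}
```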
Paul Masurel
756156beaf Fix doc 2023-08-17 17:47:45 +09:00
PSeitz
480763db0d track memory arena memory usage (#2148) 2023-08-16 18:19:42 +02:00
PSeitz
62ece86f24 track ff dictionary indexing memory consumption (#2147) 2023-08-16 14:00:08 +02:00
Caleb Hattingh
52d9e6f298 Fix doc typos in count aggregation metric (#2127) 2023-08-15 08:50:23 +02:00
Caleb Hattingh
47b315ff18 doc: escape the backslash (#2144) 2023-08-14 19:10:07 +02:00
PSeitz
ed1deee902 fix sort index by date (#2124)
closes #2112
2023-08-14 17:36:52 +02:00
PSeitz
2e109018b7 add missing parameter to term agg (#2103)
* add missing parameter to term agg

* move missing handling to block accessor

* add multivalue test, fix multivalue case, add comments

* add documentation, deactivate special case

* cargo fmt

* resolve merge conflict
2023-08-14 14:22:18 +02:00
Adam Reichold
22c35b1e00 Fix explanation of boost queries seeking beyond query result. (#2142)
* Make current nightly Clippy happy.

* Fix explanation of boost queries seeking beyond query result.
2023-08-14 11:59:11 +09:00
trinity-1686a
b92082b748 implement lenient parser (#2129)
* move query parser to nom

* add support for term grouping

* initial work on infallible parser

* fmt

* add tests and fix minor parsing bugs

* address review comments

* add support for lenient queries in tantivy

* make lenient parser report errors

* allow mixing occur and bool in query
2023-08-08 15:41:29 +02:00
PSeitz
c2be6603a2 alternative mixed field aggregation collection (#2135)
* alternative mixed field aggregation collection

instead of having multiple accessor in one AggregationWithAccessor split it into
multiple independent AggregationWithAccessor

* Update src/aggregation/agg_req_with_accessor.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-07-27 12:25:31 +02:00
Adam Reichold
c805f08ca7 Fix a few more upcoming Clippy lints (#2133) 2023-07-24 17:07:57 +09:00
Adam Reichold
ccc0335158 Minor improvements to OwnedBytes (#2134)
This makes it obvious where the `StableDerefTrait` is invoked and avoids
`transmute` when only a lifetime needs to be extended. Furthermore, it makes use
of `slice::split_at` where that seemed appropriate.
2023-07-24 17:06:33 +09:00
Adam Reichold
42acd334f4 Fixes the new deny-by-default incorrect_partial_ord_impl_on_ord_type Clippy lint (#2131) 2023-07-21 11:36:17 +09:00
246 changed files with 15701 additions and 4708 deletions

View File

@@ -3,8 +3,6 @@ name: Coverage
 on:
   push:
     branches: [main]
-  pull_request:
-    branches: [main]
 
 # Ensures that we cancel running jobs for the same PR / same workflow.
 concurrency:
@@ -15,13 +13,13 @@ jobs:
   coverage:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Install Rust
-        run: rustup toolchain install nightly --profile minimal --component llvm-tools-preview
+        run: rustup toolchain install nightly-2023-09-10 --profile minimal --component llvm-tools-preview
       - uses: Swatinem/rust-cache@v2
       - uses: taiki-e/install-action@cargo-llvm-cov
       - name: Generate code coverage
-        run: cargo +nightly llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
+        run: cargo +nightly-2023-09-10 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
         continue-on-error: true

View File

@@ -19,7 +19,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Install stable
         uses: actions-rs/toolchain@v1
         with:


@@ -20,7 +20,7 @@ jobs:
 runs-on: ubuntu-latest
 steps:
-- uses: actions/checkout@v3
+- uses: actions/checkout@v4
 - name: Install nightly
 uses: actions-rs/toolchain@v1
@@ -39,6 +39,13 @@ jobs:
 - name: Check Formatting
 run: cargo +nightly fmt --all -- --check
+- name: Check Stable Compilation
+run: cargo build --all-features
+- name: Check Bench Compilation
+run: cargo +nightly bench --no-run --profile=dev --all-features
 - uses: actions-rs/clippy-check@v1
 with:
@@ -60,7 +67,7 @@ jobs:
 name: test-${{ matrix.features.label}}
 steps:
-- uses: actions/checkout@v3
+- uses: actions/checkout@v4
 - name: Install stable
 uses: actions-rs/toolchain@v1


@@ -1,3 +1,36 @@
+Tantivy 0.21.1
+================================
+#### Bugfixes
+- Range queries on fast fields with less values on that field than documents had an invalid end condition, leading to missing results. [#2226](https://github.com/quickwit-oss/tantivy/issues/2226)(@appaquet @PSeitz)
+- Increase the minimum memory budget from 3MB to 15MB to avoid single doc segments (API fix). [#2176](https://github.com/quickwit-oss/tantivy/issues/2176)(@PSeitz)
+
+Tantivy 0.21
+================================
+#### Bugfixes
+- Fix track fast field memory consumption, which led to higher memory consumption than the budget allowed during indexing [#2148](https://github.com/quickwit-oss/tantivy/issues/2148)[#2147](https://github.com/quickwit-oss/tantivy/issues/2147)(@PSeitz)
+- Fix a regression from 0.20 where sort index by date wasn't working anymore [#2124](https://github.com/quickwit-oss/tantivy/issues/2124)(@PSeitz)
+- Fix getting the root facet on the `FacetCollector`. [#2086](https://github.com/quickwit-oss/tantivy/issues/2086)(@adamreichold)
+- Align numerical type priority order of columnar and query. [#2088](https://github.com/quickwit-oss/tantivy/issues/2088)(@fmassot)
+#### Breaking Changes
+- Remove support for Brotli and Snappy compression [#2123](https://github.com/quickwit-oss/tantivy/issues/2123)(@adamreichold)
+#### Features/Improvements
+- Implement lenient query parser [#2129](https://github.com/quickwit-oss/tantivy/pull/2129)(@trinity-1686a)
+- order_by_u64_field and order_by_fast_field allow sorting in ascending and descending order [#2111](https://github.com/quickwit-oss/tantivy/issues/2111)(@naveenann)
+- Allow dynamic filters in text analyzer builder [#2110](https://github.com/quickwit-oss/tantivy/issues/2110)(@fulmicoton @fmassot)
+- **Aggregation**
+  - Add missing parameter for term aggregation [#2149](https://github.com/quickwit-oss/tantivy/issues/2149)[#2103](https://github.com/quickwit-oss/tantivy/issues/2103)(@PSeitz)
+  - Add missing parameter for percentiles [#2157](https://github.com/quickwit-oss/tantivy/issues/2157)(@PSeitz)
+  - Add missing parameter for stats,min,max,count,sum,avg [#2151](https://github.com/quickwit-oss/tantivy/issues/2151)(@PSeitz)
+  - Improve aggregation deserialization error message [#2150](https://github.com/quickwit-oss/tantivy/issues/2150)(@PSeitz)
+  - Add validation for type Bytes to term_agg [#2077](https://github.com/quickwit-oss/tantivy/issues/2077)(@PSeitz)
+  - Alternative mixed field collection [#2135](https://github.com/quickwit-oss/tantivy/issues/2135)(@PSeitz)
+- Add missing query_terms impl for TermSetQuery. [#2120](https://github.com/quickwit-oss/tantivy/issues/2120)(@adamreichold)
+- Minor improvements to OwnedBytes [#2134](https://github.com/quickwit-oss/tantivy/issues/2134)(@adamreichold)
+- Remove allocations in split compound words [#2080](https://github.com/quickwit-oss/tantivy/issues/2080)(@PSeitz)
+- Ngram tokenizer now returns an error with invalid arguments [#2102](https://github.com/quickwit-oss/tantivy/issues/2102)(@fmassot)
+- Make TextAnalyzerBuilder public [#2097](https://github.com/quickwit-oss/tantivy/issues/2097)(@adamreichold)
+- Return an error when tokenizer is not found while indexing [#2093](https://github.com/quickwit-oss/tantivy/issues/2093)(@naveenann)
+- Delayed column opening during merge [#2132](https://github.com/quickwit-oss/tantivy/issues/2132)(@PSeitz)
 Tantivy 0.20.2
 ================================


@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.20.2"
+version = "0.22.0-dev"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -22,47 +22,46 @@ crc32fast = "1.3.2"
 once_cell = "1.10.0"
 regex = { version = "1.5.5", default-features = false, features = ["std", "unicode"] }
 aho-corasick = "1.0"
-tantivy-fst = "0.4.0"
+tantivy-fst = "0.5"
-memmap2 = { version = "0.7.1", optional = true }
+memmap2 = { version = "0.9.0", optional = true }
 lz4_flex = { version = "0.11", default-features = false, optional = true }
-zstd = { version = "0.12", optional = true, default-features = false }
+zstd = { version = "0.13", optional = true, default-features = false }
 tempfile = { version = "3.3.0", optional = true }
 log = "0.4.16"
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = "1.0.79"
 num_cpus = "1.13.1"
-fs4 = { version = "0.6.3", optional = true }
+fs4 = { version = "0.8.0", optional = true }
 levenshtein_automata = "0.2.1"
 uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
 rust-stemmers = "1.2.0"
 downcast-rs = "1.2.0"
-bitpacking = { version = "0.8.4", default-features = false, features = ["bitpacker4x"] }
+bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker4x"] }
-census = "0.4.0"
+census = "0.4.2"
 rustc-hash = "1.1.0"
 thiserror = "1.0.30"
 htmlescape = "0.3.1"
 fail = { version = "0.5.0", optional = true }
-murmurhash32 = "0.3.0"
 time = { version = "0.3.10", features = ["serde-well-known"] }
 smallvec = "1.8.0"
 rayon = "1.5.2"
-lru = "0.11.0"
+lru = "0.12.0"
 fastdivide = "0.4.0"
-itertools = "0.11.0"
+itertools = "0.12.0"
 measure_time = "0.8.2"
-async-trait = "0.1.53"
 arc-swap = "1.5.0"
-columnar = { version= "0.1", path="./columnar", package ="tantivy-columnar" }
+columnar = { version= "0.2", path="./columnar", package ="tantivy-columnar" }
-sstable = { version= "0.1", path="./sstable", package ="tantivy-sstable", optional = true }
+sstable = { version= "0.2", path="./sstable", package ="tantivy-sstable", optional = true }
-stacker = { version= "0.1", path="./stacker", package ="tantivy-stacker" }
+stacker = { version= "0.2", path="./stacker", package ="tantivy-stacker" }
-query-grammar = { version= "0.20.0", path="./query-grammar", package = "tantivy-query-grammar" }
+query-grammar = { version= "0.21.0", path="./query-grammar", package = "tantivy-query-grammar" }
-tantivy-bitpacker = { version= "0.4", path="./bitpacker" }
+tantivy-bitpacker = { version= "0.5", path="./bitpacker" }
-common = { version= "0.5", path = "./common/", package = "tantivy-common" }
+common = { version= "0.6", path = "./common/", package = "tantivy-common" }
-tokenizer-api = { version= "0.1", path="./tokenizer-api", package="tantivy-tokenizer-api" }
+tokenizer-api = { version= "0.2", path="./tokenizer-api", package="tantivy-tokenizer-api" }
 sketches-ddsketch = { version = "0.2.1", features = ["use_serde"] }
 futures-util = { version = "0.3.28", optional = true }
+fnv = "1.0.7"
 [target.'cfg(windows)'.dependencies]
 winapi = "0.3.9"
@@ -73,14 +72,15 @@ maplit = "1.0.2"
 matches = "0.1.9"
 pretty_assertions = "1.2.1"
 proptest = "1.0.0"
-criterion = "0.5"
 test-log = "0.2.10"
-env_logger = "0.10.0"
-pprof = { git = "https://github.com/PSeitz/pprof-rs/", rev = "53af24b", features = ["flamegraph", "criterion"] } # temp fork that works with criterion 0.5
 futures = "0.3.21"
 paste = "1.0.11"
 more-asserts = "0.3.1"
 rand_distr = "0.4.3"
+time = { version = "0.3.10", features = ["serde-well-known", "macros"] }
+[target.'cfg(not(windows))'.dev-dependencies]
+criterion = { version = "0.5", default-features = false }
 [dev-dependencies.fail]
 version = "0.5.0"
@@ -113,6 +113,11 @@ unstable = [] # useful for benches.
 quickwit = ["sstable", "futures-util"]
+# Compares only the hash of a string when indexing data.
+# Increases indexing speed, but may lead to extremely rare missing terms, when there's a hash collision.
+# Uses 64bit ahash.
+compare_hash_only = ["stacker/compare_hash_only"]
 [workspace]
 members = ["query-grammar", "bitpacker", "common", "ownedbytes", "stacker", "sstable", "tokenizer-api", "columnar"]
@@ -126,7 +131,7 @@ members = ["query-grammar", "bitpacker", "common", "ownedbytes", "stacker", "sst
 [[test]]
 name = "failpoints"
 path = "tests/failpoints/mod.rs"
-required-features = ["fail/failpoints"]
+required-features = ["failpoints"]
 [[bench]]
 name = "analyzer"
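The `compare_hash_only` feature added in the Cargo.toml diff above is opt-in. A downstream crate would enable it roughly like this (illustrative fragment; the version number assumes the 0.22 release this branch is working toward):

```toml
[dependencies]
# Trades a tiny risk of hash collisions (missing terms) for faster indexing.
tantivy = { version = "0.22", features = ["compare_hash_only"] }
```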


@@ -5,19 +5,18 @@
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Crates.io](https://img.shields.io/crates/v/tantivy.svg)](https://crates.io/crates/tantivy)
-![Tantivy](https://tantivy-search.github.io/logo/tantivy-logo.png)
+<img src="https://tantivy-search.github.io/logo/tantivy-logo.png" alt="Tantivy, the fastest full-text search engine library written in Rust" height="250">
-**Tantivy** is a **full-text search engine library** written in Rust.
+## Fast full-text search engine library written in Rust
-It is closer to [Apache Lucene](https://lucene.apache.org/) than to [Elasticsearch](https://www.elastic.co/products/elasticsearch) or [Apache Solr](https://lucene.apache.org/solr/) in the sense it is not
-an off-the-shelf search engine server, but rather a crate that can be used
-to build such a search engine.
+**If you are looking for an alternative to Elasticsearch or Apache Solr, check out [Quickwit](https://github.com/quickwit-oss/quickwit), our distributed search engine built on top of Tantivy.**
+Tantivy is closer to [Apache Lucene](https://lucene.apache.org/) than to [Elasticsearch](https://www.elastic.co/products/elasticsearch) or [Apache Solr](https://lucene.apache.org/solr/) in the sense it is not
+an off-the-shelf search engine server, but rather a crate that can be used to build such a search engine.
 Tantivy is, in fact, strongly inspired by Lucene's design.
-If you are looking for an alternative to Elasticsearch or Apache Solr, check out [Quickwit](https://github.com/quickwit-oss/quickwit), our search engine built on top of Tantivy.
-# Benchmark
+## Benchmark
 The following [benchmark](https://tantivy-search.github.io/bench/) breakdowns
 performance for different types of queries/collections.
@@ -28,7 +27,7 @@ Your mileage WILL vary depending on the nature of queries and their load.
 Details about the benchmark can be found at this [repository](https://github.com/quickwit-oss/search-benchmark-game).
-# Features
+## Features
 - Full-text search
 - Configurable tokenizer (stemming available for 17 Latin languages) with third party support for Chinese ([tantivy-jieba](https://crates.io/crates/tantivy-jieba) and [cang-jie](https://crates.io/crates/cang-jie)), Japanese ([lindera](https://github.com/lindera-morphology/lindera-tantivy), [Vaporetto](https://crates.io/crates/vaporetto_tantivy), and [tantivy-tokenizer-tiny-segmenter](https://crates.io/crates/tantivy-tokenizer-tiny-segmenter)) and Korean ([lindera](https://github.com/lindera-morphology/lindera-tantivy) + [lindera-ko-dic-builder](https://github.com/lindera-morphology/lindera-ko-dic-builder))
@@ -54,11 +53,11 @@ Details about the benchmark can be found at this [repository](https://github.com
 - Searcher Warmer API
 - Cheesy logo with a horse
-## Non-features
+### Non-features
 Distributed search is out of the scope of Tantivy, but if you are looking for this feature, check out [Quickwit](https://github.com/quickwit-oss/quickwit/).
-# Getting started
+## Getting started
 Tantivy works on stable Rust and supports Linux, macOS, and Windows.
@@ -68,7 +67,7 @@ index documents, and search via the CLI or a small server with a REST API.
 It walks you through getting a Wikipedia search engine up and running in a few minutes.
 - [Reference doc for the last released version](https://docs.rs/tantivy/)
-# How can I support this project?
+## How can I support this project?
 There are many ways to support this project.
@@ -79,16 +78,16 @@ There are many ways to support this project.
 - Contribute code (you can join [our Discord server](https://discord.gg/MT27AG5EVE))
 - Talk about Tantivy around you
-# Contributing code
+## Contributing code
 We use the GitHub Pull Request workflow: reference a GitHub ticket and/or include a comprehensive commit message when opening a PR.
 Feel free to update CHANGELOG.md with your contribution.
-## Tokenizer
+### Tokenizer
 When implementing a tokenizer for tantivy depend on the `tantivy-tokenizer-api` crate.
-## Clone and build locally
+### Clone and build locally
 Tantivy compiles on stable Rust.
 To check out and run tests, you can simply run:
@@ -99,7 +98,7 @@ cd tantivy
 cargo test
 ```
-# Companies Using Tantivy
+## Companies Using Tantivy
 <p align="left">
 <img align="center" src="doc/assets/images/etsy.png" alt="Etsy" height="25" width="auto" />&nbsp;
@@ -111,7 +110,7 @@ cargo test
 <img align="center" src="doc/assets/images/element-dark-theme.png#gh-dark-mode-only" alt="Element.io" height="25" width="auto" />
 </p>
-# FAQ
+## FAQ
 ### Can I use Tantivy in other languages?


@@ -1,14 +1,99 @@
-use criterion::{criterion_group, criterion_main, Criterion, Throughput};
-use pprof::criterion::{Output, PProfProfiler};
-use tantivy::schema::{FAST, INDEXED, STORED, STRING, TEXT};
-use tantivy::Index;
+use criterion::{criterion_group, criterion_main, BatchSize, Bencher, Criterion, Throughput};
+use tantivy::schema::{TantivyDocument, FAST, INDEXED, STORED, STRING, TEXT};
+use tantivy::{tokenizer, Index, IndexWriter};
 const HDFS_LOGS: &str = include_str!("hdfs.json");
 const GH_LOGS: &str = include_str!("gh.json");
 const WIKI: &str = include_str!("wiki.json");
-fn get_lines(input: &str) -> Vec<&str> {
-    input.trim().split('\n').collect()
-}
+fn benchmark(
+    b: &mut Bencher,
+    input: &str,
+    schema: tantivy::schema::Schema,
+    commit: bool,
+    parse_json: bool,
+    is_dynamic: bool,
+) {
+    if is_dynamic {
+        benchmark_dynamic_json(b, input, schema, commit, parse_json)
+    } else {
+        _benchmark(b, input, schema, commit, parse_json, |schema, doc_json| {
+            TantivyDocument::parse_json(&schema, doc_json).unwrap()
+        })
+    }
+}
+fn get_index(schema: tantivy::schema::Schema) -> Index {
+    let mut index = Index::create_in_ram(schema.clone());
+    let ff_tokenizer_manager = tokenizer::TokenizerManager::default();
+    ff_tokenizer_manager.register(
+        "raw",
+        tokenizer::TextAnalyzer::builder(tokenizer::RawTokenizer::default())
+            .filter(tokenizer::RemoveLongFilter::limit(255))
+            .build(),
+    );
+    index.set_fast_field_tokenizers(ff_tokenizer_manager.clone());
+    index
+}
+fn _benchmark(
+    b: &mut Bencher,
+    input: &str,
+    schema: tantivy::schema::Schema,
+    commit: bool,
+    include_json_parsing: bool,
+    create_doc: impl Fn(&tantivy::schema::Schema, &str) -> TantivyDocument,
+) {
+    if include_json_parsing {
+        let lines: Vec<&str> = input.trim().split('\n').collect();
+        b.iter(|| {
+            let index = get_index(schema.clone());
+            let mut index_writer: IndexWriter =
+                index.writer_with_num_threads(1, 100_000_000).unwrap();
+            for doc_json in &lines {
+                let doc = create_doc(&schema, doc_json);
+                index_writer.add_document(doc).unwrap();
+            }
+            if commit {
+                index_writer.commit().unwrap();
+            }
+        })
+    } else {
+        let docs: Vec<_> = input
+            .trim()
+            .split('\n')
+            .map(|doc_json| create_doc(&schema, doc_json))
+            .collect();
+        b.iter_batched(
+            || docs.clone(),
+            |docs| {
+                let index = get_index(schema.clone());
+                let mut index_writer: IndexWriter =
+                    index.writer_with_num_threads(1, 100_000_000).unwrap();
+                for doc in docs {
+                    index_writer.add_document(doc).unwrap();
+                }
+                if commit {
+                    index_writer.commit().unwrap();
+                }
+            },
+            BatchSize::SmallInput,
+        )
+    }
+}
+fn benchmark_dynamic_json(
+    b: &mut Bencher,
+    input: &str,
+    schema: tantivy::schema::Schema,
+    commit: bool,
+    parse_json: bool,
+) {
+    let json_field = schema.get_field("json").unwrap();
+    _benchmark(b, input, schema, commit, parse_json, |_schema, doc_json| {
+        let json_val: serde_json::Map<String, serde_json::Value> =
+            serde_json::from_str(doc_json).unwrap();
+        tantivy::doc!(json_field=>json_val)
+    })
+}
 pub fn hdfs_index_benchmark(c: &mut Criterion) {
@@ -19,7 +104,14 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
     schema_builder.add_text_field("severity", STRING);
     schema_builder.build()
 };
-let schema_with_store = {
+let schema_only_fast = {
+    let mut schema_builder = tantivy::schema::SchemaBuilder::new();
+    schema_builder.add_u64_field("timestamp", FAST);
+    schema_builder.add_text_field("body", FAST);
+    schema_builder.add_text_field("severity", FAST);
+    schema_builder.build()
+};
+let _schema_with_store = {
     let mut schema_builder = tantivy::schema::SchemaBuilder::new();
     schema_builder.add_u64_field("timestamp", INDEXED | STORED);
     schema_builder.add_text_field("body", TEXT | STORED);
@@ -28,74 +120,39 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
 };
 let dynamic_schema = {
     let mut schema_builder = tantivy::schema::SchemaBuilder::new();
-    schema_builder.add_json_field("json", TEXT);
+    schema_builder.add_json_field("json", TEXT | FAST);
     schema_builder.build()
 };
 let mut group = c.benchmark_group("index-hdfs");
 group.throughput(Throughput::Bytes(HDFS_LOGS.len() as u64));
 group.sample_size(20);
-group.bench_function("index-hdfs-no-commit", |b| {
-    let lines = get_lines(HDFS_LOGS);
-    b.iter(|| {
-        let index = Index::create_in_ram(schema.clone());
-        let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-        for doc_json in &lines {
-            let doc = schema.parse_document(doc_json).unwrap();
-            index_writer.add_document(doc).unwrap();
-        }
-    })
-});
-group.bench_function("index-hdfs-with-commit", |b| {
-    let lines = get_lines(HDFS_LOGS);
-    b.iter(|| {
-        let index = Index::create_in_ram(schema.clone());
-        let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-        for doc_json in &lines {
-            let doc = schema.parse_document(doc_json).unwrap();
-            index_writer.add_document(doc).unwrap();
-        }
-        index_writer.commit().unwrap();
-    })
-});
-group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
-    let lines = get_lines(HDFS_LOGS);
-    b.iter(|| {
-        let index = Index::create_in_ram(schema_with_store.clone());
-        let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-        for doc_json in &lines {
-            let doc = schema.parse_document(doc_json).unwrap();
-            index_writer.add_document(doc).unwrap();
-        }
-    })
-});
-group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
-    let lines = get_lines(HDFS_LOGS);
-    b.iter(|| {
-        let index = Index::create_in_ram(schema_with_store.clone());
-        let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-        for doc_json in &lines {
-            let doc = schema.parse_document(doc_json).unwrap();
-            index_writer.add_document(doc).unwrap();
-        }
-        index_writer.commit().unwrap();
-    })
-});
-group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
-    let lines = get_lines(HDFS_LOGS);
-    b.iter(|| {
-        let index = Index::create_in_ram(dynamic_schema.clone());
-        let json_field = dynamic_schema.get_field("json").unwrap();
-        let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-        for doc_json in &lines {
-            let json_val: serde_json::Map<String, serde_json::Value> =
-                serde_json::from_str(doc_json).unwrap();
-            let doc = tantivy::doc!(json_field=>json_val);
-            index_writer.add_document(doc).unwrap();
-        }
-        index_writer.commit().unwrap();
-    })
-});
+let benches = [
+    ("only-indexed-".to_string(), schema, false),
+    //("stored-".to_string(), _schema_with_store, false),
+    ("only-fast-".to_string(), schema_only_fast, false),
+    ("dynamic-".to_string(), dynamic_schema, true),
+];
+for (prefix, schema, is_dynamic) in benches {
+    for commit in [false, true] {
+        let suffix = if commit { "with-commit" } else { "no-commit" };
+        for parse_json in [false] {
+            // for parse_json in [false, true] {
+            let suffix = if parse_json {
+                format!("{}-with-json-parsing", suffix)
+            } else {
+                format!("{}", suffix)
+            };
+            let bench_name = format!("{}{}", prefix, suffix);
+            group.bench_function(bench_name, |b| {
+                benchmark(b, HDFS_LOGS, schema.clone(), commit, parse_json, is_dynamic)
+            });
+        }
+    }
+}
 }
 pub fn gh_index_benchmark(c: &mut Criterion) {
@@ -104,38 +161,24 @@ pub fn gh_index_benchmark(c: &mut Criterion) {
     schema_builder.add_json_field("json", TEXT | FAST);
     schema_builder.build()
 };
+let dynamic_schema_fast = {
+    let mut schema_builder = tantivy::schema::SchemaBuilder::new();
+    schema_builder.add_json_field("json", FAST);
+    schema_builder.build()
+};
 let mut group = c.benchmark_group("index-gh");
 group.throughput(Throughput::Bytes(GH_LOGS.len() as u64));
 group.bench_function("index-gh-no-commit", |b| {
-    let lines = get_lines(GH_LOGS);
-    b.iter(|| {
-        let json_field = dynamic_schema.get_field("json").unwrap();
-        let index = Index::create_in_ram(dynamic_schema.clone());
-        let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-        for doc_json in &lines {
-            let json_val: serde_json::Map<String, serde_json::Value> =
-                serde_json::from_str(doc_json).unwrap();
-            let doc = tantivy::doc!(json_field=>json_val);
-            index_writer.add_document(doc).unwrap();
-        }
-    })
+    benchmark_dynamic_json(b, GH_LOGS, dynamic_schema.clone(), false, false)
 });
-group.bench_function("index-gh-with-commit", |b| {
-    let lines = get_lines(GH_LOGS);
-    b.iter(|| {
-        let json_field = dynamic_schema.get_field("json").unwrap();
-        let index = Index::create_in_ram(dynamic_schema.clone());
-        let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-        for doc_json in &lines {
-            let json_val: serde_json::Map<String, serde_json::Value> =
-                serde_json::from_str(doc_json).unwrap();
-            let doc = tantivy::doc!(json_field=>json_val);
-            index_writer.add_document(doc).unwrap();
-        }
-        index_writer.commit().unwrap();
-    })
+group.bench_function("index-gh-fast", |b| {
+    benchmark_dynamic_json(b, GH_LOGS, dynamic_schema_fast.clone(), false, false)
+});
+group.bench_function("index-gh-fast-with-commit", |b| {
+    benchmark_dynamic_json(b, GH_LOGS, dynamic_schema_fast.clone(), true, false)
 });
 }
@@ -150,33 +193,10 @@ pub fn wiki_index_benchmark(c: &mut Criterion) {
 group.throughput(Throughput::Bytes(WIKI.len() as u64));
 group.bench_function("index-wiki-no-commit", |b| {
-    let lines = get_lines(WIKI);
-    b.iter(|| {
-        let json_field = dynamic_schema.get_field("json").unwrap();
-        let index = Index::create_in_ram(dynamic_schema.clone());
-        let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-        for doc_json in &lines {
-            let json_val: serde_json::Map<String, serde_json::Value> =
-                serde_json::from_str(doc_json).unwrap();
-            let doc = tantivy::doc!(json_field=>json_val);
-            index_writer.add_document(doc).unwrap();
-        }
-    })
+    benchmark_dynamic_json(b, WIKI, dynamic_schema.clone(), false, false)
 });
 group.bench_function("index-wiki-with-commit", |b| {
-    let lines = get_lines(WIKI);
-    b.iter(|| {
-        let json_field = dynamic_schema.get_field("json").unwrap();
-        let index = Index::create_in_ram(dynamic_schema.clone());
-        let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-        for doc_json in &lines {
-            let json_val: serde_json::Map<String, serde_json::Value> =
-                serde_json::from_str(doc_json).unwrap();
-            let doc = tantivy::doc!(json_field=>json_val);
-            index_writer.add_document(doc).unwrap();
-        }
-        index_writer.commit().unwrap();
-    })
+    benchmark_dynamic_json(b, WIKI, dynamic_schema.clone(), true, false)
 });
 }
@@ -187,12 +207,12 @@
 }
 criterion_group! {
     name = gh_benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    config = Criterion::default();
     targets = gh_index_benchmark
 }
 criterion_group! {
     name = wiki_benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    config = Criterion::default();
     targets = wiki_index_benchmark
 }
 criterion_main!(benches, gh_benches, wiki_benches);
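The benchmark refactor above replaces hand-rolled `b.iter` loops with criterion's `iter_batched`, which runs per-iteration setup (cloning the pre-parsed documents) outside the timed section so that JSON parsing cost is excluded from the measurement. A minimal, dependency-free sketch of that idea (the `iter_batched` name and shape mirror criterion's API, but this stand-in is illustrative, not criterion itself):

```rust
use std::time::{Duration, Instant};

// Illustrative stand-in for criterion's `Bencher::iter_batched`: `setup`
// produces a fresh input per iteration outside the timing window, and only
// `routine` is measured.
fn iter_batched<I, O>(
    setup: impl Fn() -> I,
    mut routine: impl FnMut(I) -> O,
    iters: u32,
) -> Duration {
    let mut total = Duration::ZERO;
    for _ in 0..iters {
        let input = setup(); // not timed: e.g. cloning pre-parsed docs
        let start = Instant::now();
        let out = routine(input); // timed: e.g. adding docs to an IndexWriter
        total += start.elapsed();
        drop(out); // teardown also happens outside the timing window
    }
    total
}
```

This is why the diff pre-parses `docs` once and passes `|| docs.clone()` as the setup closure: the clone is cheap and untimed, while the old code re-parsed every line inside the measured loop.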


@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-bitpacker"
-version = "0.4.0"
+version = "0.5.0"
 edition = "2021"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
@@ -15,7 +15,7 @@ homepage = "https://github.com/quickwit-oss/tantivy"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
-bitpacking = {version="0.8", default-features=false, features = ["bitpacker1x"]}
+bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker1x"] }
 [dev-dependencies]
 rand = "0.8"


@@ -125,6 +125,8 @@ impl BitUnpacker {
 // Decodes the range of bitpacked `u32` values with idx
 // in [start_idx, start_idx + output.len()).
+// It is guaranteed to completely fill `output` and not read from it, so passing a vector with
+// un-initialized values is safe.
 //
 // #Panics
 //
@@ -237,7 +239,19 @@ impl BitUnpacker {
     data: &[u8],
     positions: &mut Vec<u32>,
 ) {
-    positions.resize(id_range.len(), 0u32);
+    // We use the code below instead of positions.resize(id_range.len(), 0u32) for performance
+    // reasons: on some queries, the CPU cost of memsetting the array and of using a bigger
+    // vector than necessary is noticeable (~5%).
+    // In particular, searches are a few percent faster when using reserve_exact() as below
+    // instead of reserve().
+    // The un-initialized values are safe as get_batch_u32s() completely fills `positions`
+    // and does not read from it.
+    positions.clear();
+    positions.reserve_exact(id_range.len());
+    #[allow(clippy::uninit_vec)]
+    unsafe {
+        positions.set_len(id_range.len());
+    }
     self.get_batch_u32s(id_range.start, data, positions);
     crate::filter_vec::filter_vec_in_place(value_range, id_range.start, positions)
 }
@@ -367,7 +381,7 @@ mod test {
 let mut output: Vec<u32> = Vec::new();
 for len in [0, 1, 2, 32, 33, 34, 64] {
 for start_idx in 0u32..32u32 {
-output.resize(len as usize, 0);
+output.resize(len, 0);
 bitunpacker.get_batch_u32s(start_idx, &buffer, &mut output);
 for i in 0..len {
 let expected = (start_idx + i as u32) & mask;

View File

@@ -64,10 +64,8 @@ fn mem_usage<T>(items: &Vec<T>) -> usize {
impl BlockedBitpacker { impl BlockedBitpacker {
pub fn new() -> Self { pub fn new() -> Self {
let mut compressed_blocks = vec![];
compressed_blocks.resize(8, 0);
Self { Self {
compressed_blocks, compressed_blocks: vec![0; 8],
buffer: vec![], buffer: vec![],
offset_and_bits: vec![], offset_and_bits: vec![],
} }

View File

@@ -32,6 +32,7 @@ postprocessors = [
{ pattern = 'Michael Kleen', replace = "mkleen"}, # replace with github user { pattern = 'Michael Kleen', replace = "mkleen"}, # replace with github user
{ pattern = 'Adrien Guillo', replace = "guilload"}, # replace with github user { pattern = 'Adrien Guillo', replace = "guilload"}, # replace with github user
{ pattern = 'François Massot', replace = "fmassot"}, # replace with github user { pattern = 'François Massot', replace = "fmassot"}, # replace with github user
{ pattern = 'Naveen Aiathurai', replace = "naveenann"}, # replace with github user
{ pattern = '', replace = ""}, # replace with github user { pattern = '', replace = ""}, # replace with github user
] ]

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "tantivy-columnar" name = "tantivy-columnar"
version = "0.1.0" version = "0.2.0"
edition = "2021" edition = "2021"
license = "MIT" license = "MIT"
homepage = "https://github.com/quickwit-oss/tantivy" homepage = "https://github.com/quickwit-oss/tantivy"
@@ -9,14 +9,13 @@ description = "column oriented storage for tantivy"
categories = ["database-implementations", "data-structures", "compression"] categories = ["database-implementations", "data-structures", "compression"]
[dependencies] [dependencies]
itertools = "0.11.0" itertools = "0.12.0"
fnv = "1.0.7"
fastdivide = "0.4.0" fastdivide = "0.4.0"
stacker = { version= "0.1", path = "../stacker", package="tantivy-stacker"} stacker = { version= "0.2", path = "../stacker", package="tantivy-stacker"}
sstable = { version= "0.1", path = "../sstable", package = "tantivy-sstable" } sstable = { version= "0.2", path = "../sstable", package = "tantivy-sstable" }
common = { version= "0.5", path = "../common", package = "tantivy-common" } common = { version= "0.6", path = "../common", package = "tantivy-common" }
tantivy-bitpacker = { version= "0.4", path = "../bitpacker/" } tantivy-bitpacker = { version= "0.5", path = "../bitpacker/" }
serde = "1.0.152" serde = "1.0.152"
[dev-dependencies] [dev-dependencies]

View File

@@ -8,7 +8,6 @@ license = "MIT"
columnar = {path="../", package="tantivy-columnar"} columnar = {path="../", package="tantivy-columnar"}
serde_json = "1" serde_json = "1"
serde_json_borrow = {git="https://github.com/PSeitz/serde_json_borrow/"} serde_json_borrow = {git="https://github.com/PSeitz/serde_json_borrow/"}
serde = "1"
[workspace] [workspace]
members = [] members = []

View File

@@ -1,9 +1,12 @@
use std::cmp::Ordering;
use crate::{Column, DocId, RowId}; use crate::{Column, DocId, RowId};
#[derive(Debug, Default, Clone)] #[derive(Debug, Default, Clone)]
pub struct ColumnBlockAccessor<T> { pub struct ColumnBlockAccessor<T> {
val_cache: Vec<T>, val_cache: Vec<T>,
docid_cache: Vec<DocId>, docid_cache: Vec<DocId>,
missing_docids_cache: Vec<DocId>,
row_id_cache: Vec<RowId>, row_id_cache: Vec<RowId>,
} }
@@ -20,6 +23,20 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
.values .values
.get_vals(&self.row_id_cache, &mut self.val_cache); .get_vals(&self.row_id_cache, &mut self.val_cache);
} }
#[inline]
pub fn fetch_block_with_missing(&mut self, docs: &[u32], accessor: &Column<T>, missing: T) {
self.fetch_block(docs, accessor);
// We can compare docid_cache with docs to find missing docs
if docs.len() != self.docid_cache.len() || accessor.index.is_multivalue() {
self.missing_docids_cache.clear();
find_missing_docs(docs, &self.docid_cache, |doc| {
self.missing_docids_cache.push(doc);
self.val_cache.push(missing);
});
self.docid_cache
.extend_from_slice(&self.missing_docids_cache);
}
}
#[inline] #[inline]
pub fn iter_vals(&self) -> impl Iterator<Item = T> + '_ { pub fn iter_vals(&self) -> impl Iterator<Item = T> + '_ {
@@ -34,3 +51,82 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
.zip(self.val_cache.iter().cloned()) .zip(self.val_cache.iter().cloned())
} }
} }
/// Given two sorted lists of docids, `docs` and `hits`, where `hits` is a subset of `docs`,
/// calls `callback` for every doc that is not in `hits`.
fn find_missing_docs<F>(docs: &[u32], hits: &[u32], mut callback: F)
where F: FnMut(u32) {
let mut docs_iter = docs.iter();
let mut hits_iter = hits.iter();
let mut doc = docs_iter.next();
let mut hit = hits_iter.next();
while let (Some(&current_doc), Some(&current_hit)) = (doc, hit) {
match current_doc.cmp(&current_hit) {
Ordering::Less => {
callback(current_doc);
doc = docs_iter.next();
}
Ordering::Equal => {
doc = docs_iter.next();
hit = hits_iter.next();
}
Ordering::Greater => {
hit = hits_iter.next();
}
}
}
while let Some(&current_doc) = doc {
callback(current_doc);
doc = docs_iter.next();
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_find_missing_docs() {
let docs: Vec<u32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
let hits: Vec<u32> = vec![2, 4, 6, 8, 10];
let mut missing_docs: Vec<u32> = Vec::new();
find_missing_docs(&docs, &hits, |missing_doc| {
missing_docs.push(missing_doc);
});
assert_eq!(missing_docs, vec![1, 3, 5, 7, 9]);
}
#[test]
fn test_find_missing_docs_empty() {
let docs: Vec<u32> = Vec::new();
let hits: Vec<u32> = vec![2, 4, 6, 8, 10];
let mut missing_docs: Vec<u32> = Vec::new();
find_missing_docs(&docs, &hits, |missing_doc| {
missing_docs.push(missing_doc);
});
assert_eq!(missing_docs, vec![]);
}
#[test]
fn test_find_missing_docs_all_missing() {
let docs: Vec<u32> = vec![1, 2, 3, 4, 5];
let hits: Vec<u32> = Vec::new();
let mut missing_docs: Vec<u32> = Vec::new();
find_missing_docs(&docs, &hits, |missing_doc| {
missing_docs.push(missing_doc);
});
assert_eq!(missing_docs, vec![1, 2, 3, 4, 5]);
}
}

View File

@@ -30,6 +30,13 @@ impl fmt::Debug for BytesColumn {
} }
impl BytesColumn { impl BytesColumn {
pub fn empty(num_docs: u32) -> BytesColumn {
BytesColumn {
dictionary: Arc::new(Dictionary::empty()),
term_ord_column: Column::build_empty_column(num_docs),
}
}
/// Fills the given `output` buffer with the term associated to the ordinal `ord`. /// Fills the given `output` buffer with the term associated to the ordinal `ord`.
/// ///
/// Returns `false` if the term does not exist (e.g. `term_ord` is greater or equal to the /// Returns `false` if the term does not exist (e.g. `term_ord` is greater or equal to the
@@ -77,7 +84,7 @@ impl From<StrColumn> for BytesColumn {
} }
impl StrColumn { impl StrColumn {
pub(crate) fn wrap(bytes_column: BytesColumn) -> StrColumn { pub fn wrap(bytes_column: BytesColumn) -> StrColumn {
StrColumn(bytes_column) StrColumn(bytes_column)
} }

View File

@@ -130,7 +130,7 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
.select_batch_in_place(selected_docid_range.start, doc_ids); .select_batch_in_place(selected_docid_range.start, doc_ids);
} }
/// Fils the output vector with the (possibly multiple values that are associated_with /// Fills the output vector with the (possibly multiple values that are associated_with
/// `row_id`. /// `row_id`.
/// ///
/// This method clears the `output` vector. /// This method clears the `output` vector.

View File

@@ -111,10 +111,7 @@ fn stack_multivalued_indexes<'a>(
let mut last_row_id = 0; let mut last_row_id = 0;
let mut current_it = multivalued_indexes.next(); let mut current_it = multivalued_indexes.next();
Box::new(std::iter::from_fn(move || loop { Box::new(std::iter::from_fn(move || loop {
let Some(multivalued_index) = current_it.as_mut() else { if let Some(row_id) = current_it.as_mut()?.next() {
return None;
};
if let Some(row_id) = multivalued_index.next() {
last_row_id = offset + row_id; last_row_id = offset + row_id;
return Some(last_row_id); return Some(last_row_id);
} }

View File

@@ -1,3 +1,8 @@
//! # `column_index`
//!
//! `column_index` provides rank and select operations to associate positions when not all
//! documents have exactly one element.
mod merge; mod merge;
mod multivalued_index; mod multivalued_index;
mod optional_index; mod optional_index;
@@ -37,10 +42,14 @@ impl From<MultiValueIndex> for ColumnIndex {
} }
impl ColumnIndex { impl ColumnIndex {
// Returns the cardinality of the column index. #[inline]
// pub fn is_multivalue(&self) -> bool {
// By convention, if the column contains no docs, we consider that it is matches!(self, ColumnIndex::Multivalued(_))
// full. }
/// Returns the cardinality of the column index.
///
/// By convention, if the column contains no docs, we consider that it is
/// full.
#[inline] #[inline]
pub fn get_cardinality(&self) -> Cardinality { pub fn get_cardinality(&self) -> Cardinality {
match self { match self {
@@ -117,18 +126,18 @@ impl ColumnIndex {
} }
} }
pub fn docid_range_to_rowids(&self, doc_id: Range<DocId>) -> Range<RowId> { pub fn docid_range_to_rowids(&self, doc_id_range: Range<DocId>) -> Range<RowId> {
match self { match self {
ColumnIndex::Empty { .. } => 0..0, ColumnIndex::Empty { .. } => 0..0,
ColumnIndex::Full => doc_id, ColumnIndex::Full => doc_id_range,
ColumnIndex::Optional(optional_index) => { ColumnIndex::Optional(optional_index) => {
let row_start = optional_index.rank(doc_id.start); let row_start = optional_index.rank(doc_id_range.start);
let row_end = optional_index.rank(doc_id.end); let row_end = optional_index.rank(doc_id_range.end);
row_start..row_end row_start..row_end
} }
ColumnIndex::Multivalued(multivalued_index) => { ColumnIndex::Multivalued(multivalued_index) => {
let end_docid = doc_id.end.min(multivalued_index.num_docs() - 1) + 1; let end_docid = doc_id_range.end.min(multivalued_index.num_docs() - 1) + 1;
let start_docid = doc_id.start.min(end_docid); let start_docid = doc_id_range.start.min(end_docid);
let row_start = multivalued_index.start_index_column.get_val(start_docid); let row_start = multivalued_index.start_index_column.get_val(start_docid);
let row_end = multivalued_index.start_index_column.get_val(end_docid); let row_end = multivalued_index.start_index_column.get_val(end_docid);

View File

@@ -21,8 +21,6 @@ const DENSE_BLOCK_THRESHOLD: u32 =
const ELEMENTS_PER_BLOCK: u32 = u16::MAX as u32 + 1; const ELEMENTS_PER_BLOCK: u32 = u16::MAX as u32 + 1;
const BLOCK_SIZE: RowId = 1 << 16;
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
struct BlockMeta { struct BlockMeta {
non_null_rows_before_block: u32, non_null_rows_before_block: u32,
@@ -109,8 +107,8 @@ struct RowAddr {
#[inline(always)] #[inline(always)]
fn row_addr_from_row_id(row_id: RowId) -> RowAddr { fn row_addr_from_row_id(row_id: RowId) -> RowAddr {
RowAddr { RowAddr {
block_id: (row_id / BLOCK_SIZE) as u16, block_id: (row_id / ELEMENTS_PER_BLOCK) as u16,
in_block_row_id: (row_id % BLOCK_SIZE) as u16, in_block_row_id: (row_id % ELEMENTS_PER_BLOCK) as u16,
} }
} }
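With `BLOCK_SIZE` removed, the decomposition is expressed everywhere in terms of `ELEMENTS_PER_BLOCK` (`u16::MAX + 1 = 65536`); the two constants were always equal. A quick standalone sketch of the split:

```rust
const ELEMENTS_PER_BLOCK: u32 = u16::MAX as u32 + 1; // 65_536

#[derive(Debug, PartialEq)]
struct RowAddr {
    block_id: u16,
    in_block_row_id: u16,
}

fn row_addr_from_row_id(row_id: u32) -> RowAddr {
    // Equivalent to splitting the u32 into its high and low 16-bit halves.
    RowAddr {
        block_id: (row_id / ELEMENTS_PER_BLOCK) as u16,
        in_block_row_id: (row_id % ELEMENTS_PER_BLOCK) as u16,
    }
}

fn main() {
    let addr = row_addr_from_row_id(65_537);
    assert_eq!(addr, RowAddr { block_id: 1, in_block_row_id: 1 });
}
```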
@@ -185,8 +183,13 @@ impl Set<RowId> for OptionalIndex {
} }
} }
/// Any value of `doc_id` is allowed.
/// In particular, doc_id = num_rows.
#[inline] #[inline]
fn rank(&self, doc_id: DocId) -> RowId { fn rank(&self, doc_id: DocId) -> RowId {
if doc_id >= self.num_docs() {
return self.num_non_nulls();
}
let RowAddr { let RowAddr {
block_id, block_id,
in_block_row_id, in_block_row_id,
@@ -200,13 +203,15 @@ impl Set<RowId> for OptionalIndex {
block_meta.non_null_rows_before_block + block_offset_row_id block_meta.non_null_rows_before_block + block_offset_row_id
} }
/// Any value of `doc_id` is allowed.
/// In particular, doc_id = num_rows.
#[inline] #[inline]
fn rank_if_exists(&self, doc_id: DocId) -> Option<RowId> { fn rank_if_exists(&self, doc_id: DocId) -> Option<RowId> {
let RowAddr { let RowAddr {
block_id, block_id,
in_block_row_id, in_block_row_id,
} = row_addr_from_row_id(doc_id); } = row_addr_from_row_id(doc_id);
let block_meta = self.block_metas[block_id as usize]; let block_meta = *self.block_metas.get(block_id as usize)?;
let block = self.block(block_meta); let block = self.block(block_meta);
let block_offset_row_id = match block { let block_offset_row_id = match block {
Block::Dense(dense_block) => dense_block.rank_if_exists(in_block_row_id), Block::Dense(dense_block) => dense_block.rank_if_exists(in_block_row_id),
@@ -491,7 +496,7 @@ fn deserialize_optional_index_block_metadatas(
non_null_rows_before_block += num_non_null_rows; non_null_rows_before_block += num_non_null_rows;
} }
block_metas.resize( block_metas.resize(
((num_rows + BLOCK_SIZE - 1) / BLOCK_SIZE) as usize, ((num_rows + ELEMENTS_PER_BLOCK - 1) / ELEMENTS_PER_BLOCK) as usize,
BlockMeta { BlockMeta {
non_null_rows_before_block, non_null_rows_before_block,
start_byte_offset, start_byte_offset,

View File

@@ -39,7 +39,8 @@ pub trait Set<T> {
/// ///
/// # Panics /// # Panics
/// ///
/// May panic if rank is greater than the number of elements in the Set. /// May panic if rank is greater or equal to the number of
/// elements in the Set.
fn select(&self, rank: T) -> T; fn select(&self, rank: T) -> T;
/// Creates a brand new select cursor. /// Creates a brand new select cursor.

View File

@@ -3,6 +3,30 @@ use proptest::strategy::Strategy;
use proptest::{prop_oneof, proptest}; use proptest::{prop_oneof, proptest};
use super::*; use super::*;
use crate::{ColumnarReader, ColumnarWriter, DynamicColumnHandle};
#[test]
fn test_optional_index_bug_2293() {
// tests for panic in docid_range_to_rowids for docid == num_docs
test_optional_index_with_num_docs(ELEMENTS_PER_BLOCK - 1);
test_optional_index_with_num_docs(ELEMENTS_PER_BLOCK);
test_optional_index_with_num_docs(ELEMENTS_PER_BLOCK + 1);
}
fn test_optional_index_with_num_docs(num_docs: u32) {
let mut dataframe_writer = ColumnarWriter::default();
dataframe_writer.record_numerical(100, "score", 80i64);
let mut buffer: Vec<u8> = Vec::new();
dataframe_writer
.serialize(num_docs, None, &mut buffer)
.unwrap();
let columnar = ColumnarReader::open(buffer).unwrap();
assert_eq!(columnar.num_columns(), 1);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("score").unwrap();
assert_eq!(cols.len(), 1);
let col = cols[0].open().unwrap();
col.column_index().docid_range_to_rowids(0..num_docs);
}
#[test] #[test]
fn test_dense_block_threshold() { fn test_dense_block_threshold() {
@@ -35,7 +59,7 @@ proptest! {
#[test] #[test]
fn test_with_random_sets_simple() { fn test_with_random_sets_simple() {
let vals = 10..BLOCK_SIZE * 2; let vals = 10..ELEMENTS_PER_BLOCK * 2;
let mut out: Vec<u8> = Vec::new(); let mut out: Vec<u8> = Vec::new();
serialize_optional_index(&vals, 100, &mut out).unwrap(); serialize_optional_index(&vals, 100, &mut out).unwrap();
let null_index = open_optional_index(OwnedBytes::new(out)).unwrap(); let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
@@ -171,7 +195,7 @@ fn test_optional_index_rank() {
test_optional_index_rank_aux(&[0u32, 1u32]); test_optional_index_rank_aux(&[0u32, 1u32]);
let mut block = Vec::new(); let mut block = Vec::new();
block.push(3u32); block.push(3u32);
block.extend((0..BLOCK_SIZE).map(|i| i + BLOCK_SIZE + 1)); block.extend((0..ELEMENTS_PER_BLOCK).map(|i| i + ELEMENTS_PER_BLOCK + 1));
test_optional_index_rank_aux(&block); test_optional_index_rank_aux(&block);
} }
@@ -185,8 +209,8 @@ fn test_optional_index_iter_empty_one() {
fn test_optional_index_iter_dense_block() { fn test_optional_index_iter_dense_block() {
let mut block = Vec::new(); let mut block = Vec::new();
block.push(3u32); block.push(3u32);
block.extend((0..BLOCK_SIZE).map(|i| i + BLOCK_SIZE + 1)); block.extend((0..ELEMENTS_PER_BLOCK).map(|i| i + ELEMENTS_PER_BLOCK + 1));
test_optional_index_iter_aux(&block, 3 * BLOCK_SIZE); test_optional_index_iter_aux(&block, 3 * ELEMENTS_PER_BLOCK);
} }
#[test] #[test]
@@ -215,12 +239,12 @@ mod bench {
let vals: Vec<RowId> = (0..TOTAL_NUM_VALUES) let vals: Vec<RowId> = (0..TOTAL_NUM_VALUES)
.map(|_| rng.gen_bool(fill_ratio)) .map(|_| rng.gen_bool(fill_ratio))
.enumerate() .enumerate()
.filter(|(pos, val)| *val) .filter(|(_pos, val)| *val)
.map(|(pos, _)| pos as RowId) .map(|(pos, _)| pos as RowId)
.collect(); .collect();
serialize_optional_index(&&vals[..], TOTAL_NUM_VALUES, &mut out).unwrap(); serialize_optional_index(&&vals[..], TOTAL_NUM_VALUES, &mut out).unwrap();
let codec = open_optional_index(OwnedBytes::new(out)).unwrap();
codec open_optional_index(OwnedBytes::new(out)).unwrap()
} }
fn random_range_iterator( fn random_range_iterator(
@@ -242,7 +266,7 @@ mod bench {
} }
fn n_percent_step_iterator(percent: f32, num_values: u32) -> impl Iterator<Item = u32> { fn n_percent_step_iterator(percent: f32, num_values: u32) -> impl Iterator<Item = u32> {
let ratio = percent as f32 / 100.0; let ratio = percent / 100.0;
let step_size = (1f32 / ratio) as u32; let step_size = (1f32 / ratio) as u32;
let deviation = step_size - 1; let deviation = step_size - 1;
random_range_iterator(0, num_values, step_size, deviation) random_range_iterator(0, num_values, step_size, deviation)

View File

@@ -30,6 +30,7 @@ impl<'a> SerializableColumnIndex<'a> {
} }
} }
/// Serialize a column index.
pub fn serialize_column_index( pub fn serialize_column_index(
column_index: SerializableColumnIndex, column_index: SerializableColumnIndex,
output: &mut impl Write, output: &mut impl Write,
@@ -51,6 +52,7 @@ pub fn serialize_column_index(
Ok(column_index_num_bytes) Ok(column_index_num_bytes)
} }
/// Open a serialized column index.
pub fn open_column_index(mut bytes: OwnedBytes) -> io::Result<ColumnIndex> { pub fn open_column_index(mut bytes: OwnedBytes) -> io::Result<ColumnIndex> {
if bytes.is_empty() { if bytes.is_empty() {
return Err(io::Error::new( return Err(io::Error::new(

View File

@@ -101,7 +101,7 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
row_id_hits: &mut Vec<RowId>, row_id_hits: &mut Vec<RowId>,
) { ) {
let row_id_range = row_id_range.start..row_id_range.end.min(self.num_vals()); let row_id_range = row_id_range.start..row_id_range.end.min(self.num_vals());
for idx in row_id_range.start..row_id_range.end { for idx in row_id_range {
let val = self.get_val(idx); let val = self.get_val(idx);
if value_range.contains(&val) { if value_range.contains(&val) {
row_id_hits.push(idx); row_id_hits.push(idx);

View File

@@ -38,6 +38,6 @@ impl Ord for BlankRange {
} }
impl PartialOrd for BlankRange { impl PartialOrd for BlankRange {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.blank_size().cmp(&other.blank_size())) Some(self.cmp(other))
} }
} }
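Delegating `partial_cmp` to the `Ord` impl, instead of re-comparing `blank_size()`, is the idiomatic fix clippy suggests: the two orderings can never drift apart. A minimal standalone illustration (the `start`/`end` fields are hypothetical, loosely mirroring the upstream struct):

```rust
use std::cmp::Ordering;

#[derive(Eq, PartialEq)]
struct BlankRange {
    start: u32,
    end: u32,
}

impl BlankRange {
    fn blank_size(&self) -> u32 {
        self.end - self.start
    }
}

impl Ord for BlankRange {
    // The one place the ordering is defined.
    fn cmp(&self, other: &Self) -> Ordering {
        self.blank_size().cmp(&other.blank_size())
    }
}

impl PartialOrd for BlankRange {
    // Delegate, so PartialOrd always agrees with Ord.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

fn main() {
    let a = BlankRange { start: 0, end: 3 };
    let b = BlankRange { start: 10, end: 12 };
    assert!(a > b); // 3 blanks vs 2 blanks
}
```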

View File

@@ -2,7 +2,7 @@ mod merge_dict_column;
mod merge_mapping; mod merge_mapping;
mod term_merger; mod term_merger;
use std::collections::{BTreeMap, HashMap, HashSet}; use std::collections::{BTreeMap, HashSet};
use std::io; use std::io;
use std::net::Ipv6Addr; use std::net::Ipv6Addr;
use std::sync::Arc; use std::sync::Arc;
@@ -18,7 +18,8 @@ use crate::columnar::writer::CompatibleNumericalTypes;
use crate::columnar::ColumnarReader; use crate::columnar::ColumnarReader;
use crate::dynamic_column::DynamicColumn; use crate::dynamic_column::DynamicColumn;
use crate::{ use crate::{
BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, NumericalType, NumericalValue, BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, DynamicColumnHandle, NumericalType,
NumericalValue,
}; };
/// Column types are grouped into different categories. /// Column types are grouped into different categories.
@@ -28,14 +29,16 @@ use crate::{
/// In practice, today, only Numerical columns are coerced into one type. /// In practice, today, only Numerical columns are coerced into one type.
/// ///
/// See also [README.md]. /// See also [README.md].
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] ///
/// The ordering has to match the ordering of the variants in [ColumnType].
#[derive(Copy, Clone, Eq, PartialOrd, Ord, PartialEq, Hash, Debug)]
pub(crate) enum ColumnTypeCategory { pub(crate) enum ColumnTypeCategory {
Bool,
Str,
Numerical, Numerical,
DateTime,
Bytes, Bytes,
Str,
Bool,
IpAddr, IpAddr,
DateTime,
} }
impl From<ColumnType> for ColumnTypeCategory { impl From<ColumnType> for ColumnTypeCategory {
@@ -83,9 +86,20 @@ pub fn merge_columnar(
.iter() .iter()
.map(|reader| reader.num_rows()) .map(|reader| reader.num_rows())
.collect::<Vec<u32>>(); .collect::<Vec<u32>>();
let columns_to_merge = let columns_to_merge =
group_columns_for_merge(columnar_readers, required_columns, &merge_row_order)?; group_columns_for_merge(columnar_readers, required_columns, &merge_row_order)?;
for ((column_name, column_type), columns) in columns_to_merge { for res in columns_to_merge {
let ((column_name, _column_type_category), grouped_columns) = res;
let grouped_columns = grouped_columns.open(&merge_row_order)?;
if grouped_columns.is_empty() {
continue;
}
let column_type = grouped_columns.column_type_after_merge();
let mut columns = grouped_columns.columns;
coerce_columns(column_type, &mut columns)?;
let mut column_serializer = let mut column_serializer =
serializer.start_serialize_column(column_name.as_bytes(), column_type); serializer.start_serialize_column(column_name.as_bytes(), column_type);
merge_column( merge_column(
@@ -97,6 +111,7 @@ pub fn merge_columnar(
)?; )?;
column_serializer.finalize()?; column_serializer.finalize()?;
} }
serializer.finalize(merge_row_order.num_rows())?; serializer.finalize(merge_row_order.num_rows())?;
Ok(()) Ok(())
} }
@@ -210,40 +225,12 @@ fn merge_column(
struct GroupedColumns { struct GroupedColumns {
required_column_type: Option<ColumnType>, required_column_type: Option<ColumnType>,
columns: Vec<Option<DynamicColumn>>, columns: Vec<Option<DynamicColumn>>,
column_category: ColumnTypeCategory,
} }
impl GroupedColumns { impl GroupedColumns {
fn for_category(column_category: ColumnTypeCategory, num_columnars: usize) -> Self { /// Checks whether the column group can be skipped during serialization.
GroupedColumns { fn is_empty(&self) -> bool {
required_column_type: None, self.required_column_type.is_none() && self.columns.iter().all(Option::is_none)
columns: vec![None; num_columnars],
column_category,
}
}
/// Set the dynamic column for a given columnar.
fn set_column(&mut self, columnar_id: usize, column: DynamicColumn) {
self.columns[columnar_id] = Some(column);
}
/// Force the existence of a column, as well as its type.
fn require_type(&mut self, required_type: ColumnType) -> io::Result<()> {
if let Some(existing_required_type) = self.required_column_type {
if existing_required_type == required_type {
// This was just a duplicate in the `required_columns`.
// Nothing to do.
return Ok(());
} else {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"Required column conflicts with another required column of the same type \
category.",
));
}
}
self.required_column_type = Some(required_type);
Ok(())
} }
/// Returns the column type after merge. /// Returns the column type after merge.
@@ -265,11 +252,76 @@ impl GroupedColumns {
} }
// At the moment, only the numerical categorical column type has more than one possible // At the moment, only the numerical categorical column type has more than one possible
// column type. // column type.
assert_eq!(self.column_category, ColumnTypeCategory::Numerical); assert!(self
.columns
.iter()
.flatten()
.all(|el| ColumnTypeCategory::from(el.column_type()) == ColumnTypeCategory::Numerical));
merged_numerical_columns_type(self.columns.iter().flatten()).into() merged_numerical_columns_type(self.columns.iter().flatten()).into()
} }
} }
struct GroupedColumnsHandle {
required_column_type: Option<ColumnType>,
columns: Vec<Option<DynamicColumnHandle>>,
}
impl GroupedColumnsHandle {
fn new(num_columnars: usize) -> Self {
GroupedColumnsHandle {
required_column_type: None,
columns: vec![None; num_columnars],
}
}
fn open(self, merge_row_order: &MergeRowOrder) -> io::Result<GroupedColumns> {
let mut columns: Vec<Option<DynamicColumn>> = Vec::new();
for (columnar_id, column) in self.columns.iter().enumerate() {
if let Some(column) = column {
let column = column.open()?;
// We skip columns that end up with 0 documents.
// That way, we make sure they don't end up influencing the merge type or
// creating empty columns.
if is_empty_after_merge(merge_row_order, &column, columnar_id) {
columns.push(None);
} else {
columns.push(Some(column));
}
} else {
columns.push(None);
}
}
Ok(GroupedColumns {
required_column_type: self.required_column_type,
columns,
})
}
/// Set the dynamic column for a given columnar.
fn set_column(&mut self, columnar_id: usize, column: DynamicColumnHandle) {
self.columns[columnar_id] = Some(column);
}
/// Force the existence of a column, as well as its type.
fn require_type(&mut self, required_type: ColumnType) -> io::Result<()> {
if let Some(existing_required_type) = self.required_column_type {
if existing_required_type == required_type {
// This was just a duplicate in the `required_columns`.
// Nothing to do.
return Ok(());
} else {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"Required column conflicts with another required column of the same type \
category.",
));
}
}
self.required_column_type = Some(required_type);
Ok(())
}
}
/// Returns the type of the merged numerical column. /// Returns the type of the merged numerical column.
/// ///
/// This function picks the first numerical type out of i64, u64, f64 (order matters /// This function picks the first numerical type out of i64, u64, f64 (order matters
@@ -293,7 +345,7 @@ fn merged_numerical_columns_type<'a>(
fn is_empty_after_merge( fn is_empty_after_merge(
merge_row_order: &MergeRowOrder, merge_row_order: &MergeRowOrder,
column: &DynamicColumn, column: &DynamicColumn,
columnar_id: usize, columnar_ord: usize,
) -> bool { ) -> bool {
if column.num_values() == 0u32 { if column.num_values() == 0u32 {
// It was empty before the merge. // It was empty before the merge.
@@ -305,7 +357,7 @@ fn is_empty_after_merge(
false false
} }
MergeRowOrder::Shuffled(shuffled) => { MergeRowOrder::Shuffled(shuffled) => {
if let Some(alive_bitset) = &shuffled.alive_bitsets[columnar_id] { if let Some(alive_bitset) = &shuffled.alive_bitsets[columnar_ord] {
let column_index = column.column_index(); let column_index = column.column_index();
match column_index { match column_index {
ColumnIndex::Empty { .. } => true, ColumnIndex::Empty { .. } => true,
@@ -348,56 +400,34 @@ fn is_empty_after_merge(
} }
} }
#[allow(clippy::type_complexity)] /// Iterates over the columns of the columnar readers, grouped by column name.
fn group_columns_for_merge( /// Key functionality is that `open` of the columns is done lazily, per group.
columnar_readers: &[&ColumnarReader], fn group_columns_for_merge<'a>(
required_columns: &[(String, ColumnType)], columnar_readers: &'a [&'a ColumnarReader],
merge_row_order: &MergeRowOrder, required_columns: &'a [(String, ColumnType)],
) -> io::Result<BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>>> { _merge_row_order: &'a MergeRowOrder,
// Each column name may have multiple types of column associated. ) -> io::Result<BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle>> {
// For merging we are interested in the same column type category since they can be merged. let mut columns: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> = BTreeMap::new();
let mut columns_grouped: HashMap<(String, ColumnTypeCategory), GroupedColumns> = HashMap::new();
for &(ref column_name, column_type) in required_columns { for &(ref column_name, column_type) in required_columns {
columns_grouped columns
.entry((column_name.clone(), column_type.into())) .entry((column_name.clone(), column_type.into()))
.or_insert_with(|| { .or_insert_with(|| GroupedColumnsHandle::new(columnar_readers.len()))
GroupedColumns::for_category(column_type.into(), columnar_readers.len())
})
.require_type(column_type)?; .require_type(column_type)?;
} }
for (columnar_id, columnar_reader) in columnar_readers.iter().enumerate() { for (columnar_id, columnar_reader) in columnar_readers.iter().enumerate() {
let column_name_and_handle = columnar_reader.list_columns()?; let column_name_and_handle = columnar_reader.iter_columns()?;
// We skip columns that end up with 0 documents.
// That way, we make sure they don't end up influencing the merge type or
// creating empty columns.
for (column_name, handle) in column_name_and_handle { for (column_name, handle) in column_name_and_handle {
let column_category: ColumnTypeCategory = handle.column_type().into(); let column_category: ColumnTypeCategory = handle.column_type().into();
let column = handle.open()?; columns
if is_empty_after_merge(merge_row_order, &column, columnar_id) {
continue;
}
columns_grouped
.entry((column_name, column_category)) .entry((column_name, column_category))
.or_insert_with(|| { .or_insert_with(|| GroupedColumnsHandle::new(columnar_readers.len()))
GroupedColumns::for_category(column_category, columnar_readers.len()) .set_column(columnar_id, handle);
})
.set_column(columnar_id, column);
} }
} }
Ok(columns)
let mut merge_columns: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
Default::default();
for ((column_name, _), mut grouped_columns) in columns_grouped {
let column_type = grouped_columns.column_type_after_merge();
coerce_columns(column_type, &mut grouped_columns.columns)?;
merge_columns.insert((column_name, column_type), grouped_columns.columns);
}
Ok(merge_columns)
} }
fn coerce_columns( fn coerce_columns(

View File

@@ -1,3 +1,5 @@
use std::collections::BTreeMap;
use itertools::Itertools; use itertools::Itertools;
use super::*; use super::*;
@@ -27,22 +29,10 @@ fn test_column_coercion_to_u64() {
let columnar2 = make_columnar("numbers", &[u64::MAX]); let columnar2 = make_columnar("numbers", &[u64::MAX]);
let columnars = &[&columnar1, &columnar2]; let columnars = &[&columnar1, &columnar2];
let merge_order = StackMergeOrder::stack(columnars).into(); let merge_order = StackMergeOrder::stack(columnars).into();
let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> = let column_map: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> =
group_columns_for_merge(columnars, &[], &merge_order).unwrap(); group_columns_for_merge(columnars, &[], &merge_order).unwrap();
assert_eq!(column_map.len(), 1); assert_eq!(column_map.len(), 1);
assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::U64))); assert!(column_map.contains_key(&("numbers".to_string(), ColumnTypeCategory::Numerical)));
}
#[test]
fn test_column_no_coercion_if_all_the_same() {
let columnar1 = make_columnar("numbers", &[1u64]);
let columnar2 = make_columnar("numbers", &[2u64]);
let columnars = &[&columnar1, &columnar2];
let merge_order = StackMergeOrder::stack(columnars).into();
let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
group_columns_for_merge(columnars, &[], &merge_order).unwrap();
assert_eq!(column_map.len(), 1);
assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::U64)));
} }
 #[test]
@@ -51,24 +41,24 @@ fn test_column_coercion_to_i64() {
     let columnar2 = make_columnar("numbers", &[2u64]);
     let columnars = &[&columnar1, &columnar2];
     let merge_order = StackMergeOrder::stack(columnars).into();
-    let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
+    let column_map: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> =
         group_columns_for_merge(columnars, &[], &merge_order).unwrap();
     assert_eq!(column_map.len(), 1);
-    assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::I64)));
+    assert!(column_map.contains_key(&("numbers".to_string(), ColumnTypeCategory::Numerical)));
 }
-#[test]
-fn test_impossible_coercion_returns_an_error() {
-    let columnar1 = make_columnar("numbers", &[u64::MAX]);
-    let merge_order = StackMergeOrder::stack(&[&columnar1]).into();
-    let group_error = group_columns_for_merge(
-        &[&columnar1],
-        &[("numbers".to_string(), ColumnType::I64)],
-        &merge_order,
-    )
-    .unwrap_err();
-    assert_eq!(group_error.kind(), io::ErrorKind::InvalidInput);
-}
+//#[test]
+// fn test_impossible_coercion_returns_an_error() {
+//     let columnar1 = make_columnar("numbers", &[u64::MAX]);
+//     let merge_order = StackMergeOrder::stack(&[&columnar1]).into();
+//     let group_error = group_columns_for_merge_iter(
+//         &[&columnar1],
+//         &[("numbers".to_string(), ColumnType::I64)],
+//         &merge_order,
+//     )
+//     .unwrap_err();
+//     assert_eq!(group_error.kind(), io::ErrorKind::InvalidInput);
+// }
 #[test]
 fn test_group_columns_with_required_column() {
@@ -76,7 +66,7 @@ fn test_group_columns_with_required_column() {
     let columnar2 = make_columnar("numbers", &[2u64]);
     let columnars = &[&columnar1, &columnar2];
     let merge_order = StackMergeOrder::stack(columnars).into();
-    let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
+    let column_map: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> =
         group_columns_for_merge(
             &[&columnar1, &columnar2],
             &[("numbers".to_string(), ColumnType::U64)],
@@ -84,7 +74,7 @@ fn test_group_columns_with_required_column() {
     )
     .unwrap();
     assert_eq!(column_map.len(), 1);
-    assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::U64)));
+    assert!(column_map.contains_key(&("numbers".to_string(), ColumnTypeCategory::Numerical)));
 }
 #[test]
@@ -93,17 +83,17 @@ fn test_group_columns_required_column_with_no_existing_columns() {
     let columnar2 = make_columnar("numbers", &[2u64]);
     let columnars = &[&columnar1, &columnar2];
     let merge_order = StackMergeOrder::stack(columnars).into();
-    let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
-        group_columns_for_merge(
-            columnars,
-            &[("required_col".to_string(), ColumnType::Str)],
-            &merge_order,
-        )
-        .unwrap();
+    let column_map: BTreeMap<_, _> = group_columns_for_merge(
+        columnars,
+        &[("required_col".to_string(), ColumnType::Str)],
+        &merge_order,
+    )
+    .unwrap();
     assert_eq!(column_map.len(), 2);
-    let columns = column_map
-        .get(&("required_col".to_string(), ColumnType::Str))
-        .unwrap();
+    let columns = &column_map
+        .get(&("required_col".to_string(), ColumnTypeCategory::Str))
+        .unwrap()
+        .columns;
     assert_eq!(columns.len(), 2);
     assert!(columns[0].is_none());
     assert!(columns[1].is_none());
@@ -115,7 +105,7 @@ fn test_group_columns_required_column_is_above_all_columns_have_the_same_type_ru
     let columnar2 = make_columnar("numbers", &[2i64]);
     let columnars = &[&columnar1, &columnar2];
     let merge_order = StackMergeOrder::stack(columnars).into();
-    let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
+    let column_map: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> =
         group_columns_for_merge(
             columnars,
             &[("numbers".to_string(), ColumnType::U64)],
@@ -123,7 +113,7 @@ fn test_group_columns_required_column_is_above_all_columns_have_the_same_type_ru
     )
     .unwrap();
     assert_eq!(column_map.len(), 1);
-    assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::U64)));
+    assert!(column_map.contains_key(&("numbers".to_string(), ColumnTypeCategory::Numerical)));
 }
 #[test]
@@ -132,21 +122,23 @@ fn test_missing_column() {
     let columnar2 = make_columnar("numbers2", &[2u64]);
     let columnars = &[&columnar1, &columnar2];
     let merge_order = StackMergeOrder::stack(columnars).into();
-    let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
+    let column_map: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> =
         group_columns_for_merge(columnars, &[], &merge_order).unwrap();
     assert_eq!(column_map.len(), 2);
-    assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::I64)));
+    assert!(column_map.contains_key(&("numbers".to_string(), ColumnTypeCategory::Numerical)));
     {
-        let columns = column_map
-            .get(&("numbers".to_string(), ColumnType::I64))
-            .unwrap();
+        let columns = &column_map
+            .get(&("numbers".to_string(), ColumnTypeCategory::Numerical))
+            .unwrap()
+            .columns;
         assert!(columns[0].is_some());
        assert!(columns[1].is_none());
     }
     {
-        let columns = column_map
-            .get(&("numbers2".to_string(), ColumnType::U64))
-            .unwrap();
+        let columns = &column_map
+            .get(&("numbers2".to_string(), ColumnTypeCategory::Numerical))
+            .unwrap()
+            .columns;
         assert!(columns[0].is_none());
         assert!(columns[1].is_some());
     }
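The hunks above replace the exact `ColumnType` with a coarser `ColumnTypeCategory` in the merge grouping key, so that differently typed numerical columns sharing a name (e.g. a `U64` and an `I64` "numbers" column from two segments) land in one group and can be coerced during the merge. A minimal sketch of that idea; the enum variants and `group_columns` helper here are illustrative stand-ins, not tantivy's definitions:

```rust
use std::collections::BTreeMap;

// Illustrative stand-ins for the types touched in this diff.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum ColumnType { I64, U64, F64, Str }

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum ColumnTypeCategory { Numerical, Str }

impl From<ColumnType> for ColumnTypeCategory {
    fn from(ty: ColumnType) -> Self {
        match ty {
            ColumnType::I64 | ColumnType::U64 | ColumnType::F64 => ColumnTypeCategory::Numerical,
            ColumnType::Str => ColumnTypeCategory::Str,
        }
    }
}

// Group columns under the coarser (name, category) key, so same-named
// numerical columns of different concrete types form a single mergeable group.
fn group_columns(
    columns: &[(String, ColumnType)],
) -> BTreeMap<(String, ColumnTypeCategory), Vec<ColumnType>> {
    let mut groups: BTreeMap<(String, ColumnTypeCategory), Vec<ColumnType>> = BTreeMap::new();
    for (name, ty) in columns {
        groups.entry((name.clone(), (*ty).into())).or_default().push(*ty);
    }
    groups
}
```

This is why the assertions in the tests above now check `ColumnTypeCategory::Numerical` rather than a concrete `ColumnType`.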

View File

@@ -102,30 +102,41 @@ impl ColumnarReader {
     pub fn num_rows(&self) -> RowId {
         self.num_rows
     }
+    // Iterate over the columns in a sorted way
+    pub fn iter_columns(
+        &self,
+    ) -> io::Result<impl Iterator<Item = (String, DynamicColumnHandle)> + '_> {
+        let mut stream = self.column_dictionary.stream()?;
+        Ok(std::iter::from_fn(move || {
+            if stream.advance() {
+                let key_bytes: &[u8] = stream.key();
+                let column_code: u8 = key_bytes.last().cloned().unwrap();
+                // TODO Error Handling. The API gets quite ugly when returning the error here, so
+                // instead we could just check the first N columns upfront.
+                let column_type: ColumnType = ColumnType::try_from_code(column_code)
+                    .map_err(|_| io_invalid_data(format!("Unknown column code `{column_code}`")))
+                    .unwrap();
+                let range = stream.value().clone();
+                let column_name =
+                    // The last two bytes are respectively the 0u8 separator and the column_type.
+                    String::from_utf8_lossy(&key_bytes[..key_bytes.len() - 2]).to_string();
+                let file_slice = self
+                    .column_data
+                    .slice(range.start as usize..range.end as usize);
+                let column_handle = DynamicColumnHandle {
+                    file_slice,
+                    column_type,
+                };
+                Some((column_name, column_handle))
+            } else {
+                None
+            }
+        }))
+    }
     // TODO Add unit tests
     pub fn list_columns(&self) -> io::Result<Vec<(String, DynamicColumnHandle)>> {
-        let mut stream = self.column_dictionary.stream()?;
-        let mut results = Vec::new();
-        while stream.advance() {
-            let key_bytes: &[u8] = stream.key();
-            let column_code: u8 = key_bytes.last().cloned().unwrap();
-            let column_type: ColumnType = ColumnType::try_from_code(column_code)
-                .map_err(|_| io_invalid_data(format!("Unknown column code `{column_code}`")))?;
-            let range = stream.value().clone();
-            let column_name =
-                // The last two bytes are respectively the 0u8 separator and the column_type.
-                String::from_utf8_lossy(&key_bytes[..key_bytes.len() - 2]).to_string();
-            let file_slice = self
-                .column_data
-                .slice(range.start as usize..range.end as usize);
-            let column_handle = DynamicColumnHandle {
-                file_slice,
-                column_type,
-            };
-            results.push((column_name, column_handle));
-        }
-        Ok(results)
+        Ok(self.iter_columns()?.collect())
     }
     fn stream_for_column_range(&self, column_name: &str) -> sstable::StreamerBuilder<RangeSSTable> {
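The new `iter_columns` above turns the eager `while stream.advance()` loop into a lazy iterator built with `std::iter::from_fn`, and `list_columns` shrinks to a one-line `collect`. The pattern, reduced to a toy stateful cursor (the `Cursor` type here is an illustrative stand-in for the sstable streamer, not tantivy's API):

```rust
// A toy stateful cursor standing in for the sstable streamer used above.
struct Cursor {
    items: Vec<(String, u32)>,
    pos: Option<usize>,
}

impl Cursor {
    fn new(items: Vec<(String, u32)>) -> Self {
        Cursor { items, pos: None }
    }

    // Mirrors `stream.advance()`: step to the next entry, report whether one exists.
    fn advance(&mut self) -> bool {
        let next = self.pos.map_or(0, |p| p + 1);
        if next < self.items.len() {
            self.pos = Some(next);
            true
        } else {
            false
        }
    }

    fn current(&self) -> (String, u32) {
        self.items[self.pos.unwrap()].clone()
    }
}

// The `from_fn` closure owns the cursor, so the iterator stays lazy:
// nothing is decoded until the caller pulls the next item.
fn iter_items(mut cursor: Cursor) -> impl Iterator<Item = (String, u32)> {
    std::iter::from_fn(move || cursor.advance().then(|| cursor.current()))
}
```

A `collect()` on this iterator reproduces the old eager behavior, which is exactly how the new `list_columns` is implemented.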

View File

@@ -269,7 +269,8 @@ impl StrOrBytesColumnWriter {
         dictionaries: &mut [DictionaryBuilder],
         arena: &mut MemoryArena,
     ) {
-        let unordered_id = dictionaries[self.dictionary_id as usize].get_or_allocate_id(bytes);
+        let unordered_id =
+            dictionaries[self.dictionary_id as usize].get_or_allocate_id(bytes, arena);
         self.column_writer.record(doc, unordered_id, arena);
     }

View File

@@ -79,7 +79,6 @@ fn mutate_or_create_column<V, TMutator>(
 impl ColumnarWriter {
     pub fn mem_usage(&self) -> usize {
-        // TODO add dictionary builders.
         self.arena.mem_usage()
             + self.numerical_field_hash_map.mem_usage()
             + self.bool_field_hash_map.mem_usage()
@@ -87,6 +86,11 @@ impl ColumnarWriter {
             + self.str_field_hash_map.mem_usage()
             + self.ip_addr_field_hash_map.mem_usage()
             + self.datetime_field_hash_map.mem_usage()
+            + self
+                .dictionaries
+                .iter()
+                .map(|dict| dict.mem_usage())
+                .sum::<usize>()
     }
     /// Returns the list of doc ids from 0..num_docs sorted by the `sort_field`
@@ -101,6 +105,10 @@ impl ColumnarWriter {
         let Some(numerical_col_writer) = self
             .numerical_field_hash_map
             .get::<NumericalColumnWriter>(sort_field.as_bytes())
+            .or_else(|| {
+                self.datetime_field_hash_map
+                    .get::<NumericalColumnWriter>(sort_field.as_bytes())
+            })
         else {
             return Vec::new();
         };
@@ -330,7 +338,7 @@ impl ColumnarWriter {
         let mut columns: Vec<(&[u8], ColumnType, Addr)> = self
             .numerical_field_hash_map
             .iter()
-            .map(|(column_name, addr, _)| {
+            .map(|(column_name, addr)| {
                 let numerical_column_writer: NumericalColumnWriter =
                     self.numerical_field_hash_map.read(addr);
                 let column_type = numerical_column_writer.numerical_type().into();
@@ -340,27 +348,27 @@ impl ColumnarWriter {
         columns.extend(
             self.bytes_field_hash_map
                 .iter()
-                .map(|(term, addr, _)| (term, ColumnType::Bytes, addr)),
+                .map(|(term, addr)| (term, ColumnType::Bytes, addr)),
         );
         columns.extend(
             self.str_field_hash_map
                 .iter()
-                .map(|(column_name, addr, _)| (column_name, ColumnType::Str, addr)),
+                .map(|(column_name, addr)| (column_name, ColumnType::Str, addr)),
         );
         columns.extend(
             self.bool_field_hash_map
                 .iter()
-                .map(|(column_name, addr, _)| (column_name, ColumnType::Bool, addr)),
+                .map(|(column_name, addr)| (column_name, ColumnType::Bool, addr)),
         );
         columns.extend(
             self.ip_addr_field_hash_map
                 .iter()
-                .map(|(column_name, addr, _)| (column_name, ColumnType::IpAddr, addr)),
+                .map(|(column_name, addr)| (column_name, ColumnType::IpAddr, addr)),
         );
         columns.extend(
             self.datetime_field_hash_map
                 .iter()
-                .map(|(column_name, addr, _)| (column_name, ColumnType::DateTime, addr)),
+                .map(|(column_name, addr)| (column_name, ColumnType::DateTime, addr)),
         );
         columns.sort_unstable_by_key(|(column_name, col_type, _)| (*column_name, *col_type));
@@ -429,6 +437,7 @@ impl ColumnarWriter {
                     &mut symbol_byte_buffer,
                 ),
                 buffers,
+                &self.arena,
                 &mut column_serializer,
             )?;
             column_serializer.finalize()?;
@@ -482,6 +491,7 @@ impl ColumnarWriter {
 // Serialize [Dictionary, Column, dictionary num bytes U32::LE]
 // Column: [Column Index, Column Values, column index num bytes U32::LE]
+#[allow(clippy::too_many_arguments)]
 fn serialize_bytes_or_str_column(
     cardinality: Cardinality,
     num_docs: RowId,
@@ -489,6 +499,7 @@ fn serialize_bytes_or_str_column(
     dictionary_builder: &DictionaryBuilder,
     operation_it: impl Iterator<Item = ColumnOperation<UnorderedId>>,
     buffers: &mut SpareBuffers,
+    arena: &MemoryArena,
     wrt: impl io::Write,
 ) -> io::Result<()> {
     let SpareBuffers {
@@ -497,7 +508,8 @@ fn serialize_bytes_or_str_column(
         ..
     } = buffers;
     let mut counting_writer = CountingWriter::wrap(wrt);
-    let term_id_mapping: TermIdMapping = dictionary_builder.serialize(&mut counting_writer)?;
+    let term_id_mapping: TermIdMapping =
+        dictionary_builder.serialize(arena, &mut counting_writer)?;
     let dictionary_num_bytes: u32 = counting_writer.written_bytes() as u32;
     let mut wrt = counting_writer.finish();
     let operation_iterator = operation_it.map(|symbol: ColumnOperation<UnorderedId>| {
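`serialize_bytes_or_str_column` wraps its sink in a `CountingWriter` so it can record how many bytes the dictionary occupied before the column data follows (the `dictionary_num_bytes` trailer in the layout comment above). A minimal version of that wrapper; tantivy's own lives in `tantivy-common`, so this sketch only illustrates the pattern:

```rust
use std::io::{self, Write};

/// Minimal counting writer: forwards all writes and tallies the bytes written.
struct CountingWriter<W: Write> {
    inner: W,
    written: u64,
}

impl<W: Write> CountingWriter<W> {
    fn wrap(inner: W) -> Self {
        CountingWriter { inner, written: 0 }
    }

    fn written_bytes(&self) -> u64 {
        self.written
    }

    /// Hand back the underlying writer once the counted section is done.
    fn finish(self) -> W {
        self.inner
    }
}

impl<W: Write> Write for CountingWriter<W> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let n = self.inner.write(buf)?;
        self.written += n as u64;
        Ok(n)
    }

    fn flush(&mut self) -> io::Result<()> {
        self.inner.flush()
    }
}
```

The serializer writes the dictionary through the wrapper, reads `written_bytes()`, then calls `finish()` and keeps writing the column body to the raw sink.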

View File

@@ -1,7 +1,7 @@
 use std::io;
-use fnv::FnvHashMap;
 use sstable::SSTable;
+use stacker::{MemoryArena, SharedArenaHashMap};

 pub(crate) struct TermIdMapping {
     unordered_to_ord: Vec<OrderedId>,
@@ -31,26 +31,38 @@ pub struct OrderedId(pub u32);
 /// mapping.
 #[derive(Default)]
 pub(crate) struct DictionaryBuilder {
-    dict: FnvHashMap<Vec<u8>, UnorderedId>,
+    dict: SharedArenaHashMap,
 }

 impl DictionaryBuilder {
     /// Get or allocate an unordered id.
     /// (This ID is simply an auto-incremented id.)
-    pub fn get_or_allocate_id(&mut self, term: &[u8]) -> UnorderedId {
-        if let Some(term_id) = self.dict.get(term) {
-            return *term_id;
-        }
-        let new_id = UnorderedId(self.dict.len() as u32);
-        self.dict.insert(term.to_vec(), new_id);
-        new_id
+    pub fn get_or_allocate_id(&mut self, term: &[u8], arena: &mut MemoryArena) -> UnorderedId {
+        let next_id = self.dict.len() as u32;
+        let unordered_id = self
+            .dict
+            .mutate_or_create(term, arena, |unordered_id: Option<u32>| {
+                if let Some(unordered_id) = unordered_id {
+                    unordered_id
+                } else {
+                    next_id
+                }
+            });
+        UnorderedId(unordered_id)
     }

     /// Serialize the dictionary into an fst, and returns the
     /// `UnorderedId -> TermOrdinal` map.
-    pub fn serialize<'a, W: io::Write + 'a>(&self, wrt: &mut W) -> io::Result<TermIdMapping> {
-        let mut terms: Vec<(&[u8], UnorderedId)> =
-            self.dict.iter().map(|(k, v)| (k.as_slice(), *v)).collect();
+    pub fn serialize<'a, W: io::Write + 'a>(
+        &self,
+        arena: &MemoryArena,
+        wrt: &mut W,
+    ) -> io::Result<TermIdMapping> {
+        let mut terms: Vec<(&[u8], UnorderedId)> = self
+            .dict
+            .iter(arena)
+            .map(|(k, v)| (k, arena.read(v)))
+            .collect();
         terms.sort_unstable_by_key(|(key, _)| *key);
         // TODO Remove the allocation.
         let mut unordered_to_ord: Vec<OrderedId> = vec![OrderedId(0u32); terms.len()];
@@ -63,6 +75,10 @@ impl DictionaryBuilder {
         sstable_builder.finish()?;
         Ok(TermIdMapping { unordered_to_ord })
     }
+
+    pub(crate) fn mem_usage(&self) -> usize {
+        self.dict.mem_usage()
+    }
 }

 #[cfg(test)]
@@ -71,12 +87,13 @@ mod tests {
     #[test]
     fn test_dictionary_builder() {
+        let mut arena = MemoryArena::default();
         let mut dictionary_builder = DictionaryBuilder::default();
-        let hello_uid = dictionary_builder.get_or_allocate_id(b"hello");
-        let happy_uid = dictionary_builder.get_or_allocate_id(b"happy");
-        let tax_uid = dictionary_builder.get_or_allocate_id(b"tax");
+        let hello_uid = dictionary_builder.get_or_allocate_id(b"hello", &mut arena);
+        let happy_uid = dictionary_builder.get_or_allocate_id(b"happy", &mut arena);
+        let tax_uid = dictionary_builder.get_or_allocate_id(b"tax", &mut arena);
         let mut buffer = Vec::new();
-        let id_mapping = dictionary_builder.serialize(&mut buffer).unwrap();
+        let id_mapping = dictionary_builder.serialize(&arena, &mut buffer).unwrap();
         assert_eq!(id_mapping.to_ord(hello_uid), OrderedId(1));
         assert_eq!(id_mapping.to_ord(happy_uid), OrderedId(0));
         assert_eq!(id_mapping.to_ord(tax_uid), OrderedId(2));
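Independent of the `FnvHashMap` to `SharedArenaHashMap` swap, the `DictionaryBuilder` scheme above is two-phase: `UnorderedId`s are handed out in insertion order at indexing time, and the sorted `OrderedId` mapping is only computed at serialization. That is why `hello`/`happy`/`tax` map to ordinals 1/0/2 in the test. An arena-free sketch of the same scheme, using a plain `HashMap` and bare `u32` ids for brevity:

```rust
use std::collections::HashMap;

/// Simplified two-phase dictionary: ids are handed out in insertion order;
/// the sorted ordinals are only computed when the dictionary is sealed.
#[derive(Default)]
struct SimpleDictionaryBuilder {
    dict: HashMap<Vec<u8>, u32>,
}

impl SimpleDictionaryBuilder {
    // Auto-incremented unordered id, stable across repeated lookups of the same term.
    fn get_or_allocate_id(&mut self, term: &[u8]) -> u32 {
        let next_id = self.dict.len() as u32;
        *self.dict.entry(term.to_vec()).or_insert(next_id)
    }

    /// Returns `unordered_to_ord`: for each unordered id, its rank in sorted term order.
    fn id_mapping(&self) -> Vec<u32> {
        let mut terms: Vec<(&[u8], u32)> =
            self.dict.iter().map(|(k, v)| (k.as_slice(), *v)).collect();
        terms.sort_unstable_by_key(|(key, _)| *key);
        let mut unordered_to_ord = vec![0u32; terms.len()];
        for (ord, (_term, unordered_id)) in terms.iter().enumerate() {
            unordered_to_ord[*unordered_id as usize] = ord as u32;
        }
        unordered_to_ord
    }
}
```

The arena-backed version in the diff keeps the same contract while storing keys and values in a shared `MemoryArena`, which is what lets `mem_usage` be accounted for centrally.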

View File

@@ -228,7 +228,7 @@ static_dynamic_conversions!(StrColumn, Str);
 static_dynamic_conversions!(BytesColumn, Bytes);
 static_dynamic_conversions!(Column<Ipv6Addr>, IpAddr);

-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub struct DynamicColumnHandle {
     pub(crate) file_slice: FileSlice,
     pub(crate) column_type: ColumnType,
@@ -247,7 +247,7 @@ impl DynamicColumnHandle {
     }

     /// Returns the `u64` fast field reader reader associated with `fields` of types
-    /// Str, u64, i64, f64, or datetime.
+    /// Str, u64, i64, f64, bool, or datetime.
     ///
     /// If not, the fastfield reader will returns the u64-value associated with the original
     /// FastValue.
@@ -258,9 +258,12 @@ impl DynamicColumnHandle {
             let column: BytesColumn = crate::column::open_column_bytes(column_bytes)?;
             Ok(Some(column.term_ord_column))
         }
-        ColumnType::Bool => Ok(None),
         ColumnType::IpAddr => Ok(None),
-        ColumnType::I64 | ColumnType::U64 | ColumnType::F64 | ColumnType::DateTime => {
+        ColumnType::Bool
+        | ColumnType::I64
+        | ColumnType::U64
+        | ColumnType::F64
+        | ColumnType::DateTime => {
            let column = crate::column::open_column_u64::<u64>(column_bytes)?;
             Ok(Some(column))
         }
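The hunk above moves `Bool` from the "returns `None`" arm into the multi-pattern arm of types exposed through a `u64` fast-field reader (booleans read back as 0/1), leaving `IpAddr` as the only opt-out. A toy sketch of that dispatch shape, with an illustrative enum rather than tantivy's:

```rust
// Illustrative column-type enum; not tantivy's definition.
#[derive(Clone, Copy)]
enum ColumnType { Str, Bytes, IpAddr, Bool, I64, U64, F64, DateTime }

// After this diff, Bool joins the types that can be read through a u64
// fast-field reader (Str/Bytes go through term ordinals); IpAddr still cannot.
fn opens_as_u64(ty: ColumnType) -> bool {
    match ty {
        ColumnType::Str | ColumnType::Bytes => true, // via term-ordinal column
        ColumnType::IpAddr => false,
        ColumnType::Bool
        | ColumnType::I64
        | ColumnType::U64
        | ColumnType::F64
        | ColumnType::DateTime => true,
    }
}
```

This is the change that makes bool terms usable in term aggregations (see the `support bool type in term aggregation` commit above).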

View File

@@ -1,3 +1,22 @@
+//! # Tantivy-Columnar
+//!
+//! `tantivy-columnar` provides a columnar storage for tantivy.
+//! The crate allows for efficient read operations on specific columns rather than entire records.
+//!
+//! ## Overview
+//!
+//! - **columnar**: Reading, writing, and merging multiple columns:
+//!     - **[ColumnarWriter]**: Makes it possible to create a new columnar.
+//!     - **[ColumnarReader]**: The ColumnarReader makes it possible to access a set of columns
+//!       associated to field names.
+//!     - **[merge_columnar]**: Contains the functionalities to merge multiple ColumnarReader or
+//!       segments into a single one.
+//!
+//! - **column**: A single column, which contains
+//!     - [column_index]: Resolves the rows for a document id. Manages the cardinality of the
+//!       column.
+//!     - [column_values]: Stores the values of a column in a dense format.
 #![cfg_attr(all(feature = "unstable", test), feature(test))]

 #[cfg(test)]
@@ -12,7 +31,7 @@ use std::io;
 mod block_accessor;
 mod column;
-mod column_index;
+pub mod column_index;
 pub mod column_values;
 mod columnar;
 mod dictionary;

View File

@@ -26,7 +26,7 @@ fn test_dataframe_writer_str() {
     assert_eq!(columnar.num_columns(), 1);
     let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
     assert_eq!(cols.len(), 1);
-    assert_eq!(cols[0].num_bytes(), 87);
+    assert_eq!(cols[0].num_bytes(), 73);
 }

 #[test]
@@ -40,7 +40,7 @@ fn test_dataframe_writer_bytes() {
     assert_eq!(columnar.num_columns(), 1);
     let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
     assert_eq!(cols.len(), 1);
-    assert_eq!(cols[0].num_bytes(), 87);
+    assert_eq!(cols[0].num_bytes(), 73);
 }
 #[test]
@@ -330,9 +330,9 @@ fn bytes_strategy() -> impl Strategy<Value = &'static [u8]> {
 // A random column value
 fn column_value_strategy() -> impl Strategy<Value = ColumnValue> {
     prop_oneof![
-        10 => string_strategy().prop_map(|s| ColumnValue::Str(s)),
-        1 => bytes_strategy().prop_map(|b| ColumnValue::Bytes(b)),
-        40 => num_strategy().prop_map(|n| ColumnValue::Numerical(n)),
+        10 => string_strategy().prop_map(ColumnValue::Str),
+        1 => bytes_strategy().prop_map(ColumnValue::Bytes),
+        40 => num_strategy().prop_map(ColumnValue::Numerical),
         1 => (1u16..3u16).prop_map(|ip_addr_byte| ColumnValue::IpAddr(Ipv6Addr::new(
             127,
             0,
@@ -343,7 +343,7 @@ fn column_value_strategy() -> impl Strategy<Value = ColumnValue> {
             0,
             ip_addr_byte
         ))),
-        1 => any::<bool>().prop_map(|b| ColumnValue::Bool(b)),
+        1 => any::<bool>().prop_map(ColumnValue::Bool),
         1 => (0_679_723_993i64..1_679_723_995i64)
             .prop_map(|val| { ColumnValue::DateTime(DateTime::from_timestamp_secs(val)) })
     ]
@@ -419,8 +419,8 @@ fn build_columnar_with_mapping(
     columnar_writer
         .serialize(num_docs, old_to_new_row_ids_opt, &mut buffer)
         .unwrap();
-    let columnar_reader = ColumnarReader::open(buffer).unwrap();
-    columnar_reader
+    ColumnarReader::open(buffer).unwrap()
 }

 fn build_columnar(docs: &[Vec<(&'static str, ColumnValue)>]) -> ColumnarReader {
@@ -746,7 +746,7 @@ proptest! {
         let stack_merge_order = StackMergeOrder::stack(&columnar_readers_arr[..]).into();
         crate::merge_columnar(&columnar_readers_arr[..], &[], stack_merge_order, &mut output).unwrap();
         let merged_columnar = ColumnarReader::open(output).unwrap();
-        let concat_rows: Vec<Vec<(&'static str, ColumnValue)>> = columnar_docs.iter().cloned().flatten().collect();
+        let concat_rows: Vec<Vec<(&'static str, ColumnValue)>> = columnar_docs.iter().flatten().cloned().collect();
         let expected_merged_columnar = build_columnar(&concat_rows[..]);
         assert_columnar_eq_strict(&merged_columnar, &expected_merged_columnar);
     }
@@ -772,7 +772,7 @@ fn test_columnar_merging_empty_columnar() {
         .unwrap();
     let merged_columnar = ColumnarReader::open(output).unwrap();
     let concat_rows: Vec<Vec<(&'static str, ColumnValue)>> =
-        columnar_docs.iter().cloned().flatten().collect();
+        columnar_docs.iter().flatten().cloned().collect();
     let expected_merged_columnar = build_columnar(&concat_rows[..]);
     assert_columnar_eq_strict(&merged_columnar, &expected_merged_columnar);
 }
@@ -809,7 +809,7 @@ fn test_columnar_merging_number_columns() {
         .unwrap();
     let merged_columnar = ColumnarReader::open(output).unwrap();
     let concat_rows: Vec<Vec<(&'static str, ColumnValue)>> =
-        columnar_docs.iter().cloned().flatten().collect();
+        columnar_docs.iter().flatten().cloned().collect();
     let expected_merged_columnar = build_columnar(&concat_rows[..]);
     assert_columnar_eq_strict(&merged_columnar, &expected_merged_columnar);
 }

View File

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-common"
-version = "0.5.0"
+version = "0.6.0"
 authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
 license = "MIT"
 edition = "2021"
@@ -14,7 +14,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
 [dependencies]
 byteorder = "1.4.3"
-ownedbytes = { version= "0.5", path="../ownedbytes" }
+ownedbytes = { version= "0.6", path="../ownedbytes" }
 async-trait = "0.1"
 time = { version = "0.3.10", features = ["serde-well-known"] }
 serde = { version = "1.0.136", features = ["derive"] }

View File

@@ -1,11 +1,14 @@
 #![allow(deprecated)]

 use std::fmt;
+use std::io::{Read, Write};

 use serde::{Deserialize, Serialize};
 use time::format_description::well_known::Rfc3339;
 use time::{OffsetDateTime, PrimitiveDateTime, UtcOffset};

+use crate::BinarySerializable;
+
 /// Precision with which datetimes are truncated when stored in fast fields. This setting is only
 /// relevant for fast fields. In the docstore, datetimes are always saved with nanosecond precision.
 #[derive(
@@ -164,3 +167,15 @@ impl fmt::Debug for DateTime {
         f.write_str(&utc_rfc3339)
     }
 }
+
+impl BinarySerializable for DateTime {
+    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> std::io::Result<()> {
+        let timestamp_micros = self.into_timestamp_micros();
+        <i64 as BinarySerializable>::serialize(&timestamp_micros, writer)
+    }
+
+    fn deserialize<R: Read>(reader: &mut R) -> std::io::Result<Self> {
+        let timestamp_micros = <i64 as BinarySerializable>::deserialize(reader)?;
+        Ok(Self::from_timestamp_micros(timestamp_micros))
+    }
+}
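The new `BinarySerializable` impl above delegates to the existing `i64` impl, storing a `DateTime` as its microsecond timestamp. A standalone sketch of that round-trip using plain little-endian byte order (the exact on-disk byte order of tantivy's `i64` impl is this sketch's assumption; the point is the micros-as-i64 encoding):

```rust
use std::io::{self, Read, Write};

// Store a datetime as its i64 microsecond timestamp, mirroring the impl above.
fn serialize_micros<W: Write>(timestamp_micros: i64, writer: &mut W) -> io::Result<()> {
    writer.write_all(&timestamp_micros.to_le_bytes())
}

// Read the 8 timestamp bytes back and rebuild the i64.
fn deserialize_micros<R: Read>(reader: &mut R) -> io::Result<i64> {
    let mut buf = [0u8; 8];
    reader.read_exact(&mut buf)?;
    Ok(i64::from_le_bytes(buf))
}
```

Delegating to the integer impl keeps the datetime format in lockstep with every other fixed-width field tantivy serializes.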

View File

@@ -1,3 +1,4 @@
+use std::fs::File;
 use std::ops::{Deref, Range, RangeBounds};
 use std::sync::Arc;
 use std::{fmt, io};
@@ -32,6 +33,62 @@ pub trait FileHandle: 'static + Send + Sync + HasLen + fmt::Debug {
     }
 }

+#[derive(Debug)]
+/// A File with it's length included.
+pub struct WrapFile {
+    file: File,
+    len: usize,
+}
+
+impl WrapFile {
+    /// Creates a new WrapFile and stores its length.
+    pub fn new(file: File) -> io::Result<Self> {
+        let len = file.metadata()?.len() as usize;
+        Ok(WrapFile { file, len })
+    }
+}
+
+#[async_trait]
+impl FileHandle for WrapFile {
+    fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
+        let file_len = self.len();
+        // Calculate the actual range to read, ensuring it stays within file boundaries
+        let start = range.start;
+        let end = range.end.min(file_len);
+        // Ensure the start is before the end of the range
+        if start >= end {
+            return Err(io::Error::new(io::ErrorKind::InvalidInput, "Invalid range"));
+        }
+        let mut buffer = vec![0; end - start];
+        #[cfg(unix)]
+        {
+            use std::os::unix::prelude::FileExt;
+            self.file.read_exact_at(&mut buffer, start as u64)?;
+        }
+        #[cfg(not(unix))]
+        {
+            use std::io::{Read, Seek};
+            let mut file = self.file.try_clone()?; // Clone the file to read from it separately
+            // Seek to the start position in the file
+            file.seek(io::SeekFrom::Start(start as u64))?;
+            // Read the data into the buffer
+            file.read_exact(&mut buffer)?;
+        }
+        Ok(OwnedBytes::new(buffer))
+    }
+    // todo implement async
+}
+
+impl HasLen for WrapFile {
+    fn len(&self) -> usize {
+        self.len
+    }
+}
+
 #[async_trait]
 impl FileHandle for &'static [u8] {
     fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
@@ -67,6 +124,30 @@ impl fmt::Debug for FileSlice {
     }
 }

+impl FileSlice {
+    pub fn stream_file_chunks(&self) -> impl Iterator<Item = io::Result<OwnedBytes>> + '_ {
+        let len = self.range.end;
+        let mut start = self.range.start;
+        std::iter::from_fn(move || {
+            /// Returns chunks of 1MB of data from the FileHandle.
+            const CHUNK_SIZE: usize = 1024 * 1024; // 1MB
+            if start < len {
+                let end = (start + CHUNK_SIZE).min(len);
+                let range = start..end;
+                let chunk = self.data.read_bytes(range);
+                start += CHUNK_SIZE;
+                match chunk {
+                    Ok(chunk) => Some(Ok(chunk)),
+                    Err(e) => Some(Err(e)),
+                }
+            } else {
+                None
+            }
+        })
+    }
+}
+
 /// Takes a range, a `RangeBounds` object, and returns
 /// a `Range` that corresponds to the relative application of the
 /// `RangeBounds` object to the original `Range`.
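`stream_file_chunks` above walks a `FileSlice` in fixed 1 MB chunks via `std::iter::from_fn`, clamping the final chunk to the slice's end. The same chunking logic over an in-memory buffer, with the chunk size shrunk for illustration:

```rust
// Yield successive fixed-size chunks of `data`, like the `stream_file_chunks`
// iterator above (chunk size reduced for illustration; the last chunk is shorter).
fn stream_chunks(data: &[u8], chunk_size: usize) -> impl Iterator<Item = &[u8]> + '_ {
    let len = data.len();
    let mut start = 0usize;
    std::iter::from_fn(move || {
        if start < len {
            // Clamp the chunk end to the buffer length, as the real code
            // clamps to the slice's end offset.
            let end = (start + chunk_size).min(len);
            let chunk = &data[start..end];
            start = end;
            Some(chunk)
        } else {
            None
        }
    })
}
```

Streaming fixed-size chunks bounds peak memory when copying a large file slice, instead of materializing the whole slice with one `read_bytes` call.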

View File

@@ -27,15 +27,15 @@ pub trait GroupByIteratorExtended: Iterator {
     where
         Self: Sized,
         F: FnMut(&Self::Item) -> K,
-        K: PartialEq + Copy,
-        Self::Item: Copy,
+        K: PartialEq + Clone,
+        Self::Item: Clone,
     {
         GroupByIterator::new(self, key)
     }
 }
 impl<I: Iterator> GroupByIteratorExtended for I {}

-pub struct GroupByIterator<I, F, K: Copy>
+pub struct GroupByIterator<I, F, K: Clone>
 where
     I: Iterator,
     F: FnMut(&I::Item) -> K,
@@ -50,7 +50,7 @@ where
     inner: Rc<RefCell<GroupByShared<I, F, K>>>,
 }

-struct GroupByShared<I, F, K: Copy>
+struct GroupByShared<I, F, K: Clone>
 where
     I: Iterator,
     F: FnMut(&I::Item) -> K,
@@ -63,7 +63,7 @@ impl<I, F, K> GroupByIterator<I, F, K>
 where
     I: Iterator,
     F: FnMut(&I::Item) -> K,
-    K: Copy,
+    K: Clone,
 {
     fn new(inner: I, group_by_fn: F) -> Self {
         let inner = GroupByShared {
@@ -80,28 +80,28 @@ where
impl<I, F, K> Iterator for GroupByIterator<I, F, K> impl<I, F, K> Iterator for GroupByIterator<I, F, K>
where where
I: Iterator, I: Iterator,
I::Item: Copy, I::Item: Clone,
F: FnMut(&I::Item) -> K, F: FnMut(&I::Item) -> K,
K: Copy, K: Clone,
{ {
type Item = (K, GroupIterator<I, F, K>); type Item = (K, GroupIterator<I, F, K>);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
let mut inner = self.inner.borrow_mut(); let mut inner = self.inner.borrow_mut();
let value = *inner.iter.peek()?; let value = inner.iter.peek()?.clone();
let key = (inner.group_by_fn)(&value); let key = (inner.group_by_fn)(&value);
let inner = self.inner.clone(); let inner = self.inner.clone();
let group_iter = GroupIterator { let group_iter = GroupIterator {
inner, inner,
group_key: key, group_key: key.clone(),
}; };
Some((key, group_iter)) Some((key, group_iter))
} }
} }
pub struct GroupIterator<I, F, K: Copy> pub struct GroupIterator<I, F, K: Clone>
where where
I: Iterator, I: Iterator,
F: FnMut(&I::Item) -> K, F: FnMut(&I::Item) -> K,
@@ -110,10 +110,10 @@ where
group_key: K, group_key: K,
} }
impl<I, F, K: PartialEq + Copy> Iterator for GroupIterator<I, F, K> impl<I, F, K: PartialEq + Clone> Iterator for GroupIterator<I, F, K>
where where
I: Iterator, I: Iterator,
I::Item: Copy, I::Item: Clone,
F: FnMut(&I::Item) -> K, F: FnMut(&I::Item) -> K,
{ {
type Item = I::Item; type Item = I::Item;
@@ -121,7 +121,7 @@ where
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
let mut inner = self.inner.borrow_mut(); let mut inner = self.inner.borrow_mut();
// peek if next value is in group // peek if next value is in group
let peek_val = *inner.iter.peek()?; let peek_val = inner.iter.peek()?.clone();
if (inner.group_by_fn)(&peek_val) == self.group_key { if (inner.group_by_fn)(&peek_val) == self.group_key {
inner.iter.next() inner.iter.next()
} else { } else {

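The point of relaxing `Copy` to `Clone` in the hunk above is that grouping now works for non-`Copy` items such as `String`. A hypothetical standalone `group_by_key` helper (not tantivy's `GroupByIterator` itself) illustrates why `Clone` is the right bound:

```rust
// Group consecutive items by a key, requiring only `Clone` on the items --
// a sketch of the bound change, not the tantivy implementation.
fn group_by_key<T, K, F>(items: &[T], key: F) -> Vec<(K, Vec<T>)>
where
    T: Clone,
    K: PartialEq,
    F: Fn(&T) -> K,
{
    let mut groups: Vec<(K, Vec<T>)> = Vec::new();
    for item in items {
        let k = key(item);
        match groups.last_mut() {
            // Extend the current group while the key is unchanged.
            Some((last_key, members)) if *last_key == k => members.push(item.clone()),
            // Otherwise start a new group.
            _ => groups.push((k, vec![item.clone()])),
        }
    }
    groups
}

fn main() {
    // `String` is Clone but not Copy; the old `Copy` bound would reject this.
    let words = vec!["apple".to_string(), "avocado".to_string(), "banana".to_string()];
    let grouped = group_by_key(&words, |w| w.as_bytes()[0]);
    assert_eq!(grouped.len(), 2);
    assert_eq!(grouped[0].1, vec!["apple".to_string(), "avocado".to_string()]);
    println!("{grouped:?}");
}
```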

@@ -0,0 +1,112 @@
use crate::replace_in_place;
/// Separates the different segments of a json path.
pub const JSON_PATH_SEGMENT_SEP: u8 = 1u8;
pub const JSON_PATH_SEGMENT_SEP_STR: &str =
unsafe { std::str::from_utf8_unchecked(&[JSON_PATH_SEGMENT_SEP]) };
/// `JsonPathWriter` builds flattened JSON paths for tantivy.
#[derive(Clone, Debug, Default)]
pub struct JsonPathWriter {
path: String,
indices: Vec<usize>,
expand_dots: bool,
}
impl JsonPathWriter {
pub fn new() -> Self {
JsonPathWriter {
path: String::new(),
indices: Vec::new(),
expand_dots: false,
}
}
/// When expand_dots is enabled, a JSON object like
/// `{"k8s.node.id": 5}` is processed as if it were
/// `{"k8s": {"node": {"id": 5}}}`.
/// This option has the merit of allowing users to
/// write queries like `k8s.node.id:5`.
/// On the other hand, enabling this feature can lead to
/// ambiguity.
#[inline]
pub fn set_expand_dots(&mut self, expand_dots: bool) {
self.expand_dots = expand_dots;
}
/// Push a new segment to the path.
#[inline]
pub fn push(&mut self, segment: &str) {
let len_path = self.path.len();
self.indices.push(len_path);
if !self.path.is_empty() {
self.path.push_str(JSON_PATH_SEGMENT_SEP_STR);
}
self.path.push_str(segment);
if self.expand_dots {
// This might include the separation byte, which is ok because it is not a dot.
let appended_segment = &mut self.path[len_path..];
// The unsafe below is safe as long as b'.' and JSON_PATH_SEGMENT_SEP are
// valid single-byte UTF-8 strings.
// By UTF-8 design, they cannot be part of another codepoint.
unsafe {
replace_in_place(b'.', JSON_PATH_SEGMENT_SEP, appended_segment.as_bytes_mut())
};
}
}
/// Remove the last segment. Does nothing if the path is empty.
#[inline]
pub fn pop(&mut self) {
if let Some(last_idx) = self.indices.pop() {
self.path.truncate(last_idx);
}
}
/// Clear the path.
#[inline]
pub fn clear(&mut self) {
self.path.clear();
self.indices.clear();
}
/// Get the current path.
#[inline]
pub fn as_str(&self) -> &str {
&self.path
}
}
impl From<JsonPathWriter> for String {
#[inline]
fn from(value: JsonPathWriter) -> Self {
value.path
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn json_path_writer_test() {
let mut writer = JsonPathWriter::new();
writer.push("root");
assert_eq!(writer.as_str(), "root");
writer.push("child");
assert_eq!(writer.as_str(), "root\u{1}child");
writer.pop();
assert_eq!(writer.as_str(), "root");
writer.push("k8s.node.id");
assert_eq!(writer.as_str(), "root\u{1}k8s.node.id");
writer.set_expand_dots(true);
writer.pop();
writer.push("k8s.node.id");
assert_eq!(writer.as_str(), "root\u{1}k8s\u{1}node\u{1}id");
}
}


@@ -9,6 +9,7 @@ mod byte_count;
mod datetime;
pub mod file_slice;
mod group_by;
+mod json_path_writer;
mod serialize;
mod vint;
mod writer;
@@ -18,6 +19,7 @@ pub use byte_count::ByteCount;
pub use datetime::DatePrecision;
pub use datetime::{DateTime, DateTimePrecision};
pub use group_by::GroupByIteratorExtended;
+pub use json_path_writer::JsonPathWriter;
pub use ownedbytes::{OwnedBytes, StableDeref};
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
pub use vint::{
@@ -116,6 +118,7 @@ pub fn u64_to_f64(val: u64) -> f64 {
///
/// This function assumes that the needle is rarely contained in the bytes string
/// and offers a fast path if the needle is not present.
+#[inline]
pub fn replace_in_place(needle: u8, replacement: u8, bytes: &mut [u8]) {
if !bytes.contains(&needle) {
return;

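The `#[inline]`-annotated `replace_in_place` above uses a check-then-rewrite shape: bail out early when the needle is absent, otherwise rewrite matching bytes in place. A minimal self-contained sketch of that same fast-path idea (not the exact tantivy implementation), applied to the `'.'`-to-separator substitution from `JsonPathWriter`:

```rust
// Sketch of replace_in_place's fast path: an early return when the needle
// is absent, then a simple in-place rewrite of matching bytes.
fn replace_in_place(needle: u8, replacement: u8, bytes: &mut [u8]) {
    if !bytes.contains(&needle) {
        return; // fast path: nothing to replace
    }
    for b in bytes.iter_mut() {
        if *b == needle {
            *b = replacement;
        }
    }
}

fn main() {
    // Same substitution JsonPathWriter performs for expand_dots:
    // '.' becomes JSON_PATH_SEGMENT_SEP (0x01).
    let mut path = b"k8s.node.id".to_vec();
    replace_in_place(b'.', 1u8, &mut path);
    assert_eq!(path, b"k8s\x01node\x01id");
}
```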

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::io::{Read, Write};
use std::{fmt, io};
@@ -249,6 +250,43 @@ impl BinarySerializable for String {
}
}
impl<'a> BinarySerializable for Cow<'a, str> {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
let data: &[u8] = self.as_bytes();
VInt(data.len() as u64).serialize(writer)?;
writer.write_all(data)
}
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, str>> {
let string_length = VInt::deserialize(reader)?.val() as usize;
let mut result = String::with_capacity(string_length);
reader
.take(string_length as u64)
.read_to_string(&mut result)?;
Ok(Cow::Owned(result))
}
}
impl<'a> BinarySerializable for Cow<'a, [u8]> {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
VInt(self.len() as u64).serialize(writer)?;
for it in self.iter() {
it.serialize(writer)?;
}
Ok(())
}
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, [u8]>> {
let num_items = VInt::deserialize(reader)?.val();
let mut items: Vec<u8> = Vec::with_capacity(num_items as usize);
for _ in 0..num_items {
let item = u8::deserialize(reader)?;
items.push(item);
}
Ok(Cow::Owned(items))
}
}
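The new `Cow<'_, str>` impl above always deserializes into `Cow::Owned`, since the bytes come from a reader rather than a borrowed buffer. A round-trip sketch of the same shape, using a plain `u32` little-endian length prefix as a stand-in for tantivy's `VInt` (an assumption for the demo; the real impl uses `VInt`):

```rust
use std::borrow::Cow;
use std::io::{self, Read, Write};

// Length-prefixed string serialization, mirroring the Cow<'_, str>
// BinarySerializable impl (u32 length prefix stands in for VInt).
fn serialize_cow_str<W: Write>(value: &Cow<'_, str>, writer: &mut W) -> io::Result<()> {
    let bytes = value.as_bytes();
    writer.write_all(&(bytes.len() as u32).to_le_bytes())?;
    writer.write_all(bytes)
}

fn deserialize_cow_str<'a, R: Read>(reader: &mut R) -> io::Result<Cow<'a, str>> {
    let mut len_buf = [0u8; 4];
    reader.read_exact(&mut len_buf)?;
    let len = u32::from_le_bytes(len_buf) as usize;
    let mut out = String::with_capacity(len);
    reader.take(len as u64).read_to_string(&mut out)?;
    // As in the real impl, deserialization always yields an owned value.
    Ok(Cow::Owned(out))
}

fn main() -> io::Result<()> {
    let original: Cow<str> = Cow::Borrowed("tantivy");
    let mut buf = Vec::new();
    serialize_cow_str(&original, &mut buf)?;
    let decoded = deserialize_cow_str(&mut buf.as_slice())?;
    assert_eq!(decoded, original);
    Ok(())
}
```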
#[cfg(test)]
pub mod test {


@@ -12,7 +12,7 @@ use tantivy::aggregation::agg_result::AggregationResults;
use tantivy::aggregation::AggregationCollector;
use tantivy::query::AllQuery;
use tantivy::schema::{self, IndexRecordOption, Schema, TextFieldIndexing, FAST};
-use tantivy::Index;
+use tantivy::{Index, IndexWriter, TantivyDocument};
fn main() -> tantivy::Result<()> {
// # Create Schema
@@ -37,7 +37,7 @@ fn main() -> tantivy::Result<()> {
.set_index_option(IndexRecordOption::WithFreqs)
.set_tokenizer("raw"),
)
-.set_fast("default")
+.set_fast(None)
.set_stored();
schema_builder.add_text_field("category", text_fieldtype);
schema_builder.add_f64_field("stock", FAST);
@@ -132,10 +132,10 @@ fn main() -> tantivy::Result<()> {
let stream = Deserializer::from_str(data).into_iter::<Value>();
-let mut index_writer = index.writer(50_000_000)?;
+let mut index_writer: IndexWriter = index.writer(50_000_000)?;
let mut num_indexed = 0;
for value in stream {
-let doc = schema.parse_document(&serde_json::to_string(&value.unwrap())?)?;
+let doc = TantivyDocument::parse_json(&schema, &serde_json::to_string(&value.unwrap())?)?;
index_writer.add_document(doc)?;
num_indexed += 1;
if num_indexed > 4 {


@@ -15,7 +15,7 @@
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
-use tantivy::{doc, Index, ReloadPolicy};
+use tantivy::{doc, Index, IndexWriter, ReloadPolicy};
use tempfile::TempDir;
fn main() -> tantivy::Result<()> {
@@ -75,7 +75,7 @@ fn main() -> tantivy::Result<()> {
// Here we give tantivy a budget of `50MB`.
// Using a bigger memory_arena for the indexer may increase
// throughput, but 50 MB is already plenty.
-let mut index_writer = index.writer(50_000_000)?;
+let mut index_writer: IndexWriter = index.writer(50_000_000)?;
// Let's index our documents!
// We first need a handle on the title and the body field.
@@ -87,7 +87,7 @@ fn main() -> tantivy::Result<()> {
let title = schema.get_field("title").unwrap();
let body = schema.get_field("body").unwrap();
-let mut old_man_doc = Document::default();
+let mut old_man_doc = TantivyDocument::default();
old_man_doc.add_text(title, "The Old Man and the Sea");
old_man_doc.add_text(
body,
@@ -164,7 +164,7 @@ fn main() -> tantivy::Result<()> {
// will reload the index automatically after each commit.
let reader = index
.reader_builder()
-.reload_policy(ReloadPolicy::OnCommit)
+.reload_policy(ReloadPolicy::OnCommitWithDelay)
.try_into()?;
// We now need to acquire a searcher.
@@ -217,9 +217,23 @@ fn main() -> tantivy::Result<()> {
// the document returned will only contain
// a title.
for (_score, doc_address) in top_docs {
-let retrieved_doc = searcher.doc(doc_address)?;
+let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
-println!("{}", schema.to_json(&retrieved_doc));
+println!("{}", retrieved_doc.to_json(&schema));
}
+// We can also get an explanation to understand
+// how a found document got its score.
+let query = query_parser.parse_query("title:sea^20 body:whale^70")?;
+let (_score, doc_address) = searcher
+.search(&query, &TopDocs::with_limit(1))?
+.into_iter()
+.next()
+.unwrap();
+let explanation = query.explain(&searcher, doc_address)?;
+println!("{}", explanation.to_pretty_json());
Ok(())
}


@@ -13,7 +13,7 @@ use columnar::Column;
use tantivy::collector::{Collector, SegmentCollector};
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
-use tantivy::{doc, Index, Score, SegmentReader};
+use tantivy::{doc, Index, IndexWriter, Score, SegmentReader};
#[derive(Default)]
struct Stats {
@@ -142,7 +142,7 @@ fn main() -> tantivy::Result<()> {
// this example.
let index = Index::create_in_ram(schema);
-let mut index_writer = index.writer(50_000_000)?;
+let mut index_writer: IndexWriter = index.writer(50_000_000)?;
index_writer.add_document(doc!(
product_name => "Super Broom 2000",
product_description => "While it is ok for short distance travel, this broom \


@@ -6,7 +6,7 @@ use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::tokenizer::NgramTokenizer;
-use tantivy::{doc, Index};
+use tantivy::{doc, Index, IndexWriter};
fn main() -> tantivy::Result<()> {
// # Defining the schema
@@ -62,7 +62,7 @@ fn main() -> tantivy::Result<()> {
//
// Here we use a buffer of 50MB per thread. Using a bigger
// memory arena for the indexer can increase its throughput.
-let mut index_writer = index.writer(50_000_000)?;
+let mut index_writer: IndexWriter = index.writer(50_000_000)?;
index_writer.add_document(doc!(
title => "The Old Man and the Sea",
body => "He was an old man who fished alone in a skiff in the Gulf Stream and \
@@ -103,8 +103,8 @@ fn main() -> tantivy::Result<()> {
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
for (_, doc_address) in top_docs {
-let retrieved_doc = searcher.doc(doc_address)?;
+let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
-println!("{}", schema.to_json(&retrieved_doc));
+println!("{}", retrieved_doc.to_json(&schema));
}
Ok(())


@@ -4,8 +4,8 @@
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
-use tantivy::schema::{DateOptions, Schema, Value, INDEXED, STORED, STRING};
+use tantivy::schema::{DateOptions, Document, OwnedValue, Schema, INDEXED, STORED, STRING};
-use tantivy::Index;
+use tantivy::{Index, IndexWriter, TantivyDocument};
fn main() -> tantivy::Result<()> {
// # Defining the schema
@@ -22,16 +22,18 @@ fn main() -> tantivy::Result<()> {
// # Indexing documents
let index = Index::create_in_ram(schema.clone());
-let mut index_writer = index.writer(50_000_000)?;
+let mut index_writer: IndexWriter = index.writer(50_000_000)?;
// The dates are passed as string in the RFC3339 format
-let doc = schema.parse_document(
+let doc = TantivyDocument::parse_json(
+&schema,
r#"{
"occurred_at": "2022-06-22T12:53:50.53Z",
"event": "pull-request"
}"#,
)?;
index_writer.add_document(doc)?;
-let doc = schema.parse_document(
+let doc = TantivyDocument::parse_json(
+&schema,
r#"{
"occurred_at": "2022-06-22T13:00:00.22Z",
"event": "comment"
@@ -58,13 +60,13 @@ fn main() -> tantivy::Result<()> {
let count_docs = searcher.search(&*query, &TopDocs::with_limit(4))?;
assert_eq!(count_docs.len(), 1);
for (_score, doc_address) in count_docs {
-let retrieved_doc = searcher.doc(doc_address)?;
+let retrieved_doc = searcher.doc::<TantivyDocument>(doc_address)?;
assert!(matches!(
retrieved_doc.get_first(occurred_at),
-Some(Value::Date(_))
+Some(OwnedValue::Date(_))
));
assert_eq!(
-schema.to_json(&retrieved_doc),
+retrieved_doc.to_json(&schema),
r#"{"event":["comment"],"occurred_at":["2022-06-22T13:00:00.22Z"]}"#
);
}


@@ -11,7 +11,7 @@
use tantivy::collector::TopDocs;
use tantivy::query::TermQuery;
use tantivy::schema::*;
-use tantivy::{doc, Index, IndexReader};
+use tantivy::{doc, Index, IndexReader, IndexWriter};
// A simple helper function to fetch a single document
// given its id from our index.
@@ -19,7 +19,7 @@ use tantivy::{doc, Index, IndexReader};
fn extract_doc_given_isbn(
reader: &IndexReader,
isbn_term: &Term,
-) -> tantivy::Result<Option<Document>> {
+) -> tantivy::Result<Option<TantivyDocument>> {
let searcher = reader.searcher();
// This is the simplest query you can think of.
@@ -69,10 +69,10 @@ fn main() -> tantivy::Result<()> {
let index = Index::create_in_ram(schema.clone());
-let mut index_writer = index.writer(50_000_000)?;
+let mut index_writer: IndexWriter = index.writer(50_000_000)?;
// Let's add a couple of documents, for the sake of the example.
-let mut old_man_doc = Document::default();
+let mut old_man_doc = TantivyDocument::default();
old_man_doc.add_text(title, "The Old Man and the Sea");
index_writer.add_document(doc!(
isbn => "978-0099908401",
@@ -94,7 +94,7 @@ fn main() -> tantivy::Result<()> {
// Oops our frankenstein doc seems misspelled
let frankenstein_doc_misspelled = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap();
assert_eq!(
-schema.to_json(&frankenstein_doc_misspelled),
+frankenstein_doc_misspelled.to_json(&schema),
r#"{"isbn":["978-9176370711"],"title":["Frankentein"]}"#,
);
@@ -136,7 +136,7 @@ fn main() -> tantivy::Result<()> {
// No more typo!
let frankenstein_new_doc = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap();
assert_eq!(
-schema.to_json(&frankenstein_new_doc),
+frankenstein_new_doc.to_json(&schema),
r#"{"isbn":["978-9176370711"],"title":["Frankenstein"]}"#,
);


@@ -17,7 +17,7 @@
use tantivy::collector::FacetCollector;
use tantivy::query::{AllQuery, TermQuery};
use tantivy::schema::*;
-use tantivy::{doc, Index};
+use tantivy::{doc, Index, IndexWriter};
fn main() -> tantivy::Result<()> {
// Let's create a temporary directory for the sake of this example
@@ -30,7 +30,7 @@ fn main() -> tantivy::Result<()> {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
-let mut index_writer = index.writer(30_000_000)?;
+let mut index_writer: IndexWriter = index.writer(30_000_000)?;
// For convenience, tantivy also comes with a macro to
// reduce the boilerplate above.


@@ -12,7 +12,7 @@ use std::collections::HashSet;
use tantivy::collector::TopDocs;
use tantivy::query::BooleanQuery;
use tantivy::schema::*;
-use tantivy::{doc, DocId, Index, Score, SegmentReader};
+use tantivy::{doc, DocId, Index, IndexWriter, Score, SegmentReader};
fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder();
@@ -23,7 +23,7 @@ fn main() -> tantivy::Result<()> {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
-let mut index_writer = index.writer(30_000_000)?;
+let mut index_writer: IndexWriter = index.writer(30_000_000)?;
index_writer.add_document(doc!(
title => "Fried egg",
@@ -91,11 +91,10 @@ fn main() -> tantivy::Result<()> {
.iter()
.map(|(_, doc_id)| {
searcher
-.doc(*doc_id)
+.doc::<TantivyDocument>(*doc_id)
.unwrap()
.get_first(title)
-.unwrap()
-.as_text()
+.and_then(|v| v.as_str())
.unwrap()
.to_owned()
})


@@ -14,7 +14,7 @@
use tantivy::collector::{Count, TopDocs};
use tantivy::query::FuzzyTermQuery;
use tantivy::schema::*;
-use tantivy::{doc, Index, ReloadPolicy};
+use tantivy::{doc, Index, IndexWriter, ReloadPolicy};
use tempfile::TempDir;
fn main() -> tantivy::Result<()> {
@@ -66,7 +66,7 @@ fn main() -> tantivy::Result<()> {
// Here we give tantivy a budget of `50MB`.
// Using a bigger memory_arena for the indexer may increase
// throughput, but 50 MB is already plenty.
-let mut index_writer = index.writer(50_000_000)?;
+let mut index_writer: IndexWriter = index.writer(50_000_000)?;
// Let's index our documents!
// We first need a handle on the title and the body field.
@@ -123,7 +123,7 @@ fn main() -> tantivy::Result<()> {
// will reload the index automatically after each commit.
let reader = index
.reader_builder()
-.reload_policy(ReloadPolicy::OnCommit)
+.reload_policy(ReloadPolicy::OnCommitWithDelay)
.try_into()?;
// We now need to acquire a searcher.
@@ -151,10 +151,10 @@ fn main() -> tantivy::Result<()> {
assert_eq!(count, 3);
assert_eq!(top_docs.len(), 3);
for (score, doc_address) in top_docs {
-let retrieved_doc = searcher.doc(doc_address)?;
// Note that the score is not lower for the fuzzy hit.
// There's an issue open for that: https://github.com/quickwit-oss/tantivy/issues/563
-println!("score {score:?} doc {}", schema.to_json(&retrieved_doc));
+let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
+println!("score {score:?} doc {}", retrieved_doc.to_json(&schema));
// score 1.0 doc {"title":["The Diary of Muadib"]}
//
// score 1.0 doc {"title":["The Diary of a Young Girl"]}


@@ -21,7 +21,7 @@ fn main() -> tantivy::Result<()> {
}"#;
// We can parse our document
-let _mice_and_men_doc = schema.parse_document(mice_and_men_doc_json)?;
+let _mice_and_men_doc = TantivyDocument::parse_json(&schema, mice_and_men_doc_json)?;
// Multi-valued fields are allowed; they are
// expressed in JSON by an array.
@@ -30,7 +30,7 @@ fn main() -> tantivy::Result<()> {
"title": ["Frankenstein", "The Modern Prometheus"],
"year": 1818
}"#;
-let _frankenstein_doc = schema.parse_document(frankenstein_json)?;
+let _frankenstein_doc = TantivyDocument::parse_json(&schema, frankenstein_json)?;
// Note that the schema is saved in your index directory.
//


@@ -5,7 +5,7 @@
use tantivy::collector::Count;
use tantivy::query::RangeQuery;
use tantivy::schema::{Schema, INDEXED};
-use tantivy::{doc, Index, Result};
+use tantivy::{doc, Index, IndexWriter, Result};
fn main() -> Result<()> {
// For the sake of simplicity, this schema will only have 1 field
@@ -17,7 +17,7 @@ fn main() -> Result<()> {
let index = Index::create_in_ram(schema);
let reader = index.reader()?;
{
-let mut index_writer = index.writer_with_num_threads(1, 6_000_000)?;
+let mut index_writer: IndexWriter = index.writer_with_num_threads(1, 6_000_000)?;
for year in 1950u64..2019u64 {
index_writer.add_document(doc!(year_field => year))?;
}


@@ -6,7 +6,7 @@
use tantivy::collector::{Count, TopDocs};
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, FAST, INDEXED, STORED, STRING};
-use tantivy::Index;
+use tantivy::{Index, IndexWriter, TantivyDocument};
fn main() -> tantivy::Result<()> {
// # Defining the schema
@@ -22,20 +22,22 @@ fn main() -> tantivy::Result<()> {
// # Indexing documents
let index = Index::create_in_ram(schema.clone());
-let mut index_writer = index.writer(50_000_000)?;
+let mut index_writer: IndexWriter = index.writer(50_000_000)?;
// ### IPv4
// Adding documents that contain an IPv4 address. Notice that the IP addresses are passed as
// `String`. Since the field is of type ip, we parse the IP address from the string and store it
// internally as IPv6.
-let doc = schema.parse_document(
+let doc = TantivyDocument::parse_json(
+&schema,
r#"{
"ip": "192.168.0.33",
"event_type": "login"
}"#,
)?;
index_writer.add_document(doc)?;
-let doc = schema.parse_document(
+let doc = TantivyDocument::parse_json(
+&schema,
r#"{
"ip": "192.168.0.80",
"event_type": "checkout"
@@ -44,7 +46,8 @@ fn main() -> tantivy::Result<()> {
index_writer.add_document(doc)?;
// ### IPv6
// Adding a document that contains an IPv6 address.
-let doc = schema.parse_document(
+let doc = TantivyDocument::parse_json(
+&schema,
r#"{
"ip": "2001:0db8:85a3:0000:0000:8a2e:0370:7334",
"event_type": "checkout"


@@ -10,7 +10,7 @@
// ---
// Importing tantivy...
use tantivy::schema::*;
-use tantivy::{doc, DocSet, Index, Postings, TERMINATED};
+use tantivy::{doc, DocSet, Index, IndexWriter, Postings, TERMINATED};
fn main() -> tantivy::Result<()> {
// We first create a schema for the sake of the
@@ -24,7 +24,7 @@ fn main() -> tantivy::Result<()> {
let index = Index::create_in_ram(schema);
-let mut index_writer = index.writer_with_num_threads(1, 50_000_000)?;
+let mut index_writer: IndexWriter = index.writer_with_num_threads(1, 50_000_000)?;
index_writer.add_document(doc!(title => "The Old Man and the Sea"))?;
index_writer.add_document(doc!(title => "Of Mice and Men"))?;
index_writer.add_document(doc!(title => "The modern Promotheus"))?;


@@ -7,7 +7,7 @@
use tantivy::collector::{Count, TopDocs};
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, FAST, STORED, STRING, TEXT};
-use tantivy::Index;
+use tantivy::{Index, IndexWriter, TantivyDocument};
fn main() -> tantivy::Result<()> {
// # Defining the schema
@@ -20,8 +20,9 @@ fn main() -> tantivy::Result<()> {
// # Indexing documents
let index = Index::create_in_ram(schema.clone());
-let mut index_writer = index.writer(50_000_000)?;
+let mut index_writer: IndexWriter = index.writer(50_000_000)?;
-let doc = schema.parse_document(
+let doc = TantivyDocument::parse_json(
+&schema,
r#"{
"timestamp": "2022-02-22T23:20:50.53Z",
"event_type": "click",
@@ -33,7 +34,8 @@ fn main() -> tantivy::Result<()> {
}"#,
)?;
index_writer.add_document(doc)?;
-let doc = schema.parse_document(
+let doc = TantivyDocument::parse_json(
+&schema,
r#"{
"timestamp": "2022-02-22T23:20:51.53Z",
"event_type": "click",


@@ -1,7 +1,7 @@
 use tantivy::collector::TopDocs;
 use tantivy::query::QueryParser;
 use tantivy::schema::*;
-use tantivy::{doc, Index, ReloadPolicy, Result};
+use tantivy::{doc, Index, IndexWriter, ReloadPolicy, Result};
 use tempfile::TempDir;

 fn main() -> Result<()> {
@@ -17,7 +17,7 @@ fn main() -> Result<()> {
     let index = Index::create_in_dir(&index_path, schema)?;
-    let mut index_writer = index.writer(50_000_000)?;
+    let mut index_writer: IndexWriter = index.writer(50_000_000)?;
     index_writer.add_document(doc!(
         title => "The Old Man and the Sea",
@@ -51,7 +51,7 @@ fn main() -> Result<()> {
     let reader = index
         .reader_builder()
-        .reload_policy(ReloadPolicy::OnCommit)
+        .reload_policy(ReloadPolicy::OnCommitWithDelay)
         .try_into()?;
     let searcher = reader.searcher();
@@ -67,8 +67,12 @@ fn main() -> Result<()> {
     let mut titles = top_docs
         .into_iter()
         .map(|(_score, doc_address)| {
-            let doc = searcher.doc(doc_address)?;
-            let title = doc.get_first(title).unwrap().as_text().unwrap().to_owned();
+            let doc = searcher.doc::<TantivyDocument>(doc_address)?;
+            let title = doc
+                .get_first(title)
+                .and_then(|v| v.as_str())
+                .unwrap()
+                .to_owned();
             Ok(title)
         })
         .collect::<Result<Vec<_>>>()?;
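The retrieval change above replaces the old `as_text().unwrap()` call with an `and_then`-chained `as_str()`, keeping the field lookup a single fallible pipeline. The same `Option` chaining can be sketched without tantivy at all; the `FieldValue` type below is an illustrative stand-in, not a tantivy type:

```rust
// Illustrative stand-in for a stored field value; not a tantivy type.
enum FieldValue {
    Str(String),
    U64(u64),
}

impl FieldValue {
    // Like the value accessor used above: Some only for string values.
    fn as_str(&self) -> Option<&str> {
        match self {
            FieldValue::Str(s) => Some(s),
            _ => None,
        }
    }
}

// Mirrors the doc.get_first(..).and_then(|v| v.as_str()) pipeline.
fn first_title(values: &[FieldValue]) -> Option<String> {
    values
        .first()                  // like doc.get_first(title)
        .and_then(|v| v.as_str()) // None if the first value isn't a string
        .map(|s| s.to_owned())
}

fn main() {
    let doc = vec![FieldValue::Str("The Old Man and the Sea".to_string())];
    assert_eq!(first_title(&doc).as_deref(), Some("The Old Man and the Sea"));
    assert_eq!(first_title(&[FieldValue::U64(7)]), None);
    println!("ok");
}
```

The chained form short-circuits on a missing field or a non-string value instead of panicking halfway through.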


@@ -13,7 +13,7 @@ use tantivy::collector::{Count, TopDocs};
 use tantivy::query::TermQuery;
 use tantivy::schema::*;
 use tantivy::tokenizer::{PreTokenizedString, SimpleTokenizer, Token, TokenStream, Tokenizer};
-use tantivy::{doc, Index, ReloadPolicy};
+use tantivy::{doc, Index, IndexWriter, ReloadPolicy};
 use tempfile::TempDir;

 fn pre_tokenize_text(text: &str) -> Vec<Token> {
@@ -38,7 +38,7 @@ fn main() -> tantivy::Result<()> {
     let index = Index::create_in_dir(&index_path, schema.clone())?;
-    let mut index_writer = index.writer(50_000_000)?;
+    let mut index_writer: IndexWriter = index.writer(50_000_000)?;
     // We can create a document manually, by setting the fields
     // one by one in a Document object.
@@ -83,7 +83,7 @@ fn main() -> tantivy::Result<()> {
         }]
     }"#;
-    let short_man_doc = schema.parse_document(short_man_json)?;
+    let short_man_doc = TantivyDocument::parse_json(&schema, short_man_json)?;
     index_writer.add_document(short_man_doc)?;
@@ -94,7 +94,7 @@ fn main() -> tantivy::Result<()> {
     let reader = index
         .reader_builder()
-        .reload_policy(ReloadPolicy::OnCommit)
+        .reload_policy(ReloadPolicy::OnCommitWithDelay)
         .try_into()?;
     let searcher = reader.searcher();
@@ -115,8 +115,8 @@ fn main() -> tantivy::Result<()> {
     // Note that the tokens are not stored along with the original text
     // in the document store
     for (_score, doc_address) in top_docs {
-        let retrieved_doc = searcher.doc(doc_address)?;
-        println!("Document: {}", schema.to_json(&retrieved_doc));
+        let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
+        println!("{}", retrieved_doc.to_json(&schema));
     }
     // In contrary to the previous query, when we search for the "man" term we


@@ -10,7 +10,8 @@
 use tantivy::collector::TopDocs;
 use tantivy::query::QueryParser;
 use tantivy::schema::*;
-use tantivy::{doc, Index, Snippet, SnippetGenerator};
+use tantivy::snippet::{Snippet, SnippetGenerator};
+use tantivy::{doc, Index, IndexWriter};
 use tempfile::TempDir;

 fn main() -> tantivy::Result<()> {
@@ -27,7 +28,7 @@ fn main() -> tantivy::Result<()> {
     // # Indexing documents
     let index = Index::create_in_dir(&index_path, schema)?;
-    let mut index_writer = index.writer(50_000_000)?;
+    let mut index_writer: IndexWriter = index.writer(50_000_000)?;
     // we'll only need one doc for this example.
     index_writer.add_document(doc!(
@@ -54,13 +55,10 @@ fn main() -> tantivy::Result<()> {
     let snippet_generator = SnippetGenerator::create(&searcher, &*query, body)?;
     for (score, doc_address) in top_docs {
-        let doc = searcher.doc(doc_address)?;
+        let doc = searcher.doc::<TantivyDocument>(doc_address)?;
         let snippet = snippet_generator.snippet_from_doc(&doc);
         println!("Document score {score}:");
-        println!(
-            "title: {}",
-            doc.get_first(title).unwrap().as_text().unwrap()
-        );
+        println!("title: {}", doc.get_first(title).unwrap().as_str().unwrap());
         println!("snippet: {}", snippet.to_html());
         println!("custom highlighting: {}", highlight(snippet));
     }


@@ -15,7 +15,7 @@ use tantivy::collector::TopDocs;
 use tantivy::query::QueryParser;
 use tantivy::schema::*;
 use tantivy::tokenizer::*;
-use tantivy::{doc, Index};
+use tantivy::{doc, Index, IndexWriter};

 fn main() -> tantivy::Result<()> {
     // this example assumes you understand the content in `basic_search`
@@ -60,7 +60,7 @@ fn main() -> tantivy::Result<()> {
     index.tokenizers().register("stoppy", tokenizer);
-    let mut index_writer = index.writer(50_000_000)?;
+    let mut index_writer: IndexWriter = index.writer(50_000_000)?;
     let title = schema.get_field("title").unwrap();
     let body = schema.get_field("body").unwrap();
@@ -105,9 +105,9 @@ fn main() -> tantivy::Result<()> {
     let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
     for (score, doc_address) in top_docs {
-        let retrieved_doc = searcher.doc(doc_address)?;
+        let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
         println!("\n==\nDocument score {score}:");
-        println!("{}", schema.to_json(&retrieved_doc));
+        println!("{}", retrieved_doc.to_json(&schema));
     }
     Ok(())
Ok(()) Ok(())


@@ -6,8 +6,8 @@ use tantivy::collector::TopDocs;
 use tantivy::query::QueryParser;
 use tantivy::schema::{Schema, FAST, TEXT};
 use tantivy::{
-    doc, DocAddress, DocId, Index, Opstamp, Searcher, SearcherGeneration, SegmentId, SegmentReader,
-    Warmer,
+    doc, DocAddress, DocId, Index, IndexWriter, Opstamp, Searcher, SearcherGeneration, SegmentId,
+    SegmentReader, Warmer,
 };

 // This example shows how warmers can be used to
@@ -143,7 +143,7 @@ fn main() -> tantivy::Result<()> {
     const SNEAKERS: ProductId = 23222;
     let index = Index::create_in_ram(schema);
-    let mut writer = index.writer_with_num_threads(1, 10_000_000)?;
+    let mut writer: IndexWriter = index.writer_with_num_threads(1, 15_000_000)?;
     writer.add_document(doc!(product_id=>OLIVE_OIL, text=>"cooking olive oil from greece"))?;
     writer.add_document(doc!(product_id=>GLOVES, text=>"kitchen gloves, perfect for cooking"))?;
     writer.add_document(doc!(product_id=>SNEAKERS, text=>"uber sweet sneakers"))?;


@@ -1,7 +1,7 @@
 [package]
 authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
 name = "ownedbytes"
-version = "0.5.0"
+version = "0.6.0"
 edition = "2021"
 description = "Expose data as static slice"
 license = "MIT"


@@ -1,7 +1,7 @@
 use std::convert::TryInto;
 use std::ops::{Deref, Range};
 use std::sync::Arc;
-use std::{fmt, io, mem};
+use std::{fmt, io};

 pub use stable_deref_trait::StableDeref;
@@ -26,8 +26,8 @@ impl OwnedBytes {
         data_holder: T,
     ) -> OwnedBytes {
         let box_stable_deref = Arc::new(data_holder);
-        let bytes: &[u8] = box_stable_deref.as_ref();
-        let data = unsafe { mem::transmute::<_, &'static [u8]>(bytes.deref()) };
+        let bytes: &[u8] = box_stable_deref.deref();
+        let data = unsafe { &*(bytes as *const [u8]) };
         OwnedBytes {
             data,
             box_stable_deref,
@@ -57,6 +57,12 @@ impl OwnedBytes {
         self.data.len()
     }

+    /// Returns true iff this `OwnedBytes` is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.data.is_empty()
+    }
+
     /// Splits the OwnedBytes into two OwnedBytes `(left, right)`.
     ///
     /// Left will hold `split_len` bytes.
@@ -68,13 +74,14 @@ impl OwnedBytes {
     #[inline]
     #[must_use]
     pub fn split(self, split_len: usize) -> (OwnedBytes, OwnedBytes) {
+        let (left_data, right_data) = self.data.split_at(split_len);
         let right_box_stable_deref = self.box_stable_deref.clone();
         let left = OwnedBytes {
-            data: &self.data[..split_len],
+            data: left_data,
             box_stable_deref: self.box_stable_deref,
         };
         let right = OwnedBytes {
-            data: &self.data[split_len..],
+            data: right_data,
            box_stable_deref: right_box_stable_deref,
         };
         (left, right)
@@ -99,55 +106,45 @@ impl OwnedBytes {
     ///
     /// `self` is truncated to `split_len`, left with the remaining bytes.
     pub fn split_off(&mut self, split_len: usize) -> OwnedBytes {
+        let (left, right) = self.data.split_at(split_len);
         let right_box_stable_deref = self.box_stable_deref.clone();
         let right_piece = OwnedBytes {
-            data: &self.data[split_len..],
+            data: right,
            box_stable_deref: right_box_stable_deref,
         };
-        self.data = &self.data[..split_len];
+        self.data = left;
         right_piece
     }

-    /// Returns true iff this `OwnedBytes` is empty.
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.as_slice().is_empty()
-    }
-
     /// Drops the left most `advance_len` bytes.
     #[inline]
-    pub fn advance(&mut self, advance_len: usize) {
-        self.data = &self.data[advance_len..]
+    pub fn advance(&mut self, advance_len: usize) -> &[u8] {
+        let (data, rest) = self.data.split_at(advance_len);
+        self.data = rest;
+        data
     }

     /// Reads an `u8` from the `OwnedBytes` and advance by one byte.
     #[inline]
     pub fn read_u8(&mut self) -> u8 {
-        assert!(!self.is_empty());
-        let byte = self.as_slice()[0];
-        self.advance(1);
-        byte
+        self.advance(1)[0]
     }

-    /// Reads an `u64` encoded as little-endian from the `OwnedBytes` and advance by 8 bytes.
     #[inline]
-    pub fn read_u64(&mut self) -> u64 {
-        assert!(self.len() > 7);
-        let octlet: [u8; 8] = self.as_slice()[..8].try_into().unwrap();
-        self.advance(8);
-        u64::from_le_bytes(octlet)
+    fn read_n<const N: usize>(&mut self) -> [u8; N] {
+        self.advance(N).try_into().unwrap()
     }

     /// Reads an `u32` encoded as little-endian from the `OwnedBytes` and advance by 4 bytes.
     #[inline]
     pub fn read_u32(&mut self) -> u32 {
-        assert!(self.len() > 3);
-        let quad: [u8; 4] = self.as_slice()[..4].try_into().unwrap();
-        self.advance(4);
-        u32::from_le_bytes(quad)
+        u32::from_le_bytes(self.read_n())
+    }
+
+    /// Reads an `u64` encoded as little-endian from the `OwnedBytes` and advance by 8 bytes.
+    #[inline]
+    pub fn read_u64(&mut self) -> u64 {
+        u64::from_le_bytes(self.read_n())
     }
 }
@@ -201,32 +198,33 @@ impl Deref for OwnedBytes {
     }
 }

+impl AsRef<[u8]> for OwnedBytes {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        self.as_slice()
+    }
+}
+
 impl io::Read for OwnedBytes {
     #[inline]
     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
-        let read_len = {
-            let data = self.as_slice();
-            if data.len() >= buf.len() {
-                let buf_len = buf.len();
-                buf.copy_from_slice(&data[..buf_len]);
-                buf.len()
-            } else {
-                let data_len = data.len();
-                buf[..data_len].copy_from_slice(data);
-                data_len
-            }
-        };
-        self.advance(read_len);
-        Ok(read_len)
+        let data_len = self.data.len();
+        let buf_len = buf.len();
+        if data_len >= buf_len {
+            let data = self.advance(buf_len);
+            buf.copy_from_slice(data);
+            Ok(buf_len)
+        } else {
+            buf[..data_len].copy_from_slice(self.data);
+            self.data = &[];
+            Ok(data_len)
+        }
     }
     #[inline]
     fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
-        let read_len = {
-            let data = self.as_slice();
-            buf.extend(data);
-            data.len()
-        };
-        self.advance(read_len);
+        buf.extend(self.data);
+        let read_len = self.data.len();
+        self.data = &[];
         Ok(read_len)
     }
     #[inline]
@@ -242,13 +240,6 @@ impl io::Read for OwnedBytes {
     }
 }

-impl AsRef<[u8]> for OwnedBytes {
-    #[inline]
-    fn as_ref(&self) -> &[u8] {
-        self.as_slice()
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use std::io::{self, Read};
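The rewritten read path in this file hinges on `advance` now returning the bytes it skips, so every fixed-size read collapses into a one-liner over a const-generic `read_n`. A minimal sketch of the same cursor pattern over a plain byte slice (the `Cursor` type here is illustrative, not part of the crate):

```rust
/// A tiny byte cursor mimicking the OwnedBytes read path above.
struct Cursor<'a> {
    data: &'a [u8],
}

impl<'a> Cursor<'a> {
    /// Drops the leftmost `n` bytes and returns them, like the new `advance`.
    fn advance(&mut self, n: usize) -> &'a [u8] {
        let (head, rest) = self.data.split_at(n);
        self.data = rest;
        head
    }

    /// Reads N bytes into a fixed-size array, like the private `read_n`.
    fn read_n<const N: usize>(&mut self) -> [u8; N] {
        self.advance(N).try_into().unwrap()
    }

    fn read_u8(&mut self) -> u8 {
        self.advance(1)[0]
    }

    fn read_u32(&mut self) -> u32 {
        u32::from_le_bytes(self.read_n())
    }
}

fn main() {
    let bytes = [0x2a, 0x01, 0x00, 0x00, 0x00];
    let mut cursor = Cursor { data: &bytes };
    assert_eq!(cursor.read_u8(), 42);
    assert_eq!(cursor.read_u32(), 1);
    assert!(cursor.data.is_empty());
    println!("ok");
}
```

`split_at` panics on out-of-range lengths, which replaces the explicit `assert!` bounds checks the old code carried in each reader.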


@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-query-grammar"
-version = "0.20.0"
+version = "0.21.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -12,6 +12,4 @@ keywords = ["search", "information", "retrieval"]
 edition = "2021"

 [dependencies]
-combine = {version="4", default-features=false, features=[] }
-once_cell = "1.7.2"
-regex ={ version = "1.5.4", default-features = false, features = ["std", "unicode"] }
+nom = "7"


@@ -0,0 +1,353 @@
//! nom combinators for infallible operations
use std::convert::Infallible;
use nom::{AsChar, IResult, InputLength, InputTakeAtPosition};
pub(crate) type ErrorList = Vec<LenientErrorInternal>;
pub(crate) type JResult<I, O> = IResult<I, (O, ErrorList), Infallible>;
/// An error, with an end-of-string based offset
#[derive(Debug)]
pub(crate) struct LenientErrorInternal {
pub pos: usize,
pub message: String,
}
/// A recoverable error and the position it happened at
#[derive(Debug, PartialEq)]
pub struct LenientError {
pub pos: usize,
pub message: String,
}
impl LenientError {
pub(crate) fn from_internal(internal: LenientErrorInternal, str_len: usize) -> LenientError {
LenientError {
pos: str_len - internal.pos,
message: internal.message,
}
}
}
fn unwrap_infallible<T>(res: Result<T, nom::Err<Infallible>>) -> T {
match res {
Ok(val) => val,
Err(_) => unreachable!(),
}
}
// when rfcs#1733 gets stabilized, this can make things clearer
// trait InfallibleParser<I, O> = nom::Parser<I, (O, ErrorList), std::convert::Infallible>;
/// A variant of the classical `opt` parser, except it returns an infallible error type.
///
/// It's less generic than the original to ease type resolution in the rest of the code.
pub(crate) fn opt_i<I: Clone, O, F>(mut f: F) -> impl FnMut(I) -> JResult<I, Option<O>>
where F: nom::Parser<I, O, nom::error::Error<I>> {
move |input: I| {
let i = input.clone();
match f.parse(input) {
Ok((i, o)) => Ok((i, (Some(o), Vec::new()))),
Err(_) => Ok((i, (None, Vec::new()))),
}
}
}
pub(crate) fn opt_i_err<'a, I: Clone + InputLength, O, F>(
mut f: F,
message: impl ToString + 'a,
) -> impl FnMut(I) -> JResult<I, Option<O>> + 'a
where
F: nom::Parser<I, O, nom::error::Error<I>> + 'a,
{
move |input: I| {
let i = input.clone();
match f.parse(input) {
Ok((i, o)) => Ok((i, (Some(o), Vec::new()))),
Err(_) => {
let errs = vec![LenientErrorInternal {
pos: i.input_len(),
message: message.to_string(),
}];
Ok((i, (None, errs)))
}
}
}
}
pub(crate) fn space0_infallible<T>(input: T) -> JResult<T, T>
where
T: InputTakeAtPosition + Clone,
<T as InputTakeAtPosition>::Item: AsChar + Clone,
{
opt_i(nom::character::complete::multispace0)(input)
.map(|(left, (spaces, errors))| (left, (spaces.expect("multispace0 can't fail"), errors)))
}
pub(crate) fn space1_infallible<T>(input: T) -> JResult<T, Option<T>>
where
T: InputTakeAtPosition + Clone + InputLength,
<T as InputTakeAtPosition>::Item: AsChar + Clone,
{
opt_i(nom::character::complete::multispace1)(input).map(|(left, (spaces, mut errors))| {
if spaces.is_none() {
errors.push(LenientErrorInternal {
pos: left.input_len(),
message: "missing space".to_string(),
})
}
(left, (spaces, errors))
})
}
pub(crate) fn fallible<I, O, E: nom::error::ParseError<I>, F>(
mut f: F,
) -> impl FnMut(I) -> IResult<I, O, E>
where F: nom::Parser<I, (O, ErrorList), Infallible> {
use nom::Err;
move |input: I| match f.parse(input) {
Ok((input, (output, _err))) => Ok((input, output)),
Err(Err::Incomplete(needed)) => Err(Err::Incomplete(needed)),
Err(Err::Error(val)) | Err(Err::Failure(val)) => match val {},
}
}
pub(crate) fn delimited_infallible<I, O1, O2, O3, F, G, H>(
mut first: F,
mut second: G,
mut third: H,
) -> impl FnMut(I) -> JResult<I, O2>
where
F: nom::Parser<I, (O1, ErrorList), Infallible>,
G: nom::Parser<I, (O2, ErrorList), Infallible>,
H: nom::Parser<I, (O3, ErrorList), Infallible>,
{
move |input: I| {
let (input, (_, mut err)) = first.parse(input)?;
let (input, (o2, mut err2)) = second.parse(input)?;
err.append(&mut err2);
let (input, (_, mut err3)) = third.parse(input)?;
err.append(&mut err3);
Ok((input, (o2, err)))
}
}
// Parse nothing. Just a lazy way to not implement terminated/preceded and use delimited instead
pub(crate) fn nothing(i: &str) -> JResult<&str, ()> {
Ok((i, ((), Vec::new())))
}
pub(crate) trait TupleInfallible<I, O> {
/// Parses the input and returns a tuple of results of each parser.
fn parse(&mut self, input: I) -> JResult<I, O>;
}
impl<Input, Output, F: nom::Parser<Input, (Output, ErrorList), Infallible>>
TupleInfallible<Input, (Output,)> for (F,)
{
fn parse(&mut self, input: Input) -> JResult<Input, (Output,)> {
self.0.parse(input).map(|(i, (o, e))| (i, ((o,), e)))
}
}
// these macros are heavily copied from nom, with some minor adaptations for our type
macro_rules! tuple_trait(
($name1:ident $ty1:ident, $name2: ident $ty2:ident, $($name:ident $ty:ident),*) => (
tuple_trait!(__impl $name1 $ty1, $name2 $ty2; $($name $ty),*);
);
(__impl $($name:ident $ty: ident),+; $name1:ident $ty1:ident, $($name2:ident $ty2:ident),*) => (
tuple_trait_impl!($($name $ty),+);
tuple_trait!(__impl $($name $ty),+ , $name1 $ty1; $($name2 $ty2),*);
);
(__impl $($name:ident $ty: ident),+; $name1:ident $ty1:ident) => (
tuple_trait_impl!($($name $ty),+);
tuple_trait_impl!($($name $ty),+, $name1 $ty1);
);
);
macro_rules! tuple_trait_impl(
($($name:ident $ty: ident),+) => (
impl<
Input: Clone, $($ty),+ ,
$($name: nom::Parser<Input, ($ty, ErrorList), Infallible>),+
> TupleInfallible<Input, ( $($ty),+ )> for ( $($name),+ ) {
fn parse(&mut self, input: Input) -> JResult<Input, ( $($ty),+ )> {
let mut error_list = Vec::new();
tuple_trait_inner!(0, self, input, (), error_list, $($name)+)
}
}
);
);
macro_rules! tuple_trait_inner(
($it:tt, $self:expr, $input:expr, (), $error_list:expr, $head:ident $($id:ident)+) => ({
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
$error_list.append(&mut err);
succ!($it, tuple_trait_inner!($self, i, ( o ), $error_list, $($id)+))
});
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident $($id:ident)+) => ({
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
$error_list.append(&mut err);
succ!($it, tuple_trait_inner!($self, i, ($($parsed)* , o), $error_list, $($id)+))
});
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident) => ({
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
$error_list.append(&mut err);
Ok((i, (($($parsed)* , o), $error_list)))
});
);
macro_rules! succ (
(0, $submac:ident ! ($($rest:tt)*)) => ($submac!(1, $($rest)*));
(1, $submac:ident ! ($($rest:tt)*)) => ($submac!(2, $($rest)*));
(2, $submac:ident ! ($($rest:tt)*)) => ($submac!(3, $($rest)*));
(3, $submac:ident ! ($($rest:tt)*)) => ($submac!(4, $($rest)*));
(4, $submac:ident ! ($($rest:tt)*)) => ($submac!(5, $($rest)*));
(5, $submac:ident ! ($($rest:tt)*)) => ($submac!(6, $($rest)*));
(6, $submac:ident ! ($($rest:tt)*)) => ($submac!(7, $($rest)*));
(7, $submac:ident ! ($($rest:tt)*)) => ($submac!(8, $($rest)*));
(8, $submac:ident ! ($($rest:tt)*)) => ($submac!(9, $($rest)*));
(9, $submac:ident ! ($($rest:tt)*)) => ($submac!(10, $($rest)*));
(10, $submac:ident ! ($($rest:tt)*)) => ($submac!(11, $($rest)*));
(11, $submac:ident ! ($($rest:tt)*)) => ($submac!(12, $($rest)*));
(12, $submac:ident ! ($($rest:tt)*)) => ($submac!(13, $($rest)*));
(13, $submac:ident ! ($($rest:tt)*)) => ($submac!(14, $($rest)*));
(14, $submac:ident ! ($($rest:tt)*)) => ($submac!(15, $($rest)*));
(15, $submac:ident ! ($($rest:tt)*)) => ($submac!(16, $($rest)*));
(16, $submac:ident ! ($($rest:tt)*)) => ($submac!(17, $($rest)*));
(17, $submac:ident ! ($($rest:tt)*)) => ($submac!(18, $($rest)*));
(18, $submac:ident ! ($($rest:tt)*)) => ($submac!(19, $($rest)*));
(19, $submac:ident ! ($($rest:tt)*)) => ($submac!(20, $($rest)*));
(20, $submac:ident ! ($($rest:tt)*)) => ($submac!(21, $($rest)*));
);
tuple_trait!(FnA A, FnB B, FnC C, FnD D, FnE E, FnF F, FnG G, FnH H, FnI I, FnJ J, FnK K, FnL L,
FnM M, FnN N, FnO O, FnP P, FnQ Q, FnR R, FnS S, FnT T, FnU U);
// Special case: implement `TupleInfallible` for `()`, the unit type.
// This can come up in macros which accept a variable number of arguments.
// Literally, `()` is an empty tuple, so it should simply parse nothing.
impl<I> TupleInfallible<I, ()> for () {
fn parse(&mut self, input: I) -> JResult<I, ()> {
Ok((input, ((), Vec::new())))
}
}
pub(crate) fn tuple_infallible<I, O, List: TupleInfallible<I, O>>(
mut l: List,
) -> impl FnMut(I) -> JResult<I, O> {
move |i: I| l.parse(i)
}
pub(crate) fn separated_list_infallible<I, O, O2, F, G>(
mut sep: G,
mut f: F,
) -> impl FnMut(I) -> JResult<I, Vec<O>>
where
I: Clone + InputLength,
F: nom::Parser<I, (O, ErrorList), Infallible>,
G: nom::Parser<I, (O2, ErrorList), Infallible>,
{
move |i: I| {
let mut res: Vec<O> = Vec::new();
let mut errors: ErrorList = Vec::new();
let (mut i, (o, mut err)) = unwrap_infallible(f.parse(i.clone()));
errors.append(&mut err);
res.push(o);
loop {
let (i_sep_parsed, (_, mut err_sep)) = unwrap_infallible(sep.parse(i.clone()));
let len_before = i_sep_parsed.input_len();
let (i_elem_parsed, (o, mut err_elem)) =
unwrap_infallible(f.parse(i_sep_parsed.clone()));
// infinite loop check: the parser must always consume
// if we consumed nothing here, don't produce an element.
if i_elem_parsed.input_len() == len_before {
return Ok((i, (res, errors)));
}
res.push(o);
errors.append(&mut err_sep);
errors.append(&mut err_elem);
i = i_elem_parsed;
}
}
}
pub(crate) trait Alt<I, O> {
/// Tests each parser in the tuple and returns the result of the first one that succeeds
fn choice(&mut self, input: I) -> Option<JResult<I, O>>;
}
macro_rules! alt_trait(
($first_cond:ident $first:ident, $($id_cond:ident $id: ident),+) => (
alt_trait!(__impl $first_cond $first; $($id_cond $id),+);
);
(__impl $($current_cond:ident $current:ident),*; $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => (
alt_trait_impl!($($current_cond $current),*);
alt_trait!(__impl $($current_cond $current,)* $head_cond $head; $($id_cond $id),+);
);
(__impl $($current_cond:ident $current:ident),*; $head_cond:ident $head:ident) => (
alt_trait_impl!($($current_cond $current),*);
alt_trait_impl!($($current_cond $current,)* $head_cond $head);
);
);
macro_rules! alt_trait_impl(
($($id_cond:ident $id:ident),+) => (
impl<
Input: Clone, Output,
$(
// () are to make things easier on me, but I'm not entirely sure whether we can do better
// with rule E0207
$id_cond: nom::Parser<Input, (), ()>,
$id: nom::Parser<Input, (Output, ErrorList), Infallible>
),+
> Alt<Input, Output> for ( $(($id_cond, $id),)+ ) {
fn choice(&mut self, input: Input) -> Option<JResult<Input, Output>> {
match self.0.0.parse(input.clone()) {
Err(_) => alt_trait_inner!(1, self, input, $($id_cond $id),+),
Ok((input_left, _)) => Some(self.0.1.parse(input_left)),
}
}
}
);
);
macro_rules! alt_trait_inner(
($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => (
match $self.$it.0.parse($input.clone()) {
Err(_) => succ!($it, alt_trait_inner!($self, $input, $($id_cond $id),+)),
Ok((input_left, _)) => Some($self.$it.1.parse(input_left)),
}
);
($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident) => (
None
);
);
alt_trait!(A1 A, B1 B, C1 C, D1 D, E1 E, F1 F, G1 G, H1 H, I1 I, J1 J, K1 K,
L1 L, M1 M, N1 N, O1 O, P1 P, Q1 Q, R1 R, S1 S, T1 T, U1 U);
/// An alt()-like combinator. For each branch, it first tries a fallible parser, which either
/// commits to that branch or tells it to check the next branch; on commit, the infallible
/// parser that follows is executed.
///
/// In case no branch matches, the default (infallible) parser is executed.
pub(crate) fn alt_infallible<I: Clone, O, F, List: Alt<I, O>>(
mut l: List,
mut default: F,
) -> impl FnMut(I) -> JResult<I, O>
where
F: nom::Parser<I, (O, ErrorList), Infallible>,
{
move |i: I| l.choice(i.clone()).unwrap_or_else(|| default.parse(i))
}


@@ -1,19 +1,26 @@
 #![allow(clippy::derive_partial_eq_without_eq)]
+mod infallible;
 mod occur;
 mod query_grammar;
 mod user_input_ast;

-use combine::parser::Parser;
-
+pub use crate::infallible::LenientError;
 pub use crate::occur::Occur;
-use crate::query_grammar::parse_to_ast;
+use crate::query_grammar::{parse_to_ast, parse_to_ast_lenient};
 pub use crate::user_input_ast::{
     Delimiter, UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral,
 };

 pub struct Error;

+/// Parse a query
 pub fn parse_query(query: &str) -> Result<UserInputAst, Error> {
-    let (user_input_ast, _remaining) = parse_to_ast().parse(query).map_err(|_| Error)?;
+    let (_remaining, user_input_ast) = parse_to_ast(query).map_err(|_| Error)?;
     Ok(user_input_ast)
 }
+
+/// Parse a query, trying to recover from syntax errors, and giving hints toward fixing errors.
+pub fn parse_query_lenient(query: &str) -> (UserInputAst, Vec<LenientError>) {
+    parse_to_ast_lenient(query)
+}
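The lenient entry point added here returns a best-effort result plus a list of recoverable errors instead of failing outright, which is the whole point of the infallible combinators in this PR. The shape of that API can be sketched independently of the real grammar; the tokenizing "parser" below is a stand-in, not the crate's actual one:

```rust
/// A recoverable error with the position it happened at, mirroring the
/// crate's `LenientError` (here `pos` is a token index for simplicity).
#[derive(Debug, PartialEq)]
struct LenientError {
    pos: usize,
    message: String,
}

/// A stand-in lenient parser: splits a query on whitespace into terms,
/// recording (rather than failing on) empty quoted phrases like `""`.
fn parse_terms_lenient(query: &str) -> (Vec<String>, Vec<LenientError>) {
    let mut terms = Vec::new();
    let mut errors = Vec::new();
    for (idx, token) in query.split_whitespace().enumerate() {
        if token == "\"\"" {
            // don't abort: record the problem and keep parsing
            errors.push(LenientError {
                pos: idx,
                message: "empty phrase".to_string(),
            });
        } else {
            terms.push(token.to_string());
        }
    }
    (terms, errors)
}

fn main() {
    let (terms, errors) = parse_terms_lenient("title:diary \"\" sea");
    assert_eq!(terms, vec!["title:diary".to_string(), "sea".to_string()]);
    assert_eq!(errors.len(), 1);
    println!("ok");
}
```

Returning `(value, Vec<error>)` instead of `Result` means callers always get a usable AST, and the error list doubles as hints for fixing the query.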

File diff suppressed because it is too large.


@@ -3,7 +3,7 @@ use std::fmt::{Debug, Formatter};
 use crate::Occur;

-#[derive(PartialEq)]
+#[derive(PartialEq, Clone)]
 pub enum UserInputLeaf {
     Literal(UserInputLiteral),
     All,
@@ -16,6 +16,34 @@ pub enum UserInputLeaf {
         field: Option<String>,
         elements: Vec<String>,
     },
+    Exists {
+        field: String,
+    },
+}
+
+impl UserInputLeaf {
+    pub(crate) fn set_field(self, field: Option<String>) -> Self {
+        match self {
+            UserInputLeaf::Literal(mut literal) => {
+                literal.field_name = field;
+                UserInputLeaf::Literal(literal)
+            }
+            UserInputLeaf::All => UserInputLeaf::All,
+            UserInputLeaf::Range {
+                field: _,
+                lower,
+                upper,
+            } => UserInputLeaf::Range {
+                field,
+                lower,
+                upper,
+            },
+            UserInputLeaf::Set { field: _, elements } => UserInputLeaf::Set { field, elements },
+            UserInputLeaf::Exists { field: _ } => UserInputLeaf::Exists {
+                field: field.expect("Exist query without a field isn't allowed"),
+            },
+        }
+    }
 }

 impl Debug for UserInputLeaf {
@@ -28,6 +56,7 @@ impl Debug for UserInputLeaf {
                 ref upper,
             } => {
                 if let Some(ref field) = field {
+                    // TODO properly escape field (in case of \")
                     write!(formatter, "\"{field}\":")?;
                 }
                 lower.display_lower(formatter)?;
@@ -37,6 +66,7 @@ impl Debug for UserInputLeaf {
             }
             UserInputLeaf::Set { field, elements } => {
                 if let Some(ref field) = field {
+                    // TODO properly escape field (in case of \")
                     write!(formatter, "\"{field}\": ")?;
                 }
                 write!(formatter, "IN [")?;
@@ -44,11 +74,15 @@ impl Debug for UserInputLeaf {
                     if i != 0 {
                         write!(formatter, " ")?;
                     }
+                    // TODO properly escape element
                     write!(formatter, "\"{text}\"")?;
                 }
                 write!(formatter, "]")
             }
             UserInputLeaf::All => write!(formatter, "*"),
+            UserInputLeaf::Exists { field } => {
+                write!(formatter, "\"{field}\":*")
+            }
         }
     }
 }
@@ -60,7 +94,7 @@ pub enum Delimiter {
     None,
 }

-#[derive(PartialEq)]
+#[derive(PartialEq, Clone)]
 pub struct UserInputLiteral {
     pub field_name: Option<String>,
     pub phrase: String,
@@ -72,16 +106,20 @@ pub struct UserInputLiteral {
 impl fmt::Debug for UserInputLiteral {
     fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
         if let Some(ref field) = self.field_name {
+            // TODO properly escape field (in case of \")
             write!(formatter, "\"{field}\":")?;
         }
         match self.delimiter {
             Delimiter::SingleQuotes => {
+                // TODO properly escape element (in case of \')
                 write!(formatter, "'{}'", self.phrase)?;
             }
             Delimiter::DoubleQuotes => {
+                // TODO properly escape element (in case of \")
                 write!(formatter, "\"{}\"", self.phrase)?;
             }
             Delimiter::None => {
+                // TODO properly escape element
                 write!(formatter, "{}", self.phrase)?;
             }
         }
@@ -94,7 +132,7 @@ impl fmt::Debug for UserInputLiteral {
     }
 }

-#[derive(PartialEq)]
+#[derive(PartialEq, Debug, Clone)]
 pub enum UserInputBound {
     Inclusive(String),
     Exclusive(String),
@@ -104,6 +142,7 @@ pub enum UserInputBound {
 impl UserInputBound {
     fn display_lower(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
         match *self {
+            // TODO properly escape word if required
             UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{word}\""),
             UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{word}\""),
             UserInputBound::Unbounded => write!(formatter, "{{\"*\""),
@@ -112,6 +151,7 @@ impl UserInputBound {
     fn display_upper(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
         match *self {
+            // TODO properly escape word if required
             UserInputBound::Inclusive(ref word) => write!(formatter, "\"{word}\"]"),
             UserInputBound::Exclusive(ref word) => write!(formatter, "\"{word}\"}}"),
             UserInputBound::Unbounded => write!(formatter, "\"*\"}}"),
@@ -127,6 +167,7 @@ impl UserInputBound {
     }
 }

+#[derive(PartialEq, Clone)]
 pub enum UserInputAst {
     Clause(Vec<(Option<Occur>, UserInputAst)>),
     Leaf(Box<UserInputLeaf>),
@@ -196,6 +237,7 @@ impl fmt::Debug for UserInputAst {
         match *self {
             UserInputAst::Clause(ref subqueries) => {
                 if subqueries.is_empty() {
+                    // TODO this will break ast reserialization, is writing "( )" enought?
                     write!(formatter, "<emptyclause>")?;
                 } else {
                     write!(formatter, "(")?;


@@ -48,7 +48,7 @@ mod bench {
let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
let index = Index::create_from_tempdir(schema_builder.build())?;
let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
@@ -85,7 +85,7 @@ mod bench {
if cardinality == Cardinality::Sparse {
doc_with_value /= 20;
}
let _val_max = 1_000_000.0;
for _ in 0..doc_with_value {
let val: f64 = rng.gen_range(0.0..1_000_000.0);
let json = if rng.gen_bool(0.1) {
@@ -290,6 +290,41 @@ mod bench {
});
}
bench_all_cardinalities!(bench_aggregation_terms_many_with_top_hits_agg);
fn bench_aggregation_terms_many_with_top_hits_agg_card(
b: &mut Bencher,
cardinality: Cardinality,
) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": {
"terms": { "field": "text_many_terms" },
"aggs": {
"top_hits": { "top_hits":
{
"sort": [
{ "score": "desc" }
],
"size": 2,
"doc_value_fields": ["score_f64"]
}
}
}
},
}))
.unwrap();
let collector = get_collector(agg_req);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
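The `top_hits` sub-aggregation benchmarked above keeps only the k best hits per bucket, sorted by score. The core of such a computation is bounded top-k selection, which can be sketched with the standard library alone; the names below are illustrative, and tantivy's actual `TopNComputer` is more elaborate (tie-breaking, doc addresses, float scores):

```rust
// Bounded top-k selection: keep a min-heap of at most k (doc, score) pairs,
// so the lowest-scoring survivor is always cheap to evict.
use std::cmp::Reverse;
use std::collections::BinaryHeap;

fn top_k_by_score(hits: &[(u32, u64)], k: usize) -> Vec<(u32, u64)> {
    // `Reverse` turns the max-heap into a min-heap keyed on (score, doc),
    // so `pop` removes the current worst of the kept hits.
    let mut heap: BinaryHeap<Reverse<(u64, u32)>> = BinaryHeap::with_capacity(k + 1);
    for &(doc, score) in hits {
        heap.push(Reverse((score, doc)));
        if heap.len() > k {
            heap.pop(); // evict the lowest-scoring hit
        }
    }
    let mut out: Vec<(u32, u64)> = heap.into_iter().map(|Reverse((s, d))| (d, s)).collect();
    out.sort_by(|a, b| b.1.cmp(&a.1)); // best hit first
    out
}
```

This costs O(n log k) per segment instead of sorting all n hits, which is why the benchmark above only requests `"size": 2`.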
bench_all_cardinalities!(bench_aggregation_terms_many_with_sub_agg);
fn bench_aggregation_terms_many_with_sub_agg_card(b: &mut Bencher, cardinality: Cardinality) {


@@ -73,9 +73,9 @@ impl AggregationLimits {
/// Create a new ResourceLimitGuard, that will release the memory when dropped.
pub fn new_guard(&self) -> ResourceLimitGuard {
ResourceLimitGuard {
// The counter which is shared between the aggregations for one request.
memory_consumption: Arc::clone(&self.memory_consumption),
// The memory_limit in bytes
memory_limit: self.memory_limit,
allocated_with_the_guard: 0,
}
}
@@ -134,3 +134,142 @@ impl Drop for ResourceLimitGuard {
.fetch_sub(self.allocated_with_the_guard, Ordering::Relaxed);
}
}
#[cfg(test)]
mod tests {
use crate::aggregation::tests::exec_request_with_query;
// https://github.com/quickwit-oss/quickwit/issues/3837
#[test]
fn test_agg_limits_with_empty_merge() {
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::bucket::tests::get_test_index_from_docs;
let docs = vec![
vec![r#"{ "date": "2015-01-02T00:00:00Z", "text": "bbb", "text2": "bbb" }"#],
vec![r#"{ "text": "aaa", "text2": "bbb" }"#],
];
let index = get_test_index_from_docs(false, &docs).unwrap();
{
let elasticsearch_compatible_json = json!(
{
"1": {
"terms": {"field": "text2", "min_doc_count": 0},
"aggs": {
"2":{
"date_histogram": {
"field": "date",
"fixed_interval": "1d",
"extended_bounds": {
"min": "2015-01-01T00:00:00Z",
"max": "2015-01-10T00:00:00Z"
}
}
}
}
}
}
);
let agg_req: Aggregations = serde_json::from_str(
&serde_json::to_string(&elasticsearch_compatible_json).unwrap(),
)
.unwrap();
let res = exec_request_with_query(agg_req, &index, Some(("text", "bbb"))).unwrap();
let expected_res = json!({
"1": {
"buckets": [
{
"2": {
"buckets": [
{ "doc_count": 0, "key": 1420070400000.0, "key_as_string": "2015-01-01T00:00:00Z" },
{ "doc_count": 1, "key": 1420156800000.0, "key_as_string": "2015-01-02T00:00:00Z" },
{ "doc_count": 0, "key": 1420243200000.0, "key_as_string": "2015-01-03T00:00:00Z" },
{ "doc_count": 0, "key": 1420329600000.0, "key_as_string": "2015-01-04T00:00:00Z" },
{ "doc_count": 0, "key": 1420416000000.0, "key_as_string": "2015-01-05T00:00:00Z" },
{ "doc_count": 0, "key": 1420502400000.0, "key_as_string": "2015-01-06T00:00:00Z" },
{ "doc_count": 0, "key": 1420588800000.0, "key_as_string": "2015-01-07T00:00:00Z" },
{ "doc_count": 0, "key": 1420675200000.0, "key_as_string": "2015-01-08T00:00:00Z" },
{ "doc_count": 0, "key": 1420761600000.0, "key_as_string": "2015-01-09T00:00:00Z" },
{ "doc_count": 0, "key": 1420848000000.0, "key_as_string": "2015-01-10T00:00:00Z" }
]
},
"doc_count": 1,
"key": "bbb"
}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
}
});
assert_eq!(res, expected_res);
}
}
// https://github.com/quickwit-oss/quickwit/issues/3837
#[test]
fn test_agg_limits_with_empty_data() {
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::bucket::tests::get_test_index_from_docs;
let docs = vec![vec![r#"{ "text": "aaa", "text2": "bbb" }"#]];
let index = get_test_index_from_docs(false, &docs).unwrap();
{
// Empty result since there is no doc with dates
let elasticsearch_compatible_json = json!(
{
"1": {
"terms": {"field": "text2", "min_doc_count": 0},
"aggs": {
"2":{
"date_histogram": {
"field": "date",
"fixed_interval": "1d",
"extended_bounds": {
"min": "2015-01-01T00:00:00Z",
"max": "2015-01-10T00:00:00Z"
}
}
}
}
}
}
);
let agg_req: Aggregations = serde_json::from_str(
&serde_json::to_string(&elasticsearch_compatible_json).unwrap(),
)
.unwrap();
let res = exec_request_with_query(agg_req, &index, Some(("text", "bbb"))).unwrap();
let expected_res = json!({
"1": {
"buckets": [
{
"2": {
"buckets": [
{ "doc_count": 0, "key": 1420070400000.0, "key_as_string": "2015-01-01T00:00:00Z" },
{ "doc_count": 0, "key": 1420156800000.0, "key_as_string": "2015-01-02T00:00:00Z" },
{ "doc_count": 0, "key": 1420243200000.0, "key_as_string": "2015-01-03T00:00:00Z" },
{ "doc_count": 0, "key": 1420329600000.0, "key_as_string": "2015-01-04T00:00:00Z" },
{ "doc_count": 0, "key": 1420416000000.0, "key_as_string": "2015-01-05T00:00:00Z" },
{ "doc_count": 0, "key": 1420502400000.0, "key_as_string": "2015-01-06T00:00:00Z" },
{ "doc_count": 0, "key": 1420588800000.0, "key_as_string": "2015-01-07T00:00:00Z" },
{ "doc_count": 0, "key": 1420675200000.0, "key_as_string": "2015-01-08T00:00:00Z" },
{ "doc_count": 0, "key": 1420761600000.0, "key_as_string": "2015-01-09T00:00:00Z" },
{ "doc_count": 0, "key": 1420848000000.0, "key_as_string": "2015-01-10T00:00:00Z" }
]
},
"doc_count": 0,
"key": "bbb"
}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
}
});
assert_eq!(res, expected_res);
}
}
}


@@ -35,7 +35,7 @@ use super::bucket::{
};
use super::metric::{
AverageAggregation, CountAggregation, MaxAggregation, MinAggregation,
PercentilesAggregationReq, StatsAggregation, SumAggregation, TopHitsAggregation,
};
/// The top-level aggregation request structure, which contains [`Aggregation`] and their user
@@ -44,29 +44,61 @@ use super::metric::{
/// The key is the user defined name of the aggregation.
pub type Aggregations = HashMap<String, Aggregation>;
/// Aggregation request.
///
/// An aggregation is either a bucket or a metric.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(try_from = "AggregationForDeserialization")]
pub struct Aggregation {
/// The aggregation variant, which can be either a bucket or a metric.
#[serde(flatten)]
pub agg: AggregationVariants,
/// The sub_aggregations, only valid for bucket type aggregations. Each bucket will aggregate
/// on the document set in the bucket.
#[serde(rename = "aggs")]
#[serde(default)]
#[serde(skip_serializing_if = "Aggregations::is_empty")]
pub sub_aggregation: Aggregations,
}
/// In order to display a proper error message, we cannot rely on flattening
/// the JSON enum. Instead we introduce an intermediary struct to separate
/// the aggregation from the sub-aggregation.
#[derive(Deserialize)]
struct AggregationForDeserialization {
#[serde(flatten)]
pub aggs_remaining_json: serde_json::Value,
#[serde(rename = "aggs")]
#[serde(default)]
pub sub_aggregation: Aggregations,
}
impl TryFrom<AggregationForDeserialization> for Aggregation {
type Error = serde_json::Error;
fn try_from(value: AggregationForDeserialization) -> serde_json::Result<Self> {
let AggregationForDeserialization {
aggs_remaining_json,
sub_aggregation,
} = value;
let agg: AggregationVariants = serde_json::from_value(aggs_remaining_json)?;
Ok(Aggregation {
agg,
sub_aggregation,
})
}
}
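The `#[serde(try_from = "...")]` detour above exists so that a bad aggregation variant produces a pointed error ("unknown variant ...") instead of serde's generic flatten failure. The pattern itself — deserialize into a permissive intermediary, then convert fallibly with `TryFrom` — can be sketched without serde; every type below is an illustrative stand-in, not tantivy's:

```rust
// Intermediary-struct pattern: capture the ambiguous part raw, split off the
// unambiguous part (here `sub`), then validate the raw part in TryFrom so the
// error message can name the offending variant.
#[derive(Debug, PartialEq)]
enum Variant {
    Avg(String),
    Sum(String),
}

#[derive(Debug, PartialEq)]
struct Request {
    variant: Variant,
    sub: Vec<String>,
}

// Permissive intermediary: the variant stays an opaque string until conversion.
struct RequestRaw {
    variant_raw: String,
    sub: Vec<String>,
}

impl TryFrom<RequestRaw> for Request {
    type Error = String;
    fn try_from(raw: RequestRaw) -> Result<Self, Self::Error> {
        let (kind, field) = raw
            .variant_raw
            .split_once(':')
            .ok_or_else(|| format!("malformed variant: `{}`", raw.variant_raw))?;
        let variant = match kind {
            "avg" => Variant::Avg(field.to_string()),
            "sum" => Variant::Sum(field.to_string()),
            other => {
                return Err(format!(
                    "unknown variant `{other}`, expected one of `avg`, `sum`"
                ))
            }
        };
        Ok(Request { variant, sub: raw.sub })
    }
}
```

With serde, the same shape is wired up by `#[serde(try_from = "RequestRaw")]` plus `#[derive(Deserialize)]` on the intermediary, exactly as the diff does for `Aggregation`.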
impl Aggregation {
pub(crate) fn sub_aggregation(&self) -> &Aggregations {
&self.sub_aggregation
}
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
fast_field_names.extend(
self.agg
.get_fast_field_names()
.iter()
.map(|s| s.to_string()),
);
fast_field_names.extend(get_fast_field_names(&self.sub_aggregation));
}
}
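The recursive name collection above (each aggregation contributes one or more field names, and sub-aggregations are walked into one shared set) can be sketched standalone with simplified stand-in types, not tantivy's:

```rust
// Walk an aggregation tree and gather every referenced field name into a
// HashSet, so duplicates across sub-aggregations collapse automatically.
use std::collections::{HashMap, HashSet};

struct Agg {
    fields: Vec<String>,       // fast fields read by this aggregation
    sub: HashMap<String, Agg>, // named sub-aggregations
}

fn collect_fast_field_names(agg: &Agg, out: &mut HashSet<String>) {
    out.extend(agg.fields.iter().cloned());
    for sub in agg.sub.values() {
        collect_fast_field_names(sub, out);
    }
}
```

Returning `Vec<&str>` from the per-variant method (instead of the old single `&str`) is what lets multi-field aggregations like `top_hits` feed several names into this walk.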
@@ -120,22 +152,27 @@ pub enum AggregationVariants {
/// Computes the sum of the extracted values.
#[serde(rename = "percentiles")]
Percentiles(PercentilesAggregationReq),
/// Finds the top k values matching some order
#[serde(rename = "top_hits")]
TopHits(TopHitsAggregation),
}
impl AggregationVariants {
/// Returns the names of the fields used by the aggregation.
pub fn get_fast_field_names(&self) -> Vec<&str> {
match self {
AggregationVariants::Terms(terms) => vec![terms.field.as_str()],
AggregationVariants::Range(range) => vec![range.field.as_str()],
AggregationVariants::Histogram(histogram) => vec![histogram.field.as_str()],
AggregationVariants::DateHistogram(histogram) => vec![histogram.field.as_str()],
AggregationVariants::Average(avg) => vec![avg.field_name()],
AggregationVariants::Count(count) => vec![count.field_name()],
AggregationVariants::Max(max) => vec![max.field_name()],
AggregationVariants::Min(min) => vec![min.field_name()],
AggregationVariants::Stats(stats) => vec![stats.field_name()],
AggregationVariants::Sum(sum) => vec![sum.field_name()],
AggregationVariants::Percentiles(per) => vec![per.field_name()],
AggregationVariants::TopHits(top_hits) => top_hits.field_names(),
}
}


@@ -1,6 +1,9 @@
//! This will enhance the request tree with access to the fastfield and metadata.
use std::collections::HashMap;
use std::io;
use columnar::{Column, ColumnBlockAccessor, ColumnType, DynamicColumn, StrColumn};
use super::agg_limits::ResourceLimitGuard;
use super::agg_req::{Aggregation, AggregationVariants, Aggregations};
@@ -13,7 +16,8 @@ use super::metric::{
};
use super::segment_agg_result::AggregationLimits;
use super::VecWithNames;
use crate::aggregation::{f64_to_fastfield_u64, Key};
use crate::{SegmentOrdinal, SegmentReader};
#[derive(Default)]
pub(crate) struct AggregationsWithAccessor {
@@ -31,100 +35,313 @@ impl AggregationsWithAccessor {
}
pub struct AggregationWithAccessor {
pub(crate) segment_ordinal: SegmentOrdinal,
/// In general there can be buckets without fast field access, e.g. buckets that are created
/// based on search terms. That is not the case currently, but eventually this needs to be
/// Option or moved.
pub(crate) accessor: Column<u64>,
/// Load insert u64 for missing use case
pub(crate) missing_value_for_accessor: Option<u64>,
pub(crate) str_dict_column: Option<StrColumn>,
pub(crate) field_type: ColumnType,
pub(crate) sub_aggregation: AggregationsWithAccessor,
pub(crate) limits: ResourceLimitGuard,
pub(crate) column_block_accessor: ColumnBlockAccessor<u64>,
/// Used for the missing term aggregation, which checks all columns for existence,
/// and also for the `top_hits` aggregation, which may sort on multiple fields.
/// By convention the missing aggregation is chosen when this property is set
/// (instead of being set in `agg`).
/// If this needs to be used by other aggregations, we need to refactor this.
// NOTE: we can make all other aggregations use this instead of the `accessor` and `field_type`
// (making them obsolete), but will it have a performance impact?
pub(crate) accessors: Vec<(Column<u64>, ColumnType)>,
/// Maps field names to all associated column accessors.
/// This field is used for `docvalue_fields`, which is currently only supported for `top_hits`.
pub(crate) value_accessors: HashMap<String, Vec<DynamicColumn>>,
pub(crate) agg: Aggregation,
}
impl AggregationWithAccessor {
/// May return multiple accessors if the aggregation is e.g. on mixed field types.
fn try_from_agg(
agg: &Aggregation,
sub_aggregation: &Aggregations,
reader: &SegmentReader,
segment_ordinal: SegmentOrdinal,
limits: AggregationLimits,
) -> crate::Result<Vec<AggregationWithAccessor>> {
let mut agg = agg.clone();
let add_agg_with_accessor = |agg: &Aggregation,
accessor: Column<u64>,
column_type: ColumnType,
aggs: &mut Vec<AggregationWithAccessor>|
-> crate::Result<()> {
let res = AggregationWithAccessor {
segment_ordinal,
accessor,
accessors: Default::default(),
value_accessors: Default::default(),
field_type: column_type,
sub_aggregation: get_aggs_with_segment_accessor_and_validate(
sub_aggregation,
reader,
segment_ordinal,
&limits,
)?,
agg: agg.clone(),
limits: limits.new_guard(),
missing_value_for_accessor: None,
str_dict_column: None,
column_block_accessor: Default::default(),
};
aggs.push(res);
Ok(())
};
let add_agg_with_accessors = |agg: &Aggregation,
accessors: Vec<(Column<u64>, ColumnType)>,
aggs: &mut Vec<AggregationWithAccessor>,
value_accessors: HashMap<String, Vec<DynamicColumn>>|
-> crate::Result<()> {
let (accessor, field_type) = accessors.first().expect("at least one accessor");
let res = AggregationWithAccessor {
segment_ordinal,
// TODO: We should do away with the `accessor` field altogether
accessor: accessor.clone(),
value_accessors,
field_type: *field_type,
accessors,
sub_aggregation: get_aggs_with_segment_accessor_and_validate(
sub_aggregation,
reader,
segment_ordinal,
&limits,
)?,
agg: agg.clone(),
limits: limits.new_guard(),
missing_value_for_accessor: None,
str_dict_column: None,
column_block_accessor: Default::default(),
};
aggs.push(res);
Ok(())
};
let mut res: Vec<AggregationWithAccessor> = Vec::new();
use AggregationVariants::*;
match agg.agg {
Range(RangeAggregation {
field: ref field_name,
..
}) => {
let (accessor, column_type) =
get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?;
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
}
Histogram(HistogramAggregation {
field: ref field_name,
..
}) => {
let (accessor, column_type) =
get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?;
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
}
DateHistogram(DateHistogramAggregationReq {
field: ref field_name,
..
}) => {
let (accessor, column_type) =
// Only DateTime is supported for DateHistogram
get_ff_reader(reader, field_name, Some(&[ColumnType::DateTime]))?;
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
}
Terms(TermsAggregation {
field: ref field_name,
ref missing,
..
}) => {
let str_dict_column = reader.fast_fields().str(field_name)?;
let allowed_column_types = [
ColumnType::I64,
ColumnType::U64,
ColumnType::F64,
ColumnType::Str,
ColumnType::DateTime,
ColumnType::Bool,
// ColumnType::Bytes Unsupported
// ColumnType::IpAddr Unsupported
];
// In case the column is empty we want the shim column to match the missing type
let fallback_type = missing
.as_ref()
.map(|missing| match missing {
Key::Str(_) => ColumnType::Str,
Key::F64(_) => ColumnType::F64,
})
.unwrap_or(ColumnType::U64);
let column_and_types = get_all_ff_reader_or_empty(
reader,
field_name,
Some(&allowed_column_types),
fallback_type,
)?;
let missing_and_more_than_one_col = column_and_types.len() > 1 && missing.is_some();
let text_on_non_text_col = column_and_types.len() == 1
&& column_and_types[0].1.numerical_type().is_some()
&& missing
.as_ref()
.map(|m| matches!(m, Key::Str(_)))
.unwrap_or(false);
// Actually we could convert the text to a number and have the fast path, if it is
// provided in Rfc3339 format. But this use case is probably not common
// enough to justify the effort.
let text_on_date_col = column_and_types.len() == 1
&& column_and_types[0].1 == ColumnType::DateTime
&& missing
.as_ref()
.map(|m| matches!(m, Key::Str(_)))
.unwrap_or(false);
let use_special_missing_agg =
missing_and_more_than_one_col || text_on_non_text_col || text_on_date_col;
if use_special_missing_agg {
let column_and_types =
get_all_ff_reader_or_empty(reader, field_name, None, fallback_type)?;
let accessors = column_and_types
.iter()
.map(|c_t| (c_t.0.clone(), c_t.1))
.collect();
add_agg_with_accessors(&agg, accessors, &mut res, Default::default())?;
}
for (accessor, column_type) in column_and_types {
let missing_value_term_agg = if use_special_missing_agg {
None
} else {
missing.clone()
};
let missing_value_for_accessor = if let Some(missing) =
missing_value_term_agg.as_ref()
{
get_missing_val(column_type, missing, agg.agg.get_fast_field_names()[0])?
} else {
None
};
let agg = AggregationWithAccessor {
segment_ordinal,
missing_value_for_accessor,
accessor,
accessors: Default::default(),
value_accessors: Default::default(),
field_type: column_type,
sub_aggregation: get_aggs_with_segment_accessor_and_validate(
sub_aggregation,
reader,
segment_ordinal,
&limits,
)?,
agg: agg.clone(),
str_dict_column: str_dict_column.clone(),
limits: limits.new_guard(),
column_block_accessor: Default::default(),
};
res.push(agg);
}
}
Average(AverageAggregation {
field: ref field_name,
..
})
| Count(CountAggregation {
field: ref field_name,
..
})
| Max(MaxAggregation {
field: ref field_name,
..
})
| Min(MinAggregation {
field: ref field_name,
..
})
| Stats(StatsAggregation {
field: ref field_name,
..
})
| Sum(SumAggregation {
field: ref field_name,
..
}) => {
let (accessor, column_type) =
get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?;
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
}
Percentiles(ref percentiles) => {
let (accessor, column_type) = get_ff_reader(
reader,
percentiles.field_name(),
Some(get_numeric_or_date_column_types()),
)?;
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
}
TopHits(ref mut top_hits) => {
top_hits.validate_and_resolve(reader.fast_fields().columnar())?;
let accessors: Vec<(Column<u64>, ColumnType)> = top_hits
.field_names()
.iter()
.map(|field| {
get_ff_reader(reader, field, Some(get_numeric_or_date_column_types()))
})
.collect::<crate::Result<_>>()?;
let value_accessors = top_hits
.value_field_names()
.iter()
.map(|field_name| {
Ok((
field_name.to_string(),
get_dynamic_columns(reader, field_name)?,
))
})
.collect::<crate::Result<_>>()?;
add_agg_with_accessors(&agg, accessors, &mut res, value_accessors)?;
}
};
Ok(res)
}
}
fn get_missing_val(
column_type: ColumnType,
missing: &Key,
field_name: &str,
) -> crate::Result<Option<u64>> {
let missing_val = match missing {
Key::Str(_) if column_type == ColumnType::Str => Some(u64::MAX),
// Allow fallback to number on text fields
Key::F64(_) if column_type == ColumnType::Str => Some(u64::MAX),
Key::F64(val) if column_type.numerical_type().is_some() => {
f64_to_fastfield_u64(*val, &column_type)
}
_ => {
return Err(crate::TantivyError::InvalidArgument(format!(
"Missing value {:?} for field {} is not supported for column type {:?}",
missing, field_name, column_type
)));
}
};
Ok(missing_val)
}
fn get_numeric_or_date_column_types() -> &'static [ColumnType] {
&[
ColumnType::F64,
@@ -137,19 +354,21 @@ fn get_numeric_or_date_column_types() -> &'static [ColumnType] {
pub(crate) fn get_aggs_with_segment_accessor_and_validate(
aggs: &Aggregations,
reader: &SegmentReader,
segment_ordinal: SegmentOrdinal,
limits: &AggregationLimits,
) -> crate::Result<AggregationsWithAccessor> {
let mut aggss = Vec::new();
for (key, agg) in aggs.iter() {
let aggs = AggregationWithAccessor::try_from_agg(
agg,
agg.sub_aggregation(),
reader,
segment_ordinal,
limits.clone(),
)?;
for agg in aggs {
aggss.push((key.to_string(), agg));
}
}
Ok(AggregationsWithAccessor::from_data(
VecWithNames::from_entries(aggss),
@@ -174,6 +393,19 @@ fn get_ff_reader(
Ok(ff_field_with_type)
}
fn get_dynamic_columns(
reader: &SegmentReader,
field_name: &str,
) -> crate::Result<Vec<columnar::DynamicColumn>> {
let ff_fields = reader.fast_fields().dynamic_column_handles(field_name)?;
let cols = ff_fields
.iter()
.map(|h| h.open())
.collect::<io::Result<_>>()?;
assert!(!ff_fields.is_empty(), "field {} not found", field_name);
Ok(cols)
}
/// Get all fast field reader or empty as default.
///
/// Is guaranteed to return at least one column.
@@ -181,15 +413,13 @@ fn get_all_ff_reader_or_empty(
reader: &SegmentReader,
field_name: &str,
allowed_column_types: Option<&[ColumnType]>,
fallback_type: ColumnType,
) -> crate::Result<Vec<(columnar::Column<u64>, ColumnType)>> {
let ff_fields = reader.fast_fields();
let mut ff_field_with_type =
ff_fields.u64_lenient_for_type_all(allowed_column_types, field_name)?;
if ff_field_with_type.is_empty() {
ff_field_with_type.push((Column::build_empty_column(reader.num_docs()), fallback_type));
}
Ok(ff_field_with_type)
}


@@ -8,7 +8,7 @@ use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use super::bucket::GetDocCount;
use super::metric::{PercentilesMetricResult, SingleMetricResult, Stats, TopHitsMetricResult};
use super::{AggregationError, Key};
use crate::TantivyError;
@@ -90,8 +90,10 @@ pub enum MetricResult {
Stats(Stats),
/// Sum metric result.
Sum(SingleMetricResult),
/// Percentiles metric result.
Percentiles(PercentilesMetricResult),
/// Top hits metric result
TopHits(TopHitsMetricResult),
}
impl MetricResult {
@@ -106,6 +108,9 @@ impl MetricResult {
MetricResult::Percentiles(_) => Err(TantivyError::AggregationError(
AggregationError::InvalidRequest("percentiles can't be used to order".to_string()),
)),
MetricResult::TopHits(_) => Err(TantivyError::AggregationError(
AggregationError::InvalidRequest("top_hits can't be used to order".to_string()),
)),
}
}
}


@@ -9,7 +9,7 @@ use crate::aggregation::tests::{get_test_index_2_segments, get_test_index_from_v
 use crate::aggregation::DistributedAggregationCollector;
 use crate::query::{AllQuery, TermQuery};
 use crate::schema::{IndexRecordOption, Schema, FAST};
-use crate::{Index, Term};
+use crate::{Index, IndexWriter, Term};
 fn get_avg_req(field_name: &str) -> Aggregation {
     serde_json::from_value(json!({
@@ -558,10 +558,10 @@ fn test_aggregation_invalid_requests() -> crate::Result<()> {
     assert_eq!(agg_req_1.is_err(), true);
     // TODO: This should list valid values
-    assert_eq!(
-        agg_req_1.unwrap_err().to_string(),
-        "no variant of enum AggregationVariants found in flattened data"
-    );
+    assert!(agg_req_1
+        .unwrap_err()
+        .to_string()
+        .contains("unknown variant `doesnotmatchanyagg`, expected one of"));
     // TODO: This should return an error
     // let agg_res = avg_on_field("not_exist_field").unwrap_err();
@@ -586,7 +586,10 @@ fn test_aggregation_on_json_object() {
     let json = schema_builder.add_json_field("json", FAST);
     let schema = schema_builder.build();
     let index = Index::create_in_ram(schema);
-    let mut index_writer = index.writer_for_tests().unwrap();
+    let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"color": "red"})))
+        .unwrap();
     index_writer
         .add_document(doc!(json => json!({"color": "red"})))
         .unwrap();
@@ -614,12 +617,74 @@
         &serde_json::json!({
             "jsonagg": {
                 "buckets": [
+                    {"doc_count": 2, "key": "red"},
                     {"doc_count": 1, "key": "blue"},
+                ],
+                "doc_count_error_upper_bound": 0,
+                "sum_other_doc_count": 0
+            }
+        })
+    );
+}
+#[test]
+fn test_aggregation_on_nested_json_object() {
+    let mut schema_builder = Schema::builder();
+    let json = schema_builder.add_json_field("json.blub", FAST);
+    let schema = schema_builder.build();
+    let index = Index::create_in_ram(schema);
+    let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"color.dot": "red", "color": {"nested":"red"} })))
+        .unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"color.dot": "blue", "color": {"nested":"blue"} })))
+        .unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"color.dot": "blue", "color": {"nested":"blue"} })))
+        .unwrap();
+    index_writer.commit().unwrap();
+    let reader = index.reader().unwrap();
+    let searcher = reader.searcher();
+    let agg: Aggregations = serde_json::from_value(json!({
+        "jsonagg1": {
+            "terms": {
+                "field": "json\\.blub.color\\.dot",
+            }
+        },
+        "jsonagg2": {
+            "terms": {
+                "field": "json\\.blub.color.nested",
+            }
+        }
+    }))
+    .unwrap();
+    let aggregation_collector = get_collector(agg);
+    let aggregation_results = searcher.search(&AllQuery, &aggregation_collector).unwrap();
+    let aggregation_res_json = serde_json::to_value(aggregation_results).unwrap();
+    assert_eq!(
+        &aggregation_res_json,
+        &serde_json::json!({
+            "jsonagg1": {
+                "buckets": [
+                    {"doc_count": 2, "key": "blue"},
+                    {"doc_count": 1, "key": "red"}
+                ],
+                "doc_count_error_upper_bound": 0,
+                "sum_other_doc_count": 0
+            },
+            "jsonagg2": {
+                "buckets": [
+                    {"doc_count": 2, "key": "blue"},
                     {"doc_count": 1, "key": "red"}
                 ],
                 "doc_count_error_upper_bound": 0,
                 "sum_other_doc_count": 0
             }
         })
     );
 }
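The `json\.blub.color\.dot` paths in this test rely on `\.` escaping to address a field whose name itself contains a dot. As a std-only illustration of that addressing convention (this is a hypothetical helper, not tantivy's actual path parser), splitting such a path looks like:

```rust
// Hypothetical helper: split an aggregation field path on `.` while
// treating `\.` as a literal dot, mirroring `json\.blub.color\.dot` above.
fn split_field_path(path: &str) -> Vec<String> {
    let mut parts = vec![String::new()];
    let mut chars = path.chars();
    while let Some(c) = chars.next() {
        match c {
            // `\x` keeps `x` verbatim, so `\.` becomes a literal dot
            '\\' => {
                if let Some(escaped) = chars.next() {
                    parts.last_mut().unwrap().push(escaped);
                }
            }
            // an unescaped dot starts the next path segment
            '.' => parts.push(String::new()),
            other => parts.last_mut().unwrap().push(other),
        }
    }
    parts
}
```

With this splitting, `json\.blub.color\.dot` resolves to the field `json.blub` and the key `color.dot`, which is exactly what the assertions above depend on.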
@@ -630,7 +695,7 @@ fn test_aggregation_on_json_object_empty_columns() {
     let json = schema_builder.add_json_field("json", FAST);
     let schema = schema_builder.build();
     let index = Index::create_in_ram(schema);
-    let mut index_writer = index.writer_for_tests().unwrap();
+    let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
     // => Empty column when accessing color
     index_writer
         .add_document(doc!(json => json!({"price": 10.0})))
@@ -748,13 +813,19 @@ fn test_aggregation_on_json_object_mixed_types() {
     let json = schema_builder.add_json_field("json", FAST);
     let schema = schema_builder.build();
     let index = Index::create_in_ram(schema);
-    let mut index_writer = index.writer_for_tests().unwrap();
+    let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
     // => Segment with all values numeric
     index_writer
         .add_document(doc!(json => json!({"mixed_type": 10.0})))
         .unwrap();
     index_writer.commit().unwrap();
     // => Segment with all values text
+    index_writer
+        .add_document(doc!(json => json!({"mixed_type": "blue"})))
+        .unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"mixed_type": "blue"})))
+        .unwrap();
     index_writer
         .add_document(doc!(json => json!({"mixed_type": "blue"})))
         .unwrap();
@@ -766,6 +837,9 @@ fn test_aggregation_on_json_object_mixed_types() {
     index_writer.commit().unwrap();
     // => Segment with mixed values
+    index_writer
+        .add_document(doc!(json => json!({"mixed_type": "red"})))
+        .unwrap();
     index_writer
         .add_document(doc!(json => json!({"mixed_type": "red"})))
         .unwrap();
@@ -811,6 +885,8 @@ fn test_aggregation_on_json_object_mixed_types() {
     let aggregation_results = searcher.search(&AllQuery, &aggregation_collector).unwrap();
     let aggregation_res_json = serde_json::to_value(aggregation_results).unwrap();
+    // pretty print as json
+    use pretty_assertions::assert_eq;
     assert_eq!(
         &aggregation_res_json,
         &serde_json::json!({
@@ -826,9 +902,9 @@ fn test_aggregation_on_json_object_mixed_types() {
                 "buckets": [
                     { "doc_count": 1, "key": 10.0, "min_price": { "value": 10.0 } },
                     { "doc_count": 1, "key": -20.5, "min_price": { "value": -20.5 } },
-                    // TODO bool is also not yet handled in aggregation
-                    { "doc_count": 1, "key": "blue", "min_price": { "value": null } },
-                    { "doc_count": 1, "key": "red", "min_price": { "value": null } },
+                    { "doc_count": 2, "key": "red", "min_price": { "value": null } },
+                    { "doc_count": 2, "key": 1.0, "key_as_string": "true", "min_price": { "value": null } },
+                    { "doc_count": 3, "key": "blue", "min_price": { "value": null } },
                 ],
                 "sum_other_doc_count": 0
             }

View File

@@ -1,7 +1,7 @@
 use serde::{Deserialize, Serialize};
 use super::{HistogramAggregation, HistogramBounds};
-use crate::aggregation::AggregationError;
+use crate::aggregation::*;
 /// DateHistogramAggregation is similar to `HistogramAggregation`, but it can only be used with date
 /// type.
@@ -132,6 +132,7 @@ impl DateHistogramAggregationReq {
             hard_bounds: self.hard_bounds,
             extended_bounds: self.extended_bounds,
             keyed: self.keyed,
+            is_normalized_to_ns: false,
         })
     }
@@ -243,15 +244,15 @@ fn parse_into_milliseconds(input: &str) -> Result<i64, AggregationError> {
 }
 #[cfg(test)]
-mod tests {
+pub mod tests {
     use pretty_assertions::assert_eq;
     use super::*;
     use crate::aggregation::agg_req::Aggregations;
     use crate::aggregation::tests::exec_request;
     use crate::indexer::NoMergePolicy;
-    use crate::schema::{Schema, FAST};
-    use crate::Index;
+    use crate::schema::{Schema, FAST, STRING};
+    use crate::{Index, IndexWriter, TantivyDocument};
     #[test]
     fn test_parse_into_millisecs() {
@@ -306,7 +307,9 @@ mod tests {
     ) -> crate::Result<Index> {
         let mut schema_builder = Schema::builder();
         schema_builder.add_date_field("date", FAST);
-        schema_builder.add_text_field("text", FAST);
+        schema_builder.add_json_field("mixed", FAST);
+        schema_builder.add_text_field("text", FAST | STRING);
+        schema_builder.add_text_field("text2", FAST | STRING);
         let schema = schema_builder.build();
         let index = Index::create_in_ram(schema.clone());
         {
@@ -314,7 +317,7 @@ mod tests {
            index_writer.set_merge_policy(Box::new(NoMergePolicy));
             for values in segment_and_docs {
                 for doc_str in values {
-                    let doc = schema.parse_document(doc_str)?;
+                    let doc = TantivyDocument::parse_json(&schema, doc_str)?;
                     index_writer.add_document(doc)?;
                 }
                 // writing the segment
@@ -326,7 +329,7 @@ mod tests {
             .searchable_segment_ids()
             .expect("Searchable segments failed.");
         if segment_ids.len() > 1 {
-            let mut index_writer = index.writer_for_tests()?;
+            let mut index_writer: IndexWriter = index.writer_for_tests()?;
             index_writer.merge(&segment_ids).wait()?;
             index_writer.wait_merging_threads()?;
         }
@@ -349,8 +352,10 @@ mod tests {
         let docs = vec![
             vec![r#"{ "date": "2015-01-01T12:10:30Z", "text": "aaa" }"#],
             vec![r#"{ "date": "2015-01-01T11:11:30Z", "text": "bbb" }"#],
+            vec![r#"{ "date": "2015-01-01T11:11:30Z", "text": "bbb" }"#],
             vec![r#"{ "date": "2015-01-02T00:00:00Z", "text": "bbb" }"#],
             vec![r#"{ "date": "2015-01-06T00:00:00Z", "text": "ccc" }"#],
+            vec![r#"{ "date": "2015-01-06T00:00:00Z", "text": "ccc" }"#],
         ];
         let index = get_test_index_from_docs(merge_segments, &docs).unwrap();
@@ -379,7 +384,7 @@ mod tests {
                 {
                     "key_as_string" : "2015-01-01T00:00:00Z",
                     "key" : 1420070400000.0,
-                    "doc_count" : 4
+                    "doc_count" : 6
                 }
             ]
         }
@@ -417,15 +422,15 @@ mod tests {
                 {
                     "key_as_string" : "2015-01-01T00:00:00Z",
                     "key" : 1420070400000.0,
-                    "doc_count" : 4,
+                    "doc_count" : 6,
                     "texts": {
                         "buckets": [
                             {
-                                "doc_count": 2,
+                                "doc_count": 3,
                                 "key": "bbb"
                             },
                             {
-                                "doc_count": 1,
+                                "doc_count": 2,
                                 "key": "ccc"
                             },
                             {
@@ -464,7 +469,7 @@ mod tests {
             "sales_over_time": {
                 "buckets": [
                     {
-                        "doc_count": 2,
+                        "doc_count": 3,
                         "key": 1420070400000.0,
                         "key_as_string": "2015-01-01T00:00:00Z"
                     },
@@ -489,7 +494,7 @@ mod tests {
                         "key_as_string": "2015-01-05T00:00:00Z"
                     },
                     {
-                        "doc_count": 1,
+                        "doc_count": 2,
                         "key": 1420502400000.0,
                         "key_as_string": "2015-01-06T00:00:00Z"
                     }
@@ -530,7 +535,7 @@ mod tests {
                         "key_as_string": "2014-12-31T00:00:00Z"
                     },
                     {
-                        "doc_count": 2,
+                        "doc_count": 3,
                         "key": 1420070400000.0,
                         "key_as_string": "2015-01-01T00:00:00Z"
                     },
@@ -555,7 +560,7 @@ mod tests {
                         "key_as_string": "2015-01-05T00:00:00Z"
                     },
                     {
-                        "doc_count": 1,
+                        "doc_count": 2,
                         "key": 1420502400000.0,
                         "key_as_string": "2015-01-06T00:00:00Z"
                     },
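The hunks above revolve around `parse_into_milliseconds`, which turns `fixed_interval` strings like `"30d"` into a millisecond count. As a std-only sketch of that kind of parsing (a hypothetical `parse_into_ms` using `String` errors; the real function returns typed `AggregationError` values, and the exact set of accepted units is an assumption here):

```rust
// Simplified sketch: parse fixed_interval strings such as "30d" or "1h"
// into milliseconds.
fn parse_into_ms(input: &str) -> Result<i64, String> {
    // find where the digits end and the unit suffix begins
    let unit_start = input
        .find(|c: char| !c.is_ascii_digit())
        .ok_or_else(|| format!("missing unit in '{input}'"))?;
    let (digits, unit) = input.split_at(unit_start);
    let number: i64 = digits
        .parse()
        .map_err(|_| format!("invalid number in '{input}'"))?;
    // translate the unit suffix into a milliseconds factor
    let factor = match unit {
        "ms" => 1,
        "s" => 1_000,
        "m" => 60 * 1_000,
        "h" => 60 * 60 * 1_000,
        "d" => 24 * 60 * 60 * 1_000,
        other => return Err(format!("unknown unit '{other}'")),
    };
    Ok(number * factor)
}
```

So `"30d"` yields 2,592,000,000 ms, while an input with no unit or an unrecognized suffix is rejected.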

View File

@@ -20,7 +20,7 @@ use crate::aggregation::intermediate_agg_result::{
 use crate::aggregation::segment_agg_result::{
     build_segment_agg_collector, AggregationLimits, SegmentAggregationCollector,
 };
-use crate::aggregation::{f64_from_fastfield_u64, format_date};
+use crate::aggregation::*;
 use crate::TantivyError;
 /// Histogram is a bucket aggregation, where buckets are created dynamically for given `interval`.
@@ -73,6 +73,7 @@ pub struct HistogramAggregation {
     pub field: String,
     /// The interval to chunk your data range. Each bucket spans a value range of [0..interval).
     /// Must be a positive value.
+    #[serde(deserialize_with = "deserialize_f64")]
     pub interval: f64,
     /// Intervals implicitly defines an absolute grid of buckets `[interval * k, interval * (k +
     /// 1))`.
@@ -85,6 +86,7 @@ pub struct HistogramAggregation {
     /// fall into the buckets with the key 0 and 10.
     /// With offset 5 and interval 10, they would both fall into the bucket with they key 5 and the
     /// range [5..15)
+    #[serde(default, deserialize_with = "deserialize_option_f64")]
     pub offset: Option<f64>,
     /// The minimum number of documents in a bucket to be returned. Defaults to 0.
     pub min_doc_count: Option<u64>,
@@ -122,11 +124,14 @@ pub struct HistogramAggregation {
     /// Whether to return the buckets as a hash map
     #[serde(default)]
     pub keyed: bool,
+    /// Whether the values are normalized to ns for date time values. Defaults to false.
+    #[serde(default)]
+    pub is_normalized_to_ns: bool,
 }
 impl HistogramAggregation {
-    pub(crate) fn normalize(&mut self, column_type: ColumnType) {
-        if column_type.is_date_time() {
+    pub(crate) fn normalize_date_time(&mut self) {
+        if !self.is_normalized_to_ns {
             // values are provided in ms, but the fastfield is in nano seconds
             self.interval *= 1_000_000.0;
             self.offset = self.offset.map(|off| off * 1_000_000.0);
@@ -138,6 +143,7 @@ impl HistogramAggregation {
                 min: bounds.min * 1_000_000.0,
                 max: bounds.max * 1_000_000.0,
             });
+            self.is_normalized_to_ns = true;
        }
    }
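The `is_normalized_to_ns` flag introduced above makes the ms-to-ns normalization idempotent: a request already normalized by a segment collector is not scaled a second time at merge time. A minimal std-only sketch of that guard (using a stripped-down `HistogramReq` stand-in for the real struct):

```rust
// Sketch of the idempotent ms→ns normalization: the flag turns a second
// call into a no-op, so the 1_000_000x scaling is applied exactly once.
struct HistogramReq {
    interval: f64,
    offset: Option<f64>,
    is_normalized_to_ns: bool,
}

impl HistogramReq {
    fn normalize_date_time(&mut self) {
        if !self.is_normalized_to_ns {
            // user input is in milliseconds, the date fast field stores nanoseconds
            self.interval *= 1_000_000.0;
            self.offset = self.offset.map(|off| off * 1_000_000.0);
            self.is_normalized_to_ns = true;
        }
    }
}
```

Calling `normalize_date_time` twice on an interval of 10.0 ms leaves it at 10,000,000.0 ns rather than 10,000,000,000,000.0, which is the double-scaling bug the flag prevents.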
@@ -351,6 +357,7 @@ impl SegmentHistogramCollector {
         let buckets_mem = self.buckets.memory_consumption();
         self_mem + sub_aggs_mem + buckets_mem
     }
+    /// Converts the collector result into a intermediate bucket result.
     pub fn into_intermediate_bucket_result(
         self,
         agg_with_accessor: &AggregationWithAccessor,
@@ -369,7 +376,7 @@ impl SegmentHistogramCollector {
         Ok(IntermediateBucketResult::Histogram {
             buckets,
-            column_type: Some(self.column_type),
+            is_date_agg: self.column_type == ColumnType::DateTime,
         })
     }
@@ -380,7 +387,9 @@ impl SegmentHistogramCollector {
         accessor_idx: usize,
     ) -> crate::Result<Self> {
         req.validate()?;
-        req.normalize(field_type);
+        if field_type == ColumnType::DateTime {
+            req.normalize_date_time();
+        }
         let sub_aggregation_blueprint = if sub_aggregation.is_empty() {
             None
@@ -438,6 +447,7 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
     // memory check upfront
     let (_, first_bucket_num, last_bucket_num) =
         generate_bucket_pos_with_opt_minmax(histogram_req, min_max);
+    // It's based on user input, so we need to account for overflows
     let added_buckets = ((last_bucket_num.saturating_sub(first_bucket_num)).max(0) as u64)
         .saturating_sub(buckets.len() as u64);
@@ -453,15 +463,12 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
     let final_buckets: Vec<BucketEntry> = buckets
         .into_iter()
-        .merge_join_by(
-            fill_gaps_buckets.into_iter(),
-            |existing_bucket, fill_gaps_bucket| {
-                existing_bucket
-                    .key
-                    .partial_cmp(fill_gaps_bucket)
-                    .unwrap_or(Ordering::Equal)
-            },
-        )
+        .merge_join_by(fill_gaps_buckets, |existing_bucket, fill_gaps_bucket| {
+            existing_bucket
+                .key
+                .partial_cmp(fill_gaps_bucket)
+                .unwrap_or(Ordering::Equal)
+        })
         .map(|either| match either {
             // Ignore the generated bucket
             itertools::EitherOrBoth::Both(existing, _) => existing,
@@ -484,7 +491,7 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
 // Convert to BucketEntry
 pub(crate) fn intermediate_histogram_buckets_to_final_buckets(
     buckets: Vec<IntermediateHistogramBucketEntry>,
-    column_type: Option<ColumnType>,
+    is_date_agg: bool,
     histogram_req: &HistogramAggregation,
     sub_aggregation: &Aggregations,
     limits: &AggregationLimits,
@@ -493,8 +500,8 @@ pub(crate) fn intermediate_histogram_buckets_to_final_buckets(
     // The request used in the the call to final is not yet be normalized.
     // Normalization is changing the precision from milliseconds to nanoseconds.
     let mut histogram_req = histogram_req.clone();
-    if let Some(column_type) = column_type {
-        histogram_req.normalize(column_type);
+    if is_date_agg {
+        histogram_req.normalize_date_time();
     }
     let mut buckets = if histogram_req.min_doc_count() == 0 {
         // With min_doc_count != 0, we may need to add buckets, so that there are no
@@ -518,7 +525,7 @@ pub(crate) fn intermediate_histogram_buckets_to_final_buckets(
     // If we have a date type on the histogram buckets, we add the `key_as_string` field as rfc339
     // and normalize from nanoseconds to milliseconds
-    if column_type == Some(ColumnType::DateTime) {
+    if is_date_agg {
         for bucket in buckets.iter_mut() {
             if let crate::aggregation::Key::F64(ref mut val) = bucket.key {
                 let key_as_string = format_date(*val as i64)?;
@@ -591,10 +598,13 @@ mod tests {
     use super::*;
     use crate::aggregation::agg_req::Aggregations;
+    use crate::aggregation::agg_result::AggregationResults;
     use crate::aggregation::tests::{
         exec_request, exec_request_with_query, exec_request_with_query_and_memory_limit,
         get_test_index_2_segments, get_test_index_from_values, get_test_index_with_num_docs,
     };
+    use crate::aggregation::AggregationCollector;
+    use crate::query::AllQuery;
     #[test]
     fn histogram_test_crooked_values() -> crate::Result<()> {
@@ -1346,6 +1356,35 @@ mod tests {
             })
         );
+        Ok(())
+    }
+    #[test]
+    fn test_aggregation_histogram_empty_index() -> crate::Result<()> {
+        // test index without segments
+        let values = vec![];
+        let index = get_test_index_from_values(false, &values)?;
+        let agg_req_1: Aggregations = serde_json::from_value(json!({
+            "myhisto": {
+                "histogram": {
+                    "field": "score",
+                    "interval": 10.0
+                },
+            }
+        }))
+        .unwrap();
+        let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
+        let reader = index.reader()?;
+        let searcher = reader.searcher();
+        let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
+        let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
+        // Make sure the result structure is correct
+        assert_eq!(res["myhisto"]["buckets"].as_array().unwrap().len(), 0);
         Ok(())
     }
 }
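The gap-filling logic in this file assumes a fixed bucket grid derived from `interval` and `offset`: per the struct docs above, with offset 5 and interval 10 a value lands in the bucket keyed 5 with range [5..15). A std-only sketch of that grid-key computation (a hypothetical `bucket_key` helper, not the crate's internal function):

```rust
// A value falls into the bucket whose key is the largest grid point
// `offset + k * interval` that is <= value.
fn bucket_key(value: f64, interval: f64, offset: f64) -> f64 {
    ((value - offset) / interval).floor() * interval + offset
}
```

For example, with interval 10 and offset 5, the values 7 and 4 fall into the buckets keyed 5 and -5 respectively, matching the `[5..15)` semantics described in the `offset` documentation.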

View File

@@ -25,15 +25,15 @@
 mod histogram;
 mod range;
 mod term_agg;
+mod term_missing_agg;
 use std::collections::HashMap;
-pub(crate) use histogram::SegmentHistogramCollector;
 pub use histogram::*;
-pub(crate) use range::SegmentRangeCollector;
 pub use range::*;
 use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
 pub use term_agg::*;
+pub use term_missing_agg::*;
 /// Order for buckets in a bucket aggregation.
 #[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, Default)]

View File

@@ -14,9 +14,7 @@ use crate::aggregation::intermediate_agg_result::{
 use crate::aggregation::segment_agg_result::{
     build_segment_agg_collector, SegmentAggregationCollector,
 };
-use crate::aggregation::{
-    f64_from_fastfield_u64, f64_to_fastfield_u64, format_date, Key, SerializedKey,
-};
+use crate::aggregation::*;
 use crate::TantivyError;
 /// Provide user-defined buckets to aggregate on.
@@ -72,11 +70,19 @@ pub struct RangeAggregationRange {
     pub key: Option<String>,
     /// The from range value, which is inclusive in the range.
     /// `None` equals to an open ended interval.
-    #[serde(skip_serializing_if = "Option::is_none", default)]
+    #[serde(
+        skip_serializing_if = "Option::is_none",
+        default,
+        deserialize_with = "deserialize_option_f64"
+    )]
     pub from: Option<f64>,
     /// The to range value, which is not inclusive in the range.
     /// `None` equals to an open ended interval.
-    #[serde(skip_serializing_if = "Option::is_none", default)]
+    #[serde(
+        skip_serializing_if = "Option::is_none",
+        default,
+        deserialize_with = "deserialize_option_f64"
+    )]
     pub to: Option<f64>,
 }
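The `deserialize_option_f64` hook added to `from` and `to` ties in with the "support to deserialize f64 from string" commit in this comparison: a bound may arrive as a JSON number or as the same number quoted in a string, and NaN is disallowed. A std-only sketch of that lenient behavior (a hypothetical `lenient_f64` working on raw text, not the actual serde deserializer):

```rust
// Accept a bare number or the same number wrapped in quotes, reject NaN.
// Note: Rust's f64 parser accepts the literal "NaN", so the explicit check
// is what enforces the "disallow NaN" behavior.
fn lenient_f64(raw: &str) -> Result<f64, String> {
    let unquoted = raw.trim().trim_matches('"');
    let value: f64 = unquoted
        .parse()
        .map_err(|_| format!("invalid f64: {raw}"))?;
    if value.is_nan() {
        return Err("NaN is not a valid value".to_string());
    }
    Ok(value)
}
```

With this, `10.5` and `"10.5"` both deserialize to the same bound, while `NaN` is rejected up front instead of silently corrupting range comparisons.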
@@ -262,7 +268,7 @@ impl SegmentRangeCollector {
     pub(crate) fn from_req_and_validate(
         req: &RangeAggregation,
         sub_aggregation: &mut AggregationsWithAccessor,
-        limits: &mut ResourceLimitGuard,
+        limits: &ResourceLimitGuard,
         field_type: ColumnType,
         accessor_idx: usize,
     ) -> crate::Result<Self> {
@@ -465,7 +471,7 @@ mod tests {
         SegmentRangeCollector::from_req_and_validate(
             &req,
             &mut Default::default(),
-            &mut AggregationLimits::default().new_guard(),
+            &AggregationLimits::default().new_guard(),
             field_type,
             0,
         )

View File

@@ -1,6 +1,6 @@
 use std::fmt::Debug;
-use columnar::ColumnType;
+use columnar::{BytesColumn, ColumnType, MonotonicallyMappableToU64, StrColumn};
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};
@@ -9,7 +9,6 @@ use crate::aggregation::agg_limits::MemoryConsumption;
 use crate::aggregation::agg_req_with_accessor::{
     AggregationWithAccessor, AggregationsWithAccessor,
 };
-use crate::aggregation::f64_from_fastfield_u64;
 use crate::aggregation::intermediate_agg_result::{
     IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
     IntermediateKey, IntermediateTermBucketEntry, IntermediateTermBucketResult,
@@ -17,6 +16,7 @@ use crate::aggregation::intermediate_agg_result::{
 use crate::aggregation::segment_agg_result::{
     build_segment_agg_collector, SegmentAggregationCollector,
 };
+use crate::aggregation::{f64_from_fastfield_u64, format_date, Key};
 use crate::error::DataCorruption;
 use crate::TantivyError;
@@ -99,24 +99,15 @@ pub struct TermsAggregation {
     #[serde(skip_serializing_if = "Option::is_none", default)]
     pub size: Option<u32>,
-    /// Unused by tantivy.
-    ///
-    /// Since tantivy doesn't know shards, this parameter is merely there to be used by consumers
-    /// of tantivy. shard_size is the number of terms returned by each shard.
-    /// The default value in elasticsearch is size * 1.5 + 10.
-    ///
-    /// Should never be smaller than size.
-    #[serde(skip_serializing_if = "Option::is_none", default)]
-    #[serde(alias = "shard_size")]
-    pub split_size: Option<u32>,
-    /// The get more accurate results, we fetch more than `size` from each segment.
+    /// To get more accurate results, we fetch more than `size` from each segment.
     ///
     /// Increasing this value is will increase the cost for more accuracy.
     ///
     /// Defaults to 10 * size.
     #[serde(skip_serializing_if = "Option::is_none", default)]
-    pub segment_size: Option<u32>,
+    #[serde(alias = "segment_size")]
+    #[serde(alias = "split_size")]
+    pub shard_size: Option<u32>,
     /// If you set the `show_term_doc_count_error` parameter to true, the terms aggregation will
     /// include doc_count_error_upper_bound, which is an upper bound to the error on the
@@ -146,6 +137,28 @@ pub struct TermsAggregation {
     /// { "average_price": "asc" }
     #[serde(skip_serializing_if = "Option::is_none", default)]
     pub order: Option<CustomOrder>,
+    /// The missing parameter defines how documents that are missing a value should be treated.
+    /// By default they will be ignored but it is also possible to treat them as if they had a
+    /// value. Examples in JSON format:
+    /// { "missing": "NO_DATA" }
+    ///
+    /// # Internal
+    ///
+    /// Internally, `missing` requires some specialized handling in some scenarios.
+    ///
+    /// Simple Case:
+    /// In the simplest case, we can just put the missing value in the termmap use that. In case of
+    /// text we put a special u64::MAX and replace it at the end with the actual missing value,
+    /// when loading the text.
+    /// Special Case 1:
+    /// If we have multiple columns on one field, we need to have a union on the indices on both
+    /// columns, to find docids without a value. That requires a special missing aggreggation.
+    /// Special Case 2: if the key is of type text and the column is numerical, we also need to use
+    /// the special missing aggregation, since there is no mechanism in the numerical column to
+    /// add text.
+    #[serde(skip_serializing_if = "Option::is_none", default)]
+    pub missing: Option<Key>,
 }
 /// Same as TermsAggregation, but with populated defaults.
@@ -176,13 +189,14 @@ pub(crate) struct TermsAggregationInternal {
     pub min_doc_count: u64,
     pub order: CustomOrder,
+    pub missing: Option<Key>,
 }
 impl TermsAggregationInternal {
     pub(crate) fn from_req(req: &TermsAggregation) -> Self {
         let size = req.size.unwrap_or(10);
-        let mut segment_size = req.segment_size.unwrap_or(size * 10);
+        let mut segment_size = req.shard_size.unwrap_or(size * 10);
         let order = req.order.clone().unwrap_or_default();
         segment_size = segment_size.max(size);
@@ -195,6 +209,7 @@ impl TermsAggregationInternal {
             .unwrap_or_else(|| order == CustomOrder::default()),
             min_doc_count: req.min_doc_count.unwrap_or(1),
             order,
+            missing: req.missing.clone(),
         }
     }
 }
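The size derivation in `from_req` above can be summarized in isolation: `size` defaults to 10, the per-segment fetch size defaults to 10 * size (whether the request spelled it `shard_size`, `segment_size`, or the deprecated `split_size` alias), and it is clamped to never fall below `size`. A std-only sketch (the function name `effective_sizes` is hypothetical):

```rust
// Mirror of the defaulting/clamping logic in TermsAggregationInternal::from_req:
// returns (size, per-segment fetch size).
fn effective_sizes(size: Option<u32>, shard_size: Option<u32>) -> (u32, u32) {
    let size = size.unwrap_or(10);
    // fetch more than `size` per segment for accuracy, but never less than `size`
    let segment_size = shard_size.unwrap_or(size * 10).max(size);
    (size, segment_size)
}
```

The clamp matters for requests like `size: 50, shard_size: 20`, where a smaller per-segment fetch would otherwise make it impossible to return the requested number of buckets.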
@@ -224,110 +239,6 @@ impl TermBuckets {
     }
 }
-/// The composite collector is used, when we have different types under one field, to support a term
-/// aggregation on both.
-#[derive(Clone, Debug)]
-pub struct SegmentTermCollectorComposite {
-    term_agg1: SegmentTermCollector, // field type 1, e.g. strings
-    term_agg2: SegmentTermCollector, // field type 2, e.g. u64
-    accessor_idx: usize,
-}
-impl SegmentAggregationCollector for SegmentTermCollectorComposite {
-    fn add_intermediate_aggregation_result(
-        self: Box<Self>,
-        agg_with_accessor: &AggregationsWithAccessor,
-        results: &mut IntermediateAggregationResults,
-    ) -> crate::Result<()> {
-        let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
-        let agg_with_accessor = &agg_with_accessor.aggs.values[self.accessor_idx];
-        let bucket = self
-            .term_agg1
-            .into_intermediate_bucket_result(agg_with_accessor)?;
-        results.push(
-            name.to_string(),
-            IntermediateAggregationResult::Bucket(bucket),
-        )?;
-        let bucket = self
-            .term_agg2
-            .into_intermediate_bucket_result(agg_with_accessor)?;
-        results.push(name, IntermediateAggregationResult::Bucket(bucket))?;
-        Ok(())
-    }
-    #[inline]
-    fn collect(
-        &mut self,
-        doc: crate::DocId,
-        agg_with_accessor: &mut AggregationsWithAccessor,
-    ) -> crate::Result<()> {
-        self.term_agg1.collect_block(&[doc], agg_with_accessor)?;
-        self.swap_accessor(&mut agg_with_accessor.aggs.values[self.accessor_idx]);
-        self.term_agg2.collect_block(&[doc], agg_with_accessor)?;
-        self.swap_accessor(&mut agg_with_accessor.aggs.values[self.accessor_idx]);
-        Ok(())
-    }
-    #[inline]
-    fn collect_block(
-        &mut self,
-        docs: &[crate::DocId],
-        agg_with_accessor: &mut AggregationsWithAccessor,
-    ) -> crate::Result<()> {
-        self.term_agg1.collect_block(docs, agg_with_accessor)?;
-        self.swap_accessor(&mut agg_with_accessor.aggs.values[self.accessor_idx]);
-        self.term_agg2.collect_block(docs, agg_with_accessor)?;
-        self.swap_accessor(&mut agg_with_accessor.aggs.values[self.accessor_idx]);
-        Ok(())
-    }
-    fn flush(&mut self, agg_with_accessor: &mut AggregationsWithAccessor) -> crate::Result<()> {
-        self.term_agg1.flush(agg_with_accessor)?;
-        self.swap_accessor(&mut agg_with_accessor.aggs.values[self.accessor_idx]);
-        self.term_agg2.flush(agg_with_accessor)?;
-        self.swap_accessor(&mut agg_with_accessor.aggs.values[self.accessor_idx]);
-        Ok(())
-    }
-}
-impl SegmentTermCollectorComposite {
-    /// Swaps the accessor and field type with the second accessor and field type.
-    /// This way we can use the same code for both aggregations.
-    fn swap_accessor(&self, aggregations: &mut AggregationWithAccessor) {
-        if let Some(accessor) = aggregations.accessor2.as_mut() {
-            std::mem::swap(&mut accessor.0, &mut aggregations.accessor);
-            std::mem::swap(&mut accessor.1, &mut aggregations.field_type);
-        }
-    }
-    pub(crate) fn from_req_and_validate(
-        req: &TermsAggregation,
-        sub_aggregations: &mut AggregationsWithAccessor,
-        field_type: ColumnType,
-        field_type2: ColumnType,
-        accessor_idx: usize,
-    ) -> crate::Result<Self> {
-        Ok(Self {
term_agg1: SegmentTermCollector::from_req_and_validate(
req,
sub_aggregations,
field_type,
accessor_idx,
)?,
term_agg2: SegmentTermCollector::from_req_and_validate(
req,
sub_aggregations,
field_type2,
accessor_idx,
)?,
accessor_idx,
})
}
}
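The removed `SegmentTermCollectorComposite` above relied on `std::mem::swap` to reuse one code path for two column accessors. A minimal sketch of that pattern, with plain strings standing in for the accessor and field type:

```rust
// Illustration of the accessor-swapping trick used by the removed
// SegmentTermCollectorComposite: temporarily swap the secondary accessor into
// the primary slot so a single code path serves both column types, then swap
// back after the collect call.
struct Accessors {
    primary: &'static str,
    secondary: Option<&'static str>,
}

impl Accessors {
    fn swap(&mut self) {
        if let Some(sec) = self.secondary.as_mut() {
            std::mem::swap(&mut self.primary, sec);
        }
    }
}
```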
/// The collector puts values from the fast field into the correct buckets and does a conversion to /// The collector puts values from the fast field into the correct buckets and does a conversion to
/// the correct datatype. /// the correct datatype.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@@ -336,7 +247,7 @@ pub struct SegmentTermCollector {
term_buckets: TermBuckets, term_buckets: TermBuckets,
req: TermsAggregationInternal, req: TermsAggregationInternal,
blueprint: Option<Box<dyn SegmentAggregationCollector>>, blueprint: Option<Box<dyn SegmentAggregationCollector>>,
field_type: ColumnType, column_type: ColumnType,
accessor_idx: usize, accessor_idx: usize,
} }
@@ -379,9 +290,16 @@ impl SegmentAggregationCollector for SegmentTermCollector {
let mem_pre = self.get_memory_consumption(); let mem_pre = self.get_memory_consumption();
-            bucket_agg_accessor
-                .column_block_accessor
-                .fetch_block(docs, &bucket_agg_accessor.accessor);
+            if let Some(missing) = bucket_agg_accessor.missing_value_for_accessor {
+                bucket_agg_accessor
+                    .column_block_accessor
+                    .fetch_block_with_missing(docs, &bucket_agg_accessor.accessor, missing);
+            } else {
+                bucket_agg_accessor
+                    .column_block_accessor
+                    .fetch_block(docs, &bucket_agg_accessor.accessor);
+            }
for term_id in bucket_agg_accessor.column_block_accessor.iter_vals() { for term_id in bucket_agg_accessor.column_block_accessor.iter_vals() {
let entry = self.term_buckets.entries.entry(term_id).or_default(); let entry = self.term_buckets.entries.entry(term_id).or_default();
*entry += 1; *entry += 1;
@@ -428,7 +346,7 @@ impl SegmentTermCollector {
field_type: ColumnType, field_type: ColumnType,
accessor_idx: usize, accessor_idx: usize,
) -> crate::Result<Self> { ) -> crate::Result<Self> {
if field_type == ColumnType::Bytes || field_type == ColumnType::Bool { if field_type == ColumnType::Bytes {
return Err(TantivyError::InvalidArgument(format!( return Err(TantivyError::InvalidArgument(format!(
"terms aggregation is not supported for column type {:?}", "terms aggregation is not supported for column type {:?}",
field_type field_type
@@ -462,7 +380,7 @@ impl SegmentTermCollector {
req: TermsAggregationInternal::from_req(req), req: TermsAggregationInternal::from_req(req),
term_buckets, term_buckets,
blueprint, blueprint,
field_type, column_type: field_type,
accessor_idx, accessor_idx,
}) })
} }
@@ -539,23 +457,46 @@ impl SegmentTermCollector {
Ok(intermediate_entry) Ok(intermediate_entry)
}; };
if self.field_type == ColumnType::Str { if self.column_type == ColumnType::Str {
             let term_dict = agg_with_accessor
                 .str_dict_column
                 .as_ref()
-                .expect("internal error: term dictionary not found for term aggregation");
+                .cloned()
+                .unwrap_or_else(|| {
+                    StrColumn::wrap(BytesColumn::empty(agg_with_accessor.accessor.num_docs()))
+                });
             let mut buffer = String::new();
             for (term_id, doc_count) in entries {
-                if !term_dict.ord_to_str(term_id, &mut buffer)? {
-                    return Err(TantivyError::InternalError(format!(
-                        "Couldn't find term_id {term_id} in dict"
-                    )));
-                }
                 let intermediate_entry = into_intermediate_bucket_entry(term_id, doc_count)?;
-                dict.insert(IntermediateKey::Str(buffer.to_string()), intermediate_entry);
+                // Special case for missing key
+                if term_id == u64::MAX {
+                    let missing_key = self
+                        .req
+                        .missing
+                        .as_ref()
+                        .expect("Found placeholder term_id but `missing` is None");
+                    match missing_key {
+                        Key::Str(missing) => {
+                            buffer.clear();
+                            buffer.push_str(missing);
+                            dict.insert(
+                                IntermediateKey::Str(buffer.to_string()),
+                                intermediate_entry,
+                            );
+                        }
+                        Key::F64(val) => {
+                            buffer.push_str(&val.to_string());
+                            dict.insert(IntermediateKey::F64(*val), intermediate_entry);
+                        }
+                    }
+                } else {
+                    if !term_dict.ord_to_str(term_id, &mut buffer)? {
+                        return Err(TantivyError::InternalError(format!(
+                            "Couldn't find term_id {term_id} in dict"
+                        )));
+                    }
+                    dict.insert(IntermediateKey::Str(buffer.to_string()), intermediate_entry);
+                }
} }
if self.req.min_doc_count == 0 { if self.req.min_doc_count == 0 {
// TODO: Handle rev streaming for descending sorting by keys // TODO: Handle rev streaming for descending sorting by keys
@@ -581,21 +522,34 @@ impl SegmentTermCollector {
}); });
} }
} }
} else if self.column_type == ColumnType::DateTime {
for (val, doc_count) in entries {
let intermediate_entry = into_intermediate_bucket_entry(val, doc_count)?;
let val = i64::from_u64(val);
let date = format_date(val)?;
dict.insert(IntermediateKey::Str(date), intermediate_entry);
}
} else if self.column_type == ColumnType::Bool {
for (val, doc_count) in entries {
let intermediate_entry = into_intermediate_bucket_entry(val, doc_count)?;
let val = bool::from_u64(val);
dict.insert(IntermediateKey::Bool(val), intermediate_entry);
}
} else { } else {
for (val, doc_count) in entries { for (val, doc_count) in entries {
let intermediate_entry = into_intermediate_bucket_entry(val, doc_count)?; let intermediate_entry = into_intermediate_bucket_entry(val, doc_count)?;
let val = f64_from_fastfield_u64(val, &self.field_type); let val = f64_from_fastfield_u64(val, &self.column_type);
dict.insert(IntermediateKey::F64(val), intermediate_entry); dict.insert(IntermediateKey::F64(val), intermediate_entry);
} }
}; };
-        Ok(IntermediateBucketResult::Terms(
-            IntermediateTermBucketResult {
+        Ok(IntermediateBucketResult::Terms {
+            buckets: IntermediateTermBucketResult {
                 entries: dict,
                 sum_other_doc_count,
                 doc_count_error_upper_bound: term_doc_count_before_cutoff,
             },
-        ))
+        })
} }
} }
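The new string branch above reserves `u64::MAX` as a placeholder term id for the "missing" bucket; every other id is an ordinal into the term dictionary. A sketch of that scheme, with a plain slice standing in for tantivy's sstable-backed dictionary (names here are illustrative):

```rust
// u64::MAX marks the synthetic "missing" bucket introduced by the diff; any
// other term id is resolved through the (simplified) dictionary. The real code
// also supports an f64 missing key; this sketch covers only the string case.
fn bucket_key(term_id: u64, dict: &[&str], missing_label: &str) -> String {
    if term_id == u64::MAX {
        missing_label.to_string()
    } else {
        dict[term_id as usize].to_string()
    }
}
```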
@@ -633,6 +587,9 @@ pub(crate) fn cut_off_buckets<T: GetDocCount + Debug>(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use common::DateTime;
use time::{Date, Month};
use crate::aggregation::agg_req::Aggregations; use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::tests::{ use crate::aggregation::tests::{
exec_request, exec_request_with_query, exec_request_with_query_and_memory_limit, exec_request, exec_request_with_query, exec_request_with_query_and_memory_limit,
@@ -641,7 +598,7 @@ mod tests {
use crate::aggregation::AggregationLimits; use crate::aggregation::AggregationLimits;
use crate::indexer::NoMergePolicy; use crate::indexer::NoMergePolicy;
use crate::schema::{Schema, FAST, STRING}; use crate::schema::{Schema, FAST, STRING};
use crate::Index; use crate::{Index, IndexWriter};
#[test] #[test]
fn terms_aggregation_test_single_segment() -> crate::Result<()> { fn terms_aggregation_test_single_segment() -> crate::Result<()> {
@@ -1293,13 +1250,13 @@ mod tests {
// searching for terma, but min_doc_count will return all terms // searching for terma, but min_doc_count will return all terms
let res = exec_request_with_query(agg_req, &index, Some(("string2", "hit")))?; let res = exec_request_with_query(agg_req, &index, Some(("string2", "hit")))?;
assert_eq!(res["my_texts"]["buckets"][0]["key"], "a"); assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 2); assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 2);
assert_eq!( assert_eq!(
res["my_texts"]["buckets"][0]["elhistogram"]["buckets"], res["my_texts"]["buckets"][0]["elhistogram"]["buckets"],
json!([{ "doc_count": 1, "key": 1.0 }, { "doc_count": 1, "key": 2.0 } ]) json!([{ "doc_count": 1, "key": 1.0 }, { "doc_count": 1, "key": 2.0 } ])
); );
assert_eq!(res["my_texts"]["buckets"][1]["key"], "b"); assert_eq!(res["my_texts"]["buckets"][1]["key"], "B");
assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 1); assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 1);
assert_eq!( assert_eq!(
res["my_texts"]["buckets"][1]["elhistogram"]["buckets"], res["my_texts"]["buckets"][1]["elhistogram"]["buckets"],
@@ -1321,6 +1278,7 @@ mod tests {
]; ];
let index = get_test_index_from_terms(false, &terms_per_segment)?; let index = get_test_index_from_terms(false, &terms_per_segment)?;
assert_eq!(index.searchable_segments().unwrap().len(), 2);
let agg_req: Aggregations = serde_json::from_value(json!({ let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": { "my_texts": {
@@ -1404,7 +1362,7 @@ mod tests {
#[test] #[test]
fn terms_aggregation_different_tokenizer_on_ff_test() -> crate::Result<()> { fn terms_aggregation_different_tokenizer_on_ff_test() -> crate::Result<()> {
let terms = vec!["Hello Hello", "Hallo Hallo"]; let terms = vec!["Hello Hello", "Hallo Hallo", "Hallo Hallo"];
let index = get_test_index_from_terms(true, &[terms])?; let index = get_test_index_from_terms(true, &[terms])?;
@@ -1421,10 +1379,10 @@ mod tests {
let res = exec_request_with_query(agg_req, &index, None).unwrap(); let res = exec_request_with_query(agg_req, &index, None).unwrap();
println!("{}", serde_json::to_string_pretty(&res).unwrap()); println!("{}", serde_json::to_string_pretty(&res).unwrap());
assert_eq!(res["my_texts"]["buckets"][0]["key"], "hallo hallo"); assert_eq!(res["my_texts"]["buckets"][0]["key"], "Hallo Hallo");
assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 1); assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 2);
assert_eq!(res["my_texts"]["buckets"][1]["key"], "hello hello"); assert_eq!(res["my_texts"]["buckets"][1]["key"], "Hello Hello");
assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 1); assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 1);
Ok(()) Ok(())
@@ -1506,6 +1464,47 @@ mod tests {
Ok(()) Ok(())
} }
#[test]
fn terms_empty_json() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let json = schema_builder.add_json_field("json", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
// => Segment with empty json
index_writer.add_document(doc!()).unwrap();
index_writer.commit().unwrap();
// => Segment with json, but no field partially_empty
index_writer
.add_document(doc!(json => json!({"different_field": "blue"})))
.unwrap();
index_writer.commit().unwrap();
// => Segment with field partially_empty
index_writer
.add_document(doc!(json => json!({"partially_empty": "blue"})))
.unwrap();
index_writer.add_document(doc!())?;
index_writer.commit().unwrap();
let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": {
"terms": {
"field": "json.partially_empty"
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
assert_eq!(res["my_texts"]["buckets"][0]["key"], "blue");
assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 1);
assert_eq!(res["my_texts"]["buckets"][1], serde_json::Value::Null);
assert_eq!(res["my_texts"]["sum_other_doc_count"], 0);
assert_eq!(res["my_texts"]["doc_count_error_upper_bound"], 0);
Ok(())
}
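The `terms_empty_json` test above exercises the fallback introduced earlier in this diff: a segment may have no term dictionary for a fast field (all documents empty for that field), and the collector now substitutes an empty dictionary instead of panicking on `.expect(...)`. The same `Option` combinator shape, with a `Vec` standing in for tantivy's `StrColumn`:

```rust
// Simplified version of `str_dict_column.as_ref().cloned().unwrap_or_else(...)`:
// fall back to an empty dictionary when the segment has none for this field.
fn dict_or_empty(dict: Option<&Vec<String>>) -> Vec<String> {
    dict.cloned().unwrap_or_else(Vec::new)
}
```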
#[test] #[test]
fn terms_aggregation_bytes() -> crate::Result<()> { fn terms_aggregation_bytes() -> crate::Result<()> {
@@ -1543,4 +1542,389 @@ mod tests {
Ok(()) Ok(())
} }
#[test]
fn terms_aggregation_missing_multi_value() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", FAST);
let id_field = schema_builder.add_u64_field("id", FAST);
let index = Index::create_in_ram(schema_builder.build());
{
let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(
text_field => "Hello Hello",
text_field => "Hello Hello",
id_field => 1u64,
id_field => 1u64,
))?;
// Missing
index_writer.add_document(doc!())?;
index_writer.add_document(doc!(
text_field => "Hello Hello",
))?;
index_writer.add_document(doc!(
text_field => "Hello Hello",
))?;
index_writer.commit()?;
// Empty segment special case
index_writer.add_document(doc!())?;
index_writer.commit()?;
// Full segment special case
index_writer.add_document(doc!(
text_field => "Hello Hello",
id_field => 1u64,
))?;
index_writer.commit()?;
}
let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": {
"terms": {
"field": "text",
"missing": "Empty"
},
},
"my_texts2": {
"terms": {
"field": "text",
"missing": 1337
},
},
"my_ids": {
"terms": {
"field": "id",
"missing": 1337
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
// text field
assert_eq!(res["my_texts"]["buckets"][0]["key"], "Hello Hello");
assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 5);
assert_eq!(res["my_texts"]["buckets"][1]["key"], "Empty");
assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 2);
assert_eq!(
res["my_texts"]["buckets"][2]["key"],
serde_json::Value::Null
);
// text field with number as missing fallback
assert_eq!(res["my_texts2"]["buckets"][0]["key"], "Hello Hello");
assert_eq!(res["my_texts2"]["buckets"][0]["doc_count"], 5);
assert_eq!(res["my_texts2"]["buckets"][1]["key"], 1337.0);
assert_eq!(res["my_texts2"]["buckets"][1]["doc_count"], 2);
assert_eq!(
res["my_texts2"]["buckets"][2]["key"],
serde_json::Value::Null
);
assert_eq!(res["my_texts"]["sum_other_doc_count"], 0);
assert_eq!(res["my_texts"]["doc_count_error_upper_bound"], 0);
// id field
assert_eq!(res["my_ids"]["buckets"][0]["key"], 1337.0);
assert_eq!(res["my_ids"]["buckets"][0]["doc_count"], 4);
assert_eq!(res["my_ids"]["buckets"][1]["key"], 1.0);
assert_eq!(res["my_ids"]["buckets"][1]["doc_count"], 3);
assert_eq!(res["my_ids"]["buckets"][2]["key"], serde_json::Value::Null);
Ok(())
}
#[test]
fn terms_aggregation_missing_simple_id() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let id_field = schema_builder.add_u64_field("id", FAST);
let index = Index::create_in_ram(schema_builder.build());
{
let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(
id_field => 1u64,
))?;
// Missing
index_writer.add_document(doc!())?;
index_writer.add_document(doc!())?;
index_writer.commit()?;
}
let agg_req: Aggregations = serde_json::from_value(json!({
"my_ids": {
"terms": {
"field": "id",
"missing": 1337
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
// id field
assert_eq!(res["my_ids"]["buckets"][0]["key"], 1337.0);
assert_eq!(res["my_ids"]["buckets"][0]["doc_count"], 2);
assert_eq!(res["my_ids"]["buckets"][1]["key"], 1.0);
assert_eq!(res["my_ids"]["buckets"][1]["doc_count"], 1);
assert_eq!(res["my_ids"]["buckets"][2]["key"], serde_json::Value::Null);
Ok(())
}
#[test]
fn terms_aggregation_missing1() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", FAST);
let id_field = schema_builder.add_u64_field("id", FAST);
let index = Index::create_in_ram(schema_builder.build());
{
let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(
text_field => "Hello Hello",
id_field => 1u64,
))?;
// Missing
index_writer.add_document(doc!())?;
index_writer.add_document(doc!(
text_field => "Hello Hello",
))?;
index_writer.add_document(doc!(
text_field => "Hello Hello",
))?;
index_writer.commit()?;
// Empty segment special case
index_writer.add_document(doc!())?;
index_writer.commit()?;
// Full segment special case
index_writer.add_document(doc!(
text_field => "Hello Hello",
id_field => 1u64,
))?;
index_writer.commit()?;
}
let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": {
"terms": {
"field": "text",
"missing": "Empty"
},
},
"my_texts2": {
"terms": {
"field": "text",
"missing": 1337
},
},
"my_ids": {
"terms": {
"field": "id",
"missing": 1337
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
// text field
assert_eq!(res["my_texts"]["buckets"][0]["key"], "Hello Hello");
assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 4);
assert_eq!(res["my_texts"]["buckets"][1]["key"], "Empty");
assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 2);
assert_eq!(
res["my_texts"]["buckets"][2]["key"],
serde_json::Value::Null
);
// text field with number as missing fallback
assert_eq!(res["my_texts2"]["buckets"][0]["key"], "Hello Hello");
assert_eq!(res["my_texts2"]["buckets"][0]["doc_count"], 4);
assert_eq!(res["my_texts2"]["buckets"][1]["key"], 1337.0);
assert_eq!(res["my_texts2"]["buckets"][1]["doc_count"], 2);
assert_eq!(
res["my_texts2"]["buckets"][2]["key"],
serde_json::Value::Null
);
assert_eq!(res["my_texts"]["sum_other_doc_count"], 0);
assert_eq!(res["my_texts"]["doc_count_error_upper_bound"], 0);
// id field
assert_eq!(res["my_ids"]["buckets"][0]["key"], 1337.0);
assert_eq!(res["my_ids"]["buckets"][0]["doc_count"], 4);
assert_eq!(res["my_ids"]["buckets"][1]["key"], 1.0);
assert_eq!(res["my_ids"]["buckets"][1]["doc_count"], 2);
assert_eq!(res["my_ids"]["buckets"][2]["key"], serde_json::Value::Null);
Ok(())
}
#[test]
fn terms_aggregation_missing_empty() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("text", FAST);
schema_builder.add_u64_field("id", FAST);
let index = Index::create_in_ram(schema_builder.build());
{
let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
// Empty segment special case
index_writer.add_document(doc!())?;
index_writer.commit()?;
}
let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": {
"terms": {
"field": "text",
"missing": "Empty"
},
},
"my_texts2": {
"terms": {
"field": "text",
"missing": 1337
},
},
"my_ids": {
"terms": {
"field": "id",
"missing": 1337
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
// text field
assert_eq!(res["my_texts"]["buckets"][0]["key"], "Empty");
assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 1);
assert_eq!(
res["my_texts"]["buckets"][1]["key"],
serde_json::Value::Null
);
// text field with number as missing fallback
assert_eq!(res["my_texts2"]["buckets"][0]["key"], 1337.0);
assert_eq!(res["my_texts2"]["buckets"][0]["doc_count"], 1);
assert_eq!(
res["my_texts2"]["buckets"][1]["key"],
serde_json::Value::Null
);
assert_eq!(res["my_texts"]["sum_other_doc_count"], 0);
assert_eq!(res["my_texts"]["doc_count_error_upper_bound"], 0);
// id field
assert_eq!(res["my_ids"]["buckets"][0]["key"], 1337.0);
assert_eq!(res["my_ids"]["buckets"][0]["doc_count"], 1);
assert_eq!(res["my_ids"]["buckets"][1]["key"], serde_json::Value::Null);
Ok(())
}
#[test]
fn terms_aggregation_date() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field("date_field", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut writer = index.writer_with_num_threads(1, 15_000_000)?;
writer.add_document(doc!(date_field=>DateTime::from_primitive(Date::from_calendar_date(1982, Month::September, 17)?.with_hms(0, 0, 0)?)))?;
writer.add_document(doc!(date_field=>DateTime::from_primitive(Date::from_calendar_date(1982, Month::September, 17)?.with_hms(0, 0, 0)?)))?;
writer.add_document(doc!(date_field=>DateTime::from_primitive(Date::from_calendar_date(1983, Month::September, 27)?.with_hms(0, 0, 0)?)))?;
writer.commit()?;
}
let agg_req: Aggregations = serde_json::from_value(json!({
"my_date": {
"terms": {
"field": "date_field"
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
// date_field field
assert_eq!(res["my_date"]["buckets"][0]["key"], "1982-09-17T00:00:00Z");
assert_eq!(res["my_date"]["buckets"][0]["doc_count"], 2);
assert_eq!(res["my_date"]["buckets"][1]["key"], "1983-09-27T00:00:00Z");
assert_eq!(res["my_date"]["buckets"][1]["doc_count"], 1);
assert_eq!(res["my_date"]["buckets"][2]["key"], serde_json::Value::Null);
Ok(())
}
#[test]
fn terms_aggregation_date_missing() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field("date_field", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut writer = index.writer_with_num_threads(1, 15_000_000)?;
writer.add_document(doc!(date_field=>DateTime::from_primitive(Date::from_calendar_date(1982, Month::September, 17)?.with_hms(0, 0, 0)?)))?;
writer.add_document(doc!(date_field=>DateTime::from_primitive(Date::from_calendar_date(1982, Month::September, 17)?.with_hms(0, 0, 0)?)))?;
writer.add_document(doc!(date_field=>DateTime::from_primitive(Date::from_calendar_date(1983, Month::September, 27)?.with_hms(0, 0, 0)?)))?;
writer.add_document(doc!())?;
writer.commit()?;
}
let agg_req: Aggregations = serde_json::from_value(json!({
"my_date": {
"terms": {
"field": "date_field",
"missing": "1982-09-17T00:00:00Z"
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
// date_field field
assert_eq!(res["my_date"]["buckets"][0]["key"], "1982-09-17T00:00:00Z");
assert_eq!(res["my_date"]["buckets"][0]["doc_count"], 3);
assert_eq!(res["my_date"]["buckets"][1]["key"], "1983-09-27T00:00:00Z");
assert_eq!(res["my_date"]["buckets"][1]["doc_count"], 1);
assert_eq!(res["my_date"]["buckets"][2]["key"], serde_json::Value::Null);
Ok(())
}
#[test]
fn terms_aggregation_bool() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_bool_field("bool_field", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut writer = index.writer_with_num_threads(1, 15_000_000)?;
writer.add_document(doc!(field=>true))?;
writer.add_document(doc!(field=>false))?;
writer.add_document(doc!(field=>true))?;
writer.commit()?;
}
let agg_req: Aggregations = serde_json::from_value(json!({
"my_bool": {
"terms": {
"field": "bool_field"
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
assert_eq!(res["my_bool"]["buckets"][0]["key"], 1.0);
assert_eq!(res["my_bool"]["buckets"][0]["key_as_string"], "true");
assert_eq!(res["my_bool"]["buckets"][0]["doc_count"], 2);
assert_eq!(res["my_bool"]["buckets"][1]["key"], 0.0);
assert_eq!(res["my_bool"]["buckets"][1]["key_as_string"], "false");
assert_eq!(res["my_bool"]["buckets"][1]["doc_count"], 1);
assert_eq!(res["my_bool"]["buckets"][2]["key"], serde_json::Value::Null);
Ok(())
}
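The bool test above shows buckets keyed `1.0`/`0.0` with `key_as_string` of `"true"`/`"false"`: bool fast-field values round-trip through `u64` (false as 0, true as 1), which is what `bool::from_u64` decodes in the new `ColumnType::Bool` branch. A minimal sketch of that mapping (the crate's own trait for this is not shown here):

```rust
// false <-> 0, true <-> 1, matching the bucket keys asserted in the test above.
fn bool_to_u64(b: bool) -> u64 {
    b as u64
}

fn bool_from_u64(v: u64) -> bool {
    v != 0
}
```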
} }


@@ -0,0 +1,481 @@
use rustc_hash::FxHashMap;
use crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
IntermediateKey, IntermediateTermBucketEntry, IntermediateTermBucketResult,
};
use crate::aggregation::segment_agg_result::{
build_segment_agg_collector, SegmentAggregationCollector,
};
/// The specialized missing term aggregation.
#[derive(Default, Debug, Clone)]
pub struct TermMissingAgg {
missing_count: u32,
accessor_idx: usize,
sub_agg: Option<Box<dyn SegmentAggregationCollector>>,
}
impl TermMissingAgg {
pub(crate) fn new(
accessor_idx: usize,
sub_aggregations: &mut AggregationsWithAccessor,
) -> crate::Result<Self> {
let has_sub_aggregations = !sub_aggregations.is_empty();
let sub_agg = if has_sub_aggregations {
let sub_aggregation = build_segment_agg_collector(sub_aggregations)?;
Some(sub_aggregation)
} else {
None
};
Ok(Self {
accessor_idx,
sub_agg,
..Default::default()
})
}
}
impl SegmentAggregationCollector for TermMissingAgg {
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let agg_with_accessor = &agg_with_accessor.aggs.values[self.accessor_idx];
let term_agg = agg_with_accessor
.agg
.agg
.as_term()
.expect("TermMissingAgg collector must be term agg req");
let missing = term_agg
.missing
.as_ref()
.expect("TermMissingAgg collector, but no missing found in agg req")
.clone();
let mut entries: FxHashMap<IntermediateKey, IntermediateTermBucketEntry> =
Default::default();
let mut missing_entry = IntermediateTermBucketEntry {
doc_count: self.missing_count,
sub_aggregation: Default::default(),
};
if let Some(sub_agg) = self.sub_agg {
let mut res = IntermediateAggregationResults::default();
sub_agg.add_intermediate_aggregation_result(
&agg_with_accessor.sub_aggregation,
&mut res,
)?;
missing_entry.sub_aggregation = res;
}
entries.insert(missing.into(), missing_entry);
let bucket = IntermediateBucketResult::Terms {
buckets: IntermediateTermBucketResult {
entries,
sum_other_doc_count: 0,
doc_count_error_upper_bound: 0,
},
};
results.push(name, IntermediateAggregationResult::Bucket(bucket))?;
Ok(())
}
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
let agg = &mut agg_with_accessor.aggs.values[self.accessor_idx];
let has_value = agg
.accessors
.iter()
.any(|(acc, _)| acc.index.has_value(doc));
if !has_value {
self.missing_count += 1;
if let Some(sub_agg) = self.sub_agg.as_mut() {
sub_agg.collect(doc, &mut agg.sub_aggregation)?;
}
}
Ok(())
}
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
for doc in docs {
self.collect(*doc, agg_with_accessor)?;
}
Ok(())
}
}
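The `collect` method above counts a document as missing only if none of the field's column accessors holds a value for it. A sketch of that check, with columns modeled as `Vec<Option<u64>>` instead of tantivy's column index:

```rust
// A doc is "missing" only when every column accessor lacks a value for it,
// mirroring the `!accessors.iter().any(|(acc, _)| acc.index.has_value(doc))`
// check in TermMissingAgg::collect. Types here are simplified stand-ins.
fn is_missing(doc: usize, columns: &[Vec<Option<u64>>]) -> bool {
    !columns
        .iter()
        .any(|col| col.get(doc).map_or(false, |v| v.is_some()))
}
```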
#[cfg(test)]
mod tests {
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::tests::exec_request_with_query;
use crate::schema::{Schema, FAST};
use crate::{Index, IndexWriter};
#[test]
fn terms_aggregation_missing_mixed_type_mult_seg_sub_agg() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let json = schema_builder.add_json_field("json", FAST);
let score = schema_builder.add_f64_field("score", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
// => Segment with all values numeric
index_writer
.add_document(doc!(score => 1.0, json => json!({"mixed_type": 10.0})))
.unwrap();
index_writer.add_document(doc!(score => 5.0))?;
// index_writer.commit().unwrap();
// => Segment with all values text
index_writer
.add_document(doc!(score => 1.0, json => json!({"mixed_type": "blue"})))
.unwrap();
index_writer.add_document(doc!(score => 5.0))?;
// index_writer.commit().unwrap();
// => Segment with mixed values
index_writer.add_document(doc!(json => json!({"mixed_type": "red"})))?;
index_writer.add_document(doc!(json => json!({"mixed_type": -20.5})))?;
index_writer.add_document(doc!(json => json!({"mixed_type": true})))?;
index_writer.add_document(doc!(score => 5.0))?;
index_writer.commit().unwrap();
let agg_req: Aggregations = serde_json::from_value(json!({
"replace_null": {
"terms": {
"field": "json.mixed_type",
"missing": "NULL"
},
"aggs": {
"sum_score": {
"sum": {
"field": "score"
}
}
}
},
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
// text field
assert_eq!(res["replace_null"]["buckets"][0]["key"], "NULL");
assert_eq!(res["replace_null"]["buckets"][0]["doc_count"], 3);
assert_eq!(
res["replace_null"]["buckets"][0]["sum_score"]["value"],
15.0
);
assert_eq!(res["replace_null"]["sum_other_doc_count"], 0);
assert_eq!(res["replace_null"]["doc_count_error_upper_bound"], 0);
Ok(())
}
#[test]
fn terms_aggregation_missing_mixed_type_sub_agg_reg1() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let json = schema_builder.add_json_field("json", FAST);
let score = schema_builder.add_f64_field("score", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
// => Segment with all values numeric
index_writer.add_document(doc!(score => 1.0, json => json!({"mixed_type": 10.0})))?;
index_writer.add_document(doc!(score => 5.0))?;
index_writer.add_document(doc!(score => 5.0))?;
index_writer.commit().unwrap();
let agg_req: Aggregations = serde_json::from_value(json!({
"replace_null": {
"terms": {
"field": "json.mixed_type",
"missing": "NULL"
},
"aggs": {
"sum_score": {
"sum": {
"field": "score"
}
}
}
},
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
// text field
assert_eq!(res["replace_null"]["buckets"][0]["key"], "NULL");
assert_eq!(res["replace_null"]["buckets"][0]["doc_count"], 2);
assert_eq!(
res["replace_null"]["buckets"][0]["sum_score"]["value"],
10.0
);
assert_eq!(res["replace_null"]["sum_other_doc_count"], 0);
assert_eq!(res["replace_null"]["doc_count_error_upper_bound"], 0);
Ok(())
}
#[test]
fn terms_aggregation_missing_mult_seg_empty() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let score = schema_builder.add_f64_field("score", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(score => 5.0))?;
index_writer.commit().unwrap();
index_writer.add_document(doc!(score => 5.0))?;
index_writer.commit().unwrap();
index_writer.add_document(doc!(score => 5.0))?;
index_writer.commit().unwrap();
let agg_req: Aggregations = serde_json::from_value(json!({
"replace_null": {
"terms": {
"field": "json.mixed_type",
"missing": "NULL"
},
"aggs": {
"sum_score": {
"sum": {
"field": "score"
}
}
}
},
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
// text field
assert_eq!(res["replace_null"]["buckets"][0]["key"], "NULL");
assert_eq!(res["replace_null"]["buckets"][0]["doc_count"], 3);
assert_eq!(
res["replace_null"]["buckets"][0]["sum_score"]["value"],
15.0
);
assert_eq!(res["replace_null"]["sum_other_doc_count"], 0);
assert_eq!(res["replace_null"]["doc_count_error_upper_bound"], 0);
Ok(())
}
    #[test]
    fn terms_aggregation_missing_single_seg_empty() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let score = schema_builder.add_f64_field("score", FAST);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
        index_writer.add_document(doc!(score => 5.0))?;
        index_writer.add_document(doc!(score => 5.0))?;
        index_writer.add_document(doc!(score => 5.0))?;
        index_writer.commit().unwrap();

        let agg_req: Aggregations = serde_json::from_value(json!({
            "replace_null": {
                "terms": {
                    "field": "json.mixed_type",
                    "missing": "NULL"
                },
                "aggs": {
                    "sum_score": {
                        "sum": {
                            "field": "score"
                        }
                    }
                }
            },
        }))
        .unwrap();

        let res = exec_request_with_query(agg_req, &index, None)?;

        // text field
        assert_eq!(res["replace_null"]["buckets"][0]["key"], "NULL");
        assert_eq!(res["replace_null"]["buckets"][0]["doc_count"], 3);
        assert_eq!(
            res["replace_null"]["buckets"][0]["sum_score"]["value"],
            15.0
        );
        assert_eq!(res["replace_null"]["sum_other_doc_count"], 0);
        assert_eq!(res["replace_null"]["doc_count_error_upper_bound"], 0);

        Ok(())
    }
    #[test]
    fn terms_aggregation_missing_mixed_type_mult_seg() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let json = schema_builder.add_json_field("json", FAST);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
        // => Segment with all values numeric
        index_writer
            .add_document(doc!(json => json!({"mixed_type": 10.0})))
            .unwrap();
        index_writer.add_document(doc!())?;
        index_writer.commit().unwrap();
        //// => Segment with all values text
        index_writer
            .add_document(doc!(json => json!({"mixed_type": "blue"})))
            .unwrap();
        index_writer.add_document(doc!())?;
        index_writer.commit().unwrap();
        // => Segment with mixed values
        index_writer
            .add_document(doc!(json => json!({"mixed_type": "red"})))
            .unwrap();
        index_writer
            .add_document(doc!(json => json!({"mixed_type": -20.5})))
            .unwrap();
        index_writer
            .add_document(doc!(json => json!({"mixed_type": true})))
            .unwrap();
        index_writer.add_document(doc!())?;
        index_writer.commit().unwrap();

        let agg_req: Aggregations = serde_json::from_value(json!({
            "replace_null": {
                "terms": {
                    "field": "json.mixed_type",
                    "missing": "NULL"
                },
            },
            "replace_num": {
                "terms": {
                    "field": "json.mixed_type",
                    "missing": 1337
                },
            },
        }))
        .unwrap();

        let res = exec_request_with_query(agg_req, &index, None)?;

        // text field
        assert_eq!(res["replace_null"]["buckets"][0]["key"], "NULL");
        assert_eq!(res["replace_null"]["buckets"][0]["doc_count"], 3);
        assert_eq!(res["replace_num"]["buckets"][0]["key"], 1337.0);
        assert_eq!(res["replace_num"]["buckets"][0]["doc_count"], 3);
        assert_eq!(res["replace_null"]["sum_other_doc_count"], 0);
        assert_eq!(res["replace_null"]["doc_count_error_upper_bound"], 0);

        Ok(())
    }
    #[test]
    fn terms_aggregation_missing_str_on_numeric_field() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let json = schema_builder.add_json_field("json", FAST);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
        // => Segment with all values numeric
        index_writer
            .add_document(doc!(json => json!({"mixed_type": 10.0})))
            .unwrap();
        index_writer.add_document(doc!())?;
        index_writer.add_document(doc!())?;
        index_writer
            .add_document(doc!(json => json!({"mixed_type": -20.5})))
            .unwrap();
        index_writer.add_document(doc!())?;
        index_writer.commit().unwrap();

        let agg_req: Aggregations = serde_json::from_value(json!({
            "replace_null": {
                "terms": {
                    "field": "json.mixed_type",
                    "missing": "NULL"
                },
            },
        }))
        .unwrap();

        let res = exec_request_with_query(agg_req, &index, None)?;

        // text field
        assert_eq!(res["replace_null"]["buckets"][0]["key"], "NULL");
        assert_eq!(res["replace_null"]["buckets"][0]["doc_count"], 3);
        assert_eq!(res["replace_null"]["sum_other_doc_count"], 0);
        assert_eq!(res["replace_null"]["doc_count_error_upper_bound"], 0);

        Ok(())
    }
    #[test]
    fn terms_aggregation_missing_mixed_type_one_seg() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let json = schema_builder.add_json_field("json", FAST);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
        // => Segment with all values numeric
        index_writer
            .add_document(doc!(json => json!({"mixed_type": 10.0})))
            .unwrap();
        index_writer.add_document(doc!())?;
        //// => Segment with all values text
        index_writer
            .add_document(doc!(json => json!({"mixed_type": "blue"})))
            .unwrap();
        index_writer.add_document(doc!())?;
        // => Segment with mixed values
        index_writer
            .add_document(doc!(json => json!({"mixed_type": "red"})))
            .unwrap();
        index_writer
            .add_document(doc!(json => json!({"mixed_type": -20.5})))
            .unwrap();
        index_writer
            .add_document(doc!(json => json!({"mixed_type": true})))
            .unwrap();
        index_writer.add_document(doc!())?;
        index_writer.commit().unwrap();

        let agg_req: Aggregations = serde_json::from_value(json!({
            "replace_null": {
                "terms": {
                    "field": "json.mixed_type",
                    "missing": "NULL"
                },
            },
        }))
        .unwrap();

        let res = exec_request_with_query(agg_req, &index, None)?;

        // text field
        assert_eq!(res["replace_null"]["buckets"][0]["key"], "NULL");
        assert_eq!(res["replace_null"]["buckets"][0]["doc_count"], 3);
        assert_eq!(res["replace_null"]["sum_other_doc_count"], 0);
        assert_eq!(res["replace_null"]["doc_count_error_upper_bound"], 0);

        Ok(())
    }
}
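
For reference, the terms-aggregation request shape exercised throughout these tests, written out as plain JSON (field names come from the test schema; `missing` may be a string or a number, as the `replace_num` variant above shows):

```json
{
  "replace_null": {
    "terms": { "field": "json.mixed_type", "missing": "NULL" },
    "aggs": {
      "sum_score": { "sum": { "field": "score" } }
    }
  }
}
```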

View File

@@ -8,7 +8,7 @@ use super::segment_agg_result::{
 };
 use crate::aggregation::agg_req_with_accessor::get_aggs_with_segment_accessor_and_validate;
 use crate::collector::{Collector, SegmentCollector};
-use crate::{DocId, SegmentReader, TantivyError};
+use crate::{DocId, SegmentOrdinal, SegmentReader, TantivyError};
 
 /// The default max bucket count, before the aggregation fails.
 pub const DEFAULT_BUCKET_LIMIT: u32 = 65000;
@@ -64,10 +64,15 @@ impl Collector for DistributedAggregationCollector {
     fn for_segment(
         &self,
-        _segment_local_id: crate::SegmentOrdinal,
+        segment_local_id: crate::SegmentOrdinal,
         reader: &crate::SegmentReader,
     ) -> crate::Result<Self::Child> {
-        AggregationSegmentCollector::from_agg_req_and_reader(&self.agg, reader, &self.limits)
+        AggregationSegmentCollector::from_agg_req_and_reader(
+            &self.agg,
+            reader,
+            segment_local_id,
+            &self.limits,
+        )
     }
 
     fn requires_scoring(&self) -> bool {
@@ -89,10 +94,15 @@ impl Collector for AggregationCollector {
     fn for_segment(
         &self,
-        _segment_local_id: crate::SegmentOrdinal,
+        segment_local_id: crate::SegmentOrdinal,
         reader: &crate::SegmentReader,
     ) -> crate::Result<Self::Child> {
-        AggregationSegmentCollector::from_agg_req_and_reader(&self.agg, reader, &self.limits)
+        AggregationSegmentCollector::from_agg_req_and_reader(
+            &self.agg,
+            reader,
+            segment_local_id,
+            &self.limits,
+        )
     }
 
     fn requires_scoring(&self) -> bool {
@@ -135,10 +145,11 @@ impl AggregationSegmentCollector {
     pub fn from_agg_req_and_reader(
         agg: &Aggregations,
         reader: &SegmentReader,
+        segment_ordinal: SegmentOrdinal,
         limits: &AggregationLimits,
     ) -> crate::Result<Self> {
         let mut aggs_with_accessor =
-            get_aggs_with_segment_accessor_and_validate(agg, reader, limits)?;
+            get_aggs_with_segment_accessor_and_validate(agg, reader, segment_ordinal, limits)?;
         let result =
             BufAggregationCollector::new(build_segment_agg_collector(&mut aggs_with_accessor)?);
         Ok(AggregationSegmentCollector {

View File

@@ -19,7 +19,7 @@ use super::bucket::{
 };
 use super::metric::{
     IntermediateAverage, IntermediateCount, IntermediateMax, IntermediateMin, IntermediateStats,
-    IntermediateSum, PercentilesCollector,
+    IntermediateSum, PercentilesCollector, TopHitsCollector,
 };
 use super::segment_agg_result::AggregationLimits;
 use super::{format_date, AggregationError, Key, SerializedKey};
@@ -41,6 +41,8 @@ pub struct IntermediateAggregationResults {
 /// This might seem redundant with `Key`, but the point is to have a different
 /// Serialize implementation.
 pub enum IntermediateKey {
+    /// Bool key
+    Bool(bool),
     /// String key
     Str(String),
     /// `f64` key
@@ -59,6 +61,7 @@ impl From<IntermediateKey> for Key {
         match value {
             IntermediateKey::Str(s) => Self::Str(s),
             IntermediateKey::F64(f) => Self::F64(f),
+            IntermediateKey::Bool(f) => Self::F64(f as u64 as f64),
         }
     }
 }
@@ -71,6 +74,7 @@ impl std::hash::Hash for IntermediateKey {
         match self {
             IntermediateKey::Str(text) => text.hash(state),
             IntermediateKey::F64(val) => val.to_bits().hash(state),
+            IntermediateKey::Bool(val) => val.hash(state),
         }
     }
 }
@@ -111,9 +115,6 @@ impl IntermediateAggregationResults {
     }
 
     /// Convert intermediate result and its aggregation request to the final result.
-    ///
-    /// Internal function, AggregationsInternal is used instead Aggregations, which is optimized
-    /// for internal processing, by splitting metric and buckets into separate groups.
     pub(crate) fn into_final_result_internal(
         self,
         req: &Aggregations,
@@ -121,7 +122,14 @@ impl IntermediateAggregationResults {
     ) -> crate::Result<AggregationResults> {
         let mut results: FxHashMap<String, AggregationResult> = FxHashMap::default();
         for (key, agg_res) in self.aggs_res.into_iter() {
-            let req = req.get(key.as_str()).unwrap();
+            let req = req.get(key.as_str()).unwrap_or_else(|| {
+                panic!(
+                    "Could not find key {:?} in request keys {:?}. This probably means that \
+                     add_intermediate_aggregation_result passed the wrong agg object.",
+                    key,
+                    req.keys().collect::<Vec<_>>()
+                )
+            });
             results.insert(key, agg_res.into_final_result(req, limits)?);
         }
         // Handle empty results
@@ -162,16 +170,22 @@ impl IntermediateAggregationResults {
 pub(crate) fn empty_from_req(req: &Aggregation) -> IntermediateAggregationResult {
     use AggregationVariants::*;
     match req.agg {
-        Terms(_) => IntermediateAggregationResult::Bucket(IntermediateBucketResult::Terms(
-            Default::default(),
-        )),
+        Terms(_) => IntermediateAggregationResult::Bucket(IntermediateBucketResult::Terms {
+            buckets: Default::default(),
+        }),
         Range(_) => IntermediateAggregationResult::Bucket(IntermediateBucketResult::Range(
             Default::default(),
         )),
-        Histogram(_) | DateHistogram(_) => {
+        Histogram(_) => {
             IntermediateAggregationResult::Bucket(IntermediateBucketResult::Histogram {
                 buckets: Vec::new(),
-                column_type: None,
+                is_date_agg: false,
+            })
+        }
+        DateHistogram(_) => {
+            IntermediateAggregationResult::Bucket(IntermediateBucketResult::Histogram {
+                buckets: Vec::new(),
+                is_date_agg: true,
             })
         }
         Average(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Average(
@@ -195,6 +209,9 @@ pub(crate) fn empty_from_req(req: &Aggregation) -> IntermediateAggregationResult
         Percentiles(_) => IntermediateAggregationResult::Metric(
            IntermediateMetricResult::Percentiles(PercentilesCollector::default()),
         ),
+        TopHits(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::TopHits(
+            TopHitsCollector::default(),
+        )),
     }
 }
@@ -255,6 +272,8 @@ pub enum IntermediateMetricResult {
     Stats(IntermediateStats),
     /// Intermediate sum result.
     Sum(IntermediateSum),
+    /// Intermediate top_hits result
+    TopHits(TopHitsCollector),
 }
 
 impl IntermediateMetricResult {
@@ -282,9 +301,13 @@ impl IntermediateMetricResult {
                 percentiles
                     .into_final_result(req.agg.as_percentile().expect("unexpected metric type")),
             ),
+            IntermediateMetricResult::TopHits(top_hits) => {
+                MetricResult::TopHits(top_hits.finalize())
+            }
         }
     }
 
+    // TODO: this is our top-of-the-chain fruit merge mech
     fn merge_fruits(&mut self, other: IntermediateMetricResult) -> crate::Result<()> {
         match (self, other) {
             (
@@ -320,6 +343,9 @@ impl IntermediateMetricResult {
             ) => {
                 left.merge_fruits(right)?;
             }
+            (IntermediateMetricResult::TopHits(left), IntermediateMetricResult::TopHits(right)) => {
+                left.merge_fruits(right)?;
+            }
             _ => {
                 panic!("incompatible fruit types in tree or missing merge_fruits handler");
             }
@@ -339,13 +365,16 @@ pub enum IntermediateBucketResult {
     /// This is the histogram entry for a bucket, which contains a key, count, and optionally
     /// sub_aggregations.
     Histogram {
-        /// The column_type of the underlying `Column`
-        column_type: Option<ColumnType>,
-        /// The buckets
+        /// The column_type of the underlying `Column` is DateTime
+        is_date_agg: bool,
+        /// The histogram buckets
         buckets: Vec<IntermediateHistogramBucketEntry>,
     },
     /// Term aggregation
-    Terms(IntermediateTermBucketResult),
+    Terms {
+        /// The term buckets
+        buckets: IntermediateTermBucketResult,
+    },
 }
 
 impl IntermediateBucketResult {
@@ -395,7 +424,7 @@ impl IntermediateBucketResult {
                 Ok(BucketResult::Range { buckets })
             }
             IntermediateBucketResult::Histogram {
-                column_type,
+                is_date_agg,
                 buckets,
             } => {
                 let histogram_req = &req
@@ -404,7 +433,7 @@ impl IntermediateBucketResult {
                     .expect("unexpected aggregation, expected histogram aggregation");
                 let buckets = intermediate_histogram_buckets_to_final_buckets(
                     buckets,
-                    column_type,
+                    is_date_agg,
                     histogram_req,
                     req.sub_aggregation(),
                     limits,
@@ -422,7 +451,7 @@ impl IntermediateBucketResult {
                 };
                 Ok(BucketResult::Histogram { buckets })
             }
-            IntermediateBucketResult::Terms(terms) => terms.into_final_result(
+            IntermediateBucketResult::Terms { buckets: terms } => terms.into_final_result(
                 req.agg
                     .as_term()
                     .expect("unexpected aggregation, expected term aggregation"),
@@ -435,8 +464,12 @@ impl IntermediateBucketResult {
     fn merge_fruits(&mut self, other: IntermediateBucketResult) -> crate::Result<()> {
         match (self, other) {
             (
-                IntermediateBucketResult::Terms(term_res_left),
-                IntermediateBucketResult::Terms(term_res_right),
+                IntermediateBucketResult::Terms {
+                    buckets: term_res_left,
+                },
+                IntermediateBucketResult::Terms {
+                    buckets: term_res_right,
+                },
             ) => {
                 merge_maps(&mut term_res_left.entries, term_res_right.entries)?;
                 term_res_left.sum_other_doc_count += term_res_right.sum_other_doc_count;
@@ -453,17 +486,17 @@ impl IntermediateBucketResult {
             (
                 IntermediateBucketResult::Histogram {
                     buckets: buckets_left,
-                    ..
+                    is_date_agg: _,
                 },
                 IntermediateBucketResult::Histogram {
                     buckets: buckets_right,
-                    ..
+                    is_date_agg: _,
                 },
             ) => {
                 let buckets: Result<Vec<IntermediateHistogramBucketEntry>, TantivyError> =
                     buckets_left
                         .drain(..)
-                        .merge_join_by(buckets_right.into_iter(), |left, right| {
+                        .merge_join_by(buckets_right, |left, right| {
                             left.key.partial_cmp(&right.key).unwrap_or(Ordering::Equal)
                         })
                         .map(|either| match either {
@@ -520,8 +553,15 @@ impl IntermediateTermBucketResult {
             .into_iter()
             .filter(|bucket| bucket.1.doc_count as u64 >= req.min_doc_count)
             .map(|(key, entry)| {
+                let key_as_string = match key {
+                    IntermediateKey::Bool(key) => {
+                        let val = if key { "true" } else { "false" };
+                        Some(val.to_string())
+                    }
+                    _ => None,
+                };
                 Ok(BucketEntry {
-                    key_as_string: None,
+                    key_as_string,
                     key: key.into(),
                     doc_count: entry.doc_count as u64,
                     sub_aggregation: entry
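
The bool-key handling this diff introduces can be summarized in a standalone sketch (stdlib only; the function names here are illustrative, not tantivy API): a bool term key becomes an `f64` key via `as u64 as f64`, and the bucket additionally carries a `key_as_string` of `"true"`/`"false"`.

```rust
// Illustrative sketch of the conversions added in this diff, mirroring
// `From<IntermediateKey> for Key` and the `key_as_string` computation
// in `IntermediateTermBucketResult::into_final_result`.
fn bool_key_to_f64(key: bool) -> f64 {
    // false => 0.0, true => 1.0, matching `f as u64 as f64`
    key as u64 as f64
}

fn bool_key_as_string(key: bool) -> String {
    let val = if key { "true" } else { "false" };
    val.to_string()
}

fn main() {
    assert_eq!(bool_key_to_f64(true), 1.0);
    assert_eq!(bool_key_to_f64(false), 0.0);
    assert_eq!(bool_key_as_string(true), "true");
}
```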

View File

@@ -2,7 +2,8 @@ use std::fmt::Debug;
 
 use serde::{Deserialize, Serialize};
 
-use super::{IntermediateStats, SegmentStatsCollector};
+use super::*;
+use crate::aggregation::*;
 
 /// A single-value metric aggregation that computes the average of numeric values that are
 /// extracted from the aggregated documents.
@@ -20,12 +21,21 @@ use super::{IntermediateStats, SegmentStatsCollector};
 pub struct AverageAggregation {
     /// The field name to compute the average on.
     pub field: String,
+    /// The missing parameter defines how documents that are missing a value should be treated.
+    /// By default they will be ignored but it is also possible to treat them as if they had a
+    /// value. Examples in JSON format:
+    /// { "field": "my_numbers", "missing": "10.0" }
+    #[serde(default, deserialize_with = "deserialize_option_f64")]
+    pub missing: Option<f64>,
 }
 
 impl AverageAggregation {
     /// Creates a new [`AverageAggregation`] instance from a field name.
     pub fn from_field_name(field_name: String) -> Self {
-        Self { field: field_name }
+        Self {
+            field: field_name,
+            missing: None,
+        }
     }
 
     /// Returns the field name the aggregation is computed on.
     pub fn field_name(&self) -> &str {
@@ -56,3 +66,71 @@ impl IntermediateAverage {
         self.stats.finalize().avg
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn deserialization_with_missing_test1() {
+        let json = r#"{
+            "field": "score",
+            "missing": "10.0"
+        }"#;
+        let avg: AverageAggregation = serde_json::from_str(json).unwrap();
+        assert_eq!(avg.field, "score");
+        assert_eq!(avg.missing, Some(10.0));
+
+        // no dot
+        let json = r#"{
+            "field": "score",
+            "missing": "10"
+        }"#;
+        let avg: AverageAggregation = serde_json::from_str(json).unwrap();
+        assert_eq!(avg.field, "score");
+        assert_eq!(avg.missing, Some(10.0));
+
+        // from value
+        let avg: AverageAggregation = serde_json::from_value(json!({
+            "field": "score_f64",
+            "missing": 10u64,
+        }))
+        .unwrap();
+        assert_eq!(avg.missing, Some(10.0));
+
+        // from value
+        let avg: AverageAggregation = serde_json::from_value(json!({
+            "field": "score_f64",
+            "missing": 10u32,
+        }))
+        .unwrap();
+        assert_eq!(avg.missing, Some(10.0));
+
+        let avg: AverageAggregation = serde_json::from_value(json!({
+            "field": "score_f64",
+            "missing": 10i8,
+        }))
+        .unwrap();
+        assert_eq!(avg.missing, Some(10.0));
+    }
+
+    #[test]
+    fn deserialization_with_missing_test_fail() {
+        let json = r#"{
+            "field": "score",
+            "missing": "a"
+        }"#;
+        let avg: Result<AverageAggregation, _> = serde_json::from_str(json);
+        assert!(avg.is_err());
+        assert!(avg
+            .unwrap_err()
+            .to_string()
+            .contains("Failed to parse f64 from string: \"a\""));
+
+        // Disallow NaN
+        let json = r#"{
+            "field": "score",
+            "missing": "NaN"
+        }"#;
+        let avg: Result<AverageAggregation, _> = serde_json::from_str(json);
+        assert!(avg.is_err());
+        assert!(avg.unwrap_err().to_string().contains("NaN"));
+    }
+}
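
The parsing rule behind `deserialize_option_f64` that these tests exercise can be sketched without serde (stdlib only; `parse_missing` is a hypothetical helper name, not part of tantivy): accept `"10"` or `"10.0"`, reject non-numeric strings, and explicitly reject NaN, which `f64::from_str` would otherwise happily produce from the string `"NaN"`.

```rust
// Hypothetical stdlib-only sketch of the string-to-f64 rule for `missing`.
fn parse_missing(raw: &str) -> Result<f64, String> {
    let val: f64 = raw
        .parse()
        .map_err(|_| format!("Failed to parse f64 from string: {:?}", raw))?;
    // `"NaN".parse::<f64>()` succeeds in Rust, so NaN must be rejected explicitly.
    if val.is_nan() {
        return Err("NaN is not allowed for the `missing` parameter".to_string());
    }
    Ok(val)
}

fn main() {
    assert_eq!(parse_missing("10").unwrap(), 10.0);
    assert_eq!(parse_missing("10.0").unwrap(), 10.0);
    assert!(parse_missing("a").is_err());
    assert!(parse_missing("NaN").is_err());
}
```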

View File

@@ -2,7 +2,8 @@ use std::fmt::Debug;
 
 use serde::{Deserialize, Serialize};
 
-use super::{IntermediateStats, SegmentStatsCollector};
+use super::*;
+use crate::aggregation::*;
 
 /// A single-value metric aggregation that counts the number of values that are
 /// extracted from the aggregated documents.
@@ -18,14 +19,23 @@ use super::{IntermediateStats, SegmentStatsCollector};
 /// ```
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct CountAggregation {
-    /// The field name to compute the minimum on.
+    /// The field name to compute the count on.
     pub field: String,
+    /// The missing parameter defines how documents that are missing a value should be treated.
+    /// By default they will be ignored but it is also possible to treat them as if they had a
+    /// value. Examples in JSON format:
+    /// { "field": "my_numbers", "missing": "10.0" }
+    #[serde(default, deserialize_with = "deserialize_option_f64")]
+    pub missing: Option<f64>,
 }
 
 impl CountAggregation {
     /// Creates a new [`CountAggregation`] instance from a field name.
     pub fn from_field_name(field_name: String) -> Self {
-        Self { field: field_name }
+        Self {
+            field: field_name,
+            missing: None,
+        }
     }
 
     /// Returns the field name the aggregation is computed on.
     pub fn field_name(&self) -> &str {
@@ -51,7 +61,7 @@ impl IntermediateCount {
     pub fn merge_fruits(&mut self, other: IntermediateCount) {
         self.stats.merge_fruits(other.stats);
     }
 
-    /// Computes the final minimum value.
+    /// Computes the final count value.
     pub fn finalize(&self) -> Option<f64> {
         Some(self.stats.finalize().count as f64)
     }

View File

@@ -2,7 +2,8 @@ use std::fmt::Debug;
 
 use serde::{Deserialize, Serialize};
 
-use super::{IntermediateStats, SegmentStatsCollector};
+use super::*;
+use crate::aggregation::*;
 
 /// A single-value metric aggregation that computes the maximum of numeric values that are
 /// extracted from the aggregated documents.
@@ -20,12 +21,21 @@ use super::{IntermediateStats, SegmentStatsCollector};
 pub struct MaxAggregation {
     /// The field name to compute the maximum on.
     pub field: String,
+    /// The missing parameter defines how documents that are missing a value should be treated.
+    /// By default they will be ignored but it is also possible to treat them as if they had a
+    /// value. Examples in JSON format:
+    /// { "field": "my_numbers", "missing": "10.0" }
+    #[serde(default, deserialize_with = "deserialize_option_f64")]
+    pub missing: Option<f64>,
 }
 
 impl MaxAggregation {
     /// Creates a new [`MaxAggregation`] instance from a field name.
     pub fn from_field_name(field_name: String) -> Self {
-        Self { field: field_name }
+        Self {
+            field: field_name,
+            missing: None,
+        }
     }
 
     /// Returns the field name the aggregation is computed on.
     pub fn field_name(&self) -> &str {
@@ -56,3 +66,55 @@ impl IntermediateMax {
         self.stats.finalize().max
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::aggregation::agg_req::Aggregations;
+    use crate::aggregation::tests::exec_request_with_query;
+    use crate::schema::{Schema, FAST};
+    use crate::{Index, IndexWriter};
+
+    #[test]
+    fn test_max_agg_with_missing() -> crate::Result<()> {
+        let mut schema_builder = Schema::builder();
+        let json = schema_builder.add_json_field("json", FAST);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
+        // => Segment with empty json
+        index_writer.add_document(doc!()).unwrap();
+        index_writer.commit().unwrap();
+        // => Segment with json, but no field partially_empty
+        index_writer
+            .add_document(doc!(json => json!({"different_field": "blue"})))
+            .unwrap();
+        index_writer.commit().unwrap();
+        //// => Segment with field partially_empty
+        index_writer
+            .add_document(doc!(json => json!({"partially_empty": 10.0})))
+            .unwrap();
+        index_writer.add_document(doc!())?;
+        index_writer.commit().unwrap();
+
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_stats": {
+                "max": {
+                    "field": "json.partially_empty",
+                    "missing": 100.0,
+                }
+            }
+        }))
+        .unwrap();
+
+        let res = exec_request_with_query(agg_req, &index, None)?;
+
+        assert_eq!(
+            res["my_stats"],
+            json!({
+                "value": 100.0,
+            })
+        );
+
+        Ok(())
+    }
+}

View File

@@ -2,7 +2,8 @@ use std::fmt::Debug;
 
 use serde::{Deserialize, Serialize};
 
-use super::{IntermediateStats, SegmentStatsCollector};
+use super::*;
+use crate::aggregation::*;
 
 /// A single-value metric aggregation that computes the minimum of numeric values that are
 /// extracted from the aggregated documents.
@@ -20,12 +21,21 @@ use super::{IntermediateStats, SegmentStatsCollector};
 pub struct MinAggregation {
     /// The field name to compute the minimum on.
     pub field: String,
+    /// The missing parameter defines how documents that are missing a value should be treated.
+    /// By default they will be ignored but it is also possible to treat them as if they had a
+    /// value. Examples in JSON format:
+    /// { "field": "my_numbers", "missing": "10.0" }
+    #[serde(default, deserialize_with = "deserialize_option_f64")]
+    pub missing: Option<f64>,
 }
 
 impl MinAggregation {
     /// Creates a new [`MinAggregation`] instance from a field name.
     pub fn from_field_name(field_name: String) -> Self {
-        Self { field: field_name }
+        Self {
+            field: field_name,
+            missing: None,
+        }
     }
 
     /// Returns the field name the aggregation is computed on.
     pub fn field_name(&self) -> &str {

View File

@@ -23,6 +23,8 @@ mod min;
 mod percentiles;
 mod stats;
 mod sum;
+mod top_hits;
+
 pub use average::*;
 pub use count::*;
 pub use max::*;
@@ -32,6 +34,7 @@ use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};
 pub use stats::*;
 pub use sum::*;
+pub use top_hits::*;
 
 /// Single-metric aggregations use this common result structure.
 ///
@@ -81,6 +84,27 @@ pub struct PercentilesMetricResult {
     pub values: PercentileValues,
 }
 
+/// The top_hits metric results entry
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TopHitsVecEntry {
+    /// The sort values of the document, depending on the sort criteria in the request.
+    pub sort: Vec<Option<u64>>,
+
+    /// Search results, for queries that include field retrieval requests
+    /// (`docvalue_fields`).
+    #[serde(flatten)]
+    pub search_results: FieldRetrivalResult,
+}
+
+/// The top_hits metric aggregation results a list of top hits by sort criteria.
+///
+/// The main reason for wrapping it in `hits` is to match elasticsearch output structure.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TopHitsMetricResult {
+    /// The result of the top_hits metric.
+    pub hits: Vec<TopHitsVecEntry>,
+}
+
 #[cfg(test)]
 mod tests {
     use crate::aggregation::agg_req::Aggregations;
@@ -88,7 +112,7 @@ mod tests {
     use crate::aggregation::AggregationCollector;
     use crate::query::AllQuery;
     use crate::schema::{NumericOptions, Schema};
-    use crate::Index;
+    use crate::{Index, IndexWriter};
 
     #[test]
     fn test_metric_aggregations() {
@@ -96,7 +120,7 @@ mod tests {
         let field_options = NumericOptions::default().set_fast();
         let field = schema_builder.add_f64_field("price", field_options);
         let index = Index::create_in_ram(schema_builder.build());
-        let mut index_writer = index.writer_for_tests().unwrap();
+        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
         for i in 0..3 {
             index_writer
View File

@@ -11,7 +11,7 @@ use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
};
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::*;
use crate::{DocId, TantivyError};
/// # Percentiles
@@ -80,6 +80,16 @@ pub struct PercentilesAggregationReq {
/// Whether to return the percentiles as a hash map
#[serde(default = "default_as_true")]
pub keyed: bool,
/// The missing parameter defines how documents that are missing a value should be treated.
/// By default they will be ignored but it is also possible to treat them as if they had a
/// value. Examples in JSON format:
/// { "field": "my_numbers", "missing": "10.0" }
#[serde(
skip_serializing_if = "Option::is_none",
default,
deserialize_with = "deserialize_option_f64"
)]
pub missing: Option<f64>,
}
fn default_percentiles() -> &'static [f64] {
&[1.0, 5.0, 25.0, 50.0, 75.0, 95.0, 99.0]
@@ -95,6 +105,7 @@ impl PercentilesAggregationReq {
field: field_name,
percents: None,
keyed: default_as_true(),
missing: None,
}
}
/// Returns the field name the aggregation is computed on.
@@ -126,7 +137,7 @@ pub(crate) struct SegmentPercentilesCollector {
field_type: ColumnType,
pub(crate) percentiles: PercentilesCollector,
pub(crate) accessor_idx: usize,
missing: Option<u64>,
}
#[derive(Clone, Serialize, Deserialize)]
@@ -227,11 +238,15 @@ impl SegmentPercentilesCollector {
accessor_idx: usize,
) -> crate::Result<Self> {
req.validate()?;
let missing = req
.missing
.and_then(|val| f64_to_fastfield_u64(val, &field_type));
Ok(Self {
field_type,
percentiles: PercentilesCollector::new(),
accessor_idx,
missing,
})
}
#[inline]
@@ -240,9 +255,17 @@ impl SegmentPercentilesCollector {
docs: &[DocId],
agg_accessor: &mut AggregationWithAccessor,
) {
if let Some(missing) = self.missing.as_ref() {
agg_accessor.column_block_accessor.fetch_block_with_missing(
docs,
&agg_accessor.accessor,
*missing,
);
} else {
agg_accessor
.column_block_accessor
.fetch_block(docs, &agg_accessor.accessor);
}
for val in agg_accessor.column_block_accessor.iter_vals() {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
@@ -277,9 +300,22 @@ impl SegmentAggregationCollector for SegmentPercentilesCollector {
) -> crate::Result<()> {
let field = &agg_with_accessor.aggs.values[self.accessor_idx].accessor;
if let Some(missing) = self.missing {
let mut has_val = false;
for val in field.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.percentiles.collect(val1);
has_val = true;
}
if !has_val {
self.percentiles
.collect(f64_from_fastfield_u64(missing, &self.field_type));
}
} else {
for val in field.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.percentiles.collect(val1);
}
}
Ok(())
@@ -309,10 +345,12 @@ mod tests {
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::tests::{
exec_request_with_query, get_test_index_from_values, get_test_index_from_values_and_terms,
};
use crate::aggregation::AggregationCollector;
use crate::query::AllQuery;
use crate::schema::{Schema, FAST};
use crate::Index;
#[test]
fn test_aggregation_percentiles_empty_index() -> crate::Result<()> {
@@ -463,7 +501,7 @@ mod tests {
fn test_aggregation_percentiles(merge_segments: bool) -> crate::Result<()> {
use rand_distr::Distribution;
let num_values_in_segment = [100, 30_000, 8000];
let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
let mut rng = StdRng::from_seed([1u8; 32]);
@@ -545,4 +583,110 @@ mod tests {
Ok(())
}
#[test]
fn test_percentiles_missing_sub_agg() -> crate::Result<()> {
// This test verifies the `collect` method (in contrast to `collect_block`), which is
// called when the sub-aggregations are flushed.
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("texts", FAST);
let score_field_f64 = schema_builder.add_f64_field("score", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests()?;
// writing the segment
index_writer.add_document(doc!(
score_field_f64 => 10.0f64,
text_field => "a"
))?;
index_writer.add_document(doc!(
score_field_f64 => 10.0f64,
text_field => "a"
))?;
index_writer.add_document(doc!(text_field => "a"))?;
index_writer.commit()?;
}
let agg_req: Aggregations = {
serde_json::from_value(json!({
"range_with_stats": {
"terms": {
"field": "texts"
},
"aggs": {
"percentiles": {
"percentiles": {
"field": "score",
"missing": 5.0
}
}
}
}
}))
.unwrap()
};
let res = exec_request_with_query(agg_req, &index, None)?;
assert_eq!(res["range_with_stats"]["buckets"][0]["doc_count"], 3);
assert_eq!(
res["range_with_stats"]["buckets"][0]["percentiles"]["values"]["1.0"],
5.0028295751107414
);
assert_eq!(
res["range_with_stats"]["buckets"][0]["percentiles"]["values"]["99.0"],
10.07469668951144
);
Ok(())
}
#[test]
fn test_percentiles_missing() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("texts", FAST);
let score_field_f64 = schema_builder.add_f64_field("score", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests()?;
// writing the segment
index_writer.add_document(doc!(
score_field_f64 => 10.0f64,
text_field => "a"
))?;
index_writer.add_document(doc!(
score_field_f64 => 10.0f64,
text_field => "a"
))?;
index_writer.add_document(doc!(text_field => "a"))?;
index_writer.commit()?;
}
let agg_req: Aggregations = {
serde_json::from_value(json!({
"percentiles": {
"percentiles": {
"field": "score",
"missing": 5.0
}
}
}))
.unwrap()
};
let res = exec_request_with_query(agg_req, &index, None)?;
assert_eq!(res["percentiles"]["values"]["1.0"], 5.0028295751107414);
assert_eq!(res["percentiles"]["values"]["99.0"], 10.07469668951144);
Ok(())
}
}
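The per-document `missing` fallback added above can be sketched in isolation. The helper name `collect_with_missing` and the plain `Vec<f64>` accumulator are hypothetical stand-ins for the collector's fast-field iteration; the branch structure mirrors the `collect` method in the diff.

```rust
// Hypothetical standalone sketch of the `missing` fallback: if a document has
// no values for the field, the configured `missing` value is collected instead.
fn collect_with_missing(values_for_doc: &[f64], missing: Option<f64>, acc: &mut Vec<f64>) {
    match missing {
        Some(missing) => {
            let mut has_val = false;
            for &val in values_for_doc {
                acc.push(val);
                has_val = true;
            }
            if !has_val {
                // Document had no value: substitute the configured `missing`.
                acc.push(missing);
            }
        }
        // Without `missing`, documents lacking a value are simply skipped.
        None => acc.extend_from_slice(values_for_doc),
    }
}
```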


@@ -5,11 +5,11 @@ use super::*;
use crate::aggregation::agg_req_with_accessor::{
AggregationWithAccessor, AggregationsWithAccessor,
};
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
};
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::*;
use crate::{DocId, TantivyError};
/// A multi-value metric aggregation that computes a collection of statistics on numeric values that
@@ -29,12 +29,21 @@ use crate::{DocId, TantivyError};
pub struct StatsAggregation {
/// The field name to compute the stats on.
pub field: String,
/// The missing parameter defines how documents that are missing a value should be treated.
/// By default they will be ignored but it is also possible to treat them as if they had a
/// value. Examples in JSON format:
/// { "field": "my_numbers", "missing": "10.0" }
#[serde(default, deserialize_with = "deserialize_option_f64")]
pub missing: Option<f64>,
}
impl StatsAggregation {
/// Creates a new [`StatsAggregation`] instance from a field name.
pub fn from_field_name(field_name: String) -> Self {
StatsAggregation {
field: field_name,
missing: None,
}
}
/// Returns the field name the aggregation is computed on.
pub fn field_name(&self) -> &str {
@@ -153,6 +162,7 @@ pub(crate) enum SegmentStatsType {
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct SegmentStatsCollector {
missing: Option<u64>,
field_type: ColumnType,
pub(crate) collecting_for: SegmentStatsType,
pub(crate) stats: IntermediateStats,
@@ -165,12 +175,15 @@ impl SegmentStatsCollector {
field_type: ColumnType,
collecting_for: SegmentStatsType,
accessor_idx: usize,
missing: Option<f64>,
) -> Self {
let missing = missing.and_then(|val| f64_to_fastfield_u64(val, &field_type));
Self {
field_type,
collecting_for,
stats: IntermediateStats::default(),
accessor_idx,
missing,
val_cache: Default::default(),
}
}
@@ -180,10 +193,17 @@ impl SegmentStatsCollector {
docs: &[DocId],
agg_accessor: &mut AggregationWithAccessor,
) {
if let Some(missing) = self.missing.as_ref() {
agg_accessor.column_block_accessor.fetch_block_with_missing(
docs,
&agg_accessor.accessor,
*missing,
);
} else {
agg_accessor
.column_block_accessor
.fetch_block(docs, &agg_accessor.accessor);
}
for val in agg_accessor.column_block_accessor.iter_vals() {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.stats.collect(val1);
@@ -234,10 +254,22 @@ impl SegmentAggregationCollector for SegmentStatsCollector {
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
let field = &agg_with_accessor.aggs.values[self.accessor_idx].accessor;
if let Some(missing) = self.missing {
let mut has_val = false;
for val in field.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.stats.collect(val1);
has_val = true;
}
if !has_val {
self.stats
.collect(f64_from_fastfield_u64(missing, &self.field_type));
}
} else {
for val in field.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.stats.collect(val1);
}
}
Ok(())
@@ -262,11 +294,13 @@ mod tests {
use crate::aggregation::agg_req::{Aggregation, Aggregations};
use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::tests::{
exec_request_with_query, get_test_index_2_segments, get_test_index_from_values,
};
use crate::aggregation::AggregationCollector;
use crate::query::{AllQuery, TermQuery};
use crate::schema::{IndexRecordOption, Schema, FAST};
use crate::{Index, IndexWriter, Term};
#[test]
fn test_aggregation_stats_empty_index() -> crate::Result<()> {
@@ -453,4 +487,183 @@ mod tests {
Ok(())
}
#[test]
fn test_stats_json() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let json = schema_builder.add_json_field("json", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
// => Segment with empty json
index_writer.add_document(doc!()).unwrap();
index_writer.commit().unwrap();
// => Segment with json, but no field partially_empty
index_writer
.add_document(doc!(json => json!({"different_field": "blue"})))
.unwrap();
index_writer.commit().unwrap();
// => Segment with field partially_empty
index_writer
.add_document(doc!(json => json!({"partially_empty": 10.0})))
.unwrap();
index_writer.add_document(doc!())?;
index_writer.commit().unwrap();
let agg_req: Aggregations = serde_json::from_value(json!({
"my_stats": {
"stats": {
"field": "json.partially_empty"
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
assert_eq!(
res["my_stats"],
json!({
"avg": 10.0,
"count": 1,
"max": 10.0,
"min": 10.0,
"sum": 10.0
})
);
Ok(())
}
#[test]
fn test_stats_json_missing() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let json = schema_builder.add_json_field("json", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
// => Segment with empty json
index_writer.add_document(doc!()).unwrap();
index_writer.commit().unwrap();
// => Segment with json, but no field partially_empty
index_writer
.add_document(doc!(json => json!({"different_field": "blue"})))
.unwrap();
index_writer.commit().unwrap();
// => Segment with field partially_empty
index_writer
.add_document(doc!(json => json!({"partially_empty": 10.0})))
.unwrap();
index_writer.add_document(doc!())?;
index_writer.commit().unwrap();
let agg_req: Aggregations = serde_json::from_value(json!({
"my_stats": {
"stats": {
"field": "json.partially_empty",
"missing": 0.0
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
assert_eq!(
res["my_stats"],
json!({
"avg": 2.5,
"count": 4,
"max": 10.0,
"min": 0.0,
"sum": 10.0
})
);
// From string
let agg_req: Aggregations = serde_json::from_value(json!({
"my_stats": {
"stats": {
"field": "json.partially_empty",
"missing": "0.0"
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
assert_eq!(
res["my_stats"],
json!({
"avg": 2.5,
"count": 4,
"max": 10.0,
"min": 0.0,
"sum": 10.0
})
);
Ok(())
}
#[test]
fn test_stats_json_missing_sub_agg() -> crate::Result<()> {
// This test verifies the `collect` method (in contrast to `collect_block`), which is
// called when the sub-aggregations are flushed.
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("texts", FAST);
let score_field_f64 = schema_builder.add_f64_field("score", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests()?;
// writing the segment
index_writer.add_document(doc!(
score_field_f64 => 10.0f64,
text_field => "a"
))?;
index_writer.add_document(doc!(text_field => "a"))?;
index_writer.commit()?;
}
let agg_req: Aggregations = {
serde_json::from_value(json!({
"range_with_stats": {
"terms": {
"field": "texts"
},
"aggs": {
"my_stats": {
"stats": {
"field": "score",
"missing": 0.0
}
}
}
}
}))
.unwrap()
};
let res = exec_request_with_query(agg_req, &index, None)?;
assert_eq!(
res["range_with_stats"]["buckets"][0]["my_stats"]["count"],
2
);
assert_eq!(
res["range_with_stats"]["buckets"][0]["my_stats"]["min"],
0.0
);
assert_eq!(
res["range_with_stats"]["buckets"][0]["my_stats"]["avg"],
5.0
);
Ok(())
}
}
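The arithmetic expected by `test_stats_json_missing` above can be checked in miniature: with `missing: 0.0`, the three documents without `partially_empty` contribute 0.0 alongside the single 10.0. The `stats` helper below is a hypothetical plain-Rust stand-in for `IntermediateStats`, not the crate's implementation.

```rust
// Minimal stats over a value slice: (count, sum, min, max, avg).
fn stats(vals: &[f64]) -> (usize, f64, f64, f64, f64) {
    let count = vals.len();
    let sum: f64 = vals.iter().sum();
    let min = vals.iter().cloned().fold(f64::INFINITY, f64::min);
    let max = vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    (count, sum, min, max, sum / count as f64)
}
```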


@@ -2,7 +2,8 @@ use std::fmt::Debug;
use serde::{Deserialize, Serialize};
use super::*;
use crate::aggregation::*;
/// A single-value metric aggregation that sums up numeric values that are
/// extracted from the aggregated documents.
@@ -20,12 +21,21 @@ use super::{IntermediateStats, SegmentStatsCollector};
pub struct SumAggregation {
/// The field name to compute the sum on.
pub field: String,
/// The missing parameter defines how documents that are missing a value should be treated.
/// By default they will be ignored but it is also possible to treat them as if they had a
/// value. Examples in JSON format:
/// { "field": "my_numbers", "missing": "10.0" }
#[serde(default, deserialize_with = "deserialize_option_f64")]
pub missing: Option<f64>,
}
impl SumAggregation {
/// Creates a new [`SumAggregation`] instance from a field name.
pub fn from_field_name(field_name: String) -> Self {
Self {
field: field_name,
missing: None,
}
}
/// Returns the field name the aggregation is computed on.
pub fn field_name(&self) -> &str {


@@ -0,0 +1,837 @@
use std::collections::HashMap;
use std::fmt::Formatter;
use columnar::{ColumnarReader, DynamicColumn};
use regex::Regex;
use serde::ser::SerializeMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use super::{TopHitsMetricResult, TopHitsVecEntry};
use crate::aggregation::bucket::Order;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateMetricResult,
};
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::collector::TopNComputer;
use crate::schema::term::JSON_PATH_SEGMENT_SEP_STR;
use crate::schema::OwnedValue;
use crate::{DocAddress, DocId, SegmentOrdinal};
/// # Top Hits
///
/// The top hits aggregation is a useful tool to answer questions like:
/// - "What are the most recent posts by each author?"
/// - "What are the most popular items in each category?"
///
/// It does so by keeping track of the most relevant document being aggregated,
/// in terms of a sort criterion that can consist of multiple fields and their
/// sort-orders (ascending or descending).
///
/// `top_hits` should not be used as a top-level aggregation. It is intended to be
/// used as a sub-aggregation, inside a `terms` aggregation or a `filters` aggregation,
/// for example.
///
/// Note that this aggregator does not return the actual document addresses, but
/// rather a list of the values of the fields that were requested to be retrieved.
/// These values can be specified in the `docvalue_fields` parameter, which can include
/// a list of fast fields to be retrieved. At the moment, only fast fields are supported
/// but it is possible that we support the `fields` parameter to retrieve any stored
/// field in the future.
///
/// The following example demonstrates a request for the top_hits aggregation:
/// ```JSON
/// {
/// "aggs": {
/// "top_authors": {
/// "terms": {
/// "field": "author",
/// "size": 5
/// },
/// "aggs": {
/// "top_hits": {
/// "size": 2,
/// "from": 0,
/// "sort": [
/// { "date": "desc" }
/// ],
/// "docvalue_fields": ["date", "title", "iden"]
/// }
/// }
/// }
/// }
/// }
/// ```
///
/// This request will return an object containing the top two documents, sorted
/// by the `date` field in descending order. You can also sort by multiple fields, which
/// helps to resolve ties. The aggregation object for each bucket will look like:
/// ```JSON
/// {
/// "hits": [
/// {
/// "score": [<time_u64>],
/// "docvalue_fields": {
/// "date": "<date_RFC3339>",
/// "title": "<title>",
/// "iden": "<iden>"
/// }
/// },
/// {
/// "score": [<time_u64>],
/// "docvalue_fields": {
/// "date": "<date_RFC3339>",
/// "title": "<title>",
/// "iden": "<iden>"
/// }
/// }
/// ]
/// }
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct TopHitsAggregation {
sort: Vec<KeyOrder>,
size: usize,
from: Option<usize>,
#[serde(flatten)]
retrieval: RetrievalFields,
}
const fn default_doc_value_fields() -> Vec<String> {
Vec::new()
}
/// Search query spec for each matched document
/// TODO: move this to a common module
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct RetrievalFields {
/// The fast fields to return for each hit.
/// This is the only variant supported for now.
/// TODO: support the {field, format} variant for custom formatting.
#[serde(rename = "docvalue_fields")]
#[serde(default = "default_doc_value_fields")]
pub doc_value_fields: Vec<String>,
}
/// Search query result for each matched document
/// TODO: move this to a common module
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct FieldRetrivalResult {
/// The fast fields returned for each hit.
#[serde(rename = "docvalue_fields")]
#[serde(skip_serializing_if = "HashMap::is_empty")]
pub doc_value_fields: HashMap<String, OwnedValue>,
}
impl RetrievalFields {
fn get_field_names(&self) -> Vec<&str> {
self.doc_value_fields.iter().map(|s| s.as_str()).collect()
}
fn resolve_field_names(&mut self, reader: &ColumnarReader) -> crate::Result<()> {
// Transform a glob (`pattern*`, for example) into a regex::Regex (`^pattern.*$`)
let globbed_string_to_regex = |glob: &str| {
// Replace `*` glob with `.*` regex
let sanitized = format!("^{}$", regex::escape(glob).replace(r"\*", ".*"));
Regex::new(&sanitized).map_err(|e| {
crate::TantivyError::SchemaError(format!(
"Invalid regex '{}' in docvalue_fields: {}",
glob, e
))
})
};
self.doc_value_fields = self
.doc_value_fields
.iter()
.map(|field| {
if !field.contains('*')
&& reader
.iter_columns()?
.any(|(name, _)| name.as_str() == field)
{
return Ok(vec![field.to_owned()]);
}
let pattern = globbed_string_to_regex(field)?;
let fields = reader
.iter_columns()?
.map(|(name, _)| {
// normalize path from internal fast field repr
name.replace(JSON_PATH_SEGMENT_SEP_STR, ".")
})
.filter(|name| pattern.is_match(name))
.collect::<Vec<_>>();
assert!(
!fields.is_empty(),
"No fields matched the glob '{}' in docvalue_fields",
field
);
Ok(fields)
})
.collect::<crate::Result<Vec<_>>>()?
.into_iter()
.flatten()
.collect();
Ok(())
}
fn get_document_field_data(
&self,
accessors: &HashMap<String, Vec<DynamicColumn>>,
doc_id: DocId,
) -> FieldRetrivalResult {
let dvf = self
.doc_value_fields
.iter()
.map(|field| {
let accessors = accessors
.get(field)
.unwrap_or_else(|| panic!("field '{}' not found in accessors", field));
let values: Vec<OwnedValue> = accessors
.iter()
.flat_map(|accessor| match accessor {
DynamicColumn::U64(accessor) => accessor
.values_for_doc(doc_id)
.map(OwnedValue::U64)
.collect::<Vec<_>>(),
DynamicColumn::I64(accessor) => accessor
.values_for_doc(doc_id)
.map(OwnedValue::I64)
.collect::<Vec<_>>(),
DynamicColumn::F64(accessor) => accessor
.values_for_doc(doc_id)
.map(OwnedValue::F64)
.collect::<Vec<_>>(),
DynamicColumn::Bytes(accessor) => accessor
.term_ords(doc_id)
.map(|term_ord| {
let mut buffer = vec![];
assert!(
accessor
.ord_to_bytes(term_ord, &mut buffer)
.expect("could not read term dictionary"),
"term corresponding to term_ord does not exist"
);
OwnedValue::Bytes(buffer)
})
.collect::<Vec<_>>(),
DynamicColumn::Str(accessor) => accessor
.term_ords(doc_id)
.map(|term_ord| {
let mut buffer = vec![];
assert!(
accessor
.ord_to_bytes(term_ord, &mut buffer)
.expect("could not read term dictionary"),
"term corresponding to term_ord does not exist"
);
OwnedValue::Str(String::from_utf8(buffer).unwrap())
})
.collect::<Vec<_>>(),
DynamicColumn::Bool(accessor) => accessor
.values_for_doc(doc_id)
.map(OwnedValue::Bool)
.collect::<Vec<_>>(),
DynamicColumn::IpAddr(accessor) => accessor
.values_for_doc(doc_id)
.map(OwnedValue::IpAddr)
.collect::<Vec<_>>(),
DynamicColumn::DateTime(accessor) => accessor
.values_for_doc(doc_id)
.map(OwnedValue::Date)
.collect::<Vec<_>>(),
})
.collect();
(field.to_owned(), OwnedValue::Array(values))
})
.collect();
FieldRetrivalResult {
doc_value_fields: dvf,
}
}
}
#[derive(Debug, Clone, PartialEq, Default)]
struct KeyOrder {
field: String,
order: Order,
}
impl Serialize for KeyOrder {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let KeyOrder { field, order } = self;
let mut map = serializer.serialize_map(Some(1))?;
map.serialize_entry(field, order)?;
map.end()
}
}
impl<'de> Deserialize<'de> for KeyOrder {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de> {
let mut k_o = <HashMap<String, Order>>::deserialize(deserializer)?.into_iter();
let (k, v) = k_o.next().ok_or(serde::de::Error::custom(
"Expected exactly one key-value pair in KeyOrder, found none",
))?;
if k_o.next().is_some() {
return Err(serde::de::Error::custom(
"Expected exactly one key-value pair in KeyOrder, found more",
));
}
Ok(Self { field: k, order: v })
}
}
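The `KeyOrder` deserializer above enforces that the map form `{"date": "desc"}` carries exactly one entry. That rule can be sketched without serde; `parse_key_order` and its string-typed order are hypothetical simplifications of the deserializer's logic.

```rust
// Validate the single-entry-map rule used by KeyOrder's Deserialize impl:
// zero entries and more than one entry are both rejected.
fn parse_key_order(
    map: std::collections::HashMap<String, String>,
) -> Result<(String, String), String> {
    let mut entries = map.into_iter();
    let first = entries
        .next()
        .ok_or_else(|| "expected exactly one key-value pair, found none".to_string())?;
    if entries.next().is_some() {
        return Err("expected exactly one key-value pair, found more".to_string());
    }
    Ok(first)
}
```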
impl TopHitsAggregation {
/// Validate and resolve field retrieval parameters
pub fn validate_and_resolve(&mut self, reader: &ColumnarReader) -> crate::Result<()> {
self.retrieval.resolve_field_names(reader)
}
/// Return fields accessed by the aggregator, in order.
pub fn field_names(&self) -> Vec<&str> {
self.sort
.iter()
.map(|KeyOrder { field, .. }| field.as_str())
.collect()
}
/// Return fields accessed by the aggregator's value retrieval.
pub fn value_field_names(&self) -> Vec<&str> {
self.retrieval.get_field_names()
}
}
/// Holds a single comparable doc feature, and the order in which it should be sorted.
#[derive(Clone, Serialize, Deserialize, Debug)]
struct ComparableDocFeature {
/// Stores any u64-mappable feature.
value: Option<u64>,
/// Sort order for the doc feature
order: Order,
}
impl Ord for ComparableDocFeature {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
let invert = |cmp: std::cmp::Ordering| match self.order {
Order::Asc => cmp,
Order::Desc => cmp.reverse(),
};
match (self.value, other.value) {
(Some(self_value), Some(other_value)) => invert(self_value.cmp(&other_value)),
(Some(_), None) => std::cmp::Ordering::Greater,
(None, Some(_)) => std::cmp::Ordering::Less,
(None, None) => std::cmp::Ordering::Equal,
}
}
}
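The `Ord` impl above inverts comparisons for descending sorts while always ranking a missing value below a present one. A self-contained sketch of that comparison (with a local `Order` enum standing in for the crate's type):

```rust
#[derive(Clone, Copy)]
enum Order {
    Asc,
    Desc,
}

// Compare two optional doc features: present values follow the requested
// order; a missing value (None) always sorts below a present one (Some).
fn cmp_feature(a: Option<u64>, b: Option<u64>, order: Order) -> std::cmp::Ordering {
    use std::cmp::Ordering;
    let invert = |cmp: Ordering| match order {
        Order::Asc => cmp,
        Order::Desc => cmp.reverse(),
    };
    match (a, b) {
        (Some(x), Some(y)) => invert(x.cmp(&y)),
        (Some(_), None) => Ordering::Greater,
        (None, Some(_)) => Ordering::Less,
        (None, None) => Ordering::Equal,
    }
}
```

Note that the None cases are deliberately not inverted, matching the impl above: documents without the sort field rank last regardless of direction.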
impl PartialOrd for ComparableDocFeature {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl PartialEq for ComparableDocFeature {
fn eq(&self, other: &Self) -> bool {
self.value.cmp(&other.value) == std::cmp::Ordering::Equal
}
}
impl Eq for ComparableDocFeature {}
#[derive(Clone, Serialize, Deserialize, Debug)]
struct ComparableDocFeatures(Vec<ComparableDocFeature>, FieldRetrivalResult);
impl Ord for ComparableDocFeatures {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
for (self_feature, other_feature) in self.0.iter().zip(other.0.iter()) {
let cmp = self_feature.cmp(other_feature);
if cmp != std::cmp::Ordering::Equal {
return cmp;
}
}
std::cmp::Ordering::Equal
}
}
impl PartialOrd for ComparableDocFeatures {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl PartialEq for ComparableDocFeatures {
fn eq(&self, other: &Self) -> bool {
self.cmp(other) == std::cmp::Ordering::Equal
}
}
impl Eq for ComparableDocFeatures {}
/// The TopHitsCollector used for collecting over segments and merging results.
#[derive(Clone, Serialize, Deserialize)]
pub struct TopHitsCollector {
req: TopHitsAggregation,
top_n: TopNComputer<ComparableDocFeatures, DocAddress, false>,
}
impl Default for TopHitsCollector {
fn default() -> Self {
Self {
req: TopHitsAggregation::default(),
top_n: TopNComputer::new(1),
}
}
}
impl std::fmt::Debug for TopHitsCollector {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("TopHitsCollector")
.field("req", &self.req)
.field("top_n_threshold", &self.top_n.threshold)
.finish()
}
}
impl std::cmp::PartialEq for TopHitsCollector {
fn eq(&self, _other: &Self) -> bool {
false
}
}
impl TopHitsCollector {
fn collect(&mut self, features: ComparableDocFeatures, doc: DocAddress) {
self.top_n.push(features, doc);
}
pub(crate) fn merge_fruits(&mut self, other_fruit: Self) -> crate::Result<()> {
for doc in other_fruit.top_n.into_vec() {
self.collect(doc.feature, doc.doc);
}
Ok(())
}
/// Finalize by converting self into the final result form
pub fn finalize(self) -> TopHitsMetricResult {
let mut hits: Vec<TopHitsVecEntry> = self
.top_n
.into_sorted_vec()
.into_iter()
.map(|doc| TopHitsVecEntry {
sort: doc.feature.0.iter().map(|f| f.value).collect(),
search_results: doc.feature.1,
})
.collect();
// Remove the first `from` elements
// Truncating from the end would be more efficient, but we need to truncate
// from the front: `into_sorted_vec` yields descending order due to the
// inverted `Ord` semantics of the heap elements.
hits.drain(..self.req.from.unwrap_or(0));
TopHitsMetricResult { hits }
}
}
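The pagination in `finalize` works by over-collecting: the segment collector keeps `size + from` hits, and the first `from` entries of the sorted result are then dropped. A hypothetical sketch of that offset step over plain values:

```rust
// Drop the first `from` entries of an already-sorted hit list, as
// `finalize` does with `hits.drain(..self.req.from.unwrap_or(0))`.
fn apply_from(mut hits: Vec<u64>, from: Option<usize>) -> Vec<u64> {
    hits.drain(..from.unwrap_or(0));
    hits
}
```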
#[derive(Clone)]
pub(crate) struct SegmentTopHitsCollector {
segment_ordinal: SegmentOrdinal,
accessor_idx: usize,
inner_collector: TopHitsCollector,
}
impl SegmentTopHitsCollector {
pub fn from_req(
req: &TopHitsAggregation,
accessor_idx: usize,
segment_ordinal: SegmentOrdinal,
) -> Self {
Self {
inner_collector: TopHitsCollector {
req: req.clone(),
top_n: TopNComputer::new(req.size + req.from.unwrap_or(0)),
},
segment_ordinal,
accessor_idx,
}
}
}
impl std::fmt::Debug for SegmentTopHitsCollector {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SegmentTopHitsCollector")
.field("segment_id", &self.segment_ordinal)
.field("accessor_idx", &self.accessor_idx)
.field("inner_collector", &self.inner_collector)
.finish()
}
}
impl SegmentAggregationCollector for SegmentTopHitsCollector {
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor,
results: &mut crate::aggregation::intermediate_agg_result::IntermediateAggregationResults,
) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let intermediate_result = IntermediateMetricResult::TopHits(self.inner_collector);
results.push(
name,
IntermediateAggregationResult::Metric(intermediate_result),
)
}
fn collect(
&mut self,
doc_id: crate::DocId,
agg_with_accessor: &mut crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor,
) -> crate::Result<()> {
let accessors = &agg_with_accessor.aggs.values[self.accessor_idx].accessors;
let value_accessors = &agg_with_accessor.aggs.values[self.accessor_idx].value_accessors;
let features: Vec<ComparableDocFeature> = self
.inner_collector
.req
.sort
.iter()
.enumerate()
.map(|(idx, KeyOrder { order, .. })| {
let order = *order;
let value = accessors
.get(idx)
.expect("could not find field in accessors")
.0
.values_for_doc(doc_id)
.next();
ComparableDocFeature { value, order }
})
.collect();
let retrieval_result = self
.inner_collector
.req
.retrieval
.get_document_field_data(value_accessors, doc_id);
self.inner_collector.collect(
ComparableDocFeatures(features, retrieval_result),
DocAddress {
segment_ord: self.segment_ordinal,
doc_id,
},
);
Ok(())
}
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor,
) -> crate::Result<()> {
// TODO: Consider fetching fields with the column block accessor and refactor this.
// ---
// Would the additional complexity of fetching fields with the column_block_accessor
// make sense here? Probably yes, but I'd like a first-pass review before
// proceeding.
for doc in docs {
self.collect(*doc, agg_with_accessor)?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use common::DateTime;
use pretty_assertions::assert_eq;
use serde_json::Value;
use time::macros::datetime;
use super::{ComparableDocFeature, ComparableDocFeatures, Order};
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::bucket::tests::get_test_index_from_docs;
use crate::aggregation::tests::get_test_index_from_values;
use crate::aggregation::AggregationCollector;
use crate::collector::ComparableDoc;
use crate::query::AllQuery;
use crate::schema::OwnedValue as SchemaValue;
fn invert_order(cmp_feature: ComparableDocFeature) -> ComparableDocFeature {
let ComparableDocFeature { value, order } = cmp_feature;
let order = match order {
Order::Asc => Order::Desc,
Order::Desc => Order::Asc,
};
ComparableDocFeature { value, order }
}
fn collector_with_capacity(capacity: usize) -> super::TopHitsCollector {
super::TopHitsCollector {
top_n: super::TopNComputer::new(capacity),
..Default::default()
}
}
fn invert_order_features(cmp_features: ComparableDocFeatures) -> ComparableDocFeatures {
let ComparableDocFeatures(cmp_features, search_results) = cmp_features;
let cmp_features = cmp_features
.into_iter()
.map(invert_order)
.collect::<Vec<_>>();
ComparableDocFeatures(cmp_features, search_results)
}
#[test]
fn test_comparable_doc_feature() -> crate::Result<()> {
let small = ComparableDocFeature {
value: Some(1),
order: Order::Asc,
};
let big = ComparableDocFeature {
value: Some(2),
order: Order::Asc,
};
let none = ComparableDocFeature {
value: None,
order: Order::Asc,
};
assert!(small < big);
assert!(none < small);
assert!(none < big);
let small = invert_order(small);
let big = invert_order(big);
let none = invert_order(none);
assert!(small > big);
assert!(none < small);
assert!(none < big);
Ok(())
}
#[test]
fn test_comparable_doc_features() -> crate::Result<()> {
let features_1 = ComparableDocFeatures(
vec![ComparableDocFeature {
value: Some(1),
order: Order::Asc,
}],
Default::default(),
);
let features_2 = ComparableDocFeatures(
vec![ComparableDocFeature {
value: Some(2),
order: Order::Asc,
}],
Default::default(),
);
assert!(features_1 < features_2);
assert!(invert_order_features(features_1.clone()) > invert_order_features(features_2));
Ok(())
}
#[test]
fn test_aggregation_top_hits_empty_index() -> crate::Result<()> {
let values = vec![];
let index = get_test_index_from_values(false, &values)?;
let d: Aggregations = serde_json::from_value(json!({
"top_hits_req": {
"top_hits": {
"size": 2,
"sort": [
{ "date": "desc" }
],
"from": 0,
}
}
}))
.unwrap();
let collector = AggregationCollector::from_aggs(d, Default::default());
let reader = index.reader()?;
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res: Value = serde_json::from_str(
&serde_json::to_string(&agg_res).expect("JSON serialization failed"),
)
.expect("JSON parsing failed");
assert_eq!(
res,
json!({
"top_hits_req": {
"hits": []
}
})
);
Ok(())
}
#[test]
fn test_top_hits_collector_single_feature() -> crate::Result<()> {
let docs = vec![
ComparableDoc::<_, _, false> {
doc: crate::DocAddress {
segment_ord: 0,
doc_id: 0,
},
feature: ComparableDocFeatures(
vec![ComparableDocFeature {
value: Some(1),
order: Order::Asc,
}],
Default::default(),
),
},
ComparableDoc {
doc: crate::DocAddress {
segment_ord: 0,
doc_id: 2,
},
feature: ComparableDocFeatures(
vec![ComparableDocFeature {
value: Some(3),
order: Order::Asc,
}],
Default::default(),
),
},
ComparableDoc {
doc: crate::DocAddress {
segment_ord: 0,
doc_id: 1,
},
feature: ComparableDocFeatures(
vec![ComparableDocFeature {
value: Some(5),
order: Order::Asc,
}],
Default::default(),
),
},
];
let mut collector = collector_with_capacity(3);
for doc in docs.clone() {
collector.collect(doc.feature, doc.doc);
}
let res = collector.finalize();
assert_eq!(
res,
super::TopHitsMetricResult {
hits: vec![
super::TopHitsVecEntry {
sort: vec![docs[0].feature.0[0].value],
search_results: Default::default(),
},
super::TopHitsVecEntry {
sort: vec![docs[1].feature.0[0].value],
search_results: Default::default(),
},
super::TopHitsVecEntry {
sort: vec![docs[2].feature.0[0].value],
search_results: Default::default(),
},
]
}
);
Ok(())
}
fn test_aggregation_top_hits(merge_segments: bool) -> crate::Result<()> {
let docs = vec![
vec![
r#"{ "date": "2015-01-02T00:00:00Z", "text": "bbb", "text2": "bbb", "mixed": { "dyn_arr": [1, "2"] } }"#,
r#"{ "date": "2017-06-15T00:00:00Z", "text": "ccc", "text2": "ddd", "mixed": { "dyn_arr": [3, "4"] } }"#,
],
vec![
r#"{ "text": "aaa", "text2": "bbb", "date": "2018-01-02T00:00:00Z", "mixed": { "dyn_arr": ["9", 8] } }"#,
r#"{ "text": "aaa", "text2": "bbb", "date": "2016-01-02T00:00:00Z", "mixed": { "dyn_arr": ["7", 6] } }"#,
],
];
let index = get_test_index_from_docs(merge_segments, &docs)?;
let d: Aggregations = serde_json::from_value(json!({
"top_hits_req": {
"top_hits": {
"size": 2,
"sort": [
{ "date": "desc" }
],
"from": 1,
"docvalue_fields": [
"date",
"tex*",
"mixed.*",
],
}
}
}))?;
let collector = AggregationCollector::from_aggs(d, Default::default());
let reader = index.reader()?;
let searcher = reader.searcher();
let agg_res =
serde_json::to_value(searcher.search(&AllQuery, &collector).unwrap()).unwrap();
let date_2017 = datetime!(2017-06-15 00:00:00 UTC);
let date_2016 = datetime!(2016-01-02 00:00:00 UTC);
assert_eq!(
agg_res["top_hits_req"],
json!({
"hits": [
{
"sort": [common::i64_to_u64(date_2017.unix_timestamp_nanos() as i64)],
"docvalue_fields": {
"date": [ SchemaValue::Date(DateTime::from_utc(date_2017)) ],
"text": [ "ccc" ],
"text2": [ "ddd" ],
"mixed.dyn_arr": [ 3, "4" ],
}
},
{
"sort": [common::i64_to_u64(date_2016.unix_timestamp_nanos() as i64)],
"docvalue_fields": {
"date": [ SchemaValue::Date(DateTime::from_utc(date_2016)) ],
"text": [ "aaa" ],
"text2": [ "bbb" ],
"mixed.dyn_arr": [ 6, "7" ],
}
}
]
}),
);
Ok(())
}
#[test]
fn test_aggregation_top_hits_single_segment() -> crate::Result<()> {
test_aggregation_top_hits(true)
}
#[test]
fn test_aggregation_top_hits_multi_segment() -> crate::Result<()> {
test_aggregation_top_hits(false)
}
}


@@ -145,6 +145,8 @@ mod agg_tests;
 mod agg_bench;
+
+use core::fmt;
 pub use agg_limits::AggregationLimits;
 pub use collector::{
     AggregationCollector, AggregationSegmentCollector, DistributedAggregationCollector,
@@ -154,7 +156,106 @@ use columnar::{ColumnType, MonotonicallyMappableToU64};
 pub(crate) use date::format_date;
 pub use error::AggregationError;
 use itertools::Itertools;
-use serde::{Deserialize, Serialize};
+use serde::de::{self, Visitor};
+use serde::{Deserialize, Deserializer, Serialize};
+
+fn parse_str_into_f64<E: de::Error>(value: &str) -> Result<f64, E> {
+    let parsed = value.parse::<f64>().map_err(|_err| {
+        de::Error::custom(format!("Failed to parse f64 from string: {:?}", value))
+    })?;
+    // Check if the parsed value is NaN or infinity
+    if parsed.is_nan() || parsed.is_infinite() {
+        Err(de::Error::custom(format!(
+            "Value is not a valid f64 (NaN or Infinity): {:?}",
+            value
+        )))
+    } else {
+        Ok(parsed)
+    }
+}
+
+/// deserialize Option<f64> from string or float
+pub(crate) fn deserialize_option_f64<'de, D>(deserializer: D) -> Result<Option<f64>, D::Error>
+where D: Deserializer<'de> {
+    struct StringOrFloatVisitor;
+
+    impl<'de> Visitor<'de> for StringOrFloatVisitor {
+        type Value = Option<f64>;
+
+        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+            formatter.write_str("a string or a float")
+        }
+
+        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
+        where E: de::Error {
+            parse_str_into_f64(value).map(Some)
+        }
+
+        fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
+        where E: de::Error {
+            Ok(Some(value))
+        }
+
+        fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
+        where E: de::Error {
+            Ok(Some(value as f64))
+        }
+
+        fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
+        where E: de::Error {
+            Ok(Some(value as f64))
+        }
+
+        fn visit_none<E>(self) -> Result<Self::Value, E>
+        where E: de::Error {
+            Ok(None)
+        }
+
+        fn visit_unit<E>(self) -> Result<Self::Value, E>
+        where E: de::Error {
+            Ok(None)
+        }
+    }
+
+    deserializer.deserialize_any(StringOrFloatVisitor)
+}
+
+/// deserialize f64 from string or float
+pub(crate) fn deserialize_f64<'de, D>(deserializer: D) -> Result<f64, D::Error>
+where D: Deserializer<'de> {
+    struct StringOrFloatVisitor;
+
+    impl<'de> Visitor<'de> for StringOrFloatVisitor {
+        type Value = f64;
+
+        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+            formatter.write_str("a string or a float")
+        }
+
+        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
+        where E: de::Error {
+            parse_str_into_f64(value)
+        }
+
+        fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
+        where E: de::Error {
+            Ok(value)
+        }
+
+        fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
+        where E: de::Error {
+            Ok(value as f64)
+        }
+
+        fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
+        where E: de::Error {
+            Ok(value as f64)
+        }
+    }
+
+    deserializer.deserialize_any(StringOrFloatVisitor)
+}
+
 /// Represents an associative array `(key => values)` in a very efficient manner.
 #[derive(PartialEq, Serialize, Deserialize)]
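The string-to-f64 guard in the hunk above can be exercised without serde. This is a minimal standalone sketch of the same parse-then-reject rule, with a plain `String` error standing in for serde's `de::Error`:

```rust
/// Same rule as the diff's parse_str_into_f64, with a plain String error
/// standing in for serde's de::Error (illustration only).
fn parse_str_into_f64(value: &str) -> Result<f64, String> {
    let parsed = value
        .parse::<f64>()
        .map_err(|_| format!("Failed to parse f64 from string: {:?}", value))?;
    // Rust's f64 parser happily accepts "NaN" and "inf"; the aggregation
    // layer rejects them explicitly.
    if parsed.is_nan() || parsed.is_infinite() {
        return Err(format!(
            "Value is not a valid f64 (NaN or Infinity): {:?}",
            value
        ));
    }
    Ok(parsed)
}

fn main() {
    assert_eq!(parse_str_into_f64("10.5"), Ok(10.5));
    assert!(parse_str_into_f64("NaN").is_err());
    assert!(parse_str_into_f64("inf").is_err());
    assert!(parse_str_into_f64("not-a-number").is_err());
    println!("ok");
}
```

The NaN/Infinity check matters because `"NaN".parse::<f64>()` succeeds in Rust, so the plain parse alone would let non-finite values into aggregation requests.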
@@ -281,6 +382,7 @@ pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &ColumnType) -> f64 {
         ColumnType::U64 => val as f64,
         ColumnType::I64 | ColumnType::DateTime => i64::from_u64(val) as f64,
         ColumnType::F64 => f64::from_u64(val),
+        ColumnType::Bool => val as f64,
         _ => {
             panic!("unexpected type {field_type:?}. This should not happen")
         }
@@ -301,6 +403,7 @@ pub(crate) fn f64_to_fastfield_u64(val: f64, field_type: &ColumnType) -> Option<
         ColumnType::U64 => Some(val as u64),
         ColumnType::I64 | ColumnType::DateTime => Some((val as i64).to_u64()),
         ColumnType::F64 => Some(val.to_u64()),
+        ColumnType::Bool => Some(val as u64),
         _ => None,
     }
 }
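The two new `Bool` arms are a plain 0/1 cast in both directions. A tiny sketch of the round trip, assuming (as tantivy's fast fields do) that bools are stored as 0/1 `u64`s:

```rust
fn main() {
    // ColumnType::Bool => val as f64 (fast-field u64 -> aggregation f64)
    let stored: u64 = true as u64;
    let as_f64 = stored as f64;
    assert_eq!(as_f64, 1.0);

    // ColumnType::Bool => Some(val as u64) (aggregation f64 -> fast-field u64)
    let back = as_f64 as u64;
    assert_eq!(back, 1);

    // false round-trips through 0 the same way.
    assert_eq!((false as u64) as f64, 0.0);
    println!("{as_f64} {back}");
}
```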
@@ -319,7 +422,7 @@ mod tests {
     use crate::indexer::NoMergePolicy;
     use crate::query::{AllQuery, TermQuery};
     use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
-    use crate::{Index, Term};
+    use crate::{Index, IndexWriter, Term};

     pub fn get_test_index_with_num_docs(
         merge_segments: bool,
@@ -411,7 +514,7 @@ mod tests {
                 .set_index_option(IndexRecordOption::Basic)
                 .set_fieldnorms(false),
         )
-        .set_fast("default")
+        .set_fast(None)
         .set_stored();
     let text_field = schema_builder.add_text_field("text", text_fieldtype.clone());
     let text_field_id = schema_builder.add_text_field("text_id", text_fieldtype);
@@ -451,7 +554,7 @@ mod tests {
             .searchable_segment_ids()
             .expect("Searchable segments failed.");
         if segment_ids.len() > 1 {
-            let mut index_writer = index.writer_for_tests()?;
+            let mut index_writer: IndexWriter = index.writer_for_tests()?;
             index_writer.merge(&segment_ids).wait()?;
             index_writer.wait_merging_threads()?;
         }
@@ -466,7 +569,7 @@ mod tests {
         .set_indexing_options(
             TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
         )
-        .set_fast("default")
+        .set_fast(None)
         .set_stored();
     let text_field = schema_builder.add_text_field("text", text_fieldtype);
     let date_field = schema_builder.add_date_field("date", FAST);
@@ -565,7 +668,7 @@ mod tests {
         let segment_ids = index
             .searchable_segment_ids()
             .expect("Searchable segments failed.");
-        let mut index_writer = index.writer_for_tests()?;
+        let mut index_writer: IndexWriter = index.writer_for_tests()?;
         index_writer.merge(&segment_ids).wait()?;
         index_writer.wait_merging_threads()?;
     }


@@ -15,7 +15,8 @@ use super::metric::{
     SegmentPercentilesCollector, SegmentStatsCollector, SegmentStatsType, StatsAggregation,
     SumAggregation,
 };
-use crate::aggregation::bucket::SegmentTermCollectorComposite;
+use crate::aggregation::bucket::TermMissingAgg;
+use crate::aggregation::metric::SegmentTopHitsCollector;

 pub(crate) trait SegmentAggregationCollector: CollectorClone + Debug {
     fn add_intermediate_aggregation_result(
@@ -82,29 +83,24 @@ pub(crate) fn build_single_agg_segment_collector(
     use AggregationVariants::*;
     match &req.agg.agg {
         Terms(terms_req) => {
-            if let Some(acc2) = req.accessor2.as_ref() {
-                Ok(Box::new(
-                    SegmentTermCollectorComposite::from_req_and_validate(
-                        terms_req,
-                        &mut req.sub_aggregation,
-                        req.field_type,
-                        acc2.1,
-                        accessor_idx,
-                    )?,
-                ))
-            } else {
+            if req.accessors.is_empty() {
                 Ok(Box::new(SegmentTermCollector::from_req_and_validate(
                     terms_req,
                     &mut req.sub_aggregation,
                     req.field_type,
                     accessor_idx,
                 )?))
+            } else {
+                Ok(Box::new(TermMissingAgg::new(
+                    accessor_idx,
+                    &mut req.sub_aggregation,
+                )?))
             }
         }
         Range(range_req) => Ok(Box::new(SegmentRangeCollector::from_req_and_validate(
             range_req,
             &mut req.sub_aggregation,
-            &mut req.limits,
+            &req.limits,
             req.field_type,
             accessor_idx,
         )?)),
@@ -120,35 +116,43 @@ pub(crate) fn build_single_agg_segment_collector(
             req.field_type,
             accessor_idx,
         )?)),
-        Average(AverageAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
-            req.field_type,
-            SegmentStatsType::Average,
-            accessor_idx,
-        ))),
-        Count(CountAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
+        Average(AverageAggregation { missing, .. }) => {
+            Ok(Box::new(SegmentStatsCollector::from_req(
+                req.field_type,
+                SegmentStatsType::Average,
+                accessor_idx,
+                *missing,
+            )))
+        }
+        Count(CountAggregation { missing, .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
             req.field_type,
             SegmentStatsType::Count,
             accessor_idx,
+            *missing,
         ))),
-        Max(MaxAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
+        Max(MaxAggregation { missing, .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
             req.field_type,
             SegmentStatsType::Max,
             accessor_idx,
+            *missing,
         ))),
-        Min(MinAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
+        Min(MinAggregation { missing, .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
             req.field_type,
             SegmentStatsType::Min,
             accessor_idx,
+            *missing,
         ))),
-        Stats(StatsAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
+        Stats(StatsAggregation { missing, .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
             req.field_type,
             SegmentStatsType::Stats,
             accessor_idx,
+            *missing,
         ))),
-        Sum(SumAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
+        Sum(SumAggregation { missing, .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
             req.field_type,
             SegmentStatsType::Sum,
             accessor_idx,
+            *missing,
         ))),
         Percentiles(percentiles_req) => Ok(Box::new(
             SegmentPercentilesCollector::from_req_and_validate(
@@ -157,6 +161,11 @@ pub(crate) fn build_single_agg_segment_collector(
                 accessor_idx,
             )?,
         )),
+        TopHits(top_hits_req) => Ok(Box::new(SegmentTopHitsCollector::from_req(
+            top_hits_req,
+            accessor_idx,
+            req.segment_ordinal,
+        ))),
     }
 }


@@ -16,7 +16,7 @@ use crate::{DocId, Score, SegmentOrdinal, SegmentReader};
 /// let schema = schema_builder.build();
 /// let index = Index::create_in_ram(schema);
 ///
-/// let mut index_writer = index.writer(3_000_000).unwrap();
+/// let mut index_writer = index.writer(15_000_000).unwrap();
 /// index_writer.add_document(doc!(title => "The Name of the Wind")).unwrap();
 /// index_writer.add_document(doc!(title => "The Diary of Muadib")).unwrap();
 /// index_writer.add_document(doc!(title => "A Dairy Cow")).unwrap();


@@ -89,7 +89,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 /// let schema = schema_builder.build();
 /// let index = Index::create_in_ram(schema);
 /// {
-///     let mut index_writer = index.writer(3_000_000)?;
+///     let mut index_writer = index.writer(15_000_000)?;
 ///     // a document can be associated with any number of facets
 ///     index_writer.add_document(doc!(
 ///         title => "The Name of the Wind",
@@ -410,6 +410,7 @@ impl SegmentCollector for FacetSegmentCollector {
 /// Intermediary result of the `FacetCollector` that stores
 /// the facet counts for all the segments.
+#[derive(Default, Clone)]
 pub struct FacetCounts {
     facet_counts: BTreeMap<Facet, u64>,
 }
@@ -493,10 +494,10 @@ mod tests {
     use super::{FacetCollector, FacetCounts};
     use crate::collector::facet_collector::compress_mapping;
     use crate::collector::Count;
-    use crate::core::Index;
+    use crate::index::Index;
     use crate::query::{AllQuery, QueryParser, TermQuery};
-    use crate::schema::{Document, Facet, FacetOptions, IndexRecordOption, Schema};
-    use crate::Term;
+    use crate::schema::{Facet, FacetOptions, IndexRecordOption, Schema, TantivyDocument};
+    use crate::{IndexWriter, Term};

     fn test_collapse_mapping_aux(
         facet_terms: &[&str],
@@ -559,7 +560,7 @@ mod tests {
         let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
         let schema = schema_builder.build();
         let index = Index::create_in_ram(schema);
-        let mut index_writer = index.writer_for_tests().unwrap();
+        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
         index_writer
             .add_document(doc!(facet_field=>Facet::from("/facet/a")))
             .unwrap();
@@ -588,7 +589,7 @@ mod tests {
         let schema = schema_builder.build();
         let index = Index::create_in_ram(schema);
-        let mut index_writer = index.writer_for_tests().unwrap();
+        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
         let num_facets: usize = 3 * 4 * 5;
         let facets: Vec<Facet> = (0..num_facets)
             .map(|mut n| {
@@ -601,7 +602,7 @@ mod tests {
             })
             .collect();
         for i in 0..num_facets * 10 {
-            let mut doc = Document::new();
+            let mut doc = TantivyDocument::new();
             doc.add_facet(facet_field, facets[i % num_facets].clone());
             index_writer.add_document(doc).unwrap();
         }
@@ -732,24 +733,25 @@ mod tests {
         let index = Index::create_in_ram(schema);
         let uniform = Uniform::new_inclusive(1, 100_000);
-        let mut docs: Vec<Document> = vec![("a", 10), ("b", 100), ("c", 7), ("d", 12), ("e", 21)]
-            .into_iter()
-            .flat_map(|(c, count)| {
-                let facet = Facet::from(&format!("/facet/{}", c));
-                let doc = doc!(facet_field => facet);
-                iter::repeat(doc).take(count)
-            })
-            .map(|mut doc| {
-                doc.add_facet(
-                    facet_field,
-                    &format!("/facet/{}", thread_rng().sample(uniform)),
-                );
-                doc
-            })
-            .collect();
+        let mut docs: Vec<TantivyDocument> =
+            vec![("a", 10), ("b", 100), ("c", 7), ("d", 12), ("e", 21)]
+                .into_iter()
+                .flat_map(|(c, count)| {
+                    let facet = Facet::from(&format!("/facet/{}", c));
+                    let doc = doc!(facet_field => facet);
+                    iter::repeat(doc).take(count)
+                })
+                .map(|mut doc| {
+                    doc.add_facet(
+                        facet_field,
+                        &format!("/facet/{}", thread_rng().sample(uniform)),
+                    );
+                    doc
+                })
+                .collect();
         docs[..].shuffle(&mut thread_rng());
-        let mut index_writer = index.writer_for_tests().unwrap();
+        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
         for doc in docs {
             index_writer.add_document(doc).unwrap();
         }
@@ -780,7 +782,7 @@ mod tests {
         let schema = schema_builder.build();
         let index = Index::create_in_ram(schema);
-        let docs: Vec<Document> = vec![("b", 2), ("a", 2), ("c", 4)]
+        let docs: Vec<TantivyDocument> = vec![("b", 2), ("a", 2), ("c", 4)]
             .into_iter()
             .flat_map(|(c, count)| {
                 let facet = Facet::from(&format!("/facet/{}", c));
@@ -828,7 +830,7 @@ mod bench {
     use crate::collector::FacetCollector;
     use crate::query::AllQuery;
     use crate::schema::{Facet, Schema, INDEXED};
-    use crate::Index;
+    use crate::{Index, IndexWriter};

     #[bench]
     fn bench_facet_collector(b: &mut Bencher) {
@@ -847,7 +849,7 @@ mod bench {
         // 40425 docs
         docs[..].shuffle(&mut thread_rng());
-        let mut index_writer = index.writer_for_tests().unwrap();
+        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
         for doc in docs {
             index_writer.add_document(doc).unwrap();
         }


@@ -12,8 +12,7 @@ use std::marker::PhantomData;
 use columnar::{BytesColumn, Column, DynamicColumn, HasAssociatedColumnType};

 use crate::collector::{Collector, SegmentCollector};
-use crate::schema::Field;
-use crate::{DocId, Score, SegmentReader, TantivyError};
+use crate::{DocId, Score, SegmentReader};

 /// The `FilterCollector` filters docs using a fast field value and a predicate.
 ///
@@ -38,7 +37,7 @@ use crate::{DocId, Score, SegmentReader, TantivyError};
 /// let schema = schema_builder.build();
 /// let index = Index::create_in_ram(schema);
 ///
-/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
 /// index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64))?;
 /// index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64))?;
 /// index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64))?;
@@ -50,13 +49,13 @@ use crate::{DocId, Score, SegmentReader, TantivyError};
 ///
 /// let query_parser = QueryParser::for_index(&index, vec![title]);
 /// let query = query_parser.parse_query("diary")?;
-/// let no_filter_collector = FilterCollector::new(price, |value: u64| value > 20_120u64, TopDocs::with_limit(2));
+/// let no_filter_collector = FilterCollector::new("price".to_string(), |value: u64| value > 20_120u64, TopDocs::with_limit(2));
 /// let top_docs = searcher.search(&query, &no_filter_collector)?;
 ///
 /// assert_eq!(top_docs.len(), 1);
 /// assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
 ///
-/// let filter_all_collector: FilterCollector<_, _, u64> = FilterCollector::new(price, |value| value < 5u64, TopDocs::with_limit(2));
+/// let filter_all_collector: FilterCollector<_, _, u64> = FilterCollector::new("price".to_string(), |value| value < 5u64, TopDocs::with_limit(2));
 /// let filtered_top_docs = searcher.search(&query, &filter_all_collector)?;
 ///
 /// assert_eq!(filtered_top_docs.len(), 0);
@@ -70,7 +69,7 @@ use crate::{DocId, Score, SegmentReader, TantivyError};
 pub struct FilterCollector<TCollector, TPredicate, TPredicateValue>
 where TPredicate: 'static + Clone
 {
-    field: Field,
+    field: String,
     collector: TCollector,
     predicate: TPredicate,
     t_predicate_value: PhantomData<TPredicateValue>,
@@ -83,7 +82,7 @@ where
     TPredicate: Fn(TPredicateValue) -> bool + Send + Sync + Clone,
 {
     /// Create a new `FilterCollector`.
-    pub fn new(field: Field, predicate: TPredicate, collector: TCollector) -> Self {
+    pub fn new(field: String, predicate: TPredicate, collector: TCollector) -> Self {
         Self {
             field,
             predicate,
@@ -110,18 +109,7 @@ where
         segment_local_id: u32,
         segment_reader: &SegmentReader,
     ) -> crate::Result<Self::Child> {
-        let schema = segment_reader.schema();
-        let field_entry = schema.get_field_entry(self.field);
-        if !field_entry.is_fast() {
-            return Err(TantivyError::SchemaError(format!(
-                "Field {:?} is not a fast field.",
-                field_entry.name()
-            )));
-        }
-        let column_opt = segment_reader
-            .fast_fields()
-            .column_opt(field_entry.name())?;
+        let column_opt = segment_reader.fast_fields().column_opt(&self.field)?;

         let segment_collector = self
             .collector
@@ -216,7 +204,7 @@ where
 /// let schema = schema_builder.build();
 /// let index = Index::create_in_ram(schema);
 ///
-/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
 /// index_writer.add_document(doc!(title => "The Name of the Wind", barcode => &b"010101"[..]))?;
 /// index_writer.add_document(doc!(title => "The Diary of Muadib", barcode => &b"110011"[..]))?;
 /// index_writer.add_document(doc!(title => "A Dairy Cow", barcode => &b"110111"[..]))?;
@@ -229,7 +217,7 @@ where
 ///
 /// let query_parser = QueryParser::for_index(&index, vec![title]);
 /// let query = query_parser.parse_query("diary")?;
-/// let filter_collector = BytesFilterCollector::new(barcode, |bytes: &[u8]| bytes.starts_with(b"01"), TopDocs::with_limit(2));
+/// let filter_collector = BytesFilterCollector::new("barcode".to_string(), |bytes: &[u8]| bytes.starts_with(b"01"), TopDocs::with_limit(2));
 /// let top_docs = searcher.search(&query, &filter_collector)?;
 ///
 /// assert_eq!(top_docs.len(), 1);
@@ -240,7 +228,7 @@ where
 pub struct BytesFilterCollector<TCollector, TPredicate>
 where TPredicate: 'static + Clone
 {
-    field: Field,
+    field: String,
     collector: TCollector,
     predicate: TPredicate,
 }
@@ -251,7 +239,7 @@ where
     TPredicate: Fn(&[u8]) -> bool + Send + Sync + Clone,
 {
     /// Create a new `BytesFilterCollector`.
-    pub fn new(field: Field, predicate: TPredicate, collector: TCollector) -> Self {
+    pub fn new(field: String, predicate: TPredicate, collector: TCollector) -> Self {
         Self {
             field,
             predicate,
@@ -274,10 +262,7 @@ where
        segment_local_id: u32,
        segment_reader: &SegmentReader,
    ) -> crate::Result<Self::Child> {
-        let schema = segment_reader.schema();
-        let field_name = schema.get_field_name(self.field);
-        let column_opt = segment_reader.fast_fields().bytes(field_name)?;
+        let column_opt = segment_reader.fast_fields().bytes(&self.field)?;
        let segment_collector = self
            .collector


@@ -233,7 +233,7 @@ mod tests {
    let val_field = schema_builder.add_i64_field("val_field", FAST);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema);
-    let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
+    let mut writer = index.writer_for_tests()?;
    writer.add_document(doc!(val_field=>12i64))?;
    writer.add_document(doc!(val_field=>-30i64))?;
    writer.add_document(doc!(val_field=>-12i64))?;
@@ -255,7 +255,7 @@ mod tests {
    let val_field = schema_builder.add_i64_field("val_field", FAST);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema);
-    let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
+    let mut writer = index.writer_for_tests()?;
    writer.add_document(doc!(val_field=>12i64))?;
    writer.commit()?;
    writer.add_document(doc!(val_field=>-30i64))?;
@@ -280,7 +280,7 @@ mod tests {
    let date_field = schema_builder.add_date_field("date_field", FAST);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema);
-    let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
+    let mut writer = index.writer_for_tests()?;
    writer.add_document(doc!(date_field=>DateTime::from_primitive(Date::from_calendar_date(1982, Month::September, 17)?.with_hms(0, 0, 0)?)))?;
    writer.add_document(
        doc!(date_field=>DateTime::from_primitive(Date::from_calendar_date(1986, Month::March, 9)?.with_hms(0, 0, 0)?)),


@@ -44,7 +44,7 @@
//! # let title = schema_builder.add_text_field("title", TEXT);
//! # let schema = schema_builder.build();
//! # let index = Index::create_in_ram(schema);
-//! # let mut index_writer = index.writer(3_000_000)?;
+//! # let mut index_writer = index.writer(15_000_000)?;
//! # index_writer.add_document(doc!(
//! #     title => "The Name of the Wind",
//! # ))?;
@@ -97,7 +97,8 @@ pub use self::multi_collector::{FruitHandle, MultiCollector, MultiFruit};
mod top_collector;
mod top_score_collector;
-pub use self::top_score_collector::TopDocs;
+pub use self::top_collector::ComparableDoc;
+pub use self::top_score_collector::{TopDocs, TopNComputer};
mod custom_score_top_collector;
pub use self::custom_score_top_collector::{CustomScorer, CustomSegmentScorer};


@@ -120,7 +120,7 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
-/// let mut index_writer = index.writer(3_000_000)?;
+/// let mut index_writer = index.writer(15_000_000)?;
/// index_writer.add_document(doc!(title => "The Name of the Wind"))?;
/// index_writer.add_document(doc!(title => "The Diary of Muadib"))?;
/// index_writer.add_document(doc!(title => "A Dairy Cow"))?;


@@ -2,12 +2,14 @@ use columnar::{BytesColumn, Column};
use super::*;
use crate::collector::{Count, FilterCollector, TopDocs};
-use crate::core::SegmentReader;
+use crate::index::SegmentReader;
use crate::query::{AllQuery, QueryParser};
use crate::schema::{Schema, FAST, TEXT};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
-use crate::{doc, DateTime, DocAddress, DocId, Document, Index, Score, Searcher, SegmentOrdinal};
+use crate::{
+    doc, DateTime, DocAddress, DocId, Index, Score, Searcher, SegmentOrdinal, TantivyDocument,
+};
pub const TEST_COLLECTOR_WITH_SCORE: TestCollector = TestCollector {
    compute_score: true,
@@ -26,7 +28,7 @@ pub fn test_filter_collector() -> crate::Result<()> {
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema);
-    let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+    let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
    index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_utc(OffsetDateTime::parse("1898-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
    index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_utc(OffsetDateTime::parse("2020-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
    index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_utc(OffsetDateTime::parse("2019-04-20T00:00:00+00:00", &Rfc3339).unwrap())))?;
@@ -40,7 +42,7 @@ pub fn test_filter_collector() -> crate::Result<()> {
    let query_parser = QueryParser::for_index(&index, vec![title]);
    let query = query_parser.parse_query("diary")?;
    let filter_some_collector = FilterCollector::new(
-        price,
+        "price".to_string(),
        &|value: u64| value > 20_120u64,
        TopDocs::with_limit(2),
    );
@@ -49,8 +51,11 @@ pub fn test_filter_collector() -> crate::Result<()> {
    assert_eq!(top_docs.len(), 1);
    assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
-    let filter_all_collector: FilterCollector<_, _, u64> =
-        FilterCollector::new(price, &|value| value < 5u64, TopDocs::with_limit(2));
+    let filter_all_collector: FilterCollector<_, _, u64> = FilterCollector::new(
+        "price".to_string(),
+        &|value| value < 5u64,
+        TopDocs::with_limit(2),
+    );
    let filtered_top_docs = searcher.search(&query, &filter_all_collector).unwrap();
    assert_eq!(filtered_top_docs.len(), 0);
@@ -61,7 +66,8 @@ pub fn test_filter_collector() -> crate::Result<()> {
            > 0
    }
-    let filter_dates_collector = FilterCollector::new(date, &date_filter, TopDocs::with_limit(5));
+    let filter_dates_collector =
+        FilterCollector::new("date".to_string(), &date_filter, TopDocs::with_limit(5));
    let filtered_date_docs = searcher.search(&query, &filter_dates_collector)?;
    assert_eq!(filtered_date_docs.len(), 2);
@@ -280,8 +286,8 @@ fn make_test_searcher() -> crate::Result<Searcher> {
    let schema = Schema::builder().build();
    let index = Index::create_in_ram(schema);
    let mut index_writer = index.writer_for_tests()?;
-    index_writer.add_document(Document::default())?;
-    index_writer.add_document(Document::default())?;
+    index_writer.add_document(TantivyDocument::default())?;
+    index_writer.add_document(TantivyDocument::default())?;
    index_writer.commit()?;
    Ok(index.reader()?.searcher())
}


@@ -1,39 +1,58 @@
use std::cmp::Ordering;
-use std::collections::BinaryHeap;
use std::marker::PhantomData;
+use serde::{Deserialize, Serialize};
+use super::top_score_collector::TopNComputer;
use crate::{DocAddress, DocId, SegmentOrdinal, SegmentReader};
/// Contains a feature (field, score, etc.) of a document along with the document address.
///
-/// It has a custom implementation of `PartialOrd` that reverses the order. This is because the
-/// default Rust heap is a max heap, whereas a min heap is needed.
-///
-/// Additionally, it guarantees stable sorting: in case of a tie on the feature, the document
+/// It guarantees stable sorting: in case of a tie on the feature, the document
/// address is used.
///
+/// The REVERSE_ORDER generic parameter controls whether the by-feature order
+/// should be reversed, which is useful for achieving, for example, largest-first
+/// semantics without having to wrap the feature in a `Reverse`.
+///
/// WARNING: equality is not what you would expect here.
/// Two elements are equal if their feature is equal, and regardless of whether `doc`
/// is equal. This should be perfectly fine for this usage, but let's make sure this
/// struct is never public.
-pub(crate) struct ComparableDoc<T, D> {
+#[derive(Clone, Default, Serialize, Deserialize)]
+pub struct ComparableDoc<T, D, const REVERSE_ORDER: bool = false> {
+    /// The feature of the document. In practice, this is
+    /// any type that implements `PartialOrd`.
    pub feature: T,
+    /// The document address. In practice, this is any
+    /// type that implements `PartialOrd`, and is guaranteed
+    /// to be unique for each document.
    pub doc: D,
}
+impl<T: std::fmt::Debug, D: std::fmt::Debug, const R: bool> std::fmt::Debug
+    for ComparableDoc<T, D, R>
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct(format!("ComparableDoc<_, _, {R}>").as_str())
+            .field("feature", &self.feature)
+            .field("doc", &self.doc)
+            .finish()
+    }
+}
-impl<T: PartialOrd, D: PartialOrd> PartialOrd for ComparableDoc<T, D> {
+impl<T: PartialOrd, D: PartialOrd, const R: bool> PartialOrd for ComparableDoc<T, D, R> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
-impl<T: PartialOrd, D: PartialOrd> Ord for ComparableDoc<T, D> {
+impl<T: PartialOrd, D: PartialOrd, const R: bool> Ord for ComparableDoc<T, D, R> {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
-        // Reversed to make BinaryHeap work as a min-heap
-        let by_feature = other
-            .feature
-            .partial_cmp(&self.feature)
+        let by_feature = self
+            .feature
+            .partial_cmp(&other.feature)
+            .map(|ord| if R { ord.reverse() } else { ord })
            .unwrap_or(Ordering::Equal);
        let lazy_by_doc_address = || self.doc.partial_cmp(&other.doc).unwrap_or(Ordering::Equal);
@@ -45,13 +64,13 @@ impl<T: PartialOrd, D: PartialOrd> Ord for ComparableDoc<T, D> {
    }
}
-impl<T: PartialOrd, D: PartialOrd> PartialEq for ComparableDoc<T, D> {
+impl<T: PartialOrd, D: PartialOrd, const R: bool> PartialEq for ComparableDoc<T, D, R> {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}
-impl<T: PartialOrd, D: PartialOrd> Eq for ComparableDoc<T, D> {}
+impl<T: PartialOrd, D: PartialOrd, const R: bool> Eq for ComparableDoc<T, D, R> {}
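The const-generic ordering introduced above can be exercised in isolation. A minimal standalone sketch (simplified, hypothetical `Entry` type, not tantivy's `ComparableDoc`) of flipping the by-feature order at compile time with a stable doc-id tie-break:

```rust
use std::cmp::Ordering;

// Stand-in for the ComparableDoc ordering: the const parameter `R` reverses
// the by-feature order at compile time, and ties on the feature fall back to
// the doc id, which keeps the resulting sort stable.
#[derive(Debug, Clone)]
struct Entry<const R: bool> {
    feature: f32,
    doc: u32,
}

impl<const R: bool> Entry<R> {
    fn cmp_key(&self, other: &Self) -> Ordering {
        let by_feature = self
            .feature
            .partial_cmp(&other.feature)
            .map(|ord| if R { ord.reverse() } else { ord })
            .unwrap_or(Ordering::Equal);
        // Tie-break on doc id only when the features compare equal.
        by_feature.then_with(|| self.doc.cmp(&other.doc))
    }
}

fn main() {
    let mut docs: Vec<Entry<true>> = vec![
        Entry { feature: 1.0, doc: 2 },
        Entry { feature: 3.0, doc: 1 },
        Entry { feature: 1.0, doc: 0 },
    ];
    docs.sort_by(|a, b| a.cmp_key(b));
    // R = true sorts the largest feature first; the 1.0 tie resolves by doc id.
    let order: Vec<u32> = docs.iter().map(|e| e.doc).collect();
    println!("{order:?}"); // [1, 0, 2]
}
```

Avoiding a `std::cmp::Reverse` wrapper this way keeps the element type identical for both orderings, which is what lets `TopNComputer` default to largest-first without touching the stored features.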
pub(crate) struct TopCollector<T> {
    pub limit: usize,
@@ -91,18 +110,13 @@ where T: PartialOrd + Clone
        if self.limit == 0 {
            return Ok(Vec::new());
        }
-        let mut top_collector = BinaryHeap::new();
+        let mut top_collector: TopNComputer<_, _> = TopNComputer::new(self.limit + self.offset);
        for child_fruit in children {
            for (feature, doc) in child_fruit {
-                if top_collector.len() < (self.limit + self.offset) {
-                    top_collector.push(ComparableDoc { feature, doc });
-                } else if let Some(mut head) = top_collector.peek_mut() {
-                    if head.feature < feature {
-                        *head = ComparableDoc { feature, doc };
-                    }
-                }
+                top_collector.push(feature, doc);
            }
        }
        Ok(top_collector
            .into_sorted_vec()
            .into_iter()
@@ -111,7 +125,7 @@ where T: PartialOrd + Clone
            .collect())
    }
-    pub(crate) fn for_segment<F: PartialOrd>(
+    pub(crate) fn for_segment<F: PartialOrd + Clone>(
        &self,
        segment_id: SegmentOrdinal,
        _: &SegmentReader,
@@ -136,20 +150,20 @@ where T: PartialOrd + Clone
/// The Top Collector keeps track of the K documents
/// sorted by type `T`.
///
-/// The implementation is based on a `BinaryHeap`.
+/// The implementation repeatedly truncates the buffer to the top K documents
+/// on the median once it holds 2 * K documents.
/// The theoretical complexity for collecting the top `K` out of `n` documents
-/// is `O(n log K)`.
+/// is `O(n + K)`.
pub(crate) struct TopSegmentCollector<T> {
-    limit: usize,
-    heap: BinaryHeap<ComparableDoc<T, DocId>>,
+    /// We reverse the order of the feature in order to
+    /// have top-semantics instead of bottom-semantics.
+    topn_computer: TopNComputer<T, DocId>,
    segment_ord: u32,
}
-impl<T: PartialOrd> TopSegmentCollector<T> {
+impl<T: PartialOrd + Clone> TopSegmentCollector<T> {
    fn new(segment_ord: SegmentOrdinal, limit: usize) -> TopSegmentCollector<T> {
        TopSegmentCollector {
-            limit,
-            heap: BinaryHeap::with_capacity(limit),
+            topn_computer: TopNComputer::new(limit),
            segment_ord,
        }
    }
@@ -158,7 +172,7 @@ impl<T: PartialOrd> TopSegmentCollector<T> {
impl<T: PartialOrd + Clone> TopSegmentCollector<T> {
    pub fn harvest(self) -> Vec<(T, DocAddress)> {
        let segment_ord = self.segment_ord;
-        self.heap
+        self.topn_computer
            .into_sorted_vec()
            .into_iter()
            .map(|comparable_doc| {
@@ -173,33 +187,13 @@ impl<T: PartialOrd + Clone> TopSegmentCollector<T> {
            .collect()
    }
-    /// Return true if more documents have been collected than the limit.
-    #[inline]
-    pub(crate) fn at_capacity(&self) -> bool {
-        self.heap.len() >= self.limit
-    }
    /// Collects a document scored by the given feature
    ///
    /// It collects documents until it has reached the max capacity. Once it reaches capacity, it
    /// will compare the lowest scoring item with the given one and keep whichever is greater.
    #[inline]
    pub fn collect(&mut self, doc: DocId, feature: T) {
-        if self.at_capacity() {
-            // It's ok to unwrap as long as a limit of 0 is forbidden.
-            if let Some(limit_feature) = self.heap.peek().map(|head| head.feature.clone()) {
-                if limit_feature < feature {
-                    if let Some(mut head) = self.heap.peek_mut() {
-                        head.feature = feature;
-                        head.doc = doc;
-                    }
-                }
-            }
-        } else {
-            // we have not reached capacity yet, so we can just push the
-            // element.
-            self.heap.push(ComparableDoc { feature, doc });
-        }
+        self.topn_computer.push(feature, doc);
    }
}


@@ -1,9 +1,10 @@
-use std::collections::BinaryHeap;
use std::fmt;
use std::marker::PhantomData;
use std::sync::Arc;
use columnar::ColumnValues;
+use serde::de::DeserializeOwned;
+use serde::{Deserialize, Serialize};
use super::Collector;
use crate::collector::custom_score_top_collector::CustomScoreTopCollector;
@@ -86,12 +87,15 @@ where
/// The `TopDocs` collector keeps track of the top `K` documents
/// sorted by their score.
///
-/// The implementation is based on a `BinaryHeap`.
-/// The theoretical complexity for collecting the top `K` out of `n` documents
-/// is `O(n log K)`.
+/// The implementation repeatedly truncates the buffer to the top K documents
+/// on the median once it holds 2 * K documents, using pattern-defeating quicksort.
+/// The theoretical complexity for collecting the top `K` out of `N` documents
+/// is `O(N + K)`.
///
-/// This collector guarantees a stable sorting in case of a tie on the
-/// document score. As such, it is suitable to implement pagination.
+/// This collector does not guarantee a stable sorting in case of a tie on the
+/// document score; for stable sorting, `PartialOrd` needs to resolve ties on
+/// other fields, like the doc id, in case of score equality.
+/// Only then is it suitable for pagination.
///
/// ```rust
/// use tantivy::collector::TopDocs;
@@ -105,7 +109,7 @@ where
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
///
-/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
/// index_writer.add_document(doc!(title => "The Name of the Wind"))?;
/// index_writer.add_document(doc!(title => "The Diary of Muadib"))?;
/// index_writer.add_document(doc!(title => "A Dairy Cow"))?;
@@ -210,7 +214,7 @@ impl TopDocs {
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
///
-/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
/// index_writer.add_document(doc!(title => "The Name of the Wind"))?;
/// index_writer.add_document(doc!(title => "The Diary of Muadib"))?;
/// index_writer.add_document(doc!(title => "A Dairy Cow"))?;
@@ -261,7 +265,7 @@ impl TopDocs {
/// # let schema = schema_builder.build();
/// #
/// # let index = Index::create_in_ram(schema);
-/// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// # let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
/// # index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64))?;
/// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64))?;
/// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64))?;
@@ -307,7 +311,7 @@ impl TopDocs {
    ///
    /// To comfortably work with `u64`s, `i64`s, `f64`s, or `date`s, please refer to
    /// the [.order_by_fast_field(...)](TopDocs::order_by_fast_field) method.
-    fn order_by_u64_field(
+    pub fn order_by_u64_field(
        self,
        field: impl ToString,
        order: Order,
@@ -349,7 +353,7 @@ impl TopDocs {
/// # let schema = schema_builder.build();
/// #
/// # let index = Index::create_in_ram(schema);
-/// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// # let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
/// # index_writer.add_document(doc!(title => "MadCow Inc.", revenue => 92_000_000i64))?;
/// # index_writer.add_document(doc!(title => "Zozo Cow KKK", revenue => 119_000_000i64))?;
/// # index_writer.add_document(doc!(title => "Declining Cow", revenue => -63_000_000i64))?;
@@ -449,7 +453,7 @@ impl TopDocs {
/// fn create_index() -> tantivy::Result<Index> {
///     let schema = create_schema();
///     let index = Index::create_in_ram(schema);
-///     let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+///     let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
///     let product_name = index.schema().get_field("product_name").unwrap();
///     let popularity: Field = index.schema().get_field("popularity").unwrap();
///     index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64))?;
@@ -556,7 +560,7 @@ impl TopDocs {
/// # fn main() -> tantivy::Result<()> {
/// #     let schema = create_schema();
/// #     let index = Index::create_in_ram(schema);
-/// #     let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// #     let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
/// #     let product_name = index.schema().get_field("product_name").unwrap();
/// #
/// let popularity: Field = index.schema().get_field("popularity").unwrap();
@@ -661,50 +665,27 @@ impl Collector for TopDocs {
        reader: &SegmentReader,
    ) -> crate::Result<<Self::Child as SegmentCollector>::Fruit> {
        let heap_len = self.0.limit + self.0.offset;
-        let mut heap: BinaryHeap<ComparableDoc<Score, DocId>> = BinaryHeap::with_capacity(heap_len);
+        let mut top_n: TopNComputer<_, _> = TopNComputer::new(heap_len);
        if let Some(alive_bitset) = reader.alive_bitset() {
            let mut threshold = Score::MIN;
-            weight.for_each_pruning(threshold, reader, &mut |doc, score| {
+            top_n.threshold = Some(threshold);
+            weight.for_each_pruning(Score::MIN, reader, &mut |doc, score| {
                if alive_bitset.is_deleted(doc) {
                    return threshold;
                }
-                let heap_item = ComparableDoc {
-                    feature: score,
-                    doc,
-                };
-                if heap.len() < heap_len {
-                    heap.push(heap_item);
-                    if heap.len() == heap_len {
-                        threshold = heap.peek().map(|el| el.feature).unwrap_or(Score::MIN);
-                    }
-                    return threshold;
-                }
-                *heap.peek_mut().unwrap() = heap_item;
-                threshold = heap.peek().map(|el| el.feature).unwrap_or(Score::MIN);
+                top_n.push(score, doc);
+                threshold = top_n.threshold.unwrap_or(Score::MIN);
                threshold
            })?;
        } else {
            weight.for_each_pruning(Score::MIN, reader, &mut |doc, score| {
-                let heap_item = ComparableDoc {
-                    feature: score,
-                    doc,
-                };
-                if heap.len() < heap_len {
-                    heap.push(heap_item);
-                    // TODO the threshold is suboptimal for heap.len == heap_len
-                    if heap.len() == heap_len {
-                        return heap.peek().map(|el| el.feature).unwrap_or(Score::MIN);
-                    } else {
-                        return Score::MIN;
-                    }
-                }
-                *heap.peek_mut().unwrap() = heap_item;
-                heap.peek().map(|el| el.feature).unwrap_or(Score::MIN)
+                top_n.push(score, doc);
+                top_n.threshold.unwrap_or(Score::MIN)
            })?;
        }
-        let fruit = heap
+        let fruit = top_n
            .into_sorted_vec()
            .into_iter()
            .map(|cid| {
@@ -736,9 +717,142 @@ impl SegmentCollector for TopScoreSegmentCollector {
    }
}
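The rewritten `collect_segment` above replaces the manual heap bookkeeping with a threshold hand-shake: every pushed hit may raise the collector's threshold, which is returned to `for_each_pruning` so the scorer can skip low-scoring blocks. A minimal standalone sketch of that control flow (simplified types and a hypothetical `collect_pruning` helper, not tantivy's actual API):

```rust
// Sketch of the pruning hand-shake: the scoring loop reports each (doc, score)
// hit and receives back the collector's current threshold; a real scorer would
// use that threshold to skip whole blocks of documents cheaply.
fn collect_pruning(
    hits: &[(u32, f32)], // (doc, score) pairs a scorer would produce
    top_n: usize,
) -> Vec<(u32, f32)> {
    let mut kept: Vec<(u32, f32)> = Vec::new();
    let mut threshold = f32::MIN;
    for &(doc, score) in hits {
        // Once the collector is full, anything at or below the threshold is skipped.
        if score <= threshold && kept.len() >= top_n {
            continue;
        }
        kept.push((doc, score));
        if kept.len() > top_n {
            kept.sort_by(|a, b| b.1.total_cmp(&a.1));
            kept.truncate(top_n);
            // The smallest kept score becomes the new pruning threshold.
            threshold = kept.last().map(|&(_, s)| s).unwrap_or(f32::MIN);
        }
    }
    kept.sort_by(|a, b| b.1.total_cmp(&a.1));
    kept
}

fn main() {
    let hits = [(0, 0.5), (1, 2.0), (2, 1.5), (3, 0.1), (4, 3.0)];
    println!("{:?}", collect_pruning(&hits, 2));
}
```

The real collector amortizes the truncation (see `TopNComputer`); the point here is only the feedback loop between pushing and the returned threshold.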
/// Fast TopN Computation
///
/// Capacity of the vec is 2 * top_n.
/// The buffer is truncated to the top_n elements when it reaches the capacity of the Vec.
/// That means capacity has special meaning and should be carried over when cloning or serializing.
///
/// For TopN == 0, it will be relatively expensive.
#[derive(Serialize, Deserialize)]
#[serde(from = "TopNComputerDeser<Score, D, REVERSE_ORDER>")]
pub struct TopNComputer<Score, D, const REVERSE_ORDER: bool = true> {
/// The buffer reverses sort order to get top-semantics instead of bottom-semantics
buffer: Vec<ComparableDoc<Score, D, REVERSE_ORDER>>,
top_n: usize,
pub(crate) threshold: Option<Score>,
}
// Intermediate struct used to deserialize TopNComputer while restoring the vec capacity
#[derive(Deserialize)]
struct TopNComputerDeser<Score, D, const REVERSE_ORDER: bool> {
buffer: Vec<ComparableDoc<Score, D, REVERSE_ORDER>>,
top_n: usize,
threshold: Option<Score>,
}
// Custom clone to keep capacity
impl<Score: Clone, D: Clone, const REVERSE_ORDER: bool> Clone
for TopNComputer<Score, D, REVERSE_ORDER>
{
fn clone(&self) -> Self {
let mut buffer_clone = Vec::with_capacity(self.buffer.capacity());
buffer_clone.extend(self.buffer.iter().cloned());
TopNComputer {
buffer: buffer_clone,
top_n: self.top_n,
threshold: self.threshold.clone(),
}
}
}
impl<Score, D, const R: bool> From<TopNComputerDeser<Score, D, R>> for TopNComputer<Score, D, R> {
fn from(mut value: TopNComputerDeser<Score, D, R>) -> Self {
let expected_cap = value.top_n.max(1) * 2;
let current_cap = value.buffer.capacity();
if current_cap < expected_cap {
value.buffer.reserve_exact(expected_cap - current_cap);
} else {
value.buffer.shrink_to(expected_cap);
}
TopNComputer {
buffer: value.buffer,
top_n: value.top_n,
threshold: value.threshold,
}
}
}
impl<Score, D, const R: bool> TopNComputer<Score, D, R>
where
Score: PartialOrd + Clone,
D: Serialize + DeserializeOwned + Ord + Clone,
{
/// Create a new `TopNComputer`.
/// Internally it will allocate a buffer of size `2 * top_n`.
pub fn new(top_n: usize) -> Self {
let vec_cap = top_n.max(1) * 2;
TopNComputer {
buffer: Vec::with_capacity(vec_cap),
top_n,
threshold: None,
}
}
/// Push a new document to the top n.
/// If the document is below the current threshold, it will be ignored.
#[inline]
pub fn push(&mut self, feature: Score, doc: D) {
if let Some(last_median) = self.threshold.clone() {
if feature < last_median {
return;
}
}
if self.buffer.len() == self.buffer.capacity() {
let median = self.truncate_top_n();
self.threshold = Some(median);
}
// This is faster since it keeps the buffer-resizing code of vec.push()
// from being inlined (this is in the hot path)
// TODO: Replace with `push_within_capacity` when it's stabilized
let uninit = self.buffer.spare_capacity_mut();
// This cannot panic, because truncate_top_n always removes at least one element, since
// the min capacity is 2.
uninit[0].write(ComparableDoc { doc, feature });
// This is safe because the write above would have panicked if there were no spare capacity
unsafe {
self.buffer.set_len(self.buffer.len() + 1);
}
}
#[inline(never)]
fn truncate_top_n(&mut self) -> Score {
// Use select_nth_unstable to find the top nth score
let (_, median_el, _) = self.buffer.select_nth_unstable(self.top_n);
let median_score = median_el.feature.clone();
// Remove all elements below the top_n
self.buffer.truncate(self.top_n);
median_score
}
/// Returns the top n elements in sorted order.
pub fn into_sorted_vec(mut self) -> Vec<ComparableDoc<Score, D, R>> {
if self.buffer.len() > self.top_n {
self.truncate_top_n();
}
self.buffer.sort_unstable();
self.buffer
}
/// Returns the top n elements in stored order.
/// Useful if you do not need the elements in sorted order,
/// for example when merging the results of multiple segments.
pub fn into_vec(mut self) -> Vec<ComparableDoc<Score, D, R>> {
if self.buffer.len() > self.top_n {
self.truncate_top_n();
}
self.buffer
}
}
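The `O(N + K)` claim follows from the scheme above: a buffer of capacity `2 * top_n` is cut back to `top_n` with a linear-time selection whenever it fills, so each truncation costs `O(K)` and discards `K` elements. A minimal standalone sketch of that core (a hypothetical `top_k` helper over `u64`s, not the actual `TopNComputer`):

```rust
// Minimal sketch of the 2*K buffer trick: push into a Vec and, when it fills,
// use select_nth_unstable_by to keep only the top K in linear time. The score
// of the (K+1)-th best element becomes a threshold for cheap early rejection.
fn top_k(values: impl IntoIterator<Item = u64>, k: usize) -> Vec<u64> {
    let mut buf: Vec<u64> = Vec::with_capacity(k.max(1) * 2);
    let mut threshold: Option<u64> = None;
    for v in values {
        // Anything strictly below the last cutoff can never enter the top K.
        if threshold.map_or(false, |t| v < t) {
            continue;
        }
        if buf.len() == buf.capacity() {
            // Partition so buf[..k] holds the K best and buf[k] is the (K+1)-th best.
            let (_, cutoff, _) = buf.select_nth_unstable_by(k, |a, b| b.cmp(a));
            threshold = Some(*cutoff);
            buf.truncate(k);
        }
        buf.push(v);
    }
    // Final pass: order the survivors and keep the top K.
    buf.sort_unstable_by(|a, b| b.cmp(a));
    buf.truncate(k);
    buf
}

fn main() {
    println!("{:?}", top_k(0..100u64, 3)); // [99, 98, 97]
}
```

Since each `O(2K)` truncation pays for the `K` pushes that refilled the buffer, the amortized cost per document is constant, which is where the `O(N + K)` bound in the doc comment comes from.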
#[cfg(test)]
mod tests {
-    use super::TopDocs;
+    use super::{TopDocs, TopNComputer};
+    use crate::collector::top_collector::ComparableDoc;
    use crate::collector::Collector;
    use crate::query::{AllQuery, Query, QueryParser};
    use crate::schema::{Field, Schema, FAST, STORED, TEXT};
@@ -752,7 +866,7 @@ mod tests {
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
// writing the segment // writing the segment
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
index_writer.add_document(doc!(text_field=>"Hello happy tax payer."))?; index_writer.add_document(doc!(text_field=>"Hello happy tax payer."))?;
index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer"))?; index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer"))?;
index_writer.add_document(doc!(text_field=>"I like Droopy"))?; index_writer.add_document(doc!(text_field=>"I like Droopy"))?;
@@ -766,6 +880,70 @@ mod tests {
crate::assert_nearly_equals!(result.0, expected.0); crate::assert_nearly_equals!(result.0, expected.0);
} }
} }
    #[test]
    fn test_topn_computer_serde() {
        let computer: TopNComputer<u32, u32> = TopNComputer::new(1);
        let computer_ser = serde_json::to_string(&computer).unwrap();
        let mut computer: TopNComputer<u32, u32> = serde_json::from_str(&computer_ser).unwrap();

        computer.push(1u32, 5u32);
        computer.push(1u32, 0u32);
        computer.push(1u32, 7u32);

        assert_eq!(
            computer.into_sorted_vec(),
            &[ComparableDoc {
                feature: 1u32,
                doc: 0u32,
            }]
        );
    }

    #[test]
    fn test_empty_topn_computer() {
        let mut computer: TopNComputer<u32, u32> = TopNComputer::new(0);
        computer.push(1u32, 1u32);
        computer.push(1u32, 2u32);
        computer.push(1u32, 3u32);
        assert!(computer.into_sorted_vec().is_empty());
    }

    #[test]
    fn test_topn_computer() {
        let mut computer: TopNComputer<u32, u32> = TopNComputer::new(2);
        computer.push(1u32, 1u32);
        computer.push(2u32, 2u32);
        computer.push(3u32, 3u32);
        computer.push(2u32, 4u32);
        computer.push(1u32, 5u32);

        assert_eq!(
            computer.into_sorted_vec(),
            &[
                ComparableDoc {
                    feature: 3u32,
                    doc: 3u32,
                },
                ComparableDoc {
                    feature: 2u32,
                    doc: 2u32,
                }
            ]
        );
    }

    #[test]
    fn test_topn_computer_no_panic() {
        for top_n in 0..10 {
            let mut computer: TopNComputer<u32, u32> = TopNComputer::new(top_n);
            for _ in 0..1 + top_n * 2 {
                computer.push(1u32, 1u32);
            }
            let _vals = computer.into_sorted_vec();
        }
    }
    #[test]
    fn test_top_collector_not_at_capacity_without_offset() -> crate::Result<()> {

@@ -852,20 +1030,25 @@ mod tests {
        // using AllQuery to get a constant score
        let searcher = index.reader().unwrap().searcher();
        let page_0 = searcher.search(&AllQuery, &TopDocs::with_limit(1)).unwrap();
        let page_1 = searcher.search(&AllQuery, &TopDocs::with_limit(2)).unwrap();
        let page_2 = searcher.search(&AllQuery, &TopDocs::with_limit(3)).unwrap();

        // precondition for the test to be meaningful: we did get documents
        // with the same score
        assert!(page_0.iter().all(|result| result.0 == page_1[0].0));
        assert!(page_1.iter().all(|result| result.0 == page_1[0].0));
        assert!(page_2.iter().all(|result| result.0 == page_2[0].0));

        // sanity check since we're relying on make_index()
        assert_eq!(page_0.len(), 1);
        assert_eq!(page_1.len(), 2);
        assert_eq!(page_2.len(), 3);

        assert_eq!(page_1, &page_2[..page_1.len()]);
        assert_eq!(page_0, &page_2[..page_0.len()]);
    }

    #[test]
@@ -1122,7 +1305,7 @@ mod tests {
        mut doc_adder: impl FnMut(&mut IndexWriter),
    ) -> (Index, Box<dyn Query>) {
        let index = Index::create_in_ram(schema);
        let mut index_writer = index.writer_with_num_threads(1, 15_000_000).unwrap();
        doc_adder(&mut index_writer);
        index_writer.commit().unwrap();
        let query_parser = QueryParser::for_index(&index, vec![query_field]);
