fix out of order bug

use optional index in multivalued index (#2439 )
* use optional index in multivalued index For mostly empty multivalued indices there was a large overhead during creation when iterating all docids. This is alleviated by placing an optional index in the multivalued index to mark documents that have values. There's some performance overhead when accessing values in a multivalued index. The accessing cost is now optional index + multivalue index. The sparse codec performs relatively bad with the binary_search when accessing data. This is reflected in the benchmarks below. This changes the format of columnar to v2, but code is added to handle the v1 formats. ``` Running benches/bench_access.rs (/home/pascal/Development/tantivy/optional_multivalues/target/release/deps/bench_access-ea323c028db88db4) multi sparse 1/13 access_values_for_doc Avg: 42.8946ms (+241.80%) Median: 42.8869ms (+244.10%) [42.7484ms .. 43.1074ms] access_first_vals Avg: 42.8022ms (+421.93%) Median: 42.7553ms (+439.84%) [42.6794ms .. 43.7404ms] multi 2x access_values_for_doc Avg: 31.1244ms (+24.17%) Median: 30.8339ms (+23.46%) [30.7192ms .. 33.6059ms] access_first_vals Avg: 24.3070ms (+70.92%) Median: 24.0966ms (+70.18%) [23.9328ms .. 26.4851ms] sparse 1/13 access_values_for_doc Avg: 42.2490ms (+0.61%) Median: 42.2346ms (+2.28%) [41.8988ms .. 43.7821ms] access_first_vals Avg: 43.6272ms (+0.23%) Median: 43.6197ms (+1.78%) [43.4920ms .. 43.9009ms] dense 1/12 access_values_for_doc Avg: 8.6184ms (+23.18%) Median: 8.6126ms (+23.78%) [8.5843ms .. 8.7527ms] access_first_vals Avg: 6.8112ms (+4.47%) Median: 6.8002ms (+4.55%) [6.7887ms .. 6.8991ms] full access_values_for_doc Avg: 9.4073ms (-5.09%) Median: 9.4023ms (-2.23%) [9.3694ms .. 9.4568ms] access_first_vals Avg: 4.9531ms (+6.24%) Median: 4.9502ms (+7.85%) [4.9423ms .. 
4.9718ms] ``` ``` Running benches/bench_merge.rs (/home/pascal/Development/tantivy/optional_multivalues/target/release/deps/bench_merge-475697dfceb3639f) merge_multi 2x_and_multi 2x Avg: 20.2280ms (+34.33%) Median: 20.1829ms (+35.33%) [19.9933ms .. 20.8806ms] merge_multi sparse 1/13_and_multi sparse 1/13 Avg: 0.8961ms (-78.04%) Median: 0.8943ms (-77.61%) [0.8899ms .. 0.9272ms] merge_dense 1/12_and_dense 1/12 Avg: 0.6619ms (-1.26%) Median: 0.6616ms (+2.20%) [0.6473ms .. 0.6837ms] merge_sparse 1/13_and_sparse 1/13 Avg: 0.5508ms (-0.85%) Median: 0.5508ms (+2.80%) [0.5420ms .. 0.5634ms] merge_sparse 1/13_and_dense 1/12 Avg: 0.6046ms (-4.64%) Median: 0.6038ms (+2.80%) [0.5939ms .. 0.6296ms] merge_multi sparse 1/13_and_dense 1/12 Avg: 0.9111ms (-83.48%) Median: 0.9063ms (-83.50%) [0.9047ms .. 0.9663ms] merge_multi sparse 1/13_and_sparse 1/13 Avg: 0.8451ms (-89.49%) Median: 0.8428ms (-89.43%) [0.8411ms .. 0.8563ms] merge_multi 2x_and_dense 1/12 Avg: 10.6624ms (-4.82%) Median: 10.6568ms (-4.49%) [10.5738ms .. 10.8353ms] merge_multi 2x_and_sparse 1/13 Avg: 10.6336ms (-22.95%) Median: 10.5925ms (-22.33%) [10.5149ms .. 11.5657ms] ``` * Update columnar/src/columnar/format_version.rs Co-authored-by: Paul Masurel <paul@quickwit.io> * Update columnar/src/column_index/mod.rs Co-authored-by: Paul Masurel <paul@quickwit.io> --------- Co-authored-by: Paul Masurel <paul@quickwit.io>
2026-02-15 04:10:36 +00:00 · 2024-06-25 08:35:58 +08:00 · 2024-06-19 14:54:12 +08:00 · 2024-06-14 10:42:35 +08:00 · 2024-06-14 09:12:58 +09:00 · 2024-06-13 15:51:53 +08:00
234 changed files with 8667 additions and 5712 deletions
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -15,11 +15,11 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - name: Install Rust
-        run: rustup toolchain install nightly-2023-09-10 --profile minimal --component llvm-tools-preview
+        run: rustup toolchain install nightly-2024-04-10 --profile minimal --component llvm-tools-preview
      - uses: Swatinem/rust-cache@v2
      - uses: taiki-e/install-action@cargo-llvm-cov
      - name: Generate code coverage
-        run: cargo +nightly-2023-09-10 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
+        run: cargo +nightly-2024-04-10 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        continue-on-error: true
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,65 @@
+Tantivy 0.22
+================================
+
+Tantivy 0.22 will be able to read indices created with Tantivy 0.21.
+
+#### Bugfixes
+- Fix null byte handling in JSON paths (null bytes in json keys caused panic during indexing) [#2345](https://github.com/quickwit-oss/tantivy/pull/2345)(@PSeitz)
+- Fix bug that can cause `get_docids_for_value_range` to panic. [#2295](https://github.com/quickwit-oss/tantivy/pull/2295)(@fulmicoton)
+- Avoid 1 document indices by increasing min memory to 15MB for indexing [#2176](https://github.com/quickwit-oss/tantivy/pull/2176)(@PSeitz)
+- Fix merge panic for JSON fields [#2284](https://github.com/quickwit-oss/tantivy/pull/2284)(@PSeitz)
+- Fix bug occurring when merging JSON object indexed with positions. [#2253](https://github.com/quickwit-oss/tantivy/pull/2253)(@fulmicoton)
+- Fix empty DateHistogram gap bug [#2183](https://github.com/quickwit-oss/tantivy/pull/2183)(@PSeitz)
+- Fix range query end check (fields with less than 1 value per doc are affected) [#2226](https://github.com/quickwit-oss/tantivy/pull/2226)(@PSeitz)
+- Handle exclusive out of bounds ranges on fastfield range queries [#2174](https://github.com/quickwit-oss/tantivy/pull/2174)(@PSeitz)
+
+#### Breaking API Changes
+- rename ReloadPolicy onCommit to onCommitWithDelay [#2235](https://github.com/quickwit-oss/tantivy/pull/2235)(@giovannicuccu)
+- Move exports from the root into modules [#2220](https://github.com/quickwit-oss/tantivy/pull/2220)(@PSeitz)
+- Accept field name instead of `Field` in FilterCollector [#2196](https://github.com/quickwit-oss/tantivy/pull/2196)(@PSeitz)
+- remove deprecated IntOptions and DateTime [#2353](https://github.com/quickwit-oss/tantivy/pull/2353)(@PSeitz)
+
+#### Features/Improvements
+- Tantivy documents as a trait: Index data directly without converting to tantivy types first [#2071](https://github.com/quickwit-oss/tantivy/pull/2071)(@ChillFish8)
+- encode some part of posting list as -1 instead of direct values (smaller inverted indices) [#2185](https://github.com/quickwit-oss/tantivy/pull/2185)(@trinity-1686a)
+- **Aggregation**
+  - Support to deserialize f64 from string [#2311](https://github.com/quickwit-oss/tantivy/pull/2311)(@PSeitz)
+  - Add a top_hits aggregator [#2198](https://github.com/quickwit-oss/tantivy/pull/2198)(@ditsuke)
+  - Support bool type in term aggregation [#2318](https://github.com/quickwit-oss/tantivy/pull/2318)(@PSeitz)
+  - Support IP addresses in term aggregation [#2319](https://github.com/quickwit-oss/tantivy/pull/2319)(@PSeitz)
+  - Support date type in term aggregation [#2172](https://github.com/quickwit-oss/tantivy/pull/2172)(@PSeitz)
+  - Support escaped dot when addressing field [#2250](https://github.com/quickwit-oss/tantivy/pull/2250)(@PSeitz)
+
+- Add ExistsQuery to check documents that have a value [#2160](https://github.com/quickwit-oss/tantivy/pull/2160)(@imotov)
+- Expose TopDocs::order_by_u64_field again [#2282](https://github.com/quickwit-oss/tantivy/pull/2282)(@ditsuke)
+
+- **Memory/Performance**
+  - Faster TopN: replace BinaryHeap with TopNComputer [#2186](https://github.com/quickwit-oss/tantivy/pull/2186)(@PSeitz)
+  - reduce number of allocations during indexing [#2257](https://github.com/quickwit-oss/tantivy/pull/2257)(@PSeitz)
+  - Less Memory while indexing: docid deltas while indexing [#2249](https://github.com/quickwit-oss/tantivy/pull/2249)(@PSeitz)
+  - Faster indexing: use term hashmap in fastfield [#2243](https://github.com/quickwit-oss/tantivy/pull/2243)(@PSeitz)
+  - term hashmap remove copy in is_empty, unused unordered_id [#2229](https://github.com/quickwit-oss/tantivy/pull/2229)(@PSeitz)
+  - add method to fetch block of first values in columnar [#2330](https://github.com/quickwit-oss/tantivy/pull/2330)(@PSeitz)
+  - Faster aggregations: add fast path for full columns in fetch_block [#2328](https://github.com/quickwit-oss/tantivy/pull/2328)(@PSeitz)
+  - Faster sstable loading: use fst for sstable index [#2268](https://github.com/quickwit-oss/tantivy/pull/2268)(@trinity-1686a)
+
+- **QueryParser**
+  - allow newline where we allow space in query parser [#2302](https://github.com/quickwit-oss/tantivy/pull/2302)(@trinity-1686a)
+  - allow some mixing of occur and bool in strict query parser [#2323](https://github.com/quickwit-oss/tantivy/pull/2323)(@trinity-1686a)
+  - handle * inside term in lenient query parser [#2228](https://github.com/quickwit-oss/tantivy/pull/2228)(@trinity-1686a)
+  - add support for exists query syntax in query parser [#2170](https://github.com/quickwit-oss/tantivy/pull/2170)(@trinity-1686a)
+- Add shared search executor [#2312](https://github.com/quickwit-oss/tantivy/pull/2312)(@MochiXu)
+- Truncate keys to u16::MAX in term hashmap [#2299](https://github.com/quickwit-oss/tantivy/pull/2299)(@PSeitz)
+- report if a term matched when warming up posting list [#2309](https://github.com/quickwit-oss/tantivy/pull/2309)(@trinity-1686a)
+- Support json fields in FuzzyTermQuery [#2173](https://github.com/quickwit-oss/tantivy/pull/2173)(@PingXia-at)
+- Read list of fields encoded in term dictionary for JSON fields [#2184](https://github.com/quickwit-oss/tantivy/pull/2184)(@PSeitz)
+- add collect_block to BoxableSegmentCollector [#2331](https://github.com/quickwit-oss/tantivy/pull/2331)(@PSeitz)
+- expose collect_block buffer size [#2326](https://github.com/quickwit-oss/tantivy/pull/2326)(@PSeitz)
+- Forward regex parser errors [#2288](https://github.com/quickwit-oss/tantivy/pull/2288)(@adamreichold)
+- Make FacetCounts defaultable and cloneable. [#2322](https://github.com/quickwit-oss/tantivy/pull/2322)(@adamreichold)
+- Derive Debug for SchemaBuilder [#2254](https://github.com/quickwit-oss/tantivy/pull/2254)(@GodTamIt)
+- add missing inlines to tantivy options [#2245](https://github.com/quickwit-oss/tantivy/pull/2245)(@PSeitz)
+
 Tantivy 0.21.1
 ================================
 #### Bugfixes
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.22.0-dev"
+version = "0.23.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -11,34 +11,38 @@ repository = "https://github.com/quickwit-oss/tantivy"
 readme = "README.md"
 keywords = ["search", "information", "retrieval"]
 edition = "2021"
-rust-version = "1.62"
+rust-version = "1.63"
 exclude = ["benches/*.json", "benches/*.txt"]

 [dependencies]
-oneshot = "0.1.5"
-base64 = "0.21.0"
+oneshot = "0.1.7"
+base64 = "0.22.0"
 byteorder = "1.4.3"
 crc32fast = "1.3.2"
 once_cell = "1.10.0"
-regex = { version = "1.5.5", default-features = false, features = ["std", "unicode"] }
+regex = { version = "1.5.5", default-features = false, features = [
+    "std",
+    "unicode",
+] }
 aho-corasick = "1.0"
 tantivy-fst = "0.5"
 memmap2 = { version = "0.9.0", optional = true }
 lz4_flex = { version = "0.11", default-features = false, optional = true }
-zstd = { version = "0.13", default-features = false }
+zstd = { version = "0.13", optional = true, default-features = false }
 tempfile = { version = "3.3.0", optional = true }
 log = "0.4.16"
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = "1.0.79"
-num_cpus = "1.13.1"
-fs4 = { version = "0.7.0", optional = true }
+fs4 = { version = "0.8.0", optional = true }
 levenshtein_automata = "0.2.1"
 uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
 rust-stemmers = "1.2.0"
 downcast-rs = "1.2.0"
-bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker4x"] }
-census = "0.4.0"
+bitpacking = { version = "0.9.2", default-features = false, features = [
+    "bitpacker4x",
+] }
+census = "0.4.2"
 rustc-hash = "1.1.0"
 thiserror = "1.0.30"
 htmlescape = "0.3.1"
@@ -48,18 +52,18 @@ smallvec = "1.8.0"
 rayon = "1.5.2"
 lru = "0.12.0"
 fastdivide = "0.4.0"
-itertools = "0.12.0"
+itertools = "0.13.0"
 measure_time = "0.8.2"
 arc-swap = "1.5.0"

-columnar = { version= "0.2", path="./columnar", package ="tantivy-columnar" }
-sstable = { version= "0.2", path="./sstable", package ="tantivy-sstable", optional = true }
-stacker = { version= "0.2", path="./stacker", package ="tantivy-stacker" }
-query-grammar = { version= "0.21.0", path="./query-grammar", package = "tantivy-query-grammar" }
-tantivy-bitpacker = { version= "0.5", path="./bitpacker" }
-common = { version= "0.6", path = "./common/", package = "tantivy-common" }
-tokenizer-api = { version= "0.2", path="./tokenizer-api", package="tantivy-tokenizer-api" }
-sketches-ddsketch = { version = "0.2.1", features = ["use_serde"] }
+columnar = { version = "0.3", path = "./columnar", package = "tantivy-columnar" }
+sstable = { version = "0.3", path = "./sstable", package = "tantivy-sstable", optional = true }
+stacker = { version = "0.3", path = "./stacker", package = "tantivy-stacker" }
+query-grammar = { version = "0.22.0", path = "./query-grammar", package = "tantivy-query-grammar" }
+tantivy-bitpacker = { version = "0.6", path = "./bitpacker" }
+common = { version = "0.7", path = "./common/", package = "tantivy-common" }
+tokenizer-api = { version = "0.3", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
+sketches-ddsketch = { version = "0.3.0", features = ["use_serde"] }
 futures-util = { version = "0.3.28", optional = true }
 fnv = "1.0.7"

@@ -67,6 +71,7 @@ fnv = "1.0.7"
 winapi = "0.3.9"

 [dev-dependencies]
+binggan = "0.8.0"
 rand = "0.8.5"
 maplit = "1.0.2"
 matches = "0.1.9"
@@ -77,6 +82,10 @@ futures = "0.3.21"
 paste = "1.0.11"
 more-asserts = "0.3.1"
 rand_distr = "0.4.3"
+time = { version = "0.3.10", features = ["serde-well-known", "macros"] }
+postcard = { version = "1.0.4", features = [
+  "use-std",
+], default-features = false }

 [target.'cfg(not(windows))'.dev-dependencies]
 criterion = { version = "0.5", default-features = false }
@@ -105,20 +114,29 @@ mmap = ["fs4", "tempfile", "memmap2"]
 stopwords = []

 lz4-compression = ["lz4_flex"]
-zstd-compression = []
+zstd-compression = ["zstd"]

 failpoints = ["fail", "fail/failpoints"]
-unstable = [] # useful for benches.
+unstable = []                            # useful for benches.

 quickwit = ["sstable", "futures-util"]

-# Compares only the hash of a string when indexing data. 
+# Compares only the hash of a string when indexing data.
 # Increases indexing speed, but may lead to extremely rare missing terms, when there's a hash collision.
 # Uses 64bit ahash.
 compare_hash_only = ["stacker/compare_hash_only"]

 [workspace]
-members = ["query-grammar", "bitpacker", "common", "ownedbytes", "stacker", "sstable", "tokenizer-api", "columnar"]
+members = [
+    "query-grammar",
+    "bitpacker",
+    "common",
+    "ownedbytes",
+    "stacker",
+    "sstable",
+    "tokenizer-api",
+    "columnar",
+]

 # Following the "fail" crate best practises, we isolate
 # tests that define specific behavior in fail check points
@@ -139,3 +157,7 @@ harness = false
 [[bench]]
 name = "index-bench"
 harness = false
+
+[[bench]]
+name = "agg_bench"
+harness = false
--- a/README.md
+++ b/README.md
@@ -5,19 +5,18 @@
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Crates.io](https://img.shields.io/crates/v/tantivy.svg)](https://crates.io/crates/tantivy)

-![Tantivy](https://tantivy-search.github.io/logo/tantivy-logo.png)
+<img src="https://tantivy-search.github.io/logo/tantivy-logo.png" alt="Tantivy, the fastest full-text search engine library written in Rust" height="250">

-**Tantivy** is a **full-text search engine library** written in Rust.
+## Fast full-text search engine library written in Rust

-It is closer to [Apache Lucene](https://lucene.apache.org/) than to [Elasticsearch](https://www.elastic.co/products/elasticsearch) or [Apache Solr](https://lucene.apache.org/solr/) in the sense it is not
-an off-the-shelf search engine server, but rather a crate that can be used
-to build such a search engine.
+**If you are looking for an alternative to Elasticsearch or Apache Solr, check out [Quickwit](https://github.com/quickwit-oss/quickwit), our distributed search engine built on top of Tantivy.**
+
+Tantivy is closer to [Apache Lucene](https://lucene.apache.org/) than to [Elasticsearch](https://www.elastic.co/products/elasticsearch) or [Apache Solr](https://lucene.apache.org/solr/) in the sense it is not
+an off-the-shelf search engine server, but rather a crate that can be used to build such a search engine.

 Tantivy is, in fact, strongly inspired by Lucene's design.

-If you are looking for an alternative to Elasticsearch or Apache Solr, check out [Quickwit](https://github.com/quickwit-oss/quickwit), our search engine built on top of Tantivy.
-
-# Benchmark
+## Benchmark

 The following [benchmark](https://tantivy-search.github.io/bench/) breakdowns
 performance for different types of queries/collections.
@@ -28,7 +27,7 @@ Your mileage WILL vary depending on the nature of queries and their load.

 Details about the benchmark can be found at this [repository](https://github.com/quickwit-oss/search-benchmark-game).

-# Features
+## Features

 - Full-text search
 - Configurable tokenizer (stemming available for 17 Latin languages) with third party support for Chinese ([tantivy-jieba](https://crates.io/crates/tantivy-jieba) and [cang-jie](https://crates.io/crates/cang-jie)), Japanese ([lindera](https://github.com/lindera-morphology/lindera-tantivy), [Vaporetto](https://crates.io/crates/vaporetto_tantivy), and [tantivy-tokenizer-tiny-segmenter](https://crates.io/crates/tantivy-tokenizer-tiny-segmenter)) and Korean ([lindera](https://github.com/lindera-morphology/lindera-tantivy) + [lindera-ko-dic-builder](https://github.com/lindera-morphology/lindera-ko-dic-builder))
@@ -54,11 +53,11 @@ Details about the benchmark can be found at this [repository](https://github.com
 - Searcher Warmer API
 - Cheesy logo with a horse

-## Non-features
+### Non-features

 Distributed search is out of the scope of Tantivy, but if you are looking for this feature, check out [Quickwit](https://github.com/quickwit-oss/quickwit/).

-# Getting started
+## Getting started

 Tantivy works on stable Rust and supports Linux, macOS, and Windows.

@@ -68,7 +67,7 @@ index documents, and search via the CLI or a small server with a REST API.
 It walks you through getting a Wikipedia search engine up and running in a few minutes.
 - [Reference doc for the last released version](https://docs.rs/tantivy/)

-# How can I support this project?
+## How can I support this project?

 There are many ways to support this project.

@@ -79,16 +78,16 @@ There are many ways to support this project.
 - Contribute code (you can join [our Discord server](https://discord.gg/MT27AG5EVE))
 - Talk about Tantivy around you

-# Contributing code
+## Contributing code

 We use the GitHub Pull Request workflow: reference a GitHub ticket and/or include a comprehensive commit message when opening a PR.
 Feel free to update CHANGELOG.md with your contribution.

-## Tokenizer
+### Tokenizer

 When implementing a tokenizer for tantivy depend on the `tantivy-tokenizer-api` crate.

-## Clone and build locally
+### Clone and build locally

 Tantivy compiles on stable Rust.
 To check out and run tests, you can simply run:
@@ -99,10 +98,11 @@ cd tantivy
 cargo test
 ```

-# Companies Using Tantivy
+## Companies Using Tantivy

 <p align="left">
-<img align="center" src="doc/assets/images/etsy.png" alt="Etsy" height="25" width="auto" />&nbsp;
+<img align="center" src="doc/assets/images/etsy.png" alt="Etsy" height="25" width="auto" /> &nbsp;
+<img align="center" src="doc/assets/images/paradedb.png" alt="ParadeDB" height="25" width="auto" /> &nbsp;
 <img align="center" src="doc/assets/images/Nuclia.png#gh-light-mode-only" alt="Nuclia" height="25" width="auto" /> &nbsp;
 <img align="center" src="doc/assets/images/humanfirst.png#gh-light-mode-only" alt="Humanfirst.ai" height="30" width="auto" />
 <img align="center" src="doc/assets/images/element.io.svg#gh-light-mode-only" alt="Element.io" height="25" width="auto" />
@@ -111,7 +111,7 @@ cargo test
 <img align="center" src="doc/assets/images/element-dark-theme.png#gh-dark-mode-only" alt="Element.io" height="25" width="auto" />
 </p>

-# FAQ
+## FAQ

 ### Can I use Tantivy in other languages?

--- a/benches/agg_bench.rs
+++ b/benches/agg_bench.rs
@@ -0,0 +1,419 @@
+use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
+use rand::prelude::SliceRandom;
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+use rand_distr::Distribution;
+use serde_json::json;
+use tantivy::aggregation::agg_req::Aggregations;
+use tantivy::aggregation::AggregationCollector;
+use tantivy::query::{AllQuery, TermQuery};
+use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
+use tantivy::{doc, Index, Term};
+
+#[global_allocator]
+pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
+
+/// Mini macro to register a function via its name
+/// runner.register("average_u64", move |index| average_u64(index));
+macro_rules! register {
+    ($runner:expr, $func:ident) => {
+        $runner.register(stringify!($func), move |index| $func(index))
+    };
+}
+
+fn main() {
+    let inputs = vec![
+        ("full", get_test_index_bench(Cardinality::Full).unwrap()),
+        (
+            "dense",
+            get_test_index_bench(Cardinality::OptionalDense).unwrap(),
+        ),
+        (
+            "sparse",
+            get_test_index_bench(Cardinality::OptionalSparse).unwrap(),
+        ),
+        (
+            "multivalue",
+            get_test_index_bench(Cardinality::Multivalued).unwrap(),
+        ),
+    ];
+
+    bench_agg(InputGroup::new_with_inputs(inputs));
+}
+
+fn bench_agg(mut group: InputGroup<Index>) {
+    group.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.
+    register!(group, average_u64);
+    register!(group, average_f64);
+    register!(group, average_f64_u64);
+    register!(group, stats_f64);
+    register!(group, extendedstats_f64);
+    register!(group, percentiles_f64);
+    register!(group, terms_few);
+    register!(group, terms_many);
+    register!(group, terms_many_order_by_term);
+    register!(group, terms_many_with_top_hits);
+    register!(group, terms_many_with_avg_sub_agg);
+    register!(group, terms_many_json_mixed_type_with_sub_agg_card);
+    register!(group, range_agg);
+    register!(group, range_agg_with_avg_sub_agg);
+    register!(group, range_agg_with_term_agg_few);
+    register!(group, range_agg_with_term_agg_many);
+    register!(group, histogram);
+    register!(group, histogram_hard_bounds);
+    register!(group, histogram_with_avg_sub_agg);
+    register!(group, avg_and_range_with_avg_sub_agg);
+
+    group.run();
+}
+
+fn exec_term_with_agg(index: &Index, agg_req: serde_json::Value) {
+    let agg_req: Aggregations = serde_json::from_value(agg_req).unwrap();
+
+    let reader = index.reader().unwrap();
+    let text_field = reader.searcher().schema().get_field("text").unwrap();
+    let term_query = TermQuery::new(
+        Term::from_field_text(text_field, "cool"),
+        IndexRecordOption::Basic,
+    );
+    let collector = get_collector(agg_req);
+    let searcher = reader.searcher();
+    black_box(searcher.search(&term_query, &collector).unwrap());
+}
+
+fn average_u64(index: &Index) {
+    let agg_req = json!({
+        "average": { "avg": { "field": "score", } }
+    });
+    exec_term_with_agg(index, agg_req)
+}
+fn average_f64(index: &Index) {
+    let agg_req = json!({
+        "average": { "avg": { "field": "score_f64", } }
+    });
+    exec_term_with_agg(index, agg_req)
+}
+fn average_f64_u64(index: &Index) {
+    let agg_req = json!({
+        "average_f64": { "avg": { "field": "score_f64" } },
+        "average": { "avg": { "field": "score" } },
+    });
+    exec_term_with_agg(index, agg_req)
+}
+fn stats_f64(index: &Index) {
+    let agg_req = json!({
+        "average_f64": { "stats": { "field": "score_f64", } }
+    });
+    exec_term_with_agg(index, agg_req)
+}
+fn extendedstats_f64(index: &Index) {
+    let agg_req = json!({
+        "extendedstats_f64": { "extended_stats": { "field": "score_f64", } }
+    });
+    exec_term_with_agg(index, agg_req)
+}
+fn percentiles_f64(index: &Index) {
+    let agg_req = json!({
+      "mypercentiles": {
+        "percentiles": {
+          "field": "score_f64",
+          "percents": [ 95, 99, 99.9 ]
+        }
+      }
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_few(index: &Index) {
+    let agg_req = json!({
+        "my_texts": { "terms": { "field": "text_few_terms" } },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many(index: &Index) {
+    let agg_req = json!({
+        "my_texts": { "terms": { "field": "text_many_terms" } },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_order_by_term(index: &Index) {
+    let agg_req = json!({
+        "my_texts": { "terms": { "field": "text_many_terms", "order": { "_key": "desc" } } },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_with_top_hits(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "text_many_terms" },
+            "aggs": {
+                "top_hits": { "top_hits":
+                    {
+                        "sort": [
+                            { "score": "desc" }
+                        ],
+                        "size": 2,
+                        "doc_value_fields": ["score_f64"]
+                    }
+                }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "text_many_terms" },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_many_json_mixed_type_with_sub_agg_card(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "json.mixed_type" },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+
+fn execute_agg(index: &Index, agg_req: serde_json::Value) {
+    let agg_req: Aggregations = serde_json::from_value(agg_req).unwrap();
+    let collector = get_collector(agg_req);
+
+    let reader = index.reader().unwrap();
+    let searcher = reader.searcher();
+    black_box(searcher.search(&AllQuery, &collector).unwrap());
+}
+fn range_agg(index: &Index) {
+    let agg_req = json!({
+        "range_f64": { "range": { "field": "score_f64", "ranges": [
+            { "from": 3, "to": 7000 },
+            { "from": 7000, "to": 20000 },
+            { "from": 20000, "to": 30000 },
+            { "from": 30000, "to": 40000 },
+            { "from": 40000, "to": 50000 },
+            { "from": 50000, "to": 60000 }
+        ] } },
+    });
+    execute_agg(index, agg_req);
+}
+fn range_agg_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 30000 },
+                    { "from": 30000, "to": 40000 },
+                    { "from": 40000, "to": 50000 },
+                    { "from": 50000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+
+fn range_agg_with_term_agg_few(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 30000 },
+                    { "from": 30000, "to": 40000 },
+                    { "from": 40000, "to": 50000 },
+                    { "from": 50000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "my_texts": { "terms": { "field": "text_few_terms" } },
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn range_agg_with_term_agg_many(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 30000 },
+                    { "from": 30000, "to": 40000 },
+                    { "from": 40000, "to": 50000 },
+                    { "from": 50000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "my_texts": { "terms": { "field": "text_many_terms" } },
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+fn histogram(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "histogram": {
+                "field": "score_f64",
+                "interval": 100 // 1000 buckets
+            },
+        }
+    });
+    execute_agg(index, agg_req);
+}
+fn histogram_hard_bounds(index: &Index) {
+    let agg_req = json!({
+        "rangef64": { "histogram": { "field": "score_f64", "interval": 100, "hard_bounds": { "min": 1000, "max": 300000 } } },
+    });
+    execute_agg(index, agg_req);
+}
+fn histogram_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "histogram": { "field": "score_f64", "interval": 100 },
+            "aggs": {
+                "average_f64": { "avg": { "field": "score_f64" } }
+            }
+        }
+    });
+    execute_agg(index, agg_req);
+}
+fn avg_and_range_with_avg_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "rangef64": {
+            "range": {
+                "field": "score_f64",
+                "ranges": [
+                    { "from": 3, "to": 7000 },
+                    { "from": 7000, "to": 20000 },
+                    { "from": 20000, "to": 60000 }
+                ]
+            },
+            "aggs": {
+                "average_in_range": { "avg": { "field": "score" } }
+            }
+        },
+        "average": { "avg": { "field": "score" } }
+    });
+    execute_agg(index, agg_req);
+}
+
+#[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
+enum Cardinality {
+    /// All documents contain exactly one value.
+    /// `Full` is the default for auto-detecting the Cardinality, since it is the most strict.
+    #[default]
+    Full = 0,
+    /// All documents contain at most one value.
+    OptionalDense = 1,
+    /// All documents may contain any number of values.
+    Multivalued = 2,
+    /// Roughly 1 in 21 documents has a value (each valued doc is followed by 20 empty ones)
+    OptionalSparse = 3,
+}
+
+fn get_collector(agg_req: Aggregations) -> AggregationCollector {
+    AggregationCollector::from_aggs(agg_req, Default::default())
+}
+
+fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
+    let mut schema_builder = Schema::builder();
+    let text_fieldtype = tantivy::schema::TextOptions::default()
+        .set_indexing_options(
+            TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
+        )
+        .set_stored();
+    let text_field = schema_builder.add_text_field("text", text_fieldtype);
+    let json_field = schema_builder.add_json_field("json", FAST);
+    let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
+    let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
+    let score_fieldtype = tantivy::schema::NumericOptions::default().set_fast();
+    let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
+    let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
+    let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
+    let index = Index::create_from_tempdir(schema_builder.build())?;
+    let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
+
+    let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
+
+    let many_terms_data = (0..150_000)
+        .map(|num| format!("author{num}"))
+        .collect::<Vec<_>>();
+    {
+        let mut rng = StdRng::from_seed([1u8; 32]);
+        let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
+        // To make the different test cases comparable we just change one doc to force the
+        // cardinality
+        if cardinality == Cardinality::OptionalDense {
+            index_writer.add_document(doc!())?;
+        }
+        if cardinality == Cardinality::Multivalued {
+            index_writer.add_document(doc!(
+                json_field => json!({"mixed_type": 10.0}),
+                json_field => json!({"mixed_type": 10.0}),
+                text_field => "cool",
+                text_field => "cool",
+                text_field_many_terms => "cool",
+                text_field_many_terms => "cool",
+                text_field_few_terms => "cool",
+                text_field_few_terms => "cool",
+                score_field => 1u64,
+                score_field => 1u64,
+                score_field_f64 => lg_norm.sample(&mut rng),
+                score_field_f64 => lg_norm.sample(&mut rng),
+                score_field_i64 => 1i64,
+                score_field_i64 => 1i64,
+            ))?;
+        }
+        let mut doc_with_value = 1_000_000;
+        if cardinality == Cardinality::OptionalSparse {
+            doc_with_value /= 20;
+        }
+        let _val_max = 1_000_000.0;
+        for _ in 0..doc_with_value {
+            let val: f64 = rng.gen_range(0.0..1_000_000.0);
+            let json = if rng.gen_bool(0.1) {
+                // 10% are numeric values
+                json!({ "mixed_type": val })
+            } else {
+                json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
+            };
+            index_writer.add_document(doc!(
+                text_field => "cool",
+                json_field => json,
+                text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
+                text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
+                score_field => val as u64,
+                score_field_f64 => lg_norm.sample(&mut rng),
+                score_field_i64 => val as i64,
+            ))?;
+            if cardinality == Cardinality::OptionalSparse {
+                for _ in 0..20 {
+                    index_writer.add_document(doc!(text_field => "cool"))?;
+                }
+            }
+        }
+        // writing the segment
+        index_writer.commit()?;
+    }
+
+    Ok(index)
+}
--- a/benches/index-bench.rs
+++ b/benches/index-bench.rs
@@ -18,7 +18,7 @@ fn benchmark(
        benchmark_dynamic_json(b, input, schema, commit, parse_json)
    } else {
        _benchmark(b, input, schema, commit, parse_json, |schema, doc_json| {
-            TantivyDocument::parse_json(&schema, doc_json).unwrap()
+            TantivyDocument::parse_json(schema, doc_json).unwrap()
        })
    }
 }
@@ -90,8 +90,7 @@ fn benchmark_dynamic_json(
 ) {
    let json_field = schema.get_field("json").unwrap();
    _benchmark(b, input, schema, commit, parse_json, |_schema, doc_json| {
-        let json_val: serde_json::Map<String, serde_json::Value> =
-            serde_json::from_str(doc_json).unwrap();
+        let json_val: serde_json::Value = serde_json::from_str(doc_json).unwrap();
        tantivy::doc!(json_field=>json_val)
    })
 }
@@ -138,15 +137,16 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
    for (prefix, schema, is_dynamic) in benches {
        for commit in [false, true] {
            let suffix = if commit { "with-commit" } else { "no-commit" };
-            for parse_json in [false] {
+            {
+                let parse_json = false;
                // for parse_json in [false, true] {
                let suffix = if parse_json {
-                    format!("{}-with-json-parsing", suffix)
+                    format!("{suffix}-with-json-parsing")
                } else {
-                    format!("{}", suffix)
+                    suffix.to_string()
                };

-                let bench_name = format!("{}{}", prefix, suffix);
+                let bench_name = format!("{prefix}{suffix}");
                group.bench_function(bench_name, |b| {
                    benchmark(b, HDFS_LOGS, schema.clone(), commit, parse_json, is_dynamic)
                });
--- a/bitpacker/Cargo.toml
+++ b/bitpacker/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-bitpacker"
-version = "0.5.0"
+version = "0.6.0"
 edition = "2021"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
--- a/bitpacker/src/bitpacker.rs
+++ b/bitpacker/src/bitpacker.rs
@@ -1,4 +1,3 @@
-use std::convert::TryInto;
 use std::io;
 use std::ops::{Range, RangeInclusive};

--- a/cliff.toml
+++ b/cliff.toml
@@ -1,6 +1,10 @@
 # configuration file for git-cliff{ pattern = "foo", replace = "bar"}
 # see https://github.com/orhun/git-cliff#configuration-file

+[remote.github]
+owner = "quickwit-oss"
+repo = "tantivy"
+
 [changelog]
 # changelog header
 header = """
@@ -8,15 +12,43 @@ header = """
 # template for the changelog body
 # https://tera.netlify.app/docs/#introduction
 body = """
-{% if version %}\
-    {{ version | trim_start_matches(pat="v") }} ({{ timestamp | date(format="%Y-%m-%d") }})
-    ==================
-{% else %}\
-    ## [unreleased]
-{% endif %}\
+## What's Changed
+
+{%- if version %} in {{ version }}{%- endif -%}
 {% for commit in commits %}
-    - {% if commit.breaking %}[**breaking**] {% endif %}{{ commit.message | split(pat="\n") | first | trim | upper_first }}(@{{ commit.author.name }})\
-{% endfor %}
+  {% if commit.github.pr_title -%}
+    {%- set commit_message = commit.github.pr_title -%}
+  {%- else -%}
+    {%- set commit_message = commit.message -%}
+  {%- endif -%}
+  - {{ commit_message | split(pat="\n") | first | trim }}\
+    {% if commit.github.pr_number %} \
+      [#{{ commit.github.pr_number }}]({{ self::remote_url() }}/pull/{{ commit.github.pr_number }}){% if commit.github.username %}(@{{ commit.github.username }}){%- endif -%} \
+    {%- endif %}
+{%- endfor -%}
+
+{% if github.contributors | filter(attribute="is_first_time", value=true) | length != 0 %}
+  {% raw %}\n{% endraw -%}
+  ## New Contributors
+{%- endif %}\
+{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %}
+  * @{{ contributor.username }} made their first contribution
+    {%- if contributor.pr_number %} in \
+      [#{{ contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \
+    {%- endif %}
+{%- endfor -%}
+
+{% if version %}
+    {% if previous.version %}
+      **Full Changelog**: {{ self::remote_url() }}/compare/{{ previous.version }}...{{ version }}
+    {% endif %}
+{% else -%}
+  {% raw %}\n{% endraw %}
+{% endif %}
+
+{%- macro remote_url() -%}
+  https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}
+{%- endmacro -%}
 """
 # remove the leading and trailing whitespace from the template
 trim = true
@@ -25,53 +57,24 @@ footer = """
 """

 postprocessors = [
-    { pattern = 'Paul Masurel', replace = "fulmicoton"}, # replace with github user
-    { pattern = 'PSeitz', replace = "PSeitz"}, # replace with github user
-    { pattern = 'Adam Reichold', replace = "adamreichold"}, # replace with github user
-    { pattern = 'trinity-1686a', replace = "trinity-1686a"}, # replace with github user
-    { pattern = 'Michael Kleen', replace = "mkleen"}, # replace with github user
-    { pattern = 'Adrien Guillo', replace = "guilload"}, # replace with github user
-    { pattern = 'François Massot', replace = "fmassot"}, # replace with github user
-    { pattern = 'Naveen Aiathurai', replace = "naveenann"}, # replace with github user
-    { pattern = '', replace = ""}, # replace with github user
 ]

 [git]
 # parse the commits based on https://www.conventionalcommits.org
 # This is required or commit.message contains the whole commit message and not just the title
-conventional_commits = true
+conventional_commits = false
 # filter out the commits that are not conventional
-filter_unconventional = false
+filter_unconventional = true
 # process each line of a commit as an individual commit
 split_commits = false
 # regex for preprocessing the commit messages
 commit_preprocessors = [
-    { pattern = '\((\w+\s)?#([0-9]+)\)', replace = "[#${2}](https://github.com/quickwit-oss/tantivy/issues/${2})"}, # replace issue numbers
+    { pattern = '\((\w+\s)?#([0-9]+)\)', replace = ""},
 ]
 #link_parsers = [
    #{ pattern = "#(\\d+)", href = "https://github.com/quickwit-oss/tantivy/pulls/$1"},
 #]
 # regex for parsing and grouping commits
-commit_parsers = [
-    { message = "^feat", group = "Features"},
-    { message = "^fix", group = "Bug Fixes"},
-    { message = "^doc", group = "Documentation"},
-    { message = "^perf", group = "Performance"},
-    { message = "^refactor", group = "Refactor"},
-    { message = "^style", group = "Styling"},
-    { message = "^test", group = "Testing"},
-    { message = "^chore\\(release\\): prepare for", skip = true},
-    { message = "(?i)clippy", skip = true},
-    { message = "(?i)dependabot", skip = true},
-    { message = "(?i)fmt", skip = true},
-    { message = "(?i)bump", skip = true},
-    { message = "(?i)readme", skip = true},
-    { message = "(?i)comment", skip = true},
-    { message = "(?i)spelling", skip = true},
-    { message = "^chore", group = "Miscellaneous Tasks"},
-    { body = ".*security", group = "Security"},
-    { message = ".*", group = "Other", default_scope = "other"},
-]
 # protect breaking changes from being skipped due to matching a skipping commit_parser
 protect_breaking_commits = false
 # filter out the commits that are not matched by commit parsers
--- a/columnar/Cargo.toml
+++ b/columnar/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-columnar"
-version = "0.2.0"
+version = "0.3.0"
 edition = "2021"
 license = "MIT"
 homepage = "https://github.com/quickwit-oss/tantivy"
@@ -9,19 +9,30 @@ description = "column oriented storage for tantivy"
 categories = ["database-implementations", "data-structures", "compression"]

 [dependencies]
-itertools = "0.12.0"
+itertools = "0.13.0"
 fastdivide = "0.4.0"

-stacker = { version= "0.2", path = "../stacker", package="tantivy-stacker"}
-sstable = { version= "0.2", path = "../sstable", package = "tantivy-sstable" }
-common = { version= "0.6", path = "../common", package = "tantivy-common" }
-tantivy-bitpacker = { version= "0.5", path = "../bitpacker/" }
+stacker = { version= "0.3", path = "../stacker", package="tantivy-stacker"}
+sstable = { version= "0.3", path = "../sstable", package = "tantivy-sstable" }
+common = { version= "0.7", path = "../common", package = "tantivy-common" }
+tantivy-bitpacker = { version= "0.6", path = "../bitpacker/" }
 serde = "1.0.152"
+downcast-rs = "1.2.0"

 [dev-dependencies]
 proptest = "1"
 more-asserts = "0.3.1"
 rand = "0.8"
+binggan = "0.8.1"
+
+[[bench]]
+name = "bench_merge"
+harness = false
+
+[[bench]]
+name = "bench_access"
+harness = false
+

 [features]
 unstable = []
--- a/columnar/benches/bench_access.rs
+++ b/columnar/benches/bench_access.rs
@@ -0,0 +1,67 @@
+use binggan::{black_box, InputGroup};
+use common::*;
+use tantivy_columnar::Column;
+
+pub mod common;
+
+const NUM_DOCS: u32 = 2_000_000;
+
+pub fn generate_columnar_and_open(card: Card, num_docs: u32) -> Column {
+    let reader = generate_columnar_with_name(card, num_docs, "price");
+    reader.read_columns("price").unwrap()[0]
+        .open_u64_lenient()
+        .unwrap()
+        .unwrap()
+}
+
+fn main() {
+    let mut inputs = Vec::new();
+
+    let mut add_card = |card1: Card| {
+        inputs.push((
+            format!("{card1}"),
+            generate_columnar_and_open(card1, NUM_DOCS),
+        ));
+    };
+
+    add_card(Card::MultiSparse);
+    add_card(Card::Multi);
+    add_card(Card::Sparse);
+    add_card(Card::Dense);
+    add_card(Card::Full);
+
+    bench_group(InputGroup::new_with_inputs(inputs));
+}
+
+fn bench_group(mut runner: InputGroup<Column>) {
+    runner.register("access_values_for_doc", |column| {
+        let mut total = 0;
+        for doc in 0..NUM_DOCS {
+            for value in column.values_for_doc(doc) {
+                total += value;
+            }
+        }
+        black_box(total);
+    });
+    runner.register("access_first_vals", |column| {
+        const BLOCK_SIZE: usize = 32;
+        let mut total = 0;
+        let mut block_docs = vec![0; BLOCK_SIZE];
+        let mut first_values = vec![None; BLOCK_SIZE];
+        for block_start in (0..NUM_DOCS).step_by(BLOCK_SIZE) {
+            // Consecutive doc ids of the current block.
+            for (offset, doc) in block_docs.iter_mut().enumerate() {
+                *doc = offset as u32 + block_start;
+            }
+
+            column.first_vals(&block_docs, &mut first_values);
+            for value in first_values.iter().flatten() {
+                // `flatten` skips the docs that have no value.
+                total += *value;
+            }
+        }
+
+        black_box(total);
+    });
+    runner.run();
+}
--- a/columnar/benches/bench_first_vals.rs
+++ b/columnar/benches/bench_first_vals.rs
@@ -0,0 +1,155 @@
+#![feature(test)]
+extern crate test;
+
+use std::sync::Arc;
+
+use rand::prelude::*;
+use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType};
+use tantivy_columnar::*;
+use test::{black_box, Bencher};
+
+struct Columns {
+    pub optional: Column,
+    pub full: Column,
+    pub multi: Column,
+}
+
+fn get_test_columns() -> Columns {
+    let data = generate_permutation();
+    let mut dataframe_writer = ColumnarWriter::default();
+    for (idx, val) in data.iter().enumerate() {
+        dataframe_writer.record_numerical(idx as u32, "full_values", NumericalValue::U64(*val));
+        if idx % 2 == 0 {
+            dataframe_writer.record_numerical(
+                idx as u32,
+                "optional_values",
+                NumericalValue::U64(*val),
+            );
+        }
+        dataframe_writer.record_numerical(idx as u32, "multi_values", NumericalValue::U64(*val));
+        dataframe_writer.record_numerical(idx as u32, "multi_values", NumericalValue::U64(*val));
+    }
+    let mut buffer: Vec<u8> = Vec::new();
+    dataframe_writer
+        .serialize(data.len() as u32, &mut buffer)
+        .unwrap();
+    let columnar = ColumnarReader::open(buffer).unwrap();
+
+    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("optional_values").unwrap();
+    assert_eq!(cols.len(), 1);
+    let optional = cols[0].open_u64_lenient().unwrap().unwrap();
+    assert_eq!(optional.index.get_cardinality(), Cardinality::Optional);
+
+    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("full_values").unwrap();
+    assert_eq!(cols.len(), 1);
+    let column_full = cols[0].open_u64_lenient().unwrap().unwrap();
+    assert_eq!(column_full.index.get_cardinality(), Cardinality::Full);
+
+    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("multi_values").unwrap();
+    assert_eq!(cols.len(), 1);
+    let multi = cols[0].open_u64_lenient().unwrap().unwrap();
+    assert_eq!(multi.index.get_cardinality(), Cardinality::Multivalued);
+
+    Columns {
+        optional,
+        full: column_full,
+        multi,
+    }
+}
+
+const NUM_VALUES: u64 = 100_000;
+fn generate_permutation() -> Vec<u64> {
+    let mut permutation: Vec<u64> = (0u64..NUM_VALUES).collect();
+    permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
+    permutation
+}
+
+pub fn serialize_and_load(column: &[u64], codec_type: CodecType) -> Arc<dyn ColumnValues<u64>> {
+    serialize_and_load_u64_based_column_values(&column, &[codec_type])
+}
+
+fn run_bench_on_column_full_scan(b: &mut Bencher, column: Column) {
+    let num_iter = black_box(NUM_VALUES);
+    b.iter(|| {
+        let mut sum = 0u64;
+        for i in 0..num_iter as u32 {
+            let val = column.first(i);
+            sum += val.unwrap_or(0);
+        }
+        sum
+    });
+}
+fn run_bench_on_column_block_fetch(b: &mut Bencher, column: Column) {
+    let mut block: Vec<Option<u64>> = vec![None; 64];
+    let fetch_docids = (0..64).collect::<Vec<_>>();
+    b.iter(move || {
+        column.first_vals(&fetch_docids, &mut block);
+        block[0]
+    });
+}
+fn run_bench_on_column_block_single_calls(b: &mut Bencher, column: Column) {
+    let mut first_values: Vec<Option<u64>> = vec![None; 64];
+    let doc_ids: Vec<_> = (0..64).collect();
+    b.iter(move || {
+        for (slot, &doc) in first_values.iter_mut().zip(doc_ids.iter()) {
+            *slot = column.first(doc);
+        }
+        first_values[0]
+    });
+}
+
+/// Column first method
+#[bench]
+fn bench_get_first_on_full_column_full_scan(b: &mut Bencher) {
+    let column = get_test_columns().full;
+    run_bench_on_column_full_scan(b, column);
+}
+
+#[bench]
+fn bench_get_first_on_optional_column_full_scan(b: &mut Bencher) {
+    let column = get_test_columns().optional;
+    run_bench_on_column_full_scan(b, column);
+}
+
+#[bench]
+fn bench_get_first_on_multi_column_full_scan(b: &mut Bencher) {
+    let column = get_test_columns().multi;
+    run_bench_on_column_full_scan(b, column);
+}
+
+/// Block fetch column accessor
+#[bench]
+fn bench_get_block_first_on_optional_column(b: &mut Bencher) {
+    let column = get_test_columns().optional;
+    run_bench_on_column_block_fetch(b, column);
+}
+
+#[bench]
+fn bench_get_block_first_on_multi_column(b: &mut Bencher) {
+    let column = get_test_columns().multi;
+    run_bench_on_column_block_fetch(b, column);
+}
+
+#[bench]
+fn bench_get_block_first_on_full_column(b: &mut Bencher) {
+    let column = get_test_columns().full;
+    run_bench_on_column_block_fetch(b, column);
+}
+
+#[bench]
+fn bench_get_block_first_on_optional_column_single_calls(b: &mut Bencher) {
+    let column = get_test_columns().optional;
+    run_bench_on_column_block_single_calls(b, column);
+}
+
+#[bench]
+fn bench_get_block_first_on_multi_column_single_calls(b: &mut Bencher) {
+    let column = get_test_columns().multi;
+    run_bench_on_column_block_single_calls(b, column);
+}
+
+#[bench]
+fn bench_get_block_first_on_full_column_single_calls(b: &mut Bencher) {
+    let column = get_test_columns().full;
+    run_bench_on_column_block_single_calls(b, column);
+}
--- a/columnar/benches/bench_merge.rs
+++ b/columnar/benches/bench_merge.rs
@@ -0,0 +1,49 @@
+pub mod common;
+
+use binggan::{black_box, BenchRunner};
+use common::{generate_columnar_with_name, Card};
+use tantivy_columnar::*;
+
+const NUM_DOCS: u32 = 100_000;
+
+fn main() {
+    let mut inputs = Vec::new();
+
+    let mut add_combo = |card1: Card, card2: Card| {
+        inputs.push((
+            format!("merge_{card1}_and_{card2}"),
+            vec![
+                generate_columnar_with_name(card1, NUM_DOCS, "price"),
+                generate_columnar_with_name(card2, NUM_DOCS, "price"),
+            ],
+        ));
+    };
+
+    add_combo(Card::Multi, Card::Multi);
+    add_combo(Card::MultiSparse, Card::MultiSparse);
+    add_combo(Card::Dense, Card::Dense);
+    add_combo(Card::Sparse, Card::Sparse);
+    add_combo(Card::Sparse, Card::Dense);
+    add_combo(Card::MultiSparse, Card::Dense);
+    add_combo(Card::MultiSparse, Card::Sparse);
+    add_combo(Card::Multi, Card::Dense);
+    add_combo(Card::Multi, Card::Sparse);
+
+    let runner: BenchRunner = BenchRunner::new();
+    let mut group = runner.new_group();
+    for (input_name, columnar_readers) in inputs.iter() {
+        group.register_with_input(
+            input_name,
+            columnar_readers,
+            move |columnar_readers: &Vec<ColumnarReader>| {
+                let mut out = Vec::new();
+                let columnar_readers = columnar_readers.iter().collect::<Vec<_>>();
+                let merge_row_order = StackMergeOrder::stack(&columnar_readers[..]);
+
+                merge_columnar(&columnar_readers, &[], merge_row_order.into(), &mut out).unwrap();
+                black_box(out);
+            },
+        );
+    }
+    group.run();
+}
--- a/columnar/benches/bench_values_u128.rs
+++ b/columnar/benches/bench_values_u128.rs
--- a/columnar/benches/bench_values_u64.rs
+++ b/columnar/benches/bench_values_u64.rs
@@ -16,14 +16,6 @@ fn generate_permutation() -> Vec<u64> {
    permutation
 }

-fn generate_random() -> Vec<u64> {
-    let mut permutation: Vec<u64> = (0u64..100_000u64)
-        .map(|el| el + random::<u16>() as u64)
-        .collect();
-    permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
-    permutation
-}
-
 // Warning: this generates the same permutation at each call
 fn generate_permutation_gcd() -> Vec<u64> {
    let mut permutation: Vec<u64> = (1u64..100_000u64).map(|el| el * 1000).collect();
--- a/columnar/benches/common.rs
+++ b/columnar/benches/common.rs
@@ -0,0 +1,59 @@
+extern crate tantivy_columnar;
+
+use core::fmt;
+use std::fmt::{Display, Formatter};
+
+use tantivy_columnar::{ColumnarReader, ColumnarWriter};
+
+pub enum Card {
+    MultiSparse,
+    Multi,
+    Sparse,
+    Dense,
+    Full,
+}
+impl Display for Card {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        f.write_str(match self {
+            Card::MultiSparse => "multi sparse 1/13",
+            Card::Multi => "multi 2x",
+            Card::Sparse => "sparse 1/13",
+            Card::Dense => "dense 1/12",
+            Card::Full => "full",
+        })
+    }
+}
+pub fn generate_columnar_with_name(card: Card, num_docs: u32, column_name: &str) -> ColumnarReader {
+    let mut columnar_writer = ColumnarWriter::default();
+
+    if let Card::MultiSparse = card {
+        columnar_writer.record_numerical(0, column_name, 10u64);
+        columnar_writer.record_numerical(0, column_name, 10u64);
+    }
+
+    for i in 0..num_docs {
+        match card {
+            Card::MultiSparse | Card::Sparse => {
+                if i % 13 == 0 {
+                    columnar_writer.record_numerical(i, column_name, i as u64);
+                }
+            }
+            Card::Dense => {
+                if i % 12 == 0 {
+                    columnar_writer.record_numerical(i, column_name, i as u64);
+                }
+            }
+            Card::Full => {
+                columnar_writer.record_numerical(i, column_name, i as u64);
+            }
+            Card::Multi => {
+                columnar_writer.record_numerical(i, column_name, i as u64);
+                columnar_writer.record_numerical(i, column_name, i as u64);
+            }
+        }
+    }
+
+    let mut wrt: Vec<u8> = Vec::new();
+    columnar_writer.serialize(num_docs, &mut wrt).unwrap();
+    ColumnarReader::open(wrt).unwrap()
+}
--- a/columnar/compat_tests_data/v1.columnar
+++ b/columnar/compat_tests_data/v1.columnar
--- a/columnar/compat_tests_data/v2.columnar
+++ b/columnar/compat_tests_data/v2.columnar
--- a/columnar/src/block_accessor.rs
+++ b/columnar/src/block_accessor.rs
@@ -14,20 +14,32 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
    ColumnBlockAccessor<T>
 {
    #[inline]
-    pub fn fetch_block(&mut self, docs: &[u32], accessor: &Column<T>) {
-        self.docid_cache.clear();
-        self.row_id_cache.clear();
-        accessor.row_ids_for_docs(docs, &mut self.docid_cache, &mut self.row_id_cache);
-        self.val_cache.resize(self.row_id_cache.len(), T::default());
-        accessor
-            .values
-            .get_vals(&self.row_id_cache, &mut self.val_cache);
+    pub fn fetch_block<'a>(&'a mut self, docs: &'a [u32], accessor: &Column<T>) {
+        if accessor.index.get_cardinality().is_full() {
+            self.val_cache.resize(docs.len(), T::default());
+            accessor.values.get_vals(docs, &mut self.val_cache);
+        } else {
+            self.docid_cache.clear();
+            self.row_id_cache.clear();
+            accessor.row_ids_for_docs(docs, &mut self.docid_cache, &mut self.row_id_cache);
+            self.val_cache.resize(self.row_id_cache.len(), T::default());
+            accessor
+                .values
+                .get_vals(&self.row_id_cache, &mut self.val_cache);
+        }
    }
    #[inline]
    pub fn fetch_block_with_missing(&mut self, docs: &[u32], accessor: &Column<T>, missing: T) {
        self.fetch_block(docs, accessor);
-        // We can compare docid_cache with docs to find missing docs
-        if docs.len() != self.docid_cache.len() || accessor.index.is_multivalue() {
+        // no missing values
+        if accessor.index.get_cardinality().is_full() {
+            return;
+        }
+
+        // We can compare docid_cache length with docs to find missing docs
+        // For multi value columns we can't rely on the length and always need to scan
+        if accessor.index.get_cardinality().is_multivalue() || docs.len() != self.docid_cache.len()
+        {
            self.missing_docids_cache.clear();
            find_missing_docs(docs, &self.docid_cache, |doc| {
                self.missing_docids_cache.push(doc);
@@ -44,11 +56,25 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
    }

    #[inline]
-    pub fn iter_docid_vals(&self) -> impl Iterator<Item = (DocId, T)> + '_ {
-        self.docid_cache
-            .iter()
-            .cloned()
-            .zip(self.val_cache.iter().cloned())
+    /// Returns an iterator over the `(docid, value)` pairs of the fetched block.
+    ///
+    /// `docs` must be the same slice that was passed to `fetch_block` or
+    /// `fetch_block_with_missing`. It is used if the column is full (each doc has exactly one
+    /// value), otherwise the internal docid vec is used for the iterator, which e.g. may
+    /// contain duplicate docs.
+    pub fn iter_docid_vals<'a>(
+        &'a self,
+        docs: &'a [u32],
+        accessor: &Column<T>,
+    ) -> impl Iterator<Item = (DocId, T)> + '_ {
+        if accessor.index.get_cardinality().is_full() {
+            docs.iter().cloned().zip(self.val_cache.iter().cloned())
+        } else {
+            self.docid_cache
+                .iter()
+                .cloned()
+                .zip(self.val_cache.iter().cloned())
+        }
    }
 }

--- a/columnar/src/column/mod.rs
+++ b/columnar/src/column/mod.rs
@@ -3,17 +3,17 @@ mod serialize;

 use std::fmt::{self, Debug};
 use std::io::Write;
-use std::ops::{Deref, Range, RangeInclusive};
+use std::ops::{Range, RangeInclusive};
 use std::sync::Arc;

 use common::BinarySerializable;
 pub use dictionary_encoded::{BytesColumn, StrColumn};
 pub use serialize::{
-    open_column_bytes, open_column_str, open_column_u128, open_column_u64,
-    serialize_column_mappable_to_u128, serialize_column_mappable_to_u64,
+    open_column_bytes, open_column_str, open_column_u128, open_column_u128_as_compact_u64,
+    open_column_u64, serialize_column_mappable_to_u128, serialize_column_mappable_to_u64,
 };

-use crate::column_index::ColumnIndex;
+use crate::column_index::{ColumnIndex, Set};
 use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
 use crate::column_values::{monotonic_map_column, ColumnValues};
 use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
@@ -83,10 +83,36 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
        self.values.max_value()
    }

+    #[inline]
    pub fn first(&self, row_id: RowId) -> Option<T> {
        self.values_for_doc(row_id).next()
    }

+    /// Load the first value for each docid in the slice (`None` for docs without a value).
+    #[inline]
+    pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) {
+        match &self.index {
+            ColumnIndex::Empty { .. } => output.fill(None),
+            ColumnIndex::Full => self.values.get_vals_opt(docids, output),
+            ColumnIndex::Optional(optional_index) => {
+                for (i, docid) in docids.iter().enumerate() {
+                    output[i] = optional_index
+                        .rank_if_exists(*docid)
+                        .map(|rowid| self.values.get_val(rowid));
+                }
+            }
+            ColumnIndex::Multivalued(multivalued_index) => {
+                for (i, docid) in docids.iter().enumerate() {
+                    let range = multivalued_index.range(*docid);
+                    // Always write the slot: callers reuse `output` across blocks, so a doc
+                    // without values must reset it to `None` rather than keep a stale entry.
+                    output[i] = (range.start != range.end)
+                        .then(|| self.values.get_val(range.start));
+                }
+            }
+        }
+    }
+
    /// Translates a block of docis to row_ids.
    ///
    /// returns the row_ids and the matching docids on the same index
@@ -105,11 +131,12 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
    }

    pub fn values_for_doc(&self, doc_id: DocId) -> impl Iterator<Item = T> + '_ {
-        self.value_row_ids(doc_id)
+        self.index
+            .value_row_ids(doc_id)
            .map(|value_row_id: RowId| self.values.get_val(value_row_id))
    }

-    /// Get the docids of values which are in the provided value range.
+    /// Get the docids of values which are in the provided value and docid range.
    #[inline]
    pub fn get_docids_for_value_range(
        &self,
@@ -147,14 +174,6 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
    }
 }

-impl<T> Deref for Column<T> {
-    type Target = ColumnIndex;
-
-    fn deref(&self) -> &Self::Target {
-        &self.index
-    }
-}
-
 impl BinarySerializable for Cardinality {
    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> std::io::Result<()> {
        self.to_code().serialize(writer)
@@ -176,6 +195,7 @@ struct FirstValueWithDefault<T: Copy> {
 impl<T: PartialOrd + Debug + Send + Sync + Copy + 'static> ColumnValues<T>
    for FirstValueWithDefault<T>
 {
+    #[inline(always)]
    fn get_val(&self, idx: u32) -> T {
        self.column.first(idx).unwrap_or(self.default_value)
    }
--- a/columnar/src/column/serialize.rs
+++ b/columnar/src/column/serialize.rs
@@ -12,7 +12,7 @@ use crate::column_values::{
    CodecType, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
 };
 use crate::iterable::Iterable;
-use crate::StrColumn;
+use crate::{StrColumn, Version};

 pub fn serialize_column_mappable_to_u128<T: MonotonicallyMappableToU128>(
    column_index: SerializableColumnIndex<'_>,
@@ -40,25 +40,9 @@ pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64>(
    Ok(())
 }

-pub fn open_column_u64<T: MonotonicallyMappableToU64>(bytes: OwnedBytes) -> io::Result<Column<T>> {
-    let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
-    let column_index_num_bytes = u32::from_le_bytes(
-        column_index_num_bytes_payload
-            .as_slice()
-            .try_into()
-            .unwrap(),
-    );
-    let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
-    let column_index = crate::column_index::open_column_index(column_index_data)?;
-    let column_values = load_u64_based_column_values(column_values_data)?;
-    Ok(Column {
-        index: column_index,
-        values: column_values,
-    })
-}
-
-pub fn open_column_u128<T: MonotonicallyMappableToU128>(
+pub fn open_column_u64<T: MonotonicallyMappableToU64>(
    bytes: OwnedBytes,
+    format_version: Version,
 ) -> io::Result<Column<T>> {
    let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
    let column_index_num_bytes = u32::from_le_bytes(
@@ -68,7 +52,27 @@ pub fn open_column_u128<T: MonotonicallyMappableToU128>(
            .unwrap(),
    );
    let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
-    let column_index = crate::column_index::open_column_index(column_index_data)?;
+    let column_index = crate::column_index::open_column_index(column_index_data, format_version)?;
+    let column_values = load_u64_based_column_values(column_values_data)?;
+    Ok(Column {
+        index: column_index,
+        values: column_values,
+    })
+}
+
+pub fn open_column_u128<T: MonotonicallyMappableToU128>(
+    bytes: OwnedBytes,
+    format_version: Version,
+) -> io::Result<Column<T>> {
+    let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
+    let column_index_num_bytes = u32::from_le_bytes(
+        column_index_num_bytes_payload
+            .as_slice()
+            .try_into()
+            .unwrap(),
+    );
+    let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
+    let column_index = crate::column_index::open_column_index(column_index_data, format_version)?;
    let column_values = crate::column_values::open_u128_mapped(column_values_data)?;
    Ok(Column {
        index: column_index,
@@ -76,19 +80,42 @@ pub fn open_column_u128<T: MonotonicallyMappableToU128>(
    })
 }

-pub fn open_column_bytes(data: OwnedBytes) -> io::Result<BytesColumn> {
+/// Open the column as a `Column<u64>`.
+///
+/// See [`crate::column_values::open_u128_as_compact_u64`] for more details.
+pub fn open_column_u128_as_compact_u64(
+    bytes: OwnedBytes,
+    format_version: Version,
+) -> io::Result<Column<u64>> {
+    let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
+    let column_index_num_bytes = u32::from_le_bytes(
+        column_index_num_bytes_payload
+            .as_slice()
+            .try_into()
+            .unwrap(),
+    );
+    let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
+    let column_index = crate::column_index::open_column_index(column_index_data, format_version)?;
+    let column_values = crate::column_values::open_u128_as_compact_u64(column_values_data)?;
+    Ok(Column {
+        index: column_index,
+        values: column_values,
+    })
+}
+
+pub fn open_column_bytes(data: OwnedBytes, format_version: Version) -> io::Result<BytesColumn> {
    let (body, dictionary_len_bytes) = data.rsplit(4);
    let dictionary_len = u32::from_le_bytes(dictionary_len_bytes.as_slice().try_into().unwrap());
    let (dictionary_bytes, column_bytes) = body.split(dictionary_len as usize);
    let dictionary = Arc::new(Dictionary::from_bytes(dictionary_bytes)?);
-    let term_ord_column = crate::column::open_column_u64::<u64>(column_bytes)?;
+    let term_ord_column = crate::column::open_column_u64::<u64>(column_bytes, format_version)?;
    Ok(BytesColumn {
        dictionary,
        term_ord_column,
    })
 }

-pub fn open_column_str(data: OwnedBytes) -> io::Result<StrColumn> {
-    let bytes_column = open_column_bytes(data)?;
+pub fn open_column_str(data: OwnedBytes, format_version: Version) -> io::Result<StrColumn> {
+    let bytes_column = open_column_bytes(data, format_version)?;
    Ok(StrColumn::wrap(bytes_column))
 }
--- a/columnar/src/column_index/merge/mod.rs
+++ b/columnar/src/column_index/merge/mod.rs
@@ -95,8 +95,12 @@ pub fn merge_column_index<'a>(

 #[cfg(test)]
 mod tests {
+    use common::OwnedBytes;
+
    use crate::column_index::merge::detect_cardinality;
-    use crate::column_index::multivalued_index::MultiValueIndex;
+    use crate::column_index::multivalued_index::{
+        open_multivalued_index, serialize_multivalued_index, MultiValueIndex,
+    };
    use crate::column_index::{merge_column_index, OptionalIndex, SerializableColumnIndex};
    use crate::{
        Cardinality, ColumnIndex, MergeRowOrder, RowAddr, RowId, ShuffleMergeOrder, StackMergeOrder,
@@ -171,7 +175,11 @@ mod tests {
        let SerializableColumnIndex::Multivalued(start_index_iterable) = merged_column_index else {
            panic!("Excpected a multivalued index")
        };
-        let start_indexes: Vec<RowId> = start_index_iterable.boxed_iter().collect();
+        let mut output = Vec::new();
+        serialize_multivalued_index(&start_index_iterable, &mut output).unwrap();
+        let multivalue =
+            open_multivalued_index(OwnedBytes::new(output), crate::Version::V2).unwrap();
+        let start_indexes: Vec<RowId> = multivalue.get_start_index_column().iter().collect();
        assert_eq!(&start_indexes, &[0, 3, 5]);
    }

@@ -200,11 +208,16 @@ mod tests {
            ],
        )
        .into();
+
        let merged_column_index = merge_column_index(&column_indexes[..], &merge_row_order);
        let SerializableColumnIndex::Multivalued(start_index_iterable) = merged_column_index else {
            panic!("Excpected a multivalued index")
        };
-        let start_indexes: Vec<RowId> = start_index_iterable.boxed_iter().collect();
+        let mut output = Vec::new();
+        serialize_multivalued_index(&start_index_iterable, &mut output).unwrap();
+        let multivalue =
+            open_multivalued_index(OwnedBytes::new(output), crate::Version::V2).unwrap();
+        let start_indexes: Vec<RowId> = multivalue.get_start_index_column().iter().collect();
        assert_eq!(&start_indexes, &[0, 3, 5, 6]);
    }
 }
--- a/columnar/src/column_index/merge/shuffled.rs
+++ b/columnar/src/column_index/merge/shuffled.rs
@@ -1,6 +1,8 @@
 use std::iter;

-use crate::column_index::{SerializableColumnIndex, Set};
+use crate::column_index::{
+    SerializableColumnIndex, SerializableMultivalueIndex, SerializableOptionalIndex, Set,
+};
 use crate::iterable::Iterable;
 use crate::{Cardinality, ColumnIndex, RowId, ShuffleMergeOrder};

@@ -14,15 +16,24 @@ pub fn merge_column_index_shuffled<'a>(
        Cardinality::Optional => {
            let non_null_row_ids =
                merge_column_index_shuffled_optional(column_indexes, shuffle_merge_order);
-            SerializableColumnIndex::Optional {
+            SerializableColumnIndex::Optional(SerializableOptionalIndex {
                non_null_row_ids,
                num_rows: shuffle_merge_order.num_rows(),
-            }
+            })
        }
        Cardinality::Multivalued => {
-            let multivalue_start_index =
-                merge_column_index_shuffled_multivalued(column_indexes, shuffle_merge_order);
-            SerializableColumnIndex::Multivalued(multivalue_start_index)
+            let non_null_row_ids =
+                merge_column_index_shuffled_optional(column_indexes, shuffle_merge_order);
+            SerializableColumnIndex::Multivalued(SerializableMultivalueIndex {
+                doc_ids_with_values: SerializableOptionalIndex {
+                    non_null_row_ids,
+                    num_rows: shuffle_merge_order.num_rows(),
+                },
+                start_offsets: merge_column_index_shuffled_multivalued(
+                    column_indexes,
+                    shuffle_merge_order,
+                ),
+            })
        }
    }
 }
@@ -102,11 +113,18 @@ fn iter_num_values<'a>(

 /// Transforms an iterator containing the number of vals per row (with `num_rows` elements)
 /// into a `start_offset` iterator starting at 0 and (with `num_rows + 1` element)
+///
+/// Rows with 0 values are skipped, as these are covered by the optional index in the multivalue
+/// index; the output then has one element per row with values, plus the leading 0.
 fn integrate_num_vals(num_vals: impl Iterator<Item = u32>) -> impl Iterator<Item = RowId> {
-    iter::once(0u32).chain(num_vals.scan(0, |state, num_vals| {
-        *state += num_vals;
-        Some(*state)
-    }))
+    iter::once(0u32).chain(
+        num_vals
+            .filter(|num_vals| *num_vals != 0)
+            .scan(0, |state, num_vals| {
+                *state += num_vals;
+                Some(*state)
+            }),
+    )
 }

 impl<'a> Iterable<u32> for ShuffledMultivaluedIndex<'a> {
@@ -134,13 +152,13 @@ mod tests {

    #[test]
    fn test_integrate_num_vals_several() {
-        assert!(integrate_num_vals([3, 0, 10, 20].into_iter()).eq([0, 3, 3, 13, 33].into_iter()));
+        assert!(integrate_num_vals([3, 0, 10, 20].into_iter()).eq([0, 3, 13, 33].into_iter()));
    }

    #[test]
    fn test_merge_column_index_optional_shuffle() {
        let optional_index: ColumnIndex = OptionalIndex::for_test(2, &[0]).into();
-        let column_indexes = vec![optional_index, ColumnIndex::Full];
+        let column_indexes = [optional_index, ColumnIndex::Full];
        let row_addrs = vec![
            RowAddr {
                segment_ord: 0u32,
@@ -157,10 +175,10 @@ mod tests {
            Cardinality::Optional,
            &shuffle_merge_order,
        );
-        let SerializableColumnIndex::Optional {
+        let SerializableColumnIndex::Optional(SerializableOptionalIndex {
            non_null_row_ids,
            num_rows,
-        } = serializable_index
+        }) = serializable_index
        else {
            panic!()
        };
--- a/columnar/src/column_index/merge/stacked.rs
+++ b/columnar/src/column_index/merge/stacked.rs
@@ -1,6 +1,8 @@
-use std::iter;
+use std::ops::Range;

-use crate::column_index::{SerializableColumnIndex, Set};
+use crate::column_index::multivalued_index::{MultiValueIndex, SerializableMultivalueIndex};
+use crate::column_index::serialize::SerializableOptionalIndex;
+use crate::column_index::SerializableColumnIndex;
 use crate::iterable::Iterable;
 use crate::{Cardinality, ColumnIndex, RowId, StackMergeOrder};

@@ -15,23 +17,149 @@ pub fn merge_column_index_stacked<'a>(
 ) -> SerializableColumnIndex<'a> {
    match cardinality_after_merge {
        Cardinality::Full => SerializableColumnIndex::Full,
-        Cardinality::Optional => SerializableColumnIndex::Optional {
+        Cardinality::Optional => SerializableColumnIndex::Optional(SerializableOptionalIndex {
            non_null_row_ids: Box::new(StackedOptionalIndex {
                columns,
                stack_merge_order,
            }),
            num_rows: stack_merge_order.num_rows(),
-        },
+        }),
        Cardinality::Multivalued => {
-            let stacked_multivalued_index = StackedMultivaluedIndex {
-                columns,
-                stack_merge_order,
-            };
-            SerializableColumnIndex::Multivalued(Box::new(stacked_multivalued_index))
+            let serializable_multivalue_index =
+                make_serializable_multivalued_index(columns, stack_merge_order);
+            SerializableColumnIndex::Multivalued(serializable_multivalue_index)
        }
    }
 }

+struct StackedDocIdsWithValues<'a> {
+    column_indexes: &'a [ColumnIndex],
+    stack_merge_order: &'a StackMergeOrder,
+}
+
+impl Iterable<u32> for StackedDocIdsWithValues<'_> {
+    fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
+        Box::new((0..self.column_indexes.len()).flat_map(|i| {
+            let column_index = &self.column_indexes[i];
+            let doc_range = self.stack_merge_order.columnar_range(i);
+            get_doc_ids_with_values(column_index, doc_range)
+        }))
+    }
+}
+
+fn get_doc_ids_with_values<'a>(
+    column_index: &'a ColumnIndex,
+    doc_range: Range<u32>,
+) -> Box<dyn Iterator<Item = u32> + 'a> {
+    match column_index {
+        ColumnIndex::Empty { .. } => Box::new(0..0),
+        ColumnIndex::Full => Box::new(doc_range),
+        ColumnIndex::Optional(optional_index) => Box::new(
+            optional_index
+                .iter_rows()
+                .map(move |row| row + doc_range.start),
+        ),
+        ColumnIndex::Multivalued(multivalued_index) => match multivalued_index {
+            MultiValueIndex::MultiValueIndexV1(multivalued_index) => {
+                Box::new((0..multivalued_index.num_docs()).filter_map(move |docid| {
+                    let range = multivalued_index.range(docid);
+                    if range.is_empty() {
+                        None
+                    } else {
+                        Some(docid + doc_range.start)
+                    }
+                }))
+            }
+            MultiValueIndex::MultiValueIndexV2(multivalued_index) => Box::new(
+                multivalued_index
+                    .optional_index
+                    .iter_rows()
+                    .map(move |row| row + doc_range.start),
+            ),
+        },
+    }
+}
+
+fn stack_doc_ids_with_values<'a>(
+    column_indexes: &'a [ColumnIndex],
+    stack_merge_order: &'a StackMergeOrder,
+) -> SerializableOptionalIndex<'a> {
+    let num_rows = stack_merge_order.num_rows();
+    SerializableOptionalIndex {
+        non_null_row_ids: Box::new(StackedDocIdsWithValues {
+            column_indexes,
+            stack_merge_order,
+        }),
+        num_rows,
+    }
+}
+
+struct StackedStartOffsets<'a> {
+    column_indexes: &'a [ColumnIndex],
+    stack_merge_order: &'a StackMergeOrder,
+}
+
+fn get_num_values_iterator<'a>(
+    column_index: &'a ColumnIndex,
+    num_docs: u32,
+) -> Box<dyn Iterator<Item = u32> + 'a> {
+    match column_index {
+        ColumnIndex::Empty { .. } => Box::new(std::iter::empty()),
+        ColumnIndex::Full => Box::new(std::iter::repeat(1u32).take(num_docs as usize)),
+        ColumnIndex::Optional(optional_index) => {
+            Box::new(std::iter::repeat(1u32).take(optional_index.num_non_nulls() as usize))
+        }
+        ColumnIndex::Multivalued(multivalued_index) => Box::new(
+            multivalued_index
+                .get_start_index_column()
+                .iter()
+                .scan(0u32, |previous_start_offset, current_start_offset| {
+                    let num_vals = current_start_offset - *previous_start_offset;
+                    *previous_start_offset = current_start_offset;
+                    Some(num_vals)
+                })
+                .skip(1),
+        ),
+    }
+}
+
+impl<'a> Iterable<u32> for StackedStartOffsets<'a> {
+    fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
+        let num_values_it = (0..self.column_indexes.len()).flat_map(|columnar_id| {
+            let num_docs = self.stack_merge_order.columnar_range(columnar_id).len() as u32;
+            let column_index = &self.column_indexes[columnar_id];
+            get_num_values_iterator(column_index, num_docs)
+        });
+        Box::new(std::iter::once(0u32).chain(num_values_it.into_iter().scan(
+            0u32,
+            |cumulated, el| {
+                *cumulated += el;
+                Some(*cumulated)
+            },
+        )))
+    }
+}
+
+fn stack_start_offsets<'a>(
+    column_indexes: &'a [ColumnIndex],
+    stack_merge_order: &'a StackMergeOrder,
+) -> Box<dyn Iterable<u32> + 'a> {
+    Box::new(StackedStartOffsets {
+        column_indexes,
+        stack_merge_order,
+    })
+}
+
+fn make_serializable_multivalued_index<'a>(
+    columns: &'a [ColumnIndex],
+    stack_merge_order: &'a StackMergeOrder,
+) -> SerializableMultivalueIndex<'a> {
+    SerializableMultivalueIndex {
+        doc_ids_with_values: stack_doc_ids_with_values(columns, stack_merge_order),
+        start_offsets: stack_start_offsets(columns, stack_merge_order),
+    }
+}
+
 struct StackedOptionalIndex<'a> {
    columns: &'a [ColumnIndex],
    stack_merge_order: &'a StackMergeOrder,
@@ -62,90 +190,3 @@ impl<'a> Iterable<RowId> for StackedOptionalIndex<'a> {
        )
    }
 }
-
-#[derive(Clone, Copy)]
-struct StackedMultivaluedIndex<'a> {
-    columns: &'a [ColumnIndex],
-    stack_merge_order: &'a StackMergeOrder,
-}
-
-fn convert_column_opt_to_multivalued_index<'a>(
-    column_index_opt: &'a ColumnIndex,
-    num_rows: RowId,
-) -> Box<dyn Iterator<Item = RowId> + 'a> {
-    match column_index_opt {
-        ColumnIndex::Empty { .. } => Box::new(iter::repeat(0u32).take(num_rows as usize + 1)),
-        ColumnIndex::Full => Box::new(0..num_rows + 1),
-        ColumnIndex::Optional(optional_index) => {
-            Box::new(
-                (0..num_rows)
-                    // TODO optimize
-                    .map(|row_id| optional_index.rank(row_id))
-                    .chain(std::iter::once(optional_index.num_non_nulls())),
-            )
-        }
-        ColumnIndex::Multivalued(multivalued_index) => multivalued_index.start_index_column.iter(),
-    }
-}
-
-impl<'a> Iterable<RowId> for StackedMultivaluedIndex<'a> {
-    fn boxed_iter(&self) -> Box<dyn Iterator<Item = RowId> + '_> {
-        let multivalued_indexes =
-            self.columns
-                .iter()
-                .enumerate()
-                .map(|(columnar_id, column_opt)| {
-                    let num_rows =
-                        self.stack_merge_order.columnar_range(columnar_id).len() as RowId;
-                    convert_column_opt_to_multivalued_index(column_opt, num_rows)
-                });
-        stack_multivalued_indexes(multivalued_indexes)
-    }
-}
-
-// Refactor me
-fn stack_multivalued_indexes<'a>(
-    mut multivalued_indexes: impl Iterator<Item = Box<dyn Iterator<Item = RowId> + 'a>> + 'a,
-) -> Box<dyn Iterator<Item = RowId> + 'a> {
-    let mut offset = 0;
-    let mut last_row_id = 0;
-    let mut current_it = multivalued_indexes.next();
-    Box::new(std::iter::from_fn(move || loop {
-        let Some(multivalued_index) = current_it.as_mut() else {
-            return None;
-        };
-        if let Some(row_id) = multivalued_index.next() {
-            last_row_id = offset + row_id;
-            return Some(last_row_id);
-        }
-        offset = last_row_id;
-        loop {
-            current_it = multivalued_indexes.next();
-            if current_it.as_mut()?.next().is_some() {
-                break;
-            }
-        }
-    }))
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::RowId;
-
-    fn it<'a>(row_ids: &'a [RowId]) -> Box<dyn Iterator<Item = RowId> + 'a> {
-        Box::new(row_ids.iter().copied())
-    }
-
-    #[test]
-    fn test_stack() {
-        let columns = [
-            it(&[0u32, 0u32]),
-            it(&[0u32, 1u32, 1u32, 4u32]),
-            it(&[0u32, 3u32, 5u32]),
-            it(&[0u32, 4u32]),
-        ]
-        .into_iter();
-        let start_offsets: Vec<RowId> = super::stack_multivalued_indexes(columns).collect();
-        assert_eq!(start_offsets, &[0, 0, 1, 1, 4, 7, 9, 13]);
-    }
-}
--- a/columnar/src/column_index/mod.rs
+++ b/columnar/src/column_index/mod.rs
@@ -11,8 +11,11 @@ mod serialize;
 use std::ops::Range;

 pub use merge::merge_column_index;
+pub(crate) use multivalued_index::SerializableMultivalueIndex;
 pub use optional_index::{OptionalIndex, Set};
-pub use serialize::{open_column_index, serialize_column_index, SerializableColumnIndex};
+pub use serialize::{
+    open_column_index, serialize_column_index, SerializableColumnIndex, SerializableOptionalIndex,
+};

 use crate::column_index::multivalued_index::MultiValueIndex;
 use crate::{Cardinality, DocId, RowId};
@@ -42,10 +45,6 @@ impl From<MultiValueIndex> for ColumnIndex {
 }

 impl ColumnIndex {
-    #[inline]
-    pub fn is_multivalue(&self) -> bool {
-        matches!(self, ColumnIndex::Multivalued(_))
-    }
    /// Returns the cardinality of the column index.
    ///
    /// By convention, if the column contains no docs, we consider that it is
@@ -126,24 +125,50 @@ impl ColumnIndex {
        }
    }

-    pub fn docid_range_to_rowids(&self, doc_id: Range<DocId>) -> Range<RowId> {
+    pub fn docid_range_to_rowids(&self, doc_id_range: Range<DocId>) -> Range<RowId> {
        match self {
            ColumnIndex::Empty { .. } => 0..0,
-            ColumnIndex::Full => doc_id,
+            ColumnIndex::Full => doc_id_range,
            ColumnIndex::Optional(optional_index) => {
-                let row_start = optional_index.rank(doc_id.start);
-                let row_end = optional_index.rank(doc_id.end);
+                let row_start = optional_index.rank(doc_id_range.start);
+                let row_end = optional_index.rank(doc_id_range.end);
                row_start..row_end
            }
-            ColumnIndex::Multivalued(multivalued_index) => {
-                let end_docid = doc_id.end.min(multivalued_index.num_docs() - 1) + 1;
-                let start_docid = doc_id.start.min(end_docid);
+            ColumnIndex::Multivalued(multivalued_index) => match multivalued_index {
+                MultiValueIndex::MultiValueIndexV1(index) => {
+                    let row_start = index.start_index_column.get_val(doc_id_range.start);
+                    let row_end = index.start_index_column.get_val(doc_id_range.end);
+                    row_start..row_end
+                }
+                MultiValueIndex::MultiValueIndexV2(index) => {
+                    // In this case we use the optional_index to select the next values
+                    // that are valid. There are different cases to consider.
+                    // "Does not exist" below means the docid is not in the optional
+                    // index, because it has no values.
+                    // * doc_id_range may cover a range of docids which are non-existent
+                    // => rank will give us the next document outside the range that has a
+                    //   value. Both bounds get the same rank and therefore return a zero
+                    //   range.
+                    //
+                    // * doc_id_range.start and doc_id_range.end may not exist, but docids in
+                    // between may have values
+                    // => rank will give us the next document outside the range with a value.
+                    //
+                    // * doc_id_range.start may not exist but doc_id_range.end may exist
+                    // * doc_id_range.start may exist but doc_id_range.end may not exist
+                    // * doc_id_range.start and doc_id_range.end may exist
+                    // => rank on doc_id_range.end will give us the next value, which matches
+                    // how the `start_index_column` works, so we get the value start of the next
+                    // docid which we use to create the exclusive range.
+                    //
+                    let rank_start = index.optional_index.rank(doc_id_range.start);
+                    let row_start = index.start_index_column.get_val(rank_start);
+                    let rank_end = index.optional_index.rank(doc_id_range.end);
+                    let row_end = index.start_index_column.get_val(rank_end);

-                let row_start = multivalued_index.start_index_column.get_val(start_docid);
-                let row_end = multivalued_index.start_index_column.get_val(end_docid);
-
-                row_start..row_end
-            }
+                    row_start..row_end
+                }
+            },
        }
    }

--- a/columnar/src/column_index/multivalued_index.rs
+++ b/columnar/src/column_index/multivalued_index.rs
@@ -3,64 +3,98 @@ use std::io::Write;
 use std::ops::Range;
 use std::sync::Arc;

-use common::OwnedBytes;
+use common::{CountingWriter, OwnedBytes};

+use super::optional_index::{open_optional_index, serialize_optional_index};
+use super::{OptionalIndex, SerializableOptionalIndex, Set};
 use crate::column_values::{
    load_u64_based_column_values, serialize_u64_based_column_values, CodecType, ColumnValues,
 };
 use crate::iterable::Iterable;
-use crate::{DocId, RowId};
+use crate::{DocId, RowId, Version};
+
+pub struct SerializableMultivalueIndex<'a> {
+    pub doc_ids_with_values: SerializableOptionalIndex<'a>,
+    pub start_offsets: Box<dyn Iterable<u32> + 'a>,
+}

 pub fn serialize_multivalued_index(
-    multivalued_index: &dyn Iterable<RowId>,
+    multivalued_index: &SerializableMultivalueIndex,
    output: &mut impl Write,
 ) -> io::Result<()> {
+    let SerializableMultivalueIndex {
+        doc_ids_with_values,
+        start_offsets,
+    } = multivalued_index;
+    let mut count_writer = CountingWriter::wrap(output);
+    let SerializableOptionalIndex {
+        non_null_row_ids,
+        num_rows,
+    } = doc_ids_with_values;
+    serialize_optional_index(&**non_null_row_ids, *num_rows, &mut count_writer)?;
+    let optional_len = count_writer.written_bytes() as u32;
+    let output = count_writer.finish();
    serialize_u64_based_column_values(
-        multivalued_index,
+        &**start_offsets,
        &[CodecType::Bitpacked, CodecType::Linear],
        output,
    )?;
+    output.write_all(&optional_len.to_le_bytes())?;
    Ok(())
 }

-pub fn open_multivalued_index(bytes: OwnedBytes) -> io::Result<MultiValueIndex> {
-    let start_index_column: Arc<dyn ColumnValues<RowId>> = load_u64_based_column_values(bytes)?;
-    Ok(MultiValueIndex { start_index_column })
+pub fn open_multivalued_index(
+    bytes: OwnedBytes,
+    format_version: Version,
+) -> io::Result<MultiValueIndex> {
+    match format_version {
+        Version::V1 => {
+            let start_index_column: Arc<dyn ColumnValues<RowId>> =
+                load_u64_based_column_values(bytes)?;
+            Ok(MultiValueIndex::MultiValueIndexV1(MultiValueIndexV1 {
+                start_index_column,
+            }))
+        }
+        Version::V2 => {
+            let (body_bytes, optional_index_len) = bytes.rsplit(4);
+            let optional_index_len =
+                u32::from_le_bytes(optional_index_len.as_slice().try_into().unwrap());
+            let (optional_index_bytes, start_index_bytes) =
+                body_bytes.split(optional_index_len as usize);
+            let optional_index = open_optional_index(optional_index_bytes)?;
+            let start_index_column: Arc<dyn ColumnValues<RowId>> =
+                load_u64_based_column_values(start_index_bytes)?;
+            Ok(MultiValueIndex::MultiValueIndexV2(MultiValueIndexV2 {
+                optional_index,
+                start_index_column,
+            }))
+        }
+    }
 }

 #[derive(Clone)]
 /// Index to resolve value range for given doc_id.
 /// Starts at 0.
-pub struct MultiValueIndex {
+pub enum MultiValueIndex {
+    MultiValueIndexV1(MultiValueIndexV1),
+    MultiValueIndexV2(MultiValueIndexV2),
+}
+
+#[derive(Clone)]
+/// Index to resolve value range for given doc_id.
+/// Starts at 0.
+pub struct MultiValueIndexV1 {
    pub start_index_column: Arc<dyn crate::ColumnValues<RowId>>,
 }

-impl std::fmt::Debug for MultiValueIndex {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        f.debug_struct("MultiValuedIndex")
-            .field("num_rows", &self.start_index_column.num_vals())
-            .finish_non_exhaustive()
-    }
-}
-
-impl From<Arc<dyn ColumnValues<RowId>>> for MultiValueIndex {
-    fn from(start_index_column: Arc<dyn ColumnValues<RowId>>) -> Self {
-        MultiValueIndex { start_index_column }
-    }
-}
-
-impl MultiValueIndex {
-    pub fn for_test(start_offsets: &[RowId]) -> MultiValueIndex {
-        let mut buffer = Vec::new();
-        serialize_multivalued_index(&start_offsets, &mut buffer).unwrap();
-        let bytes = OwnedBytes::new(buffer);
-        open_multivalued_index(bytes).unwrap()
-    }
-
+impl MultiValueIndexV1 {
    /// Returns `[start, end)`, such that the values associated with
    /// the given document are `start..end`.
    #[inline]
    pub(crate) fn range(&self, doc_id: DocId) -> Range<RowId> {
+        if doc_id >= self.num_docs() {
+            return 0..0;
+        }
        let start = self.start_index_column.get_val(doc_id);
        let end = self.start_index_column.get_val(doc_id + 1);
        start..end
@@ -83,7 +117,6 @@ impl MultiValueIndex {
    ///
    /// TODO: Instead of a linear scan we can employ a exponential search into binary search to
    /// match a docid to its value position.
-    #[allow(clippy::bool_to_int_with_if)]
    pub(crate) fn select_batch_in_place(&self, docid_start: DocId, ranks: &mut Vec<u32>) {
        if ranks.is_empty() {
            return;
@@ -111,11 +144,170 @@ impl MultiValueIndex {
    }
 }

+#[derive(Clone)]
+/// Index to resolve value range for given doc_id.
+/// Starts at 0.
+pub struct MultiValueIndexV2 {
+    pub optional_index: OptionalIndex,
+    pub start_index_column: Arc<dyn crate::ColumnValues<RowId>>,
+}
+
+impl std::fmt::Debug for MultiValueIndex {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let index = match self {
+            MultiValueIndex::MultiValueIndexV1(idx) => &idx.start_index_column,
+            MultiValueIndex::MultiValueIndexV2(idx) => &idx.start_index_column,
+        };
+        f.debug_struct("MultiValuedIndex")
+            .field("num_rows", &index.num_vals())
+            .finish_non_exhaustive()
+    }
+}
+
+impl MultiValueIndex {
+    pub fn for_test(start_offsets: &[RowId]) -> MultiValueIndex {
+        assert!(!start_offsets.is_empty());
+        assert_eq!(start_offsets[0], 0);
+        let mut doc_with_values = Vec::new();
+        let mut compact_start_offsets: Vec<u32> = vec![0];
+        for doc in 0..start_offsets.len() - 1 {
+            if start_offsets[doc] < start_offsets[doc + 1] {
+                doc_with_values.push(doc as RowId);
+                compact_start_offsets.push(start_offsets[doc + 1]);
+            }
+        }
+        let serializable_multivalued_index = SerializableMultivalueIndex {
+            doc_ids_with_values: SerializableOptionalIndex {
+                non_null_row_ids: Box::new(&doc_with_values[..]),
+                num_rows: start_offsets.len() as u32 - 1,
+            },
+            start_offsets: Box::new(&compact_start_offsets[..]),
+        };
+        let mut buffer = Vec::new();
+        serialize_multivalued_index(&serializable_multivalued_index, &mut buffer).unwrap();
+        let bytes = OwnedBytes::new(buffer);
+        open_multivalued_index(bytes, Version::V2).unwrap()
+    }
+
+    pub fn get_start_index_column(&self) -> &Arc<dyn crate::ColumnValues<RowId>> {
+        match self {
+            MultiValueIndex::MultiValueIndexV1(idx) => &idx.start_index_column,
+            MultiValueIndex::MultiValueIndexV2(idx) => &idx.start_index_column,
+        }
+    }
+
+    /// Returns `[start, end)` values range, such that the values associated with
+    /// the given document are `start..end`.
+    #[inline]
+    pub(crate) fn range(&self, doc_id: DocId) -> Range<RowId> {
+        match self {
+            MultiValueIndex::MultiValueIndexV1(idx) => idx.range(doc_id),
+            MultiValueIndex::MultiValueIndexV2(idx) => idx.range(doc_id),
+        }
+    }
+
+    /// Returns the number of documents in the index.
+    #[inline]
+    pub fn num_docs(&self) -> u32 {
+        match self {
+            MultiValueIndex::MultiValueIndexV1(idx) => idx.start_index_column.num_vals() - 1,
+            MultiValueIndex::MultiValueIndexV2(idx) => idx.optional_index.num_docs(),
+        }
+    }
+
+    /// Converts a list of ranks (row ids of values) in a 1:n index to the corresponding list of
+    /// docids. Positions are converted inplace to docids.
+    ///
+    /// Since there is no index for value pos -> docid, but docid -> value pos range, we scan the
+    /// index.
+    ///
+    /// Correctness: positions need to be sorted. idx_reader needs to contain monotonically
+    /// increasing positions.
+    ///
+    /// TODO: Instead of a linear scan we can employ an exponential search into binary search to
+    /// match a docid to its value position.
+    pub(crate) fn select_batch_in_place(&self, docid_start: DocId, ranks: &mut Vec<u32>) {
+        match self {
+            MultiValueIndex::MultiValueIndexV1(idx) => {
+                idx.select_batch_in_place(docid_start, ranks)
+            }
+            MultiValueIndex::MultiValueIndexV2(idx) => {
+                idx.select_batch_in_place(docid_start, ranks)
+            }
+        }
+    }
+}
+impl MultiValueIndexV2 {
+    /// Returns `[start, end)`, such that the values associated with
+    /// the given document are `start..end`.
+    #[inline]
+    pub(crate) fn range(&self, doc_id: DocId) -> Range<RowId> {
+        let Some(rank) = self.optional_index.rank_if_exists(doc_id) else {
+            return 0..0;
+        };
+        let start = self.start_index_column.get_val(rank);
+        let end = self.start_index_column.get_val(rank + 1);
+        start..end
+    }
+
+    /// Returns the number of documents in the index.
+    #[inline]
+    pub fn num_docs(&self) -> u32 {
+        self.optional_index.num_docs()
+    }
+
+    /// Converts a list of ranks (row ids of values) in a 1:n index to the corresponding list of
+    /// docids. Positions are converted inplace to docids.
+    ///
+    /// Since there is no index for value pos -> docid, but docid -> value pos range, we scan the
+    /// index.
+    ///
+    /// Correctness: positions need to be sorted. idx_reader needs to contain monotonically
+    /// increasing positions.
+    ///
+    /// TODO: Instead of a linear scan we can employ an exponential search into binary search to
+    /// match a docid to its value position.
+    pub(crate) fn select_batch_in_place(&self, docid_start: DocId, ranks: &mut Vec<u32>) {
+        if ranks.is_empty() {
+            return;
+        }
+        let mut cur_pos_in_idx = self.optional_index.rank(docid_start);
+        let mut last_doc = None;
+
+        assert!(cur_pos_in_idx <= ranks[0]);
+
+        let mut write_doc_pos = 0;
+        for i in 0..ranks.len() {
+            let pos = ranks[i];
+            loop {
+                let end = self.start_index_column.get_val(cur_pos_in_idx + 1);
+                if end > pos {
+                    ranks[write_doc_pos] = cur_pos_in_idx;
+                    write_doc_pos += if last_doc == Some(cur_pos_in_idx) {
+                        0
+                    } else {
+                        1
+                    };
+                    last_doc = Some(cur_pos_in_idx);
+                    break;
+                }
+                cur_pos_in_idx += 1;
+            }
+        }
+        ranks.truncate(write_doc_pos);
+
+        for rank in ranks.iter_mut() {
+            *rank = self.optional_index.select(*rank);
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use std::ops::Range;

    use super::MultiValueIndex;
+    use crate::{ColumnarReader, DynamicColumn};

    fn index_to_pos_helper(
        index: &MultiValueIndex,
@@ -134,6 +326,7 @@ mod tests {
        let positions = &[10u32, 11, 15, 20, 21, 22];
        assert_eq!(index_to_pos_helper(&index, 0..5, positions), vec![1, 3, 4]);
        assert_eq!(index_to_pos_helper(&index, 1..5, positions), vec![1, 3, 4]);
+
        assert_eq!(index_to_pos_helper(&index, 0..5, &[9]), vec![0]);
        assert_eq!(index_to_pos_helper(&index, 1..5, &[10]), vec![1]);
        assert_eq!(index_to_pos_helper(&index, 1..5, &[11]), vec![1]);
@@ -141,4 +334,67 @@ mod tests {
        assert_eq!(index_to_pos_helper(&index, 2..5, &[12, 14]), vec![2]);
        assert_eq!(index_to_pos_helper(&index, 2..5, &[12, 14, 15]), vec![2, 3]);
    }
+
+    #[test]
+    fn test_range_to_rowids() {
+        use crate::ColumnarWriter;
+
+        let mut columnar_writer = ColumnarWriter::default();
+
+        // This column gets coerced to u64
+        columnar_writer.record_numerical(1, "full", u64::MAX);
+        columnar_writer.record_numerical(1, "full", u64::MAX);
+
+        columnar_writer.record_numerical(5, "full", u64::MAX);
+        columnar_writer.record_numerical(5, "full", u64::MAX);
+
+        let mut wrt: Vec<u8> = Vec::new();
+        columnar_writer.serialize(7, &mut wrt).unwrap();
+
+        let reader = ColumnarReader::open(wrt).unwrap();
+        // Open the column as u64
+        let column = reader.read_columns("full").unwrap()[0]
+            .open()
+            .unwrap()
+            .coerce_numerical(crate::NumericalType::U64)
+            .unwrap();
+        let DynamicColumn::U64(column) = column else {
+            panic!();
+        };
+
+        let row_id_range = column.index.docid_range_to_rowids(1..2);
+        assert_eq!(row_id_range, 0..2);
+
+        let row_id_range = column.index.docid_range_to_rowids(0..2);
+        assert_eq!(row_id_range, 0..2);
+
+        let row_id_range = column.index.docid_range_to_rowids(0..4);
+        assert_eq!(row_id_range, 0..2);
+
+        let row_id_range = column.index.docid_range_to_rowids(3..4);
+        assert_eq!(row_id_range, 2..2);
+
+        let row_id_range = column.index.docid_range_to_rowids(1..6);
+        assert_eq!(row_id_range, 0..4);
+
+        let row_id_range = column.index.docid_range_to_rowids(3..6);
+        assert_eq!(row_id_range, 2..4);
+
+        let row_id_range = column.index.docid_range_to_rowids(0..6);
+        assert_eq!(row_id_range, 0..4);
+
+        let row_id_range = column.index.docid_range_to_rowids(0..6);
+        assert_eq!(row_id_range, 0..4);
+
+        let check = |range, expected| {
+            let full_range = 0..=u64::MAX;
+            let mut docids = Vec::new();
+            column.get_docids_for_value_range(full_range, range, &mut docids);
+            assert_eq!(docids, expected);
+        };
+
+        // check(0..1, vec![]);
+        // check(0..2, vec![1]);
+        check(1..2, vec![1]);
+    }
 }
--- a/columnar/src/column_index/optional_index/mod.rs
+++ b/columnar/src/column_index/optional_index/mod.rs
@@ -21,8 +21,6 @@ const DENSE_BLOCK_THRESHOLD: u32 =

 const ELEMENTS_PER_BLOCK: u32 = u16::MAX as u32 + 1;

-const BLOCK_SIZE: RowId = 1 << 16;
-
 #[derive(Copy, Clone, Debug)]
 struct BlockMeta {
    non_null_rows_before_block: u32,
@@ -88,8 +86,14 @@ pub struct OptionalIndex {
    block_metas: Arc<[BlockMeta]>,
 }

+impl<'a> Iterable<u32> for &'a OptionalIndex {
+    fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
+        Box::new(self.iter_rows())
+    }
+}
+
 impl std::fmt::Debug for OptionalIndex {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.debug_struct("OptionalIndex")
            .field("num_rows", &self.num_rows)
            .field("num_non_null_rows", &self.num_non_null_rows)
@@ -109,8 +113,8 @@ struct RowAddr {
 #[inline(always)]
 fn row_addr_from_row_id(row_id: RowId) -> RowAddr {
    RowAddr {
-        block_id: (row_id / BLOCK_SIZE) as u16,
-        in_block_row_id: (row_id % BLOCK_SIZE) as u16,
+        block_id: (row_id / ELEMENTS_PER_BLOCK) as u16,
+        in_block_row_id: (row_id % ELEMENTS_PER_BLOCK) as u16,
    }
 }

@@ -185,14 +189,20 @@ impl Set<RowId> for OptionalIndex {
        }
    }

+    /// Any `doc_id` value is allowed.
+    /// In particular, `doc_id == num_rows` is allowed.
    #[inline]
    fn rank(&self, doc_id: DocId) -> RowId {
+        if doc_id >= self.num_docs() {
+            return self.num_non_nulls();
+        }
        let RowAddr {
            block_id,
            in_block_row_id,
        } = row_addr_from_row_id(doc_id);
        let block_meta = self.block_metas[block_id as usize];
        let block = self.block(block_meta);
+
        let block_offset_row_id = match block {
            Block::Dense(dense_block) => dense_block.rank(in_block_row_id),
            Block::Sparse(sparse_block) => sparse_block.rank(in_block_row_id),
@@ -200,13 +210,15 @@ impl Set<RowId> for OptionalIndex {
        block_meta.non_null_rows_before_block + block_offset_row_id
    }

+    /// Any `doc_id` value is allowed.
+    /// In particular, `doc_id == num_rows` is allowed.
    #[inline]
    fn rank_if_exists(&self, doc_id: DocId) -> Option<RowId> {
        let RowAddr {
            block_id,
            in_block_row_id,
        } = row_addr_from_row_id(doc_id);
-        let block_meta = self.block_metas[block_id as usize];
+        let block_meta = *self.block_metas.get(block_id as usize)?;
        let block = self.block(block_meta);
        let block_offset_row_id = match block {
            Block::Dense(dense_block) => dense_block.rank_if_exists(in_block_row_id),
@@ -491,7 +503,7 @@ fn deserialize_optional_index_block_metadatas(
        non_null_rows_before_block += num_non_null_rows;
    }
    block_metas.resize(
-        ((num_rows + BLOCK_SIZE - 1) / BLOCK_SIZE) as usize,
+        ((num_rows + ELEMENTS_PER_BLOCK - 1) / ELEMENTS_PER_BLOCK) as usize,
        BlockMeta {
            non_null_rows_before_block,
            start_byte_offset,
--- a/columnar/src/column_index/optional_index/set.rs
+++ b/columnar/src/column_index/optional_index/set.rs
@@ -28,10 +28,11 @@ pub trait Set<T> {
    /// Returns true if the elements is contained in the Set
    fn contains(&self, el: T) -> bool;

-    /// Returns the number of rows in the set that are < `el`
+    /// Returns the element's rank (its position in the set).
+    /// If the set does not contain the element, it returns the rank of the next existing element.
    fn rank(&self, el: T) -> T;

-    /// If the set contains `el` returns the element rank.
+    /// If the set contains `el`, returns the element's rank (its position in the set).
    /// If the set does not contain the element, it returns `None`.
    fn rank_if_exists(&self, el: T) -> Option<T>;

@@ -39,7 +40,8 @@ pub trait Set<T> {
    ///
    /// # Panics
    ///
-    /// May panic if rank is greater than the number of elements in the Set.
+    /// May panic if rank is greater or equal to the number of
+    /// elements in the Set.
    fn select(&self, rank: T) -> T;

    /// Creates a brand new select cursor.
--- a/columnar/src/column_index/optional_index/set_block/dense.rs
+++ b/columnar/src/column_index/optional_index/set_block/dense.rs
@@ -1,4 +1,3 @@
-use std::convert::TryInto;
 use std::io::{self, Write};

 use common::BinarySerializable;
--- a/columnar/src/column_index/optional_index/set_block/tests.rs
+++ b/columnar/src/column_index/optional_index/set_block/tests.rs
@@ -22,8 +22,8 @@ fn test_set_helper<C: SetCodec<Item = u16>>(vals: &[u16]) -> usize {
            vals.iter().cloned().take_while(|v| *v < val).count() as u16
        );
    }
-    for rank in 0..vals.len() {
-        assert_eq!(tested_set.select(rank as u16), vals[rank]);
+    for (rank, val) in vals.iter().enumerate() {
+        assert_eq!(tested_set.select(rank as u16), *val);
    }
    buffer.len()
 }
@@ -107,3 +107,41 @@ fn test_simple_translate_codec_idx_to_original_idx_dense() {
        assert_eq!(i, select_cursor.select(i));
    }
 }
+
+#[test]
+fn test_simple_translate_idx_to_value_idx_dense() {
+    let mut buffer = Vec::new();
+    DenseBlockCodec::serialize([1, 10].iter().copied(), &mut buffer).unwrap();
+    let tested_set = DenseBlockCodec::open(buffer.as_slice());
+    assert!(tested_set.contains(1));
+    assert!(!tested_set.contains(2));
+    assert_eq!(tested_set.rank(0), 0);
+    assert_eq!(tested_set.rank(1), 0);
+    for rank in 2..10 {
+        // elements missing from the set get the rank of the next larger element
+        assert_eq!(tested_set.rank_if_exists(rank), None);
+        assert_eq!(tested_set.rank(rank), 1);
+    }
+    assert_eq!(tested_set.rank(10), 1);
+}
+
+#[test]
+fn test_simple_translate_idx_to_value_idx_sparse() {
+    let mut buffer = Vec::new();
+    SparseBlockCodec::serialize([1, 10].iter().copied(), &mut buffer).unwrap();
+    let tested_set = SparseBlockCodec::open(buffer.as_slice());
+    assert!(tested_set.contains(1));
+    assert!(!tested_set.contains(2));
+    assert_eq!(tested_set.rank(0), 0);
+    assert_eq!(tested_set.select(tested_set.rank(0)), 1);
+    assert_eq!(tested_set.rank(1), 0);
+    assert_eq!(tested_set.select(tested_set.rank(1)), 1);
+    for rank in 2..10 {
+        // elements missing from the set get the rank of the next larger element
+        assert_eq!(tested_set.rank_if_exists(rank), None);
+        assert_eq!(tested_set.rank(rank), 1);
+        assert_eq!(tested_set.select(tested_set.rank(rank)), 10);
+    }
+    assert_eq!(tested_set.rank(10), 1);
+    assert_eq!(tested_set.select(tested_set.rank(10)), 10);
+}
--- a/columnar/src/column_index/optional_index/tests.rs
+++ b/columnar/src/column_index/optional_index/tests.rs
@@ -1,8 +1,29 @@
-use proptest::prelude::{any, prop, *};
-use proptest::strategy::Strategy;
+use proptest::prelude::*;
 use proptest::{prop_oneof, proptest};

 use super::*;
+use crate::{ColumnarReader, ColumnarWriter, DynamicColumnHandle};
+
+#[test]
+fn test_optional_index_bug_2293() {
+    // tests for panic in docid_range_to_rowids for docid == num_docs
+    test_optional_index_with_num_docs(ELEMENTS_PER_BLOCK - 1);
+    test_optional_index_with_num_docs(ELEMENTS_PER_BLOCK);
+    test_optional_index_with_num_docs(ELEMENTS_PER_BLOCK + 1);
+}
+fn test_optional_index_with_num_docs(num_docs: u32) {
+    let mut dataframe_writer = ColumnarWriter::default();
+    dataframe_writer.record_numerical(100, "score", 80i64);
+    let mut buffer: Vec<u8> = Vec::new();
+    dataframe_writer.serialize(num_docs, &mut buffer).unwrap();
+    let columnar = ColumnarReader::open(buffer).unwrap();
+    assert_eq!(columnar.num_columns(), 1);
+    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("score").unwrap();
+    assert_eq!(cols.len(), 1);
+
+    let col = cols[0].open().unwrap();
+    col.column_index().docid_range_to_rowids(0..num_docs);
+}

 #[test]
 fn test_dense_block_threshold() {
@@ -35,7 +56,7 @@ proptest! {

 #[test]
 fn test_with_random_sets_simple() {
-    let vals = 10..BLOCK_SIZE * 2;
+    let vals = 10..ELEMENTS_PER_BLOCK * 2;
    let mut out: Vec<u8> = Vec::new();
    serialize_optional_index(&vals, 100, &mut out).unwrap();
    let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
@@ -171,7 +192,7 @@ fn test_optional_index_rank() {
    test_optional_index_rank_aux(&[0u32, 1u32]);
    let mut block = Vec::new();
    block.push(3u32);
-    block.extend((0..BLOCK_SIZE).map(|i| i + BLOCK_SIZE + 1));
+    block.extend((0..ELEMENTS_PER_BLOCK).map(|i| i + ELEMENTS_PER_BLOCK + 1));
    test_optional_index_rank_aux(&block);
 }

@@ -185,8 +206,8 @@ fn test_optional_index_iter_empty_one() {
 fn test_optional_index_iter_dense_block() {
    let mut block = Vec::new();
    block.push(3u32);
-    block.extend((0..BLOCK_SIZE).map(|i| i + BLOCK_SIZE + 1));
-    test_optional_index_iter_aux(&block, 3 * BLOCK_SIZE);
+    block.extend((0..ELEMENTS_PER_BLOCK).map(|i| i + ELEMENTS_PER_BLOCK + 1));
+    test_optional_index_iter_aux(&block, 3 * ELEMENTS_PER_BLOCK);
 }

 #[test]
--- a/columnar/src/column_index/serialize.rs
+++ b/columnar/src/column_index/serialize.rs
@@ -3,28 +3,39 @@ use std::io::Write;

 use common::{CountingWriter, OwnedBytes};

+use super::multivalued_index::SerializableMultivalueIndex;
+use super::OptionalIndex;
 use crate::column_index::multivalued_index::serialize_multivalued_index;
 use crate::column_index::optional_index::serialize_optional_index;
 use crate::column_index::ColumnIndex;
 use crate::iterable::Iterable;
-use crate::{Cardinality, RowId};
+use crate::{Cardinality, RowId, Version};
+
+pub struct SerializableOptionalIndex<'a> {
+    pub non_null_row_ids: Box<dyn Iterable<RowId> + 'a>,
+    pub num_rows: RowId,
+}
+
+impl<'a> From<&'a OptionalIndex> for SerializableOptionalIndex<'a> {
+    fn from(optional_index: &'a OptionalIndex) -> Self {
+        SerializableOptionalIndex {
+            non_null_row_ids: Box::new(optional_index),
+            num_rows: optional_index.num_docs(),
+        }
+    }
+}

 pub enum SerializableColumnIndex<'a> {
    Full,
-    Optional {
-        non_null_row_ids: Box<dyn Iterable<RowId> + 'a>,
-        num_rows: RowId,
-    },
-    // TODO remove the Arc<dyn> apart from serialization this is not
-    // dynamic at all.
-    Multivalued(Box<dyn Iterable<RowId> + 'a>),
+    Optional(SerializableOptionalIndex<'a>),
+    Multivalued(SerializableMultivalueIndex<'a>),
 }

 impl<'a> SerializableColumnIndex<'a> {
    pub fn get_cardinality(&self) -> Cardinality {
        match self {
            SerializableColumnIndex::Full => Cardinality::Full,
-            SerializableColumnIndex::Optional { .. } => Cardinality::Optional,
+            SerializableColumnIndex::Optional(_) => Cardinality::Optional,
            SerializableColumnIndex::Multivalued(_) => Cardinality::Multivalued,
        }
    }
@@ -40,12 +51,12 @@ pub fn serialize_column_index(
    output.write_all(&[cardinality])?;
    match column_index {
        SerializableColumnIndex::Full => {}
-        SerializableColumnIndex::Optional {
+        SerializableColumnIndex::Optional(SerializableOptionalIndex {
            non_null_row_ids,
            num_rows,
-        } => serialize_optional_index(non_null_row_ids.as_ref(), num_rows, &mut output)?,
+        }) => serialize_optional_index(non_null_row_ids.as_ref(), num_rows, &mut output)?,
        SerializableColumnIndex::Multivalued(multivalued_index) => {
-            serialize_multivalued_index(&*multivalued_index, &mut output)?
+            serialize_multivalued_index(&multivalued_index, &mut output)?
        }
    }
    let column_index_num_bytes = output.written_bytes() as u32;
@@ -53,7 +64,10 @@ pub fn serialize_column_index(
 }

 /// Open a serialized column index.
-pub fn open_column_index(mut bytes: OwnedBytes) -> io::Result<ColumnIndex> {
+pub fn open_column_index(
+    mut bytes: OwnedBytes,
+    format_version: Version,
+) -> io::Result<ColumnIndex> {
    if bytes.is_empty() {
        return Err(io::Error::new(
            io::ErrorKind::UnexpectedEof,
@@ -70,7 +84,8 @@ pub fn open_column_index(mut bytes: OwnedBytes) -> io::Result<ColumnIndex> {
            Ok(ColumnIndex::Optional(optional_index))
        }
        Cardinality::Multivalued => {
-            let multivalue_index = super::multivalued_index::open_multivalued_index(bytes)?;
+            let multivalue_index =
+                super::multivalued_index::open_multivalued_index(bytes, format_version)?;
            Ok(ColumnIndex::Multivalued(multivalue_index))
        }
    }
--- a/columnar/src/column_values/merge.rs
+++ b/columnar/src/column_values/merge.rs
@@ -10,7 +10,7 @@ pub(crate) struct MergedColumnValues<'a, T> {
    pub(crate) merge_row_order: &'a MergeRowOrder,
 }

-impl<'a, T: Copy + PartialOrd + Debug> Iterable<T> for MergedColumnValues<'a, T> {
+impl<'a, T: Copy + PartialOrd + Debug + 'static> Iterable<T> for MergedColumnValues<'a, T> {
    fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
        match self.merge_row_order {
            MergeRowOrder::Stack(_) => Box::new(
--- a/columnar/src/column_values/mod.rs
+++ b/columnar/src/column_values/mod.rs
@@ -10,6 +10,7 @@ use std::fmt::Debug;
 use std::ops::{Range, RangeInclusive};
 use std::sync::Arc;

+use downcast_rs::DowncastSync;
 pub use monotonic_mapping::{MonotonicallyMappableToU64, StrictlyMonotonicFn};
 pub use monotonic_mapping_u128::MonotonicallyMappableToU128;

@@ -25,7 +26,10 @@ mod monotonic_column;

 pub(crate) use merge::MergedColumnValues;
 pub use stats::ColumnStats;
-pub use u128_based::{open_u128_mapped, serialize_column_values_u128};
+pub use u128_based::{
+    open_u128_as_compact_u64, open_u128_mapped, serialize_column_values_u128,
+    CompactSpaceU64Accessor,
+};
 pub use u64_based::{
    load_u64_based_column_values, serialize_and_load_u64_based_column_values,
    serialize_u64_based_column_values, CodecType, ALL_U64_CODEC_TYPES,
@@ -41,7 +45,7 @@ use crate::RowId;
 ///
 /// Any methods with a default and specialized implementation need to be called in the
 /// wrappers that implement the trait: Arc and MonotonicMappingColumn
-pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
+pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
    /// Return the value associated with the given idx.
    ///
    /// This accessor should return as fast as possible.
@@ -68,11 +72,40 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
            out_x4[3] = self.get_val(idx_x4[3]);
        }

-        let step_size = 4;
-        let cutoff = indexes.len() - indexes.len() % step_size;
+        let out_and_idx_chunks = output
+            .chunks_exact_mut(4)
+            .into_remainder()
+            .iter_mut()
+            .zip(indexes.chunks_exact(4).remainder());
+        for (out, idx) in out_and_idx_chunks {
+            *out = self.get_val(*idx);
+        }
+    }

-        for idx in cutoff..indexes.len() {
-            output[idx] = self.get_val(indexes[idx]);
+    /// Allows pushing down multiple fetch calls, to avoid dynamic dispatch overhead.
+    /// The slightly weird `Option<T>` in output allows pushdown to full columns.
+    ///
+    /// `indexes` and `output` must have the same length.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the lengths differ. May panic if an index in `indexes` is out of bounds.
+    fn get_vals_opt(&self, indexes: &[u32], output: &mut [Option<T>]) {
+        assert!(indexes.len() == output.len());
+        let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4));
+        for (out_x4, idx_x4) in out_and_idx_chunks {
+            out_x4[0] = Some(self.get_val(idx_x4[0]));
+            out_x4[1] = Some(self.get_val(idx_x4[1]));
+            out_x4[2] = Some(self.get_val(idx_x4[2]));
+            out_x4[3] = Some(self.get_val(idx_x4[3]));
+        }
+        let out_and_idx_chunks = output
+            .chunks_exact_mut(4)
+            .into_remainder()
+            .iter_mut()
+            .zip(indexes.chunks_exact(4).remainder());
+        for (out, idx) in out_and_idx_chunks {
+            *out = Some(self.get_val(*idx));
         }
     }

@@ -101,7 +134,7 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
        row_id_hits: &mut Vec<RowId>,
    ) {
        let row_id_range = row_id_range.start..row_id_range.end.min(self.num_vals());
-        for idx in row_id_range.start..row_id_range.end {
+        for idx in row_id_range {
            let val = self.get_val(idx);
            if value_range.contains(&val) {
                row_id_hits.push(idx);
@@ -139,6 +172,7 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
        Box::new((0..self.num_vals()).map(|idx| self.get_val(idx)))
    }
 }
+downcast_rs::impl_downcast!(sync ColumnValues<T> where T: PartialOrd);

 /// Empty column of values.
 pub struct EmptyColumnValues;
@@ -161,12 +195,17 @@ impl<T: PartialOrd + Default> ColumnValues<T> for EmptyColumnValues {
    }
 }

-impl<T: Copy + PartialOrd + Debug> ColumnValues<T> for Arc<dyn ColumnValues<T>> {
+impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnValues<T>> {
    #[inline(always)]
    fn get_val(&self, idx: u32) -> T {
        self.as_ref().get_val(idx)
    }

+    #[inline(always)]
+    fn get_vals_opt(&self, indexes: &[u32], output: &mut [Option<T>]) {
+        self.as_ref().get_vals_opt(indexes, output)
+    }
+
    #[inline(always)]
    fn min_value(&self) -> T {
        self.as_ref().min_value()
--- a/columnar/src/column_values/monotonic_column.rs
+++ b/columnar/src/column_values/monotonic_column.rs
@@ -31,10 +31,10 @@ pub fn monotonic_map_column<C, T, Input, Output>(
    monotonic_mapping: T,
 ) -> impl ColumnValues<Output>
 where
-    C: ColumnValues<Input>,
-    T: StrictlyMonotonicFn<Input, Output> + Send + Sync,
-    Input: PartialOrd + Debug + Send + Sync + Clone,
-    Output: PartialOrd + Debug + Send + Sync + Clone,
+    C: ColumnValues<Input> + 'static,
+    T: StrictlyMonotonicFn<Input, Output> + Send + Sync + 'static,
+    Input: PartialOrd + Debug + Send + Sync + Clone + 'static,
+    Output: PartialOrd + Debug + Send + Sync + Clone + 'static,
 {
    MonotonicMappingColumn {
        from_column,
@@ -45,10 +45,10 @@ where

 impl<C, T, Input, Output> ColumnValues<Output> for MonotonicMappingColumn<C, T, Input>
 where
-    C: ColumnValues<Input>,
-    T: StrictlyMonotonicFn<Input, Output> + Send + Sync,
-    Input: PartialOrd + Send + Debug + Sync + Clone,
-    Output: PartialOrd + Send + Debug + Sync + Clone,
+    C: ColumnValues<Input> + 'static,
+    T: StrictlyMonotonicFn<Input, Output> + Send + Sync + 'static,
+    Input: PartialOrd + Send + Debug + Sync + Clone + 'static,
+    Output: PartialOrd + Send + Debug + Sync + Clone + 'static,
 {
    #[inline(always)]
    fn get_val(&self, idx: u32) -> Output {
@@ -107,7 +107,7 @@ mod tests {
    #[test]
    fn test_monotonic_mapping_iter() {
        let vals: Vec<u64> = (0..100u64).map(|el| el * 10).collect();
-        let col = VecColumn::from(&vals);
+        let col = VecColumn::from(vals);
        let mapped = monotonic_map_column(
            col,
            StrictlyMonotonicMappingInverter::from(StrictlyMonotonicMappingToInternal::<i64>::new()),
--- a/columnar/src/column_values/u128_based/compact_space/mod.rs
+++ b/columnar/src/column_values/u128_based/compact_space/mod.rs
@@ -22,7 +22,7 @@ mod build_compact_space;

 use build_compact_space::get_compact_space;
 use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
-use tantivy_bitpacker::{self, BitPacker, BitUnpacker};
+use tantivy_bitpacker::{BitPacker, BitUnpacker};

 use crate::column_values::ColumnValues;
 use crate::RowId;
@@ -148,7 +148,7 @@ impl CompactSpace {
            .binary_search_by_key(&compact, |range_mapping| range_mapping.compact_start)
            // Correctness: Overflow. The first range starts at compact space 0, the error from
            // binary search can never be 0
-            .map_or_else(|e| e - 1, |v| v);
+            .unwrap_or_else(|e| e - 1);

        let range_mapping = &self.ranges_mapping[pos];
        let diff = compact - range_mapping.compact_start;
@@ -292,6 +292,63 @@ impl BinarySerializable for IPCodecParams {
    }
 }

+/// Exposes the compact space compressed values as u64.
+///
+/// This allows faster access to the values, as u64 is faster to work with than u128.
+/// It also allows handling u128 values like u64, with `open_u64_lenient` as a uniform
+/// access interface.
+///
+/// To convert from the internal u64 back to u128, `compact_to_u128` can be used.
+pub struct CompactSpaceU64Accessor(CompactSpaceDecompressor);
+impl CompactSpaceU64Accessor {
+    pub(crate) fn open(data: OwnedBytes) -> io::Result<CompactSpaceU64Accessor> {
+        let decompressor = CompactSpaceU64Accessor(CompactSpaceDecompressor::open(data)?);
+        Ok(decompressor)
+    }
+    /// Convert a compact space value to u128
+    pub fn compact_to_u128(&self, compact: u32) -> u128 {
+        self.0.compact_to_u128(compact)
+    }
+}
+
+impl ColumnValues<u64> for CompactSpaceU64Accessor {
+    #[inline]
+    fn get_val(&self, doc: u32) -> u64 {
+        let compact = self.0.get_compact(doc);
+        compact as u64
+    }
+
+    fn min_value(&self) -> u64 {
+        self.0.u128_to_compact(self.0.min_value()).unwrap() as u64
+    }
+
+    fn max_value(&self) -> u64 {
+        self.0.u128_to_compact(self.0.max_value()).unwrap() as u64
+    }
+
+    fn num_vals(&self) -> u32 {
+        self.0.params.num_vals
+    }
+
+    #[inline]
+    fn iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
+        Box::new(self.0.iter_compact().map(|el| el as u64))
+    }
+
+    #[inline]
+    fn get_row_ids_for_value_range(
+        &self,
+        value_range: RangeInclusive<u64>,
+        position_range: Range<u32>,
+        positions: &mut Vec<u32>,
+    ) {
+        let value_range = self.0.compact_to_u128(*value_range.start() as u32)
+            ..=self.0.compact_to_u128(*value_range.end() as u32);
+        self.0
+            .get_row_ids_for_value_range(value_range, position_range, positions)
+    }
+}
+
 impl ColumnValues<u128> for CompactSpaceDecompressor {
    #[inline]
    fn get_val(&self, doc: u32) -> u128 {
@@ -402,9 +459,14 @@ impl CompactSpaceDecompressor {
            .map(|compact| self.compact_to_u128(compact))
    }

+    #[inline]
+    pub fn get_compact(&self, idx: u32) -> u32 {
+        self.params.bit_unpacker.get(idx, &self.data) as u32
+    }
+
    #[inline]
    pub fn get(&self, idx: u32) -> u128 {
-        let compact = self.params.bit_unpacker.get(idx, &self.data) as u32;
+        let compact = self.get_compact(idx);
        self.compact_to_u128(compact)
    }

--- a/columnar/src/column_values/u128_based/mod.rs
+++ b/columnar/src/column_values/u128_based/mod.rs
@@ -6,7 +6,9 @@ use std::sync::Arc;
 mod compact_space;

 use common::{BinarySerializable, OwnedBytes, VInt};
-use compact_space::{CompactSpaceCompressor, CompactSpaceDecompressor};
+pub use compact_space::{
+    CompactSpaceCompressor, CompactSpaceDecompressor, CompactSpaceU64Accessor,
+};

 use crate::column_values::monotonic_map_column;
 use crate::column_values::monotonic_mapping::{
@@ -108,6 +110,23 @@ pub fn open_u128_mapped<T: MonotonicallyMappableToU128 + Debug>(
        StrictlyMonotonicMappingToInternal::<T>::new().into();
    Ok(Arc::new(monotonic_map_column(reader, inverted)))
 }
+
+/// Returns the u64 representation of the u128 data.
+/// The internal representation of the data as u64 is useful for faster processing.
+///
+/// To convert back to u128, downcast to `CompactSpaceU64Accessor` and call
+/// `compact_to_u128`.
+///
+/// # Notice
+/// In case new codecs are added, check for usages of `CompactSpaceU64Accessor` and
+/// also handle the new codecs.
+pub fn open_u128_as_compact_u64(mut bytes: OwnedBytes) -> io::Result<Arc<dyn ColumnValues<u64>>> {
+    let header = U128Header::deserialize(&mut bytes)?;
+    assert_eq!(header.codec_type, U128FastFieldCodecType::CompactSpace);
+    let reader = CompactSpaceU64Accessor::open(bytes)?;
+    Ok(Arc::new(reader))
+}
+
 #[cfg(test)]
 pub mod tests {
    use super::*;
--- a/columnar/src/column_values/u64_based/bitpacked.rs
+++ b/columnar/src/column_values/u64_based/bitpacked.rs
@@ -63,7 +63,6 @@ impl ColumnValues for BitpackedReader {
    fn get_val(&self, doc: u32) -> u64 {
        self.stats.min_value + self.stats.gcd.get() * self.bit_unpacker.get(doc, &self.data)
    }
-
    #[inline]
    fn min_value(&self) -> u64 {
        self.stats.min_value
--- a/columnar/src/column_values/u64_based/blockwise_linear.rs
+++ b/columnar/src/column_values/u64_based/blockwise_linear.rs
@@ -63,7 +63,10 @@ impl BlockwiseLinearEstimator {
        if self.block.is_empty() {
            return;
        }
-        let line = Line::train(&VecColumn::from(&self.block));
+        let column = VecColumn::from(std::mem::take(&mut self.block));
+        let line = Line::train(&column);
+        self.block = column.into();
+
        let mut max_value = 0u64;
        for (i, buffer_val) in self.block.iter().enumerate() {
            let interpolated_val = line.eval(i as u32);
@@ -125,7 +128,7 @@ impl ColumnCodecEstimator for BlockwiseLinearEstimator {
                *buffer_val = gcd_divider.divide(*buffer_val - stats.min_value);
            }

-            let line = Line::train(&VecColumn::from(&buffer));
+            let line = Line::train(&VecColumn::from(buffer.to_vec()));

            assert!(!buffer.is_empty());

--- a/columnar/src/column_values/u64_based/line.rs
+++ b/columnar/src/column_values/u64_based/line.rs
@@ -184,7 +184,7 @@ mod tests {
    }

    fn test_eval_max_err(ys: &[u64]) -> Option<u64> {
-        let line = Line::train(&VecColumn::from(&ys));
+        let line = Line::train(&VecColumn::from(ys.to_vec()));
        ys.iter()
            .enumerate()
            .map(|(x, y)| y.wrapping_sub(line.eval(x as u32)))
--- a/columnar/src/column_values/u64_based/linear.rs
+++ b/columnar/src/column_values/u64_based/linear.rs
@@ -173,7 +173,9 @@ impl LinearCodecEstimator {
    fn collect_before_line_estimation(&mut self, value: u64) {
        self.block.push(value);
        if self.block.len() == LINE_ESTIMATION_BLOCK_LEN {
-            let line = Line::train(&VecColumn::from(&self.block));
+            let column = VecColumn::from(std::mem::take(&mut self.block));
+            let line = Line::train(&column);
+            self.block = column.into();
            let block = std::mem::take(&mut self.block);
            for val in block {
                self.collect_after_line_estimation(&line, val);
--- a/columnar/src/column_values/u64_based/tests.rs
+++ b/columnar/src/column_values/u64_based/tests.rs
@@ -1,5 +1,4 @@
 use proptest::prelude::*;
-use proptest::strategy::Strategy;
 use proptest::{prop_oneof, proptest};

 #[test]
--- a/columnar/src/column_values/vec_column.rs
+++ b/columnar/src/column_values/vec_column.rs
@@ -4,14 +4,14 @@ use tantivy_bitpacker::minmax;

 use crate::ColumnValues;

-/// VecColumn provides `Column` over a slice.
-pub struct VecColumn<'a, T = u64> {
-    pub(crate) values: &'a [T],
+/// VecColumn provides `Column` over a `Vec<T>`.
+pub struct VecColumn<T = u64> {
+    pub(crate) values: Vec<T>,
    pub(crate) min_value: T,
    pub(crate) max_value: T,
 }

-impl<'a, T: Copy + PartialOrd + Send + Sync + Debug> ColumnValues<T> for VecColumn<'a, T> {
+impl<T: Copy + PartialOrd + Send + Sync + Debug + 'static> ColumnValues<T> for VecColumn<T> {
    fn get_val(&self, position: u32) -> T {
        self.values[position as usize]
    }
@@ -37,11 +37,8 @@ impl<'a, T: Copy + PartialOrd + Send + Sync + Debug> ColumnValues<T> for VecColu
    }
 }

-impl<'a, T: Copy + PartialOrd + Default, V> From<&'a V> for VecColumn<'a, T>
-where V: AsRef<[T]> + ?Sized
-{
-    fn from(values: &'a V) -> Self {
-        let values = values.as_ref();
+impl<T: Copy + PartialOrd + Default> From<Vec<T>> for VecColumn<T> {
+    fn from(values: Vec<T>) -> Self {
        let (min_value, max_value) = minmax(values.iter().copied()).unwrap_or_default();
        Self {
            values,
@@ -50,3 +47,8 @@ where V: AsRef<[T]> + ?Sized
        }
    }
 }
+impl From<VecColumn> for Vec<u64> {
+    fn from(column: VecColumn) -> Self {
+        column.values
+    }
+}
--- a/columnar/src/columnar/column_type.rs
+++ b/columnar/src/columnar/column_type.rs
@@ -58,7 +58,7 @@ impl ColumnType {
        self == &ColumnType::DateTime
    }

-    pub fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
+    pub(crate) fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
        COLUMN_TYPES.get(code as usize).copied().ok_or(InvalidData)
    }
 }
--- a/columnar/src/columnar/format_version.rs
+++ b/columnar/src/columnar/format_version.rs
@@ -1,3 +1,6 @@
+use core::fmt;
+use std::fmt::{Display, Formatter};
+
 use crate::InvalidData;

 pub const VERSION_FOOTER_NUM_BYTES: usize = MAGIC_BYTES.len() + std::mem::size_of::<u32>();
@@ -8,7 +11,7 @@ const MAGIC_BYTES: [u8; 4] = [2, 113, 119, 66];

 pub fn footer() -> [u8; VERSION_FOOTER_NUM_BYTES] {
    let mut footer_bytes = [0u8; VERSION_FOOTER_NUM_BYTES];
-    footer_bytes[0..4].copy_from_slice(&Version::V1.to_bytes());
+    footer_bytes[0..4].copy_from_slice(&CURRENT_VERSION.to_bytes());
    footer_bytes[4..8].copy_from_slice(&MAGIC_BYTES[..]);
    footer_bytes
 }
@@ -20,10 +23,22 @@ pub fn parse_footer(footer_bytes: [u8; VERSION_FOOTER_NUM_BYTES]) -> Result<Vers
    Version::try_from_bytes(footer_bytes[0..4].try_into().unwrap())
 }

+pub const CURRENT_VERSION: Version = Version::V2;
+
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 #[repr(u32)]
 pub enum Version {
    V1 = 1u32,
+    V2 = 2u32,
+}
+
+impl Display for Version {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        f.write_str(match self {
+            Version::V1 => "v1",
+            Version::V2 => "v2",
+        })
+    }
 }

 impl Version {
@@ -35,6 +50,7 @@ impl Version {
        let code = u32::from_le_bytes(bytes);
        match code {
            1u32 => Ok(Version::V1),
+            2u32 => Ok(Version::V2),
            _ => Err(InvalidData),
        }
    }
@@ -47,9 +63,9 @@ mod tests {
    use super::*;

    #[test]
-    fn test_footer_dserialization() {
+    fn test_footer_deserialization() {
        let parsed_version: Version = parse_footer(footer()).unwrap();
-        assert_eq!(Version::V1, parsed_version);
+        assert_eq!(Version::V2, parsed_version);
    }

    #[test]
@@ -63,11 +79,10 @@ mod tests {
        for &i in &version_to_tests {
            let version_res = Version::try_from_bytes(i.to_le_bytes());
            if let Ok(version) = version_res {
-                assert_eq!(version, Version::V1);
                assert_eq!(version.to_bytes(), i.to_le_bytes());
                valid_versions.insert(i);
            }
        }
-        assert_eq!(valid_versions.len(), 1);
+        assert_eq!(valid_versions.len(), 2);
    }
 }
--- a/columnar/src/columnar/merge/mod.rs
+++ b/columnar/src/columnar/merge/mod.rs
@@ -7,7 +7,6 @@ use std::io;
 use std::net::Ipv6Addr;
 use std::sync::Arc;

-use itertools::Itertools;
 pub use merge_mapping::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};

 use super::writer::ColumnarSerializer;
@@ -371,20 +370,8 @@ fn is_empty_after_merge(
                        true
                    }
                    ColumnIndex::Multivalued(multivalued_index) => {
-                        for (doc_id, (start_index, end_index)) in multivalued_index
-                            .start_index_column
-                            .iter()
-                            .tuple_windows()
-                            .enumerate()
-                        {
-                            let doc_id = doc_id as u32;
-                            if start_index == end_index {
-                                // There are no values in this document
-                                continue;
-                            }
-                            // The document contains values and is present in the alive bitset.
-                            // The column is therefore not empty.
-                            if alive_bitset.contains(doc_id) {
+                        for alive_docid in alive_bitset.iter() {
+                            if !multivalued_index.range(alive_docid).is_empty() {
                                return false;
                            }
                        }
--- a/columnar/src/columnar/merge/tests.rs
+++ b/columnar/src/columnar/merge/tests.rs
@@ -1,5 +1,3 @@
-use std::collections::BTreeMap;
-
 use itertools::Itertools;

 use super::*;
@@ -16,7 +14,7 @@ fn make_columnar<T: Into<NumericalValue> + HasAssociatedColumnType + Copy>(
    }
    let mut buffer: Vec<u8> = Vec::new();
    dataframe_writer
-        .serialize(vals.len() as RowId, None, &mut buffer)
+        .serialize(vals.len() as RowId, &mut buffer)
        .unwrap();
    ColumnarReader::open(buffer).unwrap()
 }
@@ -161,9 +159,7 @@ fn make_numerical_columnar_multiple_columns(
        .max()
        .unwrap_or(0u32);
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer
-        .serialize(num_rows, None, &mut buffer)
-        .unwrap();
+    dataframe_writer.serialize(num_rows, &mut buffer).unwrap();
    ColumnarReader::open(buffer).unwrap()
 }

@@ -186,9 +182,7 @@ fn make_byte_columnar_multiple_columns(
        }
    }
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer
-        .serialize(num_rows, None, &mut buffer)
-        .unwrap();
+    dataframe_writer.serialize(num_rows, &mut buffer).unwrap();
    ColumnarReader::open(buffer).unwrap()
 }

@@ -207,9 +201,7 @@ fn make_text_columnar_multiple_columns(columns: &[(&str, &[&[&str]])]) -> Column
        .max()
        .unwrap_or(0u32);
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer
-        .serialize(num_rows, None, &mut buffer)
-        .unwrap();
+    dataframe_writer.serialize(num_rows, &mut buffer).unwrap();
    ColumnarReader::open(buffer).unwrap()
 }

--- a/columnar/src/columnar/mod.rs
+++ b/columnar/src/columnar/mod.rs
@@ -5,6 +5,7 @@ mod reader;
 mod writer;

 pub use column_type::{ColumnType, HasAssociatedColumnType};
+pub use format_version::{Version, CURRENT_VERSION};
 #[cfg(test)]
 pub(crate) use merge::ColumnTypeCategory;
 pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
--- a/columnar/src/columnar/reader/mod.rs
+++ b/columnar/src/columnar/reader/mod.rs
@@ -6,7 +6,7 @@ use sstable::{Dictionary, RangeSSTable};

 use crate::columnar::{format_version, ColumnType};
 use crate::dynamic_column::DynamicColumnHandle;
-use crate::RowId;
+use crate::{RowId, Version};

 fn io_invalid_data(msg: String) -> io::Error {
    io::Error::new(io::ErrorKind::InvalidData, msg)
@@ -19,6 +19,7 @@ pub struct ColumnarReader {
    column_dictionary: Dictionary<RangeSSTable>,
    column_data: FileSlice,
    num_rows: RowId,
+    format_version: Version,
 }

 impl fmt::Debug for ColumnarReader {
@@ -53,6 +54,7 @@ impl fmt::Debug for ColumnarReader {
 fn read_all_columns_in_stream(
    mut stream: sstable::Streamer<'_, RangeSSTable>,
    column_data: &FileSlice,
+    format_version: Version,
 ) -> io::Result<Vec<DynamicColumnHandle>> {
    let mut results = Vec::new();
    while stream.advance() {
@@ -67,6 +69,7 @@ fn read_all_columns_in_stream(
        let dynamic_column_handle = DynamicColumnHandle {
            file_slice,
            column_type,
+            format_version,
        };
        results.push(dynamic_column_handle);
    }
@@ -88,7 +91,7 @@ impl ColumnarReader {
        let num_rows = u32::deserialize(&mut &footer_bytes[8..12])?;
        let version_footer_bytes: [u8; format_version::VERSION_FOOTER_NUM_BYTES] =
            footer_bytes[12..].try_into().unwrap();
-        let _version = format_version::parse_footer(version_footer_bytes)?;
+        let format_version = format_version::parse_footer(version_footer_bytes)?;
        let (column_data, sstable) =
            file_slice_without_sstable_len.split_from_end(sstable_len as usize);
        let column_dictionary = Dictionary::open(sstable)?;
@@ -96,6 +99,7 @@ impl ColumnarReader {
            column_dictionary,
            column_data,
            num_rows,
+            format_version,
        })
    }

@@ -126,6 +130,7 @@ impl ColumnarReader {
                let column_handle = DynamicColumnHandle {
                    file_slice,
                    column_type,
+                    format_version: self.format_version,
                };
                Some((column_name, column_handle))
            } else {
@@ -167,7 +172,7 @@ impl ColumnarReader {
            .stream_for_column_range(column_name)
            .into_stream_async()
            .await?;
-        read_all_columns_in_stream(stream, &self.column_data)
+        read_all_columns_in_stream(stream, &self.column_data, self.format_version)
    }

    /// Get all columns for the given column name.
@@ -176,7 +181,7 @@ impl ColumnarReader {
    /// different types.
    pub fn read_columns(&self, column_name: &str) -> io::Result<Vec<DynamicColumnHandle>> {
        let stream = self.stream_for_column_range(column_name).into_stream()?;
-        read_all_columns_in_stream(stream, &self.column_data)
+        read_all_columns_in_stream(stream, &self.column_data, self.format_version)
    }

    /// Return the number of columns in the columnar.
@@ -195,7 +200,7 @@ mod tests {
        columnar_writer.record_column_type("col1", ColumnType::Str, false);
        columnar_writer.record_column_type("col2", ColumnType::U64, false);
        let mut buffer = Vec::new();
-        columnar_writer.serialize(1, None, &mut buffer).unwrap();
+        columnar_writer.serialize(1, &mut buffer).unwrap();
        let columnar = ColumnarReader::open(buffer).unwrap();
        let columns = columnar.list_columns().unwrap();
        assert_eq!(columns.len(), 2);
@@ -211,7 +216,7 @@ mod tests {
        columnar_writer.record_column_type("count", ColumnType::U64, false);
        columnar_writer.record_numerical(1, "count", 1u64);
        let mut buffer = Vec::new();
-        columnar_writer.serialize(2, None, &mut buffer).unwrap();
+        columnar_writer.serialize(2, &mut buffer).unwrap();
        let columnar = ColumnarReader::open(buffer).unwrap();
        let columns = columnar.list_columns().unwrap();
        assert_eq!(columns.len(), 1);
--- a/columnar/src/columnar/writer/column_writers.rs
+++ b/columnar/src/columnar/writer/column_writers.rs
@@ -41,31 +41,10 @@ impl ColumnWriter {
    pub(super) fn operation_iterator<'a, V: SymbolValue>(
        &self,
        arena: &MemoryArena,
-        old_to_new_ids_opt: Option<&[RowId]>,
        buffer: &'a mut Vec<u8>,
    ) -> impl Iterator<Item = ColumnOperation<V>> + 'a {
        buffer.clear();
        self.values.read_to_end(arena, buffer);
-        if let Some(old_to_new_ids) = old_to_new_ids_opt {
-            // TODO avoid the extra deserialization / serialization.
-            let mut sorted_ops: Vec<(RowId, ColumnOperation<V>)> = Vec::new();
-            let mut new_doc = 0u32;
-            let mut cursor = &buffer[..];
-            for op in std::iter::from_fn(|| ColumnOperation::<V>::deserialize(&mut cursor)) {
-                if let ColumnOperation::NewDoc(doc) = &op {
-                    new_doc = old_to_new_ids[*doc as usize];
-                    sorted_ops.push((new_doc, ColumnOperation::NewDoc(new_doc)));
-                } else {
-                    sorted_ops.push((new_doc, op));
-                }
-            }
-            // stable sort is crucial here.
-            sorted_ops.sort_by_key(|(new_doc_id, _)| *new_doc_id);
-            buffer.clear();
-            for (_, op) in sorted_ops {
-                buffer.extend_from_slice(op.serialize().as_ref());
-            }
-        }
        let mut cursor: &[u8] = &buffer[..];
        std::iter::from_fn(move || ColumnOperation::deserialize(&mut cursor))
    }
@@ -231,11 +210,9 @@ impl NumericalColumnWriter {
    pub(super) fn operation_iterator<'a>(
        self,
        arena: &MemoryArena,
-        old_to_new_ids: Option<&[RowId]>,
        buffer: &'a mut Vec<u8>,
    ) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a {
-        self.column_writer
-            .operation_iterator(arena, old_to_new_ids, buffer)
+        self.column_writer.operation_iterator(arena, buffer)
    }
 }

@@ -277,11 +254,9 @@ impl StrOrBytesColumnWriter {
    pub(super) fn operation_iterator<'a>(
        &self,
        arena: &MemoryArena,
-        old_to_new_ids: Option<&[RowId]>,
        byte_buffer: &'a mut Vec<u8>,
    ) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a {
-        self.column_writer
-            .operation_iterator(arena, old_to_new_ids, byte_buffer)
+        self.column_writer.operation_iterator(arena, byte_buffer)
    }
 }

--- a/columnar/src/columnar/writer/mod.rs
+++ b/columnar/src/columnar/writer/mod.rs
@@ -12,10 +12,8 @@ use common::CountingWriter;
 pub(crate) use serializer::ColumnarSerializer;
 use stacker::{Addr, ArenaHashMap, MemoryArena};

-use crate::column_index::SerializableColumnIndex;
-use crate::column_values::{
-    ColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64, VecColumn,
-};
+use crate::column_index::{SerializableColumnIndex, SerializableOptionalIndex};
+use crate::column_values::{MonotonicallyMappableToU128, MonotonicallyMappableToU64};
 use crate::columnar::column_type::ColumnType;
 use crate::columnar::writer::column_writers::{
    ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter,
@@ -45,7 +43,7 @@ struct SpareBuffers {
 /// columnar_writer.record_str(1u32 /* doc id */, "product_name", "Apple");
 /// columnar_writer.record_numerical(0u32 /* doc id */, "price", 10.5f64); //< uh oh we ended up mixing integer and floats.
 /// let mut wrt: Vec<u8> =  Vec::new();
-/// columnar_writer.serialize(2u32, None, &mut wrt).unwrap();
+/// columnar_writer.serialize(2u32, &mut wrt).unwrap();
 /// ```
 #[derive(Default)]
 pub struct ColumnarWriter {
@@ -61,22 +59,6 @@ pub struct ColumnarWriter {
    buffers: SpareBuffers,
 }

-#[inline]
-fn mutate_or_create_column<V, TMutator>(
-    arena_hash_map: &mut ArenaHashMap,
-    column_name: &str,
-    updater: TMutator,
-) where
-    V: Copy + 'static,
-    TMutator: FnMut(Option<V>) -> V,
-{
-    assert!(
-        !column_name.as_bytes().contains(&0u8),
-        "key may not contain the 0 byte"
-    );
-    arena_hash_map.mutate_or_create(column_name.as_bytes(), updater);
-}
-
 impl ColumnarWriter {
    pub fn mem_usage(&self) -> usize {
        self.arena.mem_usage()
@@ -93,63 +75,6 @@ impl ColumnarWriter {
                .sum::<usize>()
    }

-    /// Returns the list of doc ids from 0..num_docs sorted by the `sort_field`
-    /// column.
-    ///
-    /// If the column is multivalued, use the first value for scoring.
-    /// If no value is associated to a specific row, the document is assigned
-    /// the lowest possible score.
-    ///
-    /// The sort applied is stable.
-    pub fn sort_order(&self, sort_field: &str, num_docs: RowId, reversed: bool) -> Vec<u32> {
-        let Some(numerical_col_writer) = self
-            .numerical_field_hash_map
-            .get::<NumericalColumnWriter>(sort_field.as_bytes())
-            .or_else(|| {
-                self.datetime_field_hash_map
-                    .get::<NumericalColumnWriter>(sort_field.as_bytes())
-            })
-        else {
-            return Vec::new();
-        };
-        let mut symbols_buffer = Vec::new();
-        let mut values = Vec::new();
-        let mut start_doc_check_fill = 0;
-        let mut current_doc_opt: Option<RowId> = None;
-        // Assumption: NewDoc will never call the same doc twice and is strictly increasing between
-        // calls
-        for op in numerical_col_writer.operation_iterator(&self.arena, None, &mut symbols_buffer) {
-            match op {
-                ColumnOperation::NewDoc(doc) => {
-                    current_doc_opt = Some(doc);
-                }
-                ColumnOperation::Value(numerical_value) => {
-                    if let Some(current_doc) = current_doc_opt {
-                        // Fill up with 0.0 since last doc
-                        values.extend((start_doc_check_fill..current_doc).map(|doc| (0.0, doc)));
-                        start_doc_check_fill = current_doc + 1;
-                        // handle multi values
-                        current_doc_opt = None;
-
-                        let score: f32 = f64::coerce(numerical_value) as f32;
-                        values.push((score, current_doc));
-                    }
-                }
-            }
-        }
-        for doc in values.len() as u32..num_docs {
-            values.push((0.0f32, doc));
-        }
-        values.sort_by(|(left_score, _), (right_score, _)| {
-            if reversed {
-                right_score.total_cmp(left_score)
-            } else {
-                left_score.total_cmp(right_score)
-            }
-        });
-        values.into_iter().map(|(_score, doc)| doc).collect()
-    }
-
    /// Records a column type. This is useful to bypass the coercion process,
    /// makes sure the empty is present in the resulting columnar, or set
    /// the `sort_values_within_row`.
@@ -177,9 +102,8 @@ impl ColumnarWriter {
                    },
                    &mut self.dictionaries,
                );
-                mutate_or_create_column(
-                    hash_map,
-                    column_name,
+                hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<StrOrBytesColumnWriter>| {
                        let mut column_writer = if let Some(column_writer) = column_opt {
                            column_writer
@@ -194,24 +118,21 @@ impl ColumnarWriter {
                );
            }
            ColumnType::Bool => {
-                mutate_or_create_column(
-                    &mut self.bool_field_hash_map,
-                    column_name,
+                self.bool_field_hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
                );
            }
            ColumnType::DateTime => {
-                mutate_or_create_column(
-                    &mut self.datetime_field_hash_map,
-                    column_name,
+                self.datetime_field_hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
                );
            }
            ColumnType::I64 | ColumnType::F64 | ColumnType::U64 => {
                let numerical_type = column_type.numerical_type().unwrap();
-                mutate_or_create_column(
-                    &mut self.numerical_field_hash_map,
-                    column_name,
+                self.numerical_field_hash_map.mutate_or_create(
+                    column_name.as_bytes(),
                    |column_opt: Option<NumericalColumnWriter>| {
                        let mut column: NumericalColumnWriter = column_opt.unwrap_or_default();
                        column.force_numerical_type(numerical_type);
@@ -219,9 +140,8 @@ impl ColumnarWriter {
                    },
                );
            }
-            ColumnType::IpAddr => mutate_or_create_column(
-                &mut self.ip_addr_field_hash_map,
-                column_name,
+            ColumnType::IpAddr => self.ip_addr_field_hash_map.mutate_or_create(
+                column_name.as_bytes(),
                |column_opt: Option<ColumnWriter>| column_opt.unwrap_or_default(),
            ),
        }
@@ -234,9 +154,8 @@ impl ColumnarWriter {
        numerical_value: T,
    ) {
        let (hash_map, arena) = (&mut self.numerical_field_hash_map, &mut self.arena);
-        mutate_or_create_column(
-            hash_map,
-            column_name,
+        hash_map.mutate_or_create(
+            column_name.as_bytes(),
            |column_opt: Option<NumericalColumnWriter>| {
                let mut column: NumericalColumnWriter = column_opt.unwrap_or_default();
                column.record_numerical_value(doc, numerical_value.into(), arena);
@@ -246,10 +165,6 @@ impl ColumnarWriter {
    }

    pub fn record_ip_addr(&mut self, doc: RowId, column_name: &str, ip_addr: Ipv6Addr) {
-        assert!(
-            !column_name.as_bytes().contains(&0u8),
-            "key may not contain the 0 byte"
-        );
        let (hash_map, arena) = (&mut self.ip_addr_field_hash_map, &mut self.arena);
        hash_map.mutate_or_create(
            column_name.as_bytes(),
@@ -263,24 +178,30 @@ impl ColumnarWriter {

    pub fn record_bool(&mut self, doc: RowId, column_name: &str, val: bool) {
        let (hash_map, arena) = (&mut self.bool_field_hash_map, &mut self.arena);
-        mutate_or_create_column(hash_map, column_name, |column_opt: Option<ColumnWriter>| {
-            let mut column: ColumnWriter = column_opt.unwrap_or_default();
-            column.record(doc, val, arena);
-            column
-        });
+        hash_map.mutate_or_create(
+            column_name.as_bytes(),
+            |column_opt: Option<ColumnWriter>| {
+                let mut column: ColumnWriter = column_opt.unwrap_or_default();
+                column.record(doc, val, arena);
+                column
+            },
+        );
    }

    pub fn record_datetime(&mut self, doc: RowId, column_name: &str, datetime: common::DateTime) {
        let (hash_map, arena) = (&mut self.datetime_field_hash_map, &mut self.arena);
-        mutate_or_create_column(hash_map, column_name, |column_opt: Option<ColumnWriter>| {
-            let mut column: ColumnWriter = column_opt.unwrap_or_default();
-            column.record(
-                doc,
-                NumericalValue::I64(datetime.into_timestamp_nanos()),
-                arena,
-            );
-            column
-        });
+        hash_map.mutate_or_create(
+            column_name.as_bytes(),
+            |column_opt: Option<ColumnWriter>| {
+                let mut column: ColumnWriter = column_opt.unwrap_or_default();
+                column.record(
+                    doc,
+                    NumericalValue::I64(datetime.into_timestamp_nanos()),
+                    arena,
+                );
+                column
+            },
+        );
    }

    pub fn record_str(&mut self, doc: RowId, column_name: &str, value: &str) {
@@ -305,10 +226,6 @@ impl ColumnarWriter {
    }

    pub fn record_bytes(&mut self, doc: RowId, column_name: &str, value: &[u8]) {
-        assert!(
-            !column_name.as_bytes().contains(&0u8),
-            "key may not contain the 0 byte"
-        );
        let (hash_map, arena, dictionaries) = (
            &mut self.bytes_field_hash_map,
            &mut self.arena,
@@ -328,13 +245,9 @@ impl ColumnarWriter {
            },
        );
    }
-    pub fn serialize(
-        &mut self,
-        num_docs: RowId,
-        old_to_new_row_ids: Option<&[RowId]>,
-        wrt: &mut dyn io::Write,
-    ) -> io::Result<Vec<(String, ColumnType)>> {
+    pub fn serialize(&mut self, num_docs: RowId, wrt: &mut dyn io::Write) -> io::Result<()> {
        let mut serializer = ColumnarSerializer::new(wrt);
+
        let mut columns: Vec<(&[u8], ColumnType, Addr)> = self
            .numerical_field_hash_map
            .iter()
@@ -348,7 +261,7 @@ impl ColumnarWriter {
        columns.extend(
            self.bytes_field_hash_map
                .iter()
-                .map(|(term, addr)| (term, ColumnType::Bytes, addr)),
+                .map(|(column_name, addr)| (column_name, ColumnType::Bytes, addr)),
        );
        columns.extend(
            self.str_field_hash_map
@@ -370,13 +283,12 @@ impl ColumnarWriter {
                .iter()
                .map(|(column_name, addr)| (column_name, ColumnType::DateTime, addr)),
        );
+        // TODO: replace JSON_END_OF_PATH with b'0' in columns
        columns.sort_unstable_by_key(|(column_name, col_type, _)| (*column_name, *col_type));

        let (arena, buffers, dictionaries) = (&self.arena, &mut self.buffers, &self.dictionaries);
        let mut symbol_byte_buffer: Vec<u8> = Vec::new();
-        for (column_name, column_type, addr) in columns.iter() {
-            let column_type = *column_type;
-            let addr = *addr;
+        for (column_name, column_type, addr) in columns {
            match column_type {
                ColumnType::Bool => {
                    let column_writer: ColumnWriter = self.bool_field_hash_map.read(addr);
@@ -386,11 +298,7 @@ impl ColumnarWriter {
                    serialize_bool_column(
                        cardinality,
                        num_docs,
-                        column_writer.operation_iterator(
-                            arena,
-                            old_to_new_row_ids,
-                            &mut symbol_byte_buffer,
-                        ),
+                        column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
                        buffers,
                        &mut column_serializer,
                    )?;
@@ -404,11 +312,7 @@ impl ColumnarWriter {
                    serialize_ip_addr_column(
                        cardinality,
                        num_docs,
-                        column_writer.operation_iterator(
-                            arena,
-                            old_to_new_row_ids,
-                            &mut symbol_byte_buffer,
-                        ),
+                        column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
                        buffers,
                        &mut column_serializer,
                    )?;
@@ -433,11 +337,8 @@ impl ColumnarWriter {
                        num_docs,
                        str_or_bytes_column_writer.sort_values_within_row,
                        dictionary_builder,
-                        str_or_bytes_column_writer.operation_iterator(
-                            arena,
-                            old_to_new_row_ids,
-                            &mut symbol_byte_buffer,
-                        ),
+                        str_or_bytes_column_writer
+                            .operation_iterator(arena, &mut symbol_byte_buffer),
                        buffers,
                        &self.arena,
                        &mut column_serializer,
@@ -455,11 +356,7 @@ impl ColumnarWriter {
                        cardinality,
                        num_docs,
                        numerical_type,
-                        numerical_column_writer.operation_iterator(
-                            arena,
-                            old_to_new_row_ids,
-                            &mut symbol_byte_buffer,
-                        ),
+                        numerical_column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
                        buffers,
                        &mut column_serializer,
                    )?;
@@ -474,11 +371,7 @@ impl ColumnarWriter {
                        cardinality,
                        num_docs,
                        NumericalType::I64,
-                        column_writer.operation_iterator(
-                            arena,
-                            old_to_new_row_ids,
-                            &mut symbol_byte_buffer,
-                        ),
+                        column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
                        buffers,
                        &mut column_serializer,
                    )?;
@@ -487,15 +380,7 @@ impl ColumnarWriter {
            };
        }
        serializer.finalize(num_docs)?;
-        Ok(columns
-            .into_iter()
-            .map(|(column_name, column_type, _)| {
-                (
-                    String::from_utf8_lossy(column_name).to_string(),
-                    column_type,
-                )
-            })
-            .collect())
+        Ok(())
    }
 }

@@ -655,10 +540,7 @@ fn send_to_serialize_column_mappable_to_u128<
    value_index_builders: &mut PreallocatedIndexBuilders,
    values: &mut Vec<T>,
    mut wrt: impl io::Write,
-) -> io::Result<()>
-where
-    for<'a> VecColumn<'a, T>: ColumnValues<T>,
-{
+) -> io::Result<()> {
    values.clear();
    // TODO: split index and values
    let serializable_column_index = match cardinality {
@@ -674,16 +556,16 @@ where
            let optional_index_builder = value_index_builders.borrow_optional_index_builder();
            consume_operation_iterator(op_iterator, optional_index_builder, values);
            let optional_index = optional_index_builder.finish(num_rows);
-            SerializableColumnIndex::Optional {
+            SerializableColumnIndex::Optional(SerializableOptionalIndex {
                num_rows,
                non_null_row_ids: Box::new(optional_index),
-            }
+            })
        }
        Cardinality::Multivalued => {
            let multivalued_index_builder = value_index_builders.borrow_multivalued_index_builder();
            consume_operation_iterator(op_iterator, multivalued_index_builder, values);
-            let multivalued_index = multivalued_index_builder.finish(num_rows);
-            SerializableColumnIndex::Multivalued(Box::new(multivalued_index))
+            let serializable_multivalued_index = multivalued_index_builder.finish(num_rows);
+            SerializableColumnIndex::Multivalued(serializable_multivalued_index)
        }
    };
    crate::column::serialize_column_mappable_to_u128(
@@ -694,15 +576,6 @@ where
    Ok(())
 }

-fn sort_values_within_row_in_place(multivalued_index: &[RowId], values: &mut [u64]) {
-    let mut start_index: usize = 0;
-    for end_index in multivalued_index.iter().copied() {
-        let end_index = end_index as usize;
-        values[start_index..end_index].sort_unstable();
-        start_index = end_index;
-    }
-}
-
 fn send_to_serialize_column_mappable_to_u64(
    op_iterator: impl Iterator<Item = ColumnOperation<u64>>,
    cardinality: Cardinality,
@@ -711,10 +584,7 @@ fn send_to_serialize_column_mappable_to_u64(
    value_index_builders: &mut PreallocatedIndexBuilders,
    values: &mut Vec<u64>,
    mut wrt: impl io::Write,
-) -> io::Result<()>
-where
-    for<'a> VecColumn<'a, u64>: ColumnValues<u64>,
-{
+) -> io::Result<()> {
    values.clear();
    let serializable_column_index = match cardinality {
        Cardinality::Full => {
@@ -729,19 +599,22 @@ where
            let optional_index_builder = value_index_builders.borrow_optional_index_builder();
            consume_operation_iterator(op_iterator, optional_index_builder, values);
            let optional_index = optional_index_builder.finish(num_rows);
-            SerializableColumnIndex::Optional {
+            SerializableColumnIndex::Optional(SerializableOptionalIndex {
                non_null_row_ids: Box::new(optional_index),
                num_rows,
-            }
+            })
        }
        Cardinality::Multivalued => {
            let multivalued_index_builder = value_index_builders.borrow_multivalued_index_builder();
            consume_operation_iterator(op_iterator, multivalued_index_builder, values);
-            let multivalued_index = multivalued_index_builder.finish(num_rows);
+            let serializable_multivalued_index = multivalued_index_builder.finish(num_rows);
            if sort_values_within_row {
-                sort_values_within_row_in_place(multivalued_index, values);
+                sort_values_within_row_in_place(
+                    serializable_multivalued_index.start_offsets.boxed_iter(),
+                    values,
+                );
            }
-            SerializableColumnIndex::Multivalued(Box::new(multivalued_index))
+            SerializableColumnIndex::Multivalued(serializable_multivalued_index)
        }
    };
    crate::column::serialize_column_mappable_to_u64(
@@ -752,6 +625,18 @@ where
    Ok(())
 }

+fn sort_values_within_row_in_place(
+    multivalued_index: impl Iterator<Item = RowId>,
+    values: &mut [u64],
+) {
+    // Each yielded offset ends the row that began at the previous offset.
+    let mut row_start = 0usize;
+    for row_end in multivalued_index.map(|offset| offset as usize) {
+        values[row_start..row_end].sort_unstable();
+        row_start = row_end;
+    }
+}
+
 fn coerce_numerical_symbol<T>(
    operation_iterator: impl Iterator<Item = ColumnOperation<NumericalValue>>,
 ) -> impl Iterator<Item = ColumnOperation<u64>>
@@ -799,7 +684,7 @@ mod tests {
        assert_eq!(column_writer.get_cardinality(3), Cardinality::Full);
        let mut buffer = Vec::new();
        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
-            .operation_iterator(&arena, None, &mut buffer)
+            .operation_iterator(&arena, &mut buffer)
            .collect();
        assert_eq!(symbols.len(), 6);
        assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
@@ -828,7 +713,7 @@ mod tests {
        assert_eq!(column_writer.get_cardinality(3), Cardinality::Optional);
        let mut buffer = Vec::new();
        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
-            .operation_iterator(&arena, None, &mut buffer)
+            .operation_iterator(&arena, &mut buffer)
            .collect();
        assert_eq!(symbols.len(), 4);
        assert!(matches!(symbols[0], ColumnOperation::NewDoc(1u32)));
@@ -851,7 +736,7 @@ mod tests {
        assert_eq!(column_writer.get_cardinality(2), Cardinality::Optional);
        let mut buffer = Vec::new();
        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
-            .operation_iterator(&arena, None, &mut buffer)
+            .operation_iterator(&arena, &mut buffer)
            .collect();
        assert_eq!(symbols.len(), 2);
        assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
@@ -870,7 +755,7 @@ mod tests {
        assert_eq!(column_writer.get_cardinality(1), Cardinality::Multivalued);
        let mut buffer = Vec::new();
        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
-            .operation_iterator(&arena, None, &mut buffer)
+            .operation_iterator(&arena, &mut buffer)
            .collect();
        assert_eq!(symbols.len(), 3);
        assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
--- a/columnar/src/columnar/writer/serializer.rs
+++ b/columnar/src/columnar/writer/serializer.rs
@@ -1,6 +1,7 @@
 use std::io;
 use std::io::Write;

+use common::json_path_writer::JSON_END_OF_PATH;
 use common::{BinarySerializable, CountingWriter};
 use sstable::value::RangeValueWriter;
 use sstable::RangeSSTable;
@@ -19,7 +20,7 @@ pub struct ColumnarSerializer<W: io::Write> {
 fn prepare_key(key: &[u8], column_type: ColumnType, buffer: &mut Vec<u8>) {
    buffer.clear();
    buffer.extend_from_slice(key);
-    buffer.push(0u8);
+    buffer.push(JSON_END_OF_PATH);
    buffer.push(column_type.to_code());
 }

@@ -96,14 +97,13 @@ impl<'a, W: io::Write> io::Write for ColumnSerializer<'a, W> {
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::columnar::column_type::ColumnType;

    #[test]
    fn test_prepare_key_bytes() {
        let mut buffer: Vec<u8> = b"somegarbage".to_vec();
        prepare_key(b"root\0child", ColumnType::Str, &mut buffer);
        assert_eq!(buffer.len(), 12);
-        assert_eq!(&buffer[..10], b"root\0child");
+        assert_eq!(&buffer[..10], b"root\0child"); // key bytes are copied verbatim
        assert_eq!(buffer[10], 0u8);
        assert_eq!(buffer[11], ColumnType::Str.to_code());
    }
--- a/columnar/src/columnar/writer/value_index.rs
+++ b/columnar/src/columnar/writer/value_index.rs
@@ -1,3 +1,4 @@
+use crate::column_index::{SerializableMultivalueIndex, SerializableOptionalIndex};
 use crate::iterable::Iterable;
 use crate::RowId;

@@ -59,31 +60,47 @@ impl IndexBuilder for OptionalIndexBuilder {

 #[derive(Default)]
 pub struct MultivaluedIndexBuilder {
-    start_offsets: Vec<RowId>,
+    doc_with_values: Vec<RowId>,
+    start_offsets: Vec<u32>,
    total_num_vals_seen: u32,
+    current_row: RowId,
+    current_row_has_value: bool,
 }

 impl MultivaluedIndexBuilder {
-    pub fn finish(&mut self, num_docs: RowId) -> &[u32] {
-        self.start_offsets
-            .resize(num_docs as usize + 1, self.total_num_vals_seen);
-        &self.start_offsets[..]
+    pub fn finish(&mut self, num_docs: RowId) -> SerializableMultivalueIndex<'_> {
+        self.start_offsets.push(self.total_num_vals_seen);
+        let non_null_row_ids: Box<dyn Iterable<RowId>> = Box::new(&self.doc_with_values[..]);
+        SerializableMultivalueIndex {
+            doc_ids_with_values: SerializableOptionalIndex {
+                non_null_row_ids,
+                num_rows: num_docs,
+            },
+            start_offsets: Box::new(&self.start_offsets[..]),
+        }
    }

    fn reset(&mut self) {
+        self.doc_with_values.clear();
        self.start_offsets.clear();
-        self.start_offsets.push(0u32);
        self.total_num_vals_seen = 0;
+        self.current_row = 0;
+        self.current_row_has_value = false;
    }
 }

 impl IndexBuilder for MultivaluedIndexBuilder {
    fn record_row(&mut self, row_id: RowId) {
-        self.start_offsets
-            .resize(row_id as usize + 1, self.total_num_vals_seen);
+        self.current_row = row_id;
+        self.current_row_has_value = false;
    }

    fn record_value(&mut self) {
+        if !self.current_row_has_value {
+            self.current_row_has_value = true;
+            self.doc_with_values.push(self.current_row);
+            self.start_offsets.push(self.total_num_vals_seen);
+        }
        self.total_num_vals_seen += 1;
    }
 }
@@ -141,6 +158,32 @@ mod tests {
        );
    }

+    #[test]
+    fn test_multivalued_value_index_builder_simple() {
+        let mut multivalued_value_index_builder = MultivaluedIndexBuilder::default();
+        {
+            multivalued_value_index_builder.record_row(0u32);
+            multivalued_value_index_builder.record_value();
+            multivalued_value_index_builder.record_value();
+            let serialized_multivalue_index = multivalued_value_index_builder.finish(1u32);
+            let start_offsets: Vec<u32> = serialized_multivalue_index
+                .start_offsets
+                .boxed_iter()
+                .collect();
+            assert_eq!(&start_offsets, &[0, 2]);
+        }
+        multivalued_value_index_builder.reset();
+        multivalued_value_index_builder.record_row(0u32);
+        multivalued_value_index_builder.record_value();
+        multivalued_value_index_builder.record_value();
+        let serialized_multivalue_index = multivalued_value_index_builder.finish(1u32);
+        let start_offsets: Vec<u32> = serialized_multivalue_index
+            .start_offsets
+            .boxed_iter()
+            .collect();
+        assert_eq!(&start_offsets, &[0, 2]);
+    }
+
    #[test]
    fn test_multivalued_value_index_builder() {
        let mut multivalued_value_index_builder = MultivaluedIndexBuilder::default();
@@ -149,17 +192,15 @@ mod tests {
        multivalued_value_index_builder.record_value();
        multivalued_value_index_builder.record_row(2u32);
        multivalued_value_index_builder.record_value();
-        assert_eq!(
-            multivalued_value_index_builder.finish(4u32).to_vec(),
-            vec![0, 0, 2, 3, 3]
-        );
-        multivalued_value_index_builder.reset();
-        multivalued_value_index_builder.record_row(2u32);
-        multivalued_value_index_builder.record_value();
-        multivalued_value_index_builder.record_value();
-        assert_eq!(
-            multivalued_value_index_builder.finish(4u32).to_vec(),
-            vec![0, 0, 0, 2, 2]
-        );
+        let SerializableMultivalueIndex {
+            doc_ids_with_values,
+            start_offsets,
+        } = multivalued_value_index_builder.finish(4u32);
+        assert_eq!(doc_ids_with_values.num_rows, 4u32);
+        let doc_ids_with_values: Vec<u32> =
+            doc_ids_with_values.non_null_row_ids.boxed_iter().collect();
+        assert_eq!(&doc_ids_with_values, &[1u32, 2u32]);
+        let start_offsets: Vec<u32> = start_offsets.boxed_iter().collect();
+        assert_eq!(&start_offsets[..], &[0, 2, 3]);
    }
 }
--- a/columnar/src/compat_tests.rs
+++ b/columnar/src/compat_tests.rs
@@ -0,0 +1,183 @@
+use std::path::PathBuf;
+
+use itertools::Itertools;
+
+use crate::{
+    merge_columnar, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder,
+    CURRENT_VERSION,
+};
+
+const NUM_DOCS: u32 = u16::MAX as u32;
+
+fn generate_columnar(num_docs: u32, value_offset: u64) -> Vec<u8> {
+    use crate::ColumnarWriter;
+
+    let mut columnar_writer = ColumnarWriter::default();
+
+    for i in 0..num_docs {
+        if i % 100 == 0 {
+            columnar_writer.record_numerical(i, "sparse", value_offset + i as u64);
+        }
+        if i % 5 == 0 {
+            columnar_writer.record_numerical(i, "dense", value_offset + i as u64);
+        }
+        columnar_writer.record_numerical(i, "full", value_offset + i as u64);
+        columnar_writer.record_numerical(i, "multi", value_offset + i as u64);
+        columnar_writer.record_numerical(i, "multi", value_offset + i as u64);
+    }
+
+    let mut wrt: Vec<u8> = Vec::new();
+    columnar_writer.serialize(num_docs, &mut wrt).unwrap();
+
+    wrt
+}
+
+#[test]
+/// Writes a columnar for the CURRENT_VERSION to disk, unless that file already exists.
+fn create_format() {
+    let version = CURRENT_VERSION.to_string();
+    let file_path = path_for_version(&version);
+    if PathBuf::from(&file_path).exists() {
+        return; // keep an already existing file untouched
+    }
+    let columnar = generate_columnar(NUM_DOCS, 0);
+    std::fs::write(file_path, columnar).unwrap();
+}
+
+fn path_for_version(version: &str) -> String {
+    format!("./compat_tests_data/{}.columnar", version)
+}
+
+#[test]
+fn test_format_v1() {
+    let path = path_for_version("v1");
+    test_format(&path);
+}
+
+#[test]
+fn test_format_v2() {
+    let path = path_for_version("v2");
+    test_format(&path);
+}
+
+fn test_format(path: &str) {
+    let file_content = std::fs::read(path).unwrap();
+    let reader = ColumnarReader::open(file_content).unwrap();
+
+    check_columns(&reader);
+
+    // Test merge
+    let reader2 = ColumnarReader::open(generate_columnar(NUM_DOCS, NUM_DOCS as u64)).unwrap();
+    let columnar_readers = vec![&reader, &reader2];
+    let merge_row_order = StackMergeOrder::stack(&columnar_readers[..]);
+    let mut out = Vec::new();
+    merge_columnar(&columnar_readers, &[], merge_row_order.into(), &mut out).unwrap();
+    let reader = ColumnarReader::open(out).unwrap();
+    check_columns(&reader);
+}
+
+fn check_columns(reader: &ColumnarReader) {
+    let column = open_column(reader, "full");
+    check_column(&column, |doc_id| vec![(doc_id, doc_id as u64).into()]);
+    assert_eq!(column.get_cardinality(), Cardinality::Full);
+
+    let column = open_column(reader, "multi");
+    check_column(&column, |doc_id| {
+        vec![
+            (doc_id * 2, doc_id as u64).into(),
+            (doc_id * 2 + 1, doc_id as u64).into(),
+        ]
+    });
+    assert_eq!(column.get_cardinality(), Cardinality::Multivalued);
+
+    let column = open_column(reader, "sparse");
+    check_column(&column, |doc_id| {
+        if doc_id % 100 == 0 {
+            vec![(doc_id / 100, doc_id as u64).into()]
+        } else {
+            vec![]
+        }
+    });
+    assert_eq!(column.get_cardinality(), Cardinality::Optional);
+
+    let column = open_column(reader, "dense");
+    check_column(&column, |doc_id| {
+        if doc_id % 5 == 0 {
+            vec![(doc_id / 5, doc_id as u64).into()]
+        } else {
+            vec![]
+        }
+    });
+    assert_eq!(column.get_cardinality(), Cardinality::Optional);
+}
+
+struct RowIdAndValue {
+    row_id: u32,
+    value: u64,
+}
+impl From<(u32, u64)> for RowIdAndValue {
+    fn from((row_id, value): (u32, u64)) -> Self {
+        Self { row_id, value }
+    }
+}
+
+fn check_column<F: Fn(u32) -> Vec<RowIdAndValue>>(column: &Column<u64>, expected: F) {
+    let num_docs = column.num_docs();
+    let test_doc = |doc: u32| {
+        if expected(doc).is_empty() {
+            assert_eq!(column.first(doc), None);
+        } else {
+            assert_eq!(column.first(doc), Some(expected(doc)[0].value));
+        }
+        let values = column.values_for_doc(doc).collect_vec();
+        assert_eq!(values, expected(doc).iter().map(|x| x.value).collect_vec());
+        let mut row_ids = Vec::new();
+        column.row_ids_for_docs(&[doc], &mut vec![], &mut row_ids);
+        assert_eq!(
+            row_ids,
+            expected(doc).iter().map(|x| x.row_id).collect_vec()
+        );
+        let values = column.values_for_doc(doc).collect_vec();
+        assert_eq!(values, expected(doc).iter().map(|x| x.value).collect_vec());
+
+        // Docid rowid conversion
+        let mut row_ids = Vec::new();
+        let safe_next_doc = |doc: u32| (doc + 1).min(num_docs - 1);
+        column
+            .index
+            .docids_to_rowids(&[doc, safe_next_doc(doc)], &mut vec![], &mut row_ids);
+        let expected_rowids = expected(doc)
+            .iter()
+            .map(|x| x.row_id)
+            .chain(expected(safe_next_doc(doc)).iter().map(|x| x.row_id))
+            .collect_vec();
+        assert_eq!(row_ids, expected_rowids);
+        let rowid_range = column
+            .index
+            .docid_range_to_rowids(doc..safe_next_doc(doc) + 1);
+        if expected_rowids.is_empty() {
+            assert!(rowid_range.is_empty());
+        } else {
+            assert_eq!(
+                rowid_range,
+                expected_rowids[0]..expected_rowids.last().unwrap() + 1
+            );
+        }
+    };
+    test_doc(0);
+    test_doc(num_docs - 1);
+    test_doc(num_docs - 2);
+    test_doc(65000);
+}
+
+fn open_column(reader: &ColumnarReader, name: &str) -> Column<u64> {
+    let column = reader.read_columns(name).unwrap()[0]
+        .open()
+        .unwrap()
+        .coerce_numerical(crate::NumericalType::U64)
+        .unwrap();
+    let DynamicColumn::U64(column) = column else {
+        panic!();
+    };
+    column
+}
--- a/columnar/src/dynamic_column.rs
+++ b/columnar/src/dynamic_column.rs
@@ -8,7 +8,7 @@ use common::{ByteCount, DateTime, HasLen, OwnedBytes};
 use crate::column::{BytesColumn, Column, StrColumn};
 use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn};
 use crate::columnar::ColumnType;
-use crate::{Cardinality, ColumnIndex, NumericalType};
+use crate::{Cardinality, ColumnIndex, ColumnValues, NumericalType, Version};

 #[derive(Clone)]
 pub enum DynamicColumn {
@@ -232,6 +232,7 @@ static_dynamic_conversions!(Column<Ipv6Addr>, IpAddr);
 pub struct DynamicColumnHandle {
    pub(crate) file_slice: FileSlice,
    pub(crate) column_type: ColumnType,
+    pub(crate) format_version: Version,
 }

 impl DynamicColumnHandle {
@@ -247,7 +248,12 @@ impl DynamicColumnHandle {
    }

    /// Returns the `u64` fast field reader reader associated with `fields` of types
-    /// Str, u64, i64, f64, bool, or datetime.
+    /// Str, u64, i64, f64, bool, ip, or datetime.
+    ///
+    /// Note that for IpAddr, the fastfield reader returns the u64 representation of the
+    /// IpAddr.
+    /// To convert it back to u128, cast to `CompactSpaceU64Accessor` and call
+    /// `compact_to_u128`.
    ///
    /// If not, the fastfield reader will returns the u64-value associated with the original
    /// FastValue.
@@ -255,16 +261,24 @@ impl DynamicColumnHandle {
        let column_bytes = self.file_slice.read_bytes()?;
        match self.column_type {
            ColumnType::Str | ColumnType::Bytes => {
-                let column: BytesColumn = crate::column::open_column_bytes(column_bytes)?;
+                let column: BytesColumn =
+                    crate::column::open_column_bytes(column_bytes, self.format_version)?;
                Ok(Some(column.term_ord_column))
            }
-            ColumnType::IpAddr => Ok(None),
+            ColumnType::IpAddr => {
+                let column = crate::column::open_column_u128_as_compact_u64(
+                    column_bytes,
+                    self.format_version,
+                )?;
+                Ok(Some(column))
+            }
            ColumnType::Bool
            | ColumnType::I64
            | ColumnType::U64
            | ColumnType::F64
            | ColumnType::DateTime => {
-                let column = crate::column::open_column_u64::<u64>(column_bytes)?;
+                let column =
+                    crate::column::open_column_u64::<u64>(column_bytes, self.format_version)?;
                Ok(Some(column))
            }
        }
@@ -272,15 +286,31 @@ impl DynamicColumnHandle {

    fn open_internal(&self, column_bytes: OwnedBytes) -> io::Result<DynamicColumn> {
        let dynamic_column: DynamicColumn = match self.column_type {
-            ColumnType::Bytes => crate::column::open_column_bytes(column_bytes)?.into(),
-            ColumnType::Str => crate::column::open_column_str(column_bytes)?.into(),
-            ColumnType::I64 => crate::column::open_column_u64::<i64>(column_bytes)?.into(),
-            ColumnType::U64 => crate::column::open_column_u64::<u64>(column_bytes)?.into(),
-            ColumnType::F64 => crate::column::open_column_u64::<f64>(column_bytes)?.into(),
-            ColumnType::Bool => crate::column::open_column_u64::<bool>(column_bytes)?.into(),
-            ColumnType::IpAddr => crate::column::open_column_u128::<Ipv6Addr>(column_bytes)?.into(),
+            ColumnType::Bytes => {
+                crate::column::open_column_bytes(column_bytes, self.format_version)?.into()
+            }
+            ColumnType::Str => {
+                crate::column::open_column_str(column_bytes, self.format_version)?.into()
+            }
+            ColumnType::I64 => {
+                crate::column::open_column_u64::<i64>(column_bytes, self.format_version)?.into()
+            }
+            ColumnType::U64 => {
+                crate::column::open_column_u64::<u64>(column_bytes, self.format_version)?.into()
+            }
+            ColumnType::F64 => {
+                crate::column::open_column_u64::<f64>(column_bytes, self.format_version)?.into()
+            }
+            ColumnType::Bool => {
+                crate::column::open_column_u64::<bool>(column_bytes, self.format_version)?.into()
+            }
+            ColumnType::IpAddr => {
+                crate::column::open_column_u128::<Ipv6Addr>(column_bytes, self.format_version)?
+                    .into()
+            }
            ColumnType::DateTime => {
-                crate::column::open_column_u64::<DateTime>(column_bytes)?.into()
+                crate::column::open_column_u64::<DateTime>(column_bytes, self.format_version)?
+                    .into()
            }
        };
        Ok(dynamic_column)
--- a/columnar/src/iterable.rs
+++ b/columnar/src/iterable.rs
@@ -1,4 +1,7 @@
 use std::ops::Range;
+use std::sync::Arc;
+
+use crate::{ColumnValues, RowId};

 pub trait Iterable<T = u64> {
    fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_>;
@@ -17,3 +20,9 @@ where Range<T>: Iterator<Item = T>
        Box::new(self.clone())
    }
 }
+
+impl Iterable for Arc<dyn crate::ColumnValues<RowId>> {
+    fn boxed_iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
+        Box::new(self.iter().map(|row_id| row_id as u64))
+    }
+}
--- a/columnar/src/lib.rs
+++ b/columnar/src/lib.rs
@@ -48,7 +48,7 @@ pub use column_values::{
 };
 pub use columnar::{
    merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
-    MergeRowOrder, ShuffleMergeOrder, StackMergeOrder,
+    MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION,
 };
 use sstable::VoidSSTable;
 pub use value::{NumericalType, NumericalValue};
@@ -113,6 +113,9 @@ impl Cardinality {
    pub fn is_multivalue(&self) -> bool {
        matches!(self, Cardinality::Multivalued)
    }
+    pub fn is_full(&self) -> bool {
+        matches!(self, Cardinality::Full)
+    }
    pub(crate) fn to_code(self) -> u8 {
        self as u8
    }
@@ -128,3 +131,6 @@ impl Cardinality {

 #[cfg(test)]
 mod tests;
+
+#[cfg(test)]
+mod compat_tests;
--- a/columnar/src/tests.rs
+++ b/columnar/src/tests.rs
@@ -21,7 +21,7 @@ fn test_dataframe_writer_str() {
    dataframe_writer.record_str(1u32, "my_string", "hello");
    dataframe_writer.record_str(3u32, "my_string", "helloeee");
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(5, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(5, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
@@ -35,7 +35,7 @@ fn test_dataframe_writer_bytes() {
    dataframe_writer.record_bytes(1u32, "my_string", b"hello");
    dataframe_writer.record_bytes(3u32, "my_string", b"helloeee");
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(5, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(5, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
@@ -49,7 +49,7 @@ fn test_dataframe_writer_bool() {
    dataframe_writer.record_bool(1u32, "bool.value", false);
    dataframe_writer.record_bool(3u32, "bool.value", true);
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(5, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(5, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("bool.value").unwrap();
@@ -74,12 +74,12 @@ fn test_dataframe_writer_u64_multivalued() {
    dataframe_writer.record_numerical(6u32, "divisor", 2u64);
    dataframe_writer.record_numerical(6u32, "divisor", 3u64);
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(7, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(7, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("divisor").unwrap();
    assert_eq!(cols.len(), 1);
-    assert_eq!(cols[0].num_bytes(), 29);
+    assert_eq!(cols[0].num_bytes(), 50);
    let dyn_i64_col = cols[0].open().unwrap();
    let DynamicColumn::I64(divisor_col) = dyn_i64_col else {
        panic!();
@@ -97,7 +97,7 @@ fn test_dataframe_writer_ip_addr() {
    dataframe_writer.record_ip_addr(1, "ip_addr", Ipv6Addr::from_u128(1001));
    dataframe_writer.record_ip_addr(3, "ip_addr", Ipv6Addr::from_u128(1050));
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(5, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(5, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("ip_addr").unwrap();
@@ -128,7 +128,7 @@ fn test_dataframe_writer_numerical() {
    dataframe_writer.record_numerical(2u32, "srical.value", NumericalValue::U64(13u64));
    dataframe_writer.record_numerical(4u32, "srical.value", NumericalValue::U64(15u64));
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(6, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(6, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("srical.value").unwrap();
@@ -153,46 +153,6 @@ fn test_dataframe_writer_numerical() {
    assert_eq!(column_i64.first(6), None); //< we can change the spec for that one.
 }

-#[test]
-fn test_dataframe_sort_by_full() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_numerical(0u32, "value", NumericalValue::U64(1));
-    dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(2));
-    let data = dataframe_writer.sort_order("value", 2, false);
-    assert_eq!(data, vec![0, 1]);
-}
-
-#[test]
-fn test_dataframe_sort_by_opt() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(3));
-    dataframe_writer.record_numerical(3u32, "value", NumericalValue::U64(2));
-    let data = dataframe_writer.sort_order("value", 5, false);
-    // 0, 2, 4 is 0.0
-    assert_eq!(data, vec![0, 2, 4, 3, 1]);
-    let data = dataframe_writer.sort_order("value", 5, true);
-    assert_eq!(
-        data,
-        vec![4, 2, 0, 3, 1].into_iter().rev().collect::<Vec<_>>()
-    );
-}
-
-#[test]
-fn test_dataframe_sort_by_multi() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    // valid for sort
-    dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(2));
-    // those are ignored for sort
-    dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(4));
-    dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(4));
-    // valid for sort
-    dataframe_writer.record_numerical(3u32, "value", NumericalValue::U64(3));
-    // ignored, would change sort order
-    dataframe_writer.record_numerical(3u32, "value", NumericalValue::U64(1));
-    let data = dataframe_writer.sort_order("value", 4, false);
-    assert_eq!(data, vec![0, 2, 1, 3]);
-}
-
 #[test]
 fn test_dictionary_encoded_str() {
    let mut buffer = Vec::new();
@@ -201,7 +161,7 @@ fn test_dictionary_encoded_str() {
    columnar_writer.record_str(3, "my.column", "c");
    columnar_writer.record_str(3, "my.column2", "different_column!");
    columnar_writer.record_str(4, "my.column", "b");
-    columnar_writer.serialize(5, None, &mut buffer).unwrap();
+    columnar_writer.serialize(5, &mut buffer).unwrap();
    let columnar_reader = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar_reader.num_columns(), 2);
    let col_handles = columnar_reader.read_columns("my.column").unwrap();
@@ -235,7 +195,7 @@ fn test_dictionary_encoded_bytes() {
    columnar_writer.record_bytes(3, "my.column", b"c");
    columnar_writer.record_bytes(3, "my.column2", b"different_column!");
    columnar_writer.record_bytes(4, "my.column", b"b");
-    columnar_writer.serialize(5, None, &mut buffer).unwrap();
+    columnar_writer.serialize(5, &mut buffer).unwrap();
    let columnar_reader = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar_reader.num_columns(), 2);
    let col_handles = columnar_reader.read_columns("my.column").unwrap();
@@ -344,7 +304,7 @@ fn column_value_strategy() -> impl Strategy<Value = ColumnValue> {
            ip_addr_byte
        ))),
        1 => any::<bool>().prop_map(ColumnValue::Bool),
-        1 => (0_679_723_993i64..1_679_723_995i64)
+        1 => (679_723_993i64..1_679_723_995i64)
            .prop_map(|val| { ColumnValue::DateTime(DateTime::from_timestamp_secs(val)) })
    ]
 }
@@ -369,26 +329,12 @@ fn columnar_docs_strategy() -> impl Strategy<Value = Vec<Vec<(&'static str, Colu
        .prop_flat_map(|num_docs| proptest::collection::vec(doc_strategy(), num_docs))
 }

-fn columnar_docs_and_mapping_strategy(
-) -> impl Strategy<Value = (Vec<Vec<(&'static str, ColumnValue)>>, Vec<RowId>)> {
-    columnar_docs_strategy().prop_flat_map(|docs| {
-        permutation_strategy(docs.len()).prop_map(move |permutation| (docs.clone(), permutation))
-    })
-}
-
-fn permutation_strategy(n: usize) -> impl Strategy<Value = Vec<RowId>> {
-    Just((0u32..n as RowId).collect()).prop_shuffle()
-}
-
 fn permutation_and_subset_strategy(n: usize) -> impl Strategy<Value = Vec<usize>> {
    let vals: Vec<usize> = (0..n).collect();
    subsequence(vals, 0..=n).prop_shuffle()
 }

-fn build_columnar_with_mapping(
-    docs: &[Vec<(&'static str, ColumnValue)>],
-    old_to_new_row_ids_opt: Option<&[RowId]>,
-) -> ColumnarReader {
+fn build_columnar_with_mapping(docs: &[Vec<(&'static str, ColumnValue)>]) -> ColumnarReader {
    let num_docs = docs.len() as u32;
    let mut buffer = Vec::new();
    let mut columnar_writer = ColumnarWriter::default();
@@ -416,15 +362,13 @@ fn build_columnar_with_mapping(
            }
        }
    }
-    columnar_writer
-        .serialize(num_docs, old_to_new_row_ids_opt, &mut buffer)
-        .unwrap();
+    columnar_writer.serialize(num_docs, &mut buffer).unwrap();

    ColumnarReader::open(buffer).unwrap()
 }

 fn build_columnar(docs: &[Vec<(&'static str, ColumnValue)>]) -> ColumnarReader {
-    build_columnar_with_mapping(docs, None)
+    build_columnar_with_mapping(docs)
 }

 fn assert_columnar_eq_strict(left: &ColumnarReader, right: &ColumnarReader) {
@@ -448,6 +392,7 @@ fn assert_columnar_eq(
    }
 }

+#[track_caller]
 fn assert_column_eq<T: Copy + PartialOrd + Debug + Send + Sync + 'static>(
    left: &Column<T>,
    right: &Column<T>,
@@ -683,54 +628,6 @@ proptest! {
    }
 }

-// Same as `test_single_columnar_builder_proptest` but with a shuffling mapping.
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(500))]
-    #[test]
-    fn test_single_columnar_builder_with_shuffle_proptest((docs, mapping) in columnar_docs_and_mapping_strategy()) {
-        let columnar = build_columnar_with_mapping(&docs[..], Some(&mapping));
-        assert_eq!(columnar.num_rows() as usize, docs.len());
-        let mut expected_columns: HashMap<(&str, ColumnTypeCategory), HashMap<u32, Vec<&ColumnValue>> > = Default::default();
-        for (doc_id, doc_vals) in docs.iter().enumerate() {
-            for (col_name, col_val) in doc_vals {
-                expected_columns
-                    .entry((col_name, col_val.column_type_category()))
-                    .or_default()
-                    .entry(mapping[doc_id])
-                    .or_default()
-                    .push(col_val);
-            }
-        }
-        let column_list = columnar.list_columns().unwrap();
-        assert_eq!(expected_columns.len(), column_list.len());
-        for (column_name, column) in column_list {
-            let dynamic_column = column.open().unwrap();
-            let col_category: ColumnTypeCategory = dynamic_column.column_type().into();
-            let expected_col_values: &HashMap<u32, Vec<&ColumnValue>> = expected_columns.get(&(column_name.as_str(), col_category)).unwrap();
-            for _doc_id in 0..columnar.num_rows() {
-                match &dynamic_column {
-                    DynamicColumn::Bool(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::I64(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::U64(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::F64(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::IpAddr(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::DateTime(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::Bytes(col) =>
-                        assert_bytes_column_values(col, expected_col_values, false),
-                    DynamicColumn::Str(col) =>
-                        assert_bytes_column_values(col, expected_col_values, true),
-                }
-            }
-        }
-    }
-}
-
 // This tests create 2 or 3 random small columnar and attempts to merge them.
 // It compares the resulting merged dataframe with what would have been obtained by building the
 // dataframe from the concatenated rows to begin with.
@@ -844,24 +741,68 @@ fn columnar_docs_and_remap(
 proptest! {
    #![proptest_config(ProptestConfig::with_cases(1000))]
    #[test]
-    fn test_columnar_merge_and_remap_proptest((columnar_docs, shuffle_merge_order) in columnar_docs_and_remap()) {
-        let shuffled_rows: Vec<Vec<(&'static str, ColumnValue)>> = shuffle_merge_order.iter()
-            .map(|row_addr| columnar_docs[row_addr.segment_ord as usize][row_addr.row_id as usize].clone())
-            .collect();
-        let expected_merged_columnar = build_columnar(&shuffled_rows[..]);
-        let columnar_readers: Vec<ColumnarReader> = columnar_docs.iter()
-            .map(|docs| build_columnar(&docs[..]))
-            .collect::<Vec<_>>();
-        let columnar_readers_arr: Vec<&ColumnarReader> = columnar_readers.iter().collect();
-        let mut output: Vec<u8> = Vec::new();
-        let segment_num_rows: Vec<RowId> = columnar_docs.iter().map(|docs| docs.len() as RowId).collect();
-        let shuffle_merge_order = ShuffleMergeOrder::for_test(&segment_num_rows, shuffle_merge_order);
-        crate::merge_columnar(&columnar_readers_arr[..], &[], shuffle_merge_order.into(), &mut output).unwrap();
-        let merged_columnar = ColumnarReader::open(output).unwrap();
-        assert_columnar_eq(&merged_columnar, &expected_merged_columnar, true);
+    fn test_columnar_merge_and_remap_proptest((columnar_docs, shuffle_merge_order) in
+columnar_docs_and_remap()) {
+        test_columnar_merge_and_remap(columnar_docs, shuffle_merge_order);
    }
 }

+fn test_columnar_merge_and_remap(
+    columnar_docs: Vec<Vec<Vec<(&'static str, ColumnValue)>>>,
+    shuffle_merge_order: Vec<RowAddr>,
+) {
+    let shuffled_rows: Vec<Vec<(&'static str, ColumnValue)>> = shuffle_merge_order
+        .iter()
+        .map(|row_addr| {
+            columnar_docs[row_addr.segment_ord as usize][row_addr.row_id as usize].clone()
+        })
+        .collect();
+    let expected_merged_columnar = build_columnar(&shuffled_rows[..]);
+    let columnar_readers: Vec<ColumnarReader> = columnar_docs
+        .iter()
+        .map(|docs| build_columnar(&docs[..]))
+        .collect::<Vec<_>>();
+    let columnar_readers_ref: Vec<&ColumnarReader> = columnar_readers.iter().collect();
+    let mut output: Vec<u8> = Vec::new();
+    let segment_num_rows: Vec<RowId> = columnar_docs
+        .iter()
+        .map(|docs| docs.len() as RowId)
+        .collect();
+    let shuffle_merge_order = ShuffleMergeOrder::for_test(&segment_num_rows, shuffle_merge_order);
+    crate::merge_columnar(
+        &columnar_readers_ref[..],
+        &[],
+        shuffle_merge_order.into(),
+        &mut output,
+    )
+    .unwrap();
+    let merged_columnar = ColumnarReader::open(output).unwrap();
+    assert_columnar_eq(&merged_columnar, &expected_merged_columnar, true);
+}
+
+#[test]
+fn test_columnar_merge_and_remap_bug_1() {
+    let columnar_docs = vec![vec![
+        vec![
+            ("c1", ColumnValue::Numerical(NumericalValue::U64(0))),
+            ("c1", ColumnValue::Numerical(NumericalValue::U64(0))),
+        ],
+        vec![],
+    ]];
+    let shuffle_merge_order: Vec<RowAddr> = vec![
+        RowAddr {
+            segment_ord: 0,
+            row_id: 1,
+        },
+        RowAddr {
+            segment_ord: 0,
+            row_id: 0,
+        },
+    ];
+
+    test_columnar_merge_and_remap(columnar_docs, shuffle_merge_order);
+}
+
 #[test]
 fn test_columnar_merge_empty() {
    let columnar_reader_1 = build_columnar(&[]);
--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-common"
-version = "0.6.0"
+version = "0.7.0"
 authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
 license = "MIT"
 edition = "2021"
@@ -14,7 +14,7 @@ repository = "https://github.com/quickwit-oss/tantivy"

 [dependencies]
 byteorder = "1.4.3"
-ownedbytes = { version= "0.6", path="../ownedbytes" }
+ownedbytes = { version= "0.7", path="../ownedbytes" }
 async-trait = "0.1"
 time = { version = "0.3.10", features = ["serde-well-known"] }
 serde = { version = "1.0.136", features = ["derive"] }
@@ -22,3 +22,6 @@ serde = { version = "1.0.136", features = ["derive"] }
 [dev-dependencies]
 proptest = "1.0.0"
 rand = "0.8.4"
+
+[features]
+unstable = [] # useful for benches.
--- a/common/src/bitset.rs
+++ b/common/src/bitset.rs
@@ -1,12 +1,11 @@
-use std::convert::TryInto;
 use std::io::Write;
-use std::{fmt, io, u64};
+use std::{fmt, io};

 use ownedbytes::OwnedBytes;

 use crate::ByteCount;

-#[derive(Clone, Copy, Eq, PartialEq, Hash)]
+#[derive(Clone, Copy, Eq, PartialEq)]
 pub struct TinySet(u64);

 impl fmt::Debug for TinySet {
--- a/common/src/datetime.rs
+++ b/common/src/datetime.rs
@@ -1,5 +1,3 @@
-#![allow(deprecated)]
-
 use std::fmt;
 use std::io::{Read, Write};

@@ -27,9 +25,6 @@ pub enum DateTimePrecision {
    Nanoseconds,
 }

-#[deprecated(since = "0.20.0", note = "Use `DateTimePrecision` instead")]
-pub type DatePrecision = DateTimePrecision;
-
 /// A date/time value with nanoseconds precision.
 ///
 /// This timestamp does not carry any explicit time zone information.
@@ -40,7 +35,7 @@ pub type DatePrecision = DateTimePrecision;
 /// All constructors and conversions are provided as explicit
 /// functions and not by implementing any `From`/`Into` traits
 /// to prevent unintended usage.
-#[derive(Clone, Default, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Clone, Default, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
 pub struct DateTime {
    // Timestamp in nanoseconds.
    pub(crate) timestamp_nanos: i64,
--- a/common/src/json_path_writer.rs
+++ b/common/src/json_path_writer.rs
@@ -5,6 +5,12 @@ pub const JSON_PATH_SEGMENT_SEP: u8 = 1u8;
 pub const JSON_PATH_SEGMENT_SEP_STR: &str =
    unsafe { std::str::from_utf8_unchecked(&[JSON_PATH_SEGMENT_SEP]) };

+/// Byte separating the JSON path from the value in
+/// the binary representation of a JSON term.
+pub const JSON_END_OF_PATH: u8 = 0u8;
+pub const JSON_END_OF_PATH_STR: &str =
+    unsafe { std::str::from_utf8_unchecked(&[JSON_END_OF_PATH]) };
+
 /// Create a new JsonPathWriter, that creates flattened json paths for tantivy.
 #[derive(Clone, Debug, Default)]
 pub struct JsonPathWriter {
@@ -14,6 +20,14 @@ pub struct JsonPathWriter {
 }

 impl JsonPathWriter {
+    pub fn with_expand_dots(expand_dots: bool) -> Self {
+        JsonPathWriter {
+            path: String::new(),
+            indices: Vec::new(),
+            expand_dots,
+        }
+    }
+
    pub fn new() -> Self {
        JsonPathWriter {
            path: String::new(),
@@ -39,8 +53,8 @@ impl JsonPathWriter {
    pub fn push(&mut self, segment: &str) {
        let len_path = self.path.len();
        self.indices.push(len_path);
-        if !self.path.is_empty() {
-            self.path.push_str(JSON_PATH_SEGMENT_SEP_STR);
+        if self.indices.len() > 1 {
+            self.path.push(JSON_PATH_SEGMENT_SEP as char);
        }
        self.path.push_str(segment);
        if self.expand_dots {
@@ -55,6 +69,12 @@ impl JsonPathWriter {
        }
    }

+    /// Appends the end-of-path marker (`JSON_END_OF_PATH`) to the path.
+    #[inline]
+    pub fn set_end(&mut self) {
+        self.path.push_str(JSON_END_OF_PATH_STR);
+    }
+
    /// Remove the last segment. Does nothing if the path is empty.
    #[inline]
    pub fn pop(&mut self) {
@@ -91,6 +111,7 @@ mod tests {
    #[test]
    fn json_path_writer_test() {
        let mut writer = JsonPathWriter::new();
+        writer.set_expand_dots(false);

        writer.push("root");
        assert_eq!(writer.as_str(), "root");
@@ -109,4 +130,15 @@ mod tests {
        writer.push("k8s.node.id");
        assert_eq!(writer.as_str(), "root\u{1}k8s\u{1}node\u{1}id");
    }
+
+    #[test]
+    fn test_json_path_expand_dots_enabled_pop_segment() {
+        let mut json_writer = JsonPathWriter::with_expand_dots(true);
+        json_writer.push("hello");
+        assert_eq!(json_writer.as_str(), "hello");
+        json_writer.push("color.hue");
+        assert_eq!(json_writer.as_str(), "hello\x01color\x01hue");
+        json_writer.pop();
+        assert_eq!(json_writer.as_str(), "hello");
+    }
 }
--- a/common/src/lib.rs
+++ b/common/src/lib.rs
@@ -9,14 +9,12 @@ mod byte_count;
 mod datetime;
 pub mod file_slice;
 mod group_by;
-mod json_path_writer;
+pub mod json_path_writer;
 mod serialize;
 mod vint;
 mod writer;
 pub use bitset::*;
 pub use byte_count::ByteCount;
-#[allow(deprecated)]
-pub use datetime::DatePrecision;
 pub use datetime::{DateTime, DateTimePrecision};
 pub use group_by::GroupByIteratorExtended;
 pub use json_path_writer::JsonPathWriter;
--- a/common/src/serialize.rs
+++ b/common/src/serialize.rs
@@ -290,8 +290,7 @@ impl<'a> BinarySerializable for Cow<'a, [u8]> {
 #[cfg(test)]
 pub mod test {

-    use super::{VInt, *};
-    use crate::serialize::BinarySerializable;
+    use super::*;
    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();
        O::default().serialize(&mut buffer).unwrap();
--- a/common/src/vint.rs
+++ b/common/src/vint.rs
@@ -151,7 +151,7 @@ pub fn read_u32_vint_no_advance(data: &[u8]) -> (u32, usize) {
    (result, vlen)
 }
 /// Write a `u32` as a vint payload.
-pub fn write_u32_vint<W: io::Write>(val: u32, writer: &mut W) -> io::Result<()> {
+pub fn write_u32_vint<W: io::Write + ?Sized>(val: u32, writer: &mut W) -> io::Result<()> {
    let mut buf = [0u8; 8];
    let data = serialize_vint_u32(val, &mut buf);
    writer.write_all(data)
--- a/doc/assets/images/paradedb.png
+++ b/doc/assets/images/paradedb.png
--- a/doc/src/index_sorting.md
+++ b/doc/src/index_sorting.md
@@ -7,6 +7,11 @@
    - [Other](#other)
  - [Usage](#usage)

+# Index Sorting has been removed!
+More information here:
+
+https://github.com/quickwit-oss/tantivy/issues/2352
+
 # Index Sorting

 Tantivy allows you to sort the index according to a property.
--- a/examples/custom_collector.rs
+++ b/examples/custom_collector.rs
@@ -11,9 +11,10 @@ use columnar::Column;
 // ---
 // Importing tantivy...
 use tantivy::collector::{Collector, SegmentCollector};
+use tantivy::index::SegmentReader;
 use tantivy::query::QueryParser;
 use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
-use tantivy::{doc, Index, IndexWriter, Score, SegmentReader};
+use tantivy::{doc, Index, IndexWriter, Score};

 #[derive(Default)]
 struct Stats {
--- a/examples/date_time_field.rs
+++ b/examples/date_time_field.rs
@@ -4,7 +4,7 @@

 use tantivy::collector::TopDocs;
 use tantivy::query::QueryParser;
-use tantivy::schema::{DateOptions, Document, OwnedValue, Schema, INDEXED, STORED, STRING};
+use tantivy::schema::{DateOptions, Document, Schema, Value, INDEXED, STORED, STRING};
 use tantivy::{Index, IndexWriter, TantivyDocument};

 fn main() -> tantivy::Result<()> {
@@ -13,7 +13,7 @@ fn main() -> tantivy::Result<()> {
    let opts = DateOptions::from(INDEXED)
        .set_stored()
        .set_fast()
-        .set_precision(tantivy::DateTimePrecision::Seconds);
+        .set_precision(tantivy::schema::DateTimePrecision::Seconds);
    // Add `occurred_at` date field type
    let occurred_at = schema_builder.add_date_field("occurred_at", opts);
    let event_type = schema_builder.add_text_field("event", STRING | STORED);
@@ -61,10 +61,12 @@ fn main() -> tantivy::Result<()> {
        assert_eq!(count_docs.len(), 1);
        for (_score, doc_address) in count_docs {
            let retrieved_doc = searcher.doc::<TantivyDocument>(doc_address)?;
-            assert!(matches!(
-                retrieved_doc.get_first(occurred_at),
-                Some(OwnedValue::Date(_))
-            ));
+            assert!(retrieved_doc
+                .get_first(occurred_at)
+                .unwrap()
+                .as_value()
+                .as_datetime()
+                .is_some(),);
            assert_eq!(
                retrieved_doc.to_json(&schema),
                r#"{"event":["comment"],"occurred_at":["2022-06-22T13:00:00.22Z"]}"#
--- a/examples/faceted_search_with_tweaked_score.rs
+++ b/examples/faceted_search_with_tweaked_score.rs
@@ -51,7 +51,7 @@ fn main() -> tantivy::Result<()> {
    let reader = index.reader()?;
    let searcher = reader.searcher();
    {
-        let facets = vec![
+        let facets = [
            Facet::from("/ingredient/egg"),
            Facet::from("/ingredient/oil"),
            Facet::from("/ingredient/garlic"),
@@ -94,9 +94,8 @@ fn main() -> tantivy::Result<()> {
                    .doc::<TantivyDocument>(*doc_id)
                    .unwrap()
                    .get_first(title)
-                    .and_then(|v| v.as_str())
+                    .and_then(|v| v.as_str().map(|el| el.to_string()))
                    .unwrap()
-                    .to_owned()
            })
            .collect();
        assert_eq!(titles, vec!["Fried egg", "Egg rolls"]);
--- a/examples/index_from_multiple_threads.rs
+++ b/examples/index_from_multiple_threads.rs
@@ -61,7 +61,7 @@ fn main() -> tantivy::Result<()> {
                        debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
                        limbs and branches that arch over the pool"
                    ))?;
-            println!("add doc {} from thread 1 - opstamp {}", i, opstamp);
+            println!("add doc {i} from thread 1 - opstamp {opstamp}");
            thread::sleep(Duration::from_millis(20));
        }
        Result::<(), TantivyError>::Ok(())
@@ -82,7 +82,7 @@ fn main() -> tantivy::Result<()> {
                    body => "Some great book description..."
                ))?
            };
-            println!("add doc {} from thread 2 - opstamp {}", i, opstamp);
+            println!("add doc {i} from thread 2 - opstamp {opstamp}");
            thread::sleep(Duration::from_millis(10));
        }
        Result::<(), TantivyError>::Ok(())
--- a/examples/iterating_docs_and_positions.rs
+++ b/examples/iterating_docs_and_positions.rs
@@ -7,10 +7,11 @@
 // the list of documents containing a term, getting
 // its term frequency, and accessing its positions.

+use tantivy::postings::Postings;
 // ---
 // Importing tantivy...
 use tantivy::schema::*;
-use tantivy::{doc, DocSet, Index, IndexWriter, Postings, TERMINATED};
+use tantivy::{doc, DocSet, Index, IndexWriter, TERMINATED};

 fn main() -> tantivy::Result<()> {
    // We first create a schema for the sake of the
--- a/examples/warmer.rs
+++ b/examples/warmer.rs
@@ -3,10 +3,11 @@ use std::collections::{HashMap, HashSet};
 use std::sync::{Arc, RwLock, Weak};

 use tantivy::collector::TopDocs;
+use tantivy::index::SegmentId;
 use tantivy::query::QueryParser;
 use tantivy::schema::{Schema, FAST, TEXT};
 use tantivy::{
-    doc, DocAddress, DocId, Index, IndexWriter, Opstamp, Searcher, SearcherGeneration, SegmentId,
+    doc, DocAddress, DocId, Index, IndexWriter, Opstamp, Searcher, SearcherGeneration,
    SegmentReader, Warmer,
 };

--- a/ownedbytes/Cargo.toml
+++ b/ownedbytes/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
 name = "ownedbytes"
-version = "0.6.0"
+version = "0.7.0"
 edition = "2021"
 description = "Expose data as static slice"
 license = "MIT"
--- a/ownedbytes/src/lib.rs
+++ b/ownedbytes/src/lib.rs
@@ -1,4 +1,3 @@
-use std::convert::TryInto;
 use std::ops::{Deref, Range};
 use std::sync::Arc;
 use std::{fmt, io};
--- a/query-grammar/Cargo.toml
+++ b/query-grammar/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-query-grammar"
-version = "0.21.0"
+version = "0.22.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
--- a/query-grammar/src/infallible.rs
+++ b/query-grammar/src/infallible.rs
@@ -81,8 +81,8 @@ where
    T: InputTakeAtPosition + Clone,
    <T as InputTakeAtPosition>::Item: AsChar + Clone,
 {
-    opt_i(nom::character::complete::space0)(input)
-        .map(|(left, (spaces, errors))| (left, (spaces.expect("space0 can't fail"), errors)))
+    opt_i(nom::character::complete::multispace0)(input)
+        .map(|(left, (spaces, errors))| (left, (spaces.expect("multispace0 can't fail"), errors)))
 }

 pub(crate) fn space1_infallible<T>(input: T) -> JResult<T, Option<T>>
@@ -90,7 +90,7 @@ where
    T: InputTakeAtPosition + Clone + InputLength,
    <T as InputTakeAtPosition>::Item: AsChar + Clone,
 {
-    opt_i(nom::character::complete::space1)(input).map(|(left, (spaces, mut errors))| {
+    opt_i(nom::character::complete::multispace1)(input).map(|(left, (spaces, mut errors))| {
        if spaces.is_none() {
            errors.push(LenientErrorInternal {
                pos: left.input_len(),
--- a/query-grammar/src/query_grammar.rs
+++ b/query-grammar/src/query_grammar.rs
@@ -1,13 +1,14 @@
+use std::borrow::Cow;
 use std::iter::once;

 use nom::branch::alt;
 use nom::bytes::complete::tag;
 use nom::character::complete::{
-    anychar, char, digit1, none_of, one_of, satisfy, space0, space1, u32,
+    anychar, char, digit1, multispace0, multispace1, none_of, one_of, satisfy, u32,
 };
 use nom::combinator::{eof, map, map_res, opt, peek, recognize, value, verify};
 use nom::error::{Error, ErrorKind};
-use nom::multi::{many0, many1, separated_list0, separated_list1};
+use nom::multi::{many0, many1, separated_list0};
 use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple};
 use nom::IResult;

@@ -19,7 +20,7 @@ use crate::Occur;
 // Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to
 // special characters.
 const SPECIAL_CHARS: &[char] = &[
-    '+', '^', '`', ':', '{', '}', '"', '[', ']', '(', ')', '!', '\\', '*', ' ',
+    '+', '^', '`', ':', '{', '}', '"', '\'', '[', ']', '(', ')', '!', '\\', '*', ' ',
 ];

 /// consume a field name followed by colon. Return the field name with escape sequence
@@ -41,36 +42,92 @@ fn field_name(inp: &str) -> IResult<&str, String> {
    )(inp)
 }

+const ESCAPE_IN_WORD: &[char] = &['^', '`', ':', '{', '}', '"', '\'', '[', ']', '(', ')', '\\'];
+
+fn interpret_escape(source: &str) -> String {
+    let mut res = String::with_capacity(source.len());
+    let mut in_escape = false;
+    let require_escape = |c: char| c.is_whitespace() || ESCAPE_IN_WORD.contains(&c) || c == '-';
+
+    for c in source.chars() {
+        if in_escape {
+            if !require_escape(c) {
+                // `c` needs no escaping: keep the backslash in the output
+                res.push('\\');
+            }
+            res.push(c);
+            in_escape = false;
+        } else if c == '\\' {
+            in_escape = true;
+        } else {
+            res.push(c);
+        }
+    }
+    res
+}
+
 /// Consume a word outside of any context.
 // TODO should support escape sequences
-fn word(inp: &str) -> IResult<&str, &str> {
+fn word(inp: &str) -> IResult<&str, Cow<str>> {
    map_res(
        recognize(tuple((
-            satisfy(|c| {
-                !c.is_whitespace()
-                    && !['-', '^', '`', ':', '{', '}', '"', '[', ']', '(', ')'].contains(&c)
-            }),
-            many0(satisfy(|c: char| {
-                !c.is_whitespace() && ![':', '^', '{', '}', '"', '[', ']', '(', ')'].contains(&c)
-            })),
+            alt((
+                preceded(char('\\'), anychar),
+                satisfy(|c| !c.is_whitespace() && !ESCAPE_IN_WORD.contains(&c) && c != '-'),
+            )),
+            many0(alt((
+                preceded(char('\\'), anychar),
+                satisfy(|c: char| !c.is_whitespace() && !ESCAPE_IN_WORD.contains(&c)),
+            ))),
        ))),
        |s| match s {
            "OR" | "AND" | "NOT" | "IN" => Err(Error::new(inp, ErrorKind::Tag)),
-            _ => Ok(s),
+            s if s.contains('\\') => Ok(Cow::Owned(interpret_escape(s))),
+            s => Ok(Cow::Borrowed(s)),
        },
    )(inp)
 }

-fn word_infallible(delimiter: &str) -> impl Fn(&str) -> JResult<&str, Option<&str>> + '_ {
-    |inp| {
-        opt_i_err(
-            preceded(
-                space0,
-                recognize(many1(satisfy(|c| {
-                    !c.is_whitespace() && !delimiter.contains(c)
-                }))),
+fn word_infallible(
+    delimiter: &str,
+    emit_error: bool,
+) -> impl Fn(&str) -> JResult<&str, Option<Cow<str>>> + '_ {
+    // `emit_error`: when set, an unescaped `:` in the word is reported as a lenient error.
+
+    move |inp| {
+        map(
+            opt_i_err(
+                preceded(
+                    multispace0,
+                    recognize(many1(alt((
+                        preceded(char::<&str, _>('\\'), anychar),
+                        satisfy(|c| !c.is_whitespace() && !delimiter.contains(c)),
+                    )))),
+                ),
+                "expected word",
            ),
-            "expected word",
+            |(opt_s, mut errors)| match opt_s {
+                Some(s) => {
+                    if emit_error
+                        && (s
+                            .as_bytes()
+                            .windows(2)
+                            .any(|window| window[0] != b'\\' && window[1] == b':')
+                            || s.starts_with(':'))
+                    {
+                        errors.push(LenientErrorInternal {
+                            pos: inp.len(),
+                            message: "parsed possible invalid field as term".to_string(),
+                        });
+                    }
+                    if s.contains('\\') {
+                        (Some(Cow::Owned(interpret_escape(s))), errors)
+                    } else {
+                        (Some(Cow::Borrowed(s)), errors)
+                    }
+                }
+                None => (None, errors),
+            },
        )(inp)
    }
 }
@@ -159,7 +216,7 @@ fn simple_term_infallible(
                (value((), char('\'')), simple_quotes),
            ),
            // numbers are parsed with words in this case, as we allow string starting with a -
-            map(word_infallible(delimiter), |(text, errors)| {
+            map(word_infallible(delimiter, true), |(text, errors)| {
                (text.map(|text| (Delimiter::None, text.to_string())), errors)
            }),
        )(inp)
@@ -218,27 +275,14 @@ fn term_or_phrase_infallible(inp: &str) -> JResult<&str, Option<UserInputLeaf>>
 }

 fn term_group(inp: &str) -> IResult<&str, UserInputAst> {
-    let occur_symbol = alt((
-        value(Occur::MustNot, char('-')),
-        value(Occur::Must, char('+')),
-    ));
-
    map(
        tuple((
-            terminated(field_name, space0),
-            delimited(
-                tuple((char('('), space0)),
-                separated_list0(space1, tuple((opt(occur_symbol), term_or_phrase))),
-                char(')'),
-            ),
+            terminated(field_name, multispace0),
+            delimited(tuple((char('('), multispace0)), ast, char(')')),
        )),
-        |(field_name, terms)| {
-            UserInputAst::Clause(
-                terms
-                    .into_iter()
-                    .map(|(occur, leaf)| (occur, leaf.set_field(Some(field_name.clone())).into()))
-                    .collect(),
-            )
+        |(field_name, mut ast)| {
+            ast.set_default_field(field_name);
+            ast
        },
    )(inp)
 }
@@ -250,7 +294,7 @@ fn term_group_precond(inp: &str) -> IResult<&str, (), ()> {
        (),
        peek(tuple((
            field_name,
-            space0,
+            multispace0,
            char('('), // when we are here, we know it can't be anything but a term group
        ))),
    )(inp)
@@ -258,46 +302,18 @@ fn term_group_precond(inp: &str) -> IResult<&str, (), ()> {
 }

 fn term_group_infallible(inp: &str) -> JResult<&str, UserInputAst> {
-    let (mut inp, (field_name, _, _, _)) =
-        tuple((field_name, space0, char('('), space0))(inp).expect("precondition failed");
+    let (inp, (field_name, _, _, _)) =
+        tuple((field_name, multispace0, char('('), multispace0))(inp).expect("precondition failed");

-    let mut terms = Vec::new();
-    let mut errs = Vec::new();
-
-    let mut first_round = true;
-    loop {
-        let mut space_error = if first_round {
-            first_round = false;
-            Vec::new()
-        } else {
-            let (rest, (_, err)) = space1_infallible(inp)?;
-            inp = rest;
-            err
-        };
-        if inp.is_empty() {
-            errs.push(LenientErrorInternal {
-                pos: inp.len(),
-                message: "missing )".to_string(),
-            });
-            break Ok((inp, (UserInputAst::Clause(terms), errs)));
-        }
-        if let Some(inp) = inp.strip_prefix(')') {
-            break Ok((inp, (UserInputAst::Clause(terms), errs)));
-        }
-        // only append missing space error if we did not reach the end of group
-        errs.append(&mut space_error);
-
-        // here we do the assumption term_or_phrase_infallible always consume something if the
-        // first byte is not `)` or ' '. If it did not, we would end up looping.
-
-        let (rest, ((occur, leaf), mut err)) =
-            tuple_infallible((occur_symbol, term_or_phrase_infallible))(inp)?;
-        errs.append(&mut err);
-        if let Some(leaf) = leaf {
-            terms.push((occur, leaf.set_field(Some(field_name.clone())).into()));
-        }
-        inp = rest;
-    }
+    let res = delimited_infallible(
+        nothing,
+        map(ast_infallible, |(mut ast, errors)| {
+            ast.set_default_field(field_name.to_string());
+            (ast, errors)
+        }),
+        opt_i_err(char(')'), "expected ')'"),
+    )(inp);
+    res
 }

 fn exists(inp: &str) -> IResult<&str, UserInputLeaf> {
@@ -305,7 +321,7 @@ fn exists(inp: &str) -> IResult<&str, UserInputLeaf> {
        UserInputLeaf::Exists {
            field: String::new(),
        },
-        tuple((space0, char('*'))),
+        tuple((multispace0, char('*'))),
    )(inp)
 }

@@ -314,7 +330,7 @@ fn exists_precond(inp: &str) -> IResult<&str, (), ()> {
        (),
        peek(tuple((
            field_name,
-            space0,
+            multispace0,
            char('*'), // when we are here, we know it can't be anything but a exists
        ))),
    )(inp)
@@ -323,7 +339,7 @@ fn exists_precond(inp: &str) -> IResult<&str, (), ()> {

 fn exists_infallible(inp: &str) -> JResult<&str, UserInputAst> {
    let (inp, (field_name, _, _)) =
-        tuple((field_name, space0, char('*')))(inp).expect("precondition failed");
+        tuple((field_name, multispace0, char('*')))(inp).expect("precondition failed");

    let exists = UserInputLeaf::Exists { field: field_name }.into();
    Ok((inp, (exists, Vec::new())))
@@ -349,7 +365,7 @@ fn literal_no_group_infallible(inp: &str) -> JResult<&str, Option<UserInputAst>>
            alt_infallible(
                (
                    (
-                        value((), tuple((tag("IN"), space0, char('[')))),
+                        value((), tuple((tag("IN"), multispace0, char('[')))),
                        map(set_infallible, |(set, errs)| (Some(set), errs)),
                    ),
                    (
@@ -363,15 +379,6 @@ fn literal_no_group_infallible(inp: &str) -> JResult<&str, Option<UserInputAst>>
        |((field_name, _, leaf), mut errors)| {
            (
                leaf.map(|leaf| {
-                    if matches!(&leaf, UserInputLeaf::Literal(literal)
-                            if literal.phrase.contains(':') && literal.delimiter == Delimiter::None)
-                        && field_name.is_none()
-                    {
-                        errors.push(LenientErrorInternal {
-                            pos: inp.len(),
-                            message: "parsed possible invalid field as term".to_string(),
-                        });
-                    }
                    if matches!(&leaf, UserInputLeaf::Literal(literal)
                            if literal.phrase == "NOT" && literal.delimiter == Delimiter::None)
                        && field_name.is_none()
@@ -430,8 +437,8 @@ fn range(inp: &str) -> IResult<&str, UserInputLeaf> {
    // check for unbounded range in the form of <5, <=10, >5, >=5
    let elastic_unbounded_range = map(
        tuple((
-            preceded(space0, alt((tag(">="), tag("<="), tag("<"), tag(">")))),
-            preceded(space0, range_term_val()),
+            preceded(multispace0, alt((tag(">="), tag("<="), tag("<"), tag(">")))),
+            preceded(multispace0, range_term_val()),
        )),
        |(comparison_sign, bound)| match comparison_sign {
            ">=" => (UserInputBound::Inclusive(bound), UserInputBound::Unbounded),
@@ -444,7 +451,7 @@ fn range(inp: &str) -> IResult<&str, UserInputLeaf> {
    );

    let lower_bound = map(
-        separated_pair(one_of("{["), space0, range_term_val()),
+        separated_pair(one_of("{["), multispace0, range_term_val()),
        |(boundary_char, lower_bound)| {
            if lower_bound == "*" {
                UserInputBound::Unbounded
@@ -457,7 +464,7 @@ fn range(inp: &str) -> IResult<&str, UserInputLeaf> {
    );

    let upper_bound = map(
-        separated_pair(range_term_val(), space0, one_of("}]")),
+        separated_pair(range_term_val(), multispace0, one_of("}]")),
        |(upper_bound, boundary_char)| {
            if upper_bound == "*" {
                UserInputBound::Unbounded
@@ -469,8 +476,11 @@ fn range(inp: &str) -> IResult<&str, UserInputLeaf> {
        },
    );

-    let lower_to_upper =
-        separated_pair(lower_bound, tuple((space1, tag("TO"), space1)), upper_bound);
+    let lower_to_upper = separated_pair(
+        lower_bound,
+        tuple((multispace1, tag("TO"), multispace1)),
+        upper_bound,
+    );

    map(
        alt((elastic_unbounded_range, lower_to_upper)),
@@ -487,17 +497,20 @@ fn range_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
        tuple_infallible((
            opt_i(anychar),
            space0_infallible,
-            word_infallible("]}"),
+            word_infallible("]}", false),
            space1_infallible,
            opt_i_err(
-                terminated(tag("TO"), alt((value((), space1), value((), eof)))),
+                terminated(tag("TO"), alt((value((), multispace1), value((), eof)))),
                "missing keyword TO",
            ),
-            word_infallible("]}"),
+            word_infallible("]}", false),
            opt_i_err(one_of("]}"), "missing range delimiter"),
        )),
-        |((lower_bound_kind, _space0, lower, _space1, to, upper, upper_bound_kind), errs)| {
-            let lower_bound = match (lower_bound_kind, lower) {
+        |(
+            (lower_bound_kind, _multispace0, lower, _multispace1, to, upper, upper_bound_kind),
+            errs,
+        )| {
+            let lower_bound = match (lower_bound_kind, lower.as_deref()) {
                (_, Some("*")) => UserInputBound::Unbounded,
                (_, None) => UserInputBound::Unbounded,
                // if it is some, TO was actually the bound (i.e. [TO TO something])
@@ -506,7 +519,7 @@ fn range_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
                (Some('{'), Some(bound)) => UserInputBound::Exclusive(bound.to_string()),
                _ => unreachable!("precondition failed, range did not start with [ or {{"),
            };
-            let upper_bound = match (upper_bound_kind, upper) {
+            let upper_bound = match (upper_bound_kind, upper.as_deref()) {
                (_, Some("*")) => UserInputBound::Unbounded,
                (_, None) => UserInputBound::Unbounded,
                (Some(']'), Some(bound)) => UserInputBound::Inclusive(bound.to_string()),
@@ -523,7 +536,7 @@ fn range_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
            (
                (
                    value((), tag(">=")),
-                    map(word_infallible(""), |(bound, err)| {
+                    map(word_infallible("", false), |(bound, err)| {
                        (
                            (
                                bound
@@ -537,7 +550,7 @@ fn range_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
                ),
                (
                    value((), tag("<=")),
-                    map(word_infallible(""), |(bound, err)| {
+                    map(word_infallible("", false), |(bound, err)| {
                        (
                            (
                                UserInputBound::Unbounded,
@@ -551,7 +564,7 @@ fn range_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
                ),
                (
                    value((), tag(">")),
-                    map(word_infallible(""), |(bound, err)| {
+                    map(word_infallible("", false), |(bound, err)| {
                        (
                            (
                                bound
@@ -565,7 +578,7 @@ fn range_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
                ),
                (
                    value((), tag("<")),
-                    map(word_infallible(""), |(bound, err)| {
+                    map(word_infallible("", false), |(bound, err)| {
                        (
                            (
                                UserInputBound::Unbounded,
@@ -596,10 +609,10 @@ fn range_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
 fn set(inp: &str) -> IResult<&str, UserInputLeaf> {
    map(
        preceded(
-            tuple((space0, tag("IN"), space1)),
+            tuple((multispace0, tag("IN"), multispace1)),
            delimited(
-                tuple((char('['), space0)),
-                separated_list0(space1, map(simple_term, |(_, term)| term)),
+                tuple((char('['), multispace0)),
+                separated_list0(multispace1, map(simple_term, |(_, term)| term)),
                char(']'),
            ),
        ),
@@ -667,7 +680,7 @@ fn leaf(inp: &str) -> IResult<&str, UserInputAst> {
    alt((
        delimited(char('('), ast, char(')')),
        map(char('*'), |_| UserInputAst::from(UserInputLeaf::All)),
-        map(preceded(tuple((tag("NOT"), space1)), leaf), negate),
+        map(preceded(tuple((tag("NOT"), multispace1)), leaf), negate),
        literal,
    ))(inp)
 }
@@ -780,27 +793,23 @@ fn binary_operand(inp: &str) -> IResult<&str, BinaryOperand> {
 }

 fn aggregate_binary_expressions(
-    left: UserInputAst,
-    others: Vec<(BinaryOperand, UserInputAst)>,
-) -> UserInputAst {
-    let mut dnf: Vec<Vec<UserInputAst>> = vec![vec![left]];
-    for (operator, operand_ast) in others {
-        match operator {
-            BinaryOperand::And => {
-                if let Some(last) = dnf.last_mut() {
-                    last.push(operand_ast);
-                }
-            }
-            BinaryOperand::Or => {
-                dnf.push(vec![operand_ast]);
-            }
-        }
-    }
-    if dnf.len() == 1 {
-        UserInputAst::and(dnf.into_iter().next().unwrap()) //< safe
+    left: (Option<Occur>, UserInputAst),
+    others: Vec<(Option<BinaryOperand>, Option<Occur>, UserInputAst)>,
+) -> Result<UserInputAst, LenientErrorInternal> {
+    let mut leafs = Vec::with_capacity(others.len() + 1);
+    leafs.push((None, left.0, Some(left.1)));
+    leafs.extend(
+        others
+            .into_iter()
+            .map(|(operand, occur, ast)| (operand, occur, Some(ast))),
+    );
+    // the parameters we pass should statically guarantee we can't get errors
+    // (no prefix BinaryOperand is provided)
+    let (res, mut errors) = aggregate_infallible_expressions(leafs);
+    if errors.is_empty() {
+        Ok(res)
    } else {
-        let conjunctions = dnf.into_iter().map(UserInputAst::and).collect();
-        UserInputAst::or(conjunctions)
+        Err(errors.swap_remove(0))
    }
 }

@@ -816,30 +825,10 @@ fn aggregate_infallible_expressions(
        return (UserInputAst::empty_query(), err);
    }

-    let use_operand = leafs.iter().any(|(operand, _, _)| operand.is_some());
-    let all_operand = leafs
-        .iter()
-        .skip(1)
-        .all(|(operand, _, _)| operand.is_some());
    let early_operand = leafs
        .iter()
        .take(1)
        .all(|(operand, _, _)| operand.is_some());
-    let use_occur = leafs.iter().any(|(_, occur, _)| occur.is_some());
-
-    if use_operand && use_occur {
-        err.push(LenientErrorInternal {
-            pos: 0,
-            message: "Use of mixed occur and boolean operator".to_string(),
-        });
-    }
-
-    if use_operand && !all_operand {
-        err.push(LenientErrorInternal {
-            pos: 0,
-            message: "Missing boolean operator".to_string(),
-        });
-    }

    if early_operand {
        err.push(LenientErrorInternal {
@@ -866,7 +855,15 @@ fn aggregate_infallible_expressions(
                    Some(BinaryOperand::And) => Some(Occur::Must),
                    _ => Some(Occur::Should),
                };
-                clauses.push(vec![(occur.or(default_op), ast.clone())]);
+                if occur == &Some(Occur::MustNot) && default_op == Some(Occur::Should) {
+                    // if occur is MustNot *and* operation is OR, we synthetize a ShouldNot
+                    clauses.push(vec![(
+                        Some(Occur::Should),
+                        ast.clone().unary(Occur::MustNot),
+                    )])
+                } else {
+                    clauses.push(vec![(occur.or(default_op), ast.clone())]);
+                }
            }
            None => {
                let default_op = match next_operator {
@@ -874,7 +871,15 @@ fn aggregate_infallible_expressions(
                    Some(BinaryOperand::Or) => Some(Occur::Should),
                    None => None,
                };
-                clauses.push(vec![(occur.or(default_op), ast.clone())])
+                if occur == &Some(Occur::MustNot) && default_op == Some(Occur::Should) {
+                    // if occur is MustNot *and* operation is OR, we synthetize a ShouldNot
+                    clauses.push(vec![(
+                        Some(Occur::Should),
+                        ast.clone().unary(Occur::MustNot),
+                    )])
+                } else {
+                    clauses.push(vec![(occur.or(default_op), ast.clone())])
+                }
            }
        }
    }
@@ -891,7 +896,12 @@ fn aggregate_infallible_expressions(
            }
        }
        Some(BinaryOperand::Or) => {
-            clauses.push(vec![(last_occur.or(Some(Occur::Should)), last_ast)]);
+            if last_occur == Some(Occur::MustNot) {
+                // if occur is MustNot *and* operation is OR, we synthetize a ShouldNot
+                clauses.push(vec![(Some(Occur::Should), last_ast.unary(Occur::MustNot))]);
+            } else {
+                clauses.push(vec![(last_occur.or(Some(Occur::Should)), last_ast)]);
+            }
        }
        None => clauses.push(vec![(last_occur, last_ast)]),
    }
@@ -917,35 +927,29 @@ fn aggregate_infallible_expressions(
    }
 }

-fn operand_leaf(inp: &str) -> IResult<&str, (BinaryOperand, UserInputAst)> {
-    tuple((
-        terminated(binary_operand, space0),
-        terminated(boosted_leaf, space0),
-    ))(inp)
+fn operand_leaf(inp: &str) -> IResult<&str, (Option<BinaryOperand>, Option<Occur>, UserInputAst)> {
+    map(
+        tuple((
+            terminated(opt(binary_operand), multispace0),
+            terminated(occur_leaf, multispace0),
+        )),
+        |(operand, (occur, ast))| (operand, occur, ast),
+    )(inp)
 }

 fn ast(inp: &str) -> IResult<&str, UserInputAst> {
-    let boolean_expr = map(
-        separated_pair(boosted_leaf, space1, many1(operand_leaf)),
+    let boolean_expr = map_res(
+        separated_pair(occur_leaf, multispace1, many1(operand_leaf)),
        |(left, right)| aggregate_binary_expressions(left, right),
    );
-    let whitespace_separated_leaves = map(separated_list1(space1, occur_leaf), |subqueries| {
-        if subqueries.len() == 1 {
-            let (occur_opt, ast) = subqueries.into_iter().next().unwrap();
-            match occur_opt.unwrap_or(Occur::Should) {
-                Occur::Must | Occur::Should => ast,
-                Occur::MustNot => UserInputAst::Clause(vec![(Some(Occur::MustNot), ast)]),
-            }
+    let single_leaf = map(occur_leaf, |(occur, ast)| {
+        if occur == Some(Occur::MustNot) {
+            ast.unary(Occur::MustNot)
        } else {
-            UserInputAst::Clause(subqueries.into_iter().collect())
+            ast
        }
    });
-
-    delimited(
-        space0,
-        alt((boolean_expr, whitespace_separated_leaves)),
-        space0,
-    )(inp)
+    delimited(multispace0, alt((boolean_expr, single_leaf)), multispace0)(inp)
 }

 fn ast_infallible(inp: &str) -> JResult<&str, UserInputAst> {
@@ -969,7 +973,7 @@ fn ast_infallible(inp: &str) -> JResult<&str, UserInputAst> {
 }

 pub fn parse_to_ast(inp: &str) -> IResult<&str, UserInputAst> {
-    map(delimited(space0, opt(ast), eof), |opt_ast| {
+    map(delimited(multispace0, opt(ast), eof), |opt_ast| {
        rewrite_ast(opt_ast.unwrap_or_else(UserInputAst::empty_query))
    })(inp)
 }
@@ -1145,24 +1149,43 @@ mod test {
    #[test]
    fn test_parse_query_to_ast_binary_op() {
        test_parse_query_to_ast_helper("a AND b", "(+a +b)");
+        test_parse_query_to_ast_helper("a\nAND b", "(+a +b)");
        test_parse_query_to_ast_helper("a OR b", "(?a ?b)");
        test_parse_query_to_ast_helper("a OR b AND c", "(?a ?(+b +c))");
        test_parse_query_to_ast_helper("a AND b         AND c", "(+a +b +c)");
-        test_is_parse_err("a OR b aaa", "(?a ?b *aaa)");
-        test_is_parse_err("a AND b aaa", "(?(+a +b) *aaa)");
-        test_is_parse_err("aaa a OR b ", "(*aaa ?a ?b)");
-        test_is_parse_err("aaa ccc a OR b ", "(*aaa *ccc ?a ?b)");
-        test_is_parse_err("aaa a AND b ", "(*aaa ?(+a +b))");
-        test_is_parse_err("aaa ccc a AND b ", "(*aaa *ccc ?(+a +b))");
+        test_parse_query_to_ast_helper("a OR b aaa", "(?a ?b *aaa)");
+        test_parse_query_to_ast_helper("a AND b aaa", "(?(+a +b) *aaa)");
+        test_parse_query_to_ast_helper("aaa a OR b ", "(*aaa ?a ?b)");
+        test_parse_query_to_ast_helper("aaa ccc a OR b ", "(*aaa *ccc ?a ?b)");
+        test_parse_query_to_ast_helper("aaa a AND b ", "(*aaa ?(+a +b))");
+        test_parse_query_to_ast_helper("aaa ccc a AND b ", "(*aaa *ccc ?(+a +b))");
    }

    #[test]
    fn test_parse_mixed_bool_occur() {
-        test_is_parse_err("a OR b +aaa", "(?a ?b +aaa)");
-        test_is_parse_err("a AND b -aaa", "(?(+a +b) -aaa)");
-        test_is_parse_err("+a OR +b aaa", "(+a +b *aaa)");
-        test_is_parse_err("-a AND -b aaa", "(?(-a -b) *aaa)");
-        test_is_parse_err("-aaa +ccc -a OR b ", "(-aaa +ccc -a ?b)");
+        test_parse_query_to_ast_helper("+a OR +b", "(+a +b)");
+
+        test_parse_query_to_ast_helper("a AND -b", "(+a -b)");
+        test_parse_query_to_ast_helper("-a AND b", "(-a +b)");
+        test_parse_query_to_ast_helper("a AND NOT b", "(+a +(-b))");
+        test_parse_query_to_ast_helper("NOT a AND b", "(+(-a) +b)");
+
+        test_parse_query_to_ast_helper("a AND NOT b AND c", "(+a +(-b) +c)");
+        test_parse_query_to_ast_helper("a AND -b AND c", "(+a -b +c)");
+
+        test_parse_query_to_ast_helper("a OR -b", "(?a ?(-b))");
+        test_parse_query_to_ast_helper("-a OR b", "(?(-a) ?b)");
+        test_parse_query_to_ast_helper("a OR NOT b", "(?a ?(-b))");
+        test_parse_query_to_ast_helper("NOT a OR b", "(?(-a) ?b)");
+
+        test_parse_query_to_ast_helper("a OR NOT b OR c", "(?a ?(-b) ?c)");
+        test_parse_query_to_ast_helper("a OR -b OR c", "(?a ?(-b) ?c)");
+
+        test_parse_query_to_ast_helper("a OR b +aaa", "(?a ?b +aaa)");
+        test_parse_query_to_ast_helper("a AND b -aaa", "(?(+a +b) -aaa)");
+        test_parse_query_to_ast_helper("+a OR +b aaa", "(+a +b *aaa)");
+        test_parse_query_to_ast_helper("-a AND -b aaa", "(?(-a -b) *aaa)");
+        test_parse_query_to_ast_helper("-aaa +ccc -a OR b ", "(-aaa +ccc ?(-a) ?b)");
    }

    #[test]
@@ -1182,6 +1205,12 @@ mod test {
        test_parse_query_to_ast_helper("weight: <= 70", "\"weight\":{\"*\" TO \"70\"]");

        test_parse_query_to_ast_helper("weight: <= 70.5", "\"weight\":{\"*\" TO \"70.5\"]");
+
+        test_parse_query_to_ast_helper(">a", "{\"a\" TO \"*\"}");
+        test_parse_query_to_ast_helper(">=a", "[\"a\" TO \"*\"}");
+        test_parse_query_to_ast_helper("<a", "{\"*\" TO \"a\"}");
+        test_parse_query_to_ast_helper("<=a", "{\"*\" TO \"a\"]");
+        test_parse_query_to_ast_helper("<=bsd", "{\"*\" TO \"bsd\"]");
    }

    #[test]
@@ -1452,8 +1481,18 @@ mod test {

    #[test]
    fn test_parse_query_term_group() {
-        test_parse_query_to_ast_helper(r#"field:(abc)"#, r#"(*"field":abc)"#);
+        test_parse_query_to_ast_helper(r#"field:(abc)"#, r#""field":abc"#);
        test_parse_query_to_ast_helper(r#"field:(+a -"b c")"#, r#"(+"field":a -"field":"b c")"#);
+        test_parse_query_to_ast_helper(r#"field:(a AND "b c")"#, r#"(+"field":a +"field":"b c")"#);
+        test_parse_query_to_ast_helper(r#"field:(a OR "b c")"#, r#"(?"field":a ?"field":"b c")"#);
+        test_parse_query_to_ast_helper(
+            r#"field:(a OR (b AND c))"#,
+            r#"(?"field":a ?(+"field":b +"field":c))"#,
+        );
+        test_parse_query_to_ast_helper(
+            r#"field:(a [b TO c])"#,
+            r#"(*"field":a *"field":["b" TO "c"])"#,
+        );

        test_is_parse_err(r#"field:(+a -"b c""#, r#"(+"field":a -"field":"b c")"#);
    }
@@ -1605,5 +1644,21 @@ mod test {
            r#"myfield:'hello\"happy\'tax'"#,
            r#""myfield":'hello"happy'tax'"#,
        );
+        // we don't process escape sequence for chars which don't require it
+        test_parse_query_to_ast_helper(r#"abc\*"#, r#"abc\*"#);
+    }
+
+    #[test]
+    fn test_queries_with_colons() {
+        test_parse_query_to_ast_helper(r#""abc:def""#, r#""abc:def""#);
+        test_parse_query_to_ast_helper(r#"'abc:def'"#, r#"'abc:def'"#);
+        test_parse_query_to_ast_helper(r#"abc\:def"#, r#"abc:def"#);
+        test_parse_query_to_ast_helper(r#""abc\:def""#, r#""abc:def""#);
+        test_parse_query_to_ast_helper(r#"'abc\:def'"#, r#"'abc:def'"#);
+    }
+
+    #[test]
+    fn test_invalid_field() {
+        test_is_parse_err(r#"!bc:def"#, "!bc:def");
    }
 }
--- a/query-grammar/src/user_input_ast.rs
+++ b/query-grammar/src/user_input_ast.rs
@@ -44,6 +44,26 @@ impl UserInputLeaf {
            },
        }
    }
+
+    pub(crate) fn set_default_field(&mut self, default_field: String) {
+        match self {
+            UserInputLeaf::Literal(ref mut literal) if literal.field_name.is_none() => {
+                literal.field_name = Some(default_field)
+            }
+            UserInputLeaf::All => {
+                *self = UserInputLeaf::Exists {
+                    field: default_field,
+                }
+            }
+            UserInputLeaf::Range { ref mut field, .. } if field.is_none() => {
+                *field = Some(default_field)
+            }
+            UserInputLeaf::Set { ref mut field, .. } if field.is_none() => {
+                *field = Some(default_field)
+            }
+            _ => (), // field was already set, do nothing
+        }
+    }
 }

 impl Debug for UserInputLeaf {
@@ -205,6 +225,16 @@ impl UserInputAst {
    pub fn or(asts: Vec<UserInputAst>) -> UserInputAst {
        UserInputAst::compose(Occur::Should, asts)
    }
+
+    pub(crate) fn set_default_field(&mut self, field: String) {
+        match self {
+            UserInputAst::Clause(clauses) => clauses
+                .iter_mut()
+                .for_each(|(_, ast)| ast.set_default_field(field.clone())),
+            UserInputAst::Leaf(leaf) => leaf.set_default_field(field),
+            UserInputAst::Boost(ref mut ast, _) => ast.set_default_field(field),
+        }
+    }
 }

 impl From<UserInputLiteral> for UserInputLeaf {
--- a/src/aggregation/agg_bench.rs
+++ b/src/aggregation/agg_bench.rs
@@ -1,550 +0,0 @@
-#[cfg(all(test, feature = "unstable"))]
-mod bench {
-
-    use rand::prelude::SliceRandom;
-    use rand::rngs::StdRng;
-    use rand::{Rng, SeedableRng};
-    use rand_distr::Distribution;
-    use serde_json::json;
-    use test::{self, Bencher};
-
-    use crate::aggregation::agg_req::Aggregations;
-    use crate::aggregation::AggregationCollector;
-    use crate::query::{AllQuery, TermQuery};
-    use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
-    use crate::{Index, Term};
-
-    #[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
-    enum Cardinality {
-        /// All documents contain exactly one value.
-        /// `Full` is the default for auto-detecting the Cardinality, since it is the most strict.
-        #[default]
-        Full = 0,
-        /// All documents contain at most one value.
-        Optional = 1,
-        /// All documents may contain any number of values.
-        Multivalued = 2,
-        /// 1 / 20 documents has a value
-        Sparse = 3,
-    }
-
-    fn get_collector(agg_req: Aggregations) -> AggregationCollector {
-        AggregationCollector::from_aggs(agg_req, Default::default())
-    }
-
-    fn get_test_index_bench(cardinality: Cardinality) -> crate::Result<Index> {
-        let mut schema_builder = Schema::builder();
-        let text_fieldtype = crate::schema::TextOptions::default()
-            .set_indexing_options(
-                TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
-            )
-            .set_stored();
-        let text_field = schema_builder.add_text_field("text", text_fieldtype);
-        let json_field = schema_builder.add_json_field("json", FAST);
-        let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
-        let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
-        let score_fieldtype = crate::schema::NumericOptions::default().set_fast();
-        let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
-        let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
-        let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
-        let index = Index::create_from_tempdir(schema_builder.build())?;
-        let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
-
-        let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
-
-        let many_terms_data = (0..150_000)
-            .map(|num| format!("author{}", num))
-            .collect::<Vec<_>>();
-        {
-            let mut rng = StdRng::from_seed([1u8; 32]);
-            let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
-            // To make the different test cases comparable we just change one doc to force the
-            // cardinality
-            if cardinality == Cardinality::Optional {
-                index_writer.add_document(doc!())?;
-            }
-            if cardinality == Cardinality::Multivalued {
-                index_writer.add_document(doc!(
-                    json_field => json!({"mixed_type": 10.0}),
-                    json_field => json!({"mixed_type": 10.0}),
-                    text_field => "cool",
-                    text_field => "cool",
-                    text_field_many_terms => "cool",
-                    text_field_many_terms => "cool",
-                    text_field_few_terms => "cool",
-                    text_field_few_terms => "cool",
-                    score_field => 1u64,
-                    score_field => 1u64,
-                    score_field_f64 => lg_norm.sample(&mut rng),
-                    score_field_f64 => lg_norm.sample(&mut rng),
-                    score_field_i64 => 1i64,
-                    score_field_i64 => 1i64,
-                ))?;
-            }
-            let mut doc_with_value = 1_000_000;
-            if cardinality == Cardinality::Sparse {
-                doc_with_value /= 20;
-            }
-            let _val_max = 1_000_000.0;
-            for _ in 0..doc_with_value {
-                let val: f64 = rng.gen_range(0.0..1_000_000.0);
-                let json = if rng.gen_bool(0.1) {
-                    // 10% are numeric values
-                    json!({ "mixed_type": val })
-                } else {
-                    json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
-                };
-                index_writer.add_document(doc!(
-                    text_field => "cool",
-                    json_field => json,
-                    text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
-                    text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
-                    score_field => val as u64,
-                    score_field_f64 => lg_norm.sample(&mut rng),
-                    score_field_i64 => val as i64,
-                ))?;
-                if cardinality == Cardinality::Sparse {
-                    for _ in 0..20 {
-                        index_writer.add_document(doc!(text_field => "cool"))?;
-                    }
-                }
-            }
-            // writing the segment
-            index_writer.commit()?;
-        }
-
-        Ok(index)
-    }
-
-    use paste::paste;
-    #[macro_export]
-    macro_rules! bench_all_cardinalities {
-        (  $x:ident ) => {
-            paste! {
-                #[bench]
-                fn $x(b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Full)
-                }
-
-                #[bench]
-                fn [<$x _opt>](b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Optional)
-                }
-
-                #[bench]
-                fn [<$x _multi>](b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Multivalued)
-                }
-
-                #[bench]
-                fn [<$x _sparse>](b: &mut Bencher) {
-                    [<$x _card>](b, Cardinality::Sparse)
-                }
-
-            }
-        };
-    }
-
-    bench_all_cardinalities!(bench_aggregation_average_u64);
-
-    fn bench_aggregation_average_u64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average": { "avg": { "field": "score", } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_stats_f64);
-
-    fn bench_aggregation_stats_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average_f64": { "stats": { "field": "score_f64", } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_average_f64);
-
-    fn bench_aggregation_average_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average_f64": { "avg": { "field": "score_f64", } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_percentiles_f64);
-
-    fn bench_aggregation_percentiles_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_str = r#"
-            {
-              "mypercentiles": {
-                "percentiles": {
-                  "field": "score_f64",
-                  "percents": [ 95, 99, 99.9 ]
-                }
-              }
-            } "#;
-            let agg_req_1: Aggregations = serde_json::from_str(agg_req_str).unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_average_u64_and_f64);
-
-    fn bench_aggregation_average_u64_and_f64_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "average_f64": { "avg": { "field": "score_f64" } },
-                "average": { "avg": { "field": "score" } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_few);
-
-    fn bench_aggregation_terms_few_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": { "terms": { "field": "text_few_terms" } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many_with_sub_agg);
-
-    fn bench_aggregation_terms_many_with_sub_agg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": {
-                    "terms": { "field": "text_many_terms" },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many_json_mixed_type_with_sub_agg);
-
-    fn bench_aggregation_terms_many_json_mixed_type_with_sub_agg_card(
-        b: &mut Bencher,
-        cardinality: Cardinality,
-    ) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": {
-                    "terms": { "field": "json.mixed_type" },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many2);
-
-    fn bench_aggregation_terms_many2_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": { "terms": { "field": "text_many_terms" } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_terms_many_order_by_term);
-
-    fn bench_aggregation_terms_many_order_by_term_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req: Aggregations = serde_json::from_value(json!({
-                "my_texts": { "terms": { "field": "text_many_terms", "order": { "_key": "desc" } } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_range_only);
-
-    fn bench_aggregation_range_only_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "range_f64": { "range": { "field": "score_f64", "ranges": [
-                    { "from": 3, "to": 7000 },
-                    { "from": 7000, "to": 20000 },
-                    { "from": 20000, "to": 30000 },
-                    { "from": 30000, "to": 40000 },
-                    { "from": 40000, "to": 50000 },
-                    { "from": 50000, "to": 60000 }
-                ] } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_range_with_avg);
-
-    fn bench_aggregation_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "range": {
-                        "field": "score_f64",
-                        "ranges": [
-                            { "from": 3, "to": 7000 },
-                            { "from": 7000, "to": 20000 },
-                            { "from": 20000, "to": 30000 },
-                            { "from": 30000, "to": 40000 },
-                            { "from": 40000, "to": 50000 },
-                            { "from": 50000, "to": 60000 }
-                        ]
-                    },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    // hard bounds has a different algorithm, because it actually limits collection range
-    //
-    bench_all_cardinalities!(bench_aggregation_histogram_only_hard_bounds);
-
-    fn bench_aggregation_histogram_only_hard_bounds_card(
-        b: &mut Bencher,
-        cardinality: Cardinality,
-    ) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": { "histogram": { "field": "score_f64", "interval": 100, "hard_bounds": { "min": 1000, "max": 300000 } } },
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_histogram_with_avg);
-
-    fn bench_aggregation_histogram_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "histogram": { "field": "score_f64", "interval": 100 },
-                    "aggs": {
-                        "average_f64": { "avg": { "field": "score_f64" } }
-                    }
-                }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_histogram_only);
-
-    fn bench_aggregation_histogram_only_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-
-        b.iter(|| {
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "histogram": {
-                        "field": "score_f64",
-                        "interval": 100 // 1000 buckets
-                    },
-                }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&AllQuery, &collector).unwrap()
-        });
-    }
-
-    bench_all_cardinalities!(bench_aggregation_avg_and_range_with_avg);
-
-    fn bench_aggregation_avg_and_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
-        let index = get_test_index_bench(cardinality).unwrap();
-        let reader = index.reader().unwrap();
-        let text_field = reader.searcher().schema().get_field("text").unwrap();
-
-        b.iter(|| {
-            let term_query = TermQuery::new(
-                Term::from_field_text(text_field, "cool"),
-                IndexRecordOption::Basic,
-            );
-
-            let agg_req_1: Aggregations = serde_json::from_value(json!({
-                "rangef64": {
-                    "range": {
-                        "field": "score_f64",
-                        "ranges": [
-                            { "from": 3, "to": 7000 },
-                            { "from": 7000, "to": 20000 },
-                            { "from": 20000, "to": 60000 }
-                        ]
-                    },
-                    "aggs": {
-                        "average_in_range": { "avg": { "field": "score" } }
-                    }
-                },
-                "average": { "avg": { "field": "score" } }
-            }))
-            .unwrap();
-
-            let collector = get_collector(agg_req_1);
-
-            let searcher = reader.searcher();
-            searcher.search(&term_query, &collector).unwrap()
-        });
-    }
-}
--- a/src/aggregation/agg_limits.rs
+++ b/src/aggregation/agg_limits.rs
@@ -81,10 +81,11 @@ impl AggregationLimits {
        }
    }

-    pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
-        self.memory_consumption
-            .fetch_add(num_bytes, Ordering::Relaxed);
-        validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
+    pub(crate) fn add_memory_consumed(&self, add_num_bytes: u64) -> crate::Result<()> {
+        let prev_value = self
+            .memory_consumption
+            .fetch_add(add_num_bytes, Ordering::Relaxed);
+        validate_memory_consumption(prev_value + add_num_bytes, self.memory_limit)?;
        Ok(())
    }

@@ -94,11 +95,11 @@ impl AggregationLimits {
 }

 fn validate_memory_consumption(
-    memory_consumption: &AtomicU64,
+    memory_consumption: u64,
    memory_limit: ByteCount,
 ) -> Result<(), AggregationError> {
    // Load the estimated memory consumed by the aggregations
-    let memory_consumed: ByteCount = memory_consumption.load(Ordering::Relaxed).into();
+    let memory_consumed: ByteCount = memory_consumption.into();
    if memory_consumed > memory_limit {
        return Err(AggregationError::MemoryExceeded {
            limit: memory_limit,
@@ -118,10 +119,11 @@ pub struct ResourceLimitGuard {
 }

 impl ResourceLimitGuard {
-    pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
-        self.memory_consumption
-            .fetch_add(num_bytes, Ordering::Relaxed);
-        validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
+    pub(crate) fn add_memory_consumed(&self, add_num_bytes: u64) -> crate::Result<()> {
+        let prev_value = self
+            .memory_consumption
+            .fetch_add(add_num_bytes, Ordering::Relaxed);
+        validate_memory_consumption(prev_value + add_num_bytes, self.memory_limit)?;
        Ok(())
    }
 }
--- a/src/aggregation/agg_req.rs
+++ b/src/aggregation/agg_req.rs
@@ -34,8 +34,8 @@ use super::bucket::{
    DateHistogramAggregationReq, HistogramAggregation, RangeAggregation, TermsAggregation,
 };
 use super::metric::{
-    AverageAggregation, CountAggregation, MaxAggregation, MinAggregation,
-    PercentilesAggregationReq, StatsAggregation, SumAggregation,
+    AverageAggregation, CountAggregation, ExtendedStatsAggregation, MaxAggregation, MinAggregation,
+    PercentilesAggregationReq, StatsAggregation, SumAggregation, TopHitsAggregation,
 };

 /// The top-level aggregation request structure, which contains [`Aggregation`] and their user
@@ -93,7 +93,12 @@ impl Aggregation {
    }

    fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
-        fast_field_names.insert(self.agg.get_fast_field_name().to_string());
+        fast_field_names.extend(
+            self.agg
+                .get_fast_field_names()
+                .iter()
+                .map(|s| s.to_string()),
+        );
        fast_field_names.extend(get_fast_field_names(&self.sub_aggregation));
    }
 }
@@ -141,29 +146,39 @@ pub enum AggregationVariants {
    /// extracted values.
    #[serde(rename = "stats")]
    Stats(StatsAggregation),
+    /// Computes a collection of extended statistics (`min`, `max`, `sum`, `count`, `avg`,
+    /// `sum_of_squares`, `variance`, `variance_sampling`, `std_deviation`,
+    /// `std_deviation_sampling`) over the extracted values.
+    #[serde(rename = "extended_stats")]
+    ExtendedStats(ExtendedStatsAggregation),
    /// Computes the sum of the extracted values.
    #[serde(rename = "sum")]
    Sum(SumAggregation),
    /// Computes the sum of the extracted values.
    #[serde(rename = "percentiles")]
    Percentiles(PercentilesAggregationReq),
+    /// Finds the top k values matching some order
+    #[serde(rename = "top_hits")]
+    TopHits(TopHitsAggregation),
 }

 impl AggregationVariants {
-    /// Returns the name of the field used by the aggregation.
-    pub fn get_fast_field_name(&self) -> &str {
+    /// Returns the name of the fields used by the aggregation.
+    pub fn get_fast_field_names(&self) -> Vec<&str> {
        match self {
-            AggregationVariants::Terms(terms) => terms.field.as_str(),
-            AggregationVariants::Range(range) => range.field.as_str(),
-            AggregationVariants::Histogram(histogram) => histogram.field.as_str(),
-            AggregationVariants::DateHistogram(histogram) => histogram.field.as_str(),
-            AggregationVariants::Average(avg) => avg.field_name(),
-            AggregationVariants::Count(count) => count.field_name(),
-            AggregationVariants::Max(max) => max.field_name(),
-            AggregationVariants::Min(min) => min.field_name(),
-            AggregationVariants::Stats(stats) => stats.field_name(),
-            AggregationVariants::Sum(sum) => sum.field_name(),
-            AggregationVariants::Percentiles(per) => per.field_name(),
+            AggregationVariants::Terms(terms) => vec![terms.field.as_str()],
+            AggregationVariants::Range(range) => vec![range.field.as_str()],
+            AggregationVariants::Histogram(histogram) => vec![histogram.field.as_str()],
+            AggregationVariants::DateHistogram(histogram) => vec![histogram.field.as_str()],
+            AggregationVariants::Average(avg) => vec![avg.field_name()],
+            AggregationVariants::Count(count) => vec![count.field_name()],
+            AggregationVariants::Max(max) => vec![max.field_name()],
+            AggregationVariants::Min(min) => vec![min.field_name()],
+            AggregationVariants::Stats(stats) => vec![stats.field_name()],
+            AggregationVariants::ExtendedStats(extended_stats) => vec![extended_stats.field_name()],
+            AggregationVariants::Sum(sum) => vec![sum.field_name()],
+            AggregationVariants::Percentiles(per) => vec![per.field_name()],
+            AggregationVariants::TopHits(top_hits) => top_hits.field_names(),
        }
    }

@@ -188,6 +203,12 @@ impl AggregationVariants {
            _ => None,
        }
    }
+    pub(crate) fn as_top_hits(&self) -> Option<&TopHitsAggregation> {
+        match &self {
+            AggregationVariants::TopHits(top_hits) => Some(top_hits),
+            _ => None,
+        }
+    }

    pub(crate) fn as_percentile(&self) -> Option<&PercentilesAggregationReq> {
        match &self {
--- a/src/aggregation/agg_req_with_accessor.rs
+++ b/src/aggregation/agg_req_with_accessor.rs
@@ -1,6 +1,9 @@
 //! This will enhance the request tree with access to the fastfield and metadata.

-use columnar::{Column, ColumnBlockAccessor, ColumnType, StrColumn};
+use std::collections::HashMap;
+use std::io;
+
+use columnar::{Column, ColumnBlockAccessor, ColumnType, DynamicColumn, StrColumn};

 use super::agg_limits::ResourceLimitGuard;
 use super::agg_req::{Aggregation, AggregationVariants, Aggregations};
@@ -8,13 +11,14 @@ use super::bucket::{
    DateHistogramAggregationReq, HistogramAggregation, RangeAggregation, TermsAggregation,
 };
 use super::metric::{
-    AverageAggregation, CountAggregation, MaxAggregation, MinAggregation, StatsAggregation,
-    SumAggregation,
+    AverageAggregation, CountAggregation, ExtendedStatsAggregation, MaxAggregation, MinAggregation,
+    StatsAggregation, SumAggregation,
 };
 use super::segment_agg_result::AggregationLimits;
 use super::VecWithNames;
 use crate::aggregation::{f64_to_fastfield_u64, Key};
-use crate::SegmentReader;
+use crate::index::SegmentReader;
+use crate::SegmentOrdinal;

 #[derive(Default)]
 pub(crate) struct AggregationsWithAccessor {
@@ -32,6 +36,7 @@ impl AggregationsWithAccessor {
 }

 pub struct AggregationWithAccessor {
+    pub(crate) segment_ordinal: SegmentOrdinal,
    /// In general there can be buckets without fast field access, e.g. buckets that are created
    /// based on search terms. That is not that case currently, but eventually this needs to be
    /// Option or moved.
@@ -44,10 +49,16 @@ pub struct AggregationWithAccessor {
    pub(crate) limits: ResourceLimitGuard,
    pub(crate) column_block_accessor: ColumnBlockAccessor<u64>,
    /// Used for missing term aggregation, which checks all columns for existence.
+    /// And also for `top_hits` aggregation, which may sort on multiple fields.
    /// By convention the missing aggregation is chosen, when this property is set
    /// (instead bein set in `agg`).
    /// If this needs to used by other aggregations, we need to refactor this.
-    pub(crate) accessors: Vec<Column<u64>>,
+    // NOTE: we could make all other aggregations use this instead of `accessor` and `field_type`
+    // (making them obsolete), but would that have a performance impact?
+    pub(crate) accessors: Vec<(Column<u64>, ColumnType)>,
+    /// Map field names to all associated column accessors.
+    /// This field is used for `docvalue_fields`, which is currently only supported for `top_hits`.
+    pub(crate) value_accessors: HashMap<String, Vec<DynamicColumn>>,
    pub(crate) agg: Aggregation,
 }

@@ -57,19 +68,55 @@ impl AggregationWithAccessor {
        agg: &Aggregation,
        sub_aggregation: &Aggregations,
        reader: &SegmentReader,
+        segment_ordinal: SegmentOrdinal,
        limits: AggregationLimits,
    ) -> crate::Result<Vec<AggregationWithAccessor>> {
-        let add_agg_with_accessor = |accessor: Column<u64>,
+        let mut agg = agg.clone();
+
+        let add_agg_with_accessor = |agg: &Aggregation,
+                                     accessor: Column<u64>,
                                     column_type: ColumnType,
                                     aggs: &mut Vec<AggregationWithAccessor>|
         -> crate::Result<()> {
            let res = AggregationWithAccessor {
+                segment_ordinal,
                accessor,
-                accessors: Vec::new(),
+                accessors: Default::default(),
+                value_accessors: Default::default(),
                field_type: column_type,
                sub_aggregation: get_aggs_with_segment_accessor_and_validate(
                    sub_aggregation,
                    reader,
+                    segment_ordinal,
+                    &limits,
+                )?,
+                agg: agg.clone(),
+                limits: limits.new_guard(),
+                missing_value_for_accessor: None,
+                str_dict_column: None,
+                column_block_accessor: Default::default(),
+            };
+            aggs.push(res);
+            Ok(())
+        };
+
+        let add_agg_with_accessors = |agg: &Aggregation,
+                                      accessors: Vec<(Column<u64>, ColumnType)>,
+                                      aggs: &mut Vec<AggregationWithAccessor>,
+                                      value_accessors: HashMap<String, Vec<DynamicColumn>>|
+         -> crate::Result<()> {
+            let (accessor, field_type) = accessors.first().expect("at least one accessor");
+            let res = AggregationWithAccessor {
+                segment_ordinal,
+                // TODO: We should do away with the `accessor` field altogether
+                accessor: accessor.clone(),
+                value_accessors,
+                field_type: *field_type,
+                accessors,
+                sub_aggregation: get_aggs_with_segment_accessor_and_validate(
+                    sub_aggregation,
+                    reader,
+                    segment_ordinal,
                    &limits,
                )?,
                agg: agg.clone(),
@@ -84,32 +131,36 @@ impl AggregationWithAccessor {

        let mut res: Vec<AggregationWithAccessor> = Vec::new();
        use AggregationVariants::*;
-        match &agg.agg {
+
+        match agg.agg {
            Range(RangeAggregation {
-                field: field_name, ..
+                field: ref field_name,
+                ..
            }) => {
                let (accessor, column_type) =
                    get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?;
-                add_agg_with_accessor(accessor, column_type, &mut res)?;
+                add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
            }
            Histogram(HistogramAggregation {
-                field: field_name, ..
+                field: ref field_name,
+                ..
            }) => {
                let (accessor, column_type) =
                    get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?;
-                add_agg_with_accessor(accessor, column_type, &mut res)?;
+                add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
            }
            DateHistogram(DateHistogramAggregationReq {
-                field: field_name, ..
+                field: ref field_name,
+                ..
            }) => {
                let (accessor, column_type) =
                    // Only DateTime is supported for DateHistogram
                    get_ff_reader(reader, field_name, Some(&[ColumnType::DateTime]))?;
-                add_agg_with_accessor(accessor, column_type, &mut res)?;
+                add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
            }
            Terms(TermsAggregation {
-                field: field_name,
-                missing,
+                field: ref field_name,
+                ref missing,
                ..
            }) => {
                let str_dict_column = reader.fast_fields().str(field_name)?;
@@ -119,9 +170,9 @@ impl AggregationWithAccessor {
                    ColumnType::F64,
                    ColumnType::Str,
                    ColumnType::DateTime,
+                    ColumnType::Bool,
+                    ColumnType::IpAddr,
                    // ColumnType::Bytes Unsupported
-                    // ColumnType::Bool Unsupported
-                    // ColumnType::IpAddr Unsupported
                ];

                // In case the column is empty we want the shim column to match the missing type
@@ -162,24 +213,11 @@ impl AggregationWithAccessor {
                    let column_and_types =
                        get_all_ff_reader_or_empty(reader, field_name, None, fallback_type)?;

-                    let accessors: Vec<Column> =
-                        column_and_types.iter().map(|(a, _)| a.clone()).collect();
-                    let agg_wit_acc = AggregationWithAccessor {
-                        missing_value_for_accessor: None,
-                        accessor: accessors[0].clone(),
-                        accessors,
-                        field_type: ColumnType::U64,
-                        sub_aggregation: get_aggs_with_segment_accessor_and_validate(
-                            sub_aggregation,
-                            reader,
-                            &limits,
-                        )?,
-                        agg: agg.clone(),
-                        str_dict_column: str_dict_column.clone(),
-                        limits: limits.new_guard(),
-                        column_block_accessor: Default::default(),
-                    };
-                    res.push(agg_wit_acc);
+                    let accessors = column_and_types
+                        .iter()
+                        .map(|c_t| (c_t.0.clone(), c_t.1))
+                        .collect();
+                    add_agg_with_accessors(&agg, accessors, &mut res, Default::default())?;
                }

                for (accessor, column_type) in column_and_types {
@@ -189,21 +227,25 @@ impl AggregationWithAccessor {
                        missing.clone()
                    };

-                    let missing_value_for_accessor =
-                        if let Some(missing) = missing_value_term_agg.as_ref() {
-                            get_missing_val(column_type, missing, agg.agg.get_fast_field_name())?
-                        } else {
-                            None
-                        };
+                    let missing_value_for_accessor = if let Some(missing) =
+                        missing_value_term_agg.as_ref()
+                    {
+                        get_missing_val(column_type, missing, agg.agg.get_fast_field_names()[0])?
+                    } else {
+                        None
+                    };

                    let agg = AggregationWithAccessor {
+                        segment_ordinal,
                        missing_value_for_accessor,
                        accessor,
-                        accessors: Vec::new(),
+                        accessors: Default::default(),
+                        value_accessors: Default::default(),
                        field_type: column_type,
                        sub_aggregation: get_aggs_with_segment_accessor_and_validate(
                            sub_aggregation,
                            reader,
+                            segment_ordinal,
                            &limits,
                        )?,
                        agg: agg.clone(),
@@ -215,34 +257,67 @@ impl AggregationWithAccessor {
                }
            }
            Average(AverageAggregation {
-                field: field_name, ..
+                field: ref field_name,
+                ..
            })
            | Count(CountAggregation {
-                field: field_name, ..
+                field: ref field_name,
+                ..
            })
            | Max(MaxAggregation {
-                field: field_name, ..
+                field: ref field_name,
+                ..
            })
            | Min(MinAggregation {
-                field: field_name, ..
+                field: ref field_name,
+                ..
            })
            | Stats(StatsAggregation {
-                field: field_name, ..
+                field: ref field_name,
+                ..
+            })
+            | ExtendedStats(ExtendedStatsAggregation {
+                field: ref field_name,
+                ..
            })
            | Sum(SumAggregation {
-                field: field_name, ..
+                field: ref field_name,
+                ..
            }) => {
                let (accessor, column_type) =
                    get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?;
-                add_agg_with_accessor(accessor, column_type, &mut res)?;
+                add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
            }
-            Percentiles(percentiles) => {
+            Percentiles(ref percentiles) => {
                let (accessor, column_type) = get_ff_reader(
                    reader,
                    percentiles.field_name(),
                    Some(get_numeric_or_date_column_types()),
                )?;
-                add_agg_with_accessor(accessor, column_type, &mut res)?;
+                add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
+            }
+            TopHits(ref mut top_hits) => {
+                top_hits.validate_and_resolve_field_names(reader.fast_fields().columnar())?;
+                let accessors: Vec<(Column<u64>, ColumnType)> = top_hits
+                    .field_names()
+                    .iter()
+                    .map(|field| {
+                        get_ff_reader(reader, field, Some(get_numeric_or_date_column_types()))
+                    })
+                    .collect::<crate::Result<_>>()?;
+
+                let value_accessors = top_hits
+                    .value_field_names()
+                    .iter()
+                    .map(|field_name| {
+                        Ok((
+                            field_name.to_string(),
+                            get_dynamic_columns(reader, field_name)?,
+                        ))
+                    })
+                    .collect::<crate::Result<_>>()?;
+
+                add_agg_with_accessors(&agg, accessors, &mut res, value_accessors)?;
            }
        };

@@ -264,8 +339,8 @@ fn get_missing_val(
        }
        _ => {
            return Err(crate::TantivyError::InvalidArgument(format!(
-                "Missing value {:?} for field {} is not supported for column type {:?}",
-                missing, field_name, column_type
+                "Missing value {missing:?} for field {field_name} is not supported for column \
+                 type {column_type:?}"
            )));
        }
    };
@@ -284,6 +359,7 @@ fn get_numeric_or_date_column_types() -> &'static [ColumnType] {
 pub(crate) fn get_aggs_with_segment_accessor_and_validate(
    aggs: &Aggregations,
    reader: &SegmentReader,
+    segment_ordinal: SegmentOrdinal,
    limits: &AggregationLimits,
 ) -> crate::Result<AggregationsWithAccessor> {
    let mut aggss = Vec::new();
@@ -292,6 +368,7 @@ pub(crate) fn get_aggs_with_segment_accessor_and_validate(
            agg,
            agg.sub_aggregation(),
            reader,
+            segment_ordinal,
            limits.clone(),
        )?;
        for agg in aggs {
@@ -321,6 +398,19 @@ fn get_ff_reader(
    Ok(ff_field_with_type)
 }

+fn get_dynamic_columns(
+    reader: &SegmentReader,
+    field_name: &str,
+) -> crate::Result<Vec<columnar::DynamicColumn>> {
+    let ff_fields = reader.fast_fields().dynamic_column_handles(field_name)?;
+    let cols = ff_fields
+        .iter()
+        .map(|h| h.open())
+        .collect::<io::Result<_>>()?;
+    assert!(!ff_fields.is_empty(), "field {field_name} not found");
+    Ok(cols)
+}
+
 /// Get all fast field reader or empty as default.
 ///
 /// Is guaranteed to return at least one column.
--- a/src/aggregation/agg_result.rs
+++ b/src/aggregation/agg_result.rs
@@ -8,7 +8,9 @@ use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};

 use super::bucket::GetDocCount;
-use super::metric::{PercentilesMetricResult, SingleMetricResult, Stats};
+use super::metric::{
+    ExtendedStats, PercentilesMetricResult, SingleMetricResult, Stats, TopHitsMetricResult,
+};
 use super::{AggregationError, Key};
 use crate::TantivyError;

@@ -88,10 +90,14 @@ pub enum MetricResult {
    Min(SingleMetricResult),
    /// Stats metric result.
    Stats(Stats),
+    /// ExtendedStats metric result.
+    ExtendedStats(Box<ExtendedStats>),
    /// Sum metric result.
    Sum(SingleMetricResult),
-    /// Sum metric result.
+    /// Percentiles metric result.
    Percentiles(PercentilesMetricResult),
+    /// Top hits metric result
+    TopHits(TopHitsMetricResult),
 }

 impl MetricResult {
@@ -102,10 +108,14 @@ impl MetricResult {
            MetricResult::Max(max) => Ok(max.value),
            MetricResult::Min(min) => Ok(min.value),
            MetricResult::Stats(stats) => stats.get_value(agg_property),
+            MetricResult::ExtendedStats(extended_stats) => extended_stats.get_value(agg_property),
            MetricResult::Sum(sum) => Ok(sum.value),
            MetricResult::Percentiles(_) => Err(TantivyError::AggregationError(
                AggregationError::InvalidRequest("percentiles can't be used to order".to_string()),
            )),
+            MetricResult::TopHits(_) => Err(TantivyError::AggregationError(
+                AggregationError::InvalidRequest("top_hits can't be used to order".to_string()),
+            )),
        }
    }
 }
--- a/src/aggregation/agg_tests.rs
+++ b/src/aggregation/agg_tests.rs
@@ -4,6 +4,7 @@ use crate::aggregation::agg_req::{Aggregation, Aggregations};
 use crate::aggregation::agg_result::AggregationResults;
 use crate::aggregation::buf_collector::DOC_BLOCK_SIZE;
 use crate::aggregation::collector::AggregationCollector;
+use crate::aggregation::intermediate_agg_result::IntermediateAggregationResults;
 use crate::aggregation::segment_agg_result::AggregationLimits;
 use crate::aggregation::tests::{get_test_index_2_segments, get_test_index_from_values_and_terms};
 use crate::aggregation::DistributedAggregationCollector;
@@ -66,6 +67,22 @@ fn test_aggregation_flushing(
            }
        }
    },
+    "top_hits_test":{
+        "terms": {
+            "field": "string_id"
+        },
+        "aggs": {
+            "bucketsL2": {
+                "top_hits": {
+                    "size": 2,
+                    "sort": [
+                        { "score": "asc" }
+                    ],
+                    "docvalue_fields": ["score"]
+                }
+            }
+        }
+    },
    "histogram_test":{
        "histogram": {
            "field": "score",
@@ -108,6 +125,16 @@ fn test_aggregation_flushing(

        let searcher = reader.searcher();
        let intermediate_agg_result = searcher.search(&AllQuery, &collector).unwrap();
+
+        // Test postcard roundtrip serialization
+        let intermediate_agg_result_bytes = postcard::to_allocvec(&intermediate_agg_result).expect(
+            "Postcard Serialization failed, flatten etc. is not supported in the intermediate \
+             result",
+        );
+        let intermediate_agg_result: IntermediateAggregationResults =
+            postcard::from_bytes(&intermediate_agg_result_bytes)
+                .expect("Post deserialization failed");
+
        intermediate_agg_result
            .into_final_result(agg_req, &Default::default())
            .unwrap()
@@ -587,6 +614,9 @@ fn test_aggregation_on_json_object() {
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema);
    let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"color": "red"})))
+        .unwrap();
    index_writer
        .add_document(doc!(json => json!({"color": "red"})))
        .unwrap();
@@ -614,8 +644,8 @@ fn test_aggregation_on_json_object() {
        &serde_json::json!({
            "jsonagg": {
                "buckets": [
+                    {"doc_count": 2, "key": "red"},
                    {"doc_count": 1, "key": "blue"},
-                    {"doc_count": 1, "key": "red"}
                ],
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0
@@ -637,6 +667,9 @@ fn test_aggregation_on_nested_json_object() {
    index_writer
        .add_document(doc!(json => json!({"color.dot": "blue", "color": {"nested":"blue"} })))
        .unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"color.dot": "blue", "color": {"nested":"blue"} })))
+        .unwrap();
    index_writer.commit().unwrap();
    let reader = index.reader().unwrap();
    let searcher = reader.searcher();
@@ -664,7 +697,7 @@ fn test_aggregation_on_nested_json_object() {
        &serde_json::json!({
            "jsonagg1": {
                "buckets": [
-                    {"doc_count": 1, "key": "blue"},
+                    {"doc_count": 2, "key": "blue"},
                    {"doc_count": 1, "key": "red"}
                ],
                "doc_count_error_upper_bound": 0,
@@ -672,7 +705,7 @@ fn test_aggregation_on_nested_json_object() {
            },
            "jsonagg2": {
                "buckets": [
-                    {"doc_count": 1, "key": "blue"},
+                    {"doc_count": 2, "key": "blue"},
                    {"doc_count": 1, "key": "red"}
                ],
                "doc_count_error_upper_bound": 0,
@@ -810,29 +843,38 @@ fn test_aggregation_on_json_object_mixed_types() {
    let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
    // => Segment with all values numeric
    index_writer
-        .add_document(doc!(json => json!({"mixed_type": 10.0})))
+        .add_document(doc!(json => json!({"mixed_type": 10.0, "mixed_price": 10.0})))
        .unwrap();
    index_writer.commit().unwrap();
    // => Segment with all values text
    index_writer
-        .add_document(doc!(json => json!({"mixed_type": "blue"})))
+        .add_document(doc!(json => json!({"mixed_type": "blue", "mixed_price": 5.0})))
+        .unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"mixed_type": "blue", "mixed_price": 5.0})))
+        .unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"mixed_type": "blue", "mixed_price": 5.0})))
        .unwrap();
    index_writer.commit().unwrap();
    // => Segment with all boolen
    index_writer
-        .add_document(doc!(json => json!({"mixed_type": true})))
+        .add_document(doc!(json => json!({"mixed_type": true, "mixed_price": "no_price"})))
        .unwrap();
    index_writer.commit().unwrap();

    // => Segment with mixed values
    index_writer
-        .add_document(doc!(json => json!({"mixed_type": "red"})))
+        .add_document(doc!(json => json!({"mixed_type": "red", "mixed_price": 1.0})))
        .unwrap();
    index_writer
-        .add_document(doc!(json => json!({"mixed_type": -20.5})))
+        .add_document(doc!(json => json!({"mixed_type": "red", "mixed_price": 1.0})))
        .unwrap();
    index_writer
-        .add_document(doc!(json => json!({"mixed_type": true})))
+        .add_document(doc!(json => json!({"mixed_type": -20.5, "mixed_price": -20.5})))
+        .unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"mixed_type": true, "mixed_price": "no_price"})))
        .unwrap();

    index_writer.commit().unwrap();
@@ -846,7 +888,7 @@ fn test_aggregation_on_json_object_mixed_types() {
                "order": { "min_price": "desc" }
            },
            "aggs": {
-                "min_price": { "min": { "field": "json.mixed_type" } }
+                "min_price": { "min": { "field": "json.mixed_price" } }
            }
        },
        "rangeagg": {
@@ -870,6 +912,7 @@ fn test_aggregation_on_json_object_mixed_types() {

    let aggregation_results = searcher.search(&AllQuery, &aggregation_collector).unwrap();
    let aggregation_res_json = serde_json::to_value(aggregation_results).unwrap();
+    use pretty_assertions::assert_eq;
    assert_eq!(
        &aggregation_res_json,
        &serde_json::json!({
@@ -884,10 +927,10 @@ fn test_aggregation_on_json_object_mixed_types() {
          "termagg": {
            "buckets": [
              { "doc_count": 1, "key": 10.0, "min_price": { "value": 10.0 } },
+              { "doc_count": 3, "key": "blue", "min_price": { "value": 5.0 } },
+              { "doc_count": 2, "key": "red", "min_price": { "value": 1.0 } },
              { "doc_count": 1, "key": -20.5, "min_price": { "value": -20.5 } },
-              // TODO bool is also not yet handled in aggregation
-              { "doc_count": 1, "key": "blue", "min_price": { "value": null } },
-              { "doc_count": 1, "key": "red", "min_price": { "value": null } },
+              { "doc_count": 2, "key": 1.0, "key_as_string": "true", "min_price": { "value": null } },
            ],
            "sum_other_doc_count": 0
          }
--- a/src/aggregation/bucket/histogram/date_histogram.rs
+++ b/src/aggregation/bucket/histogram/date_histogram.rs
@@ -1,7 +1,7 @@
 use serde::{Deserialize, Serialize};

 use super::{HistogramAggregation, HistogramBounds};
-use crate::aggregation::AggregationError;
+use crate::aggregation::*;

 /// DateHistogramAggregation is similar to `HistogramAggregation`, but it can only be used with date
 /// type.
@@ -307,6 +307,7 @@ pub mod tests {
    ) -> crate::Result<Index> {
        let mut schema_builder = Schema::builder();
        schema_builder.add_date_field("date", FAST);
+        schema_builder.add_json_field("mixed", FAST);
        schema_builder.add_text_field("text", FAST | STRING);
        schema_builder.add_text_field("text2", FAST | STRING);
        let schema = schema_builder.build();
@@ -351,8 +352,10 @@ pub mod tests {
        let docs = vec![
            vec![r#"{ "date": "2015-01-01T12:10:30Z", "text": "aaa" }"#],
            vec![r#"{ "date": "2015-01-01T11:11:30Z", "text": "bbb" }"#],
+            vec![r#"{ "date": "2015-01-01T11:11:30Z", "text": "bbb" }"#],
            vec![r#"{ "date": "2015-01-02T00:00:00Z", "text": "bbb" }"#],
            vec![r#"{ "date": "2015-01-06T00:00:00Z", "text": "ccc" }"#],
+            vec![r#"{ "date": "2015-01-06T00:00:00Z", "text": "ccc" }"#],
        ];
        let index = get_test_index_from_docs(merge_segments, &docs).unwrap();

@@ -381,7 +384,7 @@ pub mod tests {
                        {
                            "key_as_string" : "2015-01-01T00:00:00Z",
                            "key" : 1420070400000.0,
-                            "doc_count" : 4
+                            "doc_count" : 6
                        }
                    ]
                }
@@ -419,15 +422,15 @@ pub mod tests {
                    {
                        "key_as_string" : "2015-01-01T00:00:00Z",
                        "key" : 1420070400000.0,
-                        "doc_count" : 4,
+                        "doc_count" : 6,
                        "texts": {
                            "buckets": [
                                {
-                                "doc_count": 2,
+                                "doc_count": 3,
                                "key": "bbb"
                                },
                                {
-                                "doc_count": 1,
+                                "doc_count": 2,
                                "key": "ccc"
                                },
                                {
@@ -466,7 +469,7 @@ pub mod tests {
                "sales_over_time": {
                    "buckets": [
                        {
-                            "doc_count": 2,
+                            "doc_count": 3,
                            "key": 1420070400000.0,
                            "key_as_string": "2015-01-01T00:00:00Z"
                        },
@@ -491,7 +494,7 @@ pub mod tests {
                            "key_as_string": "2015-01-05T00:00:00Z"
                        },
                        {
-                            "doc_count": 1,
+                            "doc_count": 2,
                            "key": 1420502400000.0,
                            "key_as_string": "2015-01-06T00:00:00Z"
                        }
@@ -532,7 +535,7 @@ pub mod tests {
                            "key_as_string": "2014-12-31T00:00:00Z"
                        },
                        {
-                            "doc_count": 2,
+                            "doc_count": 3,
                            "key": 1420070400000.0,
                            "key_as_string": "2015-01-01T00:00:00Z"
                        },
@@ -557,7 +560,7 @@ pub mod tests {
                            "key_as_string": "2015-01-05T00:00:00Z"
                        },
                        {
-                            "doc_count": 1,
+                            "doc_count": 2,
                            "key": 1420502400000.0,
                            "key_as_string": "2015-01-06T00:00:00Z"
                        },
--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -1,8 +1,5 @@
 use std::cmp::Ordering;
-use std::fmt::Display;

-use columnar::ColumnType;
-use itertools::Itertools;
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};
 use tantivy_bitpacker::minmax;
@@ -18,9 +15,9 @@ use crate::aggregation::intermediate_agg_result::{
    IntermediateHistogramBucketEntry,
 };
 use crate::aggregation::segment_agg_result::{
-    build_segment_agg_collector, AggregationLimits, SegmentAggregationCollector,
+    build_segment_agg_collector, SegmentAggregationCollector,
 };
-use crate::aggregation::{f64_from_fastfield_u64, format_date};
+use crate::aggregation::*;
 use crate::TantivyError;

 /// Histogram is a bucket aggregation, where buckets are created dynamically for given `interval`.
@@ -73,6 +70,7 @@ pub struct HistogramAggregation {
    pub field: String,
    /// The interval to chunk your data range. Each bucket spans a value range of [0..interval).
    /// Must be a positive value.
+    #[serde(deserialize_with = "deserialize_f64")]
    pub interval: f64,
    /// Intervals implicitly defines an absolute grid of buckets `[interval * k, interval * (k +
    /// 1))`.
@@ -85,6 +83,7 @@ pub struct HistogramAggregation {
    /// fall into the buckets with the key 0 and 10.
    /// With offset 5 and interval 10, they would both fall into the bucket with they key 5 and the
    /// range [5..15)
+    #[serde(default, deserialize_with = "deserialize_option_f64")]
    pub offset: Option<f64>,
    /// The minimum number of documents in a bucket to be returned. Defaults to 0.
    pub min_doc_count: Option<u64>,
@@ -308,7 +307,10 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
            .column_block_accessor
            .fetch_block(docs, &bucket_agg_accessor.accessor);

-        for (doc, val) in bucket_agg_accessor.column_block_accessor.iter_docid_vals() {
+        for (doc, val) in bucket_agg_accessor
+            .column_block_accessor
+            .iter_docid_vals(docs, &bucket_agg_accessor.accessor)
+        {
            let val = self.f64_from_fastfield_u64(val);

            let bucket_pos = get_bucket_pos(val);
@@ -329,9 +331,11 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
        }

        let mem_delta = self.get_memory_consumption() - mem_pre;
-        bucket_agg_accessor
-            .limits
-            .add_memory_consumed(mem_delta as u64)?;
+        if mem_delta > 0 {
+            bucket_agg_accessor
+                .limits
+                .add_memory_consumed(mem_delta as u64)?;
+        }

        Ok(())
    }
@@ -595,11 +599,12 @@ mod tests {
    use serde_json::Value;

    use super::*;
-    use crate::aggregation::agg_req::Aggregations;
+    use crate::aggregation::agg_result::AggregationResults;
    use crate::aggregation::tests::{
        exec_request, exec_request_with_query, exec_request_with_query_and_memory_limit,
        get_test_index_2_segments, get_test_index_from_values, get_test_index_with_num_docs,
    };
+    use crate::query::AllQuery;

    #[test]
    fn histogram_test_crooked_values() -> crate::Result<()> {
@@ -1351,6 +1356,35 @@ mod tests {
            })
        );

+        Ok(())
+    }
+    #[test]
+    fn test_aggregation_histogram_empty_index() -> crate::Result<()> {
+        // Build the test index from an empty value list (intended to yield an index without segments).
+        let values = vec![];
+
+        let index = get_test_index_from_values(false, &values)?;
+        // A plain histogram request on the `score` field with an interval of 10.
+        let agg_req_1: Aggregations = serde_json::from_value(json!({
+            "myhisto": {
+                "histogram": {
+                    "field": "score",
+                    "interval": 10.0
+                },
+            }
+        }))
+        .unwrap();
+
+        let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
+
+        let reader = index.reader()?;
+        let searcher = reader.searcher();
+        let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
+        // Round-trip through JSON text so the serialized shape of the result can be inspected.
+        let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
+        // The result must still contain a `buckets` array for "myhisto", and it must be empty.
+        assert_eq!(res["myhisto"]["buckets"].as_array().unwrap().len(), 0);
+
        Ok(())
    }
 }
--- a/src/aggregation/bucket/mod.rs
+++ b/src/aggregation/bucket/mod.rs
@@ -28,6 +28,7 @@ mod term_agg;
 mod term_missing_agg;

 use std::collections::HashMap;
+use std::fmt;

 pub use histogram::*;
 pub use range::*;
@@ -72,12 +73,12 @@ impl From<&str> for OrderTarget {
    }
 }

-impl ToString for OrderTarget {
-    fn to_string(&self) -> String {
+impl fmt::Display for OrderTarget {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
-            OrderTarget::Key => "_key".to_string(),
-            OrderTarget::Count => "_count".to_string(),
-            OrderTarget::SubAggregation(agg) => agg.to_string(),
+            OrderTarget::Key => f.write_str("_key"),
+            OrderTarget::Count => f.write_str("_count"),
+            OrderTarget::SubAggregation(agg) => agg.fmt(f),
        }
    }
 }
--- a/src/aggregation/bucket/range.rs
+++ b/src/aggregation/bucket/range.rs
@@ -1,7 +1,6 @@
 use std::fmt::Debug;
 use std::ops::Range;

-use columnar::{ColumnType, MonotonicallyMappableToU64};
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};

@@ -14,9 +13,7 @@ use crate::aggregation::intermediate_agg_result::{
 use crate::aggregation::segment_agg_result::{
    build_segment_agg_collector, SegmentAggregationCollector,
 };
-use crate::aggregation::{
-    f64_from_fastfield_u64, f64_to_fastfield_u64, format_date, Key, SerializedKey,
-};
+use crate::aggregation::*;
 use crate::TantivyError;

 /// Provide user-defined buckets to aggregate on.
@@ -72,11 +69,19 @@ pub struct RangeAggregationRange {
    pub key: Option<String>,
    /// The from range value, which is inclusive in the range.
    /// `None` equals to an open ended interval.
-    #[serde(skip_serializing_if = "Option::is_none", default)]
+    #[serde(
+        skip_serializing_if = "Option::is_none",
+        default,
+        deserialize_with = "deserialize_option_f64"
+    )]
    pub from: Option<f64>,
    /// The to range value, which is not inclusive in the range.
    /// `None` equals to an open ended interval.
-    #[serde(skip_serializing_if = "Option::is_none", default)]
+    #[serde(
+        skip_serializing_if = "Option::is_none",
+        default,
+        deserialize_with = "deserialize_option_f64"
+    )]
    pub to: Option<f64>,
 }

@@ -230,7 +235,10 @@ impl SegmentAggregationCollector for SegmentRangeCollector {
            .column_block_accessor
            .fetch_block(docs, &bucket_agg_accessor.accessor);

-        for (doc, val) in bucket_agg_accessor.column_block_accessor.iter_docid_vals() {
+        for (doc, val) in bucket_agg_accessor
+            .column_block_accessor
+            .iter_docid_vals(docs, &bucket_agg_accessor.accessor)
+        {
            let bucket_pos = self.get_bucket_pos(val);

            let bucket = &mut self.buckets[bucket_pos];
@@ -441,7 +449,6 @@ pub(crate) fn range_to_key(range: &Range<u64>, field_type: &ColumnType) -> crate
 #[cfg(test)]
 mod tests {

-    use columnar::MonotonicallyMappableToU64;
    use serde_json::Value;

    use super::*;
@@ -450,7 +457,6 @@ mod tests {
        exec_request, exec_request_with_query, get_test_index_2_segments,
        get_test_index_with_num_docs,
    };
-    use crate::aggregation::AggregationLimits;

    pub fn get_collector_from_ranges(
        ranges: Vec<RangeAggregationRange>,
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -1,6 +1,10 @@
 use std::fmt::Debug;
+use std::net::Ipv6Addr;

-use columnar::{BytesColumn, ColumnType, MonotonicallyMappableToU64, StrColumn};
+use columnar::column_values::CompactSpaceU64Accessor;
+use columnar::{
+    BytesColumn, ColumnType, MonotonicallyMappableToU128, MonotonicallyMappableToU64, StrColumn,
+};
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};

@@ -99,23 +103,14 @@ pub struct TermsAggregation {
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub size: Option<u32>,

-    /// Unused by tantivy.
-    ///
-    /// Since tantivy doesn't know shards, this parameter is merely there to be used by consumers
-    /// of tantivy. shard_size is the number of terms returned by each shard.
-    /// The default value in elasticsearch is size * 1.5 + 10.
-    ///
-    /// Should never be smaller than size.
-    #[serde(skip_serializing_if = "Option::is_none", default)]
-    #[serde(alias = "shard_size")]
-    pub split_size: Option<u32>,
-
-    /// The get more accurate results, we fetch more than `size` from each segment.
+    /// To get more accurate results, we fetch more than `size` from each segment.
    ///
    /// Increasing this value is will increase the cost for more accuracy.
    ///
    /// Defaults to 10 * size.
    #[serde(skip_serializing_if = "Option::is_none", default)]
+    #[serde(alias = "shard_size")]
+    #[serde(alias = "split_size")]
    pub segment_size: Option<u32>,

    /// If you set the `show_term_doc_count_error` parameter to true, the terms aggregation will
@@ -256,7 +251,7 @@ pub struct SegmentTermCollector {
    term_buckets: TermBuckets,
    req: TermsAggregationInternal,
    blueprint: Option<Box<dyn SegmentAggregationCollector>>,
-    field_type: ColumnType,
+    column_type: ColumnType,
    accessor_idx: usize,
 }

@@ -315,7 +310,10 @@ impl SegmentAggregationCollector for SegmentTermCollector {
        }
        // has subagg
        if let Some(blueprint) = self.blueprint.as_ref() {
-            for (doc, term_id) in bucket_agg_accessor.column_block_accessor.iter_docid_vals() {
+            for (doc, term_id) in bucket_agg_accessor
+                .column_block_accessor
+                .iter_docid_vals(docs, &bucket_agg_accessor.accessor)
+            {
                let sub_aggregations = self
                    .term_buckets
                    .sub_aggs
@@ -326,9 +324,11 @@ impl SegmentAggregationCollector for SegmentTermCollector {
        }

        let mem_delta = self.get_memory_consumption() - mem_pre;
-        bucket_agg_accessor
-            .limits
-            .add_memory_consumed(mem_delta as u64)?;
+        if mem_delta > 0 {
+            bucket_agg_accessor
+                .limits
+                .add_memory_consumed(mem_delta as u64)?;
+        }

        Ok(())
    }
@@ -355,10 +355,9 @@ impl SegmentTermCollector {
        field_type: ColumnType,
        accessor_idx: usize,
    ) -> crate::Result<Self> {
-        if field_type == ColumnType::Bytes || field_type == ColumnType::Bool {
+        if field_type == ColumnType::Bytes {
            return Err(TantivyError::InvalidArgument(format!(
-                "terms aggregation is not supported for column type {:?}",
-                field_type
+                "terms aggregation is not supported for column type {field_type:?}"
            )));
        }
        let term_buckets = TermBuckets::default();
@@ -389,7 +388,7 @@ impl SegmentTermCollector {
            req: TermsAggregationInternal::from_req(req),
            term_buckets,
            blueprint,
-            field_type,
+            column_type: field_type,
            accessor_idx,
        })
    }
@@ -466,7 +465,7 @@ impl SegmentTermCollector {
                Ok(intermediate_entry)
            };

-        if self.field_type == ColumnType::Str {
+        if self.column_type == ColumnType::Str {
            let term_dict = agg_with_accessor
                .str_dict_column
                .as_ref()
@@ -531,28 +530,55 @@ impl SegmentTermCollector {
                        });
                }
            }
-        } else if self.field_type == ColumnType::DateTime {
+        } else if self.column_type == ColumnType::DateTime {
            for (val, doc_count) in entries {
                let intermediate_entry = into_intermediate_bucket_entry(val, doc_count)?;
                let val = i64::from_u64(val);
                let date = format_date(val)?;
                dict.insert(IntermediateKey::Str(date), intermediate_entry);
            }
+        } else if self.column_type == ColumnType::Bool {
+            for (val, doc_count) in entries {
+                let intermediate_entry = into_intermediate_bucket_entry(val, doc_count)?;
+                let val = bool::from_u64(val);
+                dict.insert(IntermediateKey::Bool(val), intermediate_entry);
+            }
+        } else if self.column_type == ColumnType::IpAddr {
+            let compact_space_accessor = agg_with_accessor
+                .accessor
+                .values
+                .clone()
+                .downcast_arc::<CompactSpaceU64Accessor>()
+                .map_err(|_| {
+                    TantivyError::AggregationError(
+                        crate::aggregation::AggregationError::InternalError(
+                            "Type mismatch: Could not downcast to CompactSpaceU64Accessor"
+                                .to_string(),
+                        ),
+                    )
+                })?;
+
+            for (val, doc_count) in entries {
+                let intermediate_entry = into_intermediate_bucket_entry(val, doc_count)?;
+                let val: u128 = compact_space_accessor.compact_to_u128(val as u32);
+                let val = Ipv6Addr::from_u128(val);
+                dict.insert(IntermediateKey::IpAddr(val), intermediate_entry);
+            }
        } else {
            for (val, doc_count) in entries {
                let intermediate_entry = into_intermediate_bucket_entry(val, doc_count)?;
-                let val = f64_from_fastfield_u64(val, &self.field_type);
+                let val = f64_from_fastfield_u64(val, &self.column_type);
                dict.insert(IntermediateKey::F64(val), intermediate_entry);
            }
        };

-        Ok(IntermediateBucketResult::Terms(
-            IntermediateTermBucketResult {
+        Ok(IntermediateBucketResult::Terms {
+            buckets: IntermediateTermBucketResult {
                entries: dict,
                sum_other_doc_count,
                doc_count_error_upper_bound: term_doc_count_before_cutoff,
            },
-        ))
+        })
    }
 }

@@ -590,6 +616,9 @@ pub(crate) fn cut_off_buckets<T: GetDocCount + Debug>(

 #[cfg(test)]
 mod tests {
+    use std::net::IpAddr;
+    use std::str::FromStr;
+
    use common::DateTime;
    use time::{Date, Month};

@@ -600,7 +629,7 @@ mod tests {
    };
    use crate::aggregation::AggregationLimits;
    use crate::indexer::NoMergePolicy;
-    use crate::schema::{Schema, FAST, STRING};
+    use crate::schema::{IntoIpv6Addr, Schema, FAST, STRING};
    use crate::{Index, IndexWriter};

    #[test]
@@ -1182,9 +1211,9 @@ mod tests {

        assert_eq!(res["my_texts"]["buckets"][0]["key"], "terma");
        assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 4);
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "termc");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "termb");
        assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 0);
-        assert_eq!(res["my_texts"]["buckets"][2]["key"], "termb");
+        assert_eq!(res["my_texts"]["buckets"][2]["key"], "termc");
        assert_eq!(res["my_texts"]["buckets"][2]["doc_count"], 0);
        assert_eq!(res["my_texts"]["sum_other_doc_count"], 0);
        assert_eq!(res["my_texts"]["doc_count_error_upper_bound"], 0);
@@ -1365,7 +1394,7 @@ mod tests {

    #[test]
    fn terms_aggregation_different_tokenizer_on_ff_test() -> crate::Result<()> {
-        let terms = vec!["Hello Hello", "Hallo Hallo"];
+        let terms = vec!["Hello Hello", "Hallo Hallo", "Hallo Hallo"];

        let index = get_test_index_from_terms(true, &[terms])?;

@@ -1383,7 +1412,7 @@ mod tests {
        println!("{}", serde_json::to_string_pretty(&res).unwrap());

        assert_eq!(res["my_texts"]["buckets"][0]["key"], "Hallo Hallo");
-        assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 1);
+        assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 2);

        assert_eq!(res["my_texts"]["buckets"][1]["key"], "Hello Hello");
        assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 1);
@@ -1894,4 +1923,80 @@ mod tests {

        Ok(())
    }
+
+    #[test]
+    fn terms_aggregation_bool() -> crate::Result<()> {
+        // Index `true`, `false`, `true` into a fast bool field, then run a terms aggregation on it.
+        let mut builder = Schema::builder();
+        let bool_field = builder.add_bool_field("bool_field", FAST);
+        let index = Index::create_in_ram(builder.build());
+        {
+            let mut index_writer = index.writer_with_num_threads(1, 15_000_000)?;
+            for flag in [true, false, true] {
+                index_writer.add_document(doc!(bool_field=>flag))?;
+            }
+            index_writer.commit()?;
+        }
+        let request = json!({
+            "my_bool": {
+                "terms": {
+                    "field": "bool_field"
+                },
+            }
+        });
+        let agg_req: Aggregations = serde_json::from_value(request).unwrap();
+
+        let res = exec_request_with_query(agg_req, &index, None)?;
+        let buckets = &res["my_bool"]["buckets"];
+        // Expected: `true` (2 docs) first, then `false` (1 doc), and no third bucket.
+        assert_eq!(buckets[0]["key"], 1.0);
+        assert_eq!(buckets[0]["key_as_string"], "true");
+        assert_eq!(buckets[0]["doc_count"], 2);
+        assert_eq!(buckets[1]["key"], 0.0);
+        assert_eq!(buckets[1]["key_as_string"], "false");
+        assert_eq!(buckets[1]["doc_count"], 1);
+        assert_eq!(buckets[2]["key"], serde_json::Value::Null);
+
+        Ok(())
+    }
+
+    #[test]
+    fn terms_aggregation_ip_addr() -> crate::Result<()> {
+        let mut schema_builder = Schema::builder();
+        let field = schema_builder.add_ip_addr_field("ip_field", FAST);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        {
+            let mut writer = index.writer_with_num_threads(1, 15_000_000)?;
+            // IPv6 loopback, indexed twice
+            writer.add_document(doc!(field=>IpAddr::from_str("::1").unwrap().into_ipv6_addr()))?;
+            writer.add_document(doc!(field=>IpAddr::from_str("::1").unwrap().into_ipv6_addr()))?;
+            // IPv4 loopback, converted with `into_ipv6_addr` before indexing
+            writer.add_document(
+                doc!(field=>IpAddr::from_str("127.0.0.1").unwrap().into_ipv6_addr()),
+            )?;
+            writer.commit()?;
+        }
+
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_ip": {
+                "terms": {
+                    "field": "ip_field"
+                },
+            }
+        }))
+        .unwrap();
+
+        let res = exec_request_with_query(agg_req, &index, None)?;
+        // Keys are strings; `From<IntermediateKey> for Key` prefers the IPv4 notation when the
+        // address has an IPv4-mapped form, so the second key is expected to be "127.0.0.1".
+
+        assert_eq!(res["my_ip"]["buckets"][0]["key"], "::1");
+        assert_eq!(res["my_ip"]["buckets"][0]["doc_count"], 2);
+        assert_eq!(res["my_ip"]["buckets"][1]["key"], "127.0.0.1");
+        assert_eq!(res["my_ip"]["buckets"][1]["doc_count"], 1);
+        assert_eq!(res["my_ip"]["buckets"][2]["key"], serde_json::Value::Null);
+
+        Ok(())
+    }
 }
--- a/src/aggregation/bucket/term_missing_agg.rs
+++ b/src/aggregation/bucket/term_missing_agg.rs
@@ -73,11 +73,13 @@ impl SegmentAggregationCollector for TermMissingAgg {

        entries.insert(missing.into(), missing_entry);

-        let bucket = IntermediateBucketResult::Terms(IntermediateTermBucketResult {
-            entries,
-            sum_other_doc_count: 0,
-            doc_count_error_upper_bound: 0,
-        });
+        let bucket = IntermediateBucketResult::Terms {
+            buckets: IntermediateTermBucketResult {
+                entries,
+                sum_other_doc_count: 0,
+                doc_count_error_upper_bound: 0,
+            },
+        };

        results.push(name, IntermediateAggregationResult::Bucket(bucket))?;

@@ -90,7 +92,10 @@ impl SegmentAggregationCollector for TermMissingAgg {
        agg_with_accessor: &mut AggregationsWithAccessor,
    ) -> crate::Result<()> {
        let agg = &mut agg_with_accessor.aggs.values[self.accessor_idx];
-        let has_value = agg.accessors.iter().any(|acc| acc.index.has_value(doc));
+        let has_value = agg
+            .accessors
+            .iter()
+            .any(|(acc, _)| acc.index.has_value(doc));
        if !has_value {
            self.missing_count += 1;
            if let Some(sub_agg) = self.sub_agg.as_mut() {
--- a/src/aggregation/collector.rs
+++ b/src/aggregation/collector.rs
@@ -8,7 +8,8 @@ use super::segment_agg_result::{
 };
 use crate::aggregation::agg_req_with_accessor::get_aggs_with_segment_accessor_and_validate;
 use crate::collector::{Collector, SegmentCollector};
-use crate::{DocId, SegmentReader, TantivyError};
+use crate::index::SegmentReader;
+use crate::{DocId, SegmentOrdinal, TantivyError};

 /// The default max bucket count, before the aggregation fails.
 pub const DEFAULT_BUCKET_LIMIT: u32 = 65000;
@@ -64,10 +65,15 @@ impl Collector for DistributedAggregationCollector {

    fn for_segment(
        &self,
-        _segment_local_id: crate::SegmentOrdinal,
+        segment_local_id: crate::SegmentOrdinal,
        reader: &crate::SegmentReader,
    ) -> crate::Result<Self::Child> {
-        AggregationSegmentCollector::from_agg_req_and_reader(&self.agg, reader, &self.limits)
+        AggregationSegmentCollector::from_agg_req_and_reader(
+            &self.agg,
+            reader,
+            segment_local_id,
+            &self.limits,
+        )
    }

    fn requires_scoring(&self) -> bool {
@@ -89,10 +95,15 @@ impl Collector for AggregationCollector {

    fn for_segment(
        &self,
-        _segment_local_id: crate::SegmentOrdinal,
+        segment_local_id: crate::SegmentOrdinal,
        reader: &crate::SegmentReader,
    ) -> crate::Result<Self::Child> {
-        AggregationSegmentCollector::from_agg_req_and_reader(&self.agg, reader, &self.limits)
+        AggregationSegmentCollector::from_agg_req_and_reader(
+            &self.agg,
+            reader,
+            segment_local_id,
+            &self.limits,
+        )
    }

    fn requires_scoring(&self) -> bool {
@@ -135,10 +146,11 @@ impl AggregationSegmentCollector {
    pub fn from_agg_req_and_reader(
        agg: &Aggregations,
        reader: &SegmentReader,
+        segment_ordinal: SegmentOrdinal,
        limits: &AggregationLimits,
    ) -> crate::Result<Self> {
        let mut aggs_with_accessor =
-            get_aggs_with_segment_accessor_and_validate(agg, reader, limits)?;
+            get_aggs_with_segment_accessor_and_validate(agg, reader, segment_ordinal, limits)?;
        let result =
            BufAggregationCollector::new(build_segment_agg_collector(&mut aggs_with_accessor)?);
        Ok(AggregationSegmentCollector {
--- a/src/aggregation/intermediate_agg_result.rs
+++ b/src/aggregation/intermediate_agg_result.rs
@@ -5,6 +5,7 @@
 use std::cmp::Ordering;
 use std::collections::hash_map::Entry;
 use std::hash::Hash;
+use std::net::Ipv6Addr;

 use columnar::ColumnType;
 use itertools::Itertools;
@@ -18,8 +19,8 @@ use super::bucket::{
    GetDocCount, Order, OrderTarget, RangeAggregation, TermsAggregation,
 };
 use super::metric::{
-    IntermediateAverage, IntermediateCount, IntermediateMax, IntermediateMin, IntermediateStats,
-    IntermediateSum, PercentilesCollector,
+    IntermediateAverage, IntermediateCount, IntermediateExtendedStats, IntermediateMax,
+    IntermediateMin, IntermediateStats, IntermediateSum, PercentilesCollector, TopHitsTopNComputer,
 };
 use super::segment_agg_result::AggregationLimits;
 use super::{format_date, AggregationError, Key, SerializedKey};
@@ -41,6 +42,10 @@ pub struct IntermediateAggregationResults {
 /// This might seem redundant with `Key`, but the point is to have a different
 /// Serialize implementation.
 pub enum IntermediateKey {
+    /// Ip Addr key
+    IpAddr(Ipv6Addr),
+    /// Bool key
+    Bool(bool),
    /// String key
    Str(String),
    /// `f64` key
@@ -58,7 +63,16 @@ impl From<IntermediateKey> for Key {
    fn from(value: IntermediateKey) -> Self {
        match value {
            IntermediateKey::Str(s) => Self::Str(s),
+            IntermediateKey::IpAddr(s) => {
+                // IPv4-mapped addresses are rendered as plain IPv4; all others as IPv6.
+                if let Some(ip) = s.to_ipv4_mapped() {
+                    Self::Str(ip.to_string())
+                } else {
+                    Self::Str(s.to_string())
+                }
+            }
            IntermediateKey::F64(f) => Self::F64(f),
+            IntermediateKey::Bool(f) => Self::F64(f as u64 as f64),
        }
    }
 }
@@ -71,6 +85,8 @@ impl std::hash::Hash for IntermediateKey {
        match self {
            IntermediateKey::Str(text) => text.hash(state),
            IntermediateKey::F64(val) => val.to_bits().hash(state),
+            IntermediateKey::Bool(val) => val.hash(state),
+            IntermediateKey::IpAddr(val) => val.hash(state),
        }
    }
 }
@@ -166,9 +182,9 @@ impl IntermediateAggregationResults {
 pub(crate) fn empty_from_req(req: &Aggregation) -> IntermediateAggregationResult {
    use AggregationVariants::*;
    match req.agg {
-        Terms(_) => IntermediateAggregationResult::Bucket(IntermediateBucketResult::Terms(
-            Default::default(),
-        )),
+        Terms(_) => IntermediateAggregationResult::Bucket(IntermediateBucketResult::Terms {
+            buckets: Default::default(),
+        }),
        Range(_) => IntermediateAggregationResult::Bucket(IntermediateBucketResult::Range(
            Default::default(),
        )),
@@ -199,12 +215,18 @@ pub(crate) fn empty_from_req(req: &Aggregation) -> IntermediateAggregationResult
        Stats(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Stats(
            IntermediateStats::default(),
        )),
+        ExtendedStats(_) => IntermediateAggregationResult::Metric(
+            IntermediateMetricResult::ExtendedStats(IntermediateExtendedStats::default()),
+        ),
        Sum(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Sum(
            IntermediateSum::default(),
        )),
        Percentiles(_) => IntermediateAggregationResult::Metric(
            IntermediateMetricResult::Percentiles(PercentilesCollector::default()),
        ),
+        TopHits(ref req) => IntermediateAggregationResult::Metric(
+            IntermediateMetricResult::TopHits(TopHitsTopNComputer::new(req)),
+        ),
    }
 }

@@ -263,8 +285,12 @@ pub enum IntermediateMetricResult {
    Min(IntermediateMin),
    /// Intermediate stats result.
    Stats(IntermediateStats),
+    /// Intermediate extended stats result.
+    ExtendedStats(IntermediateExtendedStats),
    /// Intermediate sum result.
    Sum(IntermediateSum),
+    /// Intermediate top_hits result
+    TopHits(TopHitsTopNComputer),
 }

 impl IntermediateMetricResult {
@@ -285,6 +311,9 @@ impl IntermediateMetricResult {
            IntermediateMetricResult::Stats(intermediate_stats) => {
                MetricResult::Stats(intermediate_stats.finalize())
            }
+            IntermediateMetricResult::ExtendedStats(intermediate_stats) => {
+                MetricResult::ExtendedStats(intermediate_stats.finalize())
+            }
            IntermediateMetricResult::Sum(intermediate_sum) => {
                MetricResult::Sum(intermediate_sum.finalize().into())
            }
@@ -292,9 +321,13 @@ impl IntermediateMetricResult {
                percentiles
                    .into_final_result(req.agg.as_percentile().expect("unexpected metric type")),
            ),
+            IntermediateMetricResult::TopHits(top_hits) => {
+                MetricResult::TopHits(top_hits.into_final_result())
+            }
        }
    }

+    // TODO: this is our top-of-the-chain fruit merge mech
    fn merge_fruits(&mut self, other: IntermediateMetricResult) -> crate::Result<()> {
        match (self, other) {
            (
@@ -321,6 +354,12 @@ impl IntermediateMetricResult {
            ) => {
                stats_left.merge_fruits(stats_right);
            }
+            (
+                IntermediateMetricResult::ExtendedStats(extended_stats_left),
+                IntermediateMetricResult::ExtendedStats(extended_stats_right),
+            ) => {
+                extended_stats_left.merge_fruits(extended_stats_right);
+            }
            (IntermediateMetricResult::Sum(sum_left), IntermediateMetricResult::Sum(sum_right)) => {
                sum_left.merge_fruits(sum_right);
            }
@@ -330,6 +369,9 @@ impl IntermediateMetricResult {
            ) => {
                left.merge_fruits(right)?;
            }
+            (IntermediateMetricResult::TopHits(left), IntermediateMetricResult::TopHits(right)) => {
+                left.merge_fruits(right)?;
+            }
            _ => {
                panic!("incompatible fruit types in tree or missing merge_fruits handler");
            }
@@ -351,11 +393,14 @@ pub enum IntermediateBucketResult {
    Histogram {
        /// The column_type of the underlying `Column` is DateTime
        is_date_agg: bool,
-        /// The buckets
+        /// The histogram buckets
        buckets: Vec<IntermediateHistogramBucketEntry>,
    },
    /// Term aggregation
-    Terms(IntermediateTermBucketResult),
+    Terms {
+        /// The term buckets
+        buckets: IntermediateTermBucketResult,
+    },
 }

 impl IntermediateBucketResult {
@@ -432,7 +477,7 @@ impl IntermediateBucketResult {
                };
                Ok(BucketResult::Histogram { buckets })
            }
-            IntermediateBucketResult::Terms(terms) => terms.into_final_result(
+            IntermediateBucketResult::Terms { buckets: terms } => terms.into_final_result(
                req.agg
                    .as_term()
                    .expect("unexpected aggregation, expected term aggregation"),
@@ -445,8 +490,12 @@ impl IntermediateBucketResult {
    fn merge_fruits(&mut self, other: IntermediateBucketResult) -> crate::Result<()> {
        match (self, other) {
            (
-                IntermediateBucketResult::Terms(term_res_left),
-                IntermediateBucketResult::Terms(term_res_right),
+                IntermediateBucketResult::Terms {
+                    buckets: term_res_left,
+                },
+                IntermediateBucketResult::Terms {
+                    buckets: term_res_right,
+                },
            ) => {
                merge_maps(&mut term_res_left.entries, term_res_right.entries)?;
                term_res_left.sum_other_doc_count += term_res_right.sum_other_doc_count;
@@ -530,8 +579,15 @@ impl IntermediateTermBucketResult {
            .into_iter()
            .filter(|bucket| bucket.1.doc_count as u64 >= req.min_doc_count)
            .map(|(key, entry)| {
+                let key_as_string = match key {
+                    IntermediateKey::Bool(key) => {
+                        let val = if key { "true" } else { "false" };
+                        Some(val.to_string())
+                    }
+                    _ => None,
+                };
                Ok(BucketEntry {
-                    key_as_string: None,
+                    key_as_string,
                    key: key.into(),
                    doc_count: entry.doc_count as u64,
                    sub_aggregation: entry
--- a/src/aggregation/metric/average.rs
+++ b/src/aggregation/metric/average.rs
@@ -2,7 +2,8 @@ use std::fmt::Debug;

 use serde::{Deserialize, Serialize};

-use super::{IntermediateStats, SegmentStatsCollector};
+use super::*;
+use crate::aggregation::*;

 /// A single-value metric aggregation that computes the average of numeric values that are
 /// extracted from the aggregated documents.
@@ -24,7 +25,7 @@ pub struct AverageAggregation {
    /// By default they will be ignored but it is also possible to treat them as if they had a
    /// value. Examples in JSON format:
    /// { "field": "my_numbers", "missing": "10.0" }
-    #[serde(default)]
+    #[serde(default, deserialize_with = "deserialize_option_f64")]
    pub missing: Option<f64>,
 }

@@ -65,3 +66,71 @@ impl IntermediateAverage {
        self.stats.finalize().avg
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn deserialization_with_missing_test1() {
+        let json = r#"{
+            "field": "score",
+            "missing": "10.0"
+        }"#;
+        let avg: AverageAggregation = serde_json::from_str(json).unwrap();
+        assert_eq!(avg.field, "score");
+        assert_eq!(avg.missing, Some(10.0));
+        // `missing` as a string without a decimal point
+        let json = r#"{
+            "field": "score",
+            "missing": "10"
+        }"#;
+        let avg: AverageAggregation = serde_json::from_str(json).unwrap();
+        assert_eq!(avg.field, "score");
+        assert_eq!(avg.missing, Some(10.0));
+
+        // `missing` as a JSON number built from a u64 instead of a string
+        let avg: AverageAggregation = serde_json::from_value(json!({
+            "field": "score_f64",
+            "missing": 10u64,
+        }))
+        .unwrap();
+        assert_eq!(avg.missing, Some(10.0));
+        // `missing` as a JSON number built from a u32 instead of a string
+        let avg: AverageAggregation = serde_json::from_value(json!({
+            "field": "score_f64",
+            "missing": 10u32,
+        }))
+        .unwrap();
+        assert_eq!(avg.missing, Some(10.0));
+        let avg: AverageAggregation = serde_json::from_value(json!({
+            "field": "score_f64",
+            "missing": 10i8,
+        }))
+        .unwrap();
+        assert_eq!(avg.missing, Some(10.0));
+    }
+
+    #[test]
+    fn deserialization_with_missing_test_fail() {
+        let json = r#"{
+            "field": "score",
+            "missing": "a"
+        }"#;
+        let avg: Result<AverageAggregation, _> = serde_json::from_str(json);
+        assert!(avg.is_err());
+        assert!(avg
+            .unwrap_err()
+            .to_string()
+            .contains("Failed to parse f64 from string: \"a\""));
+
+        // A "NaN" string must be rejected as a `missing` value
+        let json = r#"{
+            "field": "score",
+            "missing": "NaN"
+        }"#;
+        let avg: Result<AverageAggregation, _> = serde_json::from_str(json);
+        assert!(avg.is_err());
+        assert!(avg.unwrap_err().to_string().contains("NaN"));
+    }
+}
--- a/src/aggregation/metric/count.rs
+++ b/src/aggregation/metric/count.rs
@@ -2,7 +2,8 @@ use std::fmt::Debug;

 use serde::{Deserialize, Serialize};

-use super::{IntermediateStats, SegmentStatsCollector};
+use super::*;
+use crate::aggregation::*;

 /// A single-value metric aggregation that counts the number of values that are
 /// extracted from the aggregated documents.
@@ -24,7 +25,7 @@ pub struct CountAggregation {
    /// By default they will be ignored but it is also possible to treat them as if they had a
    /// value. Examples in JSON format:
    /// { "field": "my_numbers", "missing": "10.0" }
-    #[serde(default)]
+    #[serde(default, deserialize_with = "deserialize_option_f64")]
    pub missing: Option<f64>,
 }

--- a/src/aggregation/metric/extended_stats.rs
+++ b/src/aggregation/metric/extended_stats.rs
--- a/src/aggregation/metric/max.rs
+++ b/src/aggregation/metric/max.rs
@@ -2,7 +2,8 @@ use std::fmt::Debug;

 use serde::{Deserialize, Serialize};

-use super::{IntermediateStats, SegmentStatsCollector};
+use super::*;
+use crate::aggregation::*;

 /// A single-value metric aggregation that computes the maximum of numeric values that are
 /// extracted from the aggregated documents.
@@ -24,7 +25,7 @@ pub struct MaxAggregation {
    /// By default they will be ignored but it is also possible to treat them as if they had a
    /// value. Examples in JSON format:
    /// { "field": "my_numbers", "missing": "10.0" }
-    #[serde(default)]
+    #[serde(default, deserialize_with = "deserialize_option_f64")]
    pub missing: Option<f64>,
 }

--- a/src/aggregation/metric/min.rs
+++ b/src/aggregation/metric/min.rs
@@ -2,7 +2,8 @@ use std::fmt::Debug;

 use serde::{Deserialize, Serialize};

-use super::{IntermediateStats, SegmentStatsCollector};
+use super::*;
+use crate::aggregation::*;

 /// A single-value metric aggregation that computes the minimum of numeric values that are
 /// extracted from the aggregated documents.
@@ -24,7 +25,7 @@ pub struct MinAggregation {
    /// By default they will be ignored but it is also possible to treat them as if they had a
    /// value. Examples in JSON format:
    /// { "field": "my_numbers", "missing": "10.0" }
-    #[serde(default)]
+    #[serde(default, deserialize_with = "deserialize_option_f64")]
    pub missing: Option<f64>,
 }

--- a/src/aggregation/metric/mod.rs
+++ b/src/aggregation/metric/mod.rs
@@ -18,13 +18,19 @@

 mod average;
 mod count;
+mod extended_stats;
 mod max;
 mod min;
 mod percentiles;
 mod stats;
 mod sum;
+mod top_hits;
+
+use std::collections::HashMap;
+
 pub use average::*;
 pub use count::*;
+pub use extended_stats::*;
 pub use max::*;
 pub use min::*;
 pub use percentiles::*;
@@ -32,6 +38,9 @@ use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};
 pub use stats::*;
 pub use sum::*;
+pub use top_hits::*;
+
+use crate::schema::OwnedValue;

 /// Single-metric aggregations use this common result structure.
 ///
@@ -81,6 +90,28 @@ pub struct PercentilesMetricResult {
    pub values: PercentileValues,
 }

+/// A single entry of the `top_hits` metric result.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TopHitsVecEntry {
+    /// The sort values of the document, depending on the sort criteria in the request.
+    pub sort: Vec<Option<u64>>,
+
+    /// Search results, for queries that include field retrieval requests
+    /// (`docvalue_fields`).
+    #[serde(rename = "docvalue_fields")]
+    #[serde(skip_serializing_if = "HashMap::is_empty")]
+    pub doc_value_fields: HashMap<String, OwnedValue>,
+}
+
+/// The top_hits metric aggregation returns a list of top hits by sort criteria.
+///
+/// The main reason for wrapping it in `hits` is to match the Elasticsearch output structure.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TopHitsMetricResult {
+    /// The result of the top_hits metric.
+    pub hits: Vec<TopHitsVecEntry>,
+}
+
 #[cfg(test)]
 mod tests {
    use crate::aggregation::agg_req::Aggregations;
--- a/src/aggregation/metric/percentiles.rs
+++ b/src/aggregation/metric/percentiles.rs
@@ -1,6 +1,5 @@
 use std::fmt::Debug;

-use columnar::ColumnType;
 use serde::{Deserialize, Serialize};

 use super::*;
@@ -11,7 +10,7 @@ use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
 };
 use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
-use crate::aggregation::{f64_from_fastfield_u64, f64_to_fastfield_u64, AggregationError};
+use crate::aggregation::*;
 use crate::{DocId, TantivyError};

 /// # Percentiles
@@ -84,7 +83,11 @@ pub struct PercentilesAggregationReq {
    /// By default they will be ignored but it is also possible to treat them as if they had a
    /// value. Examples in JSON format:
    /// { "field": "my_numbers", "missing": "10.0" }
-    #[serde(skip_serializing_if = "Option::is_none", default)]
+    #[serde(
+        skip_serializing_if = "Option::is_none",
+        default,
+        deserialize_with = "deserialize_option_f64"
+    )]
    pub missing: Option<f64>,
 }
 fn default_percentiles() -> &'static [f64] {
@@ -133,7 +136,6 @@ pub(crate) struct SegmentPercentilesCollector {
    field_type: ColumnType,
    pub(crate) percentiles: PercentilesCollector,
    pub(crate) accessor_idx: usize,
-    val_cache: Vec<u64>,
    missing: Option<u64>,
 }

@@ -243,7 +245,6 @@ impl SegmentPercentilesCollector {
            field_type,
            percentiles: PercentilesCollector::new(),
            accessor_idx,
-            val_cache: Default::default(),
            missing,
        })
    }
--- a/Show More
+++ b/Show More