Compare commits

...

103 Commits

Author SHA1 Message Date
Harrison Burt
7220df8a09 Fix building on windows with mmap (#2070)
* Fix windows build

* Make pub

* Update docs

* Rearrange

* Fix compilation error on unix

* Fix unix borrows

* Revert "Fix unix borrows"

This reverts commit c1d94fd12b.

* Fix unix borrows and revert original change

* Fix warning

* Cleaner code.

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-06-10 18:32:39 +02:00
PSeitz
e3eacb4388 release tantivy (#2083)
* prerelease

* chore: Release
2023-06-09 10:47:46 +02:00
PSeitz
fdecb79273 tokenizer-api: reduce Tokenizer overhead (#2062)
* tokenizer-api: reduce Tokenizer overhead

Previously a new `Token` was created for each text encountered, which
contains a `String::with_capacity(200)`.
In the new API the token_stream gets mutable access to the tokenizer,
which allows state to be shared (in this PR the Token is shared).
Ideally the allocation for the BoxTokenStream would also be removed, but
that may require some lifetime tricks.

* simplify api

* move lowercase and ascii folding buffer to global

* empty Token text as default
2023-06-08 18:37:58 +08:00
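
The state-sharing idea from the commit above can be illustrated with a minimal, self-contained sketch (hypothetical names, not the actual tantivy-tokenizer-api types): because `token_stream` takes `&mut self`, the stream can borrow a `Token` buffer owned by the tokenizer and reuse its allocation for every text, instead of creating a fresh `Token` each time.

```
// Hypothetical sketch of the shared-state pattern; not the real tokenizer-api.
#[derive(Default)]
struct Token {
    text: String,
}

struct WhitespaceTokenizer {
    // Owned by the tokenizer and reused across token_stream() calls.
    token: Token,
}

struct WhitespaceTokenStream<'a> {
    token: &'a mut Token,
    words: std::str::SplitWhitespace<'a>,
}

impl WhitespaceTokenizer {
    // Taking `&mut self` is what lets the stream share the tokenizer's state.
    fn token_stream<'a>(&'a mut self, text: &'a str) -> WhitespaceTokenStream<'a> {
        WhitespaceTokenStream {
            token: &mut self.token,
            words: text.split_whitespace(),
        }
    }
}

impl<'a> WhitespaceTokenStream<'a> {
    fn advance(&mut self) -> bool {
        match self.words.next() {
            Some(word) => {
                self.token.text.clear(); // reuse the existing String allocation
                self.token.text.push_str(word);
                true
            }
            None => false,
        }
    }
}

fn main() {
    let mut tokenizer = WhitespaceTokenizer { token: Token::default() };
    let mut stream = tokenizer.token_stream("shared token buffer");
    while stream.advance() {
        println!("{}", stream.token.text);
    }
}
```

The effect is the same as described in the commit message: the ~200-byte `String` buffer inside `Token` is allocated once per tokenizer rather than once per tokenized text.
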
PSeitz
27f202083c Improve Termmap Indexing Performance +~30% (#2058)
* update benchmark

* Improve Termmap Indexing Performance +~30%

This contains many small changes to improve Termmap performance.
Most notably:
* Specialized byte compare and equality versions, instead of glibc calls.
* ExpUnrolledLinkedList to not contain inline items.

Allow hash-only comparison via the feature flag compare_hash_only:
with a good hash function, 64 bits should be enough to compare strings by
their hashes instead of comparing the strings themselves. Disabled by default.

CreateHashMap/alice/174693
                        time:   [642.23 µs 643.80 µs 645.24 µs]
                        thrpt:  [258.20 MiB/s 258.78 MiB/s 259.41 MiB/s]
                 change:
                        time:   [-14.429% -13.303% -12.348%] (p = 0.00 < 0.05)
                        thrpt:  [+14.088% +15.344% +16.862%]
                        Performance has improved.
CreateHashMap/alice_expull/174693
                        time:   [877.03 µs 880.44 µs 884.67 µs]
                        thrpt:  [188.32 MiB/s 189.22 MiB/s 189.96 MiB/s]
                 change:
                        time:   [-26.460% -26.274% -26.091%] (p = 0.00 < 0.05)
                        thrpt:  [+35.301% +35.637% +35.981%]
                        Performance has improved.
CreateHashMap/numbers_zipf/8000000
                        time:   [9.1198 ms 9.1573 ms 9.1961 ms]
                        thrpt:  [829.64 MiB/s 833.15 MiB/s 836.57 MiB/s]
                 change:
                        time:   [-35.229% -34.828% -34.384%] (p = 0.00 < 0.05)
                        thrpt:  [+52.403% +53.440% +54.390%]
                        Performance has improved.

* clippy

* add bench for ids

* inline(always) to inline whole block with bounds checks

* cleanup
2023-06-08 11:13:52 +02:00
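
For illustration, a small self-contained sketch of the `compare_hash_only` idea from the commit above (hypothetical, not the tantivy-stacker `ArenaHashMap`): during linear probing, key equality is decided purely on the stored 64-bit hash, so the key bytes never need to be re-read.

```
// Hypothetical hash-only probing map; with a good 64-bit hash, comparing
// hashes can stand in for comparing the key bytes themselves.
use std::collections::hash_map::DefaultHasher;
use std::hash::Hasher;

#[derive(Clone, Copy)]
struct Entry {
    hash: u64,
    value: u32,
    occupied: bool,
}

struct HashOnlyMap {
    table: Vec<Entry>, // length is a power of two
}

impl HashOnlyMap {
    fn with_capacity_pow2(capacity: usize) -> Self {
        assert!(capacity.is_power_of_two());
        let empty = Entry { hash: 0, value: 0, occupied: false };
        HashOnlyMap { table: vec![empty; capacity] }
    }

    fn hash(key: &[u8]) -> u64 {
        let mut hasher = DefaultHasher::new();
        hasher.write(key);
        hasher.finish()
    }

    /// Returns the existing value for `key`, or inserts `default`.
    fn get_or_insert(&mut self, key: &[u8], default: u32) -> u32 {
        let hash = Self::hash(key);
        let mask = self.table.len() - 1;
        let mut bucket = hash as usize & mask;
        loop {
            let entry = &mut self.table[bucket];
            if !entry.occupied {
                *entry = Entry { hash, value: default, occupied: true };
                return default;
            }
            // compare_hash_only: no key bytes are compared here.
            if entry.hash == hash {
                return entry.value;
            }
            bucket = (bucket + 1) & mask; // linear probing
        }
    }
}

fn main() {
    let mut map = HashOnlyMap::with_capacity_pow2(16);
    assert_eq!(map.get_or_insert(b"tantivy", 7), 7);
    assert_eq!(map.get_or_insert(b"tantivy", 9), 7); // found via hash equality
}
```

The trade-off is the small chance of a 64-bit hash collision silently conflating two distinct terms, which is why the behavior sits behind a feature flag and is disabled by default.
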
PSeitz
ccb09aaa83 allow histogram bounds to be passed as Rfc3339 (#2076) 2023-06-08 09:07:08 +02:00
Valerii
4b7c485a08 feat: add stop words for Hungarian language (#2069) 2023-06-02 07:26:03 +02:00
PSeitz
3942fc6d2b update CHANGELOG (#2068) 2023-06-02 05:00:12 +02:00
Adam Reichold
b325d569ad Expose phrase-prefix queries via the built-in query parser (#2044)
* Expose phrase-prefix queries via the built-in query parser

This proposes the less-than-imaginative syntax `field:"phrase ter"*` to
perform a phrase prefix query against `field` using `phrase` and `ter` as the
terms. The aim of this is to make this type of query more discoverable and
simplify manual testing.

I did consider exposing the `max_expansions` parameter similar to how slop is
handled, but I think that this is rather something that should be configured via
the query parser (similar to `set_field_boost` and `set_field_fuzzy`), as
choosing it requires rather intimate knowledge of the backing index.

* Prevent construction of zero or one term phrase-prefix queries via the query parser.

* Add example using phrase-prefix search via surface API to improve feature discoverability.
2023-06-01 13:03:16 +02:00
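
A usage sketch of the syntax introduced above, assuming tantivy's standard `QueryParser` surface (the field name and index setup are made up for illustration; check the release docs for the exact API):

```
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, TEXT};
use tantivy::Index;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema);

    let query_parser = QueryParser::for_index(&index, vec![title]);
    // `title:"phrase ter"*` parses into a phrase-prefix query: the phrase
    // "phrase" followed by a term starting with "ter".
    let _query = query_parser.parse_query(r#"title:"phrase ter"*"#)?;
    Ok(())
}
```

As noted in the commit, the parser rejects phrase-prefix queries with zero or one term.
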
Paul Masurel
7ee78bda52 Readding s in datetime precision variant names (#2065)
There is no clear win, and it changes some serialization in quickwit.
2023-06-01 06:39:46 +02:00
Paul Masurel
184a9daa8a Cancels concurrently running actions for the same PR. (#2067) 2023-06-01 12:57:38 +09:00
Paul Masurel
47e01b345b Simplified linear probing code (#2066) 2023-06-01 04:58:42 +02:00
PSeitz
3af456972e Fix min doc_count empty merge bug (#2057)
This fixes an issue where min_doc_count==0 loads terms from the dictionary of
one segment and merges the same term with a sub-aggregation from another
segment.
Previously the empty structure was not correctly initialized to contain
the sub-aggregation, so the merge was incorrect.
2023-05-29 14:20:50 +08:00
PSeitz
e56addc63e enable tokenizer on json fields (#2053)
* enable tokenizer on json fields

enable tokenizer on json fields for type text

* Avoid making the tokenizer within the TextAnalyzer pub(crate)

* Moving BoxableTokenizer to tantivy.

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-05-24 10:47:39 +02:00
dependabot[bot]
4be6f83b0a Update criterion requirement from 0.4 to 0.5 (#2056)
Updates the requirements on [criterion](https://github.com/bheisler/criterion.rs) to permit the latest version.
- [Changelog](https://github.com/bheisler/criterion.rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/bheisler/criterion.rs/compare/0.4.0...0.5.0)

---
updated-dependencies:
- dependency-name: criterion
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-05-24 15:59:51 +09:00
Adrien Guillo
a789ad9aee Rename DatePrecision to DateTimePrecision (#2051) 2023-05-23 17:09:11 +02:00
Sergei Lavrentev
8cf26da4b2 Add possibility to set up highlight prefix and postfix for snippet (#1422)
* add possibility to change highlight prefix and postfix

* add comment to Snippet::new

* add test for highlighted elements

* add default highlight prefix and postfix constants

* fix spelling

* fix tests

* fix spelling

* do fixes after code review

* reduce test_snippet_generator_custom_highlighted_elements code

* fix fmt

* change names to more convenient ones

---------

Co-authored-by: Sergei Lavrentev <23312691+lavrxxx@users.noreply.github.com>
2023-05-23 15:09:24 +02:00
trinity-1686a
a3f001360f add support for warming up range of terms (#2042)
* add support for warming up range of terms

* simplify handling of limit
2023-05-22 14:29:35 +02:00
trinity-1686a
6564e0c467 fix phrase prefix query (#2043)
* fix phrase prefix query

it would fail spectacularly when no doc in the segment would match the phrase part of the query

* clippy
2023-05-22 12:36:20 +02:00
Paul Masurel
d7e97331e5 Minor refactoring find field (#2055)
* Minor refactoring

Moving find_field_with_default to Schema.

* Clippy comments
2023-05-22 15:00:48 +09:00
Paul Masurel
4417be165d Minor refactoring (#2054)
Moving find_field_with_default to Schema.
2023-05-22 14:56:38 +09:00
PSeitz
6239697a02 switch to ms in histogram for date type (#2045)
* switch to ms in histogram for date type

switch to ms in histogram by adding a normalization step that converts
to nanosecond precision when creating the collector.

closes #2028
related to #2026

* add missing unit long variants

* use single thread to avoid handling test case

* fix docs

* revert CI

* cleanup

* improve docs

* Update src/aggregation/bucket/histogram/histogram.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-05-19 08:15:44 +02:00
Paul Masurel
62709b8094 Change in the query grammar. (#2050)
* Change in the query grammar.

Quotation marks can now be used for phrase queries.
The delimiter is part of the `UserInputLeaf`.
That information is meant to be used in Quickwit to solve #3364.

This PR also adds support for quotation marks escaping in phrase
queries.

* Apply suggestions from code review
2023-05-19 12:07:10 +09:00
PSeitz
04562c0318 add fastfield tokenizer to IndexBuilder (#2046) 2023-05-18 04:33:42 +02:00
PSeitz
2dfe37940d handle multiple types in term aggregation (#2041) 2023-05-15 11:57:38 +02:00
Denis Bazhenov
e248a4959f Enforcing "NOT" and "-" queries consistency in UserInputAst (#1609)
* Enforcing "NOT" and "-" queries consistency in UserInputAst

* Mutable implementation of rewrite_ast_clause()
2023-05-13 00:27:48 +09:00
PSeitz
00c5df610c update termmap benchmark (#2040) 2023-05-12 07:35:06 +02:00
Adam Reichold
fedd9559e7 Expose create a query from a user input AST. (#2039) 2023-05-11 21:53:18 +09:00
Paul Masurel
fe3ecf9567 Added support for madvise (#2036)
Added support for madvise
2023-05-11 05:39:17 +02:00
PSeitz
ba3a885a3b handle multiple agg results (#2035)
handle multiple intermediate aggregation results with the same name.
2023-05-10 15:00:38 +02:00
PSeitz
d1988be8e9 fix and extend benchmark (#2030)
* add benchmark, add missing inlines

* fix stacker bench

* add wiki benchmark

* move line split out of bench
2023-05-10 13:01:56 +02:00
PSeitz
0eafbaab8e fix slop (#2031)
Fix slop by carrying the slop accumulated so far across multiple terms.
Define the slop contract in the API.
2023-05-10 11:45:14 +02:00
PSeitz
d3357a8426 fix ArenaHashMap default (#2034)
an empty ArenaHashMap is invalid and causes a panic when combined with `get`
2023-05-10 11:39:47 +02:00
Yuri Astrakhan
74275b76a6 Inline format arguments where makes sense (#2038)
Applied this command to the code, making it a bit shorter and slightly
more readable.

```
cargo +nightly clippy --all-features --benches --tests --workspace --fix -- -A clippy::all -W clippy::uninlined_format_args
cargo +nightly fmt --all
```
2023-05-10 18:03:59 +09:00
dependabot[bot]
f479840a1b Update memmap2 requirement from 0.5.3 to 0.6.0 (#2033)
Updates the requirements on [memmap2](https://github.com/RazrFalcon/memmap2-rs) to permit the latest version.
- [Changelog](https://github.com/RazrFalcon/memmap2-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/RazrFalcon/memmap2-rs/compare/v0.5.3...v0.6.0)

---
updated-dependencies:
- dependency-name: memmap2
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-05-10 03:50:14 +02:00
PSeitz
4ee1b5cda0 add separate tokenizer manager for fast fields (#2019)
* add separate tokenizer manager for fast fields

* rename
2023-05-08 11:22:31 +02:00
PSeitz
45ff0e3c5c clear memory consumption in AggregationLimits (#2022)
* clear memory consumption in AggregationLimits

clear memory consumption in AggregationLimits at the end of segment collection

* switch to ResourceLimitGuard

* unduplicate code

* merge methods

* Apply suggestions from code review

Co-authored-by: Paul Masurel <paul@quickwit.io>

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-05-08 10:15:09 +02:00
PSeitz
4c58b0086d allow slop in both directions (#2020)
* allow slop in both directions

allow slop in both directions,
so "big wolf"~3 can also match "wolf big"

This also fixes #1934, where the docsets were reordered by size and no longer
matched the terms.

* remove count

* add test for repeating tokens, unduplicate tests
2023-05-07 12:05:21 +09:00
Tomoko Uchida
85df322ceb fix typo in the architecture doc (#2009) 2023-05-07 12:04:07 +09:00
François Massot
38c863830f Merge pull request #2027 from quickwit-oss/fmassot/fix-date-histogram
Fix date histogram bounds and field name.
2023-05-05 13:03:25 +02:00
François Massot
992f755298 Fix clippy. 2023-05-05 10:51:29 +02:00
François Massot
c8df843f96 Fix date histogram bounds and field name. 2023-05-05 00:52:55 +02:00
Paul Masurel
f28ddb711e Exposing u64-based FastFieldRangeWeight (#2024) 2023-05-03 18:32:00 +09:00
tottoto
73452284ae Remove unused crates from dependencies (#2018)
* Remove unused crates from dependencies

* Revert rand to columnar

* Revert criterion to stacker
2023-05-02 12:34:20 +02:00
PSeitz
ba309e18a1 switch to nanosecond precision (#2016) 2023-05-01 03:32:20 +02:00
PSeitz
cbf2bdc75b change bucket count type (#2013)
* change bucket count type

closes #2012

* Update src/aggregation/agg_limits.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

* Update src/directory/managed_directory.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

* fix test

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-04-27 15:47:31 +08:00
PSeitz
1f06997d04 fix single collector special case (#2014) 2023-04-27 09:30:19 +02:00
PSeitz
c599bf3b6c chore!:drop JSON support on intermediate agg result (#1992)
* chore!:drop JSON support on intermediate agg result

add support for other formats by removing skip_serialize and untagged;
JSON support is broken anyway due to its lack of handling for f64::INF etc.

* Update src/aggregation/intermediate_agg_result.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

* move from impl

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-04-26 13:05:16 +02:00
PSeitz
80df1d9835 Handle error for exists on MMapDirectory (#1988)
`exists` will return false in case of other io errors, like permission denied
2023-04-25 09:20:33 +02:00
PSeitz
2e369db936 switch to Aggregation without serde_untagged (#2003)
* refactor result handling

* remove Internal stuff

* merge different accessors

* switch to Aggregation without serde_untagged

* fix doctests
2023-04-25 08:54:51 +02:00
PSeitz
7b31100208 refactor vint (#2010)
- improve performance of vint
vint serialization shows up in performance profiles during indexing.
It would also make sense to limit the value space to u29 and operate on 4 bytes only.
- remove unused code
- add missing inlines
- fix regex test
2023-04-25 08:49:36 +02:00
trinity-1686a
9c93bfeb51 optimise warmup code path (#2007)
* optimise warmup code path

* better function naming
2023-04-21 11:23:09 +02:00
PSeitz
74f9eafefc refactor Term (#2006)
* refactor Term

add ValueBytes for serialized term values
add missing debug for ip
skip unnecessary json path validation
remove code duplication
add DATE_TIME_PRECISION_INDEXED constant
add missing Term clarification
remove weird value_bytes_mut() API

* fix naming
2023-04-20 15:31:43 +02:00
RT_Enzyme
ff3d3313c4 fix BooleanQuery document (#1999)
* fix BooleanQuery document

* Update src/query/boolean_query/boolean_query.rs

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-04-20 11:37:20 +02:00
Paul Masurel
fbda511a1a Making more things public for quickwit. (#2005) 2023-04-20 11:37:45 +09:00
Adam Reichold
c1defdda05 Bump aho-corasick dependency to version 1.0 and adjust to API changes (#2002)
* Drop additional Arc-layer as the automaton itself is now cheap-to-clone.
* Drop state ID type parameter as it is not exposed by the library any more.
2023-04-18 07:34:30 +02:00
PSeitz
e522163a1c use json in agg tests (#1998)
* switch to JSON in tests, add flat aggregation types

* use method

* clippy

* remove commented file
2023-04-17 14:08:48 +02:00
PSeitz
e83abbfe4a perf: faster term hash map (#1940)
* add term hashmap benchmark

* refactor arena hashmap

add inlines
remove occupied array and use table_entry.is_empty instead (saves 4 bytes per entry)
raise saturation threshold from 1/3 to 1/2 to reduce memory
use u32 for UnorderedId (we have the 4 billion limit anyway on the Columnar stuff)
fix naming LinearProbing
remove byteorder dependency

memory consumption went down from 2GB to 1.8GB when indexing the Wikipedia dataset in tantivy

* Update stacker/src/arena_hashmap.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-04-17 09:07:33 +02:00
trinity-1686a
780e26331d sstable compression (#1946)
* compress sstable with zstd

* add some details to sstable readme

* compress only blocks which benefit from it

* multiple changes to sstable

make compression optional
use OwnedBytes instead of impl Read in sstable, required for next point
use zstd bulk api, which is much faster on small records

* cleanup and use bulk api for compression

* use dedicated byte for compression

* switch block len and compression flag

* change default zstd level in sstable
2023-04-14 16:25:50 +02:00
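
A sketch of the "compress only blocks which benefit from it" decision above, using zstd's bulk API (the flag byte and framing are hypothetical, not the actual tantivy-sstable block format; assumes the `zstd` crate as a dependency):

```
// Hypothetical block writer: compress each block with zstd's bulk API and keep
// the compressed form only when it is actually smaller, recording the choice
// in a flag byte. The bulk API avoids the per-call overhead of a streaming
// encoder, which matters for small blocks.

const UNCOMPRESSED: u8 = 0;
const ZSTD_COMPRESSED: u8 = 1;

fn write_block(block: &[u8], zstd_level: i32, out: &mut Vec<u8>) -> std::io::Result<()> {
    let compressed = zstd::bulk::compress(block, zstd_level)?;
    if compressed.len() < block.len() {
        out.push(ZSTD_COMPRESSED);
        out.extend_from_slice(&(compressed.len() as u32).to_le_bytes());
        out.extend_from_slice(&compressed);
    } else {
        // Incompressible block: store it as-is.
        out.push(UNCOMPRESSED);
        out.extend_from_slice(&(block.len() as u32).to_le_bytes());
        out.extend_from_slice(block);
    }
    Ok(())
}

fn main() -> std::io::Result<()> {
    let mut out = Vec::new();
    write_block(b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 3, &mut out)?;
    write_block(&[1, 2, 3], 3, &mut out)?; // tiny block, likely stored raw
    println!("wrote {} bytes", out.len());
    Ok(())
}
```
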
trinity-1686a
0286ecea09 re-export a few sstable functions on dictionary (#1996)
* re-export a few sstable functions on dictionary

* Update documentation

Co-authored-by: François Massot <francois.massot@gmail.com>

---------

Co-authored-by: François Massot <francois.massot@gmail.com>
2023-04-14 11:13:48 +02:00
PSeitz
b0ef9a6252 use crates.io dependency (#1990) 2023-04-14 09:35:20 +08:00
François Massot
36138c493b Merge pull request #1994 from quickwit-oss/fmassot/expose-simple-token-stream
Expose `SimpleTokenStream` to use it in quickwit for the multilanguage tokenizer
2023-04-13 18:55:02 +02:00
François Massot
64bce340b2 Expose to use it in quickwit. 2023-04-13 18:28:53 +02:00
trinity-1686a
205e8a0a92 encode dictionary type in fst footer (#1968)
* encode additional footer for dictionary kind in fst
2023-04-12 09:43:01 +02:00
Paul Masurel
4b01cc4c49 Made BooleanWeight and BoostWeight public (#1991) 2023-04-12 10:26:30 +09:00
PSeitz
0ed13eeea8 add sparse to agg benchmark (#1986)
* add sparse to agg benchmark

* Update src/aggregation/agg_bench.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-04-11 08:13:32 +02:00
Tony-X
91a38058fe Fix typo in README.md (#1989) 2023-04-11 12:07:20 +09:00
PSeitz
41af70799d add percentiles aggregations (#1984)
* add percentiles aggregations

add percentiles aggregation
fix disabled agg benchmark

* Update src/aggregation/metric/percentiles.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

* Apply suggestions from code review

Co-authored-by: Paul Masurel <paul@quickwit.io>

* fix import

* fix import

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-04-07 07:18:28 +02:00
Paul Masurel
f853bf204b Align the numerical type priority order with columnar. (#1978)
Closes #1956
2023-04-07 10:07:54 +09:00
Tony-X
11ae48d3bc Update benchmarks section in README.md to link to the bench repo (#1985)
* Update benchmarks section in README.md to link to the bench repo

* Apply suggestions from code review

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-04-07 10:07:06 +09:00
Paul Masurel
5eb12173d6 Proptest merge columnar (#1976)
* Added proptest on columnar merge with a shuffle

Made column serialization more explicit.
Bugfix when a bytes column is missing, and with a shuffle.
Improved the cardinality detection logic / column detection.

* Code review

* CR comments

* Following CR
2023-04-04 11:28:42 +09:00
PSeitz
5c4ea6a708 tokenizer option on text fastfield (#1945)
* tokenizer option on text fastfield

allow setting the tokenizer option on text fastfield (fixes #1901)
handle PreTokenized strings in fast field

* change visibility

* remove custom de/serialization
2023-03-31 10:03:38 +02:00
PSeitz
4cf93dab7d fix build (#1973) 2023-03-31 13:54:03 +09:00
PSeitz
5c380b76e7 Better mixed types support in aggs and fix serialization issue (#1971)
* Better mixed types support in aggs and fix serialization issue

- Improve support for mixed types in JSON field aggregations (pick the right field, #1913)
- Resolve the issue with JSON serialization for numeric keys (fixes #1967)
- Add JSON round-trip test for term buckets
- Remove `u64_lenient`, as this is a footgun without the type
- move aggregation benchmarks

* remove shadowing
2023-03-31 05:52:11 +02:00
PSeitz
571735c5f7 Fix index sort by on optional/multicolumn (#1972)
Fix index sort by on optional/multicolumn
add optional columns to proptest
extend proptests for sort
add columnar sort tests
2023-03-31 04:24:11 +02:00
zhouhui
8e92f960d3 Fix comment: change max_merge_size to max_docs_before_merge. (#1970) 2023-03-28 22:49:00 +09:00
Paul Masurel
057211c3d8 Fixing build on arm (#1966) 2023-03-27 22:42:57 +09:00
Paul Masurel
059fc767ea Added ::MIN ::MAX DateTime. (#1965) 2023-03-27 15:32:53 +09:00
Paul Masurel
694a056255 Faster range (#1954)
* Faster range queries

This PR does several changes
- ip compact space now uses u32
- the bitunpacker now gets a get_batch function
- we push down range filtering, removing GCD / shift in the bitpacking
  codec.
- we rely on AVX2 routine to do the filtering.

* Apply suggestions from code review

* Apply suggestions from code review

* CR comments
2023-03-27 14:56:32 +09:00
Paul Masurel
2955e34452 Added proptests for building/merging columnar. (#1963) 2023-03-27 14:56:02 +09:00
Paul Masurel
821208480b Adding Debug/Display impl. Refining the ColumnIndex::get_cardinality 2023-03-26 14:40:37 +09:00
Paul Masurel
a2e3c2ed5b Renaming Column::idx -> Column::index (#1961)
There was some variable name ghosting happening.
2023-03-26 13:58:50 +09:00
PSeitz
835f228bfa fix cardinality when merging empty columns (#1960)
fixes #1958
2023-03-25 15:58:15 +09:00
Paul Masurel
2b6a4da640 Exposing empty column builder. (#1959) 2023-03-24 16:34:41 +09:00
PSeitz
d6a95381ee add memory check for term agg (#1957) 2023-03-24 06:47:45 +01:00
PSeitz
da2804644f fetch blocks of vals in aggregation for all cardinality (#1950)
* fetch blocks of vals in aggregation for all cardinality

* move caching in common accessor
2023-03-23 08:41:11 +01:00
PSeitz
5504cfd012 remove IterColumn (#1955)
fixes #1658
2023-03-23 06:43:17 +01:00
trinity-1686a
482b4155e8 fix bug with new sstable index format (#1953) 2023-03-22 10:22:36 +01:00
Till Wegmüller
1a35f6573d Switch from fs2 to fs4, as fs2 is now unmaintained and does not support illumos (#1944)
Signed-off-by: Till Wegmueller <toasterson@gmail.com>
2023-03-22 13:48:49 +09:00
trinity-1686a
e5e50603a8 new sstable format (#1943)
* document a new sstable format

* add support for changing target block size

* use new format for sstable index

* handle sstable version error

* use very small blocks for proptests

* add a footer structure
2023-03-21 15:03:52 +01:00
PSeitz
8f7f1d6be4 add Display for ByteCount (#1949)
* add Display for ByteCount

* export missing AggregationLimits
2023-03-21 08:02:35 +01:00
PSeitz
6a7a1106d6 work in batches of docs (#1937)
* work in batches of docs

* add fill_buffer test
2023-03-21 06:57:44 +01:00
PSeitz
9e2faecf5b add memory limit for aggregations (#1942)
* add memory limit for aggregations

introduce AggregationLimits to set a memory consumption limit and bucket limits.
The memory limit is checked during aggregation; the bucket limit is checked before returning the aggregation result.

* Apply suggestions from code review

Co-authored-by: Paul Masurel <paul@quickwit.io>

* add ByteCount with human readable format

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-03-16 06:21:07 +01:00
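
A self-contained sketch of the kind of limit described above: a shared atomic byte counter that segment collectors update and that aborts the aggregation once the budget is exceeded. Names and behavior are illustrative only; the real `AggregationLimits`/`ByteCount` API may differ.

```
// Sketch of a shared memory budget checked during collection, in the spirit
// of the AggregationLimits described above (illustrative, not tantivy's API).
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;

#[derive(Clone)]
struct MemoryBudget {
    consumed: Arc<AtomicU64>,
    limit_bytes: u64,
}

impl MemoryBudget {
    fn new(limit_bytes: u64) -> Self {
        MemoryBudget {
            consumed: Arc::new(AtomicU64::new(0)),
            limit_bytes,
        }
    }

    /// Called whenever a collector grows its buffers; returns an error once
    /// the global budget is exceeded so the aggregation can abort early.
    fn add_memory(&self, bytes: u64) -> Result<(), String> {
        let total = self.consumed.fetch_add(bytes, Ordering::Relaxed) + bytes;
        if total > self.limit_bytes {
            return Err(format!(
                "aggregation memory limit exceeded: {total} > {} bytes",
                self.limit_bytes
            ));
        }
        Ok(())
    }

    /// Called at the end of segment collection to release this segment's share.
    fn release(&self, bytes: u64) {
        self.consumed.fetch_sub(bytes, Ordering::Relaxed);
    }
}

fn main() {
    let budget = MemoryBudget::new(1024);
    assert!(budget.add_memory(800).is_ok());
    assert!(budget.add_memory(400).is_err()); // 1200 > 1024: abort
    budget.release(800);
}
```
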
PSeitz
b6703f1b3c fix validation in date histogram (#1936)
fix validation in date histogram for parameters interval and date_interval
2023-03-15 06:10:43 +01:00
PSeitz
2fb3740cb0 handle missing column for aggs (#1920)
* handle missing column for aggs

add empty column fallback for missing column in aggs.
Fix sort for term agg on sub-agg with missing value (null is smallest)

* add error when field is not fast
2023-03-15 06:09:59 +01:00
PSeitz
8459efa32c split term collection count and sub_agg (#1921)
use unrolled ColumnValues::get_vals
2023-03-13 04:37:41 +01:00
PSeitz
61cfd8dc57 fix clippy (#1927) 2023-03-13 03:12:02 +01:00
trinity-1686a
064518156f refactor tokenization pipeline to use GATs (#1924)
* refactor tokenization pipeline to use GATs

* fix doctests

* fix clippy lints

* remove commented code
2023-03-09 09:39:37 +01:00
PSeitz
a42a96f470 fix panic in dict column merge (#1930)
* fix panic in dict column merge

* Bugfix and added unit test

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-03-08 22:04:37 +09:00
trinity-1686a
fcf5a25d93 use DeltaReader directly to implement Dictionary::ord_to_term (#1928) 2023-03-08 11:15:56 +09:00
dependabot[bot]
c0a5b28fd3 Update lru requirement from 0.9.0 to 0.10.0 (#1932)
Updates the requirements on [lru](https://github.com/jeromefroe/lru-rs) to permit the latest version.
- [Release notes](https://github.com/jeromefroe/lru-rs/releases)
- [Changelog](https://github.com/jeromefroe/lru-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/jeromefroe/lru-rs/compare/0.9.0...0.10.0)

---
updated-dependencies:
- dependency-name: lru
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-03-07 15:09:02 +09:00
trinity-1686a
a4f7ca8309 use DeltaReader directly to implement Dictionary::term_ord (#1925)
* use DeltaReader directly to implement Dictionary::term_ord

* add some additional test case for Dictionary::term_ord
2023-03-06 09:45:22 +01:00
Paul Masurel
364e321415 Clippy fix (#1926) 2023-03-06 10:37:17 +09:00
Paul Masurel
ed5a3b3172 Bumped murmurhash version 2023-03-03 21:24:32 +09:00
224 changed files with 14483 additions and 5788 deletions


@@ -6,6 +6,11 @@ on:
pull_request:
branches: [main]
# Ensures that we cancel running jobs for the same PR / same workflow.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
coverage:
runs-on: ubuntu-latest


@@ -8,6 +8,11 @@ env:
CARGO_TERM_COLOR: always
NUM_FUNCTIONAL_TEST_ITERATIONS: 20000
# Ensures that we cancel running jobs for the same PR / same workflow.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
test:


@@ -9,6 +9,11 @@ on:
env:
CARGO_TERM_COLOR: always
# Ensures that we cancel running jobs for the same PR / same workflow.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
check:

.gitignore vendored (2 changed lines)

@@ -13,3 +13,5 @@ benchmark
.idea
trace.dat
cargo-timing*
control
variable


@@ -254,7 +254,7 @@ The token positions of all of the terms are then stored in a separate file with
The [TermInfo](src/postings/term_info.rs) gives an offset (expressed in position this time) in this file. As we iterate through the docset,
we advance the position reader by the number of term frequencies of the current document.
## [fieldnorms/](src/fieldnorms): Here is my doc, how many tokens in this field?
## [fieldnorm/](src/fieldnorm): Here is my doc, how many tokens in this field?
The [BM25](https://en.wikipedia.org/wiki/Okapi_BM25) formula also requires to know the number of tokens stored in a specific field for a given document. We store this information on one byte per document in the fieldnorm.
The fieldnorm is therefore compressed. Values up to 40 are encoded unchanged.


@@ -1,3 +1,79 @@
Tantivy 0.20 [Unreleased]
================================
#### Bugfixes
- Fix phrase queries with slop (slop now supports transpositions; the algorithm carries the slop so far for num terms > 2) [#2031](https://github.com/quickwit-oss/tantivy/issues/2031) [#2020](https://github.com/quickwit-oss/tantivy/issues/2020) (@PSeitz)
- Handle error for exists on MMapDirectory [#1988](https://github.com/quickwit-oss/tantivy/issues/1988) (@PSeitz)
- Aggregation
- Fix min doc_count empty merge bug [#2057](https://github.com/quickwit-oss/tantivy/issues/2057) (@PSeitz)
- Fix: Sort order for term aggregations (sort order on key was inverted) [#1858](https://github.com/quickwit-oss/tantivy/issues/1858) (@PSeitz)
#### Features/Improvements
- Add PhrasePrefixQuery [#1842](https://github.com/quickwit-oss/tantivy/issues/1842) (@trinity-1686a)
- Add `coerce` option for text and numbers types (convert the value instead of returning an error during indexing) [#1904](https://github.com/quickwit-oss/tantivy/issues/1904) (@PSeitz)
- Add regex tokenizer [#1759](https://github.com/quickwit-oss/tantivy/issues/1759)(@mkleen)
- Move tokenizer API to a separate crate. Having a separate crate with a stable API will allow us to use tokenizers with different tantivy versions. [#1767](https://github.com/quickwit-oss/tantivy/issues/1767) (@PSeitz)
- **Columnar crate**: New fast field handling (@fulmicoton @PSeitz) [#1806](https://github.com/quickwit-oss/tantivy/issues/1806)[#1809](https://github.com/quickwit-oss/tantivy/issues/1809)
- Support for fast fields with optional values. Previously tantivy supported only single-valued and multi-value fast fields. The encoding of optional fast fields is now very compact.
- Fast field Support for JSON (schemaless fast fields). Support multiple types on the same column. [#1876](https://github.com/quickwit-oss/tantivy/issues/1876) (@fulmicoton)
- Unified access for fast fields over different cardinalities.
- Unified storage for typed and untyped fields.
- Move fastfield codecs into columnar. [#1782](https://github.com/quickwit-oss/tantivy/issues/1782) (@fulmicoton)
- Sparse dense index for optional values [#1716](https://github.com/quickwit-oss/tantivy/issues/1716) (@PSeitz)
- Switch to nanosecond precision in DateTime fastfield [#2016](https://github.com/quickwit-oss/tantivy/issues/2016) (@PSeitz)
- **Aggregation**
- Add `date_histogram` aggregation (only `fixed_interval` for now) [#1900](https://github.com/quickwit-oss/tantivy/issues/1900) (@PSeitz)
- Add `percentiles` aggregations [#1984](https://github.com/quickwit-oss/tantivy/issues/1984) (@PSeitz)
- [**breaking**] Drop JSON support on intermediate agg result (we use postcard as format in `quickwit` to send intermediate results) [#1992](https://github.com/quickwit-oss/tantivy/issues/1992) (@PSeitz)
- Set memory limit in bytes for aggregations after which they abort (Previously there was only the bucket limit) [#1942](https://github.com/quickwit-oss/tantivy/issues/1942)[#1957](https://github.com/quickwit-oss/tantivy/issues/1957)(@PSeitz)
- Add support for u64,i64,f64 fields in term aggregation [#1883](https://github.com/quickwit-oss/tantivy/issues/1883) (@PSeitz)
- Add count, min, max, and sum aggregations [#1794](https://github.com/quickwit-oss/tantivy/issues/1794) (@guilload)
- Switch to Aggregation without serde_untagged => better deserialization errors. [#2003](https://github.com/quickwit-oss/tantivy/issues/2003) (@PSeitz)
- Switch to ms in histogram for date type (ES compatibility) [#2045](https://github.com/quickwit-oss/tantivy/issues/2045) (@PSeitz)
- Reduce term aggregation memory consumption [#2013](https://github.com/quickwit-oss/tantivy/issues/2013) (@PSeitz)
- Reduce agg memory consumption: Replace generic aggregation collector (which has a high memory requirement per instance) in aggregation tree with optimized versions behind a trait.
- Split term collection count and sub_agg (Faster term agg with less memory consumption for cases without sub-aggs) [#1921](https://github.com/quickwit-oss/tantivy/issues/1921) (@PSeitz)
- Schemaless aggregations: In combination with stacker, tantivy now supports schemaless aggregations via the JSON type.
- Add aggregation support for JSON type [#1888](https://github.com/quickwit-oss/tantivy/issues/1888) (@PSeitz)
- Mixed types support on JSON fields in aggs [#1971](https://github.com/quickwit-oss/tantivy/issues/1971) (@PSeitz)
- Perf: Fetch blocks of vals in aggregation for all cardinality [#1950](https://github.com/quickwit-oss/tantivy/issues/1950) (@PSeitz)
- `Searcher` with disabled scoring via `EnableScoring::Disabled` [#1780](https://github.com/quickwit-oss/tantivy/issues/1780) (@shikhar)
- Enable tokenizer on json fields [#2053](https://github.com/quickwit-oss/tantivy/issues/2053) (@PSeitz)
- Enforcing "NOT" and "-" queries consistency in UserInputAst [#1609](https://github.com/quickwit-oss/tantivy/issues/1609) (@Denis Bazhenov)
- Faster indexing
- Refactor tokenization pipeline to use GATs [#1924](https://github.com/quickwit-oss/tantivy/issues/1924) (@trinity-1686a)
- Faster term hash map [#1940](https://github.com/quickwit-oss/tantivy/issues/1940) (@PSeitz)
- Refactor vint [#2010](https://github.com/quickwit-oss/tantivy/issues/2010) (@PSeitz)
- Faster search
- Work in batches of docs on the SegmentCollector (Only for cases without score for now) [#1937](https://github.com/quickwit-oss/tantivy/issues/1937) (@PSeitz)
- Faster fast field range queries using SIMD [#1954](https://github.com/quickwit-oss/tantivy/issues/1954) (@fulmicoton)
- Improve fast field range query performance [#1864](https://github.com/quickwit-oss/tantivy/issues/1864) (@PSeitz)
- Make BM25 scoring more flexible [#1855](https://github.com/quickwit-oss/tantivy/issues/1855) (@alexcole)
- Switch from fs2 to fs4, as fs2 is now unmaintained and does not support illumos [#1944](https://github.com/quickwit-oss/tantivy/issues/1944) (@Toasterson)
- Made BooleanWeight and BoostWeight public [#1991](https://github.com/quickwit-oss/tantivy/issues/1991) (@fulmicoton)
- Make index compatible with virtual drives on Windows [#1843](https://github.com/quickwit-oss/tantivy/issues/1843) (@Yukun Guo)
- Auto downgrade index record option, instead of vint error [#1857](https://github.com/quickwit-oss/tantivy/issues/1857) (@PSeitz)
- Enable range query on fast field for u64 compatible types [#1762](https://github.com/quickwit-oss/tantivy/issues/1762) (@PSeitz) [#1876]
- sstable
- Isolating sstable and stacker in independent crates. [#1718](https://github.com/quickwit-oss/tantivy/issues/1718) (@fulmicoton)
- New sstable format [#1943](https://github.com/quickwit-oss/tantivy/issues/1943)[#1953](https://github.com/quickwit-oss/tantivy/issues/1953) (@trinity-1686a)
- Use DeltaReader directly to implement Dictionary::ord_to_term [#1928](https://github.com/quickwit-oss/tantivy/issues/1928) (@trinity-1686a)
- Use DeltaReader directly to implement Dictionary::term_ord [#1925](https://github.com/quickwit-oss/tantivy/issues/1925) (@trinity-1686a)
- Add separate tokenizer manager for fast fields [#2019](https://github.com/quickwit-oss/tantivy/issues/2019) (@PSeitz)
- Make construction of LevenshteinAutomatonBuilder for FuzzyTermQuery instances lazy. [#1756](https://github.com/quickwit-oss/tantivy/issues/1756) (@adamreichold)
- Added support for madvise when opening an mmaped Index [#2036](https://github.com/quickwit-oss/tantivy/issues/2036) (@fulmicoton)
- Rename `DatePrecision` to `DateTimePrecision` [#2051](https://github.com/quickwit-oss/tantivy/issues/2051) (@guilload)
- Query Parser
- Quotation marks can now be used for phrase queries. [#2050](https://github.com/quickwit-oss/tantivy/issues/2050) (@fulmicoton)
- PhrasePrefixQuery is supported in the query parser via: `field:"phrase ter"*` [#2044](https://github.com/quickwit-oss/tantivy/issues/2044) (@adamreichold)
- Docs
- Update examples for literate docs [#1880](https://github.com/quickwit-oss/tantivy/issues/1880) (@PSeitz)
- Add ip field example [#1775](https://github.com/quickwit-oss/tantivy/issues/1775) (@PSeitz)
- Fix doc store cache documentation [#1821](https://github.com/quickwit-oss/tantivy/issues/1821) (@PSeitz)
- Fix BooleanQuery document [#1999](https://github.com/quickwit-oss/tantivy/issues/1999) (@RT_Enzyme)
- Update comments in the faceted search example [#1737](https://github.com/quickwit-oss/tantivy/issues/1737) (@DawChihLiou)
Tantivy 0.19
================================
#### Bugfixes


@@ -1,6 +1,6 @@
[package]
name = "tantivy"
version = "0.19.0"
version = "0.20.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
@@ -20,9 +20,9 @@ byteorder = "1.4.3"
crc32fast = "1.3.2"
once_cell = "1.10.0"
regex = { version = "1.5.5", default-features = false, features = ["std", "unicode"] }
aho-corasick = "0.7"
aho-corasick = "1.0"
tantivy-fst = "0.4.0"
memmap2 = { version = "0.5.3", optional = true }
memmap2 = { version = "0.6.0", optional = true }
lz4_flex = { version = "0.10", default-features = false, features = ["checked-decode"], optional = true }
brotli = { version = "3.3.4", optional = true }
zstd = { version = "0.12", optional = true, default-features = false }
@@ -32,7 +32,7 @@ log = "0.4.16"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = "1.0.79"
num_cpus = "1.13.1"
fs2 = { version = "0.4.3", optional = true }
fs4 = { version = "0.6.3", optional = true }
levenshtein_automata = "0.2.1"
uuid = { version = "1.0.0", features = ["v4", "serde"] }
crossbeam-channel = "0.5.4"
@@ -44,24 +44,26 @@ rustc-hash = "1.1.0"
thiserror = "1.0.30"
htmlescape = "0.3.1"
fail = "0.5.0"
murmurhash32 = "0.2.0"
murmurhash32 = "0.3.0"
time = { version = "0.3.10", features = ["serde-well-known"] }
smallvec = "1.8.0"
rayon = "1.5.2"
lru = "0.9.0"
lru = "0.10.0"
fastdivide = "0.4.0"
itertools = "0.10.3"
measure_time = "0.8.2"
async-trait = "0.1.53"
arc-swap = "1.5.0"
columnar = { version="0.1", path="./columnar", package ="tantivy-columnar" }
sstable = { version="0.1", path="./sstable", package ="tantivy-sstable", optional = true }
stacker = { version="0.1", path="./stacker", package ="tantivy-stacker" }
query-grammar = { version= "0.19.0", path="./query-grammar", package = "tantivy-query-grammar" }
tantivy-bitpacker = { version= "0.3", path="./bitpacker" }
columnar = { version= "0.1", path="./columnar", package ="tantivy-columnar" }
sstable = { version= "0.1", path="./sstable", package ="tantivy-sstable", optional = true }
stacker = { version= "0.1", path="./stacker", package ="tantivy-stacker" }
query-grammar = { version= "0.20.0", path="./query-grammar", package = "tantivy-query-grammar" }
tantivy-bitpacker = { version= "0.4", path="./bitpacker" }
common = { version= "0.5", path = "./common/", package = "tantivy-common" }
tokenizer-api = { version="0.1", path="./tokenizer-api", package="tantivy-tokenizer-api" }
tokenizer-api = { version= "0.1", path="./tokenizer-api", package="tantivy-tokenizer-api" }
sketches-ddsketch = { version = "0.2.1", features = ["use_serde"] }
futures-util = { version = "0.3.28", optional = true }
[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
@@ -72,12 +74,14 @@ maplit = "1.0.2"
matches = "0.1.9"
pretty_assertions = "1.2.1"
proptest = "1.0.0"
criterion = "0.4"
criterion = "0.5"
test-log = "0.2.10"
env_logger = "0.10.0"
pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
pprof = { git = "https://github.com/PSeitz/pprof-rs/", rev = "53af24b", features = ["flamegraph", "criterion"] } # temp fork that works with criterion 0.5
futures = "0.3.21"
paste = "1.0.11"
more-asserts = "0.3.1"
rand_distr = "0.4.3"
[dev-dependencies.fail]
version = "0.5.0"
@@ -88,13 +92,18 @@ opt-level = 3
debug = false
debug-assertions = false
[profile.bench]
opt-level = 3
debug = true
debug-assertions = false
[profile.test]
debug-assertions = true
overflow-checks = true
[features]
default = ["mmap", "stopwords", "lz4-compression"]
mmap = ["fs2", "tempfile", "memmap2"]
mmap = ["fs4", "tempfile", "memmap2"]
stopwords = []
brotli-compression = ["brotli"]
@@ -105,7 +114,7 @@ zstd-compression = ["zstd"]
failpoints = ["fail/failpoints"]
unstable = [] # useful for benches.
quickwit = ["sstable"]
quickwit = ["sstable", "futures-util"]
[workspace]
members = ["query-grammar", "bitpacker", "common", "ownedbytes", "stacker", "sstable", "tokenizer-api", "columnar"]
@@ -129,4 +138,3 @@ harness = false
[[bench]]
name = "index-bench"
harness = false


@@ -1,5 +1,5 @@
test:
echo "Run test only... No examples."
@echo "Run test only... No examples."
cargo test --tests --lib
fmt:


@@ -26,6 +26,8 @@ Your mileage WILL vary depending on the nature of queries and their load.
<img src="doc/assets/images/searchbenchmark.png">
Details about the benchmark can be found at this [repository](https://github.com/quickwit-oss/search-benchmark-game).
# Features
- Full-text search

RELEASE.md (new file, 21 lines)

@@ -0,0 +1,21 @@
# Release a new Tantivy Version
## Steps
1. Identify new packages in workspace since last release
2. Identify changed packages in workspace since last release
3. Bump version in `Cargo.toml` and their dependents for all changed packages
4. Update version of root `Cargo.toml`
5. Publish version starting with leaf nodes
6. Set git tag with new version
Steps 1-4 can be done in conjunction with `cargo-release` (I'm not sure if the change detection works):
Set new packages to version 0.0.0
Replace prev-tag-name
```bash
cargo release --workspace --no-publish -v --prev-tag-name 0.19 --push-remote origin minor --no-tag --execute
```
Pass `--no-tag`, or it will create tags for all the subpackages.


@@ -1,23 +0,0 @@
# Appveyor configuration template for Rust using rustup for Rust installation
# https://github.com/starkat99/appveyor-rust
os: Visual Studio 2015
environment:
matrix:
- channel: stable
target: x86_64-pc-windows-msvc
install:
- appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
- rustup-init -yv --default-toolchain %channel% --default-host %target%
- set PATH=%PATH%;%USERPROFILE%\.cargo\bin
- if defined msys_bits set PATH=%PATH%;C:\msys64\mingw%msys_bits%\bin
- rustc -vV
- cargo -vV
build: false
test_script:
- REM SET RUST_LOG=tantivy,test & cargo test --all --verbose --no-default-features --features lz4-compression --features mmap
- REM SET RUST_LOG=tantivy,test & cargo test test_store --verbose --no-default-features --features lz4-compression --features snappy-compression --features brotli-compression --features mmap
- REM SET RUST_BACKTRACE=1 & cargo build --examples


@@ -5,7 +5,7 @@ const ALICE_TXT: &str = include_str!("alice.txt");
pub fn criterion_benchmark(c: &mut Criterion) {
let tokenizer_manager = TokenizerManager::default();
let tokenizer = tokenizer_manager.get("default").unwrap();
let mut tokenizer = tokenizer_manager.get("default").unwrap();
c.bench_function("default-tokenize-alice", |b| {
b.iter(|| {
let mut word_count = 0;

benches/gh.json (new file, 1000 lines; diff suppressed because one or more lines are too long)


@@ -1,10 +1,15 @@
use criterion::{criterion_group, criterion_main, Criterion};
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use pprof::criterion::{Output, PProfProfiler};
use tantivy::schema::{INDEXED, STORED, STRING, TEXT};
use tantivy::schema::{FAST, INDEXED, STORED, STRING, TEXT};
use tantivy::Index;
const HDFS_LOGS: &str = include_str!("hdfs.json");
const NUM_REPEATS: usize = 2;
const GH_LOGS: &str = include_str!("gh.json");
const WIKI: &str = include_str!("wiki.json");
fn get_lines(input: &str) -> Vec<&str> {
input.trim().split('\n').collect()
}
pub fn hdfs_index_benchmark(c: &mut Criterion) {
let schema = {
@@ -28,85 +33,147 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
};
let mut group = c.benchmark_group("index-hdfs");
group.throughput(Throughput::Bytes(HDFS_LOGS.len() as u64));
group.sample_size(20);
group.bench_function("index-hdfs-no-commit", |b| {
let lines = get_lines(HDFS_LOGS);
b.iter(|| {
let index = Index::create_in_ram(schema.clone());
let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
for _ in 0..NUM_REPEATS {
for doc_json in HDFS_LOGS.trim().split('\n') {
let doc = schema.parse_document(doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
for doc_json in &lines {
let doc = schema.parse_document(doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
})
});
group.bench_function("index-hdfs-with-commit", |b| {
let lines = get_lines(HDFS_LOGS);
b.iter(|| {
let index = Index::create_in_ram(schema.clone());
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
for _ in 0..NUM_REPEATS {
for doc_json in HDFS_LOGS.trim().split('\n') {
let doc = schema.parse_document(doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
for doc_json in &lines {
let doc = schema.parse_document(doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
})
});
group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
let lines = get_lines(HDFS_LOGS);
b.iter(|| {
let index = Index::create_in_ram(schema_with_store.clone());
let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
for _ in 0..NUM_REPEATS {
for doc_json in HDFS_LOGS.trim().split('\n') {
let doc = schema.parse_document(doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
for doc_json in &lines {
let doc = schema.parse_document(doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
})
});
group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
let lines = get_lines(HDFS_LOGS);
b.iter(|| {
let index = Index::create_in_ram(schema_with_store.clone());
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
for _ in 0..NUM_REPEATS {
for doc_json in HDFS_LOGS.trim().split('\n') {
let doc = schema.parse_document(doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
for doc_json in &lines {
let doc = schema.parse_document(doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
})
});
group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
let lines = get_lines(HDFS_LOGS);
b.iter(|| {
let index = Index::create_in_ram(dynamic_schema.clone());
let json_field = dynamic_schema.get_field("json").unwrap();
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
for _ in 0..NUM_REPEATS {
for doc_json in HDFS_LOGS.trim().split('\n') {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
for doc_json in &lines {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
})
});
group.bench_function("index-hdfs-with-commit-json-without-docstore", |b| {
}
pub fn gh_index_benchmark(c: &mut Criterion) {
let dynamic_schema = {
let mut schema_builder = tantivy::schema::SchemaBuilder::new();
schema_builder.add_json_field("json", TEXT | FAST);
schema_builder.build()
};
let mut group = c.benchmark_group("index-gh");
group.throughput(Throughput::Bytes(GH_LOGS.len() as u64));
group.bench_function("index-gh-no-commit", |b| {
let lines = get_lines(GH_LOGS);
b.iter(|| {
let index = Index::create_in_ram(dynamic_schema.clone());
let json_field = dynamic_schema.get_field("json").unwrap();
let index = Index::create_in_ram(dynamic_schema.clone());
let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
})
});
group.bench_function("index-gh-with-commit", |b| {
let lines = get_lines(GH_LOGS);
b.iter(|| {
let json_field = dynamic_schema.get_field("json").unwrap();
let index = Index::create_in_ram(dynamic_schema.clone());
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
for _ in 0..NUM_REPEATS {
for doc_json in HDFS_LOGS.trim().split('\n') {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
for doc_json in &lines {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
})
});
}
pub fn wiki_index_benchmark(c: &mut Criterion) {
let dynamic_schema = {
let mut schema_builder = tantivy::schema::SchemaBuilder::new();
schema_builder.add_json_field("json", TEXT | FAST);
schema_builder.build()
};
let mut group = c.benchmark_group("index-wiki");
group.throughput(Throughput::Bytes(WIKI.len() as u64));
group.bench_function("index-wiki-no-commit", |b| {
let lines = get_lines(WIKI);
b.iter(|| {
let json_field = dynamic_schema.get_field("json").unwrap();
let index = Index::create_in_ram(dynamic_schema.clone());
let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
})
});
group.bench_function("index-wiki-with-commit", |b| {
let lines = get_lines(WIKI);
b.iter(|| {
let json_field = dynamic_schema.get_field("json").unwrap();
let index = Index::create_in_ram(dynamic_schema.clone());
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
})
@@ -115,7 +182,17 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
criterion_group! {
name = benches;
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
config = Criterion::default();
targets = hdfs_index_benchmark
}
criterion_main!(benches);
criterion_group! {
name = gh_benches;
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
targets = gh_index_benchmark
}
criterion_group! {
name = wiki_benches;
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
targets = wiki_index_benchmark
}
criterion_main!(benches, gh_benches, wiki_benches);

benches/wiki.json (new file, 1000 lines; diff suppressed because one or more lines are too long)


@@ -1,6 +1,6 @@
[package]
name = "tantivy-bitpacker"
version = "0.3.0"
version = "0.4.0"
edition = "2021"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
@@ -15,6 +15,7 @@ homepage = "https://github.com/quickwit-oss/tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
bitpacking = {version="0.8", default-features=false, features = ["bitpacker1x"]}
[dev-dependencies]
rand = "0.8"


@@ -1,10 +1,14 @@
use std::convert::TryInto;
use std::io;
use std::ops::{Range, RangeInclusive};
use bitpacking::{BitPacker as ExternalBitPackerTrait, BitPacker1x};
pub struct BitPacker {
mini_buffer: u64,
mini_buffer_written: usize,
}
impl Default for BitPacker {
fn default() -> Self {
BitPacker::new()
@@ -118,6 +122,125 @@ impl BitUnpacker {
let val_shifted = val_unshifted_unmasked >> bit_shift;
val_shifted & self.mask
}
// Decodes the range of bitpacked `u32` values with idx
// in [start_idx, start_idx + output.len()).
//
// # Panics
//
// This method panics if `num_bits` is > 32.
fn get_batch_u32s(&self, start_idx: u32, data: &[u8], output: &mut [u32]) {
assert!(
self.bit_width() <= 32,
"Bitwidth must be <= 32 to use this method."
);
let end_idx = start_idx + output.len() as u32;
let end_bit_read = end_idx * self.num_bits;
let end_byte_read = (end_bit_read + 7) / 8;
assert!(
end_byte_read as usize <= data.len(),
"Requested index is out of bounds."
);
// Simple slow implementation of get_batch_u32s, to deal with our ramps.
let get_batch_ramp = |start_idx: u32, output: &mut [u32]| {
for (out, idx) in output.iter_mut().zip(start_idx..) {
*out = self.get(idx, data) as u32;
}
};
// We use an unrolled routine to decode 32 values at once.
// We therefore decompose our range of values to decode into three ranges:
// - Entrance ramp: [start_idx, fast_track_start) (up to 31 values)
// - Highway: [fast_track_start, fast_track_end) (a length multiple of 32s)
// - Exit ramp: [fast_track_end, start_idx + output.len()) (up to 31 values)
// We want the start of the fast track to be byte-aligned.
// A sufficient condition is to start with an idx that is a multiple of 8,
// so highway start is the closest multiple of 8 that is >= start_idx.
let entrance_ramp_len = 8 - (start_idx % 8) % 8;
let highway_start: u32 = start_idx + entrance_ramp_len;
if highway_start + BitPacker1x::BLOCK_LEN as u32 > end_idx {
// We don't have enough values to have even a single block of highway.
// Let's just supply the values the simple way.
get_batch_ramp(start_idx, output);
return;
}
let num_blocks: u32 = (end_idx - highway_start) / BitPacker1x::BLOCK_LEN as u32;
// Entrance ramp
get_batch_ramp(start_idx, &mut output[..entrance_ramp_len as usize]);
// Highway
let mut offset = (highway_start * self.num_bits) as usize / 8;
let mut output_cursor = (highway_start - start_idx) as usize;
for _ in 0..num_blocks {
offset += BitPacker1x.decompress(
&data[offset..],
&mut output[output_cursor..],
self.num_bits as u8,
);
output_cursor += 32;
}
// Exit ramp
let highway_end = highway_start + num_blocks * BitPacker1x::BLOCK_LEN as u32;
get_batch_ramp(highway_end, &mut output[output_cursor..]);
}
pub fn get_ids_for_value_range(
&self,
range: RangeInclusive<u64>,
id_range: Range<u32>,
data: &[u8],
positions: &mut Vec<u32>,
) {
if self.bit_width() > 32 {
self.get_ids_for_value_range_slow(range, id_range, data, positions)
} else {
if *range.start() > u32::MAX as u64 {
positions.clear();
return;
}
let range_u32 = (*range.start() as u32)..=(*range.end()).min(u32::MAX as u64) as u32;
self.get_ids_for_value_range_fast(range_u32, id_range, data, positions)
}
}
fn get_ids_for_value_range_slow(
&self,
range: RangeInclusive<u64>,
id_range: Range<u32>,
data: &[u8],
positions: &mut Vec<u32>,
) {
positions.clear();
for i in id_range {
// If we cared we could make this branchless, but the slow implementation should rarely
// kick in.
let val = self.get(i, data);
if range.contains(&val) {
positions.push(i);
}
}
}
fn get_ids_for_value_range_fast(
&self,
value_range: RangeInclusive<u32>,
id_range: Range<u32>,
data: &[u8],
positions: &mut Vec<u32>,
) {
positions.resize(id_range.len(), 0u32);
self.get_batch_u32s(id_range.start, data, positions);
crate::filter_vec::filter_vec_in_place(value_range, id_range.start, positions)
}
}
#[cfg(test)]
@@ -200,4 +323,58 @@ mod test {
test_bitpacker_aux(num_bits, &vals);
}
}
#[test]
#[should_panic]
fn test_get_batch_panics_over_32_bits() {
let bitunpacker = BitUnpacker::new(33);
let mut output: [u32; 1] = [0u32];
bitunpacker.get_batch_u32s(0, &[0, 0, 0, 0, 0, 0, 0, 0], &mut output[..]);
}
#[test]
fn test_get_batch_limit() {
let bitunpacker = BitUnpacker::new(1);
let mut output: [u32; 3] = [0u32, 0u32, 0u32];
bitunpacker.get_batch_u32s(8 * 4 - 3, &[0u8, 0u8, 0u8, 0u8], &mut output[..]);
}
#[test]
#[should_panic]
fn test_get_batch_panics_when_off_scope() {
let bitunpacker = BitUnpacker::new(1);
let mut output: [u32; 3] = [0u32, 0u32, 0u32];
// We are missing exactly one bit.
bitunpacker.get_batch_u32s(8 * 4 - 2, &[0u8, 0u8, 0u8, 0u8], &mut output[..]);
}
proptest::proptest! {
#[test]
fn test_get_batch_u32s_proptest(num_bits in 0u8..=32u8) {
let mask =
if num_bits == 32u8 {
u32::MAX
} else {
(1u32 << num_bits) - 1
};
let mut buffer: Vec<u8> = Vec::new();
let mut bitpacker = BitPacker::new();
for val in 0..100 {
bitpacker.write(val & mask as u64, num_bits, &mut buffer).unwrap();
}
bitpacker.flush(&mut buffer).unwrap();
let bitunpacker = BitUnpacker::new(num_bits);
let mut output: Vec<u32> = Vec::new();
for len in [0, 1, 2, 32, 33, 34, 64] {
for start_idx in 0u32..32u32 {
output.resize(len as usize, 0);
bitunpacker.get_batch_u32s(start_idx, &buffer, &mut output);
for i in 0..len {
let expected = (start_idx + i as u32) & mask;
assert_eq!(output[i], expected);
}
}
}
}
}
}


@@ -0,0 +1,365 @@
//! SIMD filtering of a vector as described in the following blog post.
//! <https://quickwit.io/blog/filtering%20a%20vector%20with%20simd%20instructions%20avx-2%20and%20avx-512>
use std::arch::x86_64::{
__m256i as DataType, _mm256_add_epi32 as op_add, _mm256_cmpgt_epi32 as op_greater,
_mm256_lddqu_si256 as load_unaligned, _mm256_or_si256 as op_or, _mm256_set1_epi32 as set1,
_mm256_storeu_si256 as store_unaligned, _mm256_xor_si256 as op_xor, *,
};
use std::ops::RangeInclusive;
const NUM_LANES: usize = 8;
const HIGHEST_BIT: u32 = 1 << 31;
#[inline]
fn u32_to_i32(val: u32) -> i32 {
(val ^ HIGHEST_BIT) as i32
}
#[inline]
unsafe fn u32_to_i32_avx2(vals_u32x8s: DataType) -> DataType {
const HIGHEST_BIT_MASK: DataType = from_u32x8([HIGHEST_BIT; NUM_LANES]);
op_xor(vals_u32x8s, HIGHEST_BIT_MASK)
}
pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
// We use a monotonic mapping from u32 to i32 to make the comparison possible in AVX2.
let range_i32: RangeInclusive<i32> = u32_to_i32(*range.start())..=u32_to_i32(*range.end());
let num_words = output.len() / NUM_LANES;
let mut output_len = unsafe {
filter_vec_avx2_aux(
output.as_ptr() as *const __m256i,
range_i32,
output.as_mut_ptr(),
offset,
num_words,
)
};
let remainder_start = num_words * NUM_LANES;
for i in remainder_start..output.len() {
let val = output[i];
output[output_len] = offset + i as u32;
output_len += if range.contains(&val) { 1 } else { 0 };
}
output.truncate(output_len);
}
#[target_feature(enable = "avx2")]
unsafe fn filter_vec_avx2_aux(
mut input: *const __m256i,
range: RangeInclusive<i32>,
output: *mut u32,
offset: u32,
num_words: usize,
) -> usize {
let mut output_tail = output;
let range_simd = set1(*range.start())..=set1(*range.end());
let mut ids = from_u32x8([
offset,
offset + 1,
offset + 2,
offset + 3,
offset + 4,
offset + 5,
offset + 6,
offset + 7,
]);
const SHIFT: __m256i = from_u32x8([NUM_LANES as u32; NUM_LANES]);
for _ in 0..num_words {
let word = load_unaligned(input);
let word = u32_to_i32_avx2(word);
let keeper_bitset = compute_filter_bitset(word, range_simd.clone());
let added_len = keeper_bitset.count_ones();
let filtered_doc_ids = compact(ids, keeper_bitset);
store_unaligned(output_tail as *mut __m256i, filtered_doc_ids);
output_tail = output_tail.offset(added_len as isize);
ids = op_add(ids, SHIFT);
input = input.offset(1);
}
output_tail.offset_from(output) as usize
}
#[inline]
#[target_feature(enable = "avx2")]
unsafe fn compact(data: DataType, mask: u8) -> DataType {
let vperm_mask = MASK_TO_PERMUTATION[mask as usize];
_mm256_permutevar8x32_epi32(data, vperm_mask)
}
#[inline]
#[target_feature(enable = "avx2")]
unsafe fn compute_filter_bitset(val: __m256i, range: std::ops::RangeInclusive<__m256i>) -> u8 {
let too_low = op_greater(*range.start(), val);
let too_high = op_greater(val, *range.end());
let inside = op_or(too_low, too_high);
255 - std::arch::x86_64::_mm256_movemask_ps(std::mem::transmute::<DataType, __m256>(inside))
as u8
}
union U8x32 {
vector: DataType,
vals: [u32; NUM_LANES],
}
const fn from_u32x8(vals: [u32; NUM_LANES]) -> DataType {
unsafe { U8x32 { vals }.vector }
}
const MASK_TO_PERMUTATION: [DataType; 256] = [
from_u32x8([0, 0, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 0, 0, 0, 0, 0, 0, 0]),
from_u32x8([1, 0, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 0, 0, 0, 0, 0, 0]),
from_u32x8([2, 0, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 0, 0, 0, 0, 0, 0]),
from_u32x8([1, 2, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 0, 0, 0, 0, 0]),
from_u32x8([3, 0, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 3, 0, 0, 0, 0, 0, 0]),
from_u32x8([1, 3, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 3, 0, 0, 0, 0, 0]),
from_u32x8([2, 3, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 3, 0, 0, 0, 0, 0]),
from_u32x8([1, 2, 3, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 3, 0, 0, 0, 0]),
from_u32x8([4, 0, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 4, 0, 0, 0, 0, 0, 0]),
from_u32x8([1, 4, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 4, 0, 0, 0, 0, 0]),
from_u32x8([2, 4, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 4, 0, 0, 0, 0, 0]),
from_u32x8([1, 2, 4, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 4, 0, 0, 0, 0]),
from_u32x8([3, 4, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 3, 4, 0, 0, 0, 0, 0]),
from_u32x8([1, 3, 4, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 3, 4, 0, 0, 0, 0]),
from_u32x8([2, 3, 4, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 3, 4, 0, 0, 0, 0]),
from_u32x8([1, 2, 3, 4, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 3, 4, 0, 0, 0]),
from_u32x8([5, 0, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 5, 0, 0, 0, 0, 0, 0]),
from_u32x8([1, 5, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 5, 0, 0, 0, 0, 0]),
from_u32x8([2, 5, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 5, 0, 0, 0, 0, 0]),
from_u32x8([1, 2, 5, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 5, 0, 0, 0, 0]),
from_u32x8([3, 5, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 3, 5, 0, 0, 0, 0, 0]),
from_u32x8([1, 3, 5, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 3, 5, 0, 0, 0, 0]),
from_u32x8([2, 3, 5, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 3, 5, 0, 0, 0, 0]),
from_u32x8([1, 2, 3, 5, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 3, 5, 0, 0, 0]),
from_u32x8([4, 5, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 4, 5, 0, 0, 0, 0, 0]),
from_u32x8([1, 4, 5, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 4, 5, 0, 0, 0, 0]),
from_u32x8([2, 4, 5, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 4, 5, 0, 0, 0, 0]),
from_u32x8([1, 2, 4, 5, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 4, 5, 0, 0, 0]),
from_u32x8([3, 4, 5, 0, 0, 0, 0, 0]),
from_u32x8([0, 3, 4, 5, 0, 0, 0, 0]),
from_u32x8([1, 3, 4, 5, 0, 0, 0, 0]),
from_u32x8([0, 1, 3, 4, 5, 0, 0, 0]),
from_u32x8([2, 3, 4, 5, 0, 0, 0, 0]),
from_u32x8([0, 2, 3, 4, 5, 0, 0, 0]),
from_u32x8([1, 2, 3, 4, 5, 0, 0, 0]),
from_u32x8([0, 1, 2, 3, 4, 5, 0, 0]),
from_u32x8([6, 0, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 6, 0, 0, 0, 0, 0, 0]),
from_u32x8([1, 6, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 6, 0, 0, 0, 0, 0]),
from_u32x8([2, 6, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 6, 0, 0, 0, 0, 0]),
from_u32x8([1, 2, 6, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 6, 0, 0, 0, 0]),
from_u32x8([3, 6, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 3, 6, 0, 0, 0, 0, 0]),
from_u32x8([1, 3, 6, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 3, 6, 0, 0, 0, 0]),
from_u32x8([2, 3, 6, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 3, 6, 0, 0, 0, 0]),
from_u32x8([1, 2, 3, 6, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 3, 6, 0, 0, 0]),
from_u32x8([4, 6, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 4, 6, 0, 0, 0, 0, 0]),
from_u32x8([1, 4, 6, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 4, 6, 0, 0, 0, 0]),
from_u32x8([2, 4, 6, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 4, 6, 0, 0, 0, 0]),
from_u32x8([1, 2, 4, 6, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 4, 6, 0, 0, 0]),
from_u32x8([3, 4, 6, 0, 0, 0, 0, 0]),
from_u32x8([0, 3, 4, 6, 0, 0, 0, 0]),
from_u32x8([1, 3, 4, 6, 0, 0, 0, 0]),
from_u32x8([0, 1, 3, 4, 6, 0, 0, 0]),
from_u32x8([2, 3, 4, 6, 0, 0, 0, 0]),
from_u32x8([0, 2, 3, 4, 6, 0, 0, 0]),
from_u32x8([1, 2, 3, 4, 6, 0, 0, 0]),
from_u32x8([0, 1, 2, 3, 4, 6, 0, 0]),
from_u32x8([5, 6, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 5, 6, 0, 0, 0, 0, 0]),
from_u32x8([1, 5, 6, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 5, 6, 0, 0, 0, 0]),
from_u32x8([2, 5, 6, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 5, 6, 0, 0, 0, 0]),
from_u32x8([1, 2, 5, 6, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 5, 6, 0, 0, 0]),
from_u32x8([3, 5, 6, 0, 0, 0, 0, 0]),
from_u32x8([0, 3, 5, 6, 0, 0, 0, 0]),
from_u32x8([1, 3, 5, 6, 0, 0, 0, 0]),
from_u32x8([0, 1, 3, 5, 6, 0, 0, 0]),
from_u32x8([2, 3, 5, 6, 0, 0, 0, 0]),
from_u32x8([0, 2, 3, 5, 6, 0, 0, 0]),
from_u32x8([1, 2, 3, 5, 6, 0, 0, 0]),
from_u32x8([0, 1, 2, 3, 5, 6, 0, 0]),
from_u32x8([4, 5, 6, 0, 0, 0, 0, 0]),
from_u32x8([0, 4, 5, 6, 0, 0, 0, 0]),
from_u32x8([1, 4, 5, 6, 0, 0, 0, 0]),
from_u32x8([0, 1, 4, 5, 6, 0, 0, 0]),
from_u32x8([2, 4, 5, 6, 0, 0, 0, 0]),
from_u32x8([0, 2, 4, 5, 6, 0, 0, 0]),
from_u32x8([1, 2, 4, 5, 6, 0, 0, 0]),
from_u32x8([0, 1, 2, 4, 5, 6, 0, 0]),
from_u32x8([3, 4, 5, 6, 0, 0, 0, 0]),
from_u32x8([0, 3, 4, 5, 6, 0, 0, 0]),
from_u32x8([1, 3, 4, 5, 6, 0, 0, 0]),
from_u32x8([0, 1, 3, 4, 5, 6, 0, 0]),
from_u32x8([2, 3, 4, 5, 6, 0, 0, 0]),
from_u32x8([0, 2, 3, 4, 5, 6, 0, 0]),
from_u32x8([1, 2, 3, 4, 5, 6, 0, 0]),
from_u32x8([0, 1, 2, 3, 4, 5, 6, 0]),
from_u32x8([7, 0, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 7, 0, 0, 0, 0, 0, 0]),
from_u32x8([1, 7, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 7, 0, 0, 0, 0, 0]),
from_u32x8([2, 7, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 7, 0, 0, 0, 0, 0]),
from_u32x8([1, 2, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 7, 0, 0, 0, 0]),
from_u32x8([3, 7, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 3, 7, 0, 0, 0, 0, 0]),
from_u32x8([1, 3, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 3, 7, 0, 0, 0, 0]),
from_u32x8([2, 3, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 3, 7, 0, 0, 0, 0]),
from_u32x8([1, 2, 3, 7, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 3, 7, 0, 0, 0]),
from_u32x8([4, 7, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 4, 7, 0, 0, 0, 0, 0]),
from_u32x8([1, 4, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 4, 7, 0, 0, 0, 0]),
from_u32x8([2, 4, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 4, 7, 0, 0, 0, 0]),
from_u32x8([1, 2, 4, 7, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 4, 7, 0, 0, 0]),
from_u32x8([3, 4, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 3, 4, 7, 0, 0, 0, 0]),
from_u32x8([1, 3, 4, 7, 0, 0, 0, 0]),
from_u32x8([0, 1, 3, 4, 7, 0, 0, 0]),
from_u32x8([2, 3, 4, 7, 0, 0, 0, 0]),
from_u32x8([0, 2, 3, 4, 7, 0, 0, 0]),
from_u32x8([1, 2, 3, 4, 7, 0, 0, 0]),
from_u32x8([0, 1, 2, 3, 4, 7, 0, 0]),
from_u32x8([5, 7, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 5, 7, 0, 0, 0, 0, 0]),
from_u32x8([1, 5, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 5, 7, 0, 0, 0, 0]),
from_u32x8([2, 5, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 5, 7, 0, 0, 0, 0]),
from_u32x8([1, 2, 5, 7, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 5, 7, 0, 0, 0]),
from_u32x8([3, 5, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 3, 5, 7, 0, 0, 0, 0]),
from_u32x8([1, 3, 5, 7, 0, 0, 0, 0]),
from_u32x8([0, 1, 3, 5, 7, 0, 0, 0]),
from_u32x8([2, 3, 5, 7, 0, 0, 0, 0]),
from_u32x8([0, 2, 3, 5, 7, 0, 0, 0]),
from_u32x8([1, 2, 3, 5, 7, 0, 0, 0]),
from_u32x8([0, 1, 2, 3, 5, 7, 0, 0]),
from_u32x8([4, 5, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 4, 5, 7, 0, 0, 0, 0]),
from_u32x8([1, 4, 5, 7, 0, 0, 0, 0]),
from_u32x8([0, 1, 4, 5, 7, 0, 0, 0]),
from_u32x8([2, 4, 5, 7, 0, 0, 0, 0]),
from_u32x8([0, 2, 4, 5, 7, 0, 0, 0]),
from_u32x8([1, 2, 4, 5, 7, 0, 0, 0]),
from_u32x8([0, 1, 2, 4, 5, 7, 0, 0]),
from_u32x8([3, 4, 5, 7, 0, 0, 0, 0]),
from_u32x8([0, 3, 4, 5, 7, 0, 0, 0]),
from_u32x8([1, 3, 4, 5, 7, 0, 0, 0]),
from_u32x8([0, 1, 3, 4, 5, 7, 0, 0]),
from_u32x8([2, 3, 4, 5, 7, 0, 0, 0]),
from_u32x8([0, 2, 3, 4, 5, 7, 0, 0]),
from_u32x8([1, 2, 3, 4, 5, 7, 0, 0]),
from_u32x8([0, 1, 2, 3, 4, 5, 7, 0]),
from_u32x8([6, 7, 0, 0, 0, 0, 0, 0]),
from_u32x8([0, 6, 7, 0, 0, 0, 0, 0]),
from_u32x8([1, 6, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 1, 6, 7, 0, 0, 0, 0]),
from_u32x8([2, 6, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 2, 6, 7, 0, 0, 0, 0]),
from_u32x8([1, 2, 6, 7, 0, 0, 0, 0]),
from_u32x8([0, 1, 2, 6, 7, 0, 0, 0]),
from_u32x8([3, 6, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 3, 6, 7, 0, 0, 0, 0]),
from_u32x8([1, 3, 6, 7, 0, 0, 0, 0]),
from_u32x8([0, 1, 3, 6, 7, 0, 0, 0]),
from_u32x8([2, 3, 6, 7, 0, 0, 0, 0]),
from_u32x8([0, 2, 3, 6, 7, 0, 0, 0]),
from_u32x8([1, 2, 3, 6, 7, 0, 0, 0]),
from_u32x8([0, 1, 2, 3, 6, 7, 0, 0]),
from_u32x8([4, 6, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 4, 6, 7, 0, 0, 0, 0]),
from_u32x8([1, 4, 6, 7, 0, 0, 0, 0]),
from_u32x8([0, 1, 4, 6, 7, 0, 0, 0]),
from_u32x8([2, 4, 6, 7, 0, 0, 0, 0]),
from_u32x8([0, 2, 4, 6, 7, 0, 0, 0]),
from_u32x8([1, 2, 4, 6, 7, 0, 0, 0]),
from_u32x8([0, 1, 2, 4, 6, 7, 0, 0]),
from_u32x8([3, 4, 6, 7, 0, 0, 0, 0]),
from_u32x8([0, 3, 4, 6, 7, 0, 0, 0]),
from_u32x8([1, 3, 4, 6, 7, 0, 0, 0]),
from_u32x8([0, 1, 3, 4, 6, 7, 0, 0]),
from_u32x8([2, 3, 4, 6, 7, 0, 0, 0]),
from_u32x8([0, 2, 3, 4, 6, 7, 0, 0]),
from_u32x8([1, 2, 3, 4, 6, 7, 0, 0]),
from_u32x8([0, 1, 2, 3, 4, 6, 7, 0]),
from_u32x8([5, 6, 7, 0, 0, 0, 0, 0]),
from_u32x8([0, 5, 6, 7, 0, 0, 0, 0]),
from_u32x8([1, 5, 6, 7, 0, 0, 0, 0]),
from_u32x8([0, 1, 5, 6, 7, 0, 0, 0]),
from_u32x8([2, 5, 6, 7, 0, 0, 0, 0]),
from_u32x8([0, 2, 5, 6, 7, 0, 0, 0]),
from_u32x8([1, 2, 5, 6, 7, 0, 0, 0]),
from_u32x8([0, 1, 2, 5, 6, 7, 0, 0]),
from_u32x8([3, 5, 6, 7, 0, 0, 0, 0]),
from_u32x8([0, 3, 5, 6, 7, 0, 0, 0]),
from_u32x8([1, 3, 5, 6, 7, 0, 0, 0]),
from_u32x8([0, 1, 3, 5, 6, 7, 0, 0]),
from_u32x8([2, 3, 5, 6, 7, 0, 0, 0]),
from_u32x8([0, 2, 3, 5, 6, 7, 0, 0]),
from_u32x8([1, 2, 3, 5, 6, 7, 0, 0]),
from_u32x8([0, 1, 2, 3, 5, 6, 7, 0]),
from_u32x8([4, 5, 6, 7, 0, 0, 0, 0]),
from_u32x8([0, 4, 5, 6, 7, 0, 0, 0]),
from_u32x8([1, 4, 5, 6, 7, 0, 0, 0]),
from_u32x8([0, 1, 4, 5, 6, 7, 0, 0]),
from_u32x8([2, 4, 5, 6, 7, 0, 0, 0]),
from_u32x8([0, 2, 4, 5, 6, 7, 0, 0]),
from_u32x8([1, 2, 4, 5, 6, 7, 0, 0]),
from_u32x8([0, 1, 2, 4, 5, 6, 7, 0]),
from_u32x8([3, 4, 5, 6, 7, 0, 0, 0]),
from_u32x8([0, 3, 4, 5, 6, 7, 0, 0]),
from_u32x8([1, 3, 4, 5, 6, 7, 0, 0]),
from_u32x8([0, 1, 3, 4, 5, 6, 7, 0]),
from_u32x8([2, 3, 4, 5, 6, 7, 0, 0]),
from_u32x8([0, 2, 3, 4, 5, 6, 7, 0]),
from_u32x8([1, 2, 3, 4, 5, 6, 7, 0]),
from_u32x8([0, 1, 2, 3, 4, 5, 6, 7]),
];
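// Illustrative sketch (hypothetical helper, not part of the diff): each
// MASK_TO_PERMUTATION entry simply lists the lane indexes of the set bits of the
// 8-bit keeper mask in ascending order, padded with zeros, so a table like the one
// above could be generated rather than written out by hand.
#[cfg(test)]
mod mask_to_permutation_sketch {
    fn permutation_for_mask(mask: u8) -> [u32; 8] {
        let mut perm = [0u32; 8];
        let mut out = 0usize;
        for lane in 0u32..8 {
            if mask & (1u8 << lane) != 0 {
                perm[out] = lane;
                out += 1;
            }
        }
        perm
    }

    #[test]
    fn test_layout_matches_table() {
        // Mask 0b0000_0101 keeps lanes 0 and 2.
        assert_eq!(permutation_for_mask(0b0000_0101), [0, 2, 0, 0, 0, 0, 0, 0]);
        // Mask 0b1111_1111 keeps every lane.
        assert_eq!(permutation_for_mask(0b1111_1111), [0, 1, 2, 3, 4, 5, 6, 7]);
    }
}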


@@ -0,0 +1,165 @@
use std::ops::RangeInclusive;
#[cfg(any(target_arch = "x86_64"))]
mod avx2;
mod scalar;
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
#[repr(u8)]
enum FilterImplPerInstructionSet {
#[cfg(target_arch = "x86_64")]
AVX2 = 0u8,
Scalar = 1u8,
}
impl FilterImplPerInstructionSet {
#[inline]
pub fn is_available(&self) -> bool {
match *self {
#[cfg(target_arch = "x86_64")]
FilterImplPerInstructionSet::AVX2 => is_x86_feature_detected!("avx2"),
FilterImplPerInstructionSet::Scalar => true,
}
}
}
// List of available implementation in preferred order.
#[cfg(target_arch = "x86_64")]
const IMPLS: [FilterImplPerInstructionSet; 2] = [
FilterImplPerInstructionSet::AVX2,
FilterImplPerInstructionSet::Scalar,
];
#[cfg(not(target_arch = "x86_64"))]
const IMPLS: [FilterImplPerInstructionSet; 1] = [FilterImplPerInstructionSet::Scalar];
impl FilterImplPerInstructionSet {
#[allow(unused_variables)]
#[inline]
fn from(code: u8) -> FilterImplPerInstructionSet {
#[cfg(target_arch = "x86_64")]
if code == FilterImplPerInstructionSet::AVX2 as u8 {
return FilterImplPerInstructionSet::AVX2;
}
FilterImplPerInstructionSet::Scalar
}
#[inline]
fn filter_vec_in_place(self, range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
match self {
#[cfg(target_arch = "x86_64")]
FilterImplPerInstructionSet::AVX2 => avx2::filter_vec_in_place(range, offset, output),
FilterImplPerInstructionSet::Scalar => {
scalar::filter_vec_in_place(range, offset, output)
}
}
}
}
#[inline]
fn get_best_available_instruction_set() -> FilterImplPerInstructionSet {
use std::sync::atomic::{AtomicU8, Ordering};
static INSTRUCTION_SET_BYTE: AtomicU8 = AtomicU8::new(u8::MAX);
let instruction_set_byte: u8 = INSTRUCTION_SET_BYTE.load(Ordering::Relaxed);
if instruction_set_byte == u8::MAX {
// Let's initialize the instruction set and cache it.
let instruction_set = IMPLS
.into_iter()
.find(FilterImplPerInstructionSet::is_available)
.unwrap();
INSTRUCTION_SET_BYTE.store(instruction_set as u8, Ordering::Relaxed);
return instruction_set;
}
FilterImplPerInstructionSet::from(instruction_set_byte)
}
pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
get_best_available_instruction_set().filter_vec_in_place(range, offset, output)
}
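// Minimal usage sketch (illustrative; the test module is hypothetical): the vector
// initially holds the decoded values; after the call it holds `offset + index` for
// every position whose value fell inside the inclusive range.
#[cfg(test)]
mod usage_sketch {
    #[test]
    fn test_values_are_replaced_by_matching_positions() {
        let mut vals = vec![3u32, 7, 11, 7, 2];
        super::filter_vec_in_place(5..=10, 100, &mut vals);
        // The values 7 (index 1) and 7 (index 3) fall inside 5..=10.
        assert_eq!(vals, vec![101, 103]);
    }
}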
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_best_available_instruction_set() {
// This does not test much unfortunately.
// We just make sure the function returns without crashing and returns the same result.
let instruction_set = get_best_available_instruction_set();
assert_eq!(get_best_available_instruction_set(), instruction_set);
}
#[cfg(target_arch = "x86_64")]
#[test]
fn test_instruction_set_to_code_from_code() {
for instruction_set in [
FilterImplPerInstructionSet::AVX2,
FilterImplPerInstructionSet::Scalar,
] {
let code = instruction_set as u8;
assert_eq!(instruction_set, FilterImplPerInstructionSet::from(code));
}
}
fn test_filter_impl_empty_aux(filter_impl: FilterImplPerInstructionSet) {
let mut output = vec![];
filter_impl.filter_vec_in_place(0..=u32::MAX, 0, &mut output);
assert_eq!(&output, &[]);
}
fn test_filter_impl_simple_aux(filter_impl: FilterImplPerInstructionSet) {
let mut output = vec![3, 2, 1, 5, 11, 2, 5, 10, 2];
filter_impl.filter_vec_in_place(3..=10, 0, &mut output);
assert_eq!(&output, &[0, 3, 6, 7]);
}
fn test_filter_impl_simple_aux_shifted(filter_impl: FilterImplPerInstructionSet) {
let mut output = vec![3, 2, 1, 5, 11, 2, 5, 10, 2];
filter_impl.filter_vec_in_place(3..=10, 10, &mut output);
assert_eq!(&output, &[10, 13, 16, 17]);
}
fn test_filter_impl_simple_outside_i32_range(filter_impl: FilterImplPerInstructionSet) {
let mut output = vec![u32::MAX, i32::MAX as u32 + 1, 0, 1, 3, 1, 1, 1, 1];
filter_impl.filter_vec_in_place(1..=i32::MAX as u32 + 1u32, 0, &mut output);
assert_eq!(&output, &[1, 3, 4, 5, 6, 7, 8]);
}
fn test_filter_impl_test_suite(filter_impl: FilterImplPerInstructionSet) {
test_filter_impl_empty_aux(filter_impl);
test_filter_impl_simple_aux(filter_impl);
test_filter_impl_simple_aux_shifted(filter_impl);
test_filter_impl_simple_outside_i32_range(filter_impl);
}
#[test]
#[cfg(target_arch = "x86_64")]
fn test_filter_implementation_avx2() {
if FilterImplPerInstructionSet::AVX2.is_available() {
test_filter_impl_test_suite(FilterImplPerInstructionSet::AVX2);
}
}
#[test]
fn test_filter_implementation_scalar() {
test_filter_impl_test_suite(FilterImplPerInstructionSet::Scalar);
}
#[cfg(target_arch = "x86_64")]
proptest::proptest! {
#[test]
fn test_filter_compare_scalar_and_avx2_impl_proptest(
start in proptest::prelude::any::<u32>(),
end in proptest::prelude::any::<u32>(),
offset in 0u32..2u32,
mut vals in proptest::collection::vec(0..u32::MAX, 0..30)) {
if FilterImplPerInstructionSet::AVX2.is_available() {
let mut vals_clone = vals.clone();
FilterImplPerInstructionSet::AVX2.filter_vec_in_place(start..=end, offset, &mut vals);
FilterImplPerInstructionSet::Scalar.filter_vec_in_place(start..=end, offset, &mut vals_clone);
assert_eq!(&vals, &vals_clone);
}
}
}
}


@@ -0,0 +1,13 @@
use std::ops::RangeInclusive;
pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
// We restrict the accepted boundary, because unsigned integers & SIMD don't
// play well.
let mut output_cursor = 0;
for i in 0..output.len() {
let val = output[i];
output[output_cursor] = offset + i as u32;
output_cursor += if range.contains(&val) { 1 } else { 0 };
}
output.truncate(output_cursor);
}
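// Note on the loop above (illustrative trace): each iteration unconditionally writes
// the candidate position `offset + i` at the cursor and only advances the cursor when
// the value matches, so matching positions are compacted to the front without
// branching on the store. E.g. with offset = 0 and range = 3..=10, the input
// [3, 2, 1, 5] becomes [0, 3] after truncation.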


@@ -1,5 +1,6 @@
mod bitpacker;
mod blocked_bitpacker;
mod filter_vec;
use std::cmp::Ordering;

cliff.toml (new file)

@@ -0,0 +1,89 @@
# configuration file for git-cliff
# see https://github.com/orhun/git-cliff#configuration-file
[changelog]
# changelog header
header = """
"""
# template for the changelog body
# https://tera.netlify.app/docs/#introduction
body = """
{% if version %}\
{{ version | trim_start_matches(pat="v") }} ({{ timestamp | date(format="%Y-%m-%d") }})
==================
{% else %}\
## [unreleased]
{% endif %}\
{% for commit in commits %}
- {% if commit.breaking %}[**breaking**] {% endif %}{{ commit.message | split(pat="\n") | first | trim | upper_first }}(@{{ commit.author.name }})\
{% endfor %}
"""
# remove the leading and trailing whitespace from the template
trim = true
# changelog footer
footer = """
"""
postprocessors = [
{ pattern = 'Paul Masurel', replace = "fulmicoton"}, # replace with github user
{ pattern = 'PSeitz', replace = "PSeitz"}, # replace with github user
{ pattern = 'Adam Reichold', replace = "adamreichold"}, # replace with github user
{ pattern = 'trinity-1686a', replace = "trinity-1686a"}, # replace with github user
{ pattern = 'Michael Kleen', replace = "mkleen"}, # replace with github user
{ pattern = 'Adrien Guillo', replace = "guilload"}, # replace with github user
{ pattern = 'François Massot', replace = "fmassot"}, # replace with github user
{ pattern = '', replace = ""}, # replace with github user
]
[git]
# parse the commits based on https://www.conventionalcommits.org
# This is required or commit.message contains the whole commit message and not just the title
conventional_commits = true
# filter out the commits that are not conventional
filter_unconventional = false
# process each line of a commit as an individual commit
split_commits = false
# regex for preprocessing the commit messages
commit_preprocessors = [
{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "[#${2}](https://github.com/quickwit-oss/tantivy/issues/${2})"}, # replace issue numbers
]
#link_parsers = [
#{ pattern = "#(\\d+)", href = "https://github.com/quickwit-oss/tantivy/pulls/$1"},
#]
# regex for parsing and grouping commits
commit_parsers = [
{ message = "^feat", group = "Features"},
{ message = "^fix", group = "Bug Fixes"},
{ message = "^doc", group = "Documentation"},
{ message = "^perf", group = "Performance"},
{ message = "^refactor", group = "Refactor"},
{ message = "^style", group = "Styling"},
{ message = "^test", group = "Testing"},
{ message = "^chore\\(release\\): prepare for", skip = true},
{ message = "(?i)clippy", skip = true},
{ message = "(?i)dependabot", skip = true},
{ message = "(?i)fmt", skip = true},
{ message = "(?i)bump", skip = true},
{ message = "(?i)readme", skip = true},
{ message = "(?i)comment", skip = true},
{ message = "(?i)spelling", skip = true},
{ message = "^chore", group = "Miscellaneous Tasks"},
{ body = ".*security", group = "Security"},
{ message = ".*", group = "Other", default_scope = "other"},
]
# protect breaking changes from being skipped due to matching a skipping commit_parser
protect_breaking_commits = false
# filter out the commits that are not matched by commit parsers
filter_commits = false
# glob pattern for matching git tags
tag_pattern = "v[0-9]*"
# regex for skipping tags
skip_tags = "v0.1.0-beta.1"
# regex for ignoring tags
ignore_tags = ""
# sort the tags topologically
topo_order = false
# sort the commits inside sections by oldest/newest order
sort_commits = "newest"
# limit the number of commits included in the changelog.
# limit_commits = 42
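# Illustrative example (hypothetical commit, not from the repository history):
# with the settings above, a commit titled
#   feat: add phrase prefix support (#1234)
# authored by "Jane Doe" gets its issue reference rewritten by
# `commit_preprocessors` and is rendered by the `body` template roughly as:
#   - Add phrase prefix support [#1234](https://github.com/quickwit-oss/tantivy/issues/1234)(@Jane Doe)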


@@ -3,26 +3,26 @@ name = "tantivy-columnar"
version = "0.1.0"
edition = "2021"
license = "MIT"
homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy"
description = "column oriented storage for tantivy"
categories = ["database-implementations", "data-structures", "compression"]
[dependencies]
itertools = "0.10.5"
log = "0.4.17"
fnv = "1.0.7"
fastdivide = "0.4.0"
rand = { version = "0.8.5", optional = true }
measure_time = { version = "0.8.2", optional = true }
prettytable-rs = { version = "0.10.0", optional = true }
stacker = { path = "../stacker", package="tantivy-stacker"}
sstable = { path = "../sstable", package = "tantivy-sstable" }
common = { path = "../common", package = "tantivy-common" }
tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
stacker = { version= "0.1", path = "../stacker", package="tantivy-stacker"}
sstable = { version= "0.1", path = "../sstable", package = "tantivy-sstable" }
common = { version= "0.5", path = "../common", package = "tantivy-common" }
tantivy-bitpacker = { version= "0.4", path = "../bitpacker/" }
serde = "1.0.152"
[dev-dependencies]
proptest = "1"
more-asserts = "0.3.1"
rand = "0.8.5"
rand = "0.8"
[features]
unstable = []


@@ -0,0 +1,36 @@
use crate::{Column, DocId, RowId};
#[derive(Debug, Default, Clone)]
pub struct ColumnBlockAccessor<T> {
val_cache: Vec<T>,
docid_cache: Vec<DocId>,
row_id_cache: Vec<RowId>,
}
impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
ColumnBlockAccessor<T>
{
#[inline]
pub fn fetch_block(&mut self, docs: &[u32], accessor: &Column<T>) {
self.docid_cache.clear();
self.row_id_cache.clear();
accessor.row_ids_for_docs(docs, &mut self.docid_cache, &mut self.row_id_cache);
self.val_cache.resize(self.row_id_cache.len(), T::default());
accessor
.values
.get_vals(&self.row_id_cache, &mut self.val_cache);
}
#[inline]
pub fn iter_vals(&self) -> impl Iterator<Item = T> + '_ {
self.val_cache.iter().cloned()
}
#[inline]
pub fn iter_docid_vals(&self) -> impl Iterator<Item = (DocId, T)> + '_ {
self.docid_cache
.iter()
.cloned()
.zip(self.val_cache.iter().cloned())
}
}
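// Usage sketch (illustrative; the helper below is hypothetical and assumes an already
// opened `Column<u64>`): fetch_block() resolves a block of doc ids to row ids once and
// loads the matching values, after which the iterators expose them without further
// allocation.
#[allow(dead_code)]
fn column_block_accessor_usage_sketch(column: &Column<u64>, docs: &[u32]) -> u64 {
    let mut accessor = ColumnBlockAccessor::<u64>::default();
    accessor.fetch_block(docs, column);
    // Multi-valued docs contribute one (doc_id, value) pair per value.
    accessor.iter_docid_vals().map(|(_doc_id, val)| val).sum()
}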


@@ -1,6 +1,6 @@
use std::io;
use std::ops::Deref;
use std::sync::Arc;
use std::{fmt, io};
use sstable::{Dictionary, VoidSSTable};
@@ -21,6 +21,14 @@ pub struct BytesColumn {
pub(crate) term_ord_column: Column<u64>,
}
impl fmt::Debug for BytesColumn {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("BytesColumn")
.field("term_ord_column", &self.term_ord_column)
.finish()
}
}
impl BytesColumn {
/// Fills the given `output` buffer with the term associated to the ordinal `ord`.
///
@@ -56,6 +64,12 @@ impl BytesColumn {
#[derive(Clone)]
pub struct StrColumn(BytesColumn);
impl fmt::Debug for StrColumn {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self.term_ord_column)
}
}
impl From<StrColumn> for BytesColumn {
fn from(str_column: StrColumn) -> BytesColumn {
str_column.0


@@ -1,7 +1,7 @@
mod dictionary_encoded;
mod serialize;
use std::fmt::Debug;
use std::fmt::{self, Debug};
use std::io::Write;
use std::ops::{Deref, Range, RangeInclusive};
use std::sync::Arc;
@@ -16,14 +16,33 @@ pub use serialize::{
use crate::column_index::ColumnIndex;
use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
use crate::column_values::{monotonic_map_column, ColumnValues};
use crate::{Cardinality, MonotonicallyMappableToU64, RowId};
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
#[derive(Clone)]
pub struct Column<T = u64> {
pub idx: ColumnIndex,
pub index: ColumnIndex,
pub values: Arc<dyn ColumnValues<T>>,
}
impl<T: Debug + PartialOrd + Send + Sync + Copy + 'static> Debug for Column<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let num_docs = self.num_docs();
let entries = (0..num_docs)
.map(|i| (i, self.values_for_doc(i).collect::<Vec<_>>()))
.filter(|(_, vals)| !vals.is_empty());
f.debug_map().entries(entries).finish()
}
}
impl<T: PartialOrd + Default> Column<T> {
pub fn build_empty_column(num_docs: u32) -> Column<T> {
Column {
index: ColumnIndex::Empty { num_docs },
values: Arc::new(EmptyColumnValues),
}
}
}
impl<T: MonotonicallyMappableToU64> Column<T> {
pub fn to_u64_monotonic(self) -> Column<u64> {
let values = Arc::new(monotonic_map_column(
@@ -31,7 +50,7 @@ impl<T: MonotonicallyMappableToU64> Column<T> {
StrictlyMonotonicMappingToInternal::<T>::new(),
));
Column {
idx: self.idx,
index: self.index,
values,
}
}
@@ -40,11 +59,11 @@ impl<T: MonotonicallyMappableToU64> Column<T> {
impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
#[inline]
pub fn get_cardinality(&self) -> Cardinality {
self.idx.get_cardinality()
self.index.get_cardinality()
}
pub fn num_docs(&self) -> RowId {
match &self.idx {
match &self.index {
ColumnIndex::Empty { num_docs } => *num_docs,
ColumnIndex::Full => self.values.num_vals(),
ColumnIndex::Optional(optional_index) => optional_index.num_docs(),
@@ -68,8 +87,25 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
self.values_for_doc(row_id).next()
}
pub fn values_for_doc(&self, row_id: RowId) -> impl Iterator<Item = T> + '_ {
self.value_row_ids(row_id)
/// Translates a block of doc ids to row ids.
///
/// Returns the row ids and the matching doc ids at the same index,
/// e.g.
/// DocId In: [0, 5, 6]
/// DocId Out: [0, 0, 6, 6]
/// RowId Out: [0, 1, 2, 3]
#[inline]
pub fn row_ids_for_docs(
&self,
doc_ids: &[DocId],
doc_ids_out: &mut Vec<DocId>,
row_ids: &mut Vec<RowId>,
) {
self.index.docids_to_rowids(doc_ids, doc_ids_out, row_ids)
}
pub fn values_for_doc(&self, doc_id: DocId) -> impl Iterator<Item = T> + '_ {
self.value_row_ids(doc_id)
.map(|value_row_id: RowId| self.values.get_val(value_row_id))
}
@@ -82,13 +118,15 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
doc_ids: &mut Vec<u32>,
) {
// convert passed docid range to row id range
let rowid_range = self.idx.docid_range_to_rowids(selected_docid_range.clone());
let rowid_range = self
.index
.docid_range_to_rowids(selected_docid_range.clone());
// Load rows
self.values
.get_row_ids_for_value_range(value_range, rowid_range, doc_ids);
// Convert rows to docids
self.idx
self.index
.select_batch_in_place(selected_docid_range.start, doc_ids);
}
@@ -113,7 +151,7 @@ impl<T> Deref for Column<T> {
type Target = ColumnIndex;
fn deref(&self) -> &Self::Target {
&self.idx
&self.index
}
}
@@ -151,7 +189,7 @@ impl<T: PartialOrd + Debug + Send + Sync + Copy + 'static> ColumnValues<T>
}
fn num_vals(&self) -> u32 {
match &self.column.idx {
match &self.column.index {
ColumnIndex::Empty { .. } => 0u32,
ColumnIndex::Full => self.column.values.num_vals(),
ColumnIndex::Optional(optional_idx) => optional_idx.num_docs(),


@@ -52,7 +52,7 @@ pub fn open_column_u64<T: MonotonicallyMappableToU64>(bytes: OwnedBytes) -> io::
let column_index = crate::column_index::open_column_index(column_index_data)?;
let column_values = load_u64_based_column_values(column_values_data)?;
Ok(Column {
idx: column_index,
index: column_index,
values: column_values,
})
}
@@ -71,7 +71,7 @@ pub fn open_column_u128<T: MonotonicallyMappableToU128>(
let column_index = crate::column_index::open_column_index(column_index_data)?;
let column_values = crate::column_values::open_u128_mapped(column_values_data)?;
Ok(Column {
idx: column_index,
index: column_index,
values: column_values,
})
}


@@ -1,29 +1,82 @@
mod shuffled;
mod stacked;
use common::ReadOnlyBitSet;
use shuffled::merge_column_index_shuffled;
use stacked::merge_column_index_stacked;
use crate::column_index::SerializableColumnIndex;
use crate::{Cardinality, ColumnIndex, MergeRowOrder};
// For simplification, we never have cardinality go down due to deletes.
fn detect_cardinality(columns: &[Option<ColumnIndex>]) -> Cardinality {
columns
.iter()
.flatten()
.map(ColumnIndex::get_cardinality)
.max()
.unwrap_or(Cardinality::Full)
fn detect_cardinality_single_column_index(
column_index: &ColumnIndex,
alive_bitset_opt: &Option<ReadOnlyBitSet>,
) -> Cardinality {
let Some(alive_bitset) = alive_bitset_opt else {
return column_index.get_cardinality();
};
let cardinality_before_deletes = column_index.get_cardinality();
if cardinality_before_deletes == Cardinality::Full {
// The columnar cardinality can only become more restrictive in the presence of deletes
// (cardinalities sorted from the most restrictive to the least restrictive are Full,
// Optional, Multivalued).
//
// If we are already "Full", we are guaranteed to stay "Full" after deletes.
return Cardinality::Full;
}
let mut cardinality_so_far = Cardinality::Full;
for doc_id in alive_bitset.iter() {
let num_values = column_index.value_row_ids(doc_id).len();
let row_cardinality = match num_values {
0 => Cardinality::Optional,
1 => Cardinality::Full,
_ => Cardinality::Multivalued,
};
cardinality_so_far = cardinality_so_far.max(row_cardinality);
if cardinality_so_far >= cardinality_before_deletes {
// There won't be any improvement in the cardinality.
// We can early exit.
return cardinality_before_deletes;
}
}
cardinality_so_far
}
fn detect_cardinality(
column_indexes: &[ColumnIndex],
merge_row_order: &MergeRowOrder,
) -> Cardinality {
match merge_row_order {
MergeRowOrder::Stack(_) => column_indexes
.iter()
.map(ColumnIndex::get_cardinality)
.max()
.unwrap_or(Cardinality::Full),
MergeRowOrder::Shuffled(shuffle_merge_order) => {
let mut merged_cardinality = Cardinality::Full;
for (column_index, alive_bitset_opt) in column_indexes
.iter()
.zip(shuffle_merge_order.alive_bitsets.iter())
{
let cardinality: Cardinality =
detect_cardinality_single_column_index(column_index, alive_bitset_opt);
if cardinality == Cardinality::Multivalued {
return cardinality;
}
merged_cardinality = merged_cardinality.max(cardinality);
}
merged_cardinality
}
}
}
pub fn merge_column_index<'a>(
columns: &'a [Option<ColumnIndex>],
columns: &'a [ColumnIndex],
merge_row_order: &'a MergeRowOrder,
) -> SerializableColumnIndex<'a> {
// For simplification, we do not try to detect whether the cardinality could be
// downgraded thanks to deletes.
let cardinality_after_merge = detect_cardinality(columns);
let cardinality_after_merge = detect_cardinality(columns, merge_row_order);
match merge_row_order {
MergeRowOrder::Stack(stack_merge_order) => {
merge_column_index_stacked(columns, cardinality_after_merge, stack_merge_order)
@@ -45,42 +98,61 @@ mod tests {
use crate::column_index::merge::detect_cardinality;
use crate::column_index::multivalued_index::MultiValueIndex;
use crate::column_index::{merge_column_index, OptionalIndex, SerializableColumnIndex};
use crate::{Cardinality, ColumnIndex, MergeRowOrder, RowAddr, RowId, ShuffleMergeOrder};
use crate::{
Cardinality, ColumnIndex, MergeRowOrder, RowAddr, RowId, ShuffleMergeOrder, StackMergeOrder,
};
#[test]
fn test_detect_cardinality() {
assert_eq!(detect_cardinality(&[]), Cardinality::Full);
assert_eq!(
detect_cardinality(&[], &StackMergeOrder::stack_for_test(&[]).into()),
Cardinality::Full
);
let optional_index: ColumnIndex = OptionalIndex::for_test(1, &[]).into();
let multivalued_index: ColumnIndex = MultiValueIndex::for_test(&[0, 1]).into();
assert_eq!(
detect_cardinality(&[Some(optional_index.clone()), None]),
detect_cardinality(
&[optional_index.clone(), ColumnIndex::Empty { num_docs: 0 }],
&StackMergeOrder::stack_for_test(&[1, 0]).into()
),
Cardinality::Optional
);
assert_eq!(
detect_cardinality(&[Some(optional_index.clone()), Some(ColumnIndex::Full)]),
detect_cardinality(
&[optional_index.clone(), ColumnIndex::Full],
&StackMergeOrder::stack_for_test(&[1, 1]).into()
),
Cardinality::Optional
);
assert_eq!(
detect_cardinality(&[Some(multivalued_index.clone()), None]),
detect_cardinality(
&[
multivalued_index.clone(),
ColumnIndex::Empty { num_docs: 0 }
],
&StackMergeOrder::stack_for_test(&[1, 0]).into()
),
Cardinality::Multivalued
);
assert_eq!(
detect_cardinality(&[
Some(multivalued_index.clone()),
Some(optional_index.clone())
]),
detect_cardinality(
&[multivalued_index.clone(), optional_index.clone()],
&StackMergeOrder::stack_for_test(&[1, 1]).into()
),
Cardinality::Multivalued
);
assert_eq!(
detect_cardinality(&[Some(optional_index), Some(multivalued_index)]),
detect_cardinality(
&[optional_index, multivalued_index],
&StackMergeOrder::stack_for_test(&[1, 1]).into()
),
Cardinality::Multivalued
);
}
#[test]
fn test_merge_index_multivalued_sorted() {
let column_indexes: Vec<Option<ColumnIndex>> =
vec![Some(MultiValueIndex::for_test(&[0, 2, 5]).into())];
let column_indexes: Vec<ColumnIndex> = vec![MultiValueIndex::for_test(&[0, 2, 5]).into()];
let merge_row_order: MergeRowOrder = ShuffleMergeOrder::for_test(
&[2],
vec![
@@ -104,10 +176,10 @@ mod tests {
#[test]
fn test_merge_index_multivalued_sorted_several_segment() {
let column_indexes: Vec<Option<ColumnIndex>> = vec![
Some(MultiValueIndex::for_test(&[0, 2, 5]).into()),
None,
Some(MultiValueIndex::for_test(&[0, 1, 4]).into()),
let column_indexes: Vec<ColumnIndex> = vec![
MultiValueIndex::for_test(&[0, 2, 5]).into(),
ColumnIndex::Empty { num_docs: 0 },
MultiValueIndex::for_test(&[0, 1, 4]).into(),
];
let merge_row_order: MergeRowOrder = ShuffleMergeOrder::for_test(
&[2, 0, 2],


@@ -5,7 +5,7 @@ use crate::iterable::Iterable;
use crate::{Cardinality, ColumnIndex, RowId, ShuffleMergeOrder};
pub fn merge_column_index_shuffled<'a>(
column_indexes: &'a [Option<ColumnIndex>],
column_indexes: &'a [ColumnIndex],
cardinality_after_merge: Cardinality,
shuffle_merge_order: &'a ShuffleMergeOrder,
) -> SerializableColumnIndex<'a> {
@@ -33,41 +33,41 @@ pub fn merge_column_index_shuffled<'a>(
///
/// In other words the column_indexes passed as argument may NOT be multivalued.
fn merge_column_index_shuffled_optional<'a>(
column_indexes: &'a [Option<ColumnIndex>],
column_indexes: &'a [ColumnIndex],
merge_order: &'a ShuffleMergeOrder,
) -> Box<dyn Iterable<RowId> + 'a> {
Box::new(ShuffledOptionalIndex {
Box::new(ShuffledIndex {
column_indexes,
merge_order,
})
}
struct ShuffledOptionalIndex<'a> {
column_indexes: &'a [Option<ColumnIndex>],
struct ShuffledIndex<'a> {
column_indexes: &'a [ColumnIndex],
merge_order: &'a ShuffleMergeOrder,
}
impl<'a> Iterable<u32> for ShuffledOptionalIndex<'a> {
impl<'a> Iterable<u32> for ShuffledIndex<'a> {
fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
Box::new(self.merge_order
.iter_new_to_old_row_addrs()
.enumerate()
.filter_map(|(new_row_id, old_row_addr)| {
let Some(column_index) = &self.column_indexes[old_row_addr.segment_ord as usize] else {
return None;
};
let row_id = new_row_id as u32;
if column_index.has_value(old_row_addr.row_id) {
Some(row_id)
} else {
None
}
}))
Box::new(
self.merge_order
.iter_new_to_old_row_addrs()
.enumerate()
.filter_map(|(new_row_id, old_row_addr)| {
let column_index = &self.column_indexes[old_row_addr.segment_ord as usize];
let row_id = new_row_id as u32;
if column_index.has_value(old_row_addr.row_id) {
Some(row_id)
} else {
None
}
}),
)
}
}
fn merge_column_index_shuffled_multivalued<'a>(
column_indexes: &'a [Option<ColumnIndex>],
column_indexes: &'a [ColumnIndex],
merge_order: &'a ShuffleMergeOrder,
) -> Box<dyn Iterable<RowId> + 'a> {
Box::new(ShuffledMultivaluedIndex {
@@ -77,19 +77,16 @@ fn merge_column_index_shuffled_multivalued<'a>(
}
struct ShuffledMultivaluedIndex<'a> {
column_indexes: &'a [Option<ColumnIndex>],
column_indexes: &'a [ColumnIndex],
merge_order: &'a ShuffleMergeOrder,
}
fn iter_num_values<'a>(
column_indexes: &'a [Option<ColumnIndex>],
column_indexes: &'a [ColumnIndex],
merge_order: &'a ShuffleMergeOrder,
) -> impl Iterator<Item = u32> + 'a {
merge_order.iter_new_to_old_row_addrs().map(|row_addr| {
let Some(column_index) = &column_indexes[row_addr.segment_ord as usize] else {
// No values in the entire column. It surely means there are 0 values associated to this row.
return 0u32;
};
let column_index = &column_indexes[row_addr.segment_ord as usize];
match column_index {
ColumnIndex::Empty { .. } => 0u32,
ColumnIndex::Full => 1,
@@ -143,7 +140,7 @@ mod tests {
#[test]
fn test_merge_column_index_optional_shuffle() {
let optional_index: ColumnIndex = OptionalIndex::for_test(2, &[0]).into();
let column_indexes = vec![Some(optional_index), Some(ColumnIndex::Full)];
let column_indexes = vec![optional_index, ColumnIndex::Full];
let row_addrs = vec![
RowAddr {
segment_ord: 0u32,


@@ -9,7 +9,7 @@ use crate::{Cardinality, ColumnIndex, RowId, StackMergeOrder};
///
/// There are no sort nor deletes involved.
pub fn merge_column_index_stacked<'a>(
columns: &'a [Option<ColumnIndex>],
columns: &'a [ColumnIndex],
cardinality_after_merge: Cardinality,
stack_merge_order: &'a StackMergeOrder,
) -> SerializableColumnIndex<'a> {
@@ -33,7 +33,7 @@ pub fn merge_column_index_stacked<'a>(
}
struct StackedOptionalIndex<'a> {
columns: &'a [Option<ColumnIndex>],
columns: &'a [ColumnIndex],
stack_merge_order: &'a StackMergeOrder,
}
@@ -46,16 +46,16 @@ impl<'a> Iterable<RowId> for StackedOptionalIndex<'a> {
.flat_map(|(columnar_id, column_index_opt)| {
let columnar_row_range = self.stack_merge_order.columnar_range(columnar_id);
let rows_it: Box<dyn Iterator<Item = RowId>> = match column_index_opt {
Some(ColumnIndex::Full) => Box::new(columnar_row_range),
Some(ColumnIndex::Optional(optional_index)) => Box::new(
ColumnIndex::Full => Box::new(columnar_row_range),
ColumnIndex::Optional(optional_index) => Box::new(
optional_index
.iter_rows()
.map(move |row_id: RowId| columnar_row_range.start + row_id),
),
Some(ColumnIndex::Multivalued(_)) => {
ColumnIndex::Multivalued(_) => {
panic!("No multivalued index is allowed when stacking column index");
}
None | Some(ColumnIndex::Empty { .. }) => Box::new(std::iter::empty()),
ColumnIndex::Empty { .. } => Box::new(std::iter::empty()),
};
rows_it
}),
@@ -65,20 +65,18 @@ impl<'a> Iterable<RowId> for StackedOptionalIndex<'a> {
#[derive(Clone, Copy)]
struct StackedMultivaluedIndex<'a> {
columns: &'a [Option<ColumnIndex>],
columns: &'a [ColumnIndex],
stack_merge_order: &'a StackMergeOrder,
}
fn convert_column_opt_to_multivalued_index<'a>(
column_index_opt: Option<&'a ColumnIndex>,
column_index_opt: &'a ColumnIndex,
num_rows: RowId,
) -> Box<dyn Iterator<Item = RowId> + 'a> {
match column_index_opt {
None | Some(ColumnIndex::Empty { .. }) => {
Box::new(iter::repeat(0u32).take(num_rows as usize + 1))
}
Some(ColumnIndex::Full) => Box::new(0..num_rows + 1),
Some(ColumnIndex::Optional(optional_index)) => {
ColumnIndex::Empty { .. } => Box::new(iter::repeat(0u32).take(num_rows as usize + 1)),
ColumnIndex::Full => Box::new(0..num_rows + 1),
ColumnIndex::Optional(optional_index) => {
Box::new(
(0..num_rows)
// TODO optimize
@@ -86,9 +84,7 @@ fn convert_column_opt_to_multivalued_index<'a>(
.chain(std::iter::once(optional_index.num_non_nulls())),
)
}
Some(ColumnIndex::Multivalued(multivalued_index)) => {
multivalued_index.start_index_column.iter()
}
ColumnIndex::Multivalued(multivalued_index) => multivalued_index.start_index_column.iter(),
}
}
@@ -97,7 +93,6 @@ impl<'a> Iterable<RowId> for StackedMultivaluedIndex<'a> {
let multivalued_indexes =
self.columns
.iter()
.map(Option::as_ref)
.enumerate()
.map(|(columnar_id, column_opt)| {
let num_rows =


@@ -12,7 +12,7 @@ pub use serialize::{open_column_index, serialize_column_index, SerializableColum
use crate::column_index::multivalued_index::MultiValueIndex;
use crate::{Cardinality, DocId, RowId};
#[derive(Clone)]
#[derive(Clone, Debug)]
pub enum ColumnIndex {
Empty {
num_docs: u32,
@@ -37,11 +37,15 @@ impl From<MultiValueIndex> for ColumnIndex {
}
impl ColumnIndex {
// Returns the cardinality of the column index.
//
// By convention, if the column contains no docs, we consider that it is
// full.
#[inline]
pub fn get_cardinality(&self) -> Cardinality {
match self {
ColumnIndex::Empty { num_docs: 0 } | ColumnIndex::Full => Cardinality::Full,
ColumnIndex::Empty { .. } => Cardinality::Optional,
ColumnIndex::Full => Cardinality::Full,
ColumnIndex::Optional(_) => Cardinality::Optional,
ColumnIndex::Multivalued(_) => Cardinality::Multivalued,
}
@@ -74,6 +78,45 @@ impl ColumnIndex {
}
}
/// Translates a block of doc ids to row ids.
///
/// Returns the row ids and the matching doc ids at the same index,
/// e.g.
/// DocId In: [0, 5, 6]
/// DocId Out: [0, 0, 6, 6]
/// RowId Out: [0, 1, 2, 3]
#[inline]
pub fn docids_to_rowids(
&self,
doc_ids: &[DocId],
doc_ids_out: &mut Vec<DocId>,
row_ids: &mut Vec<RowId>,
) {
match self {
ColumnIndex::Empty { .. } => {}
ColumnIndex::Full => {
doc_ids_out.extend_from_slice(doc_ids);
row_ids.extend_from_slice(doc_ids);
}
ColumnIndex::Optional(optional_index) => {
for doc_id in doc_ids {
if let Some(row_id) = optional_index.rank_if_exists(*doc_id) {
doc_ids_out.push(*doc_id);
row_ids.push(row_id);
}
}
}
ColumnIndex::Multivalued(multivalued_index) => {
for doc_id in doc_ids {
for row_id in multivalued_index.range(*doc_id) {
doc_ids_out.push(*doc_id);
row_ids.push(row_id);
}
}
}
}
}
pub fn docid_range_to_rowids(&self, doc_id: Range<DocId>) -> Range<RowId> {
match self {
ColumnIndex::Empty { .. } => 0..0,
@@ -113,3 +156,21 @@ impl ColumnIndex {
}
}
}
#[cfg(test)]
mod tests {
use crate::{Cardinality, ColumnIndex};
#[test]
fn test_column_index_get_cardinality() {
assert_eq!(
ColumnIndex::Empty { num_docs: 0 }.get_cardinality(),
Cardinality::Full
);
assert_eq!(ColumnIndex::Full.get_cardinality(), Cardinality::Full);
assert_eq!(
ColumnIndex::Empty { num_docs: 1 }.get_cardinality(),
Cardinality::Optional
);
}
}
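// Sketch of the docids_to_rowids contract documented above (illustrative test,
// reusing the for_test constructor; the offsets are made up): doc 0 holds two values,
// docs 1..=5 hold none, and doc 6 holds two values, so the input block [0, 5, 6]
// expands to row ids [0, 1, 2, 3] with the matching doc ids repeated.
#[cfg(test)]
mod docids_to_rowids_sketch {
    use super::*;

    #[test]
    fn test_docids_to_rowids_multivalued() {
        let index: ColumnIndex = MultiValueIndex::for_test(&[0, 2, 2, 2, 2, 2, 2, 4]).into();
        let mut doc_ids_out = Vec::new();
        let mut row_ids = Vec::new();
        index.docids_to_rowids(&[0, 5, 6], &mut doc_ids_out, &mut row_ids);
        assert_eq!(doc_ids_out, vec![0, 0, 6, 6]);
        assert_eq!(row_ids, vec![0, 1, 2, 3]);
    }
}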


@@ -35,6 +35,14 @@ pub struct MultiValueIndex {
pub start_index_column: Arc<dyn crate::ColumnValues<RowId>>,
}
impl std::fmt::Debug for MultiValueIndex {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("MultiValuedIndex")
.field("num_rows", &self.start_index_column.num_vals())
.finish_non_exhaustive()
}
}
impl From<Arc<dyn ColumnValues<RowId>>> for MultiValueIndex {
fn from(start_index_column: Arc<dyn ColumnValues<RowId>>) -> Self {
MultiValueIndex { start_index_column }
@@ -106,11 +114,8 @@ impl MultiValueIndex {
#[cfg(test)]
mod tests {
use std::ops::Range;
use std::sync::Arc;
use super::MultiValueIndex;
use crate::column_values::IterColumn;
use crate::{ColumnValues, RowId};
fn index_to_pos_helper(
index: &MultiValueIndex,
@@ -124,9 +129,7 @@ mod tests {
#[test]
fn test_positions_to_docid() {
let offsets: Vec<RowId> = vec![0, 10, 12, 15, 22, 23]; // docid values are [0..10, 10..12, 12..15, etc.]
let column: Arc<dyn ColumnValues<RowId>> = Arc::new(IterColumn::from(offsets.into_iter()));
let index = MultiValueIndex::from(column);
let index = MultiValueIndex::for_test(&[0, 10, 12, 15, 22, 23]);
assert_eq!(index.num_docs(), 5);
let positions = &[10u32, 11, 15, 20, 21, 22];
assert_eq!(index_to_pos_helper(&index, 0..5, positions), vec![1, 3, 4]);


@@ -88,6 +88,15 @@ pub struct OptionalIndex {
block_metas: Arc<[BlockMeta]>,
}
impl std::fmt::Debug for OptionalIndex {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("OptionalIndex")
.field("num_rows", &self.num_rows)
.field("num_non_null_rows", &self.num_non_null_rows)
.finish_non_exhaustive()
}
}
/// Splits a value address into lower and upper 16bits.
/// The lower 16 bits are the value in the block
/// The upper 16 bits are the block index


@@ -5,7 +5,7 @@ use crate::iterable::Iterable;
use crate::{ColumnIndex, ColumnValues, MergeRowOrder};
pub(crate) struct MergedColumnValues<'a, T> {
pub(crate) column_indexes: &'a [Option<ColumnIndex>],
pub(crate) column_indexes: &'a [ColumnIndex],
pub(crate) column_values: &'a [Option<Arc<dyn ColumnValues<T>>>],
pub(crate) merge_row_order: &'a MergeRowOrder,
}
@@ -23,8 +23,7 @@ impl<'a, T: Copy + PartialOrd + Debug> Iterable<T> for MergedColumnValues<'a, T>
shuffle_merge_order
.iter_new_to_old_row_addrs()
.flat_map(|row_addr| {
let column_index =
self.column_indexes[row_addr.segment_ord as usize].as_ref()?;
let column_index = &self.column_indexes[row_addr.segment_ord as usize];
let column_values =
self.column_values[row_addr.segment_ord as usize].as_ref()?;
let value_range = column_index.value_row_ids(row_addr.row_id);


@@ -58,10 +58,21 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
/// # Panics
///
/// May panic if `idx` is greater than the column length.
fn get_vals(&self, idx: &[u32], output: &mut [T]) {
assert!(idx.len() == output.len());
for (out, idx) in output.iter_mut().zip(idx.iter()) {
*out = self.get_val(*idx as u32);
fn get_vals(&self, indexes: &[u32], output: &mut [T]) {
assert!(indexes.len() == output.len());
let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4));
for (out_x4, idx_x4) in out_and_idx_chunks {
out_x4[0] = self.get_val(idx_x4[0]);
out_x4[1] = self.get_val(idx_x4[1]);
out_x4[2] = self.get_val(idx_x4[2]);
out_x4[3] = self.get_val(idx_x4[3]);
}
let step_size = 4;
let cutoff = indexes.len() - indexes.len() % step_size;
for idx in cutoff..indexes.len() {
output[idx] = self.get_val(indexes[idx]);
}
}
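// Equivalence sketch (illustrative): the unrolled default implementation above
// returns exactly what the naive loop would, i.e.
//
//     for (out, &idx) in output.iter_mut().zip(indexes.iter()) {
//         *out = self.get_val(idx);
//     }
//
// The 4-wide chunking presumably only exists to let the compiler keep several
// independent `get_val` calls in flight.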
@@ -83,7 +94,6 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
/// Get the row ids of values which are in the provided value range.
///
/// Note that position == docid for single value fast fields
#[inline(always)]
fn get_row_ids_for_value_range(
&self,
value_range: RangeInclusive<T>,
@@ -99,20 +109,26 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
}
}
/// Returns the minimum value for this fast field.
/// Returns a lower bound for this column of values.
///
/// This min_value may not be exact.
/// For instance, the min value does not take in account of possible
/// deleted document. All values are however guaranteed to be higher than
/// `.min_value()`.
/// All values are guaranteed to be greater than or equal to `.min_value()`,
/// but this value is not necessarily the tightest bound.
///
/// We have
/// ∀i < self.num_vals(), self.get_val(i) >= self.min_value()
/// But we don't have necessarily
/// ∃i < self.num_vals(), self.get_val(i) == self.min_value()
fn min_value(&self) -> T;
/// Returns the maximum value for this fast field.
/// Returns an upper bound for this column of values.
///
/// This max_value may not be exact.
/// For instance, the max value does not take in account of possible
/// deleted document. All values are however guaranteed to be higher than
/// `.max_value()`.
/// All values are guaranteed to be less than or equal to `.max_value()`,
/// but this value is not necessarily the tightest bound.
///
/// We have
/// ∀i < self.num_vals(), self.get_val(i) <= self.max_value()
/// But we don't have necessarily
/// ∃i < self.num_vals(), self.get_val(i) == self.max_value()
fn max_value(&self) -> T;
/// The number of values in the column.
@@ -124,6 +140,27 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
}
}
/// Empty column of values.
pub struct EmptyColumnValues;
impl<T: PartialOrd + Default> ColumnValues<T> for EmptyColumnValues {
fn get_val(&self, _idx: u32) -> T {
panic!("Internal Error: Called get_val of empty column.")
}
fn min_value(&self) -> T {
T::default()
}
fn max_value(&self) -> T {
T::default()
}
fn num_vals(&self) -> u32 {
0
}
}
impl<T: Copy + PartialOrd + Debug> ColumnValues<T> for Arc<dyn ColumnValues<T>> {
#[inline(always)]
fn get_val(&self, idx: u32) -> T {
@@ -167,54 +204,5 @@ impl<T: Copy + PartialOrd + Debug> ColumnValues<T> for Arc<dyn ColumnValues<T>>
}
}
/// Wraps a cloneable iterator into a `Column`.
pub struct IterColumn<T>(T);
impl<T> From<T> for IterColumn<T>
where T: Iterator + Clone + ExactSizeIterator
{
fn from(iter: T) -> Self {
IterColumn(iter)
}
}
impl<T> ColumnValues<T::Item> for IterColumn<T>
where
T: Iterator + Clone + ExactSizeIterator + Send + Sync,
T::Item: PartialOrd + Debug,
{
fn get_val(&self, idx: u32) -> T::Item {
self.0.clone().nth(idx as usize).unwrap()
}
fn min_value(&self) -> T::Item {
self.0.clone().next().unwrap()
}
fn max_value(&self) -> T::Item {
self.0.clone().last().unwrap()
}
fn num_vals(&self) -> u32 {
self.0.len() as u32
}
fn iter(&self) -> Box<dyn Iterator<Item = T::Item> + '_> {
Box::new(self.0.clone())
}
}
#[cfg(all(test, feature = "unstable"))]
mod bench;
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_range_as_col() {
let col = IterColumn::from(10..100);
assert_eq!(col.num_vals(), 90);
assert_eq!(col.max_value(), 99);
}
}


@@ -50,7 +50,7 @@ where
Input: PartialOrd + Send + Debug + Sync + Clone,
Output: PartialOrd + Send + Debug + Sync + Clone,
{
#[inline]
#[inline(always)]
fn get_val(&self, idx: u32) -> Output {
let from_val = self.from_column.get_val(idx);
self.monotonic_mapping.mapping(from_val)


@@ -139,12 +139,12 @@ impl MonotonicallyMappableToU64 for i64 {
impl MonotonicallyMappableToU64 for DateTime {
#[inline(always)]
fn to_u64(self) -> u64 {
common::i64_to_u64(self.into_timestamp_micros())
common::i64_to_u64(self.into_timestamp_nanos())
}
#[inline(always)]
fn from_u64(val: u64) -> Self {
DateTime::from_timestamp_micros(common::u64_to_i64(val))
DateTime::from_timestamp_nanos(common::u64_to_i64(val))
}
}
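// Round-trip sketch (illustrative, hypothetical test module): after the change above,
// `DateTime` is mapped to u64 through its nanosecond timestamp, and `i64_to_u64`
// keeps the mapping monotonic.
#[cfg(test)]
mod datetime_mapping_sketch {
    use super::*;

    #[test]
    fn test_datetime_u64_mapping_round_trips_and_keeps_order() {
        let earlier = DateTime::from_timestamp_nanos(1_000);
        let later = DateTime::from_timestamp_nanos(2_000);
        assert!(DateTime::from_u64(earlier.to_u64()) == earlier);
        assert!(earlier.to_u64() < later.to_u64());
    }
}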


@@ -10,7 +10,7 @@ use super::{CompactSpace, RangeMapping};
/// Put the blanks for the sorted values into a binary heap
fn get_blanks(values_sorted: &BTreeSet<u128>) -> BinaryHeap<BlankRange> {
let mut blanks: BinaryHeap<BlankRange> = BinaryHeap::new();
for (first, second) in values_sorted.iter().tuple_windows() {
for (first, second) in values_sorted.iter().copied().tuple_windows() {
// Correctness Overflow: the values are deduped and sorted (BTreeSet property), that means
// there's always space between two values.
let blank_range = first + 1..=second - 1;
@@ -65,12 +65,12 @@ pub fn get_compact_space(
return compact_space_builder.finish();
}
let mut blanks: BinaryHeap<BlankRange> = get_blanks(values_deduped_sorted);
// Replace after stabilization of https://github.com/rust-lang/rust/issues/62924
// We start by space that's limited to min_value..=max_value
let min_value = *values_deduped_sorted.iter().next().unwrap_or(&0);
let max_value = *values_deduped_sorted.iter().last().unwrap_or(&0);
// Replace after stabilization of https://github.com/rust-lang/rust/issues/62924
let min_value = values_deduped_sorted.iter().next().copied().unwrap_or(0);
let max_value = values_deduped_sorted.iter().last().copied().unwrap_or(0);
let mut blanks: BinaryHeap<BlankRange> = get_blanks(values_deduped_sorted);
// +1 for null, in case min and max covers the whole space, we are off by one.
let mut amplitude_compact_space = (max_value - min_value).saturating_add(1);
@@ -84,6 +84,7 @@ pub fn get_compact_space(
let mut amplitude_bits: u8 = num_bits(amplitude_compact_space);
let mut blank_collector = BlankCollector::new();
// We will stage blanks until they reduce the compact space by at least 1 bit and then flush
// them if the metadata cost is lower than the total number of saved bits.
// Binary heap to process the gaps by their size
@@ -93,6 +94,7 @@ pub fn get_compact_space(
let staged_spaces_sum: u128 = blank_collector.staged_blanks_sum();
let amplitude_new_compact_space = amplitude_compact_space - staged_spaces_sum;
let amplitude_new_bits = num_bits(amplitude_new_compact_space);
if amplitude_bits == amplitude_new_bits {
continue;
}
@@ -100,7 +102,16 @@ pub fn get_compact_space(
// TODO: Maybe calculate exact cost of blanks and run this more expensive computation only,
// when amplitude_new_bits changes
let cost = blank_collector.num_staged_blanks() * cost_per_blank;
if cost >= saved_bits {
// We want to end up with a compact space that fits into 32 bits.
// In order to deal with pathological cases, we force the algorithm to keep
// refining the compact space until the amplitude fits into 32 bits.
//
// The worst case scenario happens for a large number of u128s regularly
// spread over the full u128 space.
//
// This change will force the algorithm to degenerate into dictionary encoding.
if amplitude_bits <= 32 && cost >= saved_bits {
// Continue here, since although we walk over the blanks by size,
// we can potentially save a lot at the last bits, which are smaller blanks
//
@@ -115,6 +126,8 @@ pub fn get_compact_space(
compact_space_builder.add_blanks(blank_collector.drain().map(|blank| blank.blank_range()));
}
assert!(amplitude_bits <= 32);
// Special case, when we didn't collect any blanks because:
// * the data is empty (early exit)
// * the algorithm decided it's not worth the cost, which can be the case for single values
@@ -199,7 +212,7 @@ impl CompactSpaceBuilder {
covered_space.push(0..=0); // empty data case
};
let mut compact_start: u64 = 1; // 0 is reserved for `null`
let mut compact_start: u32 = 1; // 0 is reserved for `null`
let mut ranges_mapping: Vec<RangeMapping> = Vec::with_capacity(covered_space.len());
for cov in covered_space {
let range_mapping = super::RangeMapping {
@@ -218,6 +231,7 @@ impl CompactSpaceBuilder {
#[cfg(test)]
mod tests {
use super::*;
use crate::column_values::u128_based::compact_space::COST_PER_BLANK_IN_BITS;
#[test]
fn test_binary_heap_pop_order() {
@@ -228,4 +242,11 @@ mod tests {
assert_eq!(blanks.pop().unwrap().blank_size(), 101);
assert_eq!(blanks.pop().unwrap().blank_size(), 11);
}
#[test]
fn test_worst_case_scenario() {
let vals: BTreeSet<u128> = (0..8).map(|i| i * ((1u128 << 34) / 8)).collect();
let compact_space = get_compact_space(&vals, vals.len() as u32, COST_PER_BLANK_IN_BITS);
assert!(compact_space.amplitude_compact_space() < u32::MAX as u128);
}
}
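// Worked example (illustrative): for the deduplicated values {1000, 1001, 5000},
// the only candidate blank is 1002..=4999. If the cost model accepts it (it weighs
// the per-blank metadata cost against the bits saved over all values), the covered
// space shrinks to [1000..=1001] and [5000..=5000]; with compact id 0 reserved for
// null this maps 1000 -> 1, 1001 -> 2, 5000 -> 3, and values are bit-packed with
// 2 bits instead of the 12 bits needed for the raw amplitude of 4001.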


@@ -42,15 +42,15 @@ pub struct CompactSpace {
#[derive(Debug, Clone, Eq, PartialEq)]
struct RangeMapping {
value_range: RangeInclusive<u128>,
compact_start: u64,
compact_start: u32,
}
impl RangeMapping {
fn range_length(&self) -> u64 {
(self.value_range.end() - self.value_range.start()) as u64 + 1
fn range_length(&self) -> u32 {
(self.value_range.end() - self.value_range.start()) as u32 + 1
}
// The last value of the compact space in this range
fn compact_end(&self) -> u64 {
fn compact_end(&self) -> u32 {
self.compact_start + self.range_length() - 1
}
}
@@ -81,7 +81,7 @@ impl BinarySerializable for CompactSpace {
let num_ranges = VInt::deserialize(reader)?.0;
let mut ranges_mapping: Vec<RangeMapping> = vec![];
let mut value = 0u128;
let mut compact_start = 1u64; // 0 is reserved for `null`
let mut compact_start = 1u32; // 0 is reserved for `null`
for _ in 0..num_ranges {
let blank_delta_start = VIntU128::deserialize(reader)?.0;
value += blank_delta_start;
@@ -122,10 +122,10 @@ impl CompactSpace {
/// Returns either Ok(the value in the compact space) or if it is outside the compact space the
/// Err(position where it would be inserted)
fn u128_to_compact(&self, value: u128) -> Result<u64, usize> {
fn u128_to_compact(&self, value: u128) -> Result<u32, usize> {
self.ranges_mapping
.binary_search_by(|probe| {
let value_range = &probe.value_range;
let value_range: &RangeInclusive<u128> = &probe.value_range;
if value < *value_range.start() {
Ordering::Greater
} else if value > *value_range.end() {
@@ -136,13 +136,13 @@ impl CompactSpace {
})
.map(|pos| {
let range_mapping = &self.ranges_mapping[pos];
let pos_in_range = (value - range_mapping.value_range.start()) as u64;
let pos_in_range: u32 = (value - range_mapping.value_range.start()) as u32;
range_mapping.compact_start + pos_in_range
})
}
/// Unpacks a value from compact space u64 to u128 space
fn compact_to_u128(&self, compact: u64) -> u128 {
/// Unpacks a value from compact space u32 to u128 space
fn compact_to_u128(&self, compact: u32) -> u128 {
let pos = self
.ranges_mapping
.binary_search_by_key(&compact, |range_mapping| range_mapping.compact_start)
@@ -178,11 +178,15 @@ impl CompactSpaceCompressor {
/// Taking the vals as Vec may cost a lot of memory. It is used to sort the vals.
pub fn train_from(iter: impl Iterator<Item = u128>) -> Self {
let mut values_sorted = BTreeSet::new();
// Total number of values, with their redundancy.
let mut total_num_values = 0u32;
for val in iter {
total_num_values += 1u32;
values_sorted.insert(val);
}
let min_value = *values_sorted.iter().next().unwrap_or(&0);
let max_value = *values_sorted.iter().last().unwrap_or(&0);
let compact_space =
get_compact_space(&values_sorted, total_num_values, COST_PER_BLANK_IN_BITS);
let amplitude_compact_space = compact_space.amplitude_compact_space();
@@ -193,13 +197,12 @@ impl CompactSpaceCompressor {
);
let num_bits = tantivy_bitpacker::compute_num_bits(amplitude_compact_space as u64);
let min_value = *values_sorted.iter().next().unwrap_or(&0);
let max_value = *values_sorted.iter().last().unwrap_or(&0);
assert_eq!(
compact_space
.u128_to_compact(max_value)
.expect("could not convert max value to compact space"),
amplitude_compact_space as u64
amplitude_compact_space as u32
);
CompactSpaceCompressor {
params: IPCodecParams {
@@ -240,7 +243,7 @@ impl CompactSpaceCompressor {
"Could not convert value to compact_space. This is a bug.",
)
})?;
bitpacker.write(compact, self.params.num_bits, write)?;
bitpacker.write(compact as u64, self.params.num_bits, write)?;
}
bitpacker.close(write)?;
self.write_footer(write)?;
@@ -314,48 +317,6 @@ impl ColumnValues<u128> for CompactSpaceDecompressor {
#[inline]
fn get_row_ids_for_value_range(
&self,
value_range: RangeInclusive<u128>,
positions_range: Range<u32>,
positions: &mut Vec<u32>,
) {
self.get_positions_for_value_range(value_range, positions_range, positions)
}
}
impl CompactSpaceDecompressor {
pub fn open(data: OwnedBytes) -> io::Result<CompactSpaceDecompressor> {
let (data_slice, footer_len_bytes) = data.split_at(data.len() - 4);
let footer_len = u32::deserialize(&mut &footer_len_bytes[..])?;
let data_footer = &data_slice[data_slice.len() - footer_len as usize..];
let params = IPCodecParams::deserialize(&mut &data_footer[..])?;
let decompressor = CompactSpaceDecompressor { data, params };
Ok(decompressor)
}
/// Converting to compact space for the decompressor is more complex, since we may get values
/// which are outside the compact space. e.g. if we map
/// 1000 => 5
/// 2000 => 6
///
/// and we want a mapping for 1005, there is no equivalent compact space. We instead return an
/// error with the index of the next range.
fn u128_to_compact(&self, value: u128) -> Result<u64, usize> {
self.params.compact_space.u128_to_compact(value)
}
fn compact_to_u128(&self, compact: u64) -> u128 {
self.params.compact_space.compact_to_u128(compact)
}
/// Comparing on compact space: Random dataset 0,24 (50% random hit) - 1.05 GElements/s
/// Comparing on compact space: Real dataset 1.08 GElements/s
///
/// Comparing on original space: Real dataset .06 GElements/s (not completely optimized)
#[inline]
pub fn get_positions_for_value_range(
&self,
value_range: RangeInclusive<u128>,
position_range: Range<u32>,
@@ -395,44 +356,42 @@ impl CompactSpaceDecompressor {
range_mapping.compact_end()
});
let range = compact_from..=compact_to;
let value_range = compact_from..=compact_to;
self.get_positions_for_compact_value_range(value_range, position_range, positions);
}
}
let scan_num_docs = position_range.end - position_range.start;
impl CompactSpaceDecompressor {
pub fn open(data: OwnedBytes) -> io::Result<CompactSpaceDecompressor> {
let (data_slice, footer_len_bytes) = data.split_at(data.len() - 4);
let footer_len = u32::deserialize(&mut &footer_len_bytes[..])?;
let step_size = 4;
let cutoff = position_range.start + scan_num_docs - scan_num_docs % step_size;
let data_footer = &data_slice[data_slice.len() - footer_len as usize..];
let params = IPCodecParams::deserialize(&mut &data_footer[..])?;
let decompressor = CompactSpaceDecompressor { data, params };
let mut push_if_in_range = |idx, val| {
if range.contains(&val) {
positions.push(idx);
}
};
let get_val = |idx| self.params.bit_unpacker.get(idx, &self.data);
// unrolled loop
for idx in (position_range.start..cutoff).step_by(step_size as usize) {
let idx1 = idx;
let idx2 = idx + 1;
let idx3 = idx + 2;
let idx4 = idx + 3;
let val1 = get_val(idx1);
let val2 = get_val(idx2);
let val3 = get_val(idx3);
let val4 = get_val(idx4);
push_if_in_range(idx1, val1);
push_if_in_range(idx2, val2);
push_if_in_range(idx3, val3);
push_if_in_range(idx4, val4);
}
Ok(decompressor)
}
// handle rest
for idx in cutoff..position_range.end {
push_if_in_range(idx, get_val(idx));
}
/// Converting to compact space for the decompressor is more complex, since we may get values
/// which are outside the compact space. e.g. if we map
/// 1000 => 5
/// 2000 => 6
///
/// and we want a mapping for 1005, there is no equivalent compact space. We instead return an
/// error with the index of the next range.
fn u128_to_compact(&self, value: u128) -> Result<u32, usize> {
self.params.compact_space.u128_to_compact(value)
}
fn compact_to_u128(&self, compact: u32) -> u128 {
self.params.compact_space.compact_to_u128(compact)
}
#[inline]
fn iter_compact(&self) -> impl Iterator<Item = u64> + '_ {
(0..self.params.num_vals).map(move |idx| self.params.bit_unpacker.get(idx, &self.data))
fn iter_compact(&self) -> impl Iterator<Item = u32> + '_ {
(0..self.params.num_vals)
.map(move |idx| self.params.bit_unpacker.get(idx, &self.data) as u32)
}
#[inline]
@@ -445,7 +404,7 @@ impl CompactSpaceDecompressor {
#[inline]
pub fn get(&self, idx: u32) -> u128 {
let compact = self.params.bit_unpacker.get(idx, &self.data);
let compact = self.params.bit_unpacker.get(idx, &self.data) as u32;
self.compact_to_u128(compact)
}
@@ -456,6 +415,20 @@ impl CompactSpaceDecompressor {
pub fn max_value(&self) -> u128 {
self.params.max_value
}
fn get_positions_for_compact_value_range(
&self,
value_range: RangeInclusive<u32>,
position_range: Range<u32>,
positions: &mut Vec<u32>,
) {
self.params.bit_unpacker.get_ids_for_value_range(
*value_range.start() as u64..=*value_range.end() as u64,
position_range,
&self.data,
positions,
);
}
}
#[cfg(test)]
@@ -469,12 +442,12 @@ mod tests {
#[test]
fn compact_space_test() {
let ips = &[
let ips: BTreeSet<u128> = [
2u128, 4u128, 1000, 1001, 1002, 1003, 1004, 1005, 1008, 1010, 1012, 1260,
]
.into_iter()
.collect();
let compact_space = get_compact_space(ips, ips.len() as u32, 11);
let compact_space = get_compact_space(&ips, ips.len() as u32, 11);
let amplitude = compact_space.amplitude_compact_space();
assert_eq!(amplitude, 17);
assert_eq!(1, compact_space.u128_to_compact(2).unwrap());
@@ -497,8 +470,8 @@ mod tests {
);
for ip in ips {
let compact = compact_space.u128_to_compact(*ip).unwrap();
assert_eq!(compact_space.compact_to_u128(compact), *ip);
let compact = compact_space.u128_to_compact(ip).unwrap();
assert_eq!(compact_space.compact_to_u128(compact), ip);
}
}
@@ -524,7 +497,7 @@ mod tests {
.map(|pos| pos as u32)
.collect::<Vec<_>>();
let mut positions = Vec::new();
decompressor.get_positions_for_value_range(
decompressor.get_row_ids_for_value_range(
range,
0..decompressor.num_vals(),
&mut positions,
@@ -569,7 +542,7 @@ mod tests {
let val = *val;
let pos = pos as u32;
let mut positions = Vec::new();
decomp.get_positions_for_value_range(val..=val, pos..pos + 1, &mut positions);
decomp.get_row_ids_for_value_range(val..=val, pos..pos + 1, &mut positions);
assert_eq!(positions, vec![pos]);
}
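To make the compact-space mapping in the doc comments above concrete, here is a minimal standalone sketch, assuming a hypothetical `Mapping` type rather than the crate's `RangeMapping`: each covered u128 range gets a contiguous block of the u32 compact space (0 stays reserved for `null`), and a lookup that falls into a blank returns `Err` with the index of the next range.

use std::cmp::Ordering;
use std::ops::RangeInclusive;

// Hypothetical stand-in for RangeMapping: a covered u128 range and the first
// compact-space value assigned to it.
struct Mapping {
    value_range: RangeInclusive<u128>,
    compact_start: u32,
}

// Mirrors the Ok/Err contract described above: Ok(compact value) inside a covered
// range, Err(index of the next range) inside a blank.
fn u128_to_compact(ranges: &[Mapping], value: u128) -> Result<u32, usize> {
    ranges
        .binary_search_by(|m| {
            if value < *m.value_range.start() {
                Ordering::Greater
            } else if value > *m.value_range.end() {
                Ordering::Less
            } else {
                Ordering::Equal
            }
        })
        .map(|pos| {
            let m = &ranges[pos];
            m.compact_start + (value - m.value_range.start()) as u32
        })
}

fn main() {
    // Two covered ranges packed back to back after the reserved 0:
    // 1000..=1010 -> 1..=11 and 2000..=2005 -> 12..=17.
    let ranges = vec![
        Mapping { value_range: 1000..=1010, compact_start: 1 },
        Mapping { value_range: 2000..=2005, compact_start: 12 },
    ];
    assert_eq!(u128_to_compact(&ranges, 1003), Ok(4));
    assert_eq!(u128_to_compact(&ranges, 2000), Ok(12));
    // 1500 falls in the blank between the two ranges: Err(1) points at the next range.
    assert_eq!(u128_to_compact(&ranges, 1500), Err(1));
}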


@@ -1,4 +1,6 @@
use std::io::{self, Write};
use std::num::NonZeroU64;
use std::ops::{Range, RangeInclusive};
use common::{BinarySerializable, OwnedBytes};
use fastdivide::DividerU64;
@@ -16,6 +18,46 @@ pub struct BitpackedReader {
stats: ColumnStats,
}
#[inline(always)]
const fn div_ceil(n: u64, q: NonZeroU64) -> u64 {
// copied from unstable rust standard library.
let d = n / q.get();
let r = n % q.get();
if r > 0 {
d + 1
} else {
d
}
}
// The bitpacked codec applies a linear transformation `f` over data that are bitpacked.
// f is defined by:
// f: bitpacked -> stats.min_value + stats.gcd * bitpacked
//
// In order to run range queries, we invert the transformation.
// `transform_range_before_linear_transformation` returns the range of values
// [min_bipacked_value..max_bitpacked_value] such that
// f(bitpacked) ∈ [min_value, max_value] <=> bitpacked ∈ [min_bitpacked_value, max_bitpacked_value]
fn transform_range_before_linear_transformation(
stats: &ColumnStats,
range: RangeInclusive<u64>,
) -> Option<RangeInclusive<u64>> {
if range.is_empty() {
return None;
}
if stats.min_value > *range.end() {
return None;
}
if stats.max_value < *range.start() {
return None;
}
let shifted_range =
range.start().saturating_sub(stats.min_value)..=range.end().saturating_sub(stats.min_value);
let start_before_gcd_multiplication: u64 = div_ceil(*shifted_range.start(), stats.gcd);
let end_before_gcd_multiplication: u64 = *shifted_range.end() / stats.gcd;
Some(start_before_gcd_multiplication..=end_before_gcd_multiplication)
}
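As a quick numeric check of the inversion described in the comment above, here is a small self-contained sketch (the `invert` helper is hypothetical, not the codec's API): with min_value = 100 and gcd = 10, a query over original values is translated into a query over bitpacked values before scanning.

use std::num::NonZeroU64;
use std::ops::RangeInclusive;

// f(bitpacked) = min_value + gcd * bitpacked, so the query range is shifted by
// min_value and divided by gcd (lower bound rounded up, upper bound rounded down).
fn invert(min_value: u64, gcd: NonZeroU64, range: RangeInclusive<u64>) -> RangeInclusive<u64> {
    let g = gcd.get();
    let start = (range.start().saturating_sub(min_value) + g - 1) / g; // div_ceil
    let end = range.end().saturating_sub(min_value) / g;
    start..=end
}

fn main() {
    let gcd = NonZeroU64::new(10).unwrap();
    // Original values 115..=142 only contain the stored values 120, 130 and 140,
    // i.e. bitpacked values 2..=4.
    assert_eq!(invert(100, gcd, 115..=142), 2..=4);
}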
impl ColumnValues for BitpackedReader {
#[inline(always)]
fn get_val(&self, doc: u32) -> u64 {
@@ -34,6 +76,25 @@ impl ColumnValues for BitpackedReader {
fn num_vals(&self) -> RowId {
self.stats.num_rows
}
fn get_row_ids_for_value_range(
&self,
range: RangeInclusive<u64>,
doc_id_range: Range<u32>,
positions: &mut Vec<u32>,
) {
let Some(transformed_range) = transform_range_before_linear_transformation(&self.stats, range)
else {
positions.clear();
return;
};
self.bit_unpacker.get_ids_for_value_range(
transformed_range,
doc_id_range,
&self.data,
positions,
);
}
}
fn num_bits(stats: &ColumnStats) -> u8 {


@@ -27,7 +27,7 @@ pub struct StatsCollector {
// This is the same as computing the difference between the values and the first value.
//
// This way, we can compress i64-converted-to-u64 (e.g. timestamp that were supplied in
// seconds, only to be converted in microseconds).
// seconds, only to be converted in nanoseconds).
increment_gcd_opt: Option<(NonZeroU64, DividerU64)>,
first_value_opt: Option<u64>,
}
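A small illustration of the point made in the comment above, as a hedged standalone sketch: second-granularity timestamps converted to nanoseconds all differ from the first value by multiples of 1_000_000_000, so the GCD of the increments lets the values bitpack into a few bits.

// Plain Euclidean GCD, enough for this illustration.
fn gcd(mut a: u64, mut b: u64) -> u64 {
    while b != 0 {
        let t = a % b;
        a = b;
        b = t;
    }
    a
}

fn main() {
    // A few timestamps recorded with second precision but stored in nanoseconds.
    let first = 1_686_000_000_000_000_000u64;
    let values = [first, first + 3_000_000_000, first + 7_000_000_000];
    let increment_gcd = values.iter().map(|&v| v - first).fold(0, gcd);
    assert_eq!(increment_gcd, 1_000_000_000);
    // Each value can then be stored as (value - first) / increment_gcd: 0, 3 and 7,
    // which fit in 3 bits instead of ~61.
}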


@@ -1,6 +1,6 @@
use proptest::prelude::*;
use proptest::strategy::Strategy;
use proptest::{num, prop_oneof, proptest};
use proptest::{prop_oneof, proptest};
#[test]
fn test_serialize_and_load_simple() {
@@ -99,14 +99,28 @@ pub(crate) fn create_and_validate<TColumnCodec: ColumnCodec>(
let reader = TColumnCodec::load(OwnedBytes::new(buffer)).unwrap();
assert_eq!(reader.num_vals(), vals.len() as u32);
let mut buffer = Vec::new();
for (doc, orig_val) in vals.iter().copied().enumerate() {
let val = reader.get_val(doc as u32);
assert_eq!(
val, orig_val,
"val `{val}` does not match orig_val {orig_val:?}, in data set {name}, data `{vals:?}`",
);
buffer.resize(1, 0);
reader.get_vals(&[doc as u32], &mut buffer);
let val = buffer[0];
assert_eq!(
val, orig_val,
"val `{val}` does not match orig_val {orig_val:?}, in data set {name}, data `{vals:?}`",
);
}
let all_docs: Vec<u32> = (0..vals.len() as u32).collect();
buffer.resize(all_docs.len(), 0);
reader.get_vals(&all_docs, &mut buffer);
assert_eq!(vals, buffer);
if !vals.is_empty() {
let test_rand_idx = rand::thread_rng().gen_range(0..=vals.len() - 1);
let expected_positions: Vec<u32> = vals


@@ -1,3 +1,4 @@
use std::fmt;
use std::fmt::Debug;
use std::net::Ipv6Addr;
@@ -21,6 +22,22 @@ pub enum ColumnType {
DateTime = 7u8,
}
impl fmt::Display for ColumnType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let short_str = match self {
ColumnType::I64 => "i64",
ColumnType::U64 => "u64",
ColumnType::F64 => "f64",
ColumnType::Bytes => "bytes",
ColumnType::Str => "str",
ColumnType::Bool => "bool",
ColumnType::IpAddr => "ip",
ColumnType::DateTime => "datetime",
};
write!(f, "{short_str}")
}
}
// The order needs to match _exactly_ the order in the enum
const COLUMN_TYPES: [ColumnType; 8] = [
ColumnType::I64,
@@ -37,6 +54,9 @@ impl ColumnType {
pub fn to_code(self) -> u8 {
self as u8
}
pub fn is_date_time(&self) -> bool {
self == &ColumnType::DateTime
}
pub(crate) fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
COLUMN_TYPES.get(code as usize).copied().ok_or(InvalidData)


@@ -1,7 +1,7 @@
use std::io::{self, Write};
use common::{BitSet, CountingWriter, ReadOnlyBitSet};
use sstable::{SSTable, TermOrdinal};
use sstable::{SSTable, Streamer, TermOrdinal, VoidSSTable};
use super::term_merger::TermMerger;
use crate::column::serialize_column_mappable_to_u64;
@@ -56,17 +56,19 @@ impl<'a> RemappedTermOrdinalsValues<'a> {
.bytes_columns
.iter()
.enumerate()
.flat_map(|(segment_ord, byte_column)| {
let segment_ord = self.term_ord_mapping.get_segment(segment_ord as u32);
byte_column.iter().flat_map(move |bytes_column| {
bytes_column
.ords()
.values
.iter()
.map(move |term_ord| segment_ord[term_ord as usize])
})
.flat_map(|(seg_ord, bytes_column_opt)| {
let bytes_column = bytes_column_opt.as_ref()?;
Some((seg_ord, bytes_column))
})
.flat_map(move |(seg_ord, bytes_column)| {
let term_ord_after_merge_mapping =
self.term_ord_mapping.get_segment(seg_ord as u32);
bytes_column
.ords()
.values
.iter()
.map(move |term_ord| term_ord_after_merge_mapping[term_ord as usize])
});
// TODO see if we can better decompose the mapping / and the stacking
Box::new(iter)
}
@@ -124,16 +126,20 @@ fn serialize_merged_dict(
let mut term_ord_mapping = TermOrdinalMapping::default();
let mut field_term_streams = Vec::new();
for column in bytes_columns.iter().flatten() {
term_ord_mapping.add_segment(column.dictionary.num_terms());
let terms = column.dictionary.stream()?;
field_term_streams.push(terms);
for column_opt in bytes_columns.iter() {
if let Some(column) = column_opt {
term_ord_mapping.add_segment(column.dictionary.num_terms());
let terms: Streamer<VoidSSTable> = column.dictionary.stream()?;
field_term_streams.push(terms);
} else {
term_ord_mapping.add_segment(0);
field_term_streams.push(Streamer::empty());
}
}
let mut merged_terms = TermMerger::new(field_term_streams);
let mut sstable_builder = sstable::VoidSSTable::writer(output);
// TODO support complex `merge_row_order`.
match merge_row_order {
MergeRowOrder::Stack(_) => {
let mut current_term_ord = 0;


@@ -11,6 +11,17 @@ pub struct StackMergeOrder {
}
impl StackMergeOrder {
#[cfg(test)]
pub fn stack_for_test(num_rows_per_columnar: &[u32]) -> StackMergeOrder {
let mut cumulated_row_ids: Vec<RowId> = Vec::with_capacity(num_rows_per_columnar.len());
let mut cumulated_row_id = 0;
for &num_rows in num_rows_per_columnar {
cumulated_row_id += num_rows;
cumulated_row_ids.push(cumulated_row_id);
}
StackMergeOrder { cumulated_row_ids }
}
pub fn stack(columnars: &[&ColumnarReader]) -> StackMergeOrder {
let mut cumulated_row_ids: Vec<RowId> = Vec::with_capacity(columnars.len());
let mut cumulated_row_id = 0;


@@ -7,6 +7,7 @@ use std::io;
use std::net::Ipv6Addr;
use std::sync::Arc;
use itertools::Itertools;
pub use merge_mapping::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
use super::writer::ColumnarSerializer;
@@ -28,7 +29,7 @@ use crate::{
///
/// See also [README.md].
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
enum ColumnTypeCategory {
pub(crate) enum ColumnTypeCategory {
Bool,
Str,
Numerical,
@@ -78,20 +79,25 @@ pub fn merge_columnar(
output: &mut impl io::Write,
) -> io::Result<()> {
let mut serializer = ColumnarSerializer::new(output);
let columns_to_merge = group_columns_for_merge(columnar_readers, required_columns)?;
let num_rows_per_columnar = columnar_readers
.iter()
.map(|reader| reader.num_rows())
.collect::<Vec<u32>>();
let columns_to_merge =
group_columns_for_merge(columnar_readers, required_columns, &merge_row_order)?;
for ((column_name, column_type), columns) in columns_to_merge {
let mut column_serializer =
serializer.serialize_column(column_name.as_bytes(), column_type);
serializer.start_serialize_column(column_name.as_bytes(), column_type);
merge_column(
column_type,
&num_rows_per_columnar,
columns,
&merge_row_order,
&mut column_serializer,
)?;
column_serializer.finalize()?;
}
serializer.finalize(merge_row_order.num_rows())?;
Ok(())
}
@@ -108,6 +114,7 @@ fn dynamic_column_to_u64_monotonic(dynamic_column: DynamicColumn) -> Option<Colu
fn merge_column(
column_type: ColumnType,
num_docs_per_column: &[u32],
columns: Vec<Option<DynamicColumn>>,
merge_row_order: &MergeRowOrder,
wrt: &mut impl io::Write,
@@ -118,17 +125,19 @@ fn merge_column(
| ColumnType::F64
| ColumnType::DateTime
| ColumnType::Bool => {
let mut column_indexes: Vec<Option<ColumnIndex>> = Vec::with_capacity(columns.len());
let mut column_indexes: Vec<ColumnIndex> = Vec::with_capacity(columns.len());
let mut column_values: Vec<Option<Arc<dyn ColumnValues>>> =
Vec::with_capacity(columns.len());
for dynamic_column_opt in columns {
if let Some(Column { idx, values }) =
for (i, dynamic_column_opt) in columns.into_iter().enumerate() {
if let Some(Column { index: idx, values }) =
dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic)
{
column_indexes.push(Some(idx));
column_indexes.push(idx);
column_values.push(Some(values));
} else {
column_indexes.push(None);
column_indexes.push(ColumnIndex::Empty {
num_docs: num_docs_per_column[i],
});
column_values.push(None);
}
}
@@ -142,15 +151,19 @@ fn merge_column(
serialize_column_mappable_to_u64(merged_column_index, &merge_column_values, wrt)?;
}
ColumnType::IpAddr => {
let mut column_indexes: Vec<Option<ColumnIndex>> = Vec::with_capacity(columns.len());
let mut column_indexes: Vec<ColumnIndex> = Vec::with_capacity(columns.len());
let mut column_values: Vec<Option<Arc<dyn ColumnValues<Ipv6Addr>>>> =
Vec::with_capacity(columns.len());
for dynamic_column_opt in columns {
if let Some(DynamicColumn::IpAddr(Column { idx, values })) = dynamic_column_opt {
column_indexes.push(Some(idx));
for (i, dynamic_column_opt) in columns.into_iter().enumerate() {
if let Some(DynamicColumn::IpAddr(Column { index: idx, values })) =
dynamic_column_opt
{
column_indexes.push(idx);
column_values.push(Some(values));
} else {
column_indexes.push(None);
column_indexes.push(ColumnIndex::Empty {
num_docs: num_docs_per_column[i],
});
column_values.push(None);
}
}
@@ -166,20 +179,22 @@ fn merge_column(
serialize_column_mappable_to_u128(merged_column_index, &merge_column_values, wrt)?;
}
ColumnType::Bytes | ColumnType::Str => {
let mut column_indexes: Vec<Option<ColumnIndex>> = Vec::with_capacity(columns.len());
let mut column_indexes: Vec<ColumnIndex> = Vec::with_capacity(columns.len());
let mut bytes_columns: Vec<Option<BytesColumn>> = Vec::with_capacity(columns.len());
for dynamic_column_opt in columns {
for (i, dynamic_column_opt) in columns.into_iter().enumerate() {
match dynamic_column_opt {
Some(DynamicColumn::Str(str_column)) => {
column_indexes.push(Some(str_column.term_ord_column.idx.clone()));
column_indexes.push(str_column.term_ord_column.index.clone());
bytes_columns.push(Some(str_column.into()));
}
Some(DynamicColumn::Bytes(bytes_column)) => {
column_indexes.push(Some(bytes_column.term_ord_column.idx.clone()));
column_indexes.push(bytes_column.term_ord_column.index.clone());
bytes_columns.push(Some(bytes_column));
}
_ => {
column_indexes.push(None);
column_indexes.push(ColumnIndex::Empty {
num_docs: num_docs_per_column[i],
});
bytes_columns.push(None);
}
}
@@ -275,10 +290,69 @@ fn merged_numerical_columns_type<'a>(
compatible_numerical_types.to_numerical_type()
}
fn is_empty_after_merge(
merge_row_order: &MergeRowOrder,
column: &DynamicColumn,
columnar_id: usize,
) -> bool {
if column.num_values() == 0u32 {
// It was empty before the merge.
return true;
}
match merge_row_order {
MergeRowOrder::Stack(_) => {
// If we are stacking the columnar, no rows are being deleted.
false
}
MergeRowOrder::Shuffled(shuffled) => {
if let Some(alive_bitset) = &shuffled.alive_bitsets[columnar_id] {
let column_index = column.column_index();
match column_index {
ColumnIndex::Empty { .. } => true,
ColumnIndex::Full => alive_bitset.len() == 0,
ColumnIndex::Optional(optional_index) => {
for doc in optional_index.iter_rows() {
if alive_bitset.contains(doc) {
return false;
}
}
true
}
ColumnIndex::Multivalued(multivalued_index) => {
for (doc_id, (start_index, end_index)) in multivalued_index
.start_index_column
.iter()
.tuple_windows()
.enumerate()
{
let doc_id = doc_id as u32;
if start_index == end_index {
// There are no values in this document
continue;
}
// The document contains values and is present in the alive bitset.
// The column is therefore not empty.
if alive_bitset.contains(doc_id) {
return false;
}
}
true
}
}
} else {
// No document is being deleted.
// The shuffle is applying a permutation.
false
}
}
}
}
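Restated as a minimal standalone sketch (hypothetical helper, not the crate's API): an optional column survives the merge only if at least one row that actually carries a value is still alive; stacking never deletes rows, so it never empties a column.

// alive == None models a stack merge / pure permutation (nothing deleted);
// alive == Some(bitset) models a shuffled merge with deletes.
fn survives_merge(rows_with_values: &[u32], alive: Option<&[bool]>) -> bool {
    match alive {
        None => !rows_with_values.is_empty(),
        Some(alive) => rows_with_values.iter().any(|&row| alive[row as usize]),
    }
}

fn main() {
    // Rows 1 and 3 carried values but both are deleted: the column is empty after the merge.
    assert!(!survives_merge(&[1, 3], Some(&[true, false, true, false])));
    // Stacking the same column keeps it.
    assert!(survives_merge(&[1, 3], None));
}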
#[allow(clippy::type_complexity)]
fn group_columns_for_merge(
columnar_readers: &[&ColumnarReader],
required_columns: &[(String, ColumnType)],
merge_row_order: &MergeRowOrder,
) -> io::Result<BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>>> {
// Each column name may have multiple types of column associated.
// For merging we are interested in the same column type category since they can be merged.
@@ -295,9 +369,16 @@ fn group_columns_for_merge(
for (columnar_id, columnar_reader) in columnar_readers.iter().enumerate() {
let column_name_and_handle = columnar_reader.list_columns()?;
// We skip columns that end up with 0 documents.
// That way, we make sure they don't end up influencing the merge type or
// creating empty columns.
for (column_name, handle) in column_name_and_handle {
let column_category: ColumnTypeCategory = handle.column_type().into();
let column = handle.open()?;
if is_empty_after_merge(merge_row_order, &column, columnar_id) {
continue;
}
columns_grouped
.entry((column_name, column_category))
.or_insert_with(|| {
@@ -361,8 +442,8 @@ fn coerce_column(column_type: ColumnType, column: DynamicColumn) -> io::Result<D
fn min_max_if_numerical(column: &DynamicColumn) -> Option<(NumericalValue, NumericalValue)> {
match column {
DynamicColumn::I64(column) => Some((column.min_value().into(), column.max_value().into())),
DynamicColumn::U64(column) => Some((column.min_value().into(), column.min_value().into())),
DynamicColumn::F64(column) => Some((column.min_value().into(), column.min_value().into())),
DynamicColumn::U64(column) => Some((column.min_value().into(), column.max_value().into())),
DynamicColumn::F64(column) => Some((column.min_value().into(), column.max_value().into())),
DynamicColumn::Bool(_)
| DynamicColumn::IpAddr(_)
| DynamicColumn::DateTime(_)


@@ -1,3 +1,5 @@
use itertools::Itertools;
use super::*;
use crate::{Cardinality, ColumnarWriter, HasAssociatedColumnType, RowId};
@@ -23,8 +25,10 @@ fn test_column_coercion_to_u64() {
let columnar1 = make_columnar("numbers", &[1i64]);
// u64 type
let columnar2 = make_columnar("numbers", &[u64::MAX]);
let columnars = &[&columnar1, &columnar2];
let merge_order = StackMergeOrder::stack(columnars).into();
let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
group_columns_for_merge(&[&columnar1, &columnar2], &[]).unwrap();
group_columns_for_merge(columnars, &[], &merge_order).unwrap();
assert_eq!(column_map.len(), 1);
assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::U64)));
}
@@ -33,8 +37,10 @@ fn test_column_coercion_to_u64() {
fn test_column_no_coercion_if_all_the_same() {
let columnar1 = make_columnar("numbers", &[1u64]);
let columnar2 = make_columnar("numbers", &[2u64]);
let columnars = &[&columnar1, &columnar2];
let merge_order = StackMergeOrder::stack(columnars).into();
let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
group_columns_for_merge(&[&columnar1, &columnar2], &[]).unwrap();
group_columns_for_merge(columnars, &[], &merge_order).unwrap();
assert_eq!(column_map.len(), 1);
assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::U64)));
}
@@ -43,8 +49,10 @@ fn test_column_no_coercion_if_all_the_same() {
fn test_column_coercion_to_i64() {
let columnar1 = make_columnar("numbers", &[-1i64]);
let columnar2 = make_columnar("numbers", &[2u64]);
let columnars = &[&columnar1, &columnar2];
let merge_order = StackMergeOrder::stack(columnars).into();
let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
group_columns_for_merge(&[&columnar1, &columnar2], &[]).unwrap();
group_columns_for_merge(columnars, &[], &merge_order).unwrap();
assert_eq!(column_map.len(), 1);
assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::I64)));
}
@@ -52,10 +60,13 @@ fn test_column_coercion_to_i64() {
#[test]
fn test_impossible_coercion_returns_an_error() {
let columnar1 = make_columnar("numbers", &[u64::MAX]);
let group_error =
group_columns_for_merge(&[&columnar1], &[("numbers".to_string(), ColumnType::I64)])
.map(|_| ())
.unwrap_err();
let merge_order = StackMergeOrder::stack(&[&columnar1]).into();
let group_error = group_columns_for_merge(
&[&columnar1],
&[("numbers".to_string(), ColumnType::I64)],
&merge_order,
)
.unwrap_err();
assert_eq!(group_error.kind(), io::ErrorKind::InvalidInput);
}
@@ -63,10 +74,13 @@ fn test_impossible_coercion_returns_an_error() {
fn test_group_columns_with_required_column() {
let columnar1 = make_columnar("numbers", &[1i64]);
let columnar2 = make_columnar("numbers", &[2u64]);
let columnars = &[&columnar1, &columnar2];
let merge_order = StackMergeOrder::stack(columnars).into();
let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
group_columns_for_merge(
&[&columnar1, &columnar2],
&[("numbers".to_string(), ColumnType::U64)],
&merge_order,
)
.unwrap();
assert_eq!(column_map.len(), 1);
@@ -77,10 +91,13 @@ fn test_group_columns_with_required_column() {
fn test_group_columns_required_column_with_no_existing_columns() {
let columnar1 = make_columnar("numbers", &[2u64]);
let columnar2 = make_columnar("numbers", &[2u64]);
let columnars = &[&columnar1, &columnar2];
let merge_order = StackMergeOrder::stack(columnars).into();
let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
group_columns_for_merge(
&[&columnar1, &columnar2],
columnars,
&[("required_col".to_string(), ColumnType::Str)],
&merge_order,
)
.unwrap();
assert_eq!(column_map.len(), 2);
@@ -96,10 +113,13 @@ fn test_group_columns_required_column_with_no_existing_columns() {
fn test_group_columns_required_column_is_above_all_columns_have_the_same_type_rule() {
let columnar1 = make_columnar("numbers", &[2i64]);
let columnar2 = make_columnar("numbers", &[2i64]);
let columnars = &[&columnar1, &columnar2];
let merge_order = StackMergeOrder::stack(columnars).into();
let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
group_columns_for_merge(
&[&columnar1, &columnar2],
columnars,
&[("numbers".to_string(), ColumnType::U64)],
&merge_order,
)
.unwrap();
assert_eq!(column_map.len(), 1);
@@ -110,8 +130,10 @@ fn test_group_columns_required_column_is_above_all_columns_have_the_same_type_ru
fn test_missing_column() {
let columnar1 = make_columnar("numbers", &[-1i64]);
let columnar2 = make_columnar("numbers2", &[2u64]);
let columnars = &[&columnar1, &columnar2];
let merge_order = StackMergeOrder::stack(columnars).into();
let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
group_columns_for_merge(&[&columnar1, &columnar2], &[]).unwrap();
group_columns_for_merge(columnars, &[], &merge_order).unwrap();
assert_eq!(column_map.len(), 2);
assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::I64)));
{
@@ -153,20 +175,24 @@ fn make_numerical_columnar_multiple_columns(
ColumnarReader::open(buffer).unwrap()
}
fn make_byte_columnar_multiple_columns(columns: &[(&str, &[&[&[u8]]])]) -> ColumnarReader {
#[track_caller]
fn make_byte_columnar_multiple_columns(
columns: &[(&str, &[&[&[u8]]])],
num_rows: u32,
) -> ColumnarReader {
let mut dataframe_writer = ColumnarWriter::default();
for (column_name, column_values) in columns {
assert_eq!(
column_values.len(),
num_rows as usize,
"All columns must have `{num_rows}` rows"
);
for (row_id, vals) in column_values.iter().enumerate() {
for val in vals.iter() {
dataframe_writer.record_bytes(row_id as u32, column_name, val);
}
}
}
let num_rows = columns
.iter()
.map(|(_, val_rows)| val_rows.len() as RowId)
.max()
.unwrap_or(0u32);
let mut buffer: Vec<u8> = Vec::new();
dataframe_writer
.serialize(num_rows, None, &mut buffer)
@@ -245,6 +271,8 @@ fn test_merge_columnar_texts() {
let cols = columnar_reader.read_columns("texts").unwrap();
let dynamic_column = cols[0].open().unwrap();
let DynamicColumn::Str(vals) = dynamic_column else { panic!() };
assert_eq!(vals.ords().get_cardinality(), Cardinality::Optional);
let get_str_for_ord = |ord| {
let mut out = String::new();
vals.ord_to_str(ord, &mut out).unwrap();
@@ -272,8 +300,8 @@ fn test_merge_columnar_texts() {
#[test]
fn test_merge_columnar_byte() {
let columnar1 = make_byte_columnar_multiple_columns(&[("bytes", &[&[b"bbbb"], &[b"baaa"]])]);
let columnar2 = make_byte_columnar_multiple_columns(&[("bytes", &[&[], &[b"a"]])]);
let columnar1 = make_byte_columnar_multiple_columns(&[("bytes", &[&[b"bbbb"], &[b"baaa"]])], 2);
let columnar2 = make_byte_columnar_multiple_columns(&[("bytes", &[&[], &[b"a"]])], 2);
let mut buffer = Vec::new();
let columnars = &[&columnar1, &columnar2];
let stack_merge_order = StackMergeOrder::stack(columnars);
@@ -316,3 +344,149 @@ fn test_merge_columnar_byte() {
assert_eq!(get_bytes_for_row(2), b"");
assert_eq!(get_bytes_for_row(3), b"a");
}
#[test]
fn test_merge_columnar_byte_with_missing() {
let columnar1 = make_byte_columnar_multiple_columns(&[], 3);
let columnar2 = make_byte_columnar_multiple_columns(&[("col", &[&[b"b"], &[]])], 2);
let columnar3 = make_byte_columnar_multiple_columns(
&[
("col", &[&[], &[b"b"], &[b"a", b"b"]]),
("col2", &[&[b"hello"], &[], &[b"a", b"b"]]),
],
3,
);
let mut buffer = Vec::new();
let columnars = &[&columnar1, &columnar2, &columnar3];
let stack_merge_order = StackMergeOrder::stack(columnars);
crate::columnar::merge_columnar(
columnars,
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut buffer,
)
.unwrap();
let columnar_reader = ColumnarReader::open(buffer).unwrap();
assert_eq!(columnar_reader.num_rows(), 3 + 2 + 3);
assert_eq!(columnar_reader.num_columns(), 2);
let cols = columnar_reader.read_columns("col").unwrap();
let dynamic_column = cols[0].open().unwrap();
let DynamicColumn::Bytes(vals) = dynamic_column else { panic!() };
let get_bytes_for_ord = |ord| {
let mut out = Vec::new();
vals.ord_to_bytes(ord, &mut out).unwrap();
out
};
assert_eq!(vals.dictionary.num_terms(), 2);
assert_eq!(get_bytes_for_ord(0), b"a");
assert_eq!(get_bytes_for_ord(1), b"b");
let get_bytes_for_row = |row_id| {
let terms: Vec<Vec<u8>> = vals
.term_ords(row_id)
.map(|term_ord| {
let mut out = Vec::new();
vals.ord_to_bytes(term_ord, &mut out).unwrap();
out
})
.collect();
terms
};
assert!(get_bytes_for_row(0).is_empty());
assert!(get_bytes_for_row(1).is_empty());
assert!(get_bytes_for_row(2).is_empty());
assert_eq!(get_bytes_for_row(3), vec![b"b".to_vec()]);
assert!(get_bytes_for_row(4).is_empty());
assert!(get_bytes_for_row(5).is_empty());
assert_eq!(get_bytes_for_row(6), vec![b"b".to_vec()]);
assert_eq!(get_bytes_for_row(7), vec![b"a".to_vec(), b"b".to_vec()]);
}
#[test]
fn test_merge_columnar_different_types() {
let columnar1 = make_text_columnar_multiple_columns(&[("mixed", &[&["a"]])]);
let columnar2 = make_text_columnar_multiple_columns(&[("mixed", &[&[], &["b"]])]);
let columnar3 = make_columnar("mixed", &[1i64]);
let mut buffer = Vec::new();
let columnars = &[&columnar1, &columnar2, &columnar3];
let stack_merge_order = StackMergeOrder::stack(columnars);
crate::columnar::merge_columnar(
columnars,
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut buffer,
)
.unwrap();
let columnar_reader = ColumnarReader::open(buffer).unwrap();
assert_eq!(columnar_reader.num_rows(), 4);
assert_eq!(columnar_reader.num_columns(), 2);
let cols = columnar_reader.read_columns("mixed").unwrap();
// numeric column
let dynamic_column = cols[0].open().unwrap();
let DynamicColumn::I64(vals) = dynamic_column else { panic!() };
assert_eq!(vals.get_cardinality(), Cardinality::Optional);
assert_eq!(vals.values_for_doc(0).collect_vec(), vec![]);
assert_eq!(vals.values_for_doc(1).collect_vec(), vec![]);
assert_eq!(vals.values_for_doc(2).collect_vec(), vec![]);
assert_eq!(vals.values_for_doc(3).collect_vec(), vec![1]);
assert_eq!(vals.values_for_doc(4).collect_vec(), vec![]);
// text column
let dynamic_column = cols[1].open().unwrap();
let DynamicColumn::Str(vals) = dynamic_column else { panic!() };
assert_eq!(vals.ords().get_cardinality(), Cardinality::Optional);
let get_str_for_ord = |ord| {
let mut out = String::new();
vals.ord_to_str(ord, &mut out).unwrap();
out
};
assert_eq!(vals.dictionary.num_terms(), 2);
assert_eq!(get_str_for_ord(0), "a");
assert_eq!(get_str_for_ord(1), "b");
let get_str_for_row = |row_id| {
let term_ords: Vec<String> = vals
.term_ords(row_id)
.map(|el| {
let mut out = String::new();
vals.ord_to_str(el, &mut out).unwrap();
out
})
.collect();
term_ords
};
assert_eq!(get_str_for_row(0), vec!["a".to_string()]);
assert_eq!(get_str_for_row(1), Vec::<String>::new());
assert_eq!(get_str_for_row(2), vec!["b".to_string()]);
assert_eq!(get_str_for_row(3), Vec::<String>::new());
}
#[test]
fn test_merge_columnar_different_empty_cardinality() {
let columnar1 = make_text_columnar_multiple_columns(&[("mixed", &[&["a"]])]);
let columnar2 = make_columnar("mixed", &[1i64]);
let mut buffer = Vec::new();
let columnars = &[&columnar1, &columnar2];
let stack_merge_order = StackMergeOrder::stack(columnars);
crate::columnar::merge_columnar(
columnars,
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut buffer,
)
.unwrap();
let columnar_reader = ColumnarReader::open(buffer).unwrap();
assert_eq!(columnar_reader.num_rows(), 2);
assert_eq!(columnar_reader.num_columns(), 2);
let cols = columnar_reader.read_columns("mixed").unwrap();
// numeric column
let dynamic_column = cols[0].open().unwrap();
assert_eq!(dynamic_column.get_cardinality(), Cardinality::Optional);
// text column
let dynamic_column = cols[1].open().unwrap();
assert_eq!(dynamic_column.get_cardinality(), Cardinality::Optional);
}


@@ -5,6 +5,8 @@ mod reader;
mod writer;
pub use column_type::{ColumnType, HasAssociatedColumnType};
#[cfg(test)]
pub(crate) use merge::ColumnTypeCategory;
pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
pub use reader::ColumnarReader;
pub use writer::ColumnarWriter;


@@ -1,4 +1,4 @@
use std::{io, mem};
use std::{fmt, io, mem};
use common::file_slice::FileSlice;
use common::BinarySerializable;
@@ -21,6 +21,32 @@ pub struct ColumnarReader {
num_rows: RowId,
}
impl fmt::Debug for ColumnarReader {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let num_rows = self.num_rows();
let columns = self.list_columns().unwrap();
let num_cols = columns.len();
let mut debug_struct = f.debug_struct("Columnar");
debug_struct
.field("num_rows", &num_rows)
.field("num_cols", &num_cols);
for (col_name, dynamic_column_handle) in columns.into_iter().take(5) {
let col = dynamic_column_handle.open().unwrap();
if col.num_values() > 10 {
debug_struct.field(&col_name, &"..");
} else {
debug_struct.field(&col_name, &col);
}
}
if num_cols > 5 {
debug_struct.finish_non_exhaustive()?;
} else {
debug_struct.finish()?;
}
Ok(())
}
}
/// Functions by both the async/sync code listing columns.
/// It takes a stream from the column sstable and return the list of
/// `DynamicColumn` available in it.


@@ -104,16 +104,25 @@ impl ColumnarWriter {
};
let mut symbols_buffer = Vec::new();
let mut values = Vec::new();
let mut last_doc_opt: Option<RowId> = None;
let mut start_doc_check_fill = 0;
let mut current_doc_opt: Option<RowId> = None;
// Assumption: NewDoc will never call the same doc twice and is strictly increasing between
// calls
for op in numerical_col_writer.operation_iterator(&self.arena, None, &mut symbols_buffer) {
match op {
ColumnOperation::NewDoc(doc) => {
last_doc_opt = Some(doc);
current_doc_opt = Some(doc);
}
ColumnOperation::Value(numerical_value) => {
if let Some(last_doc) = last_doc_opt {
if let Some(current_doc) = current_doc_opt {
// Fill up with 0.0 since last doc
values.extend((start_doc_check_fill..current_doc).map(|doc| (0.0, doc)));
start_doc_check_fill = current_doc + 1;
// handle multi values
current_doc_opt = None;
let score: f32 = f64::coerce(numerical_value) as f32;
values.push((score, last_doc));
values.push((score, current_doc));
}
}
}
@@ -123,9 +132,9 @@ impl ColumnarWriter {
}
values.sort_by(|(left_score, _), (right_score, _)| {
if reversed {
right_score.partial_cmp(left_score).unwrap()
right_score.total_cmp(left_score)
} else {
left_score.partial_cmp(right_score).unwrap()
left_score.total_cmp(right_score)
}
});
values.into_iter().map(|(_score, doc)| doc).collect()
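The comments in the hunk above determine the sort key: a document with no recorded value sorts as 0.0, and only the first value recorded for a document is taken into account. A minimal sketch of that behaviour, assuming a hypothetical sort_order helper over per-doc optional scores:

// Hypothetical simplification of the sort-key construction above.
fn sort_order(first_value_per_doc: &[Option<f32>], reversed: bool) -> Vec<u32> {
    let mut keyed: Vec<(f32, u32)> = first_value_per_doc
        .iter()
        .enumerate()
        .map(|(doc, val)| (val.unwrap_or(0.0), doc as u32))
        .collect();
    // total_cmp gives a total order even in the presence of NaN, unlike partial_cmp.
    keyed.sort_by(|(a, _), (b, _)| if reversed { b.total_cmp(a) } else { a.total_cmp(b) });
    keyed.into_iter().map(|(_, doc)| doc).collect()
}

fn main() {
    // Docs 0, 2 and 4 have no value and sort as 0.0, ahead of doc 3 (2.0) and doc 1 (3.0);
    // this mirrors the expectation in test_dataframe_sort_by_opt further down.
    assert_eq!(
        sort_order(&[None, Some(3.0), None, Some(2.0), None], false),
        vec![0, 2, 4, 3, 1]
    );
}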
@@ -257,7 +266,7 @@ impl ColumnarWriter {
let mut column: ColumnWriter = column_opt.unwrap_or_default();
column.record(
doc,
NumericalValue::I64(datetime.into_timestamp_micros()),
NumericalValue::I64(datetime.into_timestamp_nanos()),
arena,
);
column
@@ -361,7 +370,7 @@ impl ColumnarWriter {
let column_writer: ColumnWriter = self.bool_field_hash_map.read(addr);
let cardinality = column_writer.get_cardinality(num_docs);
let mut column_serializer =
serializer.serialize_column(column_name, column_type);
serializer.start_serialize_column(column_name, column_type);
serialize_bool_column(
cardinality,
num_docs,
@@ -373,12 +382,13 @@ impl ColumnarWriter {
buffers,
&mut column_serializer,
)?;
column_serializer.finalize()?;
}
ColumnType::IpAddr => {
let column_writer: ColumnWriter = self.ip_addr_field_hash_map.read(addr);
let cardinality = column_writer.get_cardinality(num_docs);
let mut column_serializer =
serializer.serialize_column(column_name, ColumnType::IpAddr);
serializer.start_serialize_column(column_name, ColumnType::IpAddr);
serialize_ip_addr_column(
cardinality,
num_docs,
@@ -390,6 +400,7 @@ impl ColumnarWriter {
buffers,
&mut column_serializer,
)?;
column_serializer.finalize()?;
}
ColumnType::Bytes | ColumnType::Str => {
let str_or_bytes_column_writer: StrOrBytesColumnWriter =
@@ -404,7 +415,7 @@ impl ColumnarWriter {
.column_writer
.get_cardinality(num_docs);
let mut column_serializer =
serializer.serialize_column(column_name, column_type);
serializer.start_serialize_column(column_name, column_type);
serialize_bytes_or_str_column(
cardinality,
num_docs,
@@ -418,13 +429,14 @@ impl ColumnarWriter {
buffers,
&mut column_serializer,
)?;
column_serializer.finalize()?;
}
ColumnType::F64 | ColumnType::I64 | ColumnType::U64 => {
let numerical_column_writer: NumericalColumnWriter =
self.numerical_field_hash_map.read(addr);
let cardinality = numerical_column_writer.cardinality(num_docs);
let mut column_serializer =
serializer.serialize_column(column_name, column_type);
serializer.start_serialize_column(column_name, column_type);
let numerical_type = column_type.numerical_type().unwrap();
serialize_numerical_column(
cardinality,
@@ -438,12 +450,13 @@ impl ColumnarWriter {
buffers,
&mut column_serializer,
)?;
column_serializer.finalize()?;
}
ColumnType::DateTime => {
let column_writer: ColumnWriter = self.datetime_field_hash_map.read(addr);
let cardinality = column_writer.get_cardinality(num_docs);
let mut column_serializer =
serializer.serialize_column(column_name, ColumnType::DateTime);
serializer.start_serialize_column(column_name, ColumnType::DateTime);
serialize_numerical_column(
cardinality,
num_docs,
@@ -456,6 +469,7 @@ impl ColumnarWriter {
buffers,
&mut column_serializer,
)?;
column_serializer.finalize()?;
}
};
}


@@ -34,11 +34,12 @@ impl<W: io::Write> ColumnarSerializer<W> {
}
}
pub fn serialize_column<'a>(
/// Creates a ColumnSerializer.
pub fn start_serialize_column<'a>(
&'a mut self,
column_name: &[u8],
column_type: ColumnType,
) -> impl io::Write + 'a {
) -> ColumnSerializer<'a, W> {
let start_offset = self.wrt.written_bytes();
prepare_key(column_name, column_type, &mut self.prepare_key_buffer);
ColumnSerializer {
@@ -60,20 +61,21 @@ impl<W: io::Write> ColumnarSerializer<W> {
}
}
struct ColumnSerializer<'a, W: io::Write> {
pub struct ColumnSerializer<'a, W: io::Write> {
columnar_serializer: &'a mut ColumnarSerializer<W>,
start_offset: u64,
}
impl<'a, W: io::Write> Drop for ColumnSerializer<'a, W> {
fn drop(&mut self) {
impl<'a, W: io::Write> ColumnSerializer<'a, W> {
pub fn finalize(self) -> io::Result<()> {
let end_offset: u64 = self.columnar_serializer.wrt.written_bytes();
let byte_range = self.start_offset..end_offset;
self.columnar_serializer.sstable_range.insert_cannot_fail(
self.columnar_serializer.sstable_range.insert(
&self.columnar_serializer.prepare_key_buffer[..],
&byte_range,
);
)?;
self.columnar_serializer.prepare_key_buffer.clear();
Ok(())
}
}


@@ -1,14 +1,14 @@
use std::io;
use std::net::Ipv6Addr;
use std::sync::Arc;
use std::{fmt, io};
use common::file_slice::FileSlice;
use common::{DateTime, HasLen, OwnedBytes};
use common::{ByteCount, DateTime, HasLen, OwnedBytes};
use crate::column::{BytesColumn, Column, StrColumn};
use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn};
use crate::columnar::ColumnType;
use crate::{Cardinality, NumericalType};
use crate::{Cardinality, ColumnIndex, NumericalType};
#[derive(Clone)]
pub enum DynamicColumn {
@@ -22,19 +22,54 @@ pub enum DynamicColumn {
Str(StrColumn),
}
impl DynamicColumn {
pub fn get_cardinality(&self) -> Cardinality {
impl fmt::Debug for DynamicColumn {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "[{} {} |", self.get_cardinality(), self.column_type())?;
match self {
DynamicColumn::Bool(c) => c.get_cardinality(),
DynamicColumn::I64(c) => c.get_cardinality(),
DynamicColumn::U64(c) => c.get_cardinality(),
DynamicColumn::F64(c) => c.get_cardinality(),
DynamicColumn::IpAddr(c) => c.get_cardinality(),
DynamicColumn::DateTime(c) => c.get_cardinality(),
DynamicColumn::Bytes(c) => c.ords().get_cardinality(),
DynamicColumn::Str(c) => c.ords().get_cardinality(),
DynamicColumn::Bool(col) => write!(f, " {col:?}")?,
DynamicColumn::I64(col) => write!(f, " {col:?}")?,
DynamicColumn::U64(col) => write!(f, " {col:?}")?,
DynamicColumn::F64(col) => write!(f, "{col:?}")?,
DynamicColumn::IpAddr(col) => write!(f, "{col:?}")?,
DynamicColumn::DateTime(col) => write!(f, "{col:?}")?,
DynamicColumn::Bytes(col) => write!(f, "{col:?}")?,
DynamicColumn::Str(col) => write!(f, "{col:?}")?,
}
write!(f, "]")
}
}
impl DynamicColumn {
pub fn column_index(&self) -> &ColumnIndex {
match self {
DynamicColumn::Bool(c) => &c.index,
DynamicColumn::I64(c) => &c.index,
DynamicColumn::U64(c) => &c.index,
DynamicColumn::F64(c) => &c.index,
DynamicColumn::IpAddr(c) => &c.index,
DynamicColumn::DateTime(c) => &c.index,
DynamicColumn::Bytes(c) => &c.ords().index,
DynamicColumn::Str(c) => &c.ords().index,
}
}
pub fn get_cardinality(&self) -> Cardinality {
self.column_index().get_cardinality()
}
pub fn num_values(&self) -> u32 {
match self {
DynamicColumn::Bool(c) => c.values.num_vals(),
DynamicColumn::I64(c) => c.values.num_vals(),
DynamicColumn::U64(c) => c.values.num_vals(),
DynamicColumn::F64(c) => c.values.num_vals(),
DynamicColumn::IpAddr(c) => c.values.num_vals(),
DynamicColumn::DateTime(c) => c.values.num_vals(),
DynamicColumn::Bytes(c) => c.ords().values.num_vals(),
DynamicColumn::Str(c) => c.ords().values.num_vals(),
}
}
pub fn column_type(&self) -> ColumnType {
match self {
DynamicColumn::Bool(_) => ColumnType::Bool,
@@ -73,11 +108,11 @@ impl DynamicColumn {
fn coerce_to_f64(self) -> Option<DynamicColumn> {
match self {
DynamicColumn::I64(column) => Some(DynamicColumn::F64(Column {
idx: column.idx,
index: column.index,
values: Arc::new(monotonic_map_column(column.values, MapI64ToF64)),
})),
DynamicColumn::U64(column) => Some(DynamicColumn::F64(Column {
idx: column.idx,
index: column.index,
values: Arc::new(monotonic_map_column(column.values, MapU64ToF64)),
})),
DynamicColumn::F64(_) => Some(self),
@@ -91,7 +126,7 @@ impl DynamicColumn {
return None;
}
Some(DynamicColumn::I64(Column {
idx: column.idx,
index: column.index,
values: Arc::new(monotonic_map_column(column.values, MapU64ToI64)),
}))
}
@@ -106,7 +141,7 @@ impl DynamicColumn {
return None;
}
Some(DynamicColumn::U64(Column {
idx: column.idx,
index: column.index,
values: Arc::new(monotonic_map_column(column.values, MapI64ToU64)),
}))
}
@@ -248,8 +283,8 @@ impl DynamicColumnHandle {
Ok(dynamic_column)
}
pub fn num_bytes(&self) -> usize {
self.file_slice.len()
pub fn num_bytes(&self) -> ByteCount {
self.file_slice.len().into()
}
pub fn column_type(&self) -> ColumnType {


@@ -7,8 +7,10 @@ extern crate more_asserts;
#[cfg(all(test, feature = "unstable"))]
extern crate test;
use std::fmt::Display;
use std::io;
mod block_accessor;
mod column;
mod column_index;
pub mod column_values;
@@ -19,9 +21,12 @@ mod iterable;
pub(crate) mod utils;
mod value;
pub use block_accessor::ColumnBlockAccessor;
pub use column::{BytesColumn, Column, StrColumn};
pub use column_index::ColumnIndex;
pub use column_values::{ColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64};
pub use column_values::{
ColumnValues, EmptyColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
};
pub use columnar::{
merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder,
@@ -34,7 +39,7 @@ pub use self::dynamic_column::{DynamicColumn, DynamicColumnHandle};
pub type RowId = u32;
pub type DocId = u32;
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Debug)]
pub struct RowAddr {
pub segment_ord: u32,
pub row_id: RowId,
@@ -71,6 +76,17 @@ pub enum Cardinality {
Multivalued = 2,
}
impl Display for Cardinality {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let short_str = match self {
Cardinality::Full => "full",
Cardinality::Optional => "opt",
Cardinality::Multivalued => "mult",
};
write!(f, "{short_str}")
}
}
impl Cardinality {
pub fn is_optional(&self) -> bool {
matches!(self, Cardinality::Optional)
@@ -81,7 +97,6 @@ impl Cardinality {
pub(crate) fn to_code(self) -> u8 {
self as u8
}
pub(crate) fn try_from_code(code: u8) -> Result<Cardinality, InvalidData> {
match code {
0 => Ok(Cardinality::Full),


@@ -1,10 +1,19 @@
use std::collections::HashMap;
use std::fmt::Debug;
use std::net::Ipv6Addr;
use common::DateTime;
use proptest::prelude::*;
use proptest::sample::subsequence;
use crate::column_values::MonotonicallyMappableToU128;
use crate::columnar::ColumnType;
use crate::columnar::{ColumnType, ColumnTypeCategory};
use crate::dynamic_column::{DynamicColumn, DynamicColumnHandle};
use crate::value::NumericalValue;
use crate::{Cardinality, ColumnarReader, ColumnarWriter};
use crate::value::{Coerce, NumericalValue};
use crate::{
BytesColumn, Cardinality, Column, ColumnarReader, ColumnarWriter, RowAddr, RowId,
ShuffleMergeOrder, StackMergeOrder,
};
#[test]
fn test_dataframe_writer_str() {
@@ -17,7 +26,7 @@ fn test_dataframe_writer_str() {
assert_eq!(columnar.num_columns(), 1);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
assert_eq!(cols.len(), 1);
assert_eq!(cols[0].num_bytes(), 158);
assert_eq!(cols[0].num_bytes(), 87);
}
#[test]
@@ -31,7 +40,7 @@ fn test_dataframe_writer_bytes() {
assert_eq!(columnar.num_columns(), 1);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
assert_eq!(cols.len(), 1);
assert_eq!(cols[0].num_bytes(), 158);
assert_eq!(cols[0].num_bytes(), 87);
}
#[test]
@@ -126,7 +135,7 @@ fn test_dataframe_writer_numerical() {
assert_eq!(cols[0].num_bytes(), 33);
let column = cols[0].open().unwrap();
let DynamicColumn::I64(column_i64) = column else { panic!(); };
assert_eq!(column_i64.idx.get_cardinality(), Cardinality::Optional);
assert_eq!(column_i64.index.get_cardinality(), Cardinality::Optional);
assert_eq!(column_i64.first(0), None);
assert_eq!(column_i64.first(1), Some(12i64));
assert_eq!(column_i64.first(2), Some(13i64));
@@ -136,6 +145,46 @@ fn test_dataframe_writer_numerical() {
assert_eq!(column_i64.first(6), None); //< we can change the spec for that one.
}
#[test]
fn test_dataframe_sort_by_full() {
let mut dataframe_writer = ColumnarWriter::default();
dataframe_writer.record_numerical(0u32, "value", NumericalValue::U64(1));
dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(2));
let data = dataframe_writer.sort_order("value", 2, false);
assert_eq!(data, vec![0, 1]);
}
#[test]
fn test_dataframe_sort_by_opt() {
let mut dataframe_writer = ColumnarWriter::default();
dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(3));
dataframe_writer.record_numerical(3u32, "value", NumericalValue::U64(2));
let data = dataframe_writer.sort_order("value", 5, false);
// 0, 2, 4 is 0.0
assert_eq!(data, vec![0, 2, 4, 3, 1]);
let data = dataframe_writer.sort_order("value", 5, true);
assert_eq!(
data,
vec![4, 2, 0, 3, 1].into_iter().rev().collect::<Vec<_>>()
);
}
#[test]
fn test_dataframe_sort_by_multi() {
let mut dataframe_writer = ColumnarWriter::default();
// valid for sort
dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(2));
// those are ignored for sort
dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(4));
dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(4));
// valid for sort
dataframe_writer.record_numerical(3u32, "value", NumericalValue::U64(3));
// ignored, would change sort order
dataframe_writer.record_numerical(3u32, "value", NumericalValue::U64(1));
let data = dataframe_writer.sort_order("value", 4, false);
assert_eq!(data, vec![0, 2, 1, 3]);
}
#[test]
fn test_dictionary_encoded_str() {
let mut buffer = Vec::new();
@@ -210,3 +259,667 @@ fn test_dictionary_encoded_bytes() {
.unwrap();
assert_eq!(term_buffer, b"b");
}
fn num_strategy() -> impl Strategy<Value = NumericalValue> {
prop_oneof![
3 => Just(NumericalValue::U64(0u64)),
3 => Just(NumericalValue::U64(u64::MAX)),
3 => Just(NumericalValue::I64(0i64)),
3 => Just(NumericalValue::I64(i64::MIN)),
3 => Just(NumericalValue::I64(i64::MAX)),
3 => Just(NumericalValue::F64(1.2f64)),
1 => any::<f64>().prop_map(NumericalValue::from),
1 => any::<u64>().prop_map(NumericalValue::from),
1 => any::<i64>().prop_map(NumericalValue::from),
]
}
#[derive(Debug, Clone, Copy)]
enum ColumnValue {
Str(&'static str),
Bytes(&'static [u8]),
Numerical(NumericalValue),
IpAddr(Ipv6Addr),
Bool(bool),
DateTime(DateTime),
}
impl<T: Into<NumericalValue>> From<T> for ColumnValue {
fn from(val: T) -> ColumnValue {
ColumnValue::Numerical(val.into())
}
}
impl ColumnValue {
pub(crate) fn column_type_category(&self) -> ColumnTypeCategory {
match self {
ColumnValue::Str(_) => ColumnTypeCategory::Str,
ColumnValue::Bytes(_) => ColumnTypeCategory::Bytes,
ColumnValue::Numerical(_) => ColumnTypeCategory::Numerical,
ColumnValue::IpAddr(_) => ColumnTypeCategory::IpAddr,
ColumnValue::Bool(_) => ColumnTypeCategory::Bool,
ColumnValue::DateTime(_) => ColumnTypeCategory::DateTime,
}
}
}
fn column_name_strategy() -> impl Strategy<Value = &'static str> {
prop_oneof![Just("c1"), Just("c2")]
}
fn string_strategy() -> impl Strategy<Value = &'static str> {
prop_oneof![Just("a"), Just("b")]
}
fn bytes_strategy() -> impl Strategy<Value = &'static [u8]> {
prop_oneof![Just(&[0u8][..]), Just(&[1u8][..])]
}
// A random column value
fn column_value_strategy() -> impl Strategy<Value = ColumnValue> {
prop_oneof![
10 => string_strategy().prop_map(|s| ColumnValue::Str(s)),
1 => bytes_strategy().prop_map(|b| ColumnValue::Bytes(b)),
40 => num_strategy().prop_map(|n| ColumnValue::Numerical(n)),
1 => (1u16..3u16).prop_map(|ip_addr_byte| ColumnValue::IpAddr(Ipv6Addr::new(
127,
0,
0,
0,
0,
0,
0,
ip_addr_byte
))),
1 => any::<bool>().prop_map(|b| ColumnValue::Bool(b)),
1 => (0_679_723_993i64..1_679_723_995i64)
.prop_map(|val| { ColumnValue::DateTime(DateTime::from_timestamp_secs(val)) })
]
}
// A document contains up to 4 values.
fn doc_strategy() -> impl Strategy<Value = Vec<(&'static str, ColumnValue)>> {
proptest::collection::vec((column_name_strategy(), column_value_strategy()), 0..=4)
}
fn num_docs_strategy() -> impl Strategy<Value = usize> {
prop_oneof!(
// We focus heavily on the 0..2 case as we assume it is sufficient to cover all edge cases.
0usize..=3usize,
// We leave 50% of the effort exploring more defensively.
3usize..=12usize
)
}
// A columnar contains up to 2 docs.
fn columnar_docs_strategy() -> impl Strategy<Value = Vec<Vec<(&'static str, ColumnValue)>>> {
num_docs_strategy()
.prop_flat_map(|num_docs| proptest::collection::vec(doc_strategy(), num_docs))
}
fn columnar_docs_and_mapping_strategy(
) -> impl Strategy<Value = (Vec<Vec<(&'static str, ColumnValue)>>, Vec<RowId>)> {
columnar_docs_strategy().prop_flat_map(|docs| {
permutation_strategy(docs.len()).prop_map(move |permutation| (docs.clone(), permutation))
})
}
fn permutation_strategy(n: usize) -> impl Strategy<Value = Vec<RowId>> {
Just((0u32..n as RowId).collect()).prop_shuffle()
}
fn permutation_and_subset_strategy(n: usize) -> impl Strategy<Value = Vec<usize>> {
let vals: Vec<usize> = (0..n).collect();
subsequence(vals, 0..=n).prop_shuffle()
}
fn build_columnar_with_mapping(
docs: &[Vec<(&'static str, ColumnValue)>],
old_to_new_row_ids_opt: Option<&[RowId]>,
) -> ColumnarReader {
let num_docs = docs.len() as u32;
let mut buffer = Vec::new();
let mut columnar_writer = ColumnarWriter::default();
for (doc_id, vals) in docs.iter().enumerate() {
for (column_name, col_val) in vals {
match *col_val {
ColumnValue::Str(str_val) => {
columnar_writer.record_str(doc_id as u32, column_name, str_val);
}
ColumnValue::Bytes(bytes) => {
columnar_writer.record_bytes(doc_id as u32, column_name, bytes)
}
ColumnValue::Numerical(num) => {
columnar_writer.record_numerical(doc_id as u32, column_name, num);
}
ColumnValue::IpAddr(ip_addr) => {
columnar_writer.record_ip_addr(doc_id as u32, column_name, ip_addr);
}
ColumnValue::Bool(bool_val) => {
columnar_writer.record_bool(doc_id as u32, column_name, bool_val);
}
ColumnValue::DateTime(date_time) => {
columnar_writer.record_datetime(doc_id as u32, column_name, date_time);
}
}
}
}
columnar_writer
.serialize(num_docs, old_to_new_row_ids_opt, &mut buffer)
.unwrap();
let columnar_reader = ColumnarReader::open(buffer).unwrap();
columnar_reader
}
fn build_columnar(docs: &[Vec<(&'static str, ColumnValue)>]) -> ColumnarReader {
build_columnar_with_mapping(docs, None)
}
fn assert_columnar_eq_strict(left: &ColumnarReader, right: &ColumnarReader) {
assert_columnar_eq(left, right, false);
}
fn assert_columnar_eq(
left: &ColumnarReader,
right: &ColumnarReader,
lenient_on_numerical_value: bool,
) {
assert_eq!(left.num_rows(), right.num_rows());
let left_columns = left.list_columns().unwrap();
let right_columns = right.list_columns().unwrap();
assert_eq!(left_columns.len(), right_columns.len());
for i in 0..left_columns.len() {
assert_eq!(left_columns[i].0, right_columns[i].0);
let left_column = left_columns[i].1.open().unwrap();
let right_column = right_columns[i].1.open().unwrap();
assert_dyn_column_eq(&left_column, &right_column, lenient_on_numerical_value);
}
}
fn assert_column_eq<T: Copy + PartialOrd + Debug + Send + Sync + 'static>(
left: &Column<T>,
right: &Column<T>,
) {
assert_eq!(left.get_cardinality(), right.get_cardinality());
assert_eq!(left.num_docs(), right.num_docs());
let num_docs = left.num_docs();
for doc in 0..num_docs {
assert_eq!(
left.index.value_row_ids(doc),
right.index.value_row_ids(doc)
);
}
assert_eq!(left.values.num_vals(), right.values.num_vals());
let num_vals = left.values.num_vals();
for i in 0..num_vals {
assert_eq!(left.values.get_val(i), right.values.get_val(i));
}
}
fn assert_bytes_column_eq(left: &BytesColumn, right: &BytesColumn) {
assert_eq!(
left.term_ord_column.get_cardinality(),
right.term_ord_column.get_cardinality()
);
assert_eq!(left.num_rows(), right.num_rows());
assert_column_eq(&left.term_ord_column, &right.term_ord_column);
assert_eq!(left.dictionary.num_terms(), right.dictionary.num_terms());
let num_terms = left.dictionary.num_terms();
let mut left_terms = left.dictionary.stream().unwrap();
let mut right_terms = right.dictionary.stream().unwrap();
for _ in 0..num_terms {
assert!(left_terms.advance());
assert!(right_terms.advance());
assert_eq!(left_terms.key(), right_terms.key());
}
assert!(!left_terms.advance());
assert!(!right_terms.advance());
}
fn assert_dyn_column_eq(
left_dyn_column: &DynamicColumn,
right_dyn_column: &DynamicColumn,
lenient_on_numerical_value: bool,
) {
assert_eq!(
&left_dyn_column.get_cardinality(),
&right_dyn_column.get_cardinality()
);
match &(left_dyn_column, right_dyn_column) {
(DynamicColumn::Bool(left_col), DynamicColumn::Bool(right_col)) => {
assert_column_eq(left_col, right_col);
}
(DynamicColumn::I64(left_col), DynamicColumn::I64(right_col)) => {
assert_column_eq(left_col, right_col);
}
(DynamicColumn::U64(left_col), DynamicColumn::U64(right_col)) => {
assert_column_eq(left_col, right_col);
}
(DynamicColumn::F64(left_col), DynamicColumn::F64(right_col)) => {
assert_column_eq(left_col, right_col);
}
(DynamicColumn::DateTime(left_col), DynamicColumn::DateTime(right_col)) => {
assert_column_eq(left_col, right_col);
}
(DynamicColumn::IpAddr(left_col), DynamicColumn::IpAddr(right_col)) => {
assert_column_eq(left_col, right_col);
}
(DynamicColumn::Bytes(left_col), DynamicColumn::Bytes(right_col)) => {
assert_bytes_column_eq(left_col, right_col);
}
(DynamicColumn::Str(left_col), DynamicColumn::Str(right_col)) => {
assert_bytes_column_eq(left_col, right_col);
}
(left, right) => {
if lenient_on_numerical_value {
assert_eq!(
ColumnTypeCategory::from(left.column_type()),
ColumnTypeCategory::from(right.column_type())
);
} else {
panic!(
"Column type are not the same: {:?} vs {:?}",
left.column_type(),
right.column_type()
);
}
}
}
}
trait AssertEqualToColumnValue {
fn assert_equal_to_column_value(&self, column_value: &ColumnValue);
}
impl AssertEqualToColumnValue for bool {
fn assert_equal_to_column_value(&self, column_value: &ColumnValue) {
let ColumnValue::Bool(val) = column_value else { panic!() };
assert_eq!(self, val);
}
}
impl AssertEqualToColumnValue for Ipv6Addr {
fn assert_equal_to_column_value(&self, column_value: &ColumnValue) {
let ColumnValue::IpAddr(val) = column_value else { panic!() };
assert_eq!(self, val);
}
}
impl<T: Coerce + PartialEq + Debug + Into<NumericalValue>> AssertEqualToColumnValue for T {
fn assert_equal_to_column_value(&self, column_value: &ColumnValue) {
let ColumnValue::Numerical(num) = column_value else { panic!() };
assert_eq!(self, &T::coerce(*num));
}
}
impl AssertEqualToColumnValue for DateTime {
fn assert_equal_to_column_value(&self, column_value: &ColumnValue) {
let ColumnValue::DateTime(dt) = column_value else { panic!() };
assert_eq!(self, dt);
}
}
fn assert_column_values<
T: AssertEqualToColumnValue + PartialEq + Copy + PartialOrd + Debug + Send + Sync + 'static,
>(
col: &Column<T>,
expected: &HashMap<u32, Vec<&ColumnValue>>,
) {
let mut num_non_empty_rows = 0;
for doc in 0..col.num_docs() {
let doc_vals: Vec<T> = col.values_for_doc(doc).collect();
if doc_vals.is_empty() {
continue;
}
num_non_empty_rows += 1;
let expected_vals = expected.get(&doc).unwrap();
assert_eq!(doc_vals.len(), expected_vals.len());
for (val, &expected) in doc_vals.iter().zip(expected_vals.iter()) {
val.assert_equal_to_column_value(expected)
}
}
assert_eq!(num_non_empty_rows, expected.len());
}
fn assert_bytes_column_values(
col: &BytesColumn,
expected: &HashMap<u32, Vec<&ColumnValue>>,
is_str: bool,
) {
let mut num_non_empty_rows = 0;
let mut buffer = Vec::new();
for doc in 0..col.term_ord_column.num_docs() {
let doc_vals: Vec<u64> = col.term_ords(doc).collect();
if doc_vals.is_empty() {
continue;
}
let expected_vals = expected.get(&doc).unwrap();
assert_eq!(doc_vals.len(), expected_vals.len());
for (&expected_col_val, &ord) in expected_vals.iter().zip(&doc_vals) {
col.ord_to_bytes(ord, &mut buffer).unwrap();
match expected_col_val {
ColumnValue::Str(str_val) => {
assert!(is_str);
assert_eq!(str_val.as_bytes(), &buffer);
}
ColumnValue::Bytes(bytes_val) => {
assert!(!is_str);
assert_eq!(bytes_val, &buffer);
}
_ => {
panic!();
}
}
}
num_non_empty_rows += 1;
}
assert_eq!(num_non_empty_rows, expected.len());
}
// This proptest attempts to create a tiny columnar based on up to 3 rows, and checks that the
// resulting columnar matches the row data.
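// For every (column name, column type category) pair present in the generated rows, the test
// opens the matching column of the serialized columnar and compares its values doc by doc.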
proptest! {
#![proptest_config(ProptestConfig::with_cases(500))]
#[test]
fn test_single_columnar_builder_proptest(docs in columnar_docs_strategy()) {
let columnar = build_columnar(&docs[..]);
assert_eq!(columnar.num_rows() as usize, docs.len());
let mut expected_columns: HashMap<(&str, ColumnTypeCategory), HashMap<u32, Vec<&ColumnValue>> > = Default::default();
for (doc_id, doc_vals) in docs.iter().enumerate() {
for (col_name, col_val) in doc_vals {
expected_columns
.entry((col_name, col_val.column_type_category()))
.or_default()
.entry(doc_id as u32)
.or_default()
.push(col_val);
}
}
let column_list = columnar.list_columns().unwrap();
assert_eq!(expected_columns.len(), column_list.len());
for (column_name, column) in column_list {
let dynamic_column = column.open().unwrap();
let col_category: ColumnTypeCategory = dynamic_column.column_type().into();
let expected_col_values: &HashMap<u32, Vec<&ColumnValue>> = expected_columns.get(&(column_name.as_str(), col_category)).unwrap();
match &dynamic_column {
DynamicColumn::Bool(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::I64(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::U64(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::F64(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::IpAddr(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::DateTime(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::Bytes(col) =>
assert_bytes_column_values(col, expected_col_values, false),
DynamicColumn::Str(col) =>
assert_bytes_column_values(col, expected_col_values, true),
}
}
}
}
// Same as `test_single_columnar_builder_proptest` but with a shuffling mapping.
proptest! {
#![proptest_config(ProptestConfig::with_cases(500))]
#[test]
fn test_single_columnar_builder_with_shuffle_proptest((docs, mapping) in columnar_docs_and_mapping_strategy()) {
let columnar = build_columnar_with_mapping(&docs[..], Some(&mapping));
assert_eq!(columnar.num_rows() as usize, docs.len());
let mut expected_columns: HashMap<(&str, ColumnTypeCategory), HashMap<u32, Vec<&ColumnValue>> > = Default::default();
for (doc_id, doc_vals) in docs.iter().enumerate() {
for (col_name, col_val) in doc_vals {
expected_columns
.entry((col_name, col_val.column_type_category()))
.or_default()
.entry(mapping[doc_id])
.or_default()
.push(col_val);
}
}
let column_list = columnar.list_columns().unwrap();
assert_eq!(expected_columns.len(), column_list.len());
for (column_name, column) in column_list {
let dynamic_column = column.open().unwrap();
let col_category: ColumnTypeCategory = dynamic_column.column_type().into();
let expected_col_values: &HashMap<u32, Vec<&ColumnValue>> = expected_columns.get(&(column_name.as_str(), col_category)).unwrap();
for _doc_id in 0..columnar.num_rows() {
match &dynamic_column {
DynamicColumn::Bool(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::I64(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::U64(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::F64(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::IpAddr(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::DateTime(col) =>
assert_column_values(col, expected_col_values),
DynamicColumn::Bytes(col) =>
assert_bytes_column_values(col, expected_col_values, false),
DynamicColumn::Str(col) =>
assert_bytes_column_values(col, expected_col_values, true),
}
}
}
}
}
// This test creates 2 or 3 random small columnars and attempts to merge them.
// It compares the resulting merged columnar with what would have been obtained by building a
// columnar from the concatenated rows to begin with.
proptest! {
#![proptest_config(ProptestConfig::with_cases(1000))]
#[test]
fn test_columnar_merge_proptest(columnar_docs in proptest::collection::vec(columnar_docs_strategy(), 2..=3)) {
let columnar_readers: Vec<ColumnarReader> = columnar_docs.iter()
.map(|docs| build_columnar(&docs[..]))
.collect::<Vec<_>>();
let columnar_readers_arr: Vec<&ColumnarReader> = columnar_readers.iter().collect();
let mut output: Vec<u8> = Vec::new();
let stack_merge_order = StackMergeOrder::stack(&columnar_readers_arr[..]).into();
crate::merge_columnar(&columnar_readers_arr[..], &[], stack_merge_order, &mut output).unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
let concat_rows: Vec<Vec<(&'static str, ColumnValue)>> = columnar_docs.iter().cloned().flatten().collect();
let expected_merged_columnar = build_columnar(&concat_rows[..]);
assert_columnar_eq_strict(&merged_columnar, &expected_merged_columnar);
}
}
#[test]
fn test_columnar_merging_empty_columnar() {
let columnar_docs: Vec<Vec<Vec<(&str, ColumnValue)>>> =
vec![vec![], vec![vec![("c1", ColumnValue::Str("a"))]]];
let columnar_readers: Vec<ColumnarReader> = columnar_docs
.iter()
.map(|docs| build_columnar(&docs[..]))
.collect::<Vec<_>>();
let columnar_readers_arr: Vec<&ColumnarReader> = columnar_readers.iter().collect();
let mut output: Vec<u8> = Vec::new();
let stack_merge_order = StackMergeOrder::stack(&columnar_readers_arr[..]);
crate::merge_columnar(
&columnar_readers_arr[..],
&[],
crate::MergeRowOrder::Stack(stack_merge_order),
&mut output,
)
.unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
let concat_rows: Vec<Vec<(&'static str, ColumnValue)>> =
columnar_docs.iter().cloned().flatten().collect();
let expected_merged_columnar = build_columnar(&concat_rows[..]);
assert_columnar_eq_strict(&merged_columnar, &expected_merged_columnar);
}
#[test]
fn test_columnar_merging_number_columns() {
let columnar_docs: Vec<Vec<Vec<(&str, ColumnValue)>>> = vec![
// columnar 1
vec![
// doc 1.1
vec![("c2", ColumnValue::Numerical(0i64.into()))],
],
// columnar2
vec![
// doc 2.1
vec![("c2", ColumnValue::Numerical(0u64.into()))],
// doc 2.2
vec![("c2", ColumnValue::Numerical(u64::MAX.into()))],
],
];
let columnar_readers: Vec<ColumnarReader> = columnar_docs
.iter()
.map(|docs| build_columnar(&docs[..]))
.collect::<Vec<_>>();
let columnar_readers_arr: Vec<&ColumnarReader> = columnar_readers.iter().collect();
let mut output: Vec<u8> = Vec::new();
let stack_merge_order = StackMergeOrder::stack(&columnar_readers_arr[..]);
crate::merge_columnar(
&columnar_readers_arr[..],
&[],
crate::MergeRowOrder::Stack(stack_merge_order),
&mut output,
)
.unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
let concat_rows: Vec<Vec<(&'static str, ColumnValue)>> =
columnar_docs.iter().cloned().flatten().collect();
let expected_merged_columnar = build_columnar(&concat_rows[..]);
assert_columnar_eq_strict(&merged_columnar, &expected_merged_columnar);
}
// TODO add non-trivial remap and merge
// TODO test required_columns
// TODO document edge case: required_columns incompatible with values.
fn columnar_docs_and_remap(
) -> impl Strategy<Value = (Vec<Vec<Vec<(&'static str, ColumnValue)>>>, Vec<RowAddr>)> {
proptest::collection::vec(columnar_docs_strategy(), 2..=3).prop_flat_map(
|columnars_docs: Vec<Vec<Vec<(&str, ColumnValue)>>>| {
let row_addrs: Vec<RowAddr> = columnars_docs
.iter()
.enumerate()
.flat_map(|(segment_ord, columnar_docs)| {
(0u32..columnar_docs.len() as u32).map(move |row_id| RowAddr {
segment_ord: segment_ord as u32,
row_id,
})
})
.collect();
permutation_and_subset_strategy(row_addrs.len()).prop_map(move |shuffled_subset| {
let shuffled_row_addr_subset: Vec<RowAddr> =
shuffled_subset.iter().map(|ord| row_addrs[*ord]).collect();
(columnars_docs.clone(), shuffled_row_addr_subset)
})
},
)
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(1000))]
#[test]
fn test_columnar_merge_and_remap_proptest((columnar_docs, shuffle_merge_order) in columnar_docs_and_remap()) {
let shuffled_rows: Vec<Vec<(&'static str, ColumnValue)>> = shuffle_merge_order.iter()
.map(|row_addr| columnar_docs[row_addr.segment_ord as usize][row_addr.row_id as usize].clone())
.collect();
let expected_merged_columnar = build_columnar(&shuffled_rows[..]);
let columnar_readers: Vec<ColumnarReader> = columnar_docs.iter()
.map(|docs| build_columnar(&docs[..]))
.collect::<Vec<_>>();
let columnar_readers_arr: Vec<&ColumnarReader> = columnar_readers.iter().collect();
let mut output: Vec<u8> = Vec::new();
let segment_num_rows: Vec<RowId> = columnar_docs.iter().map(|docs| docs.len() as RowId).collect();
let shuffle_merge_order = ShuffleMergeOrder::for_test(&segment_num_rows, shuffle_merge_order);
crate::merge_columnar(&columnar_readers_arr[..], &[], shuffle_merge_order.into(), &mut output).unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
assert_columnar_eq(&merged_columnar, &expected_merged_columnar, true);
}
}
#[test]
fn test_columnar_merge_empty() {
let columnar_reader_1 = build_columnar(&[]);
let rows: &[Vec<_>] = &[vec![("c1", ColumnValue::Str("a"))]][..];
let columnar_reader_2 = build_columnar(rows);
let mut output: Vec<u8> = Vec::new();
let segment_num_rows: Vec<RowId> = vec![0, 0];
let shuffle_merge_order = ShuffleMergeOrder::for_test(&segment_num_rows, vec![]);
crate::merge_columnar(
&[&columnar_reader_1, &columnar_reader_2],
&[],
shuffle_merge_order.into(),
&mut output,
)
.unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
assert_eq!(merged_columnar.num_rows(), 0);
assert_eq!(merged_columnar.num_columns(), 0);
}
#[test]
fn test_columnar_merge_single_str_column() {
let columnar_reader_1 = build_columnar(&[]);
let rows: &[Vec<_>] = &[vec![("c1", ColumnValue::Str("a"))]][..];
let columnar_reader_2 = build_columnar(rows);
let mut output: Vec<u8> = Vec::new();
let segment_num_rows: Vec<RowId> = vec![0, 1];
let shuffle_merge_order = ShuffleMergeOrder::for_test(
&segment_num_rows,
vec![RowAddr {
segment_ord: 1u32,
row_id: 0u32,
}],
);
crate::merge_columnar(
&[&columnar_reader_1, &columnar_reader_2],
&[],
shuffle_merge_order.into(),
&mut output,
)
.unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
assert_eq!(merged_columnar.num_rows(), 1);
assert_eq!(merged_columnar.num_columns(), 1);
}
#[test]
fn test_delete_decrease_cardinality() {
let columnar_reader_1 = build_columnar(&[]);
let rows: &[Vec<_>] = &[
vec![
("c", ColumnValue::from(0i64)),
("c", ColumnValue::from(0i64)),
],
vec![("c", ColumnValue::from(0i64))],
][..];
// c is multivalued here
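// After the merge below, only segment 1 / row 1 survives, which has a single value for "c",
// so the merged column is expected to come back with Cardinality::Full, as asserted at the end.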
let columnar_reader_2 = build_columnar(rows);
let mut output: Vec<u8> = Vec::new();
let shuffle_merge_order = ShuffleMergeOrder::for_test(
&[0, 2],
vec![RowAddr {
segment_ord: 1u32,
row_id: 1u32,
}],
);
crate::merge_columnar(
&[&columnar_reader_1, &columnar_reader_2],
&[],
shuffle_merge_order.into(),
&mut output,
)
.unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
assert_eq!(merged_columnar.num_rows(), 1);
assert_eq!(merged_columnar.num_columns(), 1);
let cols = merged_columnar.read_columns("c").unwrap();
assert_eq!(cols.len(), 1);
assert_eq!(cols[0].column_type(), ColumnType::I64);
assert_eq!(cols[0].open().unwrap().get_cardinality(), Cardinality::Full);
}


@@ -109,7 +109,7 @@ impl Coerce for f64 {
impl Coerce for DateTime {
fn coerce(value: NumericalValue) -> Self {
let timestamp_micros = i64::coerce(value);
DateTime::from_timestamp_micros(timestamp_micros)
DateTime::from_timestamp_nanos(timestamp_micros)
}
}

common/benches/bench.rs

@@ -0,0 +1,39 @@
#![feature(test)]
extern crate test;
#[cfg(test)]
mod tests {
use rand::seq::IteratorRandom;
use rand::thread_rng;
use tantivy_common::serialize_vint_u32;
use test::Bencher;
#[bench]
fn bench_vint(b: &mut Bencher) {
let vals: Vec<u32> = (0..20_000).collect();
b.iter(|| {
let mut out = 0u64;
for val in vals.iter().cloned() {
let mut buf = [0u8; 8];
serialize_vint_u32(val, &mut buf);
out += u64::from(buf[0]);
}
out
});
}
#[bench]
fn bench_vint_rand(b: &mut Bencher) {
let vals: Vec<u32> = (0..20_000).choose_multiple(&mut thread_rng(), 100_000);
b.iter(|| {
let mut out = 0u64;
for val in vals.iter().cloned() {
let mut buf = [0u8; 8];
serialize_vint_u32(val, &mut buf);
out += u64::from(buf[0]);
}
out
});
}
}


@@ -4,6 +4,8 @@ use std::{fmt, io, u64};
use ownedbytes::OwnedBytes;
use crate::ByteCount;
#[derive(Clone, Copy, Eq, PartialEq)]
pub struct TinySet(u64);
@@ -386,8 +388,8 @@ impl ReadOnlyBitSet {
}
/// Number of bytes used in the bitset representation.
pub fn num_bytes(&self) -> usize {
self.data.len()
pub fn num_bytes(&self) -> ByteCount {
self.data.len().into()
}
}

common/src/byte_count.rs

@@ -0,0 +1,114 @@
use std::iter::Sum;
use std::ops::{Add, AddAssign};
use serde::{Deserialize, Serialize};
/// Indicates space usage in bytes
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct ByteCount(u64);
impl std::fmt::Debug for ByteCount {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.human_readable())
}
}
impl std::fmt::Display for ByteCount {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.human_readable())
}
}
const SUFFIX_AND_THRESHOLD: [(&str, u64); 5] = [
("KB", 1_000),
("MB", 1_000_000),
("GB", 1_000_000_000),
("TB", 1_000_000_000_000),
("PB", 1_000_000_000_000_000),
];
impl ByteCount {
#[inline]
pub fn get_bytes(&self) -> u64 {
self.0
}
pub fn human_readable(&self) -> String {
for (suffix, threshold) in SUFFIX_AND_THRESHOLD.iter().rev() {
if self.get_bytes() >= *threshold {
let unit_num = self.get_bytes() as f64 / *threshold as f64;
return format!("{unit_num:.2} {suffix}");
}
}
format!("{:.2} B", self.get_bytes())
}
}
impl From<u64> for ByteCount {
fn from(value: u64) -> Self {
ByteCount(value)
}
}
impl From<usize> for ByteCount {
fn from(value: usize) -> Self {
ByteCount(value as u64)
}
}
impl Sum for ByteCount {
#[inline]
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
iter.fold(ByteCount::default(), |acc, x| acc + x)
}
}
impl PartialEq<u64> for ByteCount {
#[inline]
fn eq(&self, other: &u64) -> bool {
self.get_bytes() == *other
}
}
impl PartialOrd<u64> for ByteCount {
#[inline]
fn partial_cmp(&self, other: &u64) -> Option<std::cmp::Ordering> {
self.get_bytes().partial_cmp(other)
}
}
impl Add for ByteCount {
type Output = Self;
#[inline]
fn add(self, other: Self) -> Self {
Self(self.get_bytes() + other.get_bytes())
}
}
impl AddAssign for ByteCount {
#[inline]
fn add_assign(&mut self, other: Self) {
*self = Self(self.get_bytes() + other.get_bytes());
}
}
#[cfg(test)]
mod test {
use crate::ByteCount;
#[test]
fn test_bytes() {
assert_eq!(ByteCount::from(0u64).human_readable(), "0 B");
assert_eq!(ByteCount::from(300u64).human_readable(), "300 B");
assert_eq!(ByteCount::from(1_000_000u64).human_readable(), "1.00 MB");
assert_eq!(ByteCount::from(1_500_000u64).human_readable(), "1.50 MB");
assert_eq!(
ByteCount::from(1_500_000_000u64).human_readable(),
"1.50 GB"
);
assert_eq!(
ByteCount::from(3_213_000_000_000u64).human_readable(),
"3.21 TB"
);
}
}


@@ -1,25 +1,33 @@
#![allow(deprecated)]
use std::fmt;
use serde::{Deserialize, Serialize};
use time::format_description::well_known::Rfc3339;
use time::{OffsetDateTime, PrimitiveDateTime, UtcOffset};
/// DateTime Precision
/// Precision with which datetimes are truncated when stored in fast fields. This setting is only
/// relevant for fast fields. In the docstore, datetimes are always saved with nanosecond precision.
#[derive(
Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Default,
)]
#[serde(rename_all = "lowercase")]
pub enum DatePrecision {
/// Seconds precision
pub enum DateTimePrecision {
/// Second precision.
#[default]
Seconds,
/// Milli-seconds precision.
/// Millisecond precision.
Milliseconds,
/// Micro-seconds precision.
/// Microsecond precision.
Microseconds,
/// Nanosecond precision.
Nanoseconds,
}
/// A date/time value with microsecond precision.
#[deprecated(since = "0.20.0", note = "Use `DateTimePrecision` instead")]
pub type DatePrecision = DateTimePrecision;
/// A date/time value with nanoseconds precision.
///
/// This timestamp does not carry any explicit time zone information.
/// Users are responsible for applying the provided conversion
@@ -31,29 +39,46 @@ pub enum DatePrecision {
/// to prevent unintended usage.
#[derive(Clone, Default, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DateTime {
// Timestamp in microseconds.
pub(crate) timestamp_micros: i64,
// Timestamp in nanoseconds.
pub(crate) timestamp_nanos: i64,
}
impl DateTime {
/// Minimum possible `DateTime` value.
pub const MIN: DateTime = DateTime {
timestamp_nanos: i64::MIN,
};
/// Maximum possible `DateTime` value.
pub const MAX: DateTime = DateTime {
timestamp_nanos: i64::MAX,
};
/// Create new from UNIX timestamp in seconds
pub const fn from_timestamp_secs(seconds: i64) -> Self {
Self {
timestamp_micros: seconds * 1_000_000,
timestamp_nanos: seconds * 1_000_000_000,
}
}
/// Create new from UNIX timestamp in milliseconds
pub const fn from_timestamp_millis(milliseconds: i64) -> Self {
Self {
timestamp_micros: milliseconds * 1_000,
timestamp_nanos: milliseconds * 1_000_000,
}
}
/// Create new from UNIX timestamp in microseconds.
pub const fn from_timestamp_micros(microseconds: i64) -> Self {
Self {
timestamp_micros: microseconds,
timestamp_nanos: microseconds * 1_000,
}
}
/// Create new from UNIX timestamp in nanoseconds.
pub const fn from_timestamp_nanos(nanoseconds: i64) -> Self {
Self {
timestamp_nanos: nanoseconds,
}
}
@@ -61,9 +86,9 @@ impl DateTime {
///
/// The given date/time is converted to UTC and the actual
/// time zone is discarded.
pub const fn from_utc(dt: OffsetDateTime) -> Self {
let timestamp_micros = dt.unix_timestamp() * 1_000_000 + dt.microsecond() as i64;
Self { timestamp_micros }
pub fn from_utc(dt: OffsetDateTime) -> Self {
let timestamp_nanos = dt.unix_timestamp_nanos() as i64;
Self { timestamp_nanos }
}
/// Create new from `PrimitiveDateTime`
@@ -77,23 +102,27 @@ impl DateTime {
/// Convert to UNIX timestamp in seconds.
pub const fn into_timestamp_secs(self) -> i64 {
self.timestamp_micros / 1_000_000
self.timestamp_nanos / 1_000_000_000
}
/// Convert to UNIX timestamp in milliseconds.
pub const fn into_timestamp_millis(self) -> i64 {
self.timestamp_micros / 1_000
self.timestamp_nanos / 1_000_000
}
/// Convert to UNIX timestamp in microseconds.
pub const fn into_timestamp_micros(self) -> i64 {
self.timestamp_micros
self.timestamp_nanos / 1_000
}
/// Convert to UNIX timestamp in nanoseconds.
pub const fn into_timestamp_nanos(self) -> i64 {
self.timestamp_nanos
}
/// Convert to UTC `OffsetDateTime`
pub fn into_utc(self) -> OffsetDateTime {
let timestamp_nanos = self.timestamp_micros as i128 * 1000;
let utc_datetime = OffsetDateTime::from_unix_timestamp_nanos(timestamp_nanos)
let utc_datetime = OffsetDateTime::from_unix_timestamp_nanos(self.timestamp_nanos as i128)
.expect("valid UNIX timestamp");
debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset());
utc_datetime
@@ -116,20 +145,21 @@ impl DateTime {
}
/// Truncates the timestamp to the corresponding precision.
pub fn truncate(self, precision: DatePrecision) -> Self {
pub fn truncate(self, precision: DateTimePrecision) -> Self {
let truncated_timestamp_micros = match precision {
DatePrecision::Seconds => (self.timestamp_micros / 1_000_000) * 1_000_000,
DatePrecision::Milliseconds => (self.timestamp_micros / 1_000) * 1_000,
DatePrecision::Microseconds => self.timestamp_micros,
DateTimePrecision::Seconds => (self.timestamp_nanos / 1_000_000_000) * 1_000_000_000,
DateTimePrecision::Milliseconds => (self.timestamp_nanos / 1_000_000) * 1_000_000,
DateTimePrecision::Microseconds => (self.timestamp_nanos / 1_000) * 1_000,
DateTimePrecision::Nanoseconds => self.timestamp_nanos,
};
Self {
timestamp_micros: truncated_timestamp_micros,
timestamp_nanos: truncated_timestamp_micros,
}
}
}
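// Sketch of the truncation arithmetic above, using an illustrative timestamp:
// 1_654_321_987_654_321 ns -> Seconds:      1_654_321_000_000_000 ns
//                             Milliseconds: 1_654_321_987_000_000 ns
//                             Microseconds: 1_654_321_987_654_000 ns
//                             Nanoseconds:  1_654_321_987_654_321 ns (unchanged)
// e.g. DateTime::from_timestamp_nanos(1_654_321_987_654_321)
//     .truncate(DateTimePrecision::Milliseconds)
//     .into_timestamp_nanos() == 1_654_321_987_000_000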
impl fmt::Debug for DateTime {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let utc_rfc3339 = self.into_utc().format(&Rfc3339).map_err(|_| fmt::Error)?;
f.write_str(&utc_rfc3339)
}


@@ -5,7 +5,7 @@ use std::{fmt, io};
use async_trait::async_trait;
use ownedbytes::{OwnedBytes, StableDeref};
use crate::HasLen;
use crate::{ByteCount, HasLen};
/// Objects that represents files sections in tantivy.
///
@@ -216,6 +216,11 @@ impl FileSlice {
pub fn slice_to(&self, to_offset: usize) -> FileSlice {
self.slice(0..to_offset)
}
/// Returns the byte count of the FileSlice.
pub fn num_bytes(&self) -> ByteCount {
self.range.len().into()
}
}
#[async_trait]


@@ -5,6 +5,7 @@ use std::ops::Deref;
pub use byteorder::LittleEndian as Endianness;
mod bitset;
mod byte_count;
mod datetime;
pub mod file_slice;
mod group_by;
@@ -12,13 +13,15 @@ mod serialize;
mod vint;
mod writer;
pub use bitset::*;
pub use datetime::{DatePrecision, DateTime};
pub use byte_count::ByteCount;
#[allow(deprecated)]
pub use datetime::DatePrecision;
pub use datetime::{DateTime, DateTimePrecision};
pub use group_by::GroupByIteratorExtended;
pub use ownedbytes::{OwnedBytes, StableDeref};
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
pub use vint::{
deserialize_vint_u128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u128,
serialize_vint_u32, write_u32_vint, VInt, VIntU128,
read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, VIntU128,
};
pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};

View File

@@ -1,8 +1,6 @@
use std::io;
use std::io::{Read, Write};
use byteorder::{ByteOrder, LittleEndian};
use super::BinarySerializable;
/// Variable int serializes a u128 number
@@ -19,26 +17,6 @@ pub fn serialize_vint_u128(mut val: u128, output: &mut Vec<u8>) {
}
}
/// Deserializes a u128 number
///
/// Returns the number and the slice after the vint
pub fn deserialize_vint_u128(data: &[u8]) -> io::Result<(u128, &[u8])> {
let mut result = 0u128;
let mut shift = 0u64;
for i in 0..19 {
let b = data[i];
result |= u128::from(b % 128u8) << shift;
if b >= STOP_BIT {
return Ok((result, &data[i + 1..]));
}
shift += 7;
}
Err(io::Error::new(
io::ErrorKind::InvalidData,
"Failed to deserialize u128 vint",
))
}
/// Wrapper over a `u128` that serializes as a variable int.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct VIntU128(pub u128);
@@ -80,17 +58,13 @@ pub struct VInt(pub u64);
const STOP_BIT: u8 = 128;
#[inline]
pub fn serialize_vint_u32(val: u32, buf: &mut [u8; 8]) -> &[u8] {
const START_2: u64 = 1 << 7;
const START_3: u64 = 1 << 14;
const START_4: u64 = 1 << 21;
const START_5: u64 = 1 << 28;
const STOP_1: u64 = START_2 - 1;
const STOP_2: u64 = START_3 - 1;
const STOP_3: u64 = START_4 - 1;
const STOP_4: u64 = START_5 - 1;
const MASK_1: u64 = 127;
const MASK_2: u64 = MASK_1 << 7;
const MASK_3: u64 = MASK_2 << 7;
@@ -99,25 +73,29 @@ pub fn serialize_vint_u32(val: u32, buf: &mut [u8; 8]) -> &[u8] {
let val = u64::from(val);
const STOP_BIT: u64 = 128u64;
let (res, num_bytes) = match val {
0..=STOP_1 => (val | STOP_BIT, 1),
START_2..=STOP_2 => (
let (res, num_bytes) = if val < START_2 {
(val | STOP_BIT, 1)
} else if val < START_3 {
(
(val & MASK_1) | ((val & MASK_2) << 1) | (STOP_BIT << (8)),
2,
),
START_3..=STOP_3 => (
)
} else if val < START_4 {
(
(val & MASK_1) | ((val & MASK_2) << 1) | ((val & MASK_3) << 2) | (STOP_BIT << (8 * 2)),
3,
),
START_4..=STOP_4 => (
)
} else if val < START_5 {
(
(val & MASK_1)
| ((val & MASK_2) << 1)
| ((val & MASK_3) << 2)
| ((val & MASK_4) << 3)
| (STOP_BIT << (8 * 3)),
4,
),
_ => (
)
} else {
(
(val & MASK_1)
| ((val & MASK_2) << 1)
| ((val & MASK_3) << 2)
@@ -125,9 +103,9 @@ pub fn serialize_vint_u32(val: u32, buf: &mut [u8; 8]) -> &[u8] {
| ((val & MASK_5) << 4)
| (STOP_BIT << (8 * 4)),
5,
),
)
};
LittleEndian::write_u64(&mut buf[..], res);
*buf = res.to_le_bytes();
&buf[0..num_bytes]
}
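// Worked example for the encoding above: val = 300 (0b1_0010_1100) needs two bytes.
// Byte 0 holds the low 7 bits (300 & 127 = 44); byte 1 holds the next 7 bits (300 >> 7 = 2)
// plus the stop bit in its top bit (2 | 128 = 130), so the result is [44, 130]:
// let mut buf = [0u8; 8];
// assert_eq!(serialize_vint_u32(300, &mut buf), &[44u8, 130u8][..]);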
@@ -245,7 +223,6 @@ impl BinarySerializable for VInt {
mod tests {
use super::{serialize_vint_u32, BinarySerializable, VInt};
use crate::vint::{deserialize_vint_u128, serialize_vint_u128, VIntU128};
fn aux_test_vint(val: u64) {
let mut v = [14u8; 10];
@@ -284,27 +261,7 @@ mod tests {
let mut buffer2 = [0u8; 8];
let len_vint = VInt(val as u64).serialize_into(&mut buffer);
let res2 = serialize_vint_u32(val, &mut buffer2);
assert_eq!(&buffer[..len_vint], res2, "array wrong for {}", val);
}
fn aux_test_vint_u128(val: u128) {
let mut data = vec![];
serialize_vint_u128(val, &mut data);
let (deser_val, _data) = deserialize_vint_u128(&data).unwrap();
assert_eq!(val, deser_val);
let mut out = vec![];
VIntU128(val).serialize(&mut out).unwrap();
let deser_val = VIntU128::deserialize(&mut &out[..]).unwrap();
assert_eq!(val, deser_val.0);
}
#[test]
fn test_vint_u128() {
aux_test_vint_u128(0);
aux_test_vint_u128(1);
aux_test_vint_u128(u128::MAX / 3);
aux_test_vint_u128(u128::MAX);
assert_eq!(&buffer[..len_vint], res2, "array wrong for {val}");
}
#[test]

View File

@@ -7,13 +7,8 @@
// ---
use serde_json::{Deserializer, Value};
use tantivy::aggregation::agg_req::{
Aggregation, Aggregations, BucketAggregation, BucketAggregationType, MetricAggregation,
RangeAggregation,
};
use tantivy::aggregation::agg_req::Aggregations;
use tantivy::aggregation::agg_result::AggregationResults;
use tantivy::aggregation::bucket::RangeAggregationRange;
use tantivy::aggregation::metric::AverageAggregation;
use tantivy::aggregation::AggregationCollector;
use tantivy::query::AllQuery;
use tantivy::schema::{self, IndexRecordOption, Schema, TextFieldIndexing, FAST};
@@ -42,7 +37,7 @@ fn main() -> tantivy::Result<()> {
.set_index_option(IndexRecordOption::WithFreqs)
.set_tokenizer("raw"),
)
.set_fast()
.set_fast(None)
.set_stored();
schema_builder.add_text_field("category", text_fieldtype);
schema_builder.add_f64_field("stock", FAST);
@@ -192,58 +187,11 @@ fn main() -> tantivy::Result<()> {
//
let agg_req: Aggregations = serde_json::from_str(agg_req_str)?;
let collector = AggregationCollector::from_aggs(agg_req, None);
let collector = AggregationCollector::from_aggs(agg_req, Default::default());
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res2: Value = serde_json::to_value(agg_res)?;
// ### Request Rust API
//
// This is exactly the same request as above, but via the rust structures.
//
let agg_req: Aggregations = vec![(
"group_by_stock".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Range(RangeAggregation {
field: "stock".to_string(),
ranges: vec![
RangeAggregationRange {
key: Some("few".into()),
from: None,
to: Some(1f64),
},
RangeAggregationRange {
key: Some("some".into()),
from: Some(1f64),
to: Some(10f64),
},
RangeAggregationRange {
key: Some("many".into()),
from: Some(10f64),
to: None,
},
],
..Default::default()
}),
sub_aggregation: vec![(
"average_price".to_string(),
Aggregation::Metric(MetricAggregation::Average(
AverageAggregation::from_field_name("price".to_string()),
)),
)]
.into_iter()
.collect(),
}),
)]
.into_iter()
.collect();
let collector = AggregationCollector::from_aggs(agg_req, None);
// We use the `AllQuery` which will pass all documents to the AggregationCollector.
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res1: Value = serde_json::to_value(agg_res)?;
let res: Value = serde_json::to_value(agg_res)?;
// ### Aggregation Result
//
@@ -261,8 +209,7 @@ fn main() -> tantivy::Result<()> {
}
"#;
let expected_json: Value = serde_json::from_str(expected_res)?;
assert_eq!(expected_json, res1);
assert_eq!(expected_json, res2);
assert_eq!(expected_json, res);
// ### Request 2
//
@@ -287,7 +234,7 @@ fn main() -> tantivy::Result<()> {
let agg_req: Aggregations = serde_json::from_str(agg_req_str)?;
let collector = AggregationCollector::from_aggs(agg_req, None);
let collector = AggregationCollector::from_aggs(agg_req, Default::default());
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res: Value = serde_json::to_value(agg_res)?;


@@ -13,7 +13,7 @@ fn main() -> tantivy::Result<()> {
let opts = DateOptions::from(INDEXED)
.set_stored()
.set_fast()
.set_precision(tantivy::DatePrecision::Seconds);
.set_precision(tantivy::DateTimePrecision::Seconds);
// Add `occurred_at` date field type
let occurred_at = schema_builder.add_date_field("occurred_at", opts);
let event_type = schema_builder.add_text_field("event", STRING | STORED);


@@ -96,7 +96,7 @@ fn main() -> tantivy::Result<()> {
let mut index_writer_wlock = index_writer.write().unwrap();
index_writer_wlock.commit()?
};
println!("committed with opstamp {}", opstamp);
println!("committed with opstamp {opstamp}");
thread::sleep(Duration::from_millis(500));
}


@@ -84,7 +84,7 @@ fn main() -> tantivy::Result<()> {
// Doc 0: TermFreq 2: [0, 4]
// Doc 2: TermFreq 1: [0]
// ```
println!("Doc {}: TermFreq {}: {:?}", doc_id, term_freq, positions);
println!("Doc {doc_id}: TermFreq {term_freq}: {positions:?}");
doc_id = segment_postings.advance();
}
}
@@ -125,7 +125,7 @@ fn main() -> tantivy::Result<()> {
// Once again these docs MAY contain deleted documents as well.
let docs = block_segment_postings.docs();
// Prints `Docs [0, 2].`
println!("Docs {:?}", docs);
println!("Docs {docs:?}");
block_segment_postings.advance();
}
}


@@ -0,0 +1,79 @@
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::{doc, Index, ReloadPolicy, Result};
use tempfile::TempDir;
fn main() -> Result<()> {
let index_path = TempDir::new()?;
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("title", TEXT | STORED);
schema_builder.add_text_field("body", TEXT);
let schema = schema_builder.build();
let title = schema.get_field("title").unwrap();
let body = schema.get_field("body").unwrap();
let index = Index::create_in_dir(&index_path, schema)?;
let mut index_writer = index.writer(50_000_000)?;
index_writer.add_document(doc!(
title => "The Old Man and the Sea",
body => "He was an old man who fished alone in a skiff in the Gulf Stream and he had gone \
eighty-four days now without taking a fish.",
))?;
index_writer.add_document(doc!(
title => "Of Mice and Men",
body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
bank and runs deep and green. The water is warm too, for it has slipped twinkling \
over the yellow sands in the sunlight before reaching the narrow pool. On one \
side of the river the golden foothill slopes curve up to the strong and rocky \
Gabilan Mountains, but on the valley side the water is lined with trees—willows \
fresh and green with every spring, carrying in their lower leaf junctures the \
debris of the winter's flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool"
))?;
// Multivalued fields just need to be repeated.
index_writer.add_document(doc!(
title => "Frankenstein",
title => "The Modern Prometheus",
body => "You will rejoice to hear that no disaster has accompanied the commencement of an \
enterprise which you have regarded with such evil forebodings. I arrived here \
yesterday, and my first task is to assure my dear sister of my welfare and \
increasing confidence in the success of my undertaking."
))?;
index_writer.commit()?;
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::OnCommit)
.try_into()?;
let searcher = reader.searcher();
let query_parser = QueryParser::for_index(&index, vec![title, body]);
// This will match documents containing the phrase "in the"
// followed by some word starting with "su",
// i.e. it will match "in the sunlight" and "in the success",
// but not "in the Gulf Stream".
let query = query_parser.parse_query("\"in the su\"*")?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
let mut titles = top_docs
.into_iter()
.map(|(_score, doc_address)| {
let doc = searcher.doc(doc_address)?;
let title = doc.get_first(title).unwrap().as_text().unwrap().to_owned();
Ok(title)
})
.collect::<Result<Vec<_>>>()?;
titles.sort_unstable();
assert_eq!(titles, ["Frankenstein", "Of Mice and Men"]);
Ok(())
}


@@ -12,12 +12,13 @@
use tantivy::collector::{Count, TopDocs};
use tantivy::query::TermQuery;
use tantivy::schema::*;
use tantivy::tokenizer::{PreTokenizedString, SimpleTokenizer, Token, Tokenizer};
use tantivy::tokenizer::{PreTokenizedString, SimpleTokenizer, Token, TokenStream, Tokenizer};
use tantivy::{doc, Index, ReloadPolicy};
use tempfile::TempDir;
fn pre_tokenize_text(text: &str) -> Vec<Token> {
let mut token_stream = SimpleTokenizer.token_stream(text);
let mut tokenizer = SimpleTokenizer::default();
let mut token_stream = tokenizer.token_stream(text);
let mut tokens = vec![];
while token_stream.advance() {
tokens.push(token_stream.token().clone());


@@ -56,7 +56,7 @@ fn main() -> tantivy::Result<()> {
for (score, doc_address) in top_docs {
let doc = searcher.doc(doc_address)?;
let snippet = snippet_generator.snippet_from_doc(&doc);
println!("Document score {}:", score);
println!("Document score {score}:");
println!(
"title: {}",
doc.get_first(title).unwrap().as_text().unwrap()


@@ -50,12 +50,13 @@ fn main() -> tantivy::Result<()> {
// This tokenizer lowers all of the text (to help with stop word matching)
// then removes all instances of `the` and `and` from the corpus
let tokenizer = TextAnalyzer::from(SimpleTokenizer)
let tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
.filter(LowerCaser)
.filter(StopWordFilter::remove(vec![
"the".to_string(),
"and".to_string(),
]));
]))
.build();
index.tokenizers().register("stoppy", tokenizer);
@@ -105,7 +106,7 @@ fn main() -> tantivy::Result<()> {
for (score, doc_address) in top_docs {
let retrieved_doc = searcher.doc(doc_address)?;
println!("\n==\nDocument score {}:", score);
println!("\n==\nDocument score {score}:");
println!("{}", schema.to_json(&retrieved_doc));
}


@@ -139,6 +139,16 @@ impl OwnedBytes {
self.advance(8);
u64::from_le_bytes(octlet)
}
/// Reads an `u32` encoded as little-endian from the `OwnedBytes` and advance by 4 bytes.
#[inline]
pub fn read_u32(&mut self) -> u32 {
assert!(self.len() > 3);
let quad: [u8; 4] = self.as_slice()[..4].try_into().unwrap();
self.advance(4);
u32::from_le_bytes(quad)
}
}
impl fmt::Debug for OwnedBytes {
@@ -150,7 +160,7 @@ impl fmt::Debug for OwnedBytes {
} else {
self.as_slice()
};
write!(f, "OwnedBytes({:?}, len={})", bytes_truncated, self.len())
write!(f, "OwnedBytes({bytes_truncated:?}, len={})", self.len())
}
}
@@ -249,12 +259,12 @@ mod tests {
fn test_owned_bytes_debug() {
let short_bytes = OwnedBytes::new(b"abcd".as_ref());
assert_eq!(
format!("{:?}", short_bytes),
format!("{short_bytes:?}"),
"OwnedBytes([97, 98, 99, 100], len=4)"
);
let long_bytes = OwnedBytes::new(b"abcdefghijklmnopq".as_ref());
assert_eq!(
format!("{:?}", long_bytes),
format!("{long_bytes:?}"),
"OwnedBytes([97, 98, 99, 100, 101, 102, 103, 104, 105, 106], len=17)"
);
}


@@ -1,6 +1,6 @@
[package]
name = "tantivy-query-grammar"
version = "0.19.0"
version = "0.20.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]


@@ -7,7 +7,9 @@ use combine::parser::Parser;
pub use crate::occur::Occur;
use crate::query_grammar::parse_to_ast;
pub use crate::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
pub use crate::user_input_ast::{
Delimiter, UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral,
};
pub struct Error;


@@ -5,13 +5,14 @@ use combine::parser::range::{take_while, take_while1};
use combine::parser::repeat::escaped;
use combine::parser::Parser;
use combine::{
attempt, between, choice, eof, many, many1, one_of, optional, parser, satisfy, sep_by,
any, attempt, between, choice, eof, many, many1, one_of, optional, parser, satisfy, sep_by,
skip_many1, value,
};
use once_cell::sync::Lazy;
use regex::Regex;
use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
use crate::user_input_ast::Delimiter;
use crate::Occur;
// Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to
@@ -56,7 +57,7 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
!c.is_whitespace() && ![':', '^', '{', '}', '"', '[', ']', '(', ')'].contains(&c)
})),
)
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
.map(|(s1, s2): (char, String)| format!("{s1}{s2}"))
.and_then(|s: String| match s.as_str() {
"OR" | "AND " | "NOT" => Err(StringStreamError::UnexpectedParse),
_ => Ok(s),
@@ -74,7 +75,7 @@ fn relaxed_word<'a>() -> impl Parser<&'a str, Output = String> {
!c.is_whitespace() && !['{', '}', '"', '[', ']', '(', ')'].contains(&c)
})),
)
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
.map(|(s1, s2): (char, String)| format!("{s1}{s2}"))
}
/// Parses a date time according to rfc3339
@@ -133,17 +134,50 @@ fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
recognize((date, char('T'), time))
}
fn term_val<'a>() -> impl Parser<&'a str, Output = String> {
let phrase = char('"').with(many1(satisfy(|c| c != '"'))).skip(char('"'));
negative_number().or(phrase.or(word()))
fn escaped_character<'a>() -> impl Parser<&'a str, Output = char> {
(char('\\'), any()).map(|(_, x)| x)
}
fn escaped_string<'a>(delimiter: char) -> impl Parser<&'a str, Output = String> {
(
char(delimiter),
many(choice((
escaped_character(),
satisfy(move |c: char| c != delimiter),
))),
char(delimiter),
)
.map(|(_, s, _)| s)
}
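// Example: with delimiter '"', the raw query text "ab\"cd" (a quoted phrase containing an
// escaped quote) parses to the string ab"cd, since escaped_character() swallows the backslash.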
fn term_val<'a>() -> impl Parser<&'a str, Output = (Delimiter, String)> {
let double_quotes = escaped_string('"').map(|phrase| (Delimiter::DoubleQuotes, phrase));
let single_quotes = escaped_string('\'').map(|phrase| (Delimiter::SingleQuotes, phrase));
let text_no_delimiter = word().map(|text| (Delimiter::None, text));
negative_number()
.map(|negative_number_str| (Delimiter::None, negative_number_str))
.or(double_quotes)
.or(single_quotes)
.or(text_no_delimiter)
}
fn term_query<'a>() -> impl Parser<&'a str, Output = UserInputLiteral> {
(field_name(), term_val(), slop_val()).map(|(field_name, phrase, slop)| UserInputLiteral {
field_name: Some(field_name),
phrase,
slop,
})
(field_name(), term_val(), slop_or_prefix_val()).map(
|(field_name, (delimiter, phrase), (slop, prefix))| UserInputLiteral {
field_name: Some(field_name),
phrase,
delimiter,
slop,
prefix,
},
)
}
fn slop_or_prefix_val<'a>() -> impl Parser<&'a str, Output = (u32, bool)> {
let prefix_val = char('*').map(|_ast| (0, true));
let slop_val = slop_val().map(|slop| (slop, false));
prefix_val.or(slop_val)
}
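// Illustration of the suffix parsing above: a trailing `*` yields (0, true) (phrase prefix),
// a trailing `~3` yields (3, false) (slop), and with neither suffix slop_val() falls back to
// a slop of 0, i.e. (0, false).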
fn slop_val<'a>() -> impl Parser<&'a str, Output = u32> {
@@ -159,11 +193,16 @@ fn slop_val<'a>() -> impl Parser<&'a str, Output = u32> {
}
fn literal<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
let term_default_field = (term_val(), slop_val()).map(|(phrase, slop)| UserInputLiteral {
field_name: None,
phrase,
slop,
});
let term_default_field =
(term_val(), slop_or_prefix_val()).map(|((delimiter, phrase), (slop, prefix))| {
UserInputLiteral {
field_name: None,
phrase,
delimiter,
slop,
prefix,
}
});
attempt(term_query())
.or(term_default_field)
@@ -178,9 +217,9 @@ fn negative_number<'a>() -> impl Parser<&'a str, Output = String> {
)
.map(|(s1, s2, s3): (char, String, Option<(char, String)>)| {
if let Some(('.', s3)) = s3 {
format!("{}{}.{}", s1, s2, s3)
format!("{s1}{s2}.{s3}")
} else {
format!("{}{}", s1, s2)
format!("{s1}{s2}")
}
})
}
@@ -268,7 +307,11 @@ fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
/// Function that parses a set out of a Stream
/// Supports ranges like: `IN [val1 val2 val3]`
fn set<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
let term_list = between(char('['), char(']'), sep_by(term_val(), spaces()));
let term_list = between(
char('['),
char(']'),
sep_by(term_val().map(|(_delimiter, text)| text), spaces()),
);
let set_content = ((string("IN"), spaces()), term_list).map(|(_, elements)| elements);
@@ -401,6 +444,28 @@ pub fn parse_to_ast<'a>() -> impl Parser<&'a str, Output = UserInputAst> {
spaces()
.with(optional(ast()).skip(eof()))
.map(|opt_ast| opt_ast.unwrap_or_else(UserInputAst::empty_query))
.map(rewrite_ast)
}
/// Removes unnecessary children clauses in AST
///
/// Motivated by [issue #1433](https://github.com/quickwit-oss/tantivy/issues/1433)
fn rewrite_ast(mut input: UserInputAst) -> UserInputAst {
if let UserInputAst::Clause(terms) = &mut input {
for term in terms {
rewrite_ast_clause(term);
}
}
input
}
fn rewrite_ast_clause(input: &mut (Option<Occur>, UserInputAst)) {
match input {
(None, UserInputAst::Clause(ref mut clauses)) if clauses.len() == 1 => {
*input = clauses.pop().unwrap(); // safe because clauses.len() == 1
}
_ => {}
}
}
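// Illustration: a child entry (None, UserInputAst::Clause(vec![(Some(Occur::Must), ast)]))
// is replaced by (Some(Occur::Must), ast), stripping one redundant level of nesting.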
#[cfg(test)]
@@ -419,9 +484,7 @@ mod test {
fn assert_nearly_equals(expected: f64, val: f64) {
assert!(
nearly_equals(val, expected),
"Got {}, expected {}.",
val,
expected
"Got {val}, expected {expected}."
);
}
@@ -466,9 +529,10 @@ mod test {
assert_eq!(remaining, "");
}
#[track_caller]
fn test_parse_query_to_ast_helper(query: &str, expected: &str) {
let query = parse_to_ast().parse(query).unwrap().0;
let query_str = format!("{:?}", query);
let query_str = format!("{query:?}");
assert_eq!(query_str, expected);
}
@@ -484,8 +548,9 @@ mod test {
#[test]
fn test_parse_query_to_ast_hyphen() {
test_parse_query_to_ast_helper("\"www-form-encoded\"", "\"www-form-encoded\"");
test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
test_parse_query_to_ast_helper("'www-form-encoded'", "'www-form-encoded'");
test_parse_query_to_ast_helper("www-form-encoded", "www-form-encoded");
test_parse_query_to_ast_helper("www-form-encoded", "www-form-encoded");
}
#[test]
@@ -494,25 +559,25 @@ mod test {
format!("{:?}", parse_to_ast().parse("NOT")),
"Err(UnexpectedParse)"
);
test_parse_query_to_ast_helper("NOTa", "\"NOTa\"");
test_parse_query_to_ast_helper("NOT a", "(-\"a\")");
test_parse_query_to_ast_helper("NOTa", "NOTa");
test_parse_query_to_ast_helper("NOT a", "(-a)");
}
#[test]
fn test_boosting() {
assert!(parse_to_ast().parse("a^2^3").is_err());
assert!(parse_to_ast().parse("a^2^").is_err());
test_parse_query_to_ast_helper("a^3", "(\"a\")^3");
test_parse_query_to_ast_helper("a^3 b^2", "(*(\"a\")^3 *(\"b\")^2)");
test_parse_query_to_ast_helper("a^1", "\"a\"");
test_parse_query_to_ast_helper("a^3", "(a)^3");
test_parse_query_to_ast_helper("a^3 b^2", "(*(a)^3 *(b)^2)");
test_parse_query_to_ast_helper("a^1", "a");
}
#[test]
fn test_parse_query_to_ast_binary_op() {
test_parse_query_to_ast_helper("a AND b", "(+\"a\" +\"b\")");
test_parse_query_to_ast_helper("a OR b", "(?\"a\" ?\"b\")");
test_parse_query_to_ast_helper("a OR b AND c", "(?\"a\" ?(+\"b\" +\"c\"))");
test_parse_query_to_ast_helper("a AND b AND c", "(+\"a\" +\"b\" +\"c\")");
test_parse_query_to_ast_helper("a AND b", "(+a +b)");
test_parse_query_to_ast_helper("a OR b", "(?a ?b)");
test_parse_query_to_ast_helper("a OR b AND c", "(?a ?(+b +c))");
test_parse_query_to_ast_helper("a AND b AND c", "(+a +b +c)");
assert_eq!(
format!("{:?}", parse_to_ast().parse("a OR b aaa")),
"Err(UnexpectedParse)"
@@ -554,7 +619,7 @@ mod test {
fn test_occur_leaf() {
let ((occur, ast), _) = super::occur_leaf().parse("+abc").unwrap();
assert_eq!(occur, Some(Occur::Must));
assert_eq!(format!("{:?}", ast), "\"abc\"");
assert_eq!(format!("{ast:?}"), "abc");
}
#[test]
@@ -613,7 +678,7 @@ mod test {
let escaped_special_chars_re = Regex::new(ESCAPED_SPECIAL_CHARS_PATTERN).unwrap();
for special_char in SPECIAL_CHARS.iter() {
assert_eq!(
escaped_special_chars_re.replace_all(&format!("\\{}", special_char), "$1"),
escaped_special_chars_re.replace_all(&format!("\\{special_char}"), "$1"),
special_char.to_string()
);
}
@@ -708,56 +773,62 @@ mod test {
#[test]
fn test_parse_query_to_triming_spaces() {
test_parse_query_to_ast_helper(" abc", "\"abc\"");
test_parse_query_to_ast_helper("abc ", "\"abc\"");
test_parse_query_to_ast_helper("( a OR abc)", "(?\"a\" ?\"abc\")");
test_parse_query_to_ast_helper("(a OR abc)", "(?\"a\" ?\"abc\")");
test_parse_query_to_ast_helper("(a OR abc)", "(?\"a\" ?\"abc\")");
test_parse_query_to_ast_helper("a OR abc ", "(?\"a\" ?\"abc\")");
test_parse_query_to_ast_helper("(a OR abc )", "(?\"a\" ?\"abc\")");
test_parse_query_to_ast_helper("(a OR abc) ", "(?\"a\" ?\"abc\")");
test_parse_query_to_ast_helper(" abc", "abc");
test_parse_query_to_ast_helper("abc ", "abc");
test_parse_query_to_ast_helper("( a OR abc)", "(?a ?abc)");
test_parse_query_to_ast_helper("(a OR abc)", "(?a ?abc)");
test_parse_query_to_ast_helper("(a OR abc)", "(?a ?abc)");
test_parse_query_to_ast_helper("a OR abc ", "(?a ?abc)");
test_parse_query_to_ast_helper("(a OR abc )", "(?a ?abc)");
test_parse_query_to_ast_helper("(a OR abc) ", "(?a ?abc)");
}
#[test]
fn test_parse_query_single_term() {
test_parse_query_to_ast_helper("abc", "\"abc\"");
test_parse_query_to_ast_helper("abc", "abc");
}
#[test]
fn test_parse_query_default_clause() {
test_parse_query_to_ast_helper("a b", "(*\"a\" *\"b\")");
test_parse_query_to_ast_helper("a b", "(*a *b)");
}
#[test]
fn test_parse_query_must_default_clause() {
test_parse_query_to_ast_helper("+(a b)", "(*\"a\" *\"b\")");
test_parse_query_to_ast_helper("+(a b)", "(*a *b)");
}
#[test]
fn test_parse_query_must_single_term() {
test_parse_query_to_ast_helper("+d", "\"d\"");
test_parse_query_to_ast_helper("+d", "d");
}
#[test]
fn test_single_term_with_field() {
test_parse_query_to_ast_helper("abc:toto", "\"abc\":\"toto\"");
test_parse_query_to_ast_helper("abc:toto", "\"abc\":toto");
}
#[test]
fn test_phrase_with_field() {
test_parse_query_to_ast_helper("abc:\"happy tax payer\"", "\"abc\":\"happy tax payer\"");
test_parse_query_to_ast_helper("abc:'happy tax payer'", "\"abc\":'happy tax payer'");
}
#[test]
fn test_single_term_with_float() {
test_parse_query_to_ast_helper("abc:1.1", "\"abc\":\"1.1\"");
test_parse_query_to_ast_helper("a.b.c:1.1", "\"a.b.c\":\"1.1\"");
test_parse_query_to_ast_helper("a\\ b\\ c:1.1", "\"a b c\":\"1.1\"");
test_parse_query_to_ast_helper("abc:1.1", "\"abc\":1.1");
test_parse_query_to_ast_helper("a.b.c:1.1", "\"a.b.c\":1.1");
test_parse_query_to_ast_helper("a\\ b\\ c:1.1", "\"a b c\":1.1");
}
#[test]
fn test_must_clause() {
test_parse_query_to_ast_helper("(+a +b)", "(+\"a\" +\"b\")");
test_parse_query_to_ast_helper("(+a +b)", "(+a +b)");
}
#[test]
fn test_parse_test_query_plus_a_b_plus_d() {
test_parse_query_to_ast_helper("+(a b) +d", "(+(*\"a\" *\"b\") +\"d\")");
test_parse_query_to_ast_helper("+(a b) +d", "(+(*a *b) +d)");
}
#[test]
@@ -770,13 +841,13 @@ mod test {
#[test]
fn test_parse_test_query_other() {
test_parse_query_to_ast_helper("(+a +b) d", "(*(+\"a\" +\"b\") *\"d\")");
test_parse_query_to_ast_helper("+abc:toto", "\"abc\":\"toto\"");
test_parse_query_to_ast_helper("+a\\+b\\+c:toto", "\"a+b+c\":\"toto\"");
test_parse_query_to_ast_helper("(+abc:toto -titi)", "(+\"abc\":\"toto\" -\"titi\")");
test_parse_query_to_ast_helper("-abc:toto", "(-\"abc\":\"toto\")");
test_parse_query_to_ast_helper("(+a +b) d", "(*(+a +b) *d)");
test_parse_query_to_ast_helper("+abc:toto", "\"abc\":toto");
test_parse_query_to_ast_helper("+a\\+b\\+c:toto", "\"a+b+c\":toto");
test_parse_query_to_ast_helper("(+abc:toto -titi)", "(+\"abc\":toto -titi)");
test_parse_query_to_ast_helper("-abc:toto", "(-\"abc\":toto)");
test_is_parse_err("--abc:toto");
test_parse_query_to_ast_helper("abc:a b", "(*\"abc\":\"a\" *\"b\")");
test_parse_query_to_ast_helper("abc:a b", "(*\"abc\":a *b)");
test_parse_query_to_ast_helper("abc:\"a b\"", "\"abc\":\"a b\"");
test_parse_query_to_ast_helper("foo:[1 TO 5]", "\"foo\":[\"1\" TO \"5\"]");
}
@@ -801,15 +872,42 @@ mod test {
assert!(parse_to_ast().parse("foo:\"a b\"~").is_err());
assert!(parse_to_ast().parse("\"a b\"~a").is_err());
assert!(parse_to_ast().parse("\"a b\"~100000000000000000").is_err());
test_parse_query_to_ast_helper("\"a b\"^2~4", "(*(\"a b\")^2 *\"~4\")");
test_parse_query_to_ast_helper("\"a b\"^2~4", "(*(\"a b\")^2 *~4)");
test_parse_query_to_ast_helper("\"~Document\"", "\"~Document\"");
test_parse_query_to_ast_helper("~Document", "\"~Document\"");
test_parse_query_to_ast_helper("a~2", "\"a~2\"");
test_parse_query_to_ast_helper("~Document", "~Document");
test_parse_query_to_ast_helper("a~2", "a~2");
test_parse_query_to_ast_helper("\"a b\"~0", "\"a b\"");
test_parse_query_to_ast_helper("\"a b\"~1", "\"a b\"~1");
test_parse_query_to_ast_helper("\"a b\"~3", "\"a b\"~3");
test_parse_query_to_ast_helper("foo:\"a b\"~300", "\"foo\":\"a b\"~300");
test_parse_query_to_ast_helper("\"a b\"~300^2", "(\"a b\"~300)^2");
}
#[test]
fn test_phrase_prefix() {
test_parse_query_to_ast_helper("\"a b\"*", "\"a b\"*");
test_parse_query_to_ast_helper("\"a\"*", "\"a\"*");
test_parse_query_to_ast_helper("\"\"*", "\"\"*");
test_parse_query_to_ast_helper("foo:\"a b\"*", "\"foo\":\"a b\"*");
test_parse_query_to_ast_helper("foo:\"a\"*", "\"foo\":\"a\"*");
test_parse_query_to_ast_helper("foo:\"\"*", "\"foo\":\"\"*");
}
#[test]
fn test_not_queries_are_consistent() {
test_parse_query_to_ast_helper("tata -toto", "(*tata -toto)");
test_parse_query_to_ast_helper("tata NOT toto", "(*tata -toto)");
}
#[test]
fn test_escaping() {
test_parse_query_to_ast_helper(
r#"myfield:"hello\"happy\'tax""#,
r#""myfield":"hello"happy'tax""#,
);
test_parse_query_to_ast_helper(
r#"myfield:'hello\"happy\'tax'"#,
r#""myfield":'hello"happy'tax'"#,
);
}
}


@@ -19,7 +19,7 @@ pub enum UserInputLeaf {
}
impl Debug for UserInputLeaf {
fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), fmt::Error> {
fn fmt(&self, formatter: &mut Formatter) -> Result<(), fmt::Error> {
match self {
UserInputLeaf::Literal(literal) => literal.fmt(formatter),
UserInputLeaf::Range {
@@ -28,7 +28,7 @@ impl Debug for UserInputLeaf {
ref upper,
} => {
if let Some(ref field) = field {
write!(formatter, "\"{}\":", field)?;
write!(formatter, "\"{field}\":")?;
}
lower.display_lower(formatter)?;
write!(formatter, " TO ")?;
@@ -37,14 +37,14 @@ impl Debug for UserInputLeaf {
}
UserInputLeaf::Set { field, elements } => {
if let Some(ref field) = field {
write!(formatter, "\"{}\": ", field)?;
write!(formatter, "\"{field}\": ")?;
}
write!(formatter, "IN [")?;
for (i, element) in elements.iter().enumerate() {
for (i, text) in elements.iter().enumerate() {
if i != 0 {
write!(formatter, " ")?;
}
write!(formatter, "\"{}\"", element)?;
write!(formatter, "\"{text}\"")?;
}
write!(formatter, "]")
}
@@ -53,21 +53,42 @@ impl Debug for UserInputLeaf {
}
}
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum Delimiter {
SingleQuotes,
DoubleQuotes,
None,
}
#[derive(PartialEq)]
pub struct UserInputLiteral {
pub field_name: Option<String>,
pub phrase: String,
pub delimiter: Delimiter,
pub slop: u32,
pub prefix: bool,
}
impl fmt::Debug for UserInputLiteral {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
if let Some(ref field) = self.field_name {
write!(formatter, "\"{}\":", field)?;
write!(formatter, "\"{field}\":")?;
}
match self.delimiter {
Delimiter::SingleQuotes => {
write!(formatter, "'{}'", self.phrase)?;
}
Delimiter::DoubleQuotes => {
write!(formatter, "\"{}\"", self.phrase)?;
}
Delimiter::None => {
write!(formatter, "{}", self.phrase)?;
}
}
write!(formatter, "\"{}\"", self.phrase)?;
if self.slop > 0 {
write!(formatter, "~{}", self.slop)?;
} else if self.prefix {
write!(formatter, "*")?;
}
Ok(())
}
@@ -83,16 +104,16 @@ pub enum UserInputBound {
impl UserInputBound {
fn display_lower(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
match *self {
UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{}\"", word),
UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{}\"", word),
UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{word}\""),
UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{word}\""),
UserInputBound::Unbounded => write!(formatter, "{{\"*\""),
}
}
fn display_upper(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
match *self {
UserInputBound::Inclusive(ref word) => write!(formatter, "\"{}\"]", word),
UserInputBound::Exclusive(ref word) => write!(formatter, "\"{}\"}}", word),
UserInputBound::Inclusive(ref word) => write!(formatter, "\"{word}\"]"),
UserInputBound::Exclusive(ref word) => write!(formatter, "\"{word}\"}}"),
UserInputBound::Unbounded => write!(formatter, "\"*\"}}"),
}
}
@@ -163,9 +184,9 @@ fn print_occur_ast(
formatter: &mut fmt::Formatter,
) -> fmt::Result {
if let Some(occur) = occur_opt {
write!(formatter, "{}{:?}", occur, ast)?;
write!(formatter, "{occur}{ast:?}")?;
} else {
write!(formatter, "*{:?}", ast)?;
write!(formatter, "*{ast:?}")?;
}
Ok(())
}
@@ -187,8 +208,8 @@ impl fmt::Debug for UserInputAst {
}
Ok(())
}
UserInputAst::Leaf(ref subquery) => write!(formatter, "{:?}", subquery),
UserInputAst::Boost(ref leaf, boost) => write!(formatter, "({:?})^{}", leaf, boost),
UserInputAst::Leaf(ref subquery) => write!(formatter, "{subquery:?}"),
UserInputAst::Boost(ref leaf, boost) => write!(formatter, "({leaf:?})^{boost}"),
}
}
}


@@ -0,0 +1,550 @@
#[cfg(all(test, feature = "unstable"))]
mod bench {
use rand::prelude::SliceRandom;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use rand_distr::Distribution;
use serde_json::json;
use test::{self, Bencher};
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::AggregationCollector;
use crate::query::{AllQuery, TermQuery};
use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
use crate::{Index, Term};
#[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Cardinality {
/// All documents contain exactly one value.
/// `Full` is the default for auto-detecting the Cardinality, since it is the most strict.
#[default]
Full = 0,
/// All documents contain at most one value.
Optional = 1,
/// All documents may contain any number of values.
Multivalued = 2,
/// 1 / 20 documents has a value
Sparse = 3,
}
fn get_collector(agg_req: Aggregations) -> AggregationCollector {
AggregationCollector::from_aggs(agg_req, Default::default())
}
fn get_test_index_bench(cardinality: Cardinality) -> crate::Result<Index> {
let mut schema_builder = Schema::builder();
let text_fieldtype = crate::schema::TextOptions::default()
.set_indexing_options(
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
)
.set_stored();
let text_field = schema_builder.add_text_field("text", text_fieldtype);
let json_field = schema_builder.add_json_field("json", FAST);
let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
let score_fieldtype = crate::schema::NumericOptions::default().set_fast();
let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
let index = Index::create_from_tempdir(schema_builder.build())?;
let few_terms_data = vec!["INFO", "ERROR", "WARN", "DEBUG"];
let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
let many_terms_data = (0..150_000)
.map(|num| format!("author{}", num))
.collect::<Vec<_>>();
{
let mut rng = StdRng::from_seed([1u8; 32]);
let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
// To make the different test cases comparable we just change one doc to force the
// cardinality
if cardinality == Cardinality::Optional {
index_writer.add_document(doc!())?;
}
if cardinality == Cardinality::Multivalued {
index_writer.add_document(doc!(
json_field => json!({"mixed_type": 10.0}),
json_field => json!({"mixed_type": 10.0}),
text_field => "cool",
text_field => "cool",
text_field_many_terms => "cool",
text_field_many_terms => "cool",
text_field_few_terms => "cool",
text_field_few_terms => "cool",
score_field => 1u64,
score_field => 1u64,
score_field_f64 => lg_norm.sample(&mut rng),
score_field_f64 => lg_norm.sample(&mut rng),
score_field_i64 => 1i64,
score_field_i64 => 1i64,
))?;
}
let mut doc_with_value = 1_000_000;
if cardinality == Cardinality::Sparse {
doc_with_value /= 20;
}
let val_max = 1_000_000.0;
for _ in 0..doc_with_value {
let val: f64 = rng.gen_range(0.0..1_000_000.0);
let json = if rng.gen_bool(0.1) {
// 10% are numeric values
json!({ "mixed_type": val })
} else {
json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
};
index_writer.add_document(doc!(
text_field => "cool",
json_field => json,
text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
score_field => val as u64,
score_field_f64 => lg_norm.sample(&mut rng),
score_field_i64 => val as i64,
))?;
if cardinality == Cardinality::Sparse {
for _ in 0..20 {
index_writer.add_document(doc!(text_field => "cool"))?;
}
}
}
// writing the segment
index_writer.commit()?;
}
Ok(index)
}
use paste::paste;
#[macro_export]
macro_rules! bench_all_cardinalities {
( $x:ident ) => {
paste! {
#[bench]
fn $x(b: &mut Bencher) {
[<$x _card>](b, Cardinality::Full)
}
#[bench]
fn [<$x _opt>](b: &mut Bencher) {
[<$x _card>](b, Cardinality::Optional)
}
#[bench]
fn [<$x _multi>](b: &mut Bencher) {
[<$x _card>](b, Cardinality::Multivalued)
}
#[bench]
fn [<$x _sparse>](b: &mut Bencher) {
[<$x _card>](b, Cardinality::Sparse)
}
}
};
}
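// Roughly, `bench_all_cardinalities!(bench_aggregation_average_u64)` expands
// (via `paste!`) into four `#[bench]` functions: `bench_aggregation_average_u64`,
// `bench_aggregation_average_u64_opt`, `bench_aggregation_average_u64_multi`
// and `bench_aggregation_average_u64_sparse`, each forwarding to
// `bench_aggregation_average_u64_card` with the matching Cardinality variant.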
bench_all_cardinalities!(bench_aggregation_average_u64);
fn bench_aggregation_average_u64_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
let text_field = reader.searcher().schema().get_field("text").unwrap();
b.iter(|| {
let term_query = TermQuery::new(
Term::from_field_text(text_field, "cool"),
IndexRecordOption::Basic,
);
let agg_req_1: Aggregations = serde_json::from_value(json!({
"average": { "avg": { "field": "score", } }
}))
.unwrap();
let collector = get_collector(agg_req_1);
let searcher = reader.searcher();
searcher.search(&term_query, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_stats_f64);
fn bench_aggregation_stats_f64_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
let text_field = reader.searcher().schema().get_field("text").unwrap();
b.iter(|| {
let term_query = TermQuery::new(
Term::from_field_text(text_field, "cool"),
IndexRecordOption::Basic,
);
let agg_req_1: Aggregations = serde_json::from_value(json!({
"average_f64": { "stats": { "field": "score_f64", } }
}))
.unwrap();
let collector = get_collector(agg_req_1);
let searcher = reader.searcher();
searcher.search(&term_query, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_average_f64);
fn bench_aggregation_average_f64_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
let text_field = reader.searcher().schema().get_field("text").unwrap();
b.iter(|| {
let term_query = TermQuery::new(
Term::from_field_text(text_field, "cool"),
IndexRecordOption::Basic,
);
let agg_req_1: Aggregations = serde_json::from_value(json!({
"average_f64": { "avg": { "field": "score_f64", } }
}))
.unwrap();
let collector = get_collector(agg_req_1);
let searcher = reader.searcher();
searcher.search(&term_query, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_percentiles_f64);
fn bench_aggregation_percentiles_f64_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req_str = r#"
{
"mypercentiles": {
"percentiles": {
"field": "score_f64",
"percents": [ 95, 99, 99.9 ]
}
}
} "#;
let agg_req_1: Aggregations = serde_json::from_str(agg_req_str).unwrap();
let collector = get_collector(agg_req_1);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_average_u64_and_f64);
fn bench_aggregation_average_u64_and_f64_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
let text_field = reader.searcher().schema().get_field("text").unwrap();
b.iter(|| {
let term_query = TermQuery::new(
Term::from_field_text(text_field, "cool"),
IndexRecordOption::Basic,
);
let agg_req_1: Aggregations = serde_json::from_value(json!({
"average_f64": { "avg": { "field": "score_f64" } },
"average": { "avg": { "field": "score" } },
}))
.unwrap();
let collector = get_collector(agg_req_1);
let searcher = reader.searcher();
searcher.search(&term_query, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_terms_few);
fn bench_aggregation_terms_few_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": { "terms": { "field": "text_few_terms" } },
}))
.unwrap();
let collector = get_collector(agg_req);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_terms_many_with_sub_agg);
fn bench_aggregation_terms_many_with_sub_agg_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": {
"terms": { "field": "text_many_terms" },
"aggs": {
"average_f64": { "avg": { "field": "score_f64" } }
}
},
}))
.unwrap();
let collector = get_collector(agg_req);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_terms_many_json_mixed_type_with_sub_agg);
fn bench_aggregation_terms_many_json_mixed_type_with_sub_agg_card(
b: &mut Bencher,
cardinality: Cardinality,
) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": {
"terms": { "field": "json.mixed_type" },
"aggs": {
"average_f64": { "avg": { "field": "score_f64" } }
}
},
}))
.unwrap();
let collector = get_collector(agg_req);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_terms_many2);
fn bench_aggregation_terms_many2_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": { "terms": { "field": "text_many_terms" } },
}))
.unwrap();
let collector = get_collector(agg_req);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_terms_many_order_by_term);
fn bench_aggregation_terms_many_order_by_term_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": { "terms": { "field": "text_many_terms", "order": { "_key": "desc" } } },
}))
.unwrap();
let collector = get_collector(agg_req);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_range_only);
fn bench_aggregation_range_only_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req_1: Aggregations = serde_json::from_value(json!({
"range_f64": { "range": { "field": "score_f64", "ranges": [
{ "from": 3, "to": 7000 },
{ "from": 7000, "to": 20000 },
{ "from": 20000, "to": 30000 },
{ "from": 30000, "to": 40000 },
{ "from": 40000, "to": 50000 },
{ "from": 50000, "to": 60000 }
] } },
}))
.unwrap();
let collector = get_collector(agg_req_1);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_range_with_avg);
fn bench_aggregation_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req_1: Aggregations = serde_json::from_value(json!({
"rangef64": {
"range": {
"field": "score_f64",
"ranges": [
{ "from": 3, "to": 7000 },
{ "from": 7000, "to": 20000 },
{ "from": 20000, "to": 30000 },
{ "from": 30000, "to": 40000 },
{ "from": 40000, "to": 50000 },
{ "from": 50000, "to": 60000 }
]
},
"aggs": {
"average_f64": { "avg": { "field": "score_f64" } }
}
},
}))
.unwrap();
let collector = get_collector(agg_req_1);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
// hard_bounds has a different algorithm, because it actually limits the collection range
//
bench_all_cardinalities!(bench_aggregation_histogram_only_hard_bounds);
fn bench_aggregation_histogram_only_hard_bounds_card(
b: &mut Bencher,
cardinality: Cardinality,
) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req_1: Aggregations = serde_json::from_value(json!({
"rangef64": { "histogram": { "field": "score_f64", "interval": 100, "hard_bounds": { "min": 1000, "max": 300000 } } },
}))
.unwrap();
let collector = get_collector(agg_req_1);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_histogram_with_avg);
fn bench_aggregation_histogram_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req_1: Aggregations = serde_json::from_value(json!({
"rangef64": {
"histogram": { "field": "score_f64", "interval": 100 },
"aggs": {
"average_f64": { "avg": { "field": "score_f64" } }
}
}
}))
.unwrap();
let collector = get_collector(agg_req_1);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_histogram_only);
fn bench_aggregation_histogram_only_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
b.iter(|| {
let agg_req_1: Aggregations = serde_json::from_value(json!({
"rangef64": {
"histogram": {
"field": "score_f64",
"interval": 100 // 1000 buckets
},
}
}))
.unwrap();
let collector = get_collector(agg_req_1);
let searcher = reader.searcher();
searcher.search(&AllQuery, &collector).unwrap()
});
}
bench_all_cardinalities!(bench_aggregation_avg_and_range_with_avg);
fn bench_aggregation_avg_and_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) {
let index = get_test_index_bench(cardinality).unwrap();
let reader = index.reader().unwrap();
let text_field = reader.searcher().schema().get_field("text").unwrap();
b.iter(|| {
let term_query = TermQuery::new(
Term::from_field_text(text_field, "cool"),
IndexRecordOption::Basic,
);
let agg_req_1: Aggregations = serde_json::from_value(json!({
"rangef64": {
"range": {
"field": "score_f64",
"ranges": [
{ "from": 3, "to": 7000 },
{ "from": 7000, "to": 20000 },
{ "from": 20000, "to": 60000 }
]
},
"aggs": {
"average_in_range": { "avg": { "field": "score" } }
}
},
"average": { "avg": { "field": "score" } }
}))
.unwrap();
let collector = get_collector(agg_req_1);
let searcher = reader.searcher();
searcher.search(&term_query, &collector).unwrap()
});
}
}


@@ -0,0 +1,134 @@
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use common::ByteCount;
use super::collector::DEFAULT_MEMORY_LIMIT;
use super::{AggregationError, DEFAULT_BUCKET_LIMIT};
/// An estimate for memory consumption. Non-recursive.
pub trait MemoryConsumption {
fn memory_consumption(&self) -> usize;
}
impl<K, V, S> MemoryConsumption for HashMap<K, V, S> {
fn memory_consumption(&self) -> usize {
let capacity = self.capacity();
(std::mem::size_of::<K>() + std::mem::size_of::<V>() + 1) * capacity
}
}
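// A minimal sketch of another implementor, assuming the same capacity-based,
// non-recursive estimate as the HashMap impl above (a Vec impl is not
// necessarily part of the actual crate):
impl<T> MemoryConsumption for Vec<T> {
    fn memory_consumption(&self) -> usize {
        // Only the heap allocation backing the Vec is estimated; element
        // internals are ignored, mirroring the non-recursive contract.
        std::mem::size_of::<T>() * self.capacity()
    }
}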
/// Aggregation memory limit after which the request fails. Defaults to DEFAULT_MEMORY_LIMIT
/// (500MB). The limit is shared by all SegmentCollectors.
pub struct AggregationLimits {
/// The counter which is shared between the aggregations for one request.
memory_consumption: Arc<AtomicU64>,
/// The memory_limit in bytes
memory_limit: ByteCount,
/// The maximum number of buckets _returned_.
/// This does not count intermediate buckets.
bucket_limit: u32,
}
impl Clone for AggregationLimits {
fn clone(&self) -> Self {
Self {
memory_consumption: Arc::clone(&self.memory_consumption),
memory_limit: self.memory_limit,
bucket_limit: self.bucket_limit,
}
}
}
impl Default for AggregationLimits {
fn default() -> Self {
Self {
memory_consumption: Default::default(),
memory_limit: DEFAULT_MEMORY_LIMIT.into(),
bucket_limit: DEFAULT_BUCKET_LIMIT,
}
}
}
impl AggregationLimits {
/// *memory_limit*
/// memory_limit is defined in bytes.
/// Aggregation fails when the estimated memory consumption of the aggregation is higher than
/// memory_limit.
/// memory_limit will default to `DEFAULT_MEMORY_LIMIT` (500MB)
///
/// *bucket_limit*
/// Limits the maximum number of buckets returned from an aggregation request.
/// bucket_limit will default to `DEFAULT_BUCKET_LIMIT` (65000)
pub fn new(memory_limit: Option<u64>, bucket_limit: Option<u32>) -> Self {
Self {
memory_consumption: Default::default(),
memory_limit: memory_limit.unwrap_or(DEFAULT_MEMORY_LIMIT).into(),
bucket_limit: bucket_limit.unwrap_or(DEFAULT_BUCKET_LIMIT),
}
}
/// Create a new ResourceLimitGuard that will release the memory when dropped.
pub fn new_guard(&self) -> ResourceLimitGuard {
ResourceLimitGuard {
/// The counter which is shared between the aggregations for one request.
memory_consumption: Arc::clone(&self.memory_consumption),
/// The memory_limit in bytes
memory_limit: self.memory_limit,
allocated_with_the_guard: 0,
}
}
pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
self.memory_consumption
.fetch_add(num_bytes, Ordering::Relaxed);
validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
Ok(())
}
pub(crate) fn get_bucket_limit(&self) -> u32 {
self.bucket_limit
}
}
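// A usage sketch, assuming the request and collector types from the
// aggregation module (the call site below is illustrative, not taken from
// this diff): cap one aggregation request at 100 MB and 10_000 returned
// buckets.
//
//     let limits = AggregationLimits::new(Some(100 * 1024 * 1024), Some(10_000));
//     let collector = AggregationCollector::from_aggs(agg_req, limits);
//
// Cloning the limits shares the same atomic counter, so all segment
// collectors of the request draw from one budget.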
fn validate_memory_consumption(
memory_consumption: &AtomicU64,
memory_limit: ByteCount,
) -> Result<(), AggregationError> {
// Load the estimated memory consumed by the aggregations
let memory_consumed: ByteCount = memory_consumption.load(Ordering::Relaxed).into();
if memory_consumed > memory_limit {
return Err(AggregationError::MemoryExceeded {
limit: memory_limit,
current: memory_consumed,
});
}
Ok(())
}
pub struct ResourceLimitGuard {
/// The counter which is shared between the aggregations for one request.
memory_consumption: Arc<AtomicU64>,
/// The memory_limit in bytes
memory_limit: ByteCount,
/// Allocated memory with this guard.
allocated_with_the_guard: u64,
}
impl ResourceLimitGuard {
pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
self.memory_consumption
.fetch_add(num_bytes, Ordering::Relaxed);
validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
Ok(())
}
}
impl Drop for ResourceLimitGuard {
/// Removes the memory consumption tracked by this _instance_ of AggregationLimits.
/// This is used to clear the segment specific memory consumption all at once.
fn drop(&mut self) {
self.memory_consumption
.fetch_sub(self.allocated_with_the_guard, Ordering::Relaxed);
}
}
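// Lifecycle sketch (illustrative, not taken from this diff): a per-segment
// guard adds to the shared counter and validates against the limit; on drop,
// whatever was recorded in `allocated_with_the_guard` is subtracted again.
//
//     let guard = limits.new_guard();
//     guard.add_memory_consumed(64 * 1024)?; // errors once the shared limit is exceeded
//     drop(guard);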


@@ -9,25 +9,7 @@
//! # Example
//!
//! ```
//! use tantivy::aggregation::bucket::RangeAggregation;
//! use tantivy::aggregation::agg_req::BucketAggregationType;
//! use tantivy::aggregation::agg_req::{Aggregation, Aggregations};
//! use tantivy::aggregation::agg_req::BucketAggregation;
//! let agg_req1: Aggregations = vec![
//! (
//! "range".to_string(),
//! Aggregation::Bucket(BucketAggregation {
//! bucket_agg: BucketAggregationType::Range(RangeAggregation{
//! field: "score".to_string(),
//! ranges: vec![(3f64..7f64).into(), (7f64..20f64).into()],
//! keyed: false,
//! }),
//! sub_aggregation: Default::default(),
//! }),
//! ),
//! ]
//! .into_iter()
//! .collect();
//! use tantivy::aggregation::agg_req::Aggregations;
//!
//! let elasticsearch_compatible_json_req = r#"
//! {
@@ -41,89 +23,51 @@
//! }
//! }
//! }"#;
//! let agg_req2: Aggregations = serde_json::from_str(elasticsearch_compatible_json_req).unwrap();
//! assert_eq!(agg_req1, agg_req2);
//! let _agg_req: Aggregations = serde_json::from_str(elasticsearch_compatible_json_req).unwrap();
//! ```
use std::collections::{HashMap, HashSet};
use serde::{Deserialize, Serialize};
pub use super::bucket::RangeAggregation;
use super::bucket::{DateHistogramAggregationReq, HistogramAggregation, TermsAggregation};
use super::metric::{
AverageAggregation, CountAggregation, MaxAggregation, MinAggregation, StatsAggregation,
SumAggregation,
use super::bucket::{
DateHistogramAggregationReq, HistogramAggregation, RangeAggregation, TermsAggregation,
};
use super::metric::{
AverageAggregation, CountAggregation, MaxAggregation, MinAggregation,
PercentilesAggregationReq, StatsAggregation, SumAggregation,
};
use super::VecWithNames;
/// The top-level aggregation request structure, which contains [`Aggregation`] and their user
/// defined names. It is also used in [buckets](BucketAggregation) to define sub-aggregations.
/// defined names. It is also used in bucket aggregations to define sub-aggregations.
///
/// The key is the user defined name of the aggregation.
pub type Aggregations = HashMap<String, Aggregation>;
/// Like Aggregations, but optimized to work with the aggregation result
#[derive(Clone, Debug)]
pub(crate) struct AggregationsInternal {
pub(crate) metrics: VecWithNames<MetricAggregation>,
pub(crate) buckets: VecWithNames<BucketAggregationInternal>,
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
/// Aggregation request.
///
/// An aggregation is either a bucket or a metric.
pub struct Aggregation {
/// The aggregation variant, which can be either a bucket or a metric.
#[serde(flatten)]
pub agg: AggregationVariants,
/// The sub_aggregations, only valid for bucket type aggregations. Each bucket will aggregate
/// on the document set in the bucket.
#[serde(rename = "aggs")]
#[serde(default)]
#[serde(skip_serializing_if = "Aggregations::is_empty")]
pub sub_aggregation: Aggregations,
}
impl From<Aggregations> for AggregationsInternal {
fn from(aggs: Aggregations) -> Self {
let mut metrics = vec![];
let mut buckets = vec![];
for (key, agg) in aggs {
match agg {
Aggregation::Bucket(bucket) => buckets.push((
key,
BucketAggregationInternal {
bucket_agg: bucket.bucket_agg,
sub_aggregation: bucket.sub_aggregation.into(),
},
)),
Aggregation::Metric(metric) => metrics.push((key, metric)),
}
}
Self {
metrics: VecWithNames::from_entries(metrics),
buckets: VecWithNames::from_entries(buckets),
}
impl Aggregation {
pub(crate) fn sub_aggregation(&self) -> &Aggregations {
&self.sub_aggregation
}
}
#[derive(Clone, Debug)]
// Like BucketAggregation, but optimized to work with the result
pub(crate) struct BucketAggregationInternal {
/// Bucket aggregation strategy to group documents.
pub bucket_agg: BucketAggregationType,
/// The sub_aggregations in the buckets. Each bucket will aggregate on the document set in the
/// bucket.
pub sub_aggregation: AggregationsInternal,
}
impl BucketAggregationInternal {
pub(crate) fn as_range(&self) -> Option<&RangeAggregation> {
match &self.bucket_agg {
BucketAggregationType::Range(range) => Some(range),
_ => None,
}
}
pub(crate) fn as_histogram(&self) -> crate::Result<Option<HistogramAggregation>> {
match &self.bucket_agg {
BucketAggregationType::Histogram(histogram) => Ok(Some(histogram.clone())),
BucketAggregationType::DateHistogram(histogram) => {
Ok(Some(histogram.to_histogram_req()?))
}
_ => Ok(None),
}
}
pub(crate) fn as_term(&self) -> Option<&TermsAggregation> {
match &self.bucket_agg {
BucketAggregationType::Terms(terms) => Some(terms),
_ => None,
}
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
fast_field_names.insert(self.agg.get_fast_field_name().to_string());
fast_field_names.extend(get_fast_field_names(&self.sub_aggregation));
}
}
@@ -136,97 +80,24 @@ pub fn get_fast_field_names(aggs: &Aggregations) -> HashSet<String> {
fast_field_names
}
/// Aggregation request of [`BucketAggregation`] or [`MetricAggregation`].
///
/// An aggregation is either a bucket or a metric.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Aggregation {
/// Bucket aggregation, see [`BucketAggregation`] for details.
Bucket(BucketAggregation),
/// Metric aggregation, see [`MetricAggregation`] for details.
Metric(MetricAggregation),
}
impl Aggregation {
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
match self {
Aggregation::Bucket(bucket) => bucket.get_fast_field_names(fast_field_names),
Aggregation::Metric(metric) => {
fast_field_names.insert(metric.get_fast_field_name().to_string());
}
}
}
}
/// BucketAggregations create buckets of documents. Each bucket is associated with a rule which
/// determines whether or not a document falls into it. In other words, the buckets
/// effectively define document sets. Buckets are not necessarily disjoint, therefore a document can
/// fall into multiple buckets. In addition to the buckets themselves, the bucket aggregations also
/// compute and return the number of documents for each bucket. Bucket aggregations, as opposed to
/// metric aggregations, can hold sub-aggregations. These sub-aggregations will be aggregated for
/// the buckets created by their "parent" bucket aggregation. There are different bucket
/// aggregators, each with a different "bucketing" strategy. Some define a single bucket, some
/// define a fixed number of buckets, and others dynamically create the buckets during the
/// aggregation process.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct BucketAggregation {
/// Bucket aggregation strategy to group documents.
#[serde(flatten)]
pub bucket_agg: BucketAggregationType,
/// The sub_aggregations in the buckets. Each bucket will aggregate on the document set in the
/// bucket.
#[serde(rename = "aggs")]
#[serde(default)]
#[serde(skip_serializing_if = "Aggregations::is_empty")]
pub sub_aggregation: Aggregations,
}
impl BucketAggregation {
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
let fast_field_name = self.bucket_agg.get_fast_field_name();
fast_field_names.insert(fast_field_name.to_string());
fast_field_names.extend(get_fast_field_names(&self.sub_aggregation));
}
}
/// The bucket aggregation types.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum BucketAggregationType {
/// All aggregation types.
pub enum AggregationVariants {
// Bucket aggregation types
/// Put data into buckets of user-defined ranges.
#[serde(rename = "range")]
Range(RangeAggregation),
/// Put data into buckets of user-defined ranges.
/// Put data into a histogram.
#[serde(rename = "histogram")]
Histogram(HistogramAggregation),
/// Put data into buckets of user-defined ranges.
/// Put data into a date histogram.
#[serde(rename = "date_histogram")]
DateHistogram(DateHistogramAggregationReq),
/// Put data into buckets of terms.
#[serde(rename = "terms")]
Terms(TermsAggregation),
}
impl BucketAggregationType {
fn get_fast_field_name(&self) -> &str {
match self {
BucketAggregationType::Terms(terms) => terms.field.as_str(),
BucketAggregationType::Range(range) => range.field.as_str(),
BucketAggregationType::Histogram(histogram) => histogram.field.as_str(),
BucketAggregationType::DateHistogram(histogram) => histogram.field.as_str(),
}
}
}
/// The aggregations in this family compute metrics based on values extracted
/// from the documents that are being aggregated. Values are extracted from the fast field of
/// the document.
/// Some aggregations output a single numeric metric (e.g. Average) and are called
/// single-value numeric metrics aggregations; others generate multiple metrics (e.g. Stats) and are
/// called multi-value numeric metrics aggregations.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum MetricAggregation {
// Metric aggregation types
/// Computes the average of the extracted values.
#[serde(rename = "avg")]
Average(AverageAggregation),
@@ -246,25 +117,107 @@ pub enum MetricAggregation {
/// Computes the sum of the extracted values.
#[serde(rename = "sum")]
Sum(SumAggregation),
/// Computes percentiles over the extracted values.
#[serde(rename = "percentiles")]
Percentiles(PercentilesAggregationReq),
}
impl MetricAggregation {
impl AggregationVariants {
fn get_fast_field_name(&self) -> &str {
match self {
MetricAggregation::Average(avg) => avg.field_name(),
MetricAggregation::Count(count) => count.field_name(),
MetricAggregation::Max(max) => max.field_name(),
MetricAggregation::Min(min) => min.field_name(),
MetricAggregation::Stats(stats) => stats.field_name(),
MetricAggregation::Sum(sum) => sum.field_name(),
AggregationVariants::Terms(terms) => terms.field.as_str(),
AggregationVariants::Range(range) => range.field.as_str(),
AggregationVariants::Histogram(histogram) => histogram.field.as_str(),
AggregationVariants::DateHistogram(histogram) => histogram.field.as_str(),
AggregationVariants::Average(avg) => avg.field_name(),
AggregationVariants::Count(count) => count.field_name(),
AggregationVariants::Max(max) => max.field_name(),
AggregationVariants::Min(min) => min.field_name(),
AggregationVariants::Stats(stats) => stats.field_name(),
AggregationVariants::Sum(sum) => sum.field_name(),
AggregationVariants::Percentiles(per) => per.field_name(),
}
}
pub(crate) fn as_range(&self) -> Option<&RangeAggregation> {
match &self {
AggregationVariants::Range(range) => Some(range),
_ => None,
}
}
pub(crate) fn as_histogram(&self) -> crate::Result<Option<HistogramAggregation>> {
match &self {
AggregationVariants::Histogram(histogram) => Ok(Some(histogram.clone())),
AggregationVariants::DateHistogram(histogram) => {
Ok(Some(histogram.to_histogram_req()?))
}
_ => Ok(None),
}
}
pub(crate) fn as_term(&self) -> Option<&TermsAggregation> {
match &self {
AggregationVariants::Terms(terms) => Some(terms),
_ => None,
}
}
pub(crate) fn as_percentile(&self) -> Option<&PercentilesAggregationReq> {
match &self {
AggregationVariants::Percentiles(percentile_req) => Some(percentile_req),
_ => None,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn deser_json_test() {
let agg_req_json = r#"{
"price_avg": { "avg": { "field": "price" } },
"price_count": { "value_count": { "field": "price" } },
"price_max": { "max": { "field": "price" } },
"price_min": { "min": { "field": "price" } },
"price_stats": { "stats": { "field": "price" } },
"price_sum": { "sum": { "field": "price" } }
}"#;
let _agg_req: Aggregations = serde_json::from_str(agg_req_json).unwrap();
}
#[test]
fn deser_json_test_bucket() {
let agg_req_json = r#"
{
"termagg": {
"terms": {
"field": "json.mixed_type",
"order": { "min_price": "desc" }
},
"aggs": {
"min_price": { "min": { "field": "json.mixed_type" } }
}
},
"rangeagg": {
"range": {
"field": "json.mixed_type",
"ranges": [
{ "to": 3.0 },
{ "from": 19.0, "to": 20.0 },
{ "from": 20.0 }
]
},
"aggs": {
"average_in_range": { "avg": { "field": "json.mixed_type" } }
}
}
} "#;
let _agg_req: Aggregations = serde_json::from_str(agg_req_json).unwrap();
}
#[test]
fn test_metric_aggregations_deser() {
let agg_req_json = r#"{
@@ -278,46 +231,27 @@ mod tests {
let agg_req: Aggregations = serde_json::from_str(agg_req_json).unwrap();
assert!(
matches!(agg_req.get("price_avg").unwrap(), Aggregation::Metric(MetricAggregation::Average(avg)) if avg.field == "price")
matches!(&agg_req.get("price_avg").unwrap().agg, AggregationVariants::Average(avg) if avg.field == "price")
);
assert!(
matches!(agg_req.get("price_count").unwrap(), Aggregation::Metric(MetricAggregation::Count(count)) if count.field == "price")
matches!(&agg_req.get("price_count").unwrap().agg, AggregationVariants::Count(count) if count.field == "price")
);
assert!(
matches!(agg_req.get("price_max").unwrap(), Aggregation::Metric(MetricAggregation::Max(max)) if max.field == "price")
matches!(&agg_req.get("price_max").unwrap().agg, AggregationVariants::Max(max) if max.field == "price")
);
assert!(
matches!(agg_req.get("price_min").unwrap(), Aggregation::Metric(MetricAggregation::Min(min)) if min.field == "price")
matches!(&agg_req.get("price_min").unwrap().agg, AggregationVariants::Min(min) if min.field == "price")
);
assert!(
matches!(agg_req.get("price_stats").unwrap(), Aggregation::Metric(MetricAggregation::Stats(stats)) if stats.field == "price")
matches!(&agg_req.get("price_stats").unwrap().agg, AggregationVariants::Stats(stats) if stats.field == "price")
);
assert!(
matches!(agg_req.get("price_sum").unwrap(), Aggregation::Metric(MetricAggregation::Sum(sum)) if sum.field == "price")
matches!(&agg_req.get("price_sum").unwrap().agg, AggregationVariants::Sum(sum) if sum.field == "price")
);
}
#[test]
fn serialize_to_json_test() {
let agg_req1: Aggregations = vec![(
"range".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Range(RangeAggregation {
field: "score".to_string(),
ranges: vec![
(f64::MIN..3f64).into(),
(3f64..7f64).into(),
(7f64..20f64).into(),
(20f64..f64::MAX).into(),
],
keyed: true,
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();
let elasticsearch_compatible_json_req = r#"{
"range": {
"range": {
@@ -342,57 +276,56 @@ mod tests {
}
}
}"#;
let agg_req1: Aggregations =
{ serde_json::from_str(elasticsearch_compatible_json_req).unwrap() };
let agg_req2: String = serde_json::to_string_pretty(&agg_req1).unwrap();
assert_eq!(agg_req2, elasticsearch_compatible_json_req);
}
#[test]
fn test_get_fast_field_names() {
let agg_req2: Aggregations = vec![
(
"range".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Range(RangeAggregation {
field: "score2".to_string(),
ranges: vec![
(f64::MIN..3f64).into(),
(3f64..7f64).into(),
(7f64..20f64).into(),
(20f64..f64::MAX).into(),
],
..Default::default()
}),
sub_aggregation: Default::default(),
}),
),
(
"metric".to_string(),
Aggregation::Metric(MetricAggregation::Average(
AverageAggregation::from_field_name("field123".to_string()),
)),
),
]
.into_iter()
.collect();
let agg_req1: Aggregations = vec![(
"range".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Range(RangeAggregation {
field: "score".to_string(),
ranges: vec![
(f64::MIN..3f64).into(),
(3f64..7f64).into(),
(7f64..20f64).into(),
(20f64..f64::MAX).into(),
let range_agg: Aggregation = {
serde_json::from_value(json!({
"range": {
"field": "score",
"ranges": [
{ "to": 3.0 },
{ "from": 3.0, "to": 7.0 },
{ "from": 7.0, "to": 20.0 },
{ "from": 20.0 }
],
..Default::default()
}),
sub_aggregation: agg_req2,
}),
)]
.into_iter()
.collect();
}
}))
.unwrap()
};
let agg_req1: Aggregations = {
serde_json::from_value(json!({
"range1": range_agg,
"range2":{
"range": {
"field": "score2",
"ranges": [
{ "to": 3.0 },
{ "from": 3.0, "to": 7.0 },
{ "from": 7.0, "to": 20.0 },
{ "from": 20.0 }
],
},
"aggs": {
"metric": {
"avg": {
"field": "field123"
}
}
}
}
}))
.unwrap()
};
assert_eq!(
get_fast_field_names(&agg_req1),


@@ -1,11 +1,9 @@
//! This will enhance the request tree with access to the fastfield and metadata.
use std::rc::Rc;
use std::sync::atomic::AtomicU32;
use columnar::{Column, ColumnBlockAccessor, ColumnType, StrColumn};
use columnar::{Column, ColumnType, StrColumn};
use super::agg_req::{Aggregation, Aggregations, BucketAggregationType, MetricAggregation};
use super::agg_limits::ResourceLimitGuard;
use super::agg_req::{Aggregation, AggregationVariants, Aggregations};
use super::bucket::{
DateHistogramAggregationReq, HistogramAggregation, RangeAggregation, TermsAggregation,
};
@@ -13,162 +11,185 @@ use super::metric::{
AverageAggregation, CountAggregation, MaxAggregation, MinAggregation, StatsAggregation,
SumAggregation,
};
use super::segment_agg_result::BucketCount;
use super::segment_agg_result::AggregationLimits;
use super::VecWithNames;
use crate::{SegmentReader, TantivyError};
use crate::SegmentReader;
#[derive(Clone, Default)]
#[derive(Default)]
pub(crate) struct AggregationsWithAccessor {
pub metrics: VecWithNames<MetricAggregationWithAccessor>,
pub buckets: VecWithNames<BucketAggregationWithAccessor>,
pub aggs: VecWithNames<AggregationWithAccessor>,
}
impl AggregationsWithAccessor {
fn from_data(
metrics: VecWithNames<MetricAggregationWithAccessor>,
buckets: VecWithNames<BucketAggregationWithAccessor>,
) -> Self {
Self { metrics, buckets }
fn from_data(aggs: VecWithNames<AggregationWithAccessor>) -> Self {
Self { aggs }
}
pub fn is_empty(&self) -> bool {
self.metrics.is_empty() && self.buckets.is_empty()
self.aggs.is_empty()
}
}
#[derive(Clone)]
pub struct BucketAggregationWithAccessor {
pub struct AggregationWithAccessor {
/// In general there can be buckets without fast field access, e.g. buckets that are created
/// based on search terms. So eventually this needs to be Option or moved.
/// based on search terms. That is not the case currently, but eventually this needs to be
/// Option or moved.
pub(crate) accessor: Column<u64>,
pub(crate) str_dict_column: Option<StrColumn>,
pub(crate) field_type: ColumnType,
pub(crate) bucket_agg: BucketAggregationType,
/// In case there are multiple types of fast fields, e.g. string and numeric.
/// Only used for term aggregations currently.
pub(crate) accessor2: Option<(Column<u64>, ColumnType)>,
pub(crate) sub_aggregation: AggregationsWithAccessor,
pub(crate) bucket_count: BucketCount,
pub(crate) limits: ResourceLimitGuard,
pub(crate) column_block_accessor: ColumnBlockAccessor<u64>,
pub(crate) agg: Aggregation,
}
impl BucketAggregationWithAccessor {
fn try_from_bucket(
bucket: &BucketAggregationType,
impl AggregationWithAccessor {
fn try_from_agg(
agg: &Aggregation,
sub_aggregation: &Aggregations,
reader: &SegmentReader,
bucket_count: Rc<AtomicU32>,
max_bucket_count: u32,
) -> crate::Result<BucketAggregationWithAccessor> {
limits: AggregationLimits,
) -> crate::Result<AggregationWithAccessor> {
let mut str_dict_column = None;
let (accessor, field_type) = match &bucket {
BucketAggregationType::Range(RangeAggregation {
let mut accessor2 = None;
use AggregationVariants::*;
let (accessor, field_type) = match &agg.agg {
Range(RangeAggregation {
field: field_name, ..
}) => get_ff_reader_and_validate(reader, field_name)?,
BucketAggregationType::Histogram(HistogramAggregation {
}) => get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?,
Histogram(HistogramAggregation {
field: field_name, ..
}) => get_ff_reader_and_validate(reader, field_name)?,
BucketAggregationType::DateHistogram(DateHistogramAggregationReq {
field: field_name,
..
}) => get_ff_reader_and_validate(reader, field_name)?,
BucketAggregationType::Terms(TermsAggregation {
}) => get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?,
DateHistogram(DateHistogramAggregationReq {
field: field_name, ..
}) => get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?,
Terms(TermsAggregation {
field: field_name, ..
}) => {
str_dict_column = reader.fast_fields().str(field_name)?;
get_ff_reader_and_validate(reader, field_name)?
let allowed_column_types = [
ColumnType::I64,
ColumnType::U64,
ColumnType::F64,
ColumnType::Bytes,
ColumnType::Str,
// ColumnType::Bool Unsupported
// ColumnType::IpAddr Unsupported
// ColumnType::DateTime Unsupported
];
let mut columns =
get_all_ff_reader(reader, field_name, Some(&allowed_column_types))?;
let first = columns.pop().unwrap();
accessor2 = columns.pop();
first
}
Average(AverageAggregation { field: field_name })
| Count(CountAggregation { field: field_name })
| Max(MaxAggregation { field: field_name })
| Min(MinAggregation { field: field_name })
| Stats(StatsAggregation { field: field_name })
| Sum(SumAggregation { field: field_name }) => {
let (accessor, field_type) =
get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?;
(accessor, field_type)
}
Percentiles(percentiles) => {
let (accessor, field_type) = get_ff_reader(
reader,
percentiles.field_name(),
Some(get_numeric_or_date_column_types()),
)?;
(accessor, field_type)
}
};
let sub_aggregation = sub_aggregation.clone();
Ok(BucketAggregationWithAccessor {
Ok(AggregationWithAccessor {
accessor,
accessor2,
field_type,
sub_aggregation: get_aggs_with_accessor_and_validate(
sub_aggregation: get_aggs_with_segment_accessor_and_validate(
&sub_aggregation,
reader,
bucket_count.clone(),
max_bucket_count,
&limits,
)?,
bucket_agg: bucket.clone(),
agg: agg.clone(),
str_dict_column,
bucket_count: BucketCount {
bucket_count,
max_bucket_count,
},
limits: limits.new_guard(),
column_block_accessor: Default::default(),
})
}
}
/// Contains the metric request and the fast field accessor.
#[derive(Clone)]
pub struct MetricAggregationWithAccessor {
pub metric: MetricAggregation,
pub field_type: ColumnType,
pub accessor: Column<u64>,
fn get_numeric_or_date_column_types() -> &'static [ColumnType] {
&[
ColumnType::F64,
ColumnType::U64,
ColumnType::I64,
ColumnType::DateTime,
]
}
impl MetricAggregationWithAccessor {
fn try_from_metric(
metric: &MetricAggregation,
reader: &SegmentReader,
) -> crate::Result<MetricAggregationWithAccessor> {
match &metric {
MetricAggregation::Average(AverageAggregation { field: field_name })
| MetricAggregation::Count(CountAggregation { field: field_name })
| MetricAggregation::Max(MaxAggregation { field: field_name })
| MetricAggregation::Min(MinAggregation { field: field_name })
| MetricAggregation::Stats(StatsAggregation { field: field_name })
| MetricAggregation::Sum(SumAggregation { field: field_name }) => {
let (accessor, field_type) = get_ff_reader_and_validate(reader, field_name)?;
Ok(MetricAggregationWithAccessor {
accessor,
field_type,
metric: metric.clone(),
})
}
}
}
}
pub(crate) fn get_aggs_with_accessor_and_validate(
pub(crate) fn get_aggs_with_segment_accessor_and_validate(
aggs: &Aggregations,
reader: &SegmentReader,
bucket_count: Rc<AtomicU32>,
max_bucket_count: u32,
limits: &AggregationLimits,
) -> crate::Result<AggregationsWithAccessor> {
let mut metrics = vec![];
let mut buckets = vec![];
let mut aggss = Vec::new();
for (key, agg) in aggs.iter() {
match agg {
Aggregation::Bucket(bucket) => buckets.push((
key.to_string(),
BucketAggregationWithAccessor::try_from_bucket(
&bucket.bucket_agg,
&bucket.sub_aggregation,
reader,
Rc::clone(&bucket_count),
max_bucket_count,
)?,
)),
Aggregation::Metric(metric) => metrics.push((
key.to_string(),
MetricAggregationWithAccessor::try_from_metric(metric, reader)?,
)),
}
aggss.push((
key.to_string(),
AggregationWithAccessor::try_from_agg(
agg,
agg.sub_aggregation(),
reader,
limits.clone(),
)?,
));
}
Ok(AggregationsWithAccessor::from_data(
VecWithNames::from_entries(metrics),
VecWithNames::from_entries(buckets),
VecWithNames::from_entries(aggss),
))
}
/// Get fast field reader with given cardinality.
fn get_ff_reader_and_validate(
/// Get the fast field reader, or an empty column as default.
fn get_ff_reader(
reader: &SegmentReader,
field_name: &str,
allowed_column_types: Option<&[ColumnType]>,
) -> crate::Result<(columnar::Column<u64>, ColumnType)> {
let ff_fields = reader.fast_fields();
let ff_field_with_type = ff_fields
.u64_lenient_with_type(field_name)?
.ok_or_else(|| {
TantivyError::InvalidArgument(format!("No fast field found for field: {}", field_name))
})?;
.u64_lenient_for_type(allowed_column_types, field_name)?
.unwrap_or_else(|| {
(
Column::build_empty_column(reader.num_docs()),
ColumnType::U64,
)
});
Ok(ff_field_with_type)
}
/// Get all fast field readers, or an empty column as default.
///
/// Is guaranteed to return at least one column.
fn get_all_ff_reader(
reader: &SegmentReader,
field_name: &str,
allowed_column_types: Option<&[ColumnType]>,
) -> crate::Result<Vec<(columnar::Column<u64>, ColumnType)>> {
let ff_fields = reader.fast_fields();
let mut ff_field_with_type =
ff_fields.u64_lenient_for_type_all(allowed_column_types, field_name)?;
if ff_field_with_type.is_empty() {
ff_field_with_type.push((
Column::build_empty_column(reader.num_docs()),
ColumnType::U64,
));
}
Ok(ff_field_with_type)
}


@@ -7,11 +7,9 @@
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use super::agg_req::BucketAggregationInternal;
use super::bucket::GetDocCount;
use super::intermediate_agg_result::{IntermediateBucketResult, IntermediateMetricResult};
use super::metric::{SingleMetricResult, Stats};
use super::Key;
use super::metric::{PercentilesMetricResult, SingleMetricResult, Stats};
use super::{AggregationError, Key};
use crate::TantivyError;
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
@@ -19,6 +17,13 @@ use crate::TantivyError;
pub struct AggregationResults(pub FxHashMap<String, AggregationResult>);
impl AggregationResults {
pub(crate) fn get_bucket_count(&self) -> u64 {
self.0
.values()
.map(|agg| agg.get_bucket_count())
.sum::<u64>()
}
pub(crate) fn get_value_from_aggregation(
&self,
name: &str,
@@ -29,8 +34,7 @@ impl AggregationResults {
} else {
// Validation is done during request parsing, so we can't reach this state.
Err(TantivyError::InternalError(format!(
"Can't find aggregation {:?} in sub-aggregations",
name
"Can't find aggregation {name:?} in sub-aggregations"
)))
}
}
@@ -47,6 +51,13 @@ pub enum AggregationResult {
}
impl AggregationResult {
pub(crate) fn get_bucket_count(&self) -> u64 {
match self {
AggregationResult::BucketResult(bucket) => bucket.get_bucket_count(),
AggregationResult::MetricResult(_) => 0,
}
}
pub(crate) fn get_value_from_aggregation(
&self,
_name: &str,
@@ -79,6 +90,8 @@ pub enum MetricResult {
Stats(Stats),
/// Sum metric result.
Sum(SingleMetricResult),
/// Percentiles metric result.
Percentiles(PercentilesMetricResult),
}
impl MetricResult {
@@ -90,30 +103,9 @@ impl MetricResult {
MetricResult::Min(min) => Ok(min.value),
MetricResult::Stats(stats) => stats.get_value(agg_property),
MetricResult::Sum(sum) => Ok(sum.value),
}
}
}
impl From<IntermediateMetricResult> for MetricResult {
fn from(metric: IntermediateMetricResult) -> Self {
match metric {
IntermediateMetricResult::Average(intermediate_avg) => {
MetricResult::Average(intermediate_avg.finalize().into())
}
IntermediateMetricResult::Count(intermediate_count) => {
MetricResult::Count(intermediate_count.finalize().into())
}
IntermediateMetricResult::Max(intermediate_max) => {
MetricResult::Max(intermediate_max.finalize().into())
}
IntermediateMetricResult::Min(intermediate_min) => {
MetricResult::Min(intermediate_min.finalize().into())
}
IntermediateMetricResult::Stats(intermediate_stats) => {
MetricResult::Stats(intermediate_stats.finalize())
}
IntermediateMetricResult::Sum(intermediate_sum) => {
MetricResult::Sum(intermediate_sum.finalize().into())
}
MetricResult::Percentiles(_) => Err(TantivyError::AggregationError(
AggregationError::InvalidRequest("percentiles can't be used to order".to_string()),
)),
}
}
}
@@ -153,9 +145,20 @@ pub enum BucketResult {
}
impl BucketResult {
pub(crate) fn empty_from_req(req: &BucketAggregationInternal) -> crate::Result<Self> {
let empty_bucket = IntermediateBucketResult::empty_from_req(&req.bucket_agg);
empty_bucket.into_final_bucket_result(req)
pub(crate) fn get_bucket_count(&self) -> u64 {
match self {
BucketResult::Range { buckets } => {
buckets.iter().map(|bucket| bucket.get_bucket_count()).sum()
}
BucketResult::Histogram { buckets } => {
buckets.iter().map(|bucket| bucket.get_bucket_count()).sum()
}
BucketResult::Terms {
buckets,
sum_other_doc_count: _,
doc_count_error_upper_bound: _,
} => buckets.iter().map(|bucket| bucket.get_bucket_count()).sum(),
}
}
}
@@ -170,6 +173,15 @@ pub enum BucketEntries<T> {
HashMap(FxHashMap<String, T>),
}
impl<T> BucketEntries<T> {
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = &T> + 'a> {
match self {
BucketEntries::Vec(vec) => Box::new(vec.iter()),
BucketEntries::HashMap(map) => Box::new(map.values()),
}
}
}
/// This is the default entry for a bucket, which contains a key, count, and optionally
/// sub-aggregations.
///
@@ -209,6 +221,11 @@ pub struct BucketEntry {
/// Sub-aggregations in this bucket.
pub sub_aggregation: AggregationResults,
}
impl BucketEntry {
pub(crate) fn get_bucket_count(&self) -> u64 {
1 + self.sub_aggregation.get_bucket_count()
}
}
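// Worked example of the recursive count (illustrative numbers): a terms
// result with 3 buckets, each carrying a range sub-aggregation with 2
// buckets, contributes 3 * (1 + 2) = 9 towards the bucket limit.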
impl GetDocCount for &BucketEntry {
fn doc_count(&self) -> u64 {
self.doc_count
@@ -272,3 +289,8 @@ pub struct RangeBucketEntry {
#[serde(skip_serializing_if = "Option::is_none")]
pub to_as_string: Option<String>,
}
impl RangeBucketEntry {
pub(crate) fn get_bucket_count(&self) -> u64 {
1 + self.sub_aggregation.get_bucket_count()
}
}

File diff suppressed because it is too large.


@@ -37,10 +37,10 @@ pub struct DateHistogramAggregationReq {
interval: Option<String>,
#[doc(hidden)]
/// Only for validation
date_interval: Option<String>,
calendar_interval: Option<String>,
/// The field to aggregate on.
pub field: String,
/// The format to format dates.
/// The format to format dates. Unsupported currently.
pub format: Option<String>,
/// The interval to chunk your data range. Each bucket spans a value range of
/// [0..fixed_interval). Accepted values
@@ -62,9 +62,18 @@ pub struct DateHistogramAggregationReq {
///
/// Fractional time values are not supported, but you can address this by shifting to another
/// time unit (e.g., `1.5h` could instead be specified as `90m`).
pub fixed_interval: String,
///
/// `Option` for validation, the parameter is not optional
pub fixed_interval: Option<String>,
/// Intervals implicitly defines an absolute grid of buckets `[interval * k, interval * (k +
/// 1))`.
///
/// Offset makes it possible to shift this grid into
/// `[offset + interval * k, offset + interval * (k + 1))`. Offset has to be in the range [0,
/// interval).
///
/// The `offset` parameter has the same syntax as the `fixed_interval` parameter, but
/// also allows for negative values.
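///
/// For example (mirroring the test below), `"fixed_interval": "30d"` with
/// `"offset": "-4d"` shifts every 30-day bucket boundary back by four days.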
pub offset: Option<String>,
/// The minimum number of documents in a bucket to be returned. Defaults to 0.
pub min_doc_count: Option<u64>,
@@ -75,7 +84,7 @@ pub struct DateHistogramAggregationReq {
/// hard_bounds only limits the buckets, to force a range set both extended_bounds and
/// hard_bounds to the same range.
///
/// Needs to be provided as timestamp in microseconds precision.
/// Needs to be provided as timestamp in millisecond precision.
///
/// ## Example
/// ```json
@@ -86,7 +95,7 @@ pub struct DateHistogramAggregationReq {
/// "interval": "1d",
/// "hard_bounds": {
/// "min": 0,
/// "max": 1420502400000000
/// "max": 1420502400000
/// }
/// }
/// }
@@ -112,26 +121,31 @@ impl DateHistogramAggregationReq {
self.validate()?;
Ok(HistogramAggregation {
field: self.field.to_string(),
interval: parse_into_microseconds(&self.fixed_interval)? as f64,
interval: parse_into_milliseconds(self.fixed_interval.as_ref().unwrap())? as f64,
offset: self
.offset
.as_ref()
.map(|offset| parse_offset_into_microseconds(offset))
.map(|offset| parse_offset_into_milliseconds(offset))
.transpose()?
.map(|el| el as f64),
min_doc_count: self.min_doc_count,
hard_bounds: None,
extended_bounds: None,
hard_bounds: self.hard_bounds,
extended_bounds: self.extended_bounds,
keyed: self.keyed,
})
}
fn validate(&self) -> crate::Result<()> {
if self.interval.is_some() {
if let Some(interval) = self.interval.as_ref() {
return Err(crate::TantivyError::InvalidArgument(format!(
"`interval` parameter {:?} in date histogram is unsupported, only \
`fixed_interval` is supported",
self.interval
"`interval` parameter {interval:?} in date histogram is unsupported, only \
`fixed_interval` is supported"
)));
}
if let Some(interval) = self.calendar_interval.as_ref() {
return Err(crate::TantivyError::InvalidArgument(format!(
"`calendar_interval` parameter {interval:?} in date histogram is unsupported, \
only `fixed_interval` is supported"
)));
}
if self.format.is_some() {
@@ -140,15 +154,13 @@ impl DateHistogramAggregationReq {
));
}
if self.date_interval.is_some() {
if self.fixed_interval.is_none() {
return Err(crate::TantivyError::InvalidArgument(
"date_interval in date histogram is unsupported, only `fixed_interval` is \
supported"
.to_string(),
"fixed_interval in date histogram is missing".to_string(),
));
}
parse_into_microseconds(&self.fixed_interval)?;
parse_into_milliseconds(self.fixed_interval.as_ref().unwrap())?;
Ok(())
}
@@ -169,9 +181,12 @@ pub enum DateHistogramParseError {
/// Offset invalid
#[error("passed offset is invalid {0:?}")]
InvalidOffset(String),
/// Value out of bounds
#[error("passed value is out of bounds: {0:?}")]
OutOfBounds(String),
}
fn parse_offset_into_microseconds(input: &str) -> Result<i64, AggregationError> {
fn parse_offset_into_milliseconds(input: &str) -> Result<i64, AggregationError> {
let is_sign = |byte| &[byte] == b"-" || &[byte] == b"+";
if input.is_empty() {
return Err(DateHistogramParseError::InvalidOffset(input.to_string()).into());
@@ -180,18 +195,18 @@ fn parse_offset_into_microseconds(input: &str) -> Result<i64, AggregationError>
let has_sign = is_sign(input.as_bytes()[0]);
if has_sign {
let (sign, input) = input.split_at(1);
let val = parse_into_microseconds(input)?;
let val = parse_into_milliseconds(input)?;
if sign == "-" {
Ok(-val)
} else {
Ok(val)
}
} else {
parse_into_microseconds(input)
parse_into_milliseconds(input)
}
}
fn parse_into_microseconds(input: &str) -> Result<i64, AggregationError> {
fn parse_into_milliseconds(input: &str) -> Result<i64, AggregationError> {
let split_boundary = input
.as_bytes()
.iter()
@@ -210,16 +225,21 @@ fn parse_into_microseconds(input: &str) -> Result<i64, AggregationError> {
// here and being defensive does not hurt.
.map_err(|_err| DateHistogramParseError::NumberMissing(input.to_string()))?;
let multiplier_from_unit = match unit {
"ms" => 1,
"s" => 1000,
"m" => 60 * 1000,
"h" => 60 * 60 * 1000,
"d" => 24 * 60 * 60 * 1000,
let unit_in_ms = match unit {
"ms" | "milliseconds" => 1,
"s" | "seconds" => 1000,
"m" | "minutes" => 60 * 1000,
"h" | "hours" => 60 * 60 * 1000,
"d" | "days" => 24 * 60 * 60 * 1000,
_ => return Err(DateHistogramParseError::UnitNotRecognized(unit.to_string()).into()),
};
Ok(number * multiplier_from_unit * 1000)
let val = number * unit_in_ms;
// The field type is in nanosecond precision, so validate that the value fits the range
val.checked_mul(1_000_000)
.ok_or_else(|| DateHistogramParseError::OutOfBounds(input.to_string()))?;
Ok(val)
}
#[cfg(test)]
@@ -234,49 +254,50 @@ mod tests {
use crate::Index;
#[test]
fn test_parse_into_microseconds() {
assert_eq!(parse_into_microseconds("1m").unwrap(), 60_000_000);
assert_eq!(parse_into_microseconds("2m").unwrap(), 120_000_000);
fn test_parse_into_millisecs() {
assert_eq!(parse_into_milliseconds("1m").unwrap(), 60_000);
assert_eq!(parse_into_milliseconds("2m").unwrap(), 120_000);
assert_eq!(parse_into_milliseconds("2minutes").unwrap(), 120_000);
assert_eq!(
parse_into_microseconds("2y").unwrap_err(),
parse_into_milliseconds("2y").unwrap_err(),
DateHistogramParseError::UnitNotRecognized("y".to_string()).into()
);
assert_eq!(
parse_into_microseconds("2000").unwrap_err(),
parse_into_milliseconds("2000").unwrap_err(),
DateHistogramParseError::UnitMissing("2000".to_string()).into()
);
assert_eq!(
parse_into_microseconds("ms").unwrap_err(),
parse_into_milliseconds("ms").unwrap_err(),
DateHistogramParseError::NumberMissing("ms".to_string()).into()
);
}
#[test]
fn test_parse_offset_into_microseconds() {
assert_eq!(parse_offset_into_microseconds("1m").unwrap(), 60_000_000);
assert_eq!(parse_offset_into_microseconds("+1m").unwrap(), 60_000_000);
assert_eq!(parse_offset_into_microseconds("-1m").unwrap(), -60_000_000);
assert_eq!(parse_offset_into_microseconds("2m").unwrap(), 120_000_000);
assert_eq!(parse_offset_into_microseconds("+2m").unwrap(), 120_000_000);
assert_eq!(parse_offset_into_microseconds("-2m").unwrap(), -120_000_000);
assert_eq!(parse_offset_into_microseconds("-2ms").unwrap(), -2_000);
fn test_parse_offset_into_milliseconds() {
assert_eq!(parse_offset_into_milliseconds("1m").unwrap(), 60_000);
assert_eq!(parse_offset_into_milliseconds("+1m").unwrap(), 60_000);
assert_eq!(parse_offset_into_milliseconds("-1m").unwrap(), -60_000);
assert_eq!(parse_offset_into_milliseconds("2m").unwrap(), 120_000);
assert_eq!(parse_offset_into_milliseconds("+2m").unwrap(), 120_000);
assert_eq!(parse_offset_into_milliseconds("-2m").unwrap(), -120_000);
assert_eq!(parse_offset_into_milliseconds("-2ms").unwrap(), -2);
assert_eq!(
parse_offset_into_microseconds("2y").unwrap_err(),
parse_offset_into_milliseconds("2y").unwrap_err(),
DateHistogramParseError::UnitNotRecognized("y".to_string()).into()
);
assert_eq!(
parse_offset_into_microseconds("2000").unwrap_err(),
parse_offset_into_milliseconds("2000").unwrap_err(),
DateHistogramParseError::UnitMissing("2000".to_string()).into()
);
assert_eq!(
parse_offset_into_microseconds("ms").unwrap_err(),
parse_offset_into_milliseconds("ms").unwrap_err(),
DateHistogramParseError::NumberMissing("ms".to_string()).into()
);
}
#[test]
fn test_parse_into_milliseconds_do_not_accept_non_ascii() {
assert!(parse_into_microseconds("m").is_err());
assert!(parse_into_milliseconds("m").is_err());
}
pub fn get_test_index_from_docs(
@@ -315,30 +336,322 @@ mod tests {
}
#[test]
fn histogram_test_date_force_merge_segments() -> crate::Result<()> {
fn histogram_test_date_force_merge_segments() {
histogram_test_date_merge_segments(true)
}
#[test]
fn histogram_test_date() -> crate::Result<()> {
fn histogram_test_date() {
histogram_test_date_merge_segments(false)
}
fn histogram_test_date_merge_segments(merge_segments: bool) -> crate::Result<()> {
fn histogram_test_date_merge_segments(merge_segments: bool) {
let docs = vec![
vec![r#"{ "date": "2015-01-01T12:10:30Z", "text": "aaa" }"#],
vec![r#"{ "date": "2015-01-01T11:11:30Z", "text": "bbb" }"#],
vec![r#"{ "date": "2015-01-02T00:00:00Z", "text": "bbb" }"#],
vec![r#"{ "date": "2015-01-06T00:00:00Z", "text": "ccc" }"#],
];
let index = get_test_index_from_docs(merge_segments, &docs).unwrap();
let index = get_test_index_from_docs(merge_segments, &docs)?;
// 30day + offset
{
// 30day + offset
let elasticsearch_compatible_json = json!(
{
"sales_over_time": {
"date_histogram": {
"field": "date",
"fixed_interval": "30d",
"offset": "-4d"
}
}
}
);
let agg_req: Aggregations = serde_json::from_str(
&serde_json::to_string(&elasticsearch_compatible_json).unwrap(),
)
.unwrap();
let res = exec_request(agg_req, &index).unwrap();
let expected_res = json!({
"sales_over_time" : {
"buckets" : [
{
"key_as_string" : "2015-01-01T00:00:00Z",
"key" : 1420070400000.0,
"doc_count" : 4
}
]
}
});
assert_eq!(res, expected_res);
}
{
// 30day + offset + sub_agg
let elasticsearch_compatible_json = json!(
{
"sales_over_time": {
"date_histogram": {
"field": "date",
"fixed_interval": "30d",
"offset": "-4d"
},
"aggs": {
"texts": {
"terms": {"field": "text"}
}
}
}
}
);
let agg_req: Aggregations = serde_json::from_str(
&serde_json::to_string(&elasticsearch_compatible_json).unwrap(),
)
.unwrap();
let res = exec_request(agg_req, &index).unwrap();
let expected_res = json!({
"sales_over_time" : {
"buckets" : [
{
"key_as_string" : "2015-01-01T00:00:00Z",
"key" : 1420070400000.0,
"doc_count" : 4,
"texts": {
"buckets": [
{
"doc_count": 2,
"key": "bbb"
},
{
"doc_count": 1,
"key": "ccc"
},
{
"doc_count": 1,
"key": "aaa"
}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
}
}
]
}
});
assert_eq!(res, expected_res);
}
{
// 1day
let elasticsearch_compatible_json = json!(
{
"sales_over_time": {
"date_histogram": {
"field": "date",
"fixed_interval": "1d"
}
}
}
);
let agg_req: Aggregations = serde_json::from_str(
&serde_json::to_string(&elasticsearch_compatible_json).unwrap(),
)
.unwrap();
let res = exec_request(agg_req, &index).unwrap();
let expected_res = json!( {
"sales_over_time": {
"buckets": [
{
"doc_count": 2,
"key": 1420070400000.0,
"key_as_string": "2015-01-01T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420156800000.0,
"key_as_string": "2015-01-02T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420243200000.0,
"key_as_string": "2015-01-03T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420329600000.0,
"key_as_string": "2015-01-04T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420416000000.0,
"key_as_string": "2015-01-05T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420502400000.0,
"key_as_string": "2015-01-06T00:00:00Z"
}
]
}
});
assert_eq!(res, expected_res);
}
{
// 1day + extended_bounds
let elasticsearch_compatible_json = json!(
{
"sales_over_time": {
"date_histogram": {
"field": "date",
"fixed_interval": "1d",
"extended_bounds": {
"min": 1419984000000.0,
"max": 1420588800000.0
}
}
}
}
);
let agg_req: Aggregations = serde_json::from_str(
&serde_json::to_string(&elasticsearch_compatible_json).unwrap(),
)
.unwrap();
let res = exec_request(agg_req, &index).unwrap();
let expected_res = json!({
"sales_over_time" : {
"buckets": [
{
"doc_count": 0,
"key": 1419984000000.0,
"key_as_string": "2014-12-31T00:00:00Z"
},
{
"doc_count": 2,
"key": 1420070400000.0,
"key_as_string": "2015-01-01T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420156800000.0,
"key_as_string": "2015-01-02T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420243200000.0,
"key_as_string": "2015-01-03T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420329600000.0,
"key_as_string": "2015-01-04T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420416000000.0,
"key_as_string": "2015-01-05T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420502400000.0,
"key_as_string": "2015-01-06T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420588800000.0,
"key_as_string": "2015-01-07T00:00:00Z"
}
]
}
});
assert_eq!(res, expected_res);
}
{
// 1day + hard_bounds + extended_bounds
let elasticsearch_compatible_json = json!(
{
"sales_over_time": {
"date_histogram": {
"field": "date",
"fixed_interval": "1d",
"hard_bounds": {
"min": 1420156800000.0,
"max": 1420243200000.0
}
}
}
}
);
let agg_req: Aggregations = serde_json::from_str(
&serde_json::to_string(&elasticsearch_compatible_json).unwrap(),
)
.unwrap();
let res = exec_request(agg_req, &index).unwrap();
let expected_res = json!({
"sales_over_time" : {
"buckets": [
{
"doc_count": 1,
"key": 1420156800000.0,
"key_as_string": "2015-01-02T00:00:00Z"
}
]
}
});
assert_eq!(res, expected_res);
}
{
// 1day + hard_bounds as Rfc3339
let elasticsearch_compatible_json = json!(
{
"sales_over_time": {
"date_histogram": {
"field": "date",
"fixed_interval": "1d",
"hard_bounds": {
"min": "2015-01-02T00:00:00Z",
"max": "2015-01-02T12:00:00Z"
}
}
}
}
);
let agg_req: Aggregations = serde_json::from_str(
&serde_json::to_string(&elasticsearch_compatible_json).unwrap(),
)
.unwrap();
let res = exec_request(agg_req, &index).unwrap();
let expected_res = json!({
"sales_over_time" : {
"buckets": [
{
"doc_count": 1,
"key": 1420156800000.0,
"key_as_string": "2015-01-02T00:00:00Z"
}
]
}
});
assert_eq!(res, expected_res);
}
}
#[test]
fn histogram_test_invalid_req() {
let docs = vec![];
let index = get_test_index_from_docs(false, &docs).unwrap();
let elasticsearch_compatible_json = json!(
{
"sales_over_time": {
"date_histogram": {
"field": "date",
"fixed_interval": "30d",
"interval": "30d",
"offset": "-4d"
}
}
@@ -348,128 +661,10 @@ mod tests {
let agg_req: Aggregations =
serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap())
.unwrap();
let res = exec_request(agg_req, &index)?;
let expected_res = json!({
"sales_over_time" : {
"buckets" : [
{
"key_as_string" : "2015-01-01T00:00:00Z",
"key" : 1420070400000000.0,
"doc_count" : 4
}
]
}
});
assert_eq!(res, expected_res);
// 30day + offset + sub_agg
let elasticsearch_compatible_json = json!(
{
"sales_over_time": {
"date_histogram": {
"field": "date",
"fixed_interval": "30d",
"offset": "-4d"
},
"aggs": {
"texts": {
"terms": {"field": "text"}
}
}
}
}
let err = exec_request(agg_req, &index).unwrap_err();
assert_eq!(
err.to_string(),
r#"An invalid argument was passed: '`interval` parameter "30d" in date histogram is unsupported, only `fixed_interval` is supported'"#
);
let agg_req: Aggregations =
serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap())
.unwrap();
let res = exec_request(agg_req, &index)?;
println!("{}", serde_json::to_string_pretty(&res).unwrap());
let expected_res = json!({
"sales_over_time" : {
"buckets" : [
{
"key_as_string" : "2015-01-01T00:00:00Z",
"key" : 1420070400000000.0,
"doc_count" : 4,
"texts": {
"buckets": [
{
"doc_count": 2,
"key": "bbb"
},
{
"doc_count": 1,
"key": "ccc"
},
{
"doc_count": 1,
"key": "aaa"
}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
}
}
]
}
});
assert_eq!(res, expected_res);
// 1day
let elasticsearch_compatible_json = json!(
{
"sales_over_time": {
"date_histogram": {
"field": "date",
"fixed_interval": "1d"
}
}
}
);
let agg_req: Aggregations =
serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap())
.unwrap();
let res = exec_request(agg_req, &index)?;
let expected_res = json!( {
"sales_over_time": {
"buckets": [
{
"doc_count": 2,
"key": 1420070400000000.0,
"key_as_string": "2015-01-01T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420156800000000.0,
"key_as_string": "2015-01-02T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420243200000000.0,
"key_as_string": "2015-01-03T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420329600000000.0,
"key_as_string": "2015-01-04T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420416000000000.0,
"key_as_string": "2015-01-05T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420502400000000.0,
"key_as_string": "2015-01-06T00:00:00Z"
}
]
}
});
assert_eq!(res, expected_res);
Ok(())
}
}

File diff suppressed because it is too large


@@ -1,7 +1,16 @@
//! Module for all bucket aggregations.
//!
//! BucketAggregations create buckets of documents
//! [`BucketAggregation`](super::agg_req::BucketAggregation).
//! BucketAggregations create buckets of documents.
//! Each bucket is associated with a rule which
//! determines whether or not a document falls into it. In other words, the buckets
//! effectively define document sets. Buckets are not necessarily disjoint, therefore a document can
//! fall into multiple buckets. In addition to the buckets themselves, the bucket aggregations also
//! compute and return the number of documents for each bucket. Bucket aggregations, as opposed to
//! metric aggregations, can hold sub-aggregations. These sub-aggregations will be aggregated for
//! the buckets created by their "parent" bucket aggregation. There are different bucket
//! aggregators, each with a different "bucketing" strategy. Some define a single bucket, some
//! define a fixed number of buckets, and others dynamically create the buckets during the
//! aggregation process.
//!
//! Results of final buckets are [`BucketResult`](super::agg_result::BucketResult).
//! Results of intermediate buckets are

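To make the module documentation above concrete, the snippet below sketches a bucket aggregation holding a sub-aggregation, reusing the `date`/`text` fields and the `date_histogram` + `terms` request shape from the tests earlier in this diff:

```rust
use serde_json::json;

fn main() {
    // A date_histogram bucket aggregation with a terms sub-aggregation: each bucket
    // produced by the parent also carries the term counts computed within that bucket.
    let agg_req = json!({
        "sales_over_time": {
            "date_histogram": { "field": "date", "fixed_interval": "30d", "offset": "-4d" },
            "aggs": {
                "texts": { "terms": { "field": "text" } }
            }
        }
    });
    println!("{agg_req}");
}
```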

@@ -5,16 +5,17 @@ use columnar::{ColumnType, MonotonicallyMappableToU64};
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use crate::aggregation::agg_limits::ResourceLimitGuard;
use crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResults, IntermediateBucketResult, IntermediateRangeBucketEntry,
IntermediateRangeBucketResult,
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
IntermediateRangeBucketEntry, IntermediateRangeBucketResult,
};
use crate::aggregation::segment_agg_result::{
build_segment_agg_collector, BucketCount, SegmentAggregationCollector,
build_segment_agg_collector, SegmentAggregationCollector,
};
use crate::aggregation::{
f64_from_fastfield_u64, f64_to_fastfield_u64, format_date, Key, SerializedKey, VecWithNames,
f64_from_fastfield_u64, f64_to_fastfield_u64, format_date, Key, SerializedKey,
};
use crate::TantivyError;
@@ -157,16 +158,18 @@ impl SegmentRangeBucketEntry {
self,
agg_with_accessor: &AggregationsWithAccessor,
) -> crate::Result<IntermediateRangeBucketEntry> {
let sub_aggregation = if let Some(sub_aggregation) = self.sub_aggregation {
sub_aggregation.into_intermediate_aggregations_result(agg_with_accessor)?
let mut sub_aggregation_res = IntermediateAggregationResults::default();
if let Some(sub_aggregation) = self.sub_aggregation {
sub_aggregation
.add_intermediate_aggregation_result(agg_with_accessor, &mut sub_aggregation_res)?
} else {
Default::default()
};
Ok(IntermediateRangeBucketEntry {
key: self.key,
key: self.key.into(),
doc_count: self.doc_count,
sub_aggregation,
sub_aggregation: sub_aggregation_res,
from: self.from,
to: self.to,
})
@@ -174,13 +177,14 @@ impl SegmentRangeBucketEntry {
}
impl SegmentAggregationCollector for SegmentRangeCollector {
fn into_intermediate_aggregations_result(
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
) -> crate::Result<IntermediateAggregationResults> {
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
let field_type = self.column_type;
let name = agg_with_accessor.buckets.keys[self.accessor_idx].to_string();
let sub_agg = &agg_with_accessor.buckets.values[self.accessor_idx].sub_aggregation;
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let sub_agg = &agg_with_accessor.aggs.values[self.accessor_idx].sub_aggregation;
let buckets: FxHashMap<SerializedKey, IntermediateRangeBucketEntry> = self
.buckets
@@ -200,49 +204,49 @@ impl SegmentAggregationCollector for SegmentRangeCollector {
column_type: Some(self.column_type),
});
let buckets = Some(VecWithNames::from_entries(vec![(name, bucket)]));
results.push(name, IntermediateAggregationResult::Bucket(bucket))?;
Ok(IntermediateAggregationResults {
metrics: None,
buckets,
})
Ok(())
}
#[inline]
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &AggregationsWithAccessor,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
self.collect_block(&[doc], agg_with_accessor)
}
#[inline]
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &AggregationsWithAccessor,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
let accessor = &agg_with_accessor.buckets.values[self.accessor_idx].accessor;
let sub_aggregation_accessor =
&agg_with_accessor.buckets.values[self.accessor_idx].sub_aggregation;
for doc in docs {
for val in accessor.values_for_doc(*doc) {
let bucket_pos = self.get_bucket_pos(val);
let bucket_agg_accessor = &mut agg_with_accessor.aggs.values[self.accessor_idx];
let bucket = &mut self.buckets[bucket_pos];
bucket_agg_accessor
.column_block_accessor
.fetch_block(docs, &bucket_agg_accessor.accessor);
bucket.bucket.doc_count += 1;
if let Some(sub_aggregation) = &mut bucket.bucket.sub_aggregation {
sub_aggregation.collect(*doc, sub_aggregation_accessor)?;
}
for (doc, val) in bucket_agg_accessor.column_block_accessor.iter_docid_vals() {
let bucket_pos = self.get_bucket_pos(val);
let bucket = &mut self.buckets[bucket_pos];
bucket.bucket.doc_count += 1;
if let Some(sub_aggregation) = &mut bucket.bucket.sub_aggregation {
sub_aggregation.collect(doc, &mut bucket_agg_accessor.sub_aggregation)?;
}
}
Ok(())
}
fn flush(&mut self, agg_with_accessor: &AggregationsWithAccessor) -> crate::Result<()> {
fn flush(&mut self, agg_with_accessor: &mut AggregationsWithAccessor) -> crate::Result<()> {
let sub_aggregation_accessor =
&agg_with_accessor.buckets.values[self.accessor_idx].sub_aggregation;
&mut agg_with_accessor.aggs.values[self.accessor_idx].sub_aggregation;
for bucket in self.buckets.iter_mut() {
if let Some(sub_agg) = bucket.bucket.sub_aggregation.as_mut() {
@@ -257,8 +261,8 @@ impl SegmentAggregationCollector for SegmentRangeCollector {
impl SegmentRangeCollector {
pub(crate) fn from_req_and_validate(
req: &RangeAggregation,
sub_aggregation: &AggregationsWithAccessor,
bucket_count: &BucketCount,
sub_aggregation: &mut AggregationsWithAccessor,
limits: &mut ResourceLimitGuard,
field_type: ColumnType,
accessor_idx: usize,
) -> crate::Result<Self> {
@@ -302,8 +306,9 @@ impl SegmentRangeCollector {
})
.collect::<crate::Result<_>>()?;
bucket_count.add_count(buckets.len() as u32);
bucket_count.validate_bucket_count()?;
limits.add_memory_consumed(
buckets.len() as u64 * std::mem::size_of::<SegmentRangeAndBucketEntry>() as u64,
)?;
Ok(SegmentRangeCollector {
buckets,
@@ -440,14 +445,12 @@ mod tests {
use serde_json::Value;
use super::*;
use crate::aggregation::agg_req::{
Aggregation, Aggregations, BucketAggregation, BucketAggregationType, MetricAggregation,
};
use crate::aggregation::metric::AverageAggregation;
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::tests::{
exec_request, exec_request_with_query, get_test_index_2_segments,
get_test_index_with_num_docs,
};
use crate::aggregation::AggregationLimits;
pub fn get_collector_from_ranges(
ranges: Vec<RangeAggregationRange>,
@@ -461,8 +464,8 @@ mod tests {
SegmentRangeCollector::from_req_and_validate(
&req,
&Default::default(),
&Default::default(),
&mut Default::default(),
&mut AggregationLimits::default().new_guard(),
field_type,
0,
)
@@ -473,19 +476,18 @@ mod tests {
fn range_fraction_test() -> crate::Result<()> {
let index = get_test_index_with_num_docs(false, 100)?;
let agg_req: Aggregations = vec![(
"range".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Range(RangeAggregation {
field: "fraction_f64".to_string(),
ranges: vec![(0f64..0.1f64).into(), (0.1f64..0.2f64).into()],
..Default::default()
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();
let agg_req: Aggregations = serde_json::from_value(json!({
"range": {
"range": {
"field": "fraction_f64",
"ranges": [
{"from": 0.0, "to": 0.1},
{"from": 0.1, "to": 0.2},
]
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
@@ -505,28 +507,25 @@ mod tests {
fn range_fraction_test_with_sub_agg() -> crate::Result<()> {
let index = get_test_index_with_num_docs(false, 100)?;
let sub_agg_req: Aggregations = vec![(
"score_f64".to_string(),
Aggregation::Metric(MetricAggregation::Average(
AverageAggregation::from_field_name("score_f64".to_string()),
)),
)]
.into_iter()
.collect();
let sub_agg_req: Aggregations = serde_json::from_value(json!({
"avg": { "avg": { "field": "score_f64", } }
let agg_req: Aggregations = vec![(
"range".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Range(RangeAggregation {
field: "fraction_f64".to_string(),
ranges: vec![(0f64..0.1f64).into(), (0.1f64..0.2f64).into()],
..Default::default()
}),
sub_aggregation: sub_agg_req,
}),
)]
.into_iter()
.collect();
}))
.unwrap();
let agg_req: Aggregations = serde_json::from_value(json!({
"range": {
"range": {
"field": "fraction_f64",
"ranges": [
{"from": 0.0, "to": 0.1},
{"from": 0.1, "to": 0.2},
]
},
"aggs": sub_agg_req
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
@@ -546,19 +545,19 @@ mod tests {
fn range_keyed_buckets_test() -> crate::Result<()> {
let index = get_test_index_with_num_docs(false, 100)?;
let agg_req: Aggregations = vec![(
"range".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Range(RangeAggregation {
field: "fraction_f64".to_string(),
ranges: vec![(0f64..0.1f64).into(), (0.1f64..0.2f64).into()],
keyed: true,
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();
let agg_req: Aggregations = serde_json::from_value(json!({
"range": {
"range": {
"field": "fraction_f64",
"ranges": [
{"from": 0.0, "to": 0.1},
{"from": 0.1, "to": 0.2},
],
"keyed": true
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
@@ -583,30 +582,19 @@ mod tests {
fn range_custom_key_test() -> crate::Result<()> {
let index = get_test_index_with_num_docs(false, 100)?;
let agg_req: Aggregations = vec![(
"range".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Range(RangeAggregation {
field: "fraction_f64".to_string(),
ranges: vec![
RangeAggregationRange {
key: Some("custom-key-0-to-0.1".to_string()),
from: Some(0f64),
to: Some(0.1f64),
},
RangeAggregationRange {
key: None,
from: Some(0.1f64),
to: Some(0.2f64),
},
let agg_req: Aggregations = serde_json::from_value(json!({
"range": {
"range": {
"field": "fraction_f64",
"ranges": [
{"key": "custom-key-0-to-0.1", "from": 0.0, "to": 0.1},
{"from": 0.1, "to": 0.2},
],
keyed: false,
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();
"keyed": false
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
@@ -640,30 +628,19 @@ mod tests {
fn range_date_test_with_opt(merge_segments: bool) -> crate::Result<()> {
let index = get_test_index_2_segments(merge_segments)?;
let agg_req: Aggregations = vec![(
"date_ranges".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Range(RangeAggregation {
field: "date".to_string(),
ranges: vec![
RangeAggregationRange {
key: None,
from: None,
to: Some(1546300800000000.0f64),
},
RangeAggregationRange {
key: None,
from: Some(1546300800000000.0f64),
to: Some(1546387200000000.0f64),
},
let agg_req: Aggregations = serde_json::from_value(json!({
"date_ranges": {
"range": {
"field": "date",
"ranges": [
{"to": 1546300800000000000i64},
{"from": 1546300800000000000i64, "to": 1546387200000000000i64},
],
keyed: false,
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();
"keyed": false
},
}
}))
.unwrap();
let agg_res = exec_request(agg_req, &index)?;
@@ -702,23 +679,18 @@ mod tests {
fn range_custom_key_keyed_buckets_test() -> crate::Result<()> {
let index = get_test_index_with_num_docs(false, 100)?;
let agg_req: Aggregations = vec![(
"range".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Range(RangeAggregation {
field: "fraction_f64".to_string(),
ranges: vec![RangeAggregationRange {
key: Some("custom-key-0-to-0.1".to_string()),
from: Some(0f64),
to: Some(0.1f64),
}],
keyed: true,
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();
let agg_req: Aggregations = serde_json::from_value(json!({
"range": {
"range": {
"field": "fraction_f64",
"ranges": [
{"key": "custom-key-0-to-0.1", "from": 0.0, "to": 0.1},
],
"keyed": true
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;

File diff suppressed because it is too large


@@ -34,17 +34,20 @@ impl BufAggregationCollector {
}
impl SegmentAggregationCollector for BufAggregationCollector {
fn into_intermediate_aggregations_result(
#[inline]
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
) -> crate::Result<IntermediateAggregationResults> {
Box::new(self.collector).into_intermediate_aggregations_result(agg_with_accessor)
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
Box::new(self.collector).add_intermediate_aggregation_result(agg_with_accessor, results)
}
#[inline]
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &AggregationsWithAccessor,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
self.staged_docs[self.num_staged_docs] = doc;
self.num_staged_docs += 1;
@@ -56,18 +59,19 @@ impl SegmentAggregationCollector for BufAggregationCollector {
Ok(())
}
#[inline]
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &AggregationsWithAccessor,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
for doc in docs {
self.collect(*doc, agg_with_accessor)?;
}
self.collector.collect_block(docs, agg_with_accessor)?;
Ok(())
}
fn flush(&mut self, agg_with_accessor: &AggregationsWithAccessor) -> crate::Result<()> {
#[inline]
fn flush(&mut self, agg_with_accessor: &mut AggregationsWithAccessor) -> crate::Result<()> {
self.collector
.collect_block(&self.staged_docs[..self.num_staged_docs], agg_with_accessor)?;
self.num_staged_docs = 0;


@@ -1,36 +1,36 @@
use std::rc::Rc;
use super::agg_req::Aggregations;
use super::agg_req_with_accessor::AggregationsWithAccessor;
use super::agg_result::AggregationResults;
use super::buf_collector::BufAggregationCollector;
use super::intermediate_agg_result::IntermediateAggregationResults;
use super::segment_agg_result::{build_segment_agg_collector, SegmentAggregationCollector};
use crate::aggregation::agg_req_with_accessor::get_aggs_with_accessor_and_validate;
use super::segment_agg_result::{
build_segment_agg_collector, AggregationLimits, SegmentAggregationCollector,
};
use crate::aggregation::agg_req_with_accessor::get_aggs_with_segment_accessor_and_validate;
use crate::collector::{Collector, SegmentCollector};
use crate::{SegmentReader, TantivyError};
use crate::{DocId, SegmentReader, TantivyError};
/// The default max bucket count, before the aggregation fails.
pub const MAX_BUCKET_COUNT: u32 = 65000;
pub const DEFAULT_BUCKET_LIMIT: u32 = 65000;
/// The default memory limit in bytes before the aggregation fails. 500MB
pub const DEFAULT_MEMORY_LIMIT: u64 = 500_000_000;
/// Collector for aggregations.
///
/// The collector collects all aggregations by the underlying aggregation request.
pub struct AggregationCollector {
agg: Aggregations,
max_bucket_count: u32,
limits: AggregationLimits,
}
impl AggregationCollector {
/// Create collector from aggregation request.
///
/// Aggregation fails when the total bucket count is higher than max_bucket_count.
/// max_bucket_count will default to `MAX_BUCKET_COUNT` (65000) when unset
pub fn from_aggs(agg: Aggregations, max_bucket_count: Option<u32>) -> Self {
Self {
agg,
max_bucket_count: max_bucket_count.unwrap_or(MAX_BUCKET_COUNT),
}
/// Aggregation fails when the limits in `AggregationLimits` are exceeded (memory limit and
/// bucket limit).
pub fn from_aggs(agg: Aggregations, limits: AggregationLimits) -> Self {
Self { agg, limits }
}
}
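A usage sketch of the new constructor signature; the `tantivy::aggregation` import paths are assumed from the re-exports visible elsewhere in this diff rather than stated here:

```rust
use tantivy::aggregation::agg_req::Aggregations;
use tantivy::aggregation::{AggregationCollector, AggregationLimits};

// Sketch: the limits-based constructor replaces the old `Option<u32>` bucket count.
// `AggregationLimits::default()` is assumed to carry the defaults defined above
// (500 MB memory, 65_000 buckets).
fn build_collector(agg_json: &str) -> serde_json::Result<AggregationCollector> {
    let aggregations: Aggregations = serde_json::from_str(agg_json)?;
    Ok(AggregationCollector::from_aggs(aggregations, AggregationLimits::default()))
}
```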
@@ -44,18 +44,16 @@ impl AggregationCollector {
/// into the final `AggregationResults` via the `into_final_result()` method.
pub struct DistributedAggregationCollector {
agg: Aggregations,
max_bucket_count: u32,
limits: AggregationLimits,
}
impl DistributedAggregationCollector {
/// Create collector from aggregation request.
///
/// max_bucket_count will default to `MAX_BUCKET_COUNT` (65000) when unset
pub fn from_aggs(agg: Aggregations, max_bucket_count: Option<u32>) -> Self {
Self {
agg,
max_bucket_count: max_bucket_count.unwrap_or(MAX_BUCKET_COUNT),
}
/// Aggregation fails when the limits in `AggregationLimits` are exceeded (memory limit and
/// bucket limit).
pub fn from_aggs(agg: Aggregations, limits: AggregationLimits) -> Self {
Self { agg, limits }
}
}
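A rough sketch of the distributed flow this collector targets, using the now-fallible `merge_fruits` and the limits-aware `into_final_result` introduced later in this diff; the import paths are assumptions:

```rust
use tantivy::aggregation::agg_req::Aggregations;
use tantivy::aggregation::agg_result::AggregationResults;
use tantivy::aggregation::intermediate_agg_result::IntermediateAggregationResults;
use tantivy::aggregation::AggregationLimits;

fn merge_shard_results(
    mut fruits: Vec<IntermediateAggregationResults>,
    req: Aggregations,
    limits: &AggregationLimits,
) -> tantivy::Result<AggregationResults> {
    // Each node returns an IntermediateAggregationResults; merge them pairwise.
    let mut merged = fruits.pop().unwrap_or_default();
    for fruit in fruits {
        merged.merge_fruits(fruit)?; // merging is fallible after this change
    }
    // The bucket limit is enforced when the merged result is converted to its final form.
    merged.into_final_result(req, limits)
}
```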
@@ -69,11 +67,7 @@ impl Collector for DistributedAggregationCollector {
_segment_local_id: crate::SegmentOrdinal,
reader: &crate::SegmentReader,
) -> crate::Result<Self::Child> {
AggregationSegmentCollector::from_agg_req_and_reader(
&self.agg,
reader,
self.max_bucket_count,
)
AggregationSegmentCollector::from_agg_req_and_reader(&self.agg, reader, &self.limits)
}
fn requires_scoring(&self) -> bool {
@@ -98,11 +92,7 @@ impl Collector for AggregationCollector {
_segment_local_id: crate::SegmentOrdinal,
reader: &crate::SegmentReader,
) -> crate::Result<Self::Child> {
AggregationSegmentCollector::from_agg_req_and_reader(
&self.agg,
reader,
self.max_bucket_count,
)
AggregationSegmentCollector::from_agg_req_and_reader(&self.agg, reader, &self.limits)
}
fn requires_scoring(&self) -> bool {
@@ -114,7 +104,7 @@ impl Collector for AggregationCollector {
segment_fruits: Vec<<Self::Child as SegmentCollector>::Fruit>,
) -> crate::Result<Self::Fruit> {
let res = merge_fruits(segment_fruits)?;
res.into_final_bucket_result(self.agg.clone())
res.into_final_result(self.agg.clone(), &self.limits)
}
}
@@ -124,7 +114,7 @@ fn merge_fruits(
if let Some(fruit) = segment_fruits.pop() {
let mut fruit = fruit?;
for next_fruit in segment_fruits {
fruit.merge_fruits(next_fruit?);
fruit.merge_fruits(next_fruit?)?;
}
Ok(fruit)
} else {
@@ -135,7 +125,7 @@ fn merge_fruits(
/// `AggregationSegmentCollector` does the aggregation collection on a segment.
pub struct AggregationSegmentCollector {
aggs_with_accessor: AggregationsWithAccessor,
result: BufAggregationCollector,
agg_collector: BufAggregationCollector,
error: Option<TantivyError>,
}
@@ -145,15 +135,15 @@ impl AggregationSegmentCollector {
pub fn from_agg_req_and_reader(
agg: &Aggregations,
reader: &SegmentReader,
max_bucket_count: u32,
limits: &AggregationLimits,
) -> crate::Result<Self> {
let aggs_with_accessor =
get_aggs_with_accessor_and_validate(agg, reader, Rc::default(), max_bucket_count)?;
let mut aggs_with_accessor =
get_aggs_with_segment_accessor_and_validate(agg, reader, limits)?;
let result =
BufAggregationCollector::new(build_segment_agg_collector(&aggs_with_accessor)?);
BufAggregationCollector::new(build_segment_agg_collector(&mut aggs_with_accessor)?);
Ok(AggregationSegmentCollector {
aggs_with_accessor,
result,
agg_collector: result,
error: None,
})
}
@@ -163,11 +153,29 @@ impl SegmentCollector for AggregationSegmentCollector {
type Fruit = crate::Result<IntermediateAggregationResults>;
#[inline]
fn collect(&mut self, doc: crate::DocId, _score: crate::Score) {
fn collect(&mut self, doc: DocId, _score: crate::Score) {
if self.error.is_some() {
return;
}
if let Err(err) = self.result.collect(doc, &self.aggs_with_accessor) {
if let Err(err) = self
.agg_collector
.collect(doc, &mut self.aggs_with_accessor)
{
self.error = Some(err);
}
}
/// The query pushes the documents to the collector via this method.
///
/// Only valid for collectors that ignore scores.
fn collect_block(&mut self, docs: &[DocId]) {
if self.error.is_some() {
return;
}
if let Err(err) = self
.agg_collector
.collect_block(docs, &mut self.aggs_with_accessor)
{
self.error = Some(err);
}
}
@@ -176,7 +184,14 @@ impl SegmentCollector for AggregationSegmentCollector {
if let Some(err) = self.error {
return Err(err);
}
self.result.flush(&self.aggs_with_accessor)?;
Box::new(self.result).into_intermediate_aggregations_result(&self.aggs_with_accessor)
self.agg_collector.flush(&mut self.aggs_with_accessor)?;
let mut sub_aggregation_res = IntermediateAggregationResults::default();
Box::new(self.agg_collector).add_intermediate_aggregation_result(
&self.aggs_with_accessor,
&mut sub_aggregation_res,
)?;
Ok(sub_aggregation_res)
}
}


@@ -4,13 +4,11 @@ use time::OffsetDateTime;
use crate::TantivyError;
pub(crate) fn format_date(val: i64) -> crate::Result<String> {
let datetime =
OffsetDateTime::from_unix_timestamp_nanos(1_000 * (val as i128)).map_err(|err| {
TantivyError::InvalidArgument(format!(
"Could not convert {:?} to OffsetDateTime, err {:?}",
val, err
))
})?;
let datetime = OffsetDateTime::from_unix_timestamp_nanos(val as i128).map_err(|err| {
TantivyError::InvalidArgument(format!(
"Could not convert {val:?} to OffsetDateTime, err {err:?}"
))
})?;
let key_as_string = datetime
.format(&Rfc3339)
.map_err(|_err| TantivyError::InvalidArgument("Could not serialize date".to_string()))?;
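A quick sanity check of the unit change: the value is now interpreted as nanoseconds since the Unix epoch (previously it was multiplied by 1_000, i.e. treated as microseconds). A self-contained sketch using the same `time` calls:

```rust
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;

fn main() {
    // 1_420_070_400 seconds == 2015-01-01T00:00:00Z, expressed in nanoseconds:
    let val: i64 = 1_420_070_400_000_000_000;
    let datetime = OffsetDateTime::from_unix_timestamp_nanos(val as i128).unwrap();
    assert_eq!(datetime.format(&Rfc3339).unwrap(), "2015-01-01T00:00:00Z");
}
```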


@@ -1,9 +1,39 @@
use common::ByteCount;
use super::bucket::DateHistogramParseError;
/// Error that may occur while running an aggregation
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum AggregationError {
/// Internal error in the aggregation request
#[error("InternalError: {0:?}")]
InternalError(String),
/// Invalid Aggregation Request
#[error("InvalidRequest: {0:?}")]
InvalidRequest(String),
/// Date histogram parse error
#[error("Date histogram parse error: {0:?}")]
DateHistogramParseError(#[from] DateHistogramParseError),
/// Memory limit exceeded
#[error(
"Aborting aggregation because memory limit was exceeded. Limit: {limit:?}, Current: \
{current:?}"
)]
MemoryExceeded {
/// Memory consumption limit
limit: ByteCount,
/// Current memory consumption
current: ByteCount,
},
/// Bucket limit exceeded
#[error(
"Aborting aggregation because bucket limit was exceeded. Limit: {limit:?}, Current: \
{current:?}"
)]
BucketLimitExceeded {
/// Bucket limit
limit: u32,
/// Current num buckets
current: u32,
},
}
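For reference, a hypothetical check of how the bucket-limit variant renders through the `thiserror` `Display` derive (the public import path is an assumption; the memory variant is omitted because `ByteCount`'s exact `Debug` output is not shown in this diff):

```rust
use tantivy::aggregation::AggregationError;

fn main() {
    // The trailing `\` in the #[error(...)] string above joins the two lines,
    // so the rendered message is a single line.
    let err = AggregationError::BucketLimitExceeded { limit: 65_000, current: 70_000 };
    assert_eq!(
        err.to_string(),
        "Aborting aggregation because bucket limit was exceeded. Limit: 65000, Current: 70000"
    );
}
```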


@@ -3,188 +3,201 @@
//! indices.
use std::cmp::Ordering;
use std::collections::hash_map::Entry;
use std::hash::Hash;
use columnar::ColumnType;
use itertools::Itertools;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use super::agg_req::{
Aggregations, AggregationsInternal, BucketAggregationInternal, BucketAggregationType,
MetricAggregation, RangeAggregation,
};
use super::agg_result::{AggregationResult, BucketResult, RangeBucketEntry};
use super::agg_req::{Aggregation, AggregationVariants, Aggregations};
use super::agg_result::{AggregationResult, BucketResult, MetricResult, RangeBucketEntry};
use super::bucket::{
cut_off_buckets, get_agg_name_and_property, intermediate_histogram_buckets_to_final_buckets,
GetDocCount, Order, OrderTarget, SegmentHistogramBucketEntry, TermsAggregation,
GetDocCount, Order, OrderTarget, RangeAggregation, TermsAggregation,
};
use super::metric::{
IntermediateAverage, IntermediateCount, IntermediateMax, IntermediateMin, IntermediateStats,
IntermediateSum,
IntermediateSum, PercentilesCollector,
};
use super::{format_date, Key, SerializedKey, VecWithNames};
use super::segment_agg_result::AggregationLimits;
use super::{format_date, AggregationError, Key, SerializedKey};
use crate::aggregation::agg_result::{AggregationResults, BucketEntries, BucketEntry};
use crate::aggregation::bucket::TermsAggregationInternal;
use crate::TantivyError;
/// Contains the intermediate aggregation result, which is optimized to be merged with other
/// intermediate results.
///
/// Notice: This struct should not be de/serialized via JSON format.
#[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateAggregationResults {
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) metrics: Option<VecWithNames<IntermediateMetricResult>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) buckets: Option<VecWithNames<IntermediateBucketResult>>,
pub(crate) aggs_res: FxHashMap<String, IntermediateAggregationResult>,
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialOrd, PartialEq)]
/// The key to identify a bucket.
/// This might seem redundant with `Key`, but the point is to have a different
/// Serialize implementation.
pub enum IntermediateKey {
/// String key
Str(String),
/// `f64` key
F64(f64),
}
impl From<Key> for IntermediateKey {
fn from(value: Key) -> Self {
match value {
Key::Str(s) => Self::Str(s),
Key::F64(f) => Self::F64(f),
}
}
}
impl From<IntermediateKey> for Key {
fn from(value: IntermediateKey) -> Self {
match value {
IntermediateKey::Str(s) => Self::Str(s),
IntermediateKey::F64(f) => Self::F64(f),
}
}
}
impl Eq for IntermediateKey {}
impl std::hash::Hash for IntermediateKey {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
core::mem::discriminant(self).hash(state);
match self {
IntermediateKey::Str(text) => text.hash(state),
IntermediateKey::F64(val) => val.to_bits().hash(state),
}
}
}
impl IntermediateAggregationResults {
/// Add a result
pub fn push(&mut self, key: String, value: IntermediateAggregationResult) -> crate::Result<()> {
let entry = self.aggs_res.entry(key);
match entry {
Entry::Occupied(mut e) => {
// In case of term aggregation over different types, we need to merge the results.
e.get_mut().merge_fruits(value)?;
}
Entry::Vacant(e) => {
e.insert(value);
}
}
Ok(())
}
/// Convert intermediate result and its aggregation request to the final result.
pub fn into_final_bucket_result(self, req: Aggregations) -> crate::Result<AggregationResults> {
self.into_final_bucket_result_internal(&(req.into()))
pub fn into_final_result(
self,
req: Aggregations,
limits: &AggregationLimits,
) -> crate::Result<AggregationResults> {
let res = self.into_final_result_internal(&req, limits)?;
let bucket_count = res.get_bucket_count() as u32;
if bucket_count > limits.get_bucket_limit() {
return Err(TantivyError::AggregationError(
AggregationError::BucketLimitExceeded {
limit: limits.get_bucket_limit(),
current: bucket_count,
},
));
}
Ok(res)
}
/// Convert intermediate result and its aggregation request to the final result.
///
/// Internal function, AggregationsInternal is used instead Aggregations, which is optimized
/// for internal processing, by splitting metric and buckets into separate groups.
pub(crate) fn into_final_bucket_result_internal(
pub(crate) fn into_final_result_internal(
self,
req: &AggregationsInternal,
req: &Aggregations,
limits: &AggregationLimits,
) -> crate::Result<AggregationResults> {
// Important assumption:
// When the tree contains buckets/metric, we expect it to have all buckets/metrics from the
// request
let mut results: FxHashMap<String, AggregationResult> = FxHashMap::default();
if let Some(buckets) = self.buckets {
convert_and_add_final_buckets_to_result(&mut results, buckets, &req.buckets)?
} else {
// When there are no buckets, we create empty buckets, so that the serialized json
// format is constant
add_empty_final_buckets_to_result(&mut results, &req.buckets)?
};
if let Some(metrics) = self.metrics {
convert_and_add_final_metrics_to_result(&mut results, metrics);
} else {
// When there are no metrics, we create empty metric results, so that the serialized
// json format is constant
add_empty_final_metrics_to_result(&mut results, &req.metrics)?;
for (key, agg_res) in self.aggs_res.into_iter() {
let req = req.get(key.as_str()).unwrap();
results.insert(key, agg_res.into_final_result(req, limits)?);
}
// Handle empty results
if results.len() != req.len() {
for (key, req) in req.iter() {
if !results.contains_key(key) {
let empty_res = empty_from_req(req);
results.insert(key.to_string(), empty_res.into_final_result(req, limits)?);
}
}
}
Ok(AggregationResults(results))
}
pub(crate) fn empty_from_req(req: &AggregationsInternal) -> Self {
let metrics = if req.metrics.is_empty() {
None
} else {
let metrics = req
.metrics
.iter()
.map(|(key, req)| {
(
key.to_string(),
IntermediateMetricResult::empty_from_req(req),
)
})
.collect();
Some(VecWithNames::from_entries(metrics))
};
pub(crate) fn empty_from_req(req: &Aggregations) -> Self {
let mut aggs_res: FxHashMap<String, IntermediateAggregationResult> = FxHashMap::default();
for (key, req) in req.iter() {
let empty_res = empty_from_req(req);
aggs_res.insert(key.to_string(), empty_res);
}
let buckets = if req.buckets.is_empty() {
None
} else {
let buckets = req
.buckets
.iter()
.map(|(key, req)| {
(
key.to_string(),
IntermediateBucketResult::empty_from_req(&req.bucket_agg),
)
})
.collect();
Some(VecWithNames::from_entries(buckets))
};
Self { metrics, buckets }
Self { aggs_res }
}
/// Merge another intermediate aggregation result into this result.
///
/// The order of the values need to be the same on both results. This is ensured when the same
/// (key values) are present on the underlying `VecWithNames` struct.
pub fn merge_fruits(&mut self, other: IntermediateAggregationResults) {
if let (Some(buckets_left), Some(buckets_right)) = (&mut self.buckets, other.buckets) {
for (bucket_left, bucket_right) in
buckets_left.values_mut().zip(buckets_right.into_values())
{
bucket_left.merge_fruits(bucket_right);
}
pub fn merge_fruits(&mut self, other: IntermediateAggregationResults) -> crate::Result<()> {
for (left, right) in self.aggs_res.values_mut().zip(other.aggs_res.into_values()) {
left.merge_fruits(right)?;
}
Ok(())
}
}
if let (Some(metrics_left), Some(metrics_right)) = (&mut self.metrics, other.metrics) {
for (metric_left, metric_right) in
metrics_left.values_mut().zip(metrics_right.into_values())
{
metric_left.merge_fruits(metric_right);
}
pub(crate) fn empty_from_req(req: &Aggregation) -> IntermediateAggregationResult {
use AggregationVariants::*;
match req.agg {
Terms(_) => IntermediateAggregationResult::Bucket(IntermediateBucketResult::Terms(
Default::default(),
)),
Range(_) => IntermediateAggregationResult::Bucket(IntermediateBucketResult::Range(
Default::default(),
)),
Histogram(_) | DateHistogram(_) => {
IntermediateAggregationResult::Bucket(IntermediateBucketResult::Histogram {
buckets: Vec::new(),
column_type: None,
})
}
Average(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Average(
IntermediateAverage::default(),
)),
Count(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Count(
IntermediateCount::default(),
)),
Max(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Max(
IntermediateMax::default(),
)),
Min(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Min(
IntermediateMin::default(),
)),
Stats(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Stats(
IntermediateStats::default(),
)),
Sum(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Sum(
IntermediateSum::default(),
)),
Percentiles(_) => IntermediateAggregationResult::Metric(
IntermediateMetricResult::Percentiles(PercentilesCollector::default()),
),
}
}
fn convert_and_add_final_metrics_to_result(
results: &mut FxHashMap<String, AggregationResult>,
metrics: VecWithNames<IntermediateMetricResult>,
) {
results.extend(
metrics
.into_iter()
.map(|(key, metric)| (key, AggregationResult::MetricResult(metric.into()))),
);
}
fn add_empty_final_metrics_to_result(
results: &mut FxHashMap<String, AggregationResult>,
req_metrics: &VecWithNames<MetricAggregation>,
) -> crate::Result<()> {
results.extend(req_metrics.iter().map(|(key, req)| {
let empty_bucket = IntermediateMetricResult::empty_from_req(req);
(
key.to_string(),
AggregationResult::MetricResult(empty_bucket.into()),
)
}));
Ok(())
}
fn add_empty_final_buckets_to_result(
results: &mut FxHashMap<String, AggregationResult>,
req_buckets: &VecWithNames<BucketAggregationInternal>,
) -> crate::Result<()> {
let requested_buckets = req_buckets.iter();
for (key, req) in requested_buckets {
let empty_bucket = AggregationResult::BucketResult(BucketResult::empty_from_req(req)?);
results.insert(key.to_string(), empty_bucket);
}
Ok(())
}
fn convert_and_add_final_buckets_to_result(
results: &mut FxHashMap<String, AggregationResult>,
buckets: VecWithNames<IntermediateBucketResult>,
req_buckets: &VecWithNames<BucketAggregationInternal>,
) -> crate::Result<()> {
assert_eq!(buckets.len(), req_buckets.len());
let buckets_with_request = buckets.into_iter().zip(req_buckets.values());
for ((key, bucket), req) in buckets_with_request {
let result = AggregationResult::BucketResult(bucket.into_final_bucket_result(req)?);
results.insert(key, result);
}
Ok(())
}
/// An aggregation is either a bucket or a metric.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum IntermediateAggregationResult {
@@ -194,9 +207,42 @@ pub enum IntermediateAggregationResult {
Metric(IntermediateMetricResult),
}
impl IntermediateAggregationResult {
pub(crate) fn into_final_result(
self,
req: &Aggregation,
limits: &AggregationLimits,
) -> crate::Result<AggregationResult> {
let res = match self {
IntermediateAggregationResult::Bucket(bucket) => {
AggregationResult::BucketResult(bucket.into_final_bucket_result(req, limits)?)
}
IntermediateAggregationResult::Metric(metric) => {
AggregationResult::MetricResult(metric.into_final_metric_result(req))
}
};
Ok(res)
}
fn merge_fruits(&mut self, other: IntermediateAggregationResult) -> crate::Result<()> {
match (self, other) {
(
IntermediateAggregationResult::Bucket(b1),
IntermediateAggregationResult::Bucket(b2),
) => b1.merge_fruits(b2),
(
IntermediateAggregationResult::Metric(m1),
IntermediateAggregationResult::Metric(m2),
) => m1.merge_fruits(m2),
_ => panic!("aggregation result type mismatch (mixed metric and buckets)"),
}
}
}
/// Holds the intermediate data for metric results
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum IntermediateMetricResult {
/// Intermediate percentiles result.
Percentiles(PercentilesCollector),
/// Intermediate average result.
Average(IntermediateAverage),
/// Intermediate count result.
@@ -212,23 +258,34 @@ pub enum IntermediateMetricResult {
}
impl IntermediateMetricResult {
pub(crate) fn empty_from_req(req: &MetricAggregation) -> Self {
match req {
MetricAggregation::Average(_) => {
IntermediateMetricResult::Average(IntermediateAverage::default())
fn into_final_metric_result(self, req: &Aggregation) -> MetricResult {
match self {
IntermediateMetricResult::Average(intermediate_avg) => {
MetricResult::Average(intermediate_avg.finalize().into())
}
MetricAggregation::Count(_) => {
IntermediateMetricResult::Count(IntermediateCount::default())
IntermediateMetricResult::Count(intermediate_count) => {
MetricResult::Count(intermediate_count.finalize().into())
}
MetricAggregation::Max(_) => IntermediateMetricResult::Max(IntermediateMax::default()),
MetricAggregation::Min(_) => IntermediateMetricResult::Min(IntermediateMin::default()),
MetricAggregation::Stats(_) => {
IntermediateMetricResult::Stats(IntermediateStats::default())
IntermediateMetricResult::Max(intermediate_max) => {
MetricResult::Max(intermediate_max.finalize().into())
}
MetricAggregation::Sum(_) => IntermediateMetricResult::Sum(IntermediateSum::default()),
IntermediateMetricResult::Min(intermediate_min) => {
MetricResult::Min(intermediate_min.finalize().into())
}
IntermediateMetricResult::Stats(intermediate_stats) => {
MetricResult::Stats(intermediate_stats.finalize())
}
IntermediateMetricResult::Sum(intermediate_sum) => {
MetricResult::Sum(intermediate_sum.finalize().into())
}
IntermediateMetricResult::Percentiles(percentiles) => MetricResult::Percentiles(
percentiles
.into_final_result(req.agg.as_percentile().expect("unexpected metric type")),
),
}
}
fn merge_fruits(&mut self, other: IntermediateMetricResult) {
fn merge_fruits(&mut self, other: IntermediateMetricResult) -> crate::Result<()> {
match (self, other) {
(
IntermediateMetricResult::Average(avg_left),
@@ -257,10 +314,18 @@ impl IntermediateMetricResult {
(IntermediateMetricResult::Sum(sum_left), IntermediateMetricResult::Sum(sum_right)) => {
sum_left.merge_fruits(sum_right);
}
(
IntermediateMetricResult::Percentiles(left),
IntermediateMetricResult::Percentiles(right),
) => {
left.merge_fruits(right)?;
}
_ => {
panic!("incompatible fruit types in tree");
panic!("incompatible fruit types in tree or missing merge_fruits handler");
}
}
Ok(())
}
}
@@ -286,7 +351,8 @@ pub enum IntermediateBucketResult {
impl IntermediateBucketResult {
pub(crate) fn into_final_bucket_result(
self,
req: &BucketAggregationInternal,
req: &Aggregation,
limits: &AggregationLimits,
) -> crate::Result<BucketResult> {
match self {
IntermediateBucketResult::Range(range_res) => {
@@ -295,10 +361,12 @@ impl IntermediateBucketResult {
.into_values()
.map(|bucket| {
bucket.into_final_bucket_entry(
&req.sub_aggregation,
req.as_range()
req.sub_aggregation(),
req.agg
.as_range()
.expect("unexpected aggregation, expected histogram aggregation"),
range_res.column_type,
limits,
)
})
.collect::<crate::Result<Vec<_>>>()?;
@@ -310,6 +378,7 @@ impl IntermediateBucketResult {
});
let is_keyed = req
.agg
.as_range()
.expect("unexpected aggregation, expected range aggregation")
.keyed;
@@ -330,13 +399,15 @@ impl IntermediateBucketResult {
buckets,
} => {
let histogram_req = &req
.agg
.as_histogram()?
.expect("unexpected aggregation, expected histogram aggregation");
let buckets = intermediate_histogram_buckets_to_final_buckets(
buckets,
column_type,
histogram_req,
&req.sub_aggregation,
req.sub_aggregation(),
limits,
)?;
let buckets = if histogram_req.keyed {
@@ -352,32 +423,22 @@ impl IntermediateBucketResult {
Ok(BucketResult::Histogram { buckets })
}
IntermediateBucketResult::Terms(terms) => terms.into_final_result(
req.as_term()
req.agg
.as_term()
.expect("unexpected aggregation, expected term aggregation"),
&req.sub_aggregation,
req.sub_aggregation(),
limits,
),
}
}
pub(crate) fn empty_from_req(req: &BucketAggregationType) -> Self {
match req {
BucketAggregationType::Terms(_) => IntermediateBucketResult::Terms(Default::default()),
BucketAggregationType::Range(_) => IntermediateBucketResult::Range(Default::default()),
BucketAggregationType::Histogram(_) | BucketAggregationType::DateHistogram(_) => {
IntermediateBucketResult::Histogram {
buckets: vec![],
column_type: None,
}
}
}
}
fn merge_fruits(&mut self, other: IntermediateBucketResult) {
fn merge_fruits(&mut self, other: IntermediateBucketResult) -> crate::Result<()> {
match (self, other) {
(
IntermediateBucketResult::Terms(term_res_left),
IntermediateBucketResult::Terms(term_res_right),
) => {
merge_key_maps(&mut term_res_left.entries, term_res_right.entries);
merge_maps(&mut term_res_left.entries, term_res_right.entries)?;
term_res_left.sum_other_doc_count += term_res_right.sum_other_doc_count;
term_res_left.doc_count_error_upper_bound +=
term_res_right.doc_count_error_upper_bound;
@@ -387,7 +448,7 @@ impl IntermediateBucketResult {
IntermediateBucketResult::Range(range_res_left),
IntermediateBucketResult::Range(range_res_right),
) => {
merge_serialized_key_maps(&mut range_res_left.buckets, range_res_right.buckets);
merge_maps(&mut range_res_left.buckets, range_res_right.buckets)?;
}
(
IntermediateBucketResult::Histogram {
@@ -399,22 +460,23 @@ impl IntermediateBucketResult {
..
},
) => {
let buckets = buckets_left
.drain(..)
.merge_join_by(buckets_right.into_iter(), |left, right| {
left.key.partial_cmp(&right.key).unwrap_or(Ordering::Equal)
})
.map(|either| match either {
itertools::EitherOrBoth::Both(mut left, right) => {
left.merge_fruits(right);
left
}
itertools::EitherOrBoth::Left(left) => left,
itertools::EitherOrBoth::Right(right) => right,
})
.collect();
let buckets: Result<Vec<IntermediateHistogramBucketEntry>, TantivyError> =
buckets_left
.drain(..)
.merge_join_by(buckets_right.into_iter(), |left, right| {
left.key.partial_cmp(&right.key).unwrap_or(Ordering::Equal)
})
.map(|either| match either {
itertools::EitherOrBoth::Both(mut left, right) => {
left.merge_fruits(right)?;
Ok(left)
}
itertools::EitherOrBoth::Left(left) => Ok(left),
itertools::EitherOrBoth::Right(right) => Ok(right),
})
.collect::<Result<_, _>>();
*buckets_left = buckets;
*buckets_left = buckets?;
}
(IntermediateBucketResult::Range(_), _) => {
panic!("try merge on different types")
@@ -426,6 +488,7 @@ impl IntermediateBucketResult {
panic!("try merge on different types")
}
}
Ok(())
}
}
@@ -439,7 +502,7 @@ pub struct IntermediateRangeBucketResult {
#[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)]
/// Term aggregation including error counts
pub struct IntermediateTermBucketResult {
pub(crate) entries: FxHashMap<Key, IntermediateTermBucketEntry>,
pub(crate) entries: FxHashMap<IntermediateKey, IntermediateTermBucketEntry>,
pub(crate) sum_other_doc_count: u64,
pub(crate) doc_count_error_upper_bound: u64,
}
@@ -448,21 +511,22 @@ impl IntermediateTermBucketResult {
pub(crate) fn into_final_result(
self,
req: &TermsAggregation,
sub_aggregation_req: &AggregationsInternal,
sub_aggregation_req: &Aggregations,
limits: &AggregationLimits,
) -> crate::Result<BucketResult> {
let req = TermsAggregationInternal::from_req(req);
let mut buckets: Vec<BucketEntry> = self
.entries
.into_iter()
.filter(|bucket| bucket.1.doc_count >= req.min_doc_count)
.filter(|bucket| bucket.1.doc_count as u64 >= req.min_doc_count)
.map(|(key, entry)| {
Ok(BucketEntry {
key_as_string: None,
key,
doc_count: entry.doc_count,
key: key.into(),
doc_count: entry.doc_count as u64,
sub_aggregation: entry
.sub_aggregation
.into_final_bucket_result_internal(sub_aggregation_req)?,
.into_final_result_internal(sub_aggregation_req, limits)?,
})
})
.collect::<crate::Result<_>>()?;
@@ -494,7 +558,7 @@ impl IntermediateTermBucketResult {
let val = bucket
.sub_aggregation
.get_value_from_aggregation(agg_name, agg_property)?
.unwrap_or(f64::NAN);
.unwrap_or(f64::MIN);
Ok((bucket, val))
})
.collect::<crate::Result<Vec<_>>>()?;
@@ -533,37 +597,23 @@ impl IntermediateTermBucketResult {
}
trait MergeFruits {
fn merge_fruits(&mut self, other: Self);
fn merge_fruits(&mut self, other: Self) -> crate::Result<()>;
}
fn merge_serialized_key_maps<V: MergeFruits + Clone>(
entries_left: &mut FxHashMap<SerializedKey, V>,
mut entries_right: FxHashMap<SerializedKey, V>,
) {
fn merge_maps<V: MergeFruits + Clone, T: Eq + PartialEq + Hash>(
entries_left: &mut FxHashMap<T, V>,
mut entries_right: FxHashMap<T, V>,
) -> crate::Result<()> {
for (name, entry_left) in entries_left.iter_mut() {
if let Some(entry_right) = entries_right.remove(name) {
entry_left.merge_fruits(entry_right);
}
}
for (key, res) in entries_right.into_iter() {
entries_left.entry(key).or_insert(res);
}
}
fn merge_key_maps<V: MergeFruits + Clone>(
entries_left: &mut FxHashMap<Key, V>,
mut entries_right: FxHashMap<Key, V>,
) {
for (name, entry_left) in entries_left.iter_mut() {
if let Some(entry_right) = entries_right.remove(name) {
entry_left.merge_fruits(entry_right);
entry_left.merge_fruits(entry_right)?;
}
}
for (key, res) in entries_right.into_iter() {
entries_left.entry(key).or_insert(res);
}
Ok(())
}
/// This is the histogram entry for a bucket, which contains a key, count, and optionally
@@ -581,7 +631,8 @@ pub struct IntermediateHistogramBucketEntry {
impl IntermediateHistogramBucketEntry {
pub(crate) fn into_final_bucket_entry(
self,
req: &AggregationsInternal,
req: &Aggregations,
limits: &AggregationLimits,
) -> crate::Result<BucketEntry> {
Ok(BucketEntry {
key_as_string: None,
@@ -589,52 +640,41 @@ impl IntermediateHistogramBucketEntry {
doc_count: self.doc_count,
sub_aggregation: self
.sub_aggregation
.into_final_bucket_result_internal(req)?,
.into_final_result_internal(req, limits)?,
})
}
}
impl From<SegmentHistogramBucketEntry> for IntermediateHistogramBucketEntry {
fn from(entry: SegmentHistogramBucketEntry) -> Self {
IntermediateHistogramBucketEntry {
key: entry.key,
doc_count: entry.doc_count,
sub_aggregation: Default::default(),
}
}
}
/// This is the range entry for a bucket, which contains a key, count, and optionally
/// sub_aggregations.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateRangeBucketEntry {
/// The unique the bucket is identified.
pub key: Key,
/// The unique key the bucket is identified with.
pub key: IntermediateKey,
/// The number of documents in the bucket.
pub doc_count: u64,
/// The sub_aggregation in this bucket.
pub sub_aggregation: IntermediateAggregationResults,
/// The from range of the bucket. Equals `f64::MIN` when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub from: Option<f64>,
/// The to range of the bucket. Equals `f64::MAX` when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub to: Option<f64>,
}
impl IntermediateRangeBucketEntry {
pub(crate) fn into_final_bucket_entry(
self,
req: &AggregationsInternal,
req: &Aggregations,
_range_req: &RangeAggregation,
column_type: Option<ColumnType>,
limits: &AggregationLimits,
) -> crate::Result<RangeBucketEntry> {
let mut range_bucket_entry = RangeBucketEntry {
key: self.key,
key: self.key.into(),
doc_count: self.doc_count,
sub_aggregation: self
.sub_aggregation
.into_final_bucket_result_internal(req)?,
.into_final_result_internal(req, limits)?,
to: self.to,
from: self.from,
to_as_string: None,
@@ -663,29 +703,32 @@ impl IntermediateRangeBucketEntry {
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateTermBucketEntry {
/// The number of documents in the bucket.
pub doc_count: u64,
pub doc_count: u32,
/// The sub_aggregation in this bucket.
pub sub_aggregation: IntermediateAggregationResults,
}
impl MergeFruits for IntermediateTermBucketEntry {
fn merge_fruits(&mut self, other: IntermediateTermBucketEntry) {
fn merge_fruits(&mut self, other: IntermediateTermBucketEntry) -> crate::Result<()> {
self.doc_count += other.doc_count;
self.sub_aggregation.merge_fruits(other.sub_aggregation);
self.sub_aggregation.merge_fruits(other.sub_aggregation)?;
Ok(())
}
}
impl MergeFruits for IntermediateRangeBucketEntry {
fn merge_fruits(&mut self, other: IntermediateRangeBucketEntry) {
fn merge_fruits(&mut self, other: IntermediateRangeBucketEntry) -> crate::Result<()> {
self.doc_count += other.doc_count;
self.sub_aggregation.merge_fruits(other.sub_aggregation);
self.sub_aggregation.merge_fruits(other.sub_aggregation)?;
Ok(())
}
}
impl MergeFruits for IntermediateHistogramBucketEntry {
fn merge_fruits(&mut self, other: IntermediateHistogramBucketEntry) {
fn merge_fruits(&mut self, other: IntermediateHistogramBucketEntry) -> crate::Result<()> {
self.doc_count += other.doc_count;
self.sub_aggregation.merge_fruits(other.sub_aggregation);
self.sub_aggregation.merge_fruits(other.sub_aggregation)?;
Ok(())
}
}
@@ -704,7 +747,7 @@ mod tests {
buckets.insert(
key.to_string(),
IntermediateRangeBucketEntry {
key: Key::Str(key.to_string()),
key: IntermediateKey::Str(key.to_string()),
doc_count: *doc_count,
sub_aggregation: Default::default(),
from: None,
@@ -714,14 +757,15 @@ mod tests {
}
map.insert(
"my_agg_level2".to_string(),
IntermediateBucketResult::Range(IntermediateRangeBucketResult {
buckets,
column_type: None,
}),
IntermediateAggregationResult::Bucket(IntermediateBucketResult::Range(
IntermediateRangeBucketResult {
buckets,
column_type: None,
},
)),
);
IntermediateAggregationResults {
buckets: Some(VecWithNames::from_entries(map.into_iter().collect())),
metrics: Default::default(),
aggs_res: map.into_iter().collect(),
}
}
@@ -734,7 +778,7 @@ mod tests {
buckets.insert(
key.to_string(),
IntermediateRangeBucketEntry {
key: Key::Str(key.to_string()),
key: IntermediateKey::Str(key.to_string()),
doc_count: *doc_count,
from: None,
to: None,
@@ -747,14 +791,15 @@ mod tests {
}
map.insert(
"my_agg_level1".to_string(),
IntermediateBucketResult::Range(IntermediateRangeBucketResult {
buckets,
column_type: None,
}),
IntermediateAggregationResult::Bucket(IntermediateBucketResult::Range(
IntermediateRangeBucketResult {
buckets,
column_type: None,
},
)),
);
IntermediateAggregationResults {
buckets: Some(VecWithNames::from_entries(map.into_iter().collect())),
metrics: Default::default(),
aggs_res: map.into_iter().collect(),
}
}
@@ -769,7 +814,7 @@ mod tests {
("blue".to_string(), 25, "1900".to_string(), 50),
]);
tree_left.merge_fruits(tree_right);
tree_left.merge_fruits(tree_right).unwrap();
let tree_expected = get_intermediat_tree_with_ranges(&[
("red".to_string(), 110, "1900".to_string(), 55),
@@ -790,7 +835,7 @@ mod tests {
("green".to_string(), 25, "1900".to_string(), 50),
]);
tree_left.merge_fruits(tree_right);
tree_left.merge_fruits(tree_right).unwrap();
let tree_expected = get_intermediat_tree_with_ranges(&[
("red".to_string(), 110, "1900".to_string(), 55),
@@ -810,7 +855,9 @@ mod tests {
let orig = tree_left.clone();
tree_left.merge_fruits(IntermediateAggregationResults::default());
tree_left
.merge_fruits(IntermediateAggregationResults::default())
.unwrap();
assert_eq!(tree_left, orig);
}


@@ -1,17 +1,25 @@
//! Module for all metric aggregations.
//!
//! The aggregations in this family compute metrics, see [super::agg_req::MetricAggregation] for
//! details.
//! The aggregations in this family compute metrics based on values extracted
//! from the documents being aggregated. The values are extracted from the document's fast fields.
//! Some aggregations output a single numeric metric (e.g. Average) and are called
//! single-value numeric metrics aggregations, while others generate multiple metrics (e.g. Stats)
//! and are called multi-value numeric metrics aggregations.
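For illustration, a minimal sketch of one single-value and one multi-value metric request, built from the Elasticsearch-compatible JSON format (the field name "price" and the request keys are placeholders):
use tantivy::aggregation::agg_req::Aggregations;
// Hypothetical request combining a single-value metric (avg) and a
// multi-value metric (stats) on the same fast field.
let agg_req: Aggregations = serde_json::from_str(
    r#"{
        "price_avg":   { "avg":   { "field": "price" } },
        "price_stats": { "stats": { "field": "price" } }
    }"#,
)
.unwrap();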
mod average;
mod count;
mod max;
mod min;
mod percentiles;
mod stats;
mod sum;
pub use average::*;
pub use count::*;
pub use max::*;
pub use min::*;
pub use percentiles::*;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
pub use stats::*;
pub use sum::*;
@@ -37,6 +45,33 @@ impl From<Option<f64>> for SingleMetricResult {
}
}
/// Wrapper around the percentile entries, which are serialized as a vector or a hash map
/// depending on whether the request is keyed.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum PercentileValues {
/// Vector format percentile entries
Vec(Vec<PercentileValuesVecEntry>),
/// HashMap format percentile entries. Key is the serialized percentile
HashMap(FxHashMap<String, f64>),
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
/// The entry when requesting percentiles with keyed: false
pub struct PercentileValuesVecEntry {
key: f64,
value: f64,
}
/// Single-metric aggregations use this common result structure.
///
/// The main reason to wrap the values is to match the Elasticsearch output structure.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct PercentilesMetricResult {
/// The result of the percentile metric.
pub values: PercentileValues,
}
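For reference, a rough sketch of the two serialized shapes (the numbers are placeholders; see the keyed tests further down in this diff):
// keyed: true (the default) serializes the values as a map keyed by the formatted percentile:
//   {"values": {"95.0": 10.0, "99.0": 10.0, "99.9": 10.0}}
// keyed: false serializes them as a list of key/value entries:
//   {"values": [{"key": 95.0, "value": 10.0}, {"key": 99.0, "value": 10.0}]}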
#[cfg(test)]
mod tests {
use crate::aggregation::agg_req::Aggregations;
@@ -81,7 +116,7 @@ mod tests {
"price_sum": { "sum": { "field": "price" } }
}"#;
let aggregations: Aggregations = serde_json::from_str(aggregations_json).unwrap();
let collector = AggregationCollector::from_aggs(aggregations, None);
let collector = AggregationCollector::from_aggs(aggregations, Default::default());
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let aggregations_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();

View File

@@ -0,0 +1,548 @@
use std::fmt::Debug;
use columnar::ColumnType;
use serde::{Deserialize, Serialize};
use super::*;
use crate::aggregation::agg_req_with_accessor::{
AggregationWithAccessor, AggregationsWithAccessor,
};
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
};
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::{f64_from_fastfield_u64, AggregationError};
use crate::{DocId, TantivyError};
/// # Percentiles
///
/// The percentiles aggregation is a useful tool for understanding the distribution
/// of a data set. It calculates the values below which a given percentage of the
/// data falls. For instance, the 95th percentile indicates the value below which
/// 95% of the data points can be found.
///
/// This aggregation can be particularly interesting for analyzing website or service response
/// times. For example, if the 95th percentile website load time is significantly higher than the
/// median, this indicates that a small percentage of users are experiencing much slower load times
/// than the majority.
///
/// To use the percentiles aggregation, you'll need to provide a field to
/// aggregate on. In the case of website load times, this would typically be a
/// field containing the duration of time it takes for the site to load.
///
/// The following example demonstrates a request for the percentiles of the "load_time"
/// field:
///
/// ```JSON
/// {
/// "percentiles": {
/// "field": "load_time"
/// }
/// }
/// ```
///
/// This request will return an object containing the default percentiles (1, 5,
/// 25, 50 (median), 75, 95, and 99). You can also customize the percentiles you want to
/// calculate by providing an array of values in the "percents" parameter:
///
/// ```JSON
/// {
/// "percentiles": {
/// "field": "load_time",
/// "percents": [10, 20, 30, 40, 50, 60, 70, 80, 90]
/// }
/// }
/// ```
///
/// In this example, the aggregation will return the 10th, 20th, 30th, 40th, 50th,
/// 60th, 70th, 80th, and 90th percentiles of the "load_time" field.
///
/// Analyzing the percentiles of website load times can help you understand the
/// user experience and identify areas for optimization.
///
/// # Estimating Percentiles
///
/// While percentiles provide valuable insights into the distribution of data, it's
/// important to understand that they are often estimates. This is because
/// calculating exact percentiles for large data sets can be computationally
/// expensive and time-consuming. As a result, many percentile aggregation
/// algorithms use approximation techniques to provide faster results.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct PercentilesAggregationReq {
/// The field name to compute the percentiles on.
pub field: String,
/// The percentiles to compute.
/// Defaults to [1.0, 5.0, 25.0, 50.0, 75.0, 95.0, 99.0]
pub percents: Option<Vec<f64>>,
/// Whether to return the percentiles as a hash map
#[serde(default = "default_as_true")]
pub keyed: bool,
}
fn default_percentiles() -> &'static [f64] {
&[1.0, 5.0, 25.0, 50.0, 75.0, 95.0, 99.0]
}
fn default_as_true() -> bool {
true
}
impl PercentilesAggregationReq {
/// Creates a new [`PercentilesAggregationReq`] instance from a field name.
pub fn from_field_name(field_name: String) -> Self {
PercentilesAggregationReq {
field: field_name,
percents: None,
keyed: default_as_true(),
}
}
/// Returns the field name the aggregation is computed on.
pub fn field_name(&self) -> &str {
&self.field
}
fn validate(&self) -> crate::Result<()> {
if let Some(percents) = self.percents.as_ref() {
let all_in_range = percents
.iter()
.cloned()
.all(|percent| (0.0..=100.0).contains(&percent));
if !all_in_range {
return Err(TantivyError::AggregationError(
AggregationError::InvalidRequest(
"All percentiles have to be between 0.0 and 100.0".to_string(),
),
));
}
}
Ok(())
}
}
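A minimal usage sketch based on the tests in this file; it assumes an existing `Index` named `index` with a fast field called "load_time", inside a function returning `tantivy::Result`:
use tantivy::aggregation::agg_req::Aggregations;
use tantivy::aggregation::agg_result::AggregationResults;
use tantivy::aggregation::AggregationCollector;
use tantivy::query::AllQuery;
// Hypothetical request for the default percentiles of the "load_time" fast field.
let agg_req: Aggregations = serde_json::from_str(
    r#"{ "load_time_percentiles": { "percentiles": { "field": "load_time" } } }"#,
)
.unwrap();
let collector = AggregationCollector::from_aggs(agg_req, Default::default());
let searcher = index.reader()?.searcher();
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector)?;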
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct SegmentPercentilesCollector {
field_type: ColumnType,
pub(crate) percentiles: PercentilesCollector,
pub(crate) accessor_idx: usize,
val_cache: Vec<u64>,
}
#[derive(Clone, Serialize, Deserialize)]
/// The percentiles collector used during segment collection and for merging results.
pub struct PercentilesCollector {
sketch: sketches_ddsketch::DDSketch,
}
impl Default for PercentilesCollector {
fn default() -> Self {
Self::new()
}
}
impl Debug for PercentilesCollector {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("PercentilesCollector")
.field("sketch_len", &self.sketch.length())
.finish()
}
}
impl PartialEq for PercentilesCollector {
fn eq(&self, _other: &Self) -> bool {
false
}
}
fn format_percentil(percentil: f64) -> String {
let mut out = percentil.to_string();
// Slightly silly way to format trailing decimals
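// e.g. 95.0 -> "95.0"; 99.9 already contains '.' and stays "99.9"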
if !out.contains('.') {
out.push_str(".0");
}
out
}
impl PercentilesCollector {
/// Converts the collector into the final result. This queries the quantiles from the underlying
/// quantile collector.
pub fn into_final_result(self, req: &PercentilesAggregationReq) -> PercentilesMetricResult {
let percentiles: &[f64] = req
.percents
.as_ref()
.map(|el| el.as_ref())
.unwrap_or(default_percentiles());
let iter_quantile_and_values = percentiles.iter().cloned().map(|percentile| {
(
percentile,
self.sketch
.quantile(percentile / 100.0)
.expect(
"quantil out of range. This error should have been caught during \
validation phase",
)
.unwrap_or(f64::NAN),
)
});
let values = if req.keyed {
PercentileValues::HashMap(
iter_quantile_and_values
.map(|(val, quantil)| (format_percentil(val), quantil))
.collect(),
)
} else {
PercentileValues::Vec(
iter_quantile_and_values
.map(|(key, value)| PercentileValuesVecEntry { key, value })
.collect(),
)
};
PercentilesMetricResult { values }
}
fn new() -> Self {
let ddsketch_config = sketches_ddsketch::Config::defaults();
let sketch = sketches_ddsketch::DDSketch::new(ddsketch_config);
Self { sketch }
}
fn collect(&mut self, val: f64) {
self.sketch.add(val);
}
pub(crate) fn merge_fruits(&mut self, right: PercentilesCollector) -> crate::Result<()> {
self.sketch.merge(&right.sketch).map_err(|err| {
TantivyError::AggregationError(AggregationError::InternalError(format!(
"Error while merging percentiles {err:?}"
)))
})?;
Ok(())
}
}
impl SegmentPercentilesCollector {
pub fn from_req_and_validate(
req: &PercentilesAggregationReq,
field_type: ColumnType,
accessor_idx: usize,
) -> crate::Result<Self> {
req.validate()?;
Ok(Self {
field_type,
percentiles: PercentilesCollector::new(),
accessor_idx,
val_cache: Default::default(),
})
}
#[inline]
pub(crate) fn collect_block_with_field(
&mut self,
docs: &[DocId],
agg_accessor: &mut AggregationWithAccessor,
) {
agg_accessor
.column_block_accessor
.fetch_block(docs, &agg_accessor.accessor);
for val in agg_accessor.column_block_accessor.iter_vals() {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.percentiles.collect(val1);
}
}
}
impl SegmentAggregationCollector for SegmentPercentilesCollector {
#[inline]
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let intermediate_metric_result = IntermediateMetricResult::Percentiles(self.percentiles);
results.push(
name,
IntermediateAggregationResult::Metric(intermediate_metric_result),
)?;
Ok(())
}
#[inline]
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
let field = &agg_with_accessor.aggs.values[self.accessor_idx].accessor;
for val in field.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.percentiles.collect(val1);
}
Ok(())
}
#[inline]
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
let field = &mut agg_with_accessor.aggs.values[self.accessor_idx];
self.collect_block_with_field(docs, field);
Ok(())
}
}
#[cfg(test)]
mod tests {
use itertools::Itertools;
use more_asserts::{assert_ge, assert_le};
use rand::rngs::StdRng;
use rand::SeedableRng;
use serde_json::Value;
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::tests::{
get_test_index_from_values, get_test_index_from_values_and_terms,
};
use crate::aggregation::AggregationCollector;
use crate::query::AllQuery;
#[test]
fn test_aggregation_percentiles_empty_index() -> crate::Result<()> {
// test index without segments
let values = vec![];
let index = get_test_index_from_values(false, &values)?;
let agg_req_1: Aggregations = serde_json::from_value(json!({
"percentiles": {
"percentiles": {
"field": "score",
}
},
}))
.unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let reader = index.reader()?;
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
assert_eq!(
res["percentiles"]["values"],
json!({
"1.0": Value::Null,
"5.0": Value::Null,
"25.0": Value::Null,
"50.0": Value::Null,
"75.0": Value::Null,
"95.0": Value::Null,
"99.0": Value::Null,
})
);
Ok(())
}
#[test]
fn test_aggregation_percentile_simple() -> crate::Result<()> {
let values = vec![10.0];
let index = get_test_index_from_values(false, &values)?;
let agg_req_1: Aggregations = serde_json::from_value(json!({
"percentiles": {
"percentiles": {
"field": "score",
}
},
}))
.unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let reader = index.reader()?;
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
let percents = vec!["1.0", "5.0", "25.0", "50.0", "75.0", "95.0", "99.0"];
let range = 9.9..10.1;
for percent in percents {
let val = res["percentiles"]["values"][percent].as_f64().unwrap();
assert!(range.contains(&val));
}
Ok(())
}
#[test]
fn test_aggregation_percentile_parameters() -> crate::Result<()> {
let values = vec![10.0];
let index = get_test_index_from_values(false, &values)?;
let agg_req_str = r#"
{
"mypercentiles": {
"percentiles": {
"field": "score",
"percents": [ 95, 99, 99.9 ]
}
}
} "#;
let agg_req_1: Aggregations = serde_json::from_str(agg_req_str).unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let reader = index.reader()?;
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
let percents = vec!["95.0", "99.0", "99.9"];
let expected_range = 9.9..10.1;
for percent in percents {
let val = res["mypercentiles"]["values"][percent].as_f64().unwrap();
assert!(expected_range.contains(&val));
}
// Keyed false
//
let agg_req_str = r#"
{
"mypercentiles": {
"percentiles": {
"field": "score",
"percents": [ 95, 99, 99.9 ],
"keyed": false
}
}
} "#;
let agg_req_1: Aggregations = serde_json::from_str(agg_req_str).unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let reader = index.reader()?;
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
let vals = &res["mypercentiles"]["values"];
assert_eq!(vals[0]["key"].as_f64().unwrap(), 95.0);
assert_eq!(vals[1]["key"].as_f64().unwrap(), 99.0);
assert_eq!(vals[2]["key"].as_f64().unwrap(), 99.9);
assert_eq!(vals[3]["key"], serde_json::Value::Null);
assert!(expected_range.contains(&vals[0]["value"].as_f64().unwrap()));
assert!(expected_range.contains(&vals[1]["value"].as_f64().unwrap()));
assert!(expected_range.contains(&vals[2]["value"].as_f64().unwrap()));
Ok(())
}
#[test]
fn test_aggregation_percentiles_single_seg() -> crate::Result<()> {
test_aggregation_percentiles(true)
}
#[test]
fn test_aggregation_percentiles_multi_seg() -> crate::Result<()> {
test_aggregation_percentiles(false)
}
fn test_aggregation_percentiles(merge_segments: bool) -> crate::Result<()> {
use rand_distr::Distribution;
let num_values_in_segment = vec![100, 30_000, 8000];
let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
let mut rng = StdRng::from_seed([1u8; 32]);
let segment_data = |i| {
(0..num_values_in_segment[i])
.map(|_| lg_norm.sample(&mut rng))
.collect_vec()
};
let values = (0..=2).map(segment_data).collect_vec();
let mut all_values = values
.iter()
.flat_map(|el| el.iter().cloned())
.collect_vec();
all_values.sort_unstable_by(|a, b| a.total_cmp(b));
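// Reference quantile computed exactly (nearest-rank on the sorted values),
// used below to check the DDSketch estimates within a 2% tolerance.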
fn get_exact_quantil(q: f64, all_values: &[f64]) -> f64 {
let q = q / 100.0;
assert!((0f64..=1f64).contains(&q));
let index = (all_values.len() as f64 * q).ceil() as usize;
let index = index.min(all_values.len() - 1);
all_values[index]
}
let segment_and_values = values
.into_iter()
.map(|segment_data| {
segment_data
.into_iter()
.map(|val| (val, val.to_string()))
.collect_vec()
})
.collect_vec();
let index =
get_test_index_from_values_and_terms(merge_segments, &segment_and_values).unwrap();
let reader = index.reader()?;
let agg_req_str = r#"
{
"mypercentiles": {
"percentiles": {
"field": "score_f64",
"percents": [ 95, 99, 99.9 ]
}
}
} "#;
let agg_req_1: Aggregations = serde_json::from_str(agg_req_str).unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
let vals = &res["mypercentiles"]["values"];
let check_quantil = |exact_quantil: f64, val: f64| {
let lower = exact_quantil - exact_quantil * 0.02;
let upper = exact_quantil + exact_quantil * 0.02;
assert_le!(val, upper);
assert_ge!(val, lower);
};
let val = vals["95.0"].as_f64().unwrap();
let exact_quantil = get_exact_quantil(95.0, &all_values);
check_quantil(exact_quantil, val);
let val = vals["99.0"].as_f64().unwrap();
let exact_quantil = get_exact_quantil(99.0, &all_values);
check_quantil(exact_quantil, val);
let val = vals["99.9"].as_f64().unwrap();
let exact_quantil = get_exact_quantil(99.9, &all_values);
check_quantil(exact_quantil, val);
Ok(())
}
}

View File

@@ -1,13 +1,15 @@
use columnar::{Cardinality, Column, ColumnType};
use columnar::ColumnType;
use serde::{Deserialize, Serialize};
use super::*;
use crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor;
use crate::aggregation::agg_req_with_accessor::{
AggregationWithAccessor, AggregationsWithAccessor,
};
use crate::aggregation::f64_from_fastfield_u64;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResults, IntermediateMetricResult,
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
};
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::{f64_from_fastfield_u64, VecWithNames};
use crate::{DocId, TantivyError};
/// A multi-value metric aggregation that computes a collection of statistics on numeric values that
@@ -64,8 +66,7 @@ impl Stats {
"max" => Ok(self.max),
"avg" => Ok(self.avg),
_ => Err(TantivyError::InvalidArgument(format!(
"Unknown property {} on stats metric aggregation",
agg_property
"Unknown property {agg_property} on stats metric aggregation"
))),
}
}
@@ -156,6 +157,7 @@ pub(crate) struct SegmentStatsCollector {
pub(crate) collecting_for: SegmentStatsType,
pub(crate) stats: IntermediateStats,
pub(crate) accessor_idx: usize,
val_cache: Vec<u64>,
}
impl SegmentStatsCollector {
@@ -169,33 +171,34 @@ impl SegmentStatsCollector {
collecting_for,
stats: IntermediateStats::default(),
accessor_idx,
val_cache: Default::default(),
}
}
#[inline]
pub(crate) fn collect_block_with_field(&mut self, docs: &[DocId], field: &Column<u64>) {
if field.get_cardinality() == Cardinality::Full {
for doc in docs {
let val = field.values.get_val(*doc);
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.stats.collect(val1);
}
} else {
for doc in docs {
for val in field.values_for_doc(*doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.stats.collect(val1);
}
}
pub(crate) fn collect_block_with_field(
&mut self,
docs: &[DocId],
agg_accessor: &mut AggregationWithAccessor,
) {
agg_accessor
.column_block_accessor
.fetch_block(docs, &agg_accessor.accessor);
for val in agg_accessor.column_block_accessor.iter_vals() {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.stats.collect(val1);
}
}
}
impl SegmentAggregationCollector for SegmentStatsCollector {
fn into_intermediate_aggregations_result(
#[inline]
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
) -> crate::Result<IntermediateAggregationResults> {
let name = agg_with_accessor.metrics.keys[self.accessor_idx].to_string();
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let intermediate_metric_result = match self.collecting_for {
SegmentStatsType::Average => {
@@ -216,23 +219,21 @@ impl SegmentAggregationCollector for SegmentStatsCollector {
}
};
let metrics = Some(VecWithNames::from_entries(vec![(
results.push(
name,
intermediate_metric_result,
)]));
IntermediateAggregationResult::Metric(intermediate_metric_result),
)?;
Ok(IntermediateAggregationResults {
metrics,
buckets: None,
})
Ok(())
}
#[inline]
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &AggregationsWithAccessor,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
let field = &agg_with_accessor.metrics.values[self.accessor_idx].accessor;
let field = &agg_with_accessor.aggs.values[self.accessor_idx].accessor;
for val in field.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
@@ -246,9 +247,9 @@ impl SegmentAggregationCollector for SegmentStatsCollector {
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &AggregationsWithAccessor,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
let field = &agg_with_accessor.metrics.values[self.accessor_idx].accessor;
let field = &mut agg_with_accessor.aggs.values[self.accessor_idx];
self.collect_block_with_field(docs, field);
Ok(())
}
@@ -257,16 +258,10 @@ impl SegmentAggregationCollector for SegmentStatsCollector {
#[cfg(test)]
mod tests {
use std::iter;
use serde_json::Value;
use crate::aggregation::agg_req::{
Aggregation, Aggregations, BucketAggregation, BucketAggregationType, MetricAggregation,
RangeAggregation,
};
use crate::aggregation::agg_req::{Aggregation, Aggregations};
use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::metric::StatsAggregation;
use crate::aggregation::tests::{get_test_index_2_segments, get_test_index_from_values};
use crate::aggregation::AggregationCollector;
use crate::query::{AllQuery, TermQuery};
@@ -280,16 +275,16 @@ mod tests {
let index = get_test_index_from_values(false, &values)?;
let agg_req_1: Aggregations = vec![(
"stats".to_string(),
Aggregation::Metric(MetricAggregation::Stats(StatsAggregation::from_field_name(
"score".to_string(),
))),
)]
.into_iter()
.collect();
let agg_req_1: Aggregations = serde_json::from_value(json!({
"stats": {
"stats": {
"field": "score",
},
}
}))
.unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, None);
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -312,21 +307,20 @@ mod tests {
#[test]
fn test_aggregation_stats_simple() -> crate::Result<()> {
// test index without segments
let values = vec![10.0];
let index = get_test_index_from_values(false, &values)?;
let agg_req_1: Aggregations = vec![(
"stats".to_string(),
Aggregation::Metric(MetricAggregation::Stats(StatsAggregation::from_field_name(
"score".to_string(),
))),
)]
.into_iter()
.collect();
let agg_req_1: Aggregations = serde_json::from_value(json!({
"stats": {
"stats": {
"field": "score",
},
}
}))
.unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, None);
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -359,51 +353,44 @@ mod tests {
IndexRecordOption::Basic,
);
let agg_req_1: Aggregations = vec![
(
"stats_i64".to_string(),
Aggregation::Metric(MetricAggregation::Stats(StatsAggregation::from_field_name(
"score_i64".to_string(),
))),
),
(
"stats_f64".to_string(),
Aggregation::Metric(MetricAggregation::Stats(StatsAggregation::from_field_name(
"score_f64".to_string(),
))),
),
(
"stats".to_string(),
Aggregation::Metric(MetricAggregation::Stats(StatsAggregation::from_field_name(
"score".to_string(),
))),
),
(
"range".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Range(RangeAggregation {
field: "score".to_string(),
ranges: vec![
(3f64..7f64).into(),
(7f64..19f64).into(),
(19f64..20f64).into(),
],
..Default::default()
}),
sub_aggregation: iter::once((
"stats".to_string(),
Aggregation::Metric(MetricAggregation::Stats(
StatsAggregation::from_field_name("score".to_string()),
)),
))
.collect(),
}),
),
]
.into_iter()
.collect();
let range_agg: Aggregation = {
serde_json::from_value(json!({
"range": {
"field": "score",
"ranges": [ { "from": 3.0f64, "to": 7.0f64 }, { "from": 7.0f64, "to": 19.0f64 }, { "from": 19.0f64, "to": 20.0f64 } ]
},
"aggs": {
"stats": {
"stats": {
"field": "score"
}
}
}
}))
.unwrap()
};
let collector = AggregationCollector::from_aggs(agg_req_1, None);
let agg_req_1: Aggregations = serde_json::from_value(json!({
"stats_i64": {
"stats": {
"field": "score_i64",
},
},
"stats_f64": {
"stats": {
"field": "score_f64",
},
},
"stats": {
"stats": {
"field": "score",
},
},
"range": range_agg
}))
.unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();

View File

@@ -24,6 +24,9 @@
//! ## JSON Format
//! Aggregations request and result structures de/serialize into elasticsearch compatible JSON.
//!
//! Note: Intermediate aggregation results should not be de/serialized via the JSON format;
//! Postcard is a good choice.
//!
//! ```verbatim
//! let agg_req: Aggregations = serde_json::from_str(json_request_string).unwrap();
//! let collector = AggregationCollector::from_aggs(agg_req, None);
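A minimal sketch of shipping intermediate results between nodes with postcard; it assumes `intermediate` is an `IntermediateAggregationResults` and that `postcard` is available as a dependency:
let bytes: Vec<u8> = postcard::to_allocvec(&intermediate).unwrap();
let deserialized: IntermediateAggregationResults = postcard::from_bytes(&bytes).unwrap();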
@@ -35,6 +38,7 @@
//! ## Supported Aggregations
//! - [Bucket](bucket)
//! - [Histogram](bucket::HistogramAggregation)
//! - [DateHistogram](bucket::DateHistogramAggregationReq)
//! - [Range](bucket::RangeAggregation)
//! - [Terms](bucket::TermsAggregation)
//! - [Metric](metric)
@@ -44,39 +48,12 @@
//! - [Max](metric::MaxAggregation)
//! - [Sum](metric::SumAggregation)
//! - [Count](metric::CountAggregation)
//! - [Percentiles](metric::PercentilesAggregationReq)
//!
//! # Example
//! Compute the average metric, by building [`agg_req::Aggregations`], which is built from an
//! `(String, agg_req::Aggregation)` iterator.
//!
//! ```
//! use tantivy::aggregation::agg_req::{Aggregations, Aggregation, MetricAggregation};
//! use tantivy::aggregation::AggregationCollector;
//! use tantivy::aggregation::metric::AverageAggregation;
//! use tantivy::query::AllQuery;
//! use tantivy::aggregation::agg_result::AggregationResults;
//! use tantivy::IndexReader;
//!
//! # #[allow(dead_code)]
//! fn aggregate_on_index(reader: &IndexReader) {
//! let agg_req: Aggregations = vec![
//! (
//! "average".to_string(),
//! Aggregation::Metric(MetricAggregation::Average(
//! AverageAggregation::from_field_name("score".to_string()),
//! )),
//! ),
//! ]
//! .into_iter()
//! .collect();
//!
//! let collector = AggregationCollector::from_aggs(agg_req, None);
//!
//! let searcher = reader.searcher();
//! let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
//! }
//! ```
//! # Example JSON
//! Requests are compatible with the elasticsearch JSON request format.
//!
//! ```
@@ -116,32 +93,24 @@
//! aggregation and then calculate the average on each bucket.
//! ```
//! use tantivy::aggregation::agg_req::*;
//! use tantivy::aggregation::metric::AverageAggregation;
//! use tantivy::aggregation::bucket::RangeAggregation;
//! let sub_agg_req_1: Aggregations = vec![(
//! "average_in_range".to_string(),
//! Aggregation::Metric(MetricAggregation::Average(
//! AverageAggregation::from_field_name("score".to_string()),
//! )),
//! )]
//! .into_iter()
//! .collect();
//! use serde_json::json;
//!
//! let agg_req_1: Aggregations = vec![
//! (
//! "range".to_string(),
//! Aggregation::Bucket(BucketAggregation {
//! bucket_agg: BucketAggregationType::Range(RangeAggregation{
//! field: "score".to_string(),
//! ranges: vec![(3f64..7f64).into(), (7f64..20f64).into()],
//! keyed: false,
//! }),
//! sub_aggregation: sub_agg_req_1.clone(),
//! }),
//! ),
//! ]
//! .into_iter()
//! .collect();
//! let agg_req_1: Aggregations = serde_json::from_value(json!({
//! "rangef64": {
//! "range": {
//! "field": "score",
//! "ranges": [
//! { "from": 3, "to": 7000 },
//! { "from": 7000, "to": 20000 },
//! { "from": 50000, "to": 60000 }
//! ]
//! },
//! "aggs": {
//! "average_in_range": { "avg": { "field": "score" } }
//! }
//! },
//! }))
//! .unwrap();
//! ```
//!
//! # Distributed Aggregation
@@ -153,8 +122,9 @@
//! [`merge_fruits`](intermediate_agg_result::IntermediateAggregationResults::merge_fruits) method
//! to merge multiple results. The merged result can then be converted into
//! [`AggregationResults`](agg_result::AggregationResults) via the
//! [`into_final_bucket_result`](intermediate_agg_result::IntermediateAggregationResults::into_final_bucket_result) method.
//! [`into_final_result`](intermediate_agg_result::IntermediateAggregationResults::into_final_result) method.
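A rough sketch of that merge step, assuming `left` and `right` are `IntermediateAggregationResults` gathered from two shards via the `DistributedAggregationCollector` (the arguments of `into_final_result` are elided because its full signature is not shown in this diff):
let mut merged = left;
merged.merge_fruits(right)?;
// `merged` can now be converted into the final `AggregationResults`
// via `IntermediateAggregationResults::into_final_result(..)`.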
mod agg_limits;
pub mod agg_req;
mod agg_req_with_accessor;
pub mod agg_result;
@@ -165,6 +135,7 @@ mod date;
mod error;
pub mod intermediate_agg_result;
pub mod metric;
mod segment_agg_result;
use std::collections::HashMap;
use std::fmt::Display;
@@ -172,9 +143,12 @@ use std::fmt::Display;
#[cfg(test)]
mod agg_tests;
mod agg_bench;
pub use agg_limits::AggregationLimits;
pub use collector::{
AggregationCollector, AggregationSegmentCollector, DistributedAggregationCollector,
MAX_BUCKET_COUNT,
DEFAULT_BUCKET_LIMIT,
};
use columnar::{ColumnType, MonotonicallyMappableToU64};
pub(crate) use date::format_date;
@@ -183,13 +157,22 @@ use itertools::Itertools;
use serde::{Deserialize, Serialize};
/// Represents an associative array `(key => values)` in a very efficient manner.
#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub(crate) struct VecWithNames<T: Clone> {
#[derive(PartialEq, Serialize, Deserialize)]
pub(crate) struct VecWithNames<T> {
pub(crate) values: Vec<T>,
keys: Vec<String>,
}
impl<T: Clone> Default for VecWithNames<T> {
impl<T: Clone> Clone for VecWithNames<T> {
fn clone(&self) -> Self {
Self {
values: self.values.clone(),
keys: self.keys.clone(),
}
}
}
impl<T> Default for VecWithNames<T> {
fn default() -> Self {
Self {
values: Default::default(),
@@ -198,24 +181,19 @@ impl<T: Clone> Default for VecWithNames<T> {
}
}
impl<T: Clone + std::fmt::Debug> std::fmt::Debug for VecWithNames<T> {
impl<T: std::fmt::Debug> std::fmt::Debug for VecWithNames<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_map().entries(self.iter()).finish()
}
}
impl<T: Clone> From<HashMap<String, T>> for VecWithNames<T> {
impl<T> From<HashMap<String, T>> for VecWithNames<T> {
fn from(map: HashMap<String, T>) -> Self {
VecWithNames::from_entries(map.into_iter().collect_vec())
}
}
impl<T: Clone> VecWithNames<T> {
fn extend(&mut self, entries: VecWithNames<T>) {
self.keys.extend(entries.keys);
self.values.extend(entries.values);
}
impl<T> VecWithNames<T> {
fn from_entries(mut entries: Vec<(String, T)>) -> Self {
// Sort to ensure order of elements match across multiple instances
entries.sort_by(|left, right| left.0.cmp(&right.0));
@@ -230,21 +208,12 @@ impl<T: Clone> VecWithNames<T> {
keys: data_names,
}
}
fn into_iter(self) -> impl Iterator<Item = (String, T)> {
self.keys.into_iter().zip(self.values.into_iter())
}
fn iter(&self) -> impl Iterator<Item = (&str, &T)> + '_ {
self.keys().zip(self.values.iter())
}
fn keys(&self) -> impl Iterator<Item = &str> + '_ {
self.keys.iter().map(|key| key.as_str())
}
fn into_values(self) -> impl Iterator<Item = T> {
self.values.into_iter()
}
fn values(&self) -> impl Iterator<Item = &T> + '_ {
self.values.iter()
}
fn values_mut(&mut self) -> impl Iterator<Item = &mut T> + '_ {
self.values.iter_mut()
}
@@ -313,7 +282,7 @@ pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &ColumnType) -> f64 {
ColumnType::I64 | ColumnType::DateTime => i64::from_u64(val) as f64,
ColumnType::F64 => f64::from_u64(val),
_ => {
panic!("unexpected type {:?}. This should not happen", field_type)
panic!("unexpected type {field_type:?}. This should not happen")
}
}
}
@@ -345,9 +314,8 @@ mod tests {
use time::OffsetDateTime;
use super::agg_req::Aggregations;
use super::segment_agg_result::AggregationLimits;
use super::*;
use crate::aggregation::agg_req::{Aggregation, BucketAggregation, BucketAggregationType};
use crate::aggregation::bucket::TermsAggregation;
use crate::indexer::NoMergePolicy;
use crate::query::{AllQuery, TermQuery};
use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
@@ -371,7 +339,16 @@ mod tests {
index: &Index,
query: Option<(&str, &str)>,
) -> crate::Result<Value> {
let collector = AggregationCollector::from_aggs(agg_req, None);
exec_request_with_query_and_memory_limit(agg_req, index, query, Default::default())
}
pub fn exec_request_with_query_and_memory_limit(
agg_req: Aggregations,
index: &Index,
query: Option<(&str, &str)>,
limits: AggregationLimits,
) -> crate::Result<Value> {
let collector = AggregationCollector::from_aggs(agg_req, limits);
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -434,7 +411,7 @@ mod tests {
.set_index_option(IndexRecordOption::Basic)
.set_fieldnorms(false),
)
.set_fast()
.set_fast(None)
.set_stored();
let text_field = schema_builder.add_text_field("text", text_fieldtype.clone());
let text_field_id = schema_builder.add_text_field("text_id", text_fieldtype);
@@ -450,7 +427,7 @@ mod tests {
let index = Index::create_in_ram(schema_builder.build());
{
// let mut index_writer = index.writer_for_tests()?;
let mut index_writer = index.writer_with_num_threads(1, 30_000_000)?;
let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
for values in segment_and_values {
for (i, term) in values {
@@ -489,7 +466,7 @@ mod tests {
.set_indexing_options(
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
)
.set_fast()
.set_fast(None)
.set_stored();
let text_field = schema_builder.add_text_field("text", text_fieldtype);
let date_field = schema_builder.add_date_field("date", FAST);
@@ -595,50 +572,4 @@ mod tests {
Ok(index)
}
#[test]
fn test_aggregation_on_json_object() {
let mut schema_builder = Schema::builder();
let json = schema_builder.add_json_field("json", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer
.add_document(doc!(json => json!({"color": "red"})))
.unwrap();
index_writer
.add_document(doc!(json => json!({"color": "blue"})))
.unwrap();
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let agg: Aggregations = vec![(
"jsonagg".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Terms(TermsAggregation {
field: "json.color".to_string(),
..Default::default()
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();
let aggregation_collector = AggregationCollector::from_aggs(agg, None);
let aggregation_results = searcher.search(&AllQuery, &aggregation_collector).unwrap();
let aggregation_res_json = serde_json::to_value(aggregation_results).unwrap();
assert_eq!(
&aggregation_res_json,
&serde_json::json!({
"jsonagg": {
"buckets": [
{"doc_count": 1, "key": "blue"},
{"doc_count": 1, "key": "red"}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
}
})
);
}
}

View File

@@ -4,45 +4,41 @@
//! merging.
use std::fmt::Debug;
use std::rc::Rc;
use std::sync::atomic::AtomicU32;
use super::agg_req::MetricAggregation;
use super::agg_req_with_accessor::{
AggregationsWithAccessor, BucketAggregationWithAccessor, MetricAggregationWithAccessor,
};
pub(crate) use super::agg_limits::AggregationLimits;
use super::agg_req::AggregationVariants;
use super::agg_req_with_accessor::{AggregationWithAccessor, AggregationsWithAccessor};
use super::bucket::{SegmentHistogramCollector, SegmentRangeCollector, SegmentTermCollector};
use super::collector::MAX_BUCKET_COUNT;
use super::intermediate_agg_result::IntermediateAggregationResults;
use super::metric::{
AverageAggregation, CountAggregation, MaxAggregation, MinAggregation, SegmentStatsCollector,
SegmentStatsType, StatsAggregation, SumAggregation,
AverageAggregation, CountAggregation, MaxAggregation, MinAggregation,
SegmentPercentilesCollector, SegmentStatsCollector, SegmentStatsType, StatsAggregation,
SumAggregation,
};
use super::VecWithNames;
use crate::aggregation::agg_req::BucketAggregationType;
use crate::TantivyError;
use crate::aggregation::bucket::SegmentTermCollectorComposite;
pub(crate) trait SegmentAggregationCollector: CollectorClone + Debug {
fn into_intermediate_aggregations_result(
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
) -> crate::Result<IntermediateAggregationResults>;
results: &mut IntermediateAggregationResults,
) -> crate::Result<()>;
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &AggregationsWithAccessor,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()>;
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &AggregationsWithAccessor,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()>;
/// Finalize method. Some aggregators collect blocks of docs before calling `collect_block`.
/// This method ensures those staged docs will be collected.
fn flush(&mut self, _agg_with_accessor: &AggregationsWithAccessor) -> crate::Result<()> {
fn flush(&mut self, _agg_with_accessor: &mut AggregationsWithAccessor) -> crate::Result<()> {
Ok(())
}
}
@@ -66,92 +62,101 @@ impl Clone for Box<dyn SegmentAggregationCollector> {
}
pub(crate) fn build_segment_agg_collector(
req: &AggregationsWithAccessor,
req: &mut AggregationsWithAccessor,
) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
// Single metric special case
if req.buckets.is_empty() && req.metrics.len() == 1 {
let req = &req.metrics.values[0];
// Single collector special case
if req.aggs.len() == 1 {
let req = &mut req.aggs.values[0];
let accessor_idx = 0;
return build_metric_segment_agg_collector(req, accessor_idx);
}
// Single bucket special case
if req.metrics.is_empty() && req.buckets.len() == 1 {
let req = &req.buckets.values[0];
let accessor_idx = 0;
return build_bucket_segment_agg_collector(req, accessor_idx);
return build_single_agg_segment_collector(req, accessor_idx);
}
let agg = GenericSegmentAggregationResultsCollector::from_req_and_validate(req)?;
Ok(Box::new(agg))
}
pub(crate) fn build_metric_segment_agg_collector(
req: &MetricAggregationWithAccessor,
pub(crate) fn build_single_agg_segment_collector(
req: &mut AggregationWithAccessor,
accessor_idx: usize,
) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
let stats_collector = match &req.metric {
MetricAggregation::Average(AverageAggregation { .. }) => {
SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Average, accessor_idx)
use AggregationVariants::*;
match &req.agg.agg {
Terms(terms_req) => {
if let Some(acc2) = req.accessor2.as_ref() {
Ok(Box::new(
SegmentTermCollectorComposite::from_req_and_validate(
terms_req,
&mut req.sub_aggregation,
req.field_type,
acc2.1,
accessor_idx,
)?,
))
} else {
Ok(Box::new(SegmentTermCollector::from_req_and_validate(
terms_req,
&mut req.sub_aggregation,
req.field_type,
accessor_idx,
)?))
}
}
MetricAggregation::Count(CountAggregation { .. }) => {
SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Count, accessor_idx)
}
MetricAggregation::Max(MaxAggregation { .. }) => {
SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Max, accessor_idx)
}
MetricAggregation::Min(MinAggregation { .. }) => {
SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Min, accessor_idx)
}
MetricAggregation::Stats(StatsAggregation { .. }) => {
SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Stats, accessor_idx)
}
MetricAggregation::Sum(SumAggregation { .. }) => {
SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Sum, accessor_idx)
}
};
Ok(Box::new(stats_collector))
}
pub(crate) fn build_bucket_segment_agg_collector(
req: &BucketAggregationWithAccessor,
accessor_idx: usize,
) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
match &req.bucket_agg {
BucketAggregationType::Terms(terms_req) => {
Ok(Box::new(SegmentTermCollector::from_req_and_validate(
terms_req,
&req.sub_aggregation,
Range(range_req) => Ok(Box::new(SegmentRangeCollector::from_req_and_validate(
range_req,
&mut req.sub_aggregation,
&mut req.limits,
req.field_type,
accessor_idx,
)?)),
Histogram(histogram) => Ok(Box::new(SegmentHistogramCollector::from_req_and_validate(
histogram.clone(),
&mut req.sub_aggregation,
req.field_type,
accessor_idx,
)?)),
DateHistogram(histogram) => Ok(Box::new(SegmentHistogramCollector::from_req_and_validate(
histogram.to_histogram_req()?,
&mut req.sub_aggregation,
req.field_type,
accessor_idx,
)?)),
Average(AverageAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
req.field_type,
SegmentStatsType::Average,
accessor_idx,
))),
Count(CountAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
req.field_type,
SegmentStatsType::Count,
accessor_idx,
))),
Max(MaxAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
req.field_type,
SegmentStatsType::Max,
accessor_idx,
))),
Min(MinAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
req.field_type,
SegmentStatsType::Min,
accessor_idx,
))),
Stats(StatsAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
req.field_type,
SegmentStatsType::Stats,
accessor_idx,
))),
Sum(SumAggregation { .. }) => Ok(Box::new(SegmentStatsCollector::from_req(
req.field_type,
SegmentStatsType::Sum,
accessor_idx,
))),
Percentiles(percentiles_req) => Ok(Box::new(
SegmentPercentilesCollector::from_req_and_validate(
percentiles_req,
req.field_type,
accessor_idx,
)?))
}
BucketAggregationType::Range(range_req) => {
Ok(Box::new(SegmentRangeCollector::from_req_and_validate(
range_req,
&req.sub_aggregation,
&req.bucket_count,
req.field_type,
accessor_idx,
)?))
}
BucketAggregationType::Histogram(histogram) => {
Ok(Box::new(SegmentHistogramCollector::from_req_and_validate(
histogram,
&req.sub_aggregation,
req.field_type,
accessor_idx,
)?))
}
BucketAggregationType::DateHistogram(histogram) => {
Ok(Box::new(SegmentHistogramCollector::from_req_and_validate(
&histogram.to_histogram_req()?,
&req.sub_aggregation,
req.field_type,
accessor_idx,
)?))
}
)?,
)),
}
}
@@ -160,56 +165,34 @@ pub(crate) fn build_bucket_segment_agg_collector(
/// can handle arbitrary complexity of sub-aggregations. Ideally we never have to pick this one
/// and can provide specialized versions instead that remove some of its overhead.
pub(crate) struct GenericSegmentAggregationResultsCollector {
pub(crate) metrics: Option<Vec<Box<dyn SegmentAggregationCollector>>>,
pub(crate) buckets: Option<Vec<Box<dyn SegmentAggregationCollector>>>,
pub(crate) aggs: Vec<Box<dyn SegmentAggregationCollector>>,
}
impl Debug for GenericSegmentAggregationResultsCollector {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SegmentAggregationResultsCollector")
.field("metrics", &self.metrics)
.field("buckets", &self.buckets)
.field("aggs", &self.aggs)
.finish()
}
}
impl SegmentAggregationCollector for GenericSegmentAggregationResultsCollector {
fn into_intermediate_aggregations_result(
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
) -> crate::Result<IntermediateAggregationResults> {
let buckets = if let Some(buckets) = self.buckets {
let mut intermeditate_buckets = VecWithNames::default();
for bucket in buckets {
// TODO too many allocations?
let res = bucket.into_intermediate_aggregations_result(agg_with_accessor)?;
// unwrap is fine since we only have buckets here
intermeditate_buckets.extend(res.buckets.unwrap());
}
Some(intermeditate_buckets)
} else {
None
};
let metrics = if let Some(metrics) = self.metrics {
let mut intermeditate_metrics = VecWithNames::default();
for metric in metrics {
// TODO too many allocations?
let res = metric.into_intermediate_aggregations_result(agg_with_accessor)?;
// unwrap is fine since we only have metrics here
intermeditate_metrics.extend(res.metrics.unwrap());
}
Some(intermeditate_metrics)
} else {
None
};
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
for agg in self.aggs {
agg.add_intermediate_aggregation_result(agg_with_accessor, results)?;
}
Ok(IntermediateAggregationResults { metrics, buckets })
Ok(())
}
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &AggregationsWithAccessor,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
self.collect_block(&[doc], agg_with_accessor)?;
@@ -219,102 +202,32 @@ impl SegmentAggregationCollector for GenericSegmentAggregationResultsCollector {
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &AggregationsWithAccessor,
agg_with_accessor: &mut AggregationsWithAccessor,
) -> crate::Result<()> {
if let Some(metrics) = self.metrics.as_mut() {
for collector in metrics {
collector.collect_block(docs, agg_with_accessor)?;
}
}
if let Some(buckets) = self.buckets.as_mut() {
for collector in buckets {
collector.collect_block(docs, agg_with_accessor)?;
}
for collector in &mut self.aggs {
collector.collect_block(docs, agg_with_accessor)?;
}
Ok(())
}
fn flush(&mut self, agg_with_accessor: &AggregationsWithAccessor) -> crate::Result<()> {
if let Some(metrics) = &mut self.metrics {
for collector in metrics {
collector.flush(agg_with_accessor)?;
}
}
if let Some(buckets) = &mut self.buckets {
for collector in buckets {
collector.flush(agg_with_accessor)?;
}
fn flush(&mut self, agg_with_accessor: &mut AggregationsWithAccessor) -> crate::Result<()> {
for collector in &mut self.aggs {
collector.flush(agg_with_accessor)?;
}
Ok(())
}
}
impl GenericSegmentAggregationResultsCollector {
pub(crate) fn from_req_and_validate(req: &AggregationsWithAccessor) -> crate::Result<Self> {
let buckets = req
.buckets
.iter()
pub(crate) fn from_req_and_validate(req: &mut AggregationsWithAccessor) -> crate::Result<Self> {
let aggs = req
.aggs
.values_mut()
.enumerate()
.map(|(accessor_idx, (_key, req))| {
build_bucket_segment_agg_collector(req, accessor_idx)
})
.collect::<crate::Result<Vec<Box<dyn SegmentAggregationCollector>>>>()?;
let metrics = req
.metrics
.iter()
.enumerate()
.map(|(accessor_idx, (_key, req))| {
build_metric_segment_agg_collector(req, accessor_idx)
})
.map(|(accessor_idx, req)| build_single_agg_segment_collector(req, accessor_idx))
.collect::<crate::Result<Vec<Box<dyn SegmentAggregationCollector>>>>()?;
let metrics = if metrics.is_empty() {
None
} else {
Some(metrics)
};
let buckets = if buckets.is_empty() {
None
} else {
Some(buckets)
};
Ok(GenericSegmentAggregationResultsCollector { metrics, buckets })
}
}
#[derive(Clone)]
pub(crate) struct BucketCount {
/// The counter which is shared between the aggregations for one request.
pub(crate) bucket_count: Rc<AtomicU32>,
pub(crate) max_bucket_count: u32,
}
impl Default for BucketCount {
fn default() -> Self {
Self {
bucket_count: Default::default(),
max_bucket_count: MAX_BUCKET_COUNT,
}
}
}
impl BucketCount {
pub(crate) fn validate_bucket_count(&self) -> crate::Result<()> {
if self.get_count() > self.max_bucket_count {
return Err(TantivyError::InvalidArgument(
"Aborting aggregation because too many buckets were created".to_string(),
));
}
Ok(())
}
pub(crate) fn add_count(&self, count: u32) {
self.bucket_count
.fetch_add(count, std::sync::atomic::Ordering::Relaxed);
}
pub(crate) fn get_count(&self) -> u32 {
self.bucket_count.load(std::sync::atomic::Ordering::Relaxed)
Ok(GenericSegmentAggregationResultsCollector { aggs })
}
}

View File

@@ -414,8 +414,8 @@ impl FacetCounts {
pub fn get<T>(&self, facet_from: T) -> FacetChildIterator<'_>
where Facet: From<T> {
let facet = Facet::from(facet_from);
let left_bound = Bound::Excluded(facet.clone());
let right_bound = if facet.is_root() {
let lower_bound = Bound::Excluded(facet.clone());
let upper_bound = if facet.is_root() {
Bound::Unbounded
} else {
let mut facet_after_bytes: String = facet.encoded_str().to_owned();
@@ -424,7 +424,7 @@ impl FacetCounts {
Bound::Excluded(facet_after)
};
let underlying: btree_map::Range<'_, _, _> =
self.facet_counts.range((left_bound, right_bound));
self.facet_counts.range((lower_bound, upper_bound));
FacetChildIterator { underlying }
}
@@ -812,7 +812,7 @@ mod bench {
let mut docs = vec![];
for val in 0..50 {
let facet = Facet::from(&format!("/facet_{}", val));
let facet = Facet::from(&format!("/facet_{val}"));
for _ in 0..val * val {
docs.push(doc!(facet_field=>facet.clone()));
}

View File

@@ -113,7 +113,7 @@ impl Collector for HistogramCollector {
segment: &crate::SegmentReader,
) -> crate::Result<Self::Child> {
let column_opt = segment.fast_fields().u64_lenient(&self.field)?;
let column = column_opt.ok_or_else(|| FastFieldNotAvailableError {
let (column, _column_type) = column_opt.ok_or_else(|| FastFieldNotAvailableError {
field_name: self.field.clone(),
})?;
let column_u64 = column.first_or_default_col(0u64);
@@ -295,7 +295,7 @@ mod tests {
DateTime::from_primitive(
Date::from_calendar_date(1980, Month::January, 1)?.with_hms(0, 0, 0)?,
),
3_600_000_000 * 24 * 365, // it is just for a unit test... sorry leap years.
3_600_000_000_000 * 24 * 365, // it is just for a unit test... sorry leap years.
10,
);
let week_histogram = searcher.search(&all_query, &week_histogram_collector)?;

View File

@@ -180,9 +180,11 @@ pub trait Collector: Sync + Send {
})?;
}
(Some(alive_bitset), false) => {
weight.for_each_no_score(reader, &mut |doc| {
if alive_bitset.is_alive(doc) {
segment_collector.collect(doc, 0.0);
weight.for_each_no_score(reader, &mut |docs| {
for doc in docs.iter().cloned() {
if alive_bitset.is_alive(doc) {
segment_collector.collect(doc, 0.0);
}
}
})?;
}
@@ -192,8 +194,8 @@ pub trait Collector: Sync + Send {
})?;
}
(None, false) => {
weight.for_each_no_score(reader, &mut |doc| {
segment_collector.collect(doc, 0.0);
weight.for_each_no_score(reader, &mut |docs| {
segment_collector.collect_block(docs);
})?;
}
}
@@ -270,6 +272,13 @@ pub trait SegmentCollector: 'static {
/// The query pushes the scored document to the collector via this method.
fn collect(&mut self, doc: DocId, score: Score);
/// The query pushes a block of documents to the collector via this method.
fn collect_block(&mut self, docs: &[DocId]) {
for doc in docs {
self.collect(*doc, 0.0);
}
}
/// Extract the fruit of the collection from the `SegmentCollector`.
fn harvest(self) -> Self::Fruit;
}

View File

@@ -52,10 +52,8 @@ where
let requested_type = field_entry.field_type().value_type();
if schema_type != requested_type {
return Err(TantivyError::SchemaError(format!(
"Field {:?} is of type {:?}!={:?}",
field_entry.name(),
schema_type,
requested_type
"Field {:?} is of type {schema_type:?}!={requested_type:?}",
field_entry.name()
)));
}
self.collector.for_segment(segment_local_id, segment)
@@ -155,12 +153,13 @@ impl CustomScorer<u64> for ScorerByField {
//
// The conversion will then happen only on the top-K docs.
let sort_column_opt = segment_reader.fast_fields().u64_lenient(&self.field)?;
let sort_column = sort_column_opt
.ok_or_else(|| FastFieldNotAvailableError {
let (sort_column, _sort_column_type) =
sort_column_opt.ok_or_else(|| FastFieldNotAvailableError {
field_name: self.field.clone(),
})?
.first_or_default_col(0u64);
Ok(ScorerByFastFieldReader { sort_column })
})?;
Ok(ScorerByFastFieldReader {
sort_column: sort_column.first_or_default_col(0u64),
})
}
}
@@ -1030,7 +1029,7 @@ mod tests {
let segment = searcher.segment_reader(0);
let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE);
let err = top_collector.for_segment(0, segment).err().unwrap();
assert!(matches!(err, crate::TantivyError::SchemaError(_)));
assert!(matches!(err, crate::TantivyError::InvalidArgument(_)));
Ok(())
}

Some files were not shown because too many files have changed in this diff.