Compare commits


132 Commits

Author SHA1 Message Date
Stu Hood
147214b0eb Implement GreaterThanOrEqual and LessThanOrEqual to handle boundary cases in Chain. 2025-12-29 15:38:28 -07:00
Stu Hood
865a12f4bb Simpler implementation of first_vals_in_value_range. 2025-12-29 14:51:21 -07:00
Stu Hood
00110312c9 Fix compound filters, and remove redundant logic from the Chain implementation. 2025-12-29 14:51:17 -07:00
Stu Hood
b2e980b450 Property test for Comparator/ValueRange consistency, and fixes. 2025-12-27 21:03:02 -07:00
Stu Hood
1a701b86bd Remove allow-dead-code annotation. 2025-12-27 17:56:55 -07:00
Stu Hood
ee4538d6c2 test_order_by_u64_prop 2025-12-27 16:04:24 -07:00
Stu Hood
25f1e9aa9f Move ComparableDoc to a reusable location, allowing for pushing directly from ColumnValues into a TopNComputer buffer in some cases. 2025-12-27 14:01:02 -07:00
Stu Hood
6b03b28bac Use a Buffer generic scratch buffer parameter on TopNComputer to allow for internal iteration in SegmentSortKeyComputer. 2025-12-27 13:03:56 -07:00
Stu Hood
7a5241cb83 Update comments. 2025-12-26 17:52:40 -07:00
Stu Hood
0f5e0f6f87 TODO: Audit. 2025-12-26 16:12:15 -07:00
Stu Hood
a654115d9a Squash with Optional. WIP: Still needs work: we are allocating. 2025-12-26 15:18:17 -07:00
Stu Hood
1a17515ead Convert test_order_by_compound_filtering_with_none to a proptest. 2025-12-26 14:25:01 -07:00
Stu Hood
0f1b0ce527 Optimize Optional indexes. TODO: Audit. 2025-12-26 13:01:30 -07:00
Stu Hood
0c920dfc61 Add a ValueRange filter to SegmentSortKeyComputer::segment_sort_keys. 2025-12-26 12:27:11 -07:00
Stu Hood
996fc936f6 Add null handling to first_vals_in_value_range. 2025-12-26 11:12:53 -07:00
Stu Hood
5ff38e1605 WIP: Add ValueRange cases for Comparators. 2025-12-26 11:02:19 -07:00
Stu Hood
e8a4adeedd Replace Column::first_vals with Column::first_vals_in_value_range. 2025-12-25 15:39:18 -07:00
Stu Hood
efc9e585a9 WIP: Add ValueRange::All 2025-12-25 15:16:26 -07:00
Stu Hood
f4252fc184 WIP: Add ValueRange. 2025-12-25 14:53:15 -07:00
Stu Hood
53c067d1f3 Restore laziness in ChainSegmentSortKeyComputer. 2025-12-24 10:39:26 -07:00
Stu Hood
259c1ed965 Isolate accept_sort_key_lazy to ChainSegmentSortKeyComputer. 2025-12-23 17:37:33 -07:00
Stu Hood
1afc432df8 Use an internal buffer in the SegmentSortKeyComputer. 2025-12-23 17:23:10 -07:00
Stu Hood
b8acd3ac94 WIP: Add and use segment_sort_keys to remove dynamic dispatch to the column. 2025-12-23 16:44:50 -07:00
Stu Hood
b5321d2125 Implement laziness for collect_block. 2025-12-23 15:48:36 -07:00
Stu Hood
ad3e2363fe WIP: Add failing test. 2025-12-23 15:48:34 -07:00
Stu Hood
9ec5750c25 Implement collect_block for lazy scorers. 2025-12-23 15:46:41 -07:00
Stu Hood
03f09a2b5b chore: Add support for natural-order-with-none-highest in TopDocs::order_by (#90)
Add `ComparatorEnum::NaturalNoneHigher`, which matches Postgres's `DESC NULLS FIRST` behavior in `TopDocs::order_by`.

Expands comments on `Comparator` implementations to ensure that behavior for `None` is explicit.

Upstream as https://github.com/quickwit-oss/tantivy/pull/2780
2025-12-23 09:15:31 -08:00
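The `None`-as-highest ordering above is easy to get wrong at the boundaries, so a minimal illustration may help. This is a sketch in plain Rust (not the fork's actual `Comparator` code): a comparator that ranks `None` above every `Some`, which yields Postgres's `DESC NULLS FIRST` behavior when used for a descending sort.

```rust
use std::cmp::Ordering;

// Sketch only: ranks `None` above any `Some`, so a descending sort puts
// missing values first, like Postgres's `DESC NULLS FIRST`.
fn cmp_none_higher<T: Ord>(a: &Option<T>, b: &Option<T>) -> Ordering {
    match (a, b) {
        (None, None) => Ordering::Equal,
        (None, Some(_)) => Ordering::Greater, // None ranks above every value
        (Some(_), None) => Ordering::Less,
        (Some(x), Some(y)) => x.cmp(y),
    }
}

fn main() {
    let mut vals = vec![Some(3), None, Some(1), Some(2)];
    // Descending sort: reverse the comparator's arguments.
    vals.sort_by(|a, b| cmp_none_higher(b, a));
    assert_eq!(vals, vec![None, Some(3), Some(2), Some(1)]);
}
```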
Stu Hood
9ffe4af096 Fix TopN performance regression.
https://github.com/quickwit-oss/tantivy/pull/2777
2025-12-17 10:43:29 -07:00
Stu Hood
c56ddcb6d7 Add an erased SortKeyComputer to sort on types which are not known until runtime.
https://github.com/quickwit-oss/tantivy/pull/2770
2025-12-17 10:43:29 -07:00
Ming
5b8fff154b fix: overflow in vint buffer (#88) 2025-12-17 10:43:29 -07:00
Mohammad Dashti
ff6ee3a5db fix: post-rebase fixes
- Add missing size_hint module declaration
- Remove test-only export serialize_and_load_u64_based_column_values
- fixed quickwit CI issues
2025-12-10 10:17:28 -08:00
Moe
eda9aa437f fix: boolean query incorrectly dropping documents when AllScorer is present (#84)
Co-authored-by: Stu Hood <stuhood@gmail.com>
2025-12-10 10:17:28 -08:00
Piotr Olszak
538da08eb5 Add polish stemmer (#82)
This commit adds support for Polish language stemming.
The previously used rust-stemmers crate is abandoned and unmaintained, which blocked the addition of new languages. This change addresses a user request for Polish stemming to improve BM25 recall in their use case. The tantivy-stemmers crate is a modern, maintained alternative that also opens the door for supporting many other languages in the future.
- Added the tantivy-stemmers crate as a dependency to the workspace, alongside the existing rust-stemmers dependency (for backward compatibility)
- Introduced an internal enum that can hold an algorithm from either rust-stemmers or tantivy-stemmers
- Added Polish to the main Language enum, mapped to the new tantivy-stemmers implementation
- Updated the token stream to handle both types of stemmers internally
- Added the POLISH variant to the stopwords list
- Existing tests pass
- Added test_pl_tokenizer to verify that the Polish stemmer works correctly
2025-12-10 10:17:28 -08:00
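As a rough illustration of the internal enum mentioned above, here is a sketch with stand-in stemmer types; the real code wraps algorithms from the `rust-stemmers` and `tantivy-stemmers` crates behind a similar dispatch.

```rust
// Stand-in stemmer types; the real code wraps `rust_stemmers::Stemmer` and a
// `tantivy-stemmers` algorithm behind a similar internal enum.
struct LegacyStemmer; // stand-in for a rust-stemmers algorithm
struct NewStemmer;    // stand-in for a tantivy-stemmers algorithm

impl LegacyStemmer {
    fn stem(&self, token: &str) -> String {
        token.trim_end_matches('s').to_string() // toy rule
    }
}
impl NewStemmer {
    fn stem(&self, token: &str) -> String {
        token.trim_end_matches("ami").to_string() // toy rule
    }
}

// Internal enum that can hold an algorithm from either backend, so the token
// stream needs only one `match` to stem with whichever crate backs the
// selected `Language`.
enum AnyStemmer {
    Legacy(LegacyStemmer),
    New(NewStemmer),
}

impl AnyStemmer {
    fn stem(&self, token: &str) -> String {
        match self {
            AnyStemmer::Legacy(s) => s.stem(token),
            AnyStemmer::New(s) => s.stem(token),
        }
    }
}

fn main() {
    let english = AnyStemmer::Legacy(LegacyStemmer);
    let polish = AnyStemmer::New(NewStemmer);
    println!("{} / {}", english.stem("cats"), polish.stem("kotami"));
}
```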
Moe
7bd5cc5417 fix: fixed integer overflow in ExpUnrolledLinkedList for large datasets (#80) 2025-12-10 10:17:28 -08:00
Moe
5d46137556 feat: Added multiple snippet support (#76)
Adds `SnippetGenerator::snippets` to render multiple snippets in either score or position order.

Additionally: renames the existing `limit` and `offset` arguments to disambiguate between "match" positions (which are concatenated into fragments), and "snippet" positions.

Co-authored-by: Stu Hood <stuhood@gmail.com>
2025-12-10 10:17:28 -08:00
Stu Hood
92c784f697 perf: Optimize TermSet for very large sets of terms. (#75)
* Removes allocation in a bunch of places
* Removes sorting of terms if we're going to use the fast field execution method
* Adds back the (accidentally dropped) cardinality threshold
* Removes `bool` support -- using the posting lists is always more efficient for a `bool`, since there are at most two of them
* More eagerly constructs the term `HashSet` so that it happens once, rather than once per segment
2025-12-10 10:17:28 -08:00
Stu Hood
b3541d10e1 chore: Use smaller merge buffers. (#74)
## What

Reduce the per-segment buffer sizes from 4MB to 512KB.

## Why

#71 moved from buffers which covered the entire file to maximum 4MB buffers. But for merges with very large segment counts, we need to be using more conservative buffer sizes. 512KB will still eliminate most posting list reads: posting lists larger than 512KB will skip the buffer.
2025-12-10 10:17:28 -08:00
Stu Hood
7183ac6cbc fix: Use smaller buffers during merging (#71)
`MergeOptimizedInvertedIndexReader` was added in #32 in order to avoid making small reads to our underlying `FileHandle`. It did so by reading the entire content of the posting lists and positions at open time.

As that PR says:
> A likely downside to this approach is that now pg_search will be, indirectly, holding onto a lot of heap-allocated memory that was read from its block storage. Perhaps in the (near) future we can further optimize the new `MergeOptimizedInvertedIndexReader` such that it pages in blocks of a few megabytes at a time, on demand, rather than the whole file.

This PR makes that change. But it additionally removes code that was later added in #47 to borrow individual entries rather than creating `OwnedBytes` for them. I believe that this code was added due to a misunderstanding:

`OwnedBytes` is a total misnomer: the bytes are not "owned": they are immutably borrowed and reference counted. An `OwnedBytes` object can be created for any type which derefs to a slice of bytes, and can be cheaply cloned and sliced. So there is no need to actually borrow _or_ copy the buffer under the `OwnedBytes`. Removing the code that was doing so allows us to safely recreate our buffer without worrying about the lifetimes of buffers that we've handed out.
2025-12-10 10:17:28 -08:00
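To make the `OwnedBytes` point concrete, here is an illustration of the described semantics using `Arc` (this is not the `ownedbytes` crate's code, just the same reference-counted slicing idea):

```rust
use std::ops::Range;
use std::sync::Arc;

// The underlying buffer is reference counted, so "slicing" just clones an
// Arc and narrows a range. Nothing is copied, and handing out slices does
// not pin the original owner's lifetime.
#[derive(Clone)]
struct SharedBytes {
    data: Arc<[u8]>,
    range: Range<usize>,
}

impl SharedBytes {
    fn new(data: Vec<u8>) -> Self {
        let len = data.len();
        SharedBytes { data: data.into(), range: 0..len }
    }
    fn slice(&self, range: Range<usize>) -> Self {
        let start = self.range.start;
        SharedBytes {
            data: Arc::clone(&self.data),
            range: start + range.start..start + range.end,
        }
    }
    fn as_slice(&self) -> &[u8] {
        &self.data[self.range.clone()]
    }
}

fn main() {
    let whole = SharedBytes::new(b"posting list bytes".to_vec());
    let part = whole.slice(0..7); // cheap: Arc clone + range math
    drop(whole);                  // the slice keeps the buffer alive
    assert_eq!(part.as_slice(), b"posting");
}
```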
Stu Hood
e0476d2eb2 fix: Add support for bool to the fast field TermSet implementation (#70)
Missed in #69.

The `TermSet` fast fields implementation cribbed from `RangeQuery`'s fast fields implementation: ... which also has this bug. Will fix upstream.
2025-12-10 10:17:28 -08:00
Stu Hood
9fe0899934 perf: Implement a TermSet variant which uses fast fields (#69)
The `TermSet` `Query` currently produces one `Scorer`/`DocSet` per matched term by scanning the term dictionary and then consuming posting lists. For very large sets of terms and a fast field, it is faster to scan the fast field column while intersecting with a `HashSet` of (encoded) term values.

Following the pattern set by the two execution modes of `RangeQuery`, this PR introduces a variant of `TermSet` which uses fast fields, and then uses it when there are more than 1024 input terms (an arbitrary threshold!).

Performance is significantly improved for large `TermSet`s of primitives.
2025-12-10 10:17:28 -08:00
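A sketch of the mode selection and fast-field scan described above; the threshold constant is from the PR, while the helper names and the plain-`Vec` column are stand-ins.

```rust
use std::collections::HashSet;

const FAST_FIELD_TERM_THRESHOLD: usize = 1024; // "an arbitrary threshold!", per the PR

enum ExecutionMode {
    PostingLists,
    FastFieldScan,
}

fn choose_mode(num_terms: usize, field_is_fast: bool) -> ExecutionMode {
    if field_is_fast && num_terms > FAST_FIELD_TERM_THRESHOLD {
        ExecutionMode::FastFieldScan
    } else {
        ExecutionMode::PostingLists
    }
}

// Fast-field execution: one pass over the column, keeping docs whose
// (encoded) value is in the term set -- no term dictionary scan and no
// per-term DocSet.
fn fast_field_scan(column: &[u64], terms: &HashSet<u64>) -> Vec<u32> {
    column
        .iter()
        .enumerate()
        .filter(|&(_, v)| terms.contains(v))
        .map(|(doc, _)| doc as u32)
        .collect()
}

fn main() {
    let column = vec![10u64, 20, 30, 20, 40];
    let terms: HashSet<u64> = [20, 40].into_iter().collect();
    assert_eq!(fast_field_scan(&column, &terms), vec![1, 3, 4]);
    assert!(matches!(choose_mode(5000, true), ExecutionMode::FastFieldScan));
}
```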
Stu Hood
aaa5abb7d6 chore: Expose a method to create a segment with a particular id (#68)
In support of https://github.com/paradedb/paradedb/pull/3203
2025-12-10 10:17:28 -08:00
Ming
f8b8fd0321 feat: SnippetGenerator accepts limit/offset (#66) 2025-12-10 10:17:27 -08:00
Eric Ridge
cd878a5c90 fix: support MemoryArena allocations up to 4GB (#62)
A MemoryArena should support allocations up to 4GB and https://github.com/paradedb/tantivy/pull/60 broke this by not accounting for the "max page id" when pages are now 50% the size of what they originally were.

This cleans up the code so things stay in sync if we change NUM_BITS_PAGE_ADDR again and adds a unit test
2025-12-10 10:17:27 -08:00
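The page-size/page-id trade-off is just bit arithmetic; a worked sketch, with the constant names assumed from the message above:

```rust
// Assumed constant names: an arena address packs a page id plus a within-page
// offset into 32 bits, so halving the page size must add one bit to the page
// id to keep the addressable space at 4GB.
const NUM_BITS_PAGE_ADDR: u32 = 19;                    // 2^19 = 512KB pages (assumed)
const PAGE_SIZE: u64 = 1 << NUM_BITS_PAGE_ADDR;
const NUM_BITS_PAGE_ID: u32 = 32 - NUM_BITS_PAGE_ADDR; // derived: stays in sync
const MAX_PAGES: u64 = 1 << NUM_BITS_PAGE_ID;

fn main() {
    // 8192 pages * 512KB = 4GB of addressable arena memory.
    assert_eq!(PAGE_SIZE * MAX_PAGES, 1u64 << 32);
    println!("{} pages x {} bytes = {} bytes", MAX_PAGES, PAGE_SIZE, PAGE_SIZE * MAX_PAGES);
}
```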
Eric Ridge
30c237e895 perf: various optimizations around arenas (#60)
- Use a bitset to track used buckets in the `SharedArenaHashmap`, allowing for more efficient iteration
- Create a global pool for both `MemoryArena` and `IndexingContext`
- Reduce the MemoryArena page size by half (it's now 512KB instead of 1MB)
- Centralize thread pool instances in `SegmentUpdater` so we can elide creating them if all thread counts are zero
2025-12-10 10:17:27 -08:00
Eric Ridge
b6cd39872b fix: Allow zero indexing & merging threads (#59)
This removes a check against `IndexWriterOptions` which disallowed zero indexing worker threads (`num_worker_threads`).
2025-12-10 10:17:27 -08:00
Stu Hood
c96d801c68 perf: Lazily load in BitpackedCodec (#56)
We would like to be able to lazily load `BitpackedCodec` columns (similar to what 020bdffd61 did for `BlockwiseLinearCodec`), because in the context of `pg_search`, immediately constructing `OwnedBytes` means copying the entire content of the column into memory.

To do so, we expose some (slightly overlapped) block boundaries from `BitUnpacker`, and then lazily load each block when it is requested. Only the `get_val` function uses the cache: `get_row_ids_for_value_range` does not (yet), because it would be necessary to partition the row ids by block, and most of the time consumers using it are already loading reasonably large ranges anyway.

See https://github.com/paradedb/paradedb/pull/2894 for usage. There are a few 2x speedups in the benchmark suite, as well as a 1.8x speedup on a representative customer query. Unfortunately there are also some 13-19% slowdowns on aggregates: it looks like that is because aggregates use `get_vals`, for which the default implementation is to just call `get_val` in a loop.
2025-12-10 10:17:27 -08:00
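A sketch of per-block lazy loading as described above, with assumed names; each block's bytes are fetched at most once, on first access.

```rust
use std::ops::Range;
use std::sync::OnceLock;

// Assumed names: the column is split at (slightly overlapped) block
// boundaries, and a block's bytes are fetched from backing storage only when
// `get_val` first touches a row in that block.
struct LazyBlocks {
    boundaries: Vec<Range<usize>>, // byte range per block
    cache: Vec<OnceLock<Vec<u8>>>, // one lazily-filled slot per block
}

impl LazyBlocks {
    fn new(boundaries: Vec<Range<usize>>) -> Self {
        let cache = (0..boundaries.len()).map(|_| OnceLock::new()).collect();
        LazyBlocks { boundaries, cache }
    }

    // Loads (at most once) and returns the bytes for `block`; `read` stands
    // in for the underlying FileHandle read.
    fn block_bytes(&self, block: usize, read: impl Fn(Range<usize>) -> Vec<u8>) -> &[u8] {
        self.cache[block].get_or_init(|| read(self.boundaries[block].clone()))
    }
}

fn main() {
    let storage: Vec<u8> = (0..=255).collect();
    let blocks = LazyBlocks::new(vec![0..128, 120..256]); // overlapped boundaries
    // Only the second block is ever read from "disk".
    let bytes = blocks.block_bytes(1, |r| storage[r].to_vec());
    assert_eq!(bytes[0], 120);
}
```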
Stu Hood
7a13e0294d Avoid copying into OwnedBytes when opening a fast field column Dictionary. (#55)
When a fast fields string/bytes `Dictionary` is opened, we currently read the entire dictionary from `FileSlice` -> `OwnedBytes`... and then immediately wrap it back into a `FileSlice`.

Switching to `Dictionary::open` preserves the `FileSlice`, such that only the portions of the `Dictionary` which are actually accessed are read from disk/buffers.
2025-12-10 10:17:27 -08:00
Eric Ridge
20d00701ee perf: lazily open positions file (#54)
Not all queries use positions and it's okay if we (from the perspective of `pg_search`, anyways) defer opening/loading them until they're first needed.

This is probably completely wrong for an mmap-based Directory, but we (again, `pg_search`) decided long ago that we don't care about that use case.

This saves a lot of disk I/O when an index has lots of segments and the query doesn't need positions.

As a drive-by, make sure a random Vec has enough space before pushing items to it. This showed up in the profiler, believe it or not.
2025-12-10 10:17:27 -08:00
Eric Ridge
526afc6111 chore: internal API visibility adjustments (#53) 2025-12-10 10:17:27 -08:00
Ming
f9e4a8413b make the directory BufWriter capacity configurable (#52) 2025-12-10 10:17:27 -08:00
Ming
58124bb164 changes to make merging work (#48) 2025-12-10 10:17:27 -08:00
Eric Ridge
176f7e852a perf: remove general overhead during segment merging (#47) 2025-12-10 10:17:27 -08:00
Ming
cfa5f94114 chore: Make some delete-related functions public (#46) 2025-12-10 10:17:26 -08:00
Ming
5e449e7dda feat: SnippetGenerator can handle JSON fields (#42) 2025-12-10 10:17:26 -08:00
Stu Hood
1617459b01 Expose some methods which are necessary to create a streaming version of sorted_ords_to_term_cb. (#43)
See https://github.com/paradedb/paradedb/pull/2612.

We might eventually want that function upstreamed, but there are more changes planned to it for https://github.com/paradedb/paradedb/issues/2619, so doing the expedient thing now.
2025-12-10 10:17:26 -08:00
Ming
0e1a7e213e chore: allow merge_foreground to ignore the store (#40) 2025-12-10 10:17:26 -08:00
Ming
b0660ba196 chore: make some structs pub (#39) 2025-12-10 10:17:26 -08:00
Eric Ridge
936d6af471 feat: ability to directly merge segments in the foreground (#36)
This adds new public-facing (and internal) APIs for being able to merge a list of segments in the foreground, without using any threads.  It's largely a cut-n-paste of the existing background merge code.

For pg_search, this is beneficial because it allows us to merge directly using our `MVCCDirectory` rather than going through the `ChannelDirectory`, which has quite a bit of overhead.
2025-12-10 10:17:26 -08:00
Eric Ridge
2560de3a01 feat: IndexWriter::wait_merging_threads() return Err on merge failure (#34) 2025-12-10 10:17:26 -08:00
Eric Ridge
75a8384c2b feat: remove Directory::reconsider_merge_policy() and add other niceties to Directory API (#33)
This removes `Directory::reconsider_merge_policy()`.  After reconsidering this, it's better to make this decision ahead of time.

Also adds a `Directory::log(message: &str)` function along with passing a `Directory` reference to `MergePolicy::compute_merge_candidates()`.

It also adds `#[derive(Debug)]` and `#[derive(Serialize)]` to a couple of structs that can benefit.
2025-12-10 10:17:26 -08:00
Eric Ridge
5b6da9123c feat: introduce a MergeOptimizedInvertedIndexReader (#32)
This is probably a bit of a misnomer as it's really a "PgSearchOptimizedInvertedIndexReaderForMerge".

What we've done here is copied `InvertedIndexReader` and internally adjusted it to hold onto the complete `OwnedBytes` of the index's postings and positions.  One or two other small touch points were required to make other internal APIs compatible with this, but they don't otherwise change functionality or I/O patterns.

`MergeOptimizedInvertedIndexReader` does change I/O patterns, however, in that the merge process now does two (potentially) very large reads when it obtains the new "merge optimized inverted index reader" for each segment.  This changes access patterns such that all the reads happen up-front rather than term-by-term as the merge process runs.

A likely downside to this approach is that now pg_search will be, indirectly, holding onto a lot of heap-allocated memory that was read from its block storage.  Perhaps in the (near) future we can further optimize the new `MergeOptimizedInvertedIndexReader` such that it pages in blocks of a few megabytes at a time, on demand, rather than the whole file.

---

Some unit tests were also updated to resolve compilation problems introduced by PR https://github.com/paradedb/tantivy/pull/31 that for some reason didn't show in CI. #weird
2025-12-10 10:17:26 -08:00
Eric Ridge
8b7db36c99 feat: Add Directory::wants_cancel() function (#31)
This adds a function named `wants_cancel() -> bool` to the `Directory` trait.  It allows a Directory implementation to indicate that it would like Tantivy to cancel an operation.

Right now, querying this function only happens during key points of index merging, but _could_ be used in other places.  Technically, segment merging is the only "black box" in tantivy that users don't otherwise have the direct ability to control.

The default implementation of `wants_cancel()` returns false, so there's no fear of default tantivy spuriously cancelling a merge.

The cancels happen "cleanly" such that if `wants_cancel()` returns true, an `Err(TantivyError::Cancelled)` is returned from the calling function at that point, and the error result will be propagated up the stack. No panics are raised.
2025-12-10 10:17:26 -08:00
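A minimal sketch of the cooperative cancellation shape described above, with a mini trait standing in for tantivy's `Directory`:

```rust
enum TantivyError {
    Cancelled,
}

trait Directory {
    // Default: never ask for cancellation, so plain directories are unaffected.
    fn wants_cancel(&self) -> bool {
        false
    }
}

struct CancellingDirectory;
impl Directory for CancellingDirectory {
    fn wants_cancel(&self) -> bool {
        true
    }
}

// Merge-loop checkpoint: bail out cleanly with an Err instead of panicking;
// the caller propagates the error up the stack.
fn merge_step(dir: &dyn Directory) -> Result<(), TantivyError> {
    if dir.wants_cancel() {
        return Err(TantivyError::Cancelled);
    }
    // ... one unit of merge work ...
    Ok(())
}

fn main() {
    assert!(merge_step(&CancellingDirectory).is_err());
}
```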
Eric Ridge
eabe589814 feat: ability to assign a panic handler to a Directory (#30)
Tantivy creates thread pools for some of its background work, specifically committing and merging.

It's possible, if one of the thread workers panics, that rayon will simply abort the process. This is terrible for pg_search, as that takes down the entire Postgres cluster.

These changes allow a Directory to assign a panic handler that gets called in such cases.  Which allows pg_search to gracefully rollback the current transaction, while presenting the panic message to the user.
2025-12-10 10:17:26 -08:00
Eric Ridge
65d3574dfd feat: make garbage collection opt-out (#28) 2025-12-10 10:17:26 -08:00
Ming
26d623c411 Change default index precision to microseconds (#27) 2025-12-10 10:17:25 -08:00
Eric Ridge
0552dddeb9 feat: delete docs by (SegmentId, DocId) (#26)
This teaches tantivy how to "directly" delete a document in a segment.
    
Our use case from pg_search is that we already know the segment_id and doc_id so it's waaaaay more efficient for us to delete docs through our `ambulkdelete()` routine.

It avoids doing a search, and all the stuff around that, for each of our "ctid" terms that we want to delete.
2025-12-10 10:17:25 -08:00
Eric Ridge
1b88bb61f9 feat: Add ability to construct a SegmentId from raw bytes (#24)
This allows a `SegmentId` to be constructed from a `[u8; 16]` byte array.  

It also adds an `impl Default for SegmentId`, which defaults to all nulls.
2025-12-10 10:17:25 -08:00
Eric Ridge
16da31cf06 perf: make the footer fixed width (#23)
Prior to this commit, the Footer tantivy serialized at the end of every file included a json blob that could be an arbitrary size.

This changes the Footer to be exactly 24 bytes (6 u32s), making sure to keep the `crc` value.  The other change we make here is to not actually read/validate the footer bytes when opening a file.

From pg_search's perspective, this is quite unnecessary Postgres buffer cache I/O and increases index/segment opening overhead, which is something pg_search does often for each query.

Two tests are ignored here as they test physical index files stored here in the repo that this change completely breaks.
2025-12-10 10:17:25 -08:00
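A sketch of what a fixed-width, 24-byte (6 × u32) footer could look like; only the `crc` field is named in the commit, so the other fields here are placeholders:

```rust
// Placeholder field names (only `crc` is named in the commit): the footer is
// exactly 6 * 4 = 24 bytes, so it can be located at a fixed offset from the
// end of the file, with no JSON parsing and no validation read at open time.
#[derive(Debug, PartialEq)]
struct Footer {
    version: u32,
    crc: u32,
    reserved: [u32; 4], // stand-ins for the remaining fixed fields
}

const FOOTER_NUM_BYTES: usize = 24;

impl Footer {
    fn serialize(&self) -> [u8; FOOTER_NUM_BYTES] {
        let words = [
            self.version, self.crc,
            self.reserved[0], self.reserved[1], self.reserved[2], self.reserved[3],
        ];
        let mut out = [0u8; FOOTER_NUM_BYTES];
        for (i, w) in words.iter().enumerate() {
            out[i * 4..i * 4 + 4].copy_from_slice(&w.to_le_bytes());
        }
        out
    }

    fn deserialize(bytes: &[u8; FOOTER_NUM_BYTES]) -> Footer {
        let word = |i: usize| u32::from_le_bytes(bytes[i * 4..i * 4 + 4].try_into().unwrap());
        Footer { version: word(0), crc: word(1), reserved: [word(2), word(3), word(4), word(5)] }
    }
}

fn main() {
    let footer = Footer { version: 1, crc: 0xDEAD_BEEF, reserved: [0; 4] };
    assert_eq!(Footer::deserialize(&footer.serialize()), footer);
}
```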
Eric Ridge
658b9b22e0 perf: remove some fast fields loading overhead (#22)
This removes some overhead that the profiler exposed. In the case I was testing, fast fields no longer show up in the profile at all.

I also renamed `BlockWithLength` to `BlockWithData`
2025-12-10 10:17:25 -08:00
Eric Ridge
95661fba30 perf: teach SegmentReader to lazily open/read its various SegmentComponents (#20)
This overhauls `SegmentReader` to put its various components behind `OnceLock`s such that they can be opened and read on their first use, as opposed to when a SegmentReader is constructed -- which happens once for every segment when an Index is opened.

This has a negative impact on some of Tantivy's expectations: previously, an existing SegmentReader could still read from physical files that were deleted by a merge. This isn't true now that the segment's physical files aren't opened until needed. As such, I've `#[ignore]`'d six tests that expose this problem.

From our (pg_search's) side of things, we don't really have physical files and don't need to rely on the filesystem/kernel to allow reading unlinked files that are still open.

Overall, this cuts down a significant number of disk reads during pg_search's query planning. With my test data it goes from 808 individual reads totalling 999,799 bytes, to 18 reads totalling 814,514 bytes.

This reduces the time it takes to plan a simple query from about 1.4ms to 0.436ms -- roughly a 3.2x improvement.
2025-12-10 10:17:25 -08:00
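The `OnceLock` pattern named above, in miniature; construction does no I/O, and the component is opened on first use:

```rust
use std::sync::OnceLock;

// Sketch of the lazy layout: each component sits behind a OnceLock and is
// opened on first use, so constructing a reader for every segment at
// index-open time does no I/O. The real struct has one slot per component.
struct SegmentReader {
    postings: OnceLock<String>, // stand-in for the opened component
}

impl SegmentReader {
    fn new() -> Self {
        SegmentReader { postings: OnceLock::new() }
    }

    fn postings(&self) -> &str {
        self.postings.get_or_init(|| {
            // Real code would read the component's file here, exactly once.
            println!("opening postings component");
            "postings data".to_string()
        })
    }
}

fn main() {
    let reader = SegmentReader::new(); // cheap: nothing opened yet
    let _ = reader.postings();         // first use triggers the open
    let _ = reader.postings();         // cached thereafter; prints once
}
```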
Philippe Noël
ddd169b77c chore: Don't do codecov (#21) 2025-12-10 10:17:25 -08:00
Eric Ridge
bb4c4b8522 perf: push FileSlices down through most of fast fields (#19)
This PR modifies internal API signatures and implementation details so that `FileSlice`s are passed down into the innards of (at least) the `BlockwiseLinearCodec`.  This allows tantivy to defer dereferencing large slices of bytes when reading numeric fast fields, and instead dereference only the slice of bytes it needs for any given compressed Block.

The motivation here is for external `Directory` implementations where it's not exactly efficient to dereference large slices of bytes.
2025-12-10 10:17:25 -08:00
Neil Hansen
ffa558e3a9 fix: tests in ci (#18) 2025-12-10 10:17:25 -08:00
Neil Hansen
a35e3dcb5a suppress warnings after rebase 2025-12-10 10:17:25 -08:00
Neil Hansen
1e3998fbad implement fuzzy scoring in sstable 2025-12-10 10:17:25 -08:00
Neil Hansen
f3df079d6b chore: point tantivy-fst to paradedb fork to fix regex 2025-12-10 10:17:24 -08:00
Ming Ying
f7c0335857 comments 2025-12-10 10:17:24 -08:00
Ming Ying
2584325e0d add reconsider_merge_policy to directory 2025-12-10 10:17:24 -08:00
Eric B. Ridge
1f2c2d0c8a fix compilation warnings on rust v1.83 2025-12-10 10:17:24 -08:00
Eric Ridge
91db6909d1 Add a payload: &mut (dyn Any + '_) argument to Directory::save_meta() (#17) 2025-12-10 10:17:24 -08:00
Ming Ying
7639b47615 small changes to make MVCC work with delete 2025-12-10 10:17:24 -08:00
Ming Ying
8b55f0f355 Make DeleteMeta pub 2025-12-10 10:17:24 -08:00
Ming Ying
8d29f19110 make save_metas provide previous metas 2025-12-10 10:17:24 -08:00
Ming Ying
d742d3277a undo changes to segment_updater.rs 2025-12-10 10:17:24 -08:00
Eric B. Ridge
3afe3714a2 no pgrx, please 2025-12-10 10:17:24 -08:00
Ming Ying
67ea8e53a8 quickwit compiles 2025-12-10 10:17:24 -08:00
Ming Ying
3adc85c017 Directory trait can read/write meta/managed 2025-12-10 10:17:24 -08:00
Ming
6bb3a22c98 expose AddOperation and with_max_doc (#7) 2025-12-10 10:17:23 -08:00
Ming
5503cfb8ef Fix managed paths (#5) 2025-12-10 10:17:23 -08:00
Alexander Alexandrov
ea0e88ae4b feat: implement TokenFilter for Option<F> (#4) 2025-12-10 10:17:23 -08:00
Neil Hansen
dee2dd3f21 Use Levenshtein distance to score documents in fuzzy term queries 2025-12-10 10:17:19 -08:00
Philippe Noël
794ff1ffc9 chore: Make Language hashable (#79) (#2763)
Co-authored-by: Ming <ming.ying.nyc@gmail.com>
2025-12-10 15:38:43 +01:00
PSeitz-dd
c6912ce89a Handle JSON fields and columnar in space_usage (#2761)
return field names in space_usage instead of `Field`
more detailed info for columns
2025-12-10 20:33:33 +08:00
PSeitz
618e3bd11b Term and IndexingTerm cleanup (#2750)
* refactor term

* add deprecated functions

---------

Co-authored-by: Pascal Seitz <pascal.seitz@datadoghq.com>
2025-12-05 09:48:40 +08:00
PSeitz
b2f99c6217 add term->histogram benchmark (#2758)
* add term->histogram benchmark

* add more term aggs

---------

Co-authored-by: Pascal Seitz <pascal.seitz@datadoghq.com>
2025-12-04 02:29:37 +01:00
PSeitz
76de5bab6f fix unsafe warnings (#2757) 2025-12-03 20:15:21 +08:00
rustmailer
b7eb31162b docs: add usage example to README (#2743) 2025-12-02 21:56:57 +01:00
Paul Masurel
63c66005db Lazy scorers (#2726)
* Refactoring of the score tweaker into `SortKeyComputer`s to unlock two features.

- Allow lazy evaluation of the score. As soon as we have identified that a doc won't reach the top-K threshold, we can stop the evaluation.
- Allow for a different segment-level score and overall score, and their conversion.

This PR breaks the public API, but fixing calling code is straightforward.

* Bumping tantivy version

---------

Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
2025-12-01 15:38:57 +01:00
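A sketch of the laziness this unlocks, with invented names: a two-component sort key skips its expensive component whenever the cheap component already rules the doc out against the current top-K threshold.

```rust
// Invented helper: evaluates the cheap key first, and only computes the
// expensive key when the cheap one ties against (or beats) the current
// top-K threshold, since otherwise the comparison is already decided.
fn lazy_key(
    doc: u32,
    threshold: (u64, u64),
    cheap: impl Fn(u32) -> u64,
    expensive: impl Fn(u32) -> u64,
) -> Option<(u64, u64)> {
    let first = cheap(doc);
    if first < threshold.0 {
        return None; // can't reach the top-K: skip the expensive component
    }
    if first > threshold.0 {
        return Some((first, expensive(doc)));
    }
    // Tie on the first component: the second component decides.
    let second = expensive(doc);
    if (first, second) > threshold {
        Some((first, second))
    } else {
        None
    }
}

fn main() {
    let cheap = |doc: u32| (doc % 10) as u64;
    let expensive = |doc: u32| (doc / 10) as u64; // imagine this hits a column
    // The threshold is the current worst entry in the top-K heap.
    assert_eq!(lazy_key(42, (5, 0), cheap, expensive), None); // 2 < 5: skipped
    assert_eq!(lazy_key(97, (5, 0), cheap, expensive), Some((7, 9)));
}
```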
Paul Masurel
7d513a44c5 Added benchmarks for top K by a fast field (#2754)
Also removed query parsing from the bench code.

Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
2025-12-01 14:58:29 +01:00
Stu Hood
ca87fcd454 Implement collect_block for Collectors which wrap other Collectors (#2727)
* Implement `collect_block` for tuple Collectors, and for MultiCollector.

* Two more.
2025-12-01 12:26:29 +01:00
Ang
08a92675dc Fix typos again (#2753)
Found via `codespell -S benches,stopwords.rs -L
womens,parth,abd,childs,ond,ser,ue,mot,hel,atleast,pris,claus,allo`
2025-12-01 12:15:41 +01:00
Raphaël Cohen
f7f4b354d6 fix: Handle phrase prefixed with star (#2751)
Signed-off-by: Darkheir <raphael.cohen@sekoia.io>
2025-12-01 11:43:25 +01:00
Paul Masurel
25d44fcec8 Revert "remove unused columnar api (#2742)" (#2748)
* Revert "remove unused columnar api (#2742)"

This reverts commit 8725594d47.

* Clippy comment + removing fill_vals

---------

Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
2025-11-26 17:44:02 +01:00
PSeitz-dd
842fe9295f split Term in Term and IndexingTerm (#2744)
* split Term in Term and IndexingTerm

* add append_json_path to JsonTermSerializer
2025-11-26 16:48:59 +01:00
Paul Masurel
f88b7200b2 Optimization when posting list are saturated. (#2745)
* Optimization when posting list are saturated.

If a posting list's doc freq is the segment reader's max_doc, and if scoring does not matter, we can replace it by an AllScorer.

In turn, in a boolean query, we can dismiss AllScorers and empty scorers to accelerate the request.

* Added range query optimization

* CR comment

* CR comments

* CR comment

---------

Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
2025-11-26 15:50:57 +01:00
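A sketch of the saturation rewrite described above, with stand-in types: a clause whose doc freq equals `max_doc` matches everything, so without scoring it degenerates to an `AllScorer`, which an intersection can then drop.

```rust
// Stand-in scorer enum; the real types are tantivy Scorers.
enum Scorer {
    Term { doc_freq: u32 },
    All,   // matches every doc in the segment
    Empty, // matches nothing
}

fn simplify(scorer: Scorer, max_doc: u32, scoring_enabled: bool) -> Scorer {
    match scorer {
        // Saturated posting list: every doc matches, no need to iterate it.
        Scorer::Term { doc_freq } if doc_freq == max_doc && !scoring_enabled => Scorer::All,
        other => other,
    }
}

// In an intersection, `All` clauses are no-ops, and a single `Empty` clause
// empties the result, so both resolve without touching posting lists.
fn simplify_intersection(clauses: Vec<Scorer>) -> Vec<Scorer> {
    if clauses.iter().any(|c| matches!(c, Scorer::Empty)) {
        return vec![Scorer::Empty];
    }
    clauses.into_iter().filter(|c| !matches!(c, Scorer::All)).collect()
}

fn main() {
    let clauses = vec![
        simplify(Scorer::Term { doc_freq: 1000 }, 1000, false), // saturated -> All
        Scorer::Term { doc_freq: 10 },
    ];
    assert_eq!(simplify_intersection(clauses).len(), 1);
}
```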
PSeitz-dd
8725594d47 remove unused columnar api (#2742) 2025-11-21 18:07:25 +01:00
PSeitz
43a784671a clippy (#2741)
Co-authored-by: Pascal Seitz <pascal.seitz@datadoghq.com>
2025-11-21 18:07:03 +01:00
Paul Masurel
c363bbd23d Optimize term aggregation with low cardinality + some refactoring (#2740)
This introduces an optimization for top-level term aggregations on fields with low cardinality.

We then use a Vec as the underlying map.
In addition, we buffer subaggregations.

---------

Co-authored-by: Pascal Seitz <pascal.seitz@datadoghq.com>
Co-authored-by: Paul Masurel <paul@quickwit.io>
2025-11-21 14:46:29 +01:00
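A sketch of the low-cardinality fast path: with few, dense term ordinals, a `Vec` indexed by ordinal replaces the hash map as the count container.

```rust
// Sketch: each update becomes an array bump instead of a hash lookup. The
// helper name and plain-slice input are stand-ins for the real accessors.
fn count_terms_vec(term_ords: &[u64], cardinality: usize) -> Vec<u64> {
    let mut counts = vec![0u64; cardinality];
    for &ord in term_ords {
        counts[ord as usize] += 1;
    }
    counts
}

fn main() {
    // Ordinals for a field with 3 distinct terms.
    let ords = [0u64, 2, 2, 1, 0, 2];
    assert_eq!(count_terms_vec(&ords, 3), vec![2, 1, 3]);
}
```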
Moe
70e591e230 feat: added filter aggregation (#2711)
* Initial impl

* Added `Filter` impl in `build_single_agg_segment_collector_with_reader` + Added tests

* Added `Filter(FilterBucketResult)` + Made tests work.

* Fixed type issues.

* Fixed a test.

* 8a7a73a: Pass `segment_reader`

* Added more tests.

* Improved parsing + tests

* refactoring

* Added more tests.

* refactoring: moved parsing code under QueryParser

* Use Tantivy syntax instead of ES

* Added a sanity check test.

* Simplified impl + tests

* Added back tests in a more maintainable way

* nitz.

* nitz

* implemented very simple fast-path

* improved a comment

* implemented fast field support

* Used `BoundsRange`

* Improved fast field impl + tests

* Simplified execution.

* Fixed exports + nitz

* Improved the tests to check to the expected result.

* Improved test by checking the whole result JSON

* Removed brittle perf checks.

* Added efficiency verification tests.

* Added one more efficiency check test.

* Improved the efficiency tests.

* Removed unnecessary parsing code + added direct Query obj

* Fixed tests.

* Improved tests

* Fixed code structure

* Fixed lint issues

* nitz.

* nitz

* nitz.

* nitz.

* nitz.

* Added an example

* Fixed PR comments.

* Applied PR comments + nitz

* nitz.

* Improved the code.

* Fixed a perf issue.

* Added batch processing.

* Made the example more interesting

* Fixed bucket count

* Renamed Direct to CustomQuery

* Fixed lint issues.

* No need for scorer to be an `Option`

* nitz

* Used BitSet

* Added an optimization for AllQuery

* Fixed merge issues.

* Fixed lint issues.

* Added benchmark for FILTER

* Removed the Option wrapper.

* nitz.

* Applied PR comments.

* Fixed the AllQuery optimization

* Applied PR comments.

* feat: used `erased_serde` to allow filter query to be serialized

* further improved a comment

* Added back tests.

* removed an unused method

* removed an unused method

* Added documentation

* nitz.

* Added query builder.

* Fixed a comment.

* Applied PR comments.

* Fixed doctest issues.

* Added ser/de

* Removed bench in test

* Fixed a lint issue.
2025-11-18 20:54:31 +01:00
Arthur
5277367cb0 remove duplicated call to index_writer.commit() in example (#2732) 2025-11-12 14:52:44 +01:00
Paul Masurel
8b02bff9b8 Removing obsolete benchmark screenshot (#2730)
Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
2025-11-05 09:55:13 +01:00
PSeitz
60225bdd45 cleanup (#2724)
Co-authored-by: Pascal Seitz <pascal.seitz@datadoghq.com>
2025-10-23 10:23:34 +02:00
PSeitz
938bfec8b7 use FxHashMap for Aggregations Request (#2722)
Co-authored-by: Pascal Seitz <pascal.seitz@datadoghq.com>
2025-10-21 15:59:18 +02:00
PSeitz
dabcaa5809 fix merge intermediate aggregation results (#2719)
Previously the merging relied on the order of the results, which is invalid since https://github.com/quickwit-oss/tantivy/pull/2035.
This bug is only hit in specific scenarios, when the aggregation collectors are built in a different order on different segments.

Co-authored-by: Pascal Seitz <pascal.seitz@datadoghq.com>
2025-10-17 12:41:31 +02:00
PSeitz
d410a3b0c0 Add Filtering for Term Aggregations (#2717)
* Add Filtering for Term Aggregations

Closes #2702

* add AggregationsSegmentCtx memory consumption

---------

Co-authored-by: Pascal Seitz <pascal.seitz@datadoghq.com>
2025-10-15 17:39:53 +02:00
Remi
fc93391d0e Minor clarifications on the AggregationsWithAccessor refacto (#2716) 2025-10-14 19:59:33 +02:00
PSeitz
f8e79271ab Replace AggregationsWithAccessor (#2715)
* add nested histogram-termagg benchmark

* Replace AggregationsWithAccessor with AggData

With AggregationsWithAccessor, pre-computation and caching were done at the collector level.
If you have 10000 sub-collectors (e.g. a term aggregation with sub-aggregations), this is very inefficient.
`AggData` instead moves the data from the collector to a node which reflects the cardinality of the request tree instead of the cardinality of the segment collector.
It also moves the global struct shared by all aggregations into aggregation-specific structs, so each aggregation has its own space to store cached data and aggregation-specific information.

This also breaks up the dependency on the Elasticsearch aggregation structure somewhat.

Due to lifetime issues, we move the agg request specific object out of `AggData` during the collection and move it back at the end (for now). That's some unnecessary work, which costs CPU.

This allows better caching and will also pave the way for another potential optimization, by separating the collector and its storage. Currently we allocate a new collector for each sub aggregation bucket (for nested aggregations), but ideally we would have just one collector instance.

* renames

* move request data to agg request files

---------

Co-authored-by: Pascal Seitz <pascal.seitz@datadoghq.com>
2025-10-14 09:22:11 +02:00
PSeitz
33835b6a01 Add DocSet::cost() (#2707)
* query: add DocSet cost hint and use it for intersection ordering

- Add DocSet::cost()
- Use cost() instead of size_hint() to order scorers in intersect_scorers

This isolates cost-related changes without the new seek APIs from
PR #2538

* add comments

---------

Co-authored-by: Pascal Seitz <pascal.seitz@datadoghq.com>
2025-10-13 16:25:49 +02:00
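A sketch of the idea, with a mini trait standing in for tantivy's `DocSet`: `cost()` defaults to `size_hint()`, an expensive-to-advance set can override it, and the intersection is driven from the cheapest clause.

```rust
trait DocSet {
    fn size_hint(&self) -> u32;
    fn cost(&self) -> u64 {
        self.size_hint() as u64 // default: cost tracks the candidate count
    }
}

struct PostingList { len: u32 }
impl DocSet for PostingList {
    fn size_hint(&self) -> u32 { self.len }
}

struct ExpensiveFilter { len: u32 }
impl DocSet for ExpensiveFilter {
    fn size_hint(&self) -> u32 { self.len }
    fn cost(&self) -> u64 { self.len as u64 * 8 } // advancing is ~8x pricier
}

fn order_for_intersection(mut scorers: Vec<Box<dyn DocSet>>) -> Vec<Box<dyn DocSet>> {
    // Cheapest first: it leads the intersection, the others only seek.
    scorers.sort_by_key(|s| s.cost());
    scorers
}

fn main() {
    let ordered = order_for_intersection(vec![
        Box::new(ExpensiveFilter { len: 100 }),
        Box::new(PostingList { len: 500 }),
    ]);
    assert_eq!(ordered[0].cost(), 500); // the plain posting list leads
}
```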
PSeitz
270ca5123c refactor postings (#2709)
rename shallow_seek to seek_block
remove full_block from public postings API

This is in preparation for optionally handling Bitsets in the postings
2025-10-08 16:55:25 +02:00
Mustafa S. Moiz
714366d3b9 docs: correct grammar (#2704)
Correct phrasing for a single line in the docs (`one documents` -> `a document`).
2025-10-08 16:47:09 +02:00
PSeitz-dd
40659d4d07 improve naming in buffered_union (#2705) 2025-09-24 10:58:46 +02:00
PSeitz
e1e131a804 add and/or queries benchmark (#2701) 2025-09-22 16:32:49 +02:00
PSeitz-dd
70da310b2d perf: deduplicate queries (#2698)
* deduplicate queries

Deduplicate queries in the UserInputAst after parsing queries

* add return type
2025-09-22 12:16:58 +02:00
PSeitz
85010b589a clippy (#2700)
* clippy

* clippy

* clippy

* clippy + fmt

---------

Co-authored-by: Pascal Seitz <pascal.seitz@datadoghq.com>
2025-09-19 18:04:25 +02:00
PSeitz-dd
2340dca628 fix compiler warnings (#2699)
* fix compiler warnings

* fix import
2025-09-19 15:55:04 +02:00
Remi
71a26d5b24 Fix CI with rust 1.90 (#2696)
* Empty commit

* Fix dead code lint error
2025-09-18 23:06:33 +02:00
PSeitz-dd
203751f2fe Optimize ExistsQuery for a high number of dynamic columns (#2694)
* Optimize ExistsQuery for a high number of dynamic columns

The previous algorithm checked _each_ doc in _each_ column for
existence. This causes huge cost on JSON fields with e.g. 100k columns.
Compute a bitset instead if we have more than one column.

add `iter_docs` to the multivalued_index

* add benchmark

subfields=1
exists_json_union    Memory: 89.3 KB (+2.01%)    Avg: 0.4865ms (-26.03%)    Median: 0.4865ms (-26.03%)    [0.4865ms .. 0.4865ms]
subfields=2
exists_json_union    Memory: 68.1 KB     Avg: 1.7048ms (-0.46%)    Median: 1.7048ms (-0.46%)    [1.7048ms .. 1.7048ms]
subfields=3
exists_json_union    Memory: 61.8 KB     Avg: 2.0742ms (-2.22%)    Median: 2.0742ms (-2.22%)    [2.0742ms .. 2.0742ms]
subfields=4
exists_json_union    Memory: 119.8 KB (+103.44%)    Avg: 3.9500ms (+42.62%)    Median: 3.9500ms (+42.62%)    [3.9500ms .. 3.9500ms]
subfields=5
exists_json_union    Memory: 120.4 KB (+107.65%)    Avg: 3.9610ms (+20.65%)    Median: 3.9610ms (+20.65%)    [3.9610ms .. 3.9610ms]
subfields=6
exists_json_union    Memory: 120.6 KB (+107.49%)    Avg: 3.8903ms (+3.11%)    Median: 3.8903ms (+3.11%)    [3.8903ms .. 3.8903ms]
subfields=7
exists_json_union    Memory: 120.9 KB (+106.93%)    Avg: 3.6220ms (-16.22%)    Median: 3.6220ms (-16.22%)    [3.6220ms .. 3.6220ms]
subfields=8
exists_json_union    Memory: 121.3 KB (+106.23%)    Avg: 4.0981ms (-15.97%)    Median: 4.0981ms (-15.97%)    [4.0981ms .. 4.0981ms]
subfields=16
exists_json_union    Memory: 123.1 KB (+103.09%)    Avg: 4.3483ms (-92.26%)    Median: 4.3483ms (-92.26%)    [4.3483ms .. 4.3483ms]
subfields=256
exists_json_union    Memory: 204.6 KB (+19.85%)    Avg: 3.8874ms (-99.01%)    Median: 3.8874ms (-99.01%)    [3.8874ms .. 3.8874ms]
subfields=4096
exists_json_union    Memory: 2.0 MB     Avg: 3.5571ms (-99.90%)    Median: 3.5571ms (-99.90%)    [3.5571ms .. 3.5571ms]
subfields=65536
exists_json_union    Memory: 28.3 MB     Avg: 14.4417ms (-99.97%)    Median: 14.4417ms (-99.97%)    [14.4417ms .. 14.4417ms]
subfields=262144
exists_json_union    Memory: 113.3 MB     Avg: 66.2860ms (-99.95%)    Median: 66.2860ms (-99.95%)    [66.2860ms .. 66.2860ms]

* rename methods
2025-09-16 18:21:03 +02:00
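A sketch of the bitset strategy: rather than probing every column for every doc (O(docs × columns)), iterate each column's docs once and union them into a single bitset.

```rust
// Sketch: the inner loop stands in for `iter_docs` on the multivalued index.
fn exists_bitset(columns: &[Vec<u32>], max_doc: u32) -> Vec<u64> {
    let mut bits = vec![0u64; (max_doc as usize + 63) / 64];
    for column_docs in columns {
        for &doc in column_docs {
            bits[doc as usize / 64] |= 1u64 << (doc % 64);
        }
    }
    bits
}

fn main() {
    // Three dynamic columns under one JSON field, each listing its docs.
    let columns = vec![vec![0u32, 5], vec![5, 9], vec![2]];
    let bits = exists_bitset(&columns, 10);
    let count: u32 = bits.iter().map(|w| w.count_ones()).sum();
    assert_eq!(count, 4); // docs 0, 2, 5, and 9 have a value
}
```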
PSeitz-dd
7963b0b4aa Add fast field fallback for term query if not indexed (#2693)
* Add fast field fallback for term query if not indexed

* only fallback without scores
2025-09-12 14:58:21 +02:00
Paul Masurel
d5eefca11d Merge pull request #2692 from quickwit-oss/paul.masurel/coerce-floats-too-in-search-too
This PR changes the logic used on the ingestion of floats.
2025-09-10 09:46:54 +02:00
Paul Masurel
5d6c8de23e Align float search logic to the columnar coercion rules
It applies the same logic on floats as for u64 or i64.
In all cases, the idea is (for the inverted index) to coerce numbers to their canonical representation, before indexing and before searching.

That way a document with the float 1.0 will be searchable when the user
searches for 1.

Note that contrary to the columnar, we do not attempt to coerce all of the terms associated with a given json path to a single numerical type.
We simply rely on this "point-wise" canonicalization.
2025-09-09 19:28:17 +02:00
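A sketch of point-wise canonicalization with an invented helper (not tantivy's exact coercion code): a float that is exactly an integer is mapped to the same canonical term a user's integer query maps to.

```rust
// Invented helper: floats that are exactly representable as integers are
// canonicalized to an integer term, so indexing 1.0 and searching for 1
// meet at the same term.
#[derive(Debug, PartialEq)]
enum CanonicalNumber {
    I64(i64),
    U64(u64),
    F64(f64),
}

fn canonicalize(value: f64) -> CanonicalNumber {
    if value.fract() == 0.0 && value >= i64::MIN as f64 && value <= i64::MAX as f64 {
        let as_int = value as i64;
        if as_int >= 0 {
            return CanonicalNumber::U64(as_int as u64);
        }
        return CanonicalNumber::I64(as_int);
    }
    CanonicalNumber::F64(value)
}

fn main() {
    // The document's 1.0 and the query's 1 canonicalize to the same term.
    assert_eq!(canonicalize(1.0), CanonicalNumber::U64(1));
    assert_eq!(canonicalize(-2.0), CanonicalNumber::I64(-2));
    assert_eq!(canonicalize(1.5), CanonicalNumber::F64(1.5));
}
```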
PSeitz
a06365f39f Update CHANGELOG.md for bugfixes (#2674)
* Update CHANGELOG.md

* Update CHANGELOG.md
2025-09-04 11:51:00 +02:00
Raphaël Cohen
f4b374110f feat: Regex query grammar (#2677)
* feat: Regex query grammar

* feat: Disable regexes by default

* chore: Apply formatting
2025-09-03 10:07:04 +02:00
253 changed files with 21948 additions and 4716 deletions


@@ -1,29 +0,0 @@
name: Coverage
on:
push:
branches: [main]
# Ensures that we cancel running jobs for the same PR / same workflow.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
coverage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Rust
run: rustup toolchain install nightly-2024-07-01 --profile minimal --component llvm-tools-preview
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@cargo-llvm-cov
- name: Generate code coverage
run: cargo +nightly-2024-07-01 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
continue-on-error: true
with:
token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
files: lcov.info
fail_ci_if_error: true


@@ -76,7 +76,9 @@ jobs:
profile: minimal
override: true
- uses: taiki-e/install-action@nextest
- uses: taiki-e/install-action@v2
with:
tool: 'nextest'
- uses: Swatinem/rust-cache@v2
- name: Run tests

.gitignore (5 lines changed)

@@ -6,7 +6,6 @@ target
target/debug
.vscode
target/release
Cargo.lock
benchmark
.DS_Store
*.bk
@@ -15,3 +14,7 @@ trace.dat
cargo-timing*
control
variable
# for `sample record -p`
profile.json
profile.json.gz

CHANGELOG.md

@@ -14,6 +14,18 @@ Tantivy 0.25
- Support mixed field types in query parser [#2676](https://github.com/quickwit-oss/tantivy/pull/2676)(@trinity-1686a)
- Add per-field size details [#2679](https://github.com/quickwit-oss/tantivy/pull/2679)(@fulmicoton)
Tantivy 0.24.2
================================
- Fix TopNComputer for reverse order. [#2672](https://github.com/quickwit-oss/tantivy/pull/2672)(@stuhood @PSeitz)
Affected queries are [order_by_fast_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_fast_field) and
[order_by_u64_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_u64_field)
for `Order::Asc`
Tantivy 0.24.1
================================
- Fix: bump required rust version to 1.81
Tantivy 0.24
================================
Tantivy 0.24 will be backwards compatible with indices created with v0.22 and v0.21. The new minimum rust version will be 1.75. Tantivy 0.23 will be skipped.
@@ -66,7 +78,7 @@ This will slightly increase space and access time. [#2439](https://github.com/qu
- **Store DateTime as nanoseconds in doc store** DateTime in the doc store was truncated to microseconds previously. This removes this truncation, while still keeping backwards compatibility. [#2486](https://github.com/quickwit-oss/tantivy/pull/2486)(@PSeitz)
- **Performace/Memory**
- **Performance/Memory**
- lift clauses in LogicalAst for optimized ast during execution [#2449](https://github.com/quickwit-oss/tantivy/pull/2449)(@PSeitz)
- Use Vec instead of BTreeMap to back OwnedValue object [#2364](https://github.com/quickwit-oss/tantivy/pull/2364)(@fulmicoton)
- Replace TantivyDocument with CompactDoc. CompactDoc is much smaller and provides similar performance. [#2402](https://github.com/quickwit-oss/tantivy/pull/2402)(@PSeitz)
@@ -96,6 +108,14 @@ This will slightly increase space and access time. [#2439](https://github.com/qu
- Fix trait bound of StoreReader::iter [#2360](https://github.com/quickwit-oss/tantivy/pull/2360)(@adamreichold)
- remove read_postings_no_deletes [#2526](https://github.com/quickwit-oss/tantivy/pull/2526)(@PSeitz)
Tantivy 0.22.1
================================
- Fix TopNComputer for reverse order. [#2672](https://github.com/quickwit-oss/tantivy/pull/2672)(@stuhood @PSeitz)
Affected queries are [order_by_fast_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_fast_field) and
[order_by_u64_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_u64_field)
for `Order::Asc`
Tantivy 0.22
================================

Cargo.lock (generated, new file, 2361 lines): diff suppressed because it is too large.

Cargo.toml

@@ -1,6 +1,6 @@
[package]
name = "tantivy"
version = "0.25.0"
version = "0.26.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
@@ -21,11 +21,11 @@ byteorder = "1.4.3"
crc32fast = "1.3.2"
once_cell = "1.10.0"
regex = { version = "1.5.5", default-features = false, features = [
"std",
"unicode",
"std",
"unicode",
] }
aho-corasick = "1.0"
tantivy-fst = "0.5"
tantivy-fst = { git = "https://github.com/paradedb/fst.git" }
memmap2 = { version = "0.9.0", optional = true }
lz4_flex = { version = "0.11", default-features = false, optional = true }
zstd = { version = "0.13", optional = true, default-features = false }
@@ -38,9 +38,10 @@ levenshtein_automata = "0.2.1"
uuid = { version = "1.0.0", features = ["v4", "serde"] }
crossbeam-channel = "0.5.4"
rust-stemmers = "1.2.0"
tantivy-stemmers = { version = "0.4.0", default-features = false, features = ["polish_yarovoy"] }
downcast-rs = "2.0.1"
bitpacking = { version = "0.9.2", default-features = false, features = [
"bitpacker4x",
"bitpacker4x",
] }
census = "0.4.2"
rustc-hash = "2.0.0"
@@ -48,6 +49,10 @@ thiserror = "2.0.1"
htmlescape = "0.3.1"
fail = { version = "0.5.0", optional = true }
time = { version = "0.3.35", features = ["serde-well-known"] }
# TODO: We have integer wrappers with PartialOrd, and a misfeature of
# `deranged` causes inference to fail in a bunch of cases. See
# https://github.com/jhpratt/deranged/issues/18#issuecomment-2746844093
deranged = "=0.4.0"
smallvec = "1.8.0"
rayon = "1.5.2"
lru = "0.12.0"
@@ -69,6 +74,8 @@ hyperloglogplus = { version = "0.4.1", features = ["const-loop"] }
futures-util = { version = "0.3.28", optional = true }
futures-channel = { version = "0.3.28", optional = true }
fnv = "1.0.7"
parking_lot = "0.12.4"
typetag = "0.2.21"
[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
@@ -134,14 +141,14 @@ compare_hash_only = ["stacker/compare_hash_only"]
[workspace]
members = [
"query-grammar",
"bitpacker",
"common",
"ownedbytes",
"stacker",
"sstable",
"tokenizer-api",
"columnar",
"query-grammar",
"bitpacker",
"common",
"ownedbytes",
"stacker",
"sstable",
"tokenizer-api",
"columnar",
]
# Following the "fail" crate best practises, we isolate
@@ -167,3 +174,11 @@ harness = false
[[bench]]
name = "agg_bench"
harness = false
[[bench]]
name = "exists_json"
harness = false
[[bench]]
name = "and_or_queries"
harness = false

README.md

@@ -23,8 +23,6 @@ performance for different types of queries/collections.
Your mileage WILL vary depending on the nature of queries and their load.
<img src="doc/assets/images/searchbenchmark.png">
Details about the benchmark can be found at this [repository](https://github.com/quickwit-oss/search-benchmark-game).
## Features
@@ -125,6 +123,7 @@ You can also find other bindings on [GitHub](https://github.com/search?q=tantivy
- [seshat](https://github.com/matrix-org/seshat/): A matrix message database/indexer
- [tantiny](https://github.com/baygeldin/tantiny): Tiny full-text search for Ruby
- [lnx](https://github.com/lnx-search/lnx): adaptable, typo tolerant search engine with a REST API
- [Bichon](https://github.com/rustmailer/bichon): A lightweight, high-performance Rust email archiver with WebUI
- and [more](https://github.com/search?q=tantivy)!
### On average, how much faster is Tantivy compared to Lucene?


@@ -10,7 +10,7 @@ rename FastFieldReaders::open to load
remove fast field reader
find a way to unify the two DateTime.
readd type check in the filter wrapper
re-add type check in the filter wrapper
add unit test on columnar list columns.

benches/agg_bench.rs

@@ -1,5 +1,6 @@
use binggan::plugins::PeakMemAllocPlugin;
use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use rand::distributions::WeightedIndex;
use rand::prelude::SliceRandom;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
@@ -54,11 +55,19 @@ fn bench_agg(mut group: InputGroup<Index>) {
register!(group, extendedstats_f64);
register!(group, percentiles_f64);
register!(group, terms_few);
register!(group, terms_all_unique);
register!(group, terms_many);
register!(group, terms_many_top_1000);
register!(group, terms_many_order_by_term);
register!(group, terms_many_with_top_hits);
register!(group, terms_all_unique_with_avg_sub_agg);
register!(group, terms_many_with_avg_sub_agg);
register!(group, terms_few_with_avg_sub_agg);
register!(group, terms_status_with_avg_sub_agg);
register!(group, terms_status);
register!(group, terms_few_with_histogram);
register!(group, terms_status_with_histogram);
register!(group, terms_many_json_mixed_type_with_avg_sub_agg);
register!(group, cardinality_agg);
@@ -71,8 +80,15 @@ fn bench_agg(mut group: InputGroup<Index>) {
register!(group, histogram);
register!(group, histogram_hard_bounds);
register!(group, histogram_with_avg_sub_agg);
register!(group, histogram_with_term_agg_few);
register!(group, avg_and_range_with_avg_sub_agg);
// Filter aggregation benchmarks
register!(group, filter_agg_all_query_count_agg);
register!(group, filter_agg_term_query_count_agg);
register!(group, filter_agg_all_query_with_sub_aggs);
register!(group, filter_agg_term_query_with_sub_aggs);
group.run();
}
@@ -123,12 +139,12 @@ fn extendedstats_f64(index: &Index) {
}
fn percentiles_f64(index: &Index) {
let agg_req = json!({
"mypercentiles": {
"percentiles": {
"field": "score_f64",
"percents": [ 95, 99, 99.9 ]
"mypercentiles": {
"percentiles": {
"field": "score_f64",
"percents": [ 95, 99, 99.9 ]
}
}
}
});
execute_agg(index, agg_req);
}
@@ -165,6 +181,19 @@ fn terms_few(index: &Index) {
});
execute_agg(index, agg_req);
}
fn terms_status(index: &Index) {
let agg_req = json!({
"my_texts": { "terms": { "field": "text_few_terms_status" } },
});
execute_agg(index, agg_req);
}
fn terms_all_unique(index: &Index) {
let agg_req = json!({
"my_texts": { "terms": { "field": "text_all_unique_terms" } },
});
execute_agg(index, agg_req);
}
fn terms_many(index: &Index) {
let agg_req = json!({
"my_texts": { "terms": { "field": "text_many_terms" } },
@@ -213,6 +242,63 @@ fn terms_many_with_avg_sub_agg(index: &Index) {
});
execute_agg(index, agg_req);
}
fn terms_all_unique_with_avg_sub_agg(index: &Index) {
let agg_req = json!({
"my_texts": {
"terms": { "field": "text_all_unique_terms" },
"aggs": {
"average_f64": { "avg": { "field": "score_f64" } }
}
},
});
execute_agg(index, agg_req);
}
fn terms_few_with_histogram(index: &Index) {
let agg_req = json!({
"my_texts": {
"terms": { "field": "text_few_terms" },
"aggs": {
"histo": {"histogram": { "field": "score_f64", "interval": 10 }}
}
}
});
execute_agg(index, agg_req);
}
fn terms_status_with_histogram(index: &Index) {
let agg_req = json!({
"my_texts": {
"terms": { "field": "text_few_terms_status" },
"aggs": {
"histo": {"histogram": { "field": "score_f64", "interval": 10 }}
}
}
});
execute_agg(index, agg_req);
}
fn terms_few_with_avg_sub_agg(index: &Index) {
let agg_req = json!({
"my_texts": {
"terms": { "field": "text_few_terms" },
"aggs": {
"average_f64": { "avg": { "field": "score_f64" } }
}
},
});
execute_agg(index, agg_req);
}
fn terms_status_with_avg_sub_agg(index: &Index) {
let agg_req = json!({
"my_texts": {
"terms": { "field": "text_few_terms_status" },
"aggs": {
"average_f64": { "avg": { "field": "score_f64" } }
}
},
});
execute_agg(index, agg_req);
}
fn terms_many_json_mixed_type_with_avg_sub_agg(index: &Index) {
let agg_req = json!({
"my_texts": {
@@ -339,6 +425,17 @@ fn histogram_with_avg_sub_agg(index: &Index) {
});
execute_agg(index, agg_req);
}
fn histogram_with_term_agg_few(index: &Index) {
let agg_req = json!({
"rangef64": {
"histogram": { "field": "score_f64", "interval": 10 },
"aggs": {
"my_texts": { "terms": { "field": "text_few_terms" } }
}
}
});
execute_agg(index, agg_req);
}
fn avg_and_range_with_avg_sub_agg(index: &Index) {
let agg_req = json!({
"rangef64": {
@@ -386,14 +483,21 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
.set_stored();
let text_field = schema_builder.add_text_field("text", text_fieldtype);
let json_field = schema_builder.add_json_field("json", FAST);
let text_field_all_unique_terms =
schema_builder.add_text_field("text_all_unique_terms", STRING | FAST);
let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
let text_field_few_terms_status =
schema_builder.add_text_field("text_few_terms_status", STRING | FAST);
let score_fieldtype = tantivy::schema::NumericOptions::default().set_fast();
let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
let index = Index::create_from_tempdir(schema_builder.build())?;
let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
// Approximate production log proportions: INFO dominant, WARN and DEBUG occasional, ERROR rare.
let log_level_distribution = WeightedIndex::new([80u32, 3, 12, 5]).unwrap();
let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
@@ -409,15 +513,21 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
index_writer.add_document(doc!())?;
}
if cardinality == Cardinality::Multivalued {
let log_level_sample_a = few_terms_data[log_level_distribution.sample(&mut rng)];
let log_level_sample_b = few_terms_data[log_level_distribution.sample(&mut rng)];
index_writer.add_document(doc!(
json_field => json!({"mixed_type": 10.0}),
json_field => json!({"mixed_type": 10.0}),
text_field => "cool",
text_field => "cool",
text_field_all_unique_terms => "cool",
text_field_all_unique_terms => "coolo",
text_field_many_terms => "cool",
text_field_many_terms => "cool",
text_field_few_terms => "cool",
text_field_few_terms => "cool",
text_field_few_terms_status => log_level_sample_a,
text_field_few_terms_status => log_level_sample_b,
score_field => 1u64,
score_field => 1u64,
score_field_f64 => lg_norm.sample(&mut rng),
@@ -442,8 +552,10 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
index_writer.add_document(doc!(
text_field => "cool",
json_field => json,
text_field_all_unique_terms => format!("unique_term_{}", rng.gen::<u64>()),
text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
text_field_few_terms_status => few_terms_data[log_level_distribution.sample(&mut rng)],
score_field => val as u64,
score_field_f64 => lg_norm.sample(&mut rng),
score_field_i64 => val as i64,
@@ -460,3 +572,61 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
Ok(index)
}
// Filter aggregation benchmarks
fn filter_agg_all_query_count_agg(index: &Index) {
let agg_req = json!({
"filtered": {
"filter": "*",
"aggs": {
"count": { "value_count": { "field": "score" } }
}
}
});
execute_agg(index, agg_req);
}
fn filter_agg_term_query_count_agg(index: &Index) {
let agg_req = json!({
"filtered": {
"filter": "text:cool",
"aggs": {
"count": { "value_count": { "field": "score" } }
}
}
});
execute_agg(index, agg_req);
}
fn filter_agg_all_query_with_sub_aggs(index: &Index) {
let agg_req = json!({
"filtered": {
"filter": "*",
"aggs": {
"avg_score": { "avg": { "field": "score" } },
"stats_score": { "stats": { "field": "score_f64" } },
"terms_text": {
"terms": { "field": "text_few_terms" }
}
}
}
});
execute_agg(index, agg_req);
}
fn filter_agg_term_query_with_sub_aggs(index: &Index) {
let agg_req = json!({
"filtered": {
"filter": "text:cool",
"aggs": {
"avg_score": { "avg": { "field": "score" } },
"stats_score": { "stats": { "field": "score_f64" } },
"terms_text": {
"terms": { "field": "text_few_terms" }
}
}
}
});
execute_agg(index, agg_req);
}

benches/and_or_queries.rs (new file, 218 lines)

@@ -0,0 +1,218 @@
// Benchmarks boolean conjunction queries using binggan.
//
// What's measured:
// - Or and And queries with varying selectivity (only `Term` queries for now on leafs)
// - Nested AND/OR combinations (on multiple fields)
// - No-scoring path using the Count collector (focus on iterator/skip performance)
// - Top-K retrieval (k=10) using the TopDocs collector
//
// Corpus model:
// - Synthetic docs; each token a/b/c is independently included per doc
// - If none of a/b/c are included, emit a neutral filler token to keep doc length similar
//
// Notes:
// - After optimization, when scoring is disabled Tantivy reads doc-only postings
// (IndexRecordOption::Basic), avoiding frequency decoding overhead.
// - This bench isolates boolean iteration speed and intersection/union cost.
// - Use `cargo bench --bench boolean_conjunction` to run.
use binggan::{black_box, BenchGroup, BenchRunner};
use rand::prelude::*;
use rand::rngs::StdRng;
use rand::SeedableRng;
use tantivy::collector::sort_key::SortByStaticFastValue;
use tantivy::collector::{Collector, Count, TopDocs};
use tantivy::query::{Query, QueryParser};
use tantivy::schema::{Schema, FAST, TEXT};
use tantivy::{doc, Index, Order, ReloadPolicy, Searcher};
#[derive(Clone)]
struct BenchIndex {
#[allow(dead_code)]
index: Index,
searcher: Searcher,
query_parser: QueryParser,
}
/// Build a single index containing both fields (title, body) and
/// return two BenchIndex views:
/// - single_field: QueryParser defaults to only "body"
/// - multi_field: QueryParser defaults to ["title", "body"]
fn build_shared_indices(num_docs: usize, p_a: f32, p_b: f32, p_c: f32) -> (BenchIndex, BenchIndex) {
// Unified schema (two text fields)
let mut schema_builder = Schema::builder();
let f_title = schema_builder.add_text_field("title", TEXT);
let f_body = schema_builder.add_text_field("body", TEXT);
let f_score = schema_builder.add_u64_field("score", FAST);
let f_score2 = schema_builder.add_u64_field("score2", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
// Populate index with stable RNG for reproducibility.
let mut rng = StdRng::from_seed([7u8; 32]);
// Populate: spread each present token 90/10 to body/title
{
let mut writer = index.writer_with_num_threads(1, 500_000_000).unwrap();
for _ in 0..num_docs {
let has_a = rng.gen_bool(p_a as f64);
let has_b = rng.gen_bool(p_b as f64);
let has_c = rng.gen_bool(p_c as f64);
let score = rng.gen_range(0u64..100u64);
let score2 = rng.gen_range(0u64..100_000u64);
let mut title_tokens: Vec<&str> = Vec::new();
let mut body_tokens: Vec<&str> = Vec::new();
if has_a {
if rng.gen_bool(0.1) {
title_tokens.push("a");
} else {
body_tokens.push("a");
}
}
if has_b {
if rng.gen_bool(0.1) {
title_tokens.push("b");
} else {
body_tokens.push("b");
}
}
if has_c {
if rng.gen_bool(0.1) {
title_tokens.push("c");
} else {
body_tokens.push("c");
}
}
if title_tokens.is_empty() && body_tokens.is_empty() {
body_tokens.push("z");
}
writer
.add_document(doc!(
f_title=>title_tokens.join(" "),
f_body=>body_tokens.join(" "),
f_score=>score,
f_score2=>score2,
))
.unwrap();
}
writer.commit().unwrap();
}
// Prepare reader/searcher once.
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
let searcher = reader.searcher();
// Build two query parsers with different default fields.
let qp_single = QueryParser::for_index(&index, vec![f_body]);
let qp_multi = QueryParser::for_index(&index, vec![f_title, f_body]);
let single_view = BenchIndex {
index: index.clone(),
searcher: searcher.clone(),
query_parser: qp_single,
};
let multi_view = BenchIndex {
index,
searcher,
query_parser: qp_multi,
};
(single_view, multi_view)
}
fn main() {
// Prepare corpora with varying selectivity. Build one index per corpus
// and derive two views (single-field vs multi-field) from it.
let scenarios = vec![
(
"N=1M, p(a)=5%, p(b)=1%, p(c)=15%".to_string(),
1_000_000,
0.05,
0.01,
0.15,
),
(
"N=1M, p(a)=1%, p(b)=1%, p(c)=15%".to_string(),
1_000_000,
0.01,
0.01,
0.15,
),
];
let queries = &["a", "+a +b", "+a +b +c", "a OR b", "a OR b OR c"];
let mut runner = BenchRunner::new();
for (label, n, pa, pb, pc) in scenarios {
let (single_view, multi_view) = build_shared_indices(n, pa, pb, pc);
for (view_name, bench_index) in [("single_field", single_view), ("multi_field", multi_view)]
{
// One bench group per (scenario, view) pair.
let mut group = runner.new_group();
group.set_name(format!("{}_{}", view_name, label));
for query_str in queries {
add_bench_task(&mut group, &bench_index, query_str, Count, "count");
add_bench_task(
&mut group,
&bench_index,
query_str,
TopDocs::with_limit(10).order_by_score(),
"top10",
);
add_bench_task(
&mut group,
&bench_index,
query_str,
TopDocs::with_limit(10).order_by_fast_field::<u64>("score", Order::Asc),
"top10_by_ff",
);
add_bench_task(
&mut group,
&bench_index,
query_str,
TopDocs::with_limit(10).order_by((
SortByStaticFastValue::<u64>::for_field("score"),
SortByStaticFastValue::<u64>::for_field("score2"),
)),
"top10_by_2ff",
);
}
group.run();
}
}
}
fn add_bench_task<C: Collector + 'static>(
bench_group: &mut BenchGroup,
bench_index: &BenchIndex,
query_str: &str,
collector: C,
collector_name: &str,
) {
let task_name = format!("{}_{}", query_str.replace(" ", "_"), collector_name);
let query = bench_index.query_parser.parse_query(query_str).unwrap();
let search_task = SearchTask {
searcher: bench_index.searcher.clone(),
collector,
query,
};
bench_group.register(task_name, move |_| black_box(search_task.run()));
}
struct SearchTask<C: Collector> {
searcher: Searcher,
collector: C,
query: Box<dyn Query>,
}
impl<C: Collector> SearchTask<C> {
#[inline(never)]
pub fn run(&self) -> usize {
self.searcher.search(&self.query, &self.collector).unwrap();
1
}
}
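The notes at the top of this bench mention that, with scoring disabled, Tantivy reads doc-only postings. A minimal sketch of what that means at the API level, assuming tantivy's `InvertedIndexReader::read_postings` and `DocSet` APIs (illustrative only, not part of the bench):

// Sketch: counting docs for a single token via frequency-free (Basic) postings.
use tantivy::schema::{Field, IndexRecordOption};
use tantivy::{DocSet, Searcher, Term, TERMINATED};

fn count_docs_for_token(searcher: &Searcher, field: Field, token: &str) -> tantivy::Result<u32> {
    let term = Term::from_field_text(field, token);
    let mut total = 0u32;
    for segment_reader in searcher.segment_readers() {
        let inverted_index = segment_reader.inverted_index(field)?;
        // Basic postings skip frequencies and positions, which is the cheap
        // iteration mode the count/boolean tasks above exercise.
        if let Some(mut postings) =
            inverted_index.read_postings(&term, IndexRecordOption::Basic)?
        {
            while postings.doc() != TERMINATED {
                total += 1;
                postings.advance();
            }
        }
    }
    Ok(total)
}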

benches/exists_json.rs (new file)

@@ -0,0 +1,69 @@
use binggan::plugins::PeakMemAllocPlugin;
use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use serde_json::json;
use tantivy::collector::Count;
use tantivy::query::ExistsQuery;
use tantivy::schema::{Schema, FAST, TEXT};
use tantivy::{doc, Index};
#[global_allocator]
pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
fn main() {
let doc_count: usize = 500_000;
let subfield_counts: &[usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 16, 256, 4096, 65536, 262144];
let indices: Vec<(String, Index)> = subfield_counts
.iter()
.map(|&sub_fields| {
(
format!("subfields={sub_fields}"),
build_index_with_json_subfields(doc_count, sub_fields),
)
})
.collect();
let mut group = InputGroup::new_with_inputs(indices);
group.add_plugin(PeakMemAllocPlugin::new(GLOBAL));
group.config().num_iter_group = Some(1);
group.config().num_iter_bench = Some(1);
group.register("exists_json", exists_json_union);
group.run();
}
fn exists_json_union(index: &Index) {
let reader = index.reader().expect("reader");
let searcher = reader.searcher();
let query = ExistsQuery::new("json".to_string(), true);
let count = searcher.search(&query, &Count).expect("exists search");
// Prevents optimizer from eliding the search
black_box(count);
}
fn build_index_with_json_subfields(num_docs: usize, num_subfields: usize) -> Index {
// Schema: single JSON field stored as FAST to support ExistsQuery.
let mut schema_builder = Schema::builder();
let json_field = schema_builder.add_json_field("json", TEXT | FAST);
let schema = schema_builder.build();
let index = Index::create_from_tempdir(schema).expect("create index");
{
let mut index_writer = index
.writer_with_num_threads(1, 200_000_000)
.expect("writer");
for i in 0..num_docs {
let sub = i % num_subfields;
// Only one subpath set per document; rotate subpaths so that
// no single subpath is present in every doc, but their union covers all docs.
let v = json!({ format!("field_{sub}"): i as u64 });
index_writer
.add_document(doc!(json_field => v))
.expect("add_document");
}
index_writer.commit().expect("commit");
}
index
}
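A quick sanity check on the corpus shape: each document sets exactly one subpath, and subpaths rotate, so their union covers every document. A small sketch using only the helper above (illustrative, not part of the bench):

// Sketch: the exists count over the union of subpaths equals the doc count.
fn sanity_check() {
    let index = build_index_with_json_subfields(1_000, 8);
    let reader = index.reader().expect("reader");
    let searcher = reader.searcher();
    let query = ExistsQuery::new("json".to_string(), true);
    let count = searcher.search(&query, &Count).expect("exists search");
    assert_eq!(count, 1_000);
}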


@@ -11,9 +11,6 @@ keywords = []
documentation = "https://docs.rs/tantivy-bitpacker/latest/tantivy_bitpacker"
homepage = "https://github.com/quickwit-oss/tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker1x"] }


@@ -65,10 +65,16 @@ impl BitPacker {
#[derive(Clone, Debug, Default, Copy)]
pub struct BitUnpacker {
num_bits: usize,
num_bits: u32,
mask: u64,
}
pub type BlockNumber = usize;
// Blocks span `2 << BLOCK_SIZE_MIN_POW` = 32 KiB; the shifts below use BLOCK_SIZE_MIN_POW + 1.
const BLOCK_SIZE_MIN_POW: u8 = 14;
const BLOCK_SIZE_MIN: usize = 2 << BLOCK_SIZE_MIN_POW;
impl BitUnpacker {
/// Creates a bit unpacker that assumes the same bitwidth for all values.
///
@@ -82,8 +88,9 @@ impl BitUnpacker {
} else {
(1u64 << num_bits) - 1u64
};
BitUnpacker {
num_bits: usize::from(num_bits),
num_bits: u32::from(num_bits),
mask,
}
}
@@ -92,16 +99,69 @@ impl BitUnpacker {
self.num_bits as u8
}
/// Calculates a block number for the given `idx`.
#[inline]
pub fn block_num(&self, idx: u32) -> BlockNumber {
// Find the address in bits of the index.
let addr_in_bits = (idx * self.num_bits) as usize;
// Then round down to the nearest byte.
let addr_in_bytes = addr_in_bits >> 3;
// And compute the containing BlockNumber.
addr_in_bytes >> (BLOCK_SIZE_MIN_POW + 1)
}
/// Given a block number and dataset length, calculates a data Range for the block.
pub fn block(&self, block: BlockNumber, data_len: usize) -> Range<usize> {
let block_addr = block << (BLOCK_SIZE_MIN_POW + 1);
// We extend the end of the block by a constant number of bytes, so that it overlaps
// the next block. That ensures that we never need to read across a block boundary.
block_addr..(std::cmp::min(block_addr + BLOCK_SIZE_MIN + 8, data_len))
}
/// Calculates the number of blocks for the given data_len.
///
/// Usually only called at startup to pre-allocate structures.
pub fn block_count(&self, data_len: usize) -> usize {
let block_count = data_len / (BLOCK_SIZE_MIN as usize);
if data_len % (BLOCK_SIZE_MIN as usize) == 0 {
block_count
} else {
block_count + 1
}
}
/// Returns a range within the data which covers the given id_range.
///
/// NOTE: This method is used for batch reads which bypass blocks to avoid dealing with block
/// boundaries.
#[inline]
pub fn block_oblivious_range(&self, id_range: Range<u32>, data_len: usize) -> Range<usize> {
let start_in_bits = id_range.start * self.num_bits;
let start = (start_in_bits >> 3) as usize;
let end_in_bits = id_range.end * self.num_bits;
let end = (end_in_bits >> 3) as usize;
// TODO: We fetch more than we need and then truncate.
start..(std::cmp::min(end + 8, data_len))
}
#[inline]
pub fn get(&self, idx: u32, data: &[u8]) -> u64 {
let addr_in_bits = idx as usize * self.num_bits;
let addr = addr_in_bits >> 3;
self.get_from_subset(idx, 0, data)
}
/// Get the value at the given idx, which must exist within the given subset of the data.
#[inline]
pub fn get_from_subset(&self, idx: u32, data_offset: usize, data: &[u8]) -> u64 {
let addr_in_bits = idx * self.num_bits;
let addr = (addr_in_bits >> 3) as usize - data_offset;
if addr + 8 > data.len() {
if self.num_bits == 0 {
return 0;
}
let bit_shift = addr_in_bits & 7;
return self.get_slow_path(addr, bit_shift as u32, data);
return self.get_slow_path(addr, bit_shift, data);
}
let bit_shift = addr_in_bits & 7;
let bytes: [u8; 8] = (&data[addr..addr + 8]).try_into().unwrap();
@@ -113,6 +173,7 @@ impl BitUnpacker {
#[inline(never)]
fn get_slow_path(&self, addr: usize, bit_shift: u32, data: &[u8]) -> u64 {
let mut bytes: [u8; 8] = [0u8; 8];
let available_bytes = data.len() - addr;
// This function is meant to only be called if we did not have 8 bytes to load.
debug_assert!(available_bytes < 8);
@@ -128,26 +189,25 @@ impl BitUnpacker {
// # Panics
//
// This method panics if `num_bits` is > 32.
fn get_batch_u32s(&self, start_idx: u32, data: &[u8], output: &mut [u32]) {
fn get_batch_u32s(&self, start_idx: u32, data_offset: usize, data: &[u8], output: &mut [u32]) {
assert!(
self.bit_width() <= 32,
"Bitwidth must be <= 32 to use this method."
);
let end_idx: u32 = start_idx + output.len() as u32;
let end_idx = start_idx + output.len() as u32;
// We use `usize` here to avoid overflow issues.
let end_bit_read = (end_idx as usize) * self.num_bits;
let end_bit_read = end_idx * self.num_bits;
let end_byte_read = (end_bit_read + 7) / 8;
assert!(
end_byte_read <= data.len(),
end_byte_read as usize <= data_offset + data.len(),
"Requested index is out of bounds."
);
// Simple slow implementation of get_batch_u32s, to deal with our ramps.
let get_batch_ramp = |start_idx: u32, output: &mut [u32]| {
for (out, idx) in output.iter_mut().zip(start_idx..) {
*out = self.get(idx, data) as u32;
*out = self.get_from_subset(idx, data_offset, data) as u32;
}
};
@@ -160,24 +220,24 @@ impl BitUnpacker {
// We want the start of the fast track to be byte-aligned.
// A sufficient condition is to start with an idx that is a multiple of 8,
// so highway start is the closest multiple of 8 that is >= start_idx.
let entrance_ramp_len: u32 = 8 - (start_idx % 8) % 8;
let entrance_ramp_len = 8 - (start_idx % 8) % 8;
let highway_start: u32 = start_idx + entrance_ramp_len;
if highway_start + (BitPacker1x::BLOCK_LEN as u32) > end_idx {
if highway_start + BitPacker1x::BLOCK_LEN as u32 > end_idx {
// We don't have enough values to have even a single block of highway.
// Let's just supply the values the simple way.
get_batch_ramp(start_idx, output);
return;
}
let num_blocks: usize = (end_idx - highway_start) as usize / BitPacker1x::BLOCK_LEN;
let num_blocks: u32 = (end_idx - highway_start) / BitPacker1x::BLOCK_LEN as u32;
// Entrance ramp
get_batch_ramp(start_idx, &mut output[..entrance_ramp_len as usize]);
// Highway
let mut offset = (highway_start as usize * self.num_bits) / 8;
let mut offset = ((highway_start * self.num_bits) as usize / 8) - data_offset;
let mut output_cursor = (highway_start - start_idx) as usize;
for _ in 0..num_blocks {
offset += BitPacker1x.decompress(
@@ -189,7 +249,7 @@ impl BitUnpacker {
}
// Exit ramp
let highway_end: u32 = highway_start + (num_blocks * BitPacker1x::BLOCK_LEN) as u32;
let highway_end = highway_start + num_blocks * BitPacker1x::BLOCK_LEN as u32;
get_batch_ramp(highway_end, &mut output[output_cursor..]);
}
@@ -199,16 +259,27 @@ impl BitUnpacker {
id_range: Range<u32>,
data: &[u8],
positions: &mut Vec<u32>,
) {
self.get_ids_for_value_range_from_subset(range, id_range, 0, data, positions)
}
pub fn get_ids_for_value_range_from_subset(
&self,
range: RangeInclusive<u64>,
id_range: Range<u32>,
data_offset: usize,
data: &[u8],
positions: &mut Vec<u32>,
) {
if self.bit_width() > 32 {
self.get_ids_for_value_range_slow(range, id_range, data, positions)
self.get_ids_for_value_range_slow(range, id_range, data_offset, data, positions)
} else {
if *range.start() > u32::MAX as u64 {
positions.clear();
return;
}
let range_u32 = (*range.start() as u32)..=(*range.end()).min(u32::MAX as u64) as u32;
self.get_ids_for_value_range_fast(range_u32, id_range, data, positions)
self.get_ids_for_value_range_fast(range_u32, id_range, data_offset, data, positions)
}
}
@@ -216,6 +287,7 @@ impl BitUnpacker {
&self,
range: RangeInclusive<u64>,
id_range: Range<u32>,
data_offset: usize,
data: &[u8],
positions: &mut Vec<u32>,
) {
@@ -223,7 +295,7 @@ impl BitUnpacker {
for i in id_range {
// If we cared we could make this branchless, but the slow implementation should rarely
// kick in.
let val = self.get(i, data);
let val = self.get_from_subset(i, data_offset, data);
if range.contains(&val) {
positions.push(i);
}
@@ -234,11 +306,12 @@ impl BitUnpacker {
&self,
value_range: RangeInclusive<u32>,
id_range: Range<u32>,
data_offset: usize,
data: &[u8],
positions: &mut Vec<u32>,
) {
positions.resize(id_range.len(), 0u32);
self.get_batch_u32s(id_range.start, data, positions);
self.get_batch_u32s(id_range.start, data_offset, data, positions);
crate::filter_vec::filter_vec_in_place(value_range, id_range.start, positions)
}
}
@@ -258,7 +331,7 @@ mod test {
bitpacker.write(val, num_bits, &mut data).unwrap();
}
bitpacker.close(&mut data).unwrap();
assert_eq!(data.len(), ((num_bits as usize) * len + 7) / 8);
assert_eq!(data.len(), ((num_bits as usize) * len).div_ceil(8));
let bitunpacker = BitUnpacker::new(num_bits);
(bitunpacker, vals, data)
}
@@ -304,7 +377,7 @@ mod test {
bitpacker.write(val, num_bits, &mut buffer).unwrap();
}
bitpacker.flush(&mut buffer).unwrap();
assert_eq!(buffer.len(), (vals.len() * num_bits as usize + 7) / 8);
assert_eq!(buffer.len(), (vals.len() * num_bits as usize).div_ceil(8));
let bitunpacker = BitUnpacker::new(num_bits);
let max_val = if num_bits == 64 {
u64::MAX
@@ -329,14 +402,14 @@ mod test {
fn test_get_batch_panics_over_32_bits() {
let bitunpacker = BitUnpacker::new(33);
let mut output: [u32; 1] = [0u32];
bitunpacker.get_batch_u32s(0, &[0, 0, 0, 0, 0, 0, 0, 0], &mut output[..]);
bitunpacker.get_batch_u32s(0, 0, &[0, 0, 0, 0, 0, 0, 0, 0], &mut output[..]);
}
#[test]
fn test_get_batch_limit() {
let bitunpacker = BitUnpacker::new(1);
let mut output: [u32; 3] = [0u32, 0u32, 0u32];
bitunpacker.get_batch_u32s(8 * 4 - 3, &[0u8, 0u8, 0u8, 0u8], &mut output[..]);
bitunpacker.get_batch_u32s(8 * 4 - 3, 0, &[0u8, 0u8, 0u8, 0u8], &mut output[..]);
}
#[test]
@@ -345,7 +418,7 @@ mod test {
let bitunpacker = BitUnpacker::new(1);
let mut output: [u32; 3] = [0u32, 0u32, 0u32];
// We are missing exactly one bit.
bitunpacker.get_batch_u32s(8 * 4 - 2, &[0u8, 0u8, 0u8, 0u8], &mut output[..]);
bitunpacker.get_batch_u32s(8 * 4 - 2, 0, &[0u8, 0u8, 0u8, 0u8], &mut output[..]);
}
proptest::proptest! {
@@ -368,7 +441,7 @@ mod test {
for len in [0, 1, 2, 32, 33, 34, 64] {
for start_idx in 0u32..32u32 {
output.resize(len, 0);
bitunpacker.get_batch_u32s(start_idx, &buffer, &mut output);
bitunpacker.get_batch_u32s(start_idx, 0, &buffer, &mut output);
for (i, output_byte) in output.iter().enumerate() {
let expected = (start_idx + i as u32) & mask;
assert_eq!(*output_byte, expected);
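A worked example of the block arithmetic introduced above (hypothetical numbers; with BLOCK_SIZE_MIN_POW = 14, blocks span `2 << 14` = 32_768 bytes and the shifts use BLOCK_SIZE_MIN_POW + 1):

// Sketch: where idx 50_000 lands for num_bits = 20.
fn main() {
    let num_bits: usize = 20;
    let idx: usize = 50_000;
    let addr_in_bytes = (idx * num_bits) >> 3; // bit 1_000_000 -> byte 125_000
    let block_num = addr_in_bytes >> 15;       // 125_000 / 32_768 -> block 3
    assert_eq!(block_num, 3);
    let block_start = block_num << 15;
    let block_end = block_start + 32_768 + 8;  // +8 so `get` can always load 8 bytes
    assert_eq!((block_start, block_end), (98_304, 131_080));
}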


@@ -140,10 +140,10 @@ impl BlockedBitpacker {
pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
// todo performance: we could decompress a whole block and cache it instead
let bitpacked_elems = self.offset_and_bits.len() * BLOCK_SIZE;
let iter = (0..bitpacked_elems)
(0..bitpacked_elems)
.map(move |idx| self.get(idx))
.chain(self.buffer.iter().cloned());
iter
.chain(self.buffer.iter().cloned())
}
}


@@ -19,7 +19,7 @@ fn u32_to_i32(val: u32) -> i32 {
#[inline]
unsafe fn u32_to_i32_avx2(vals_u32x8s: DataType) -> DataType {
const HIGHEST_BIT_MASK: DataType = from_u32x8([HIGHEST_BIT; NUM_LANES]);
op_xor(vals_u32x8s, HIGHEST_BIT_MASK)
unsafe { op_xor(vals_u32x8s, HIGHEST_BIT_MASK) }
}
pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
@@ -66,17 +66,19 @@ unsafe fn filter_vec_avx2_aux(
]);
const SHIFT: __m256i = from_u32x8([NUM_LANES as u32; NUM_LANES]);
for _ in 0..num_words {
let word = load_unaligned(input);
let word = u32_to_i32_avx2(word);
let keeper_bitset = compute_filter_bitset(word, range_simd.clone());
let added_len = keeper_bitset.count_ones();
let filtered_doc_ids = compact(ids, keeper_bitset);
store_unaligned(output_tail as *mut __m256i, filtered_doc_ids);
output_tail = output_tail.offset(added_len as isize);
ids = op_add(ids, SHIFT);
input = input.offset(1);
unsafe {
let word = load_unaligned(input);
let word = u32_to_i32_avx2(word);
let keeper_bitset = compute_filter_bitset(word, range_simd.clone());
let added_len = keeper_bitset.count_ones();
let filtered_doc_ids = compact(ids, keeper_bitset);
store_unaligned(output_tail as *mut __m256i, filtered_doc_ids);
output_tail = output_tail.offset(added_len as isize);
ids = op_add(ids, SHIFT);
input = input.offset(1);
}
}
output_tail.offset_from(output) as usize
unsafe { output_tail.offset_from(output) as usize }
}
#[inline]
@@ -92,8 +94,7 @@ unsafe fn compute_filter_bitset(val: __m256i, range: std::ops::RangeInclusive<__
let too_low = op_greater(*range.start(), val);
let too_high = op_greater(val, *range.end());
let inside = op_or(too_low, too_high);
255 - std::arch::x86_64::_mm256_movemask_ps(std::mem::transmute::<DataType, __m256>(inside))
as u8
255 - std::arch::x86_64::_mm256_movemask_ps(_mm256_castsi256_ps(inside)) as u8
}
union U8x32 {
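For reference, a scalar sketch of the contract the AVX2 routine above implements, inferred from its call site in `get_ids_for_value_range_fast`: `output` holds values on entry and, on exit, the positions (`offset + i`) of the values that fall inside `range`:

// Sketch: scalar semantics of filter_vec_in_place (the SIMD path above is an
// optimized equivalent); values in, matching positions out, in place.
fn filter_vec_in_place_scalar(
    range: std::ops::RangeInclusive<u32>,
    offset: u32,
    output: &mut Vec<u32>,
) {
    let mut write = 0;
    for i in 0..output.len() {
        if range.contains(&output[i]) {
            output[write] = offset + i as u32;
            write += 1;
        }
    }
    output.truncate(write);
}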


@@ -16,7 +16,7 @@ stacker = { version= "0.6", path = "../stacker", package="tantivy-stacker"}
sstable = { version= "0.6", path = "../sstable", package = "tantivy-sstable" }
common = { version= "0.10", path = "../common", package = "tantivy-common" }
tantivy-bitpacker = { version= "0.9", path = "../bitpacker/" }
serde = "1.0.152"
serde = { version = "1.0.152", features = ["derive"] }
downcast-rs = "2.0.1"
[dev-dependencies]


@@ -73,7 +73,7 @@ The crate introduces the following concepts.
`Columnar` is an equivalent of a dataframe.
It maps `column_key` to `Column`.
A `Column<T>` asssociates a `RowId` (u32) to any
A `Column<T>` associates a `RowId` (u32) to any
number of values.
This is made possible by wrapping a `ColumnIndex` and a `ColumnValue` object.
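For readers skimming this README hunk, a sketch of the wrapper it describes; the field names match the `Column` definition that appears later in this diff, with trait bounds elided:

// Sketch: Column = ColumnIndex (DocId -> zero, one, or many RowIds)
//         + ColumnValues (RowId -> T).
use std::sync::Arc;

pub struct Column<T = u64> {
    pub index: ColumnIndex,
    pub values: Arc<dyn ColumnValues<T>>,
}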


@@ -1,6 +1,6 @@
use binggan::{InputGroup, black_box};
use common::*;
use tantivy_columnar::Column;
use tantivy_columnar::{Column, ValueRange};
pub mod common;
@@ -46,16 +46,16 @@ fn bench_group(mut runner: InputGroup<Column>) {
runner.register("access_first_vals", |column| {
let mut sum = 0;
const BLOCK_SIZE: usize = 32;
let mut docs = vec![0; BLOCK_SIZE];
let mut buffer = vec![None; BLOCK_SIZE];
let mut docs = Vec::with_capacity(BLOCK_SIZE);
let mut buffer = Vec::with_capacity(BLOCK_SIZE);
for i in (0..NUM_DOCS).step_by(BLOCK_SIZE) {
// fill docs
#[allow(clippy::needless_range_loop)]
docs.clear();
for idx in 0..BLOCK_SIZE {
docs[idx] = idx as u32 + i;
docs.push(idx as u32 + i);
}
column.first_vals(&docs, &mut buffer);
buffer.clear();
column.first_vals_in_value_range(&mut docs, &mut buffer, ValueRange::All);
for val in buffer.iter() {
let Some(val) = val else { continue };
sum += *val;


@@ -89,13 +89,6 @@ fn main() {
black_box(sum);
});
group.register("first_block_fetch", |column| {
let mut block: Vec<Option<u64>> = vec![None; 64];
let fetch_docids = (0..64).collect::<Vec<_>>();
column.first_vals(&fetch_docids, &mut block);
black_box(block[0]);
});
group.register("first_block_single_calls", |column| {
let mut block: Vec<Option<u64>> = vec![None; 64];
let fetch_docids = (0..64).collect::<Vec<_>>();


@@ -40,7 +40,14 @@ fn main() {
let columnar_readers = columnar_readers.iter().collect::<Vec<_>>();
let merge_row_order = StackMergeOrder::stack(&columnar_readers[..]);
merge_columnar(&columnar_readers, &[], merge_row_order.into(), &mut out).unwrap();
merge_columnar(
&columnar_readers,
&[],
merge_row_order.into(),
&mut out,
|| false,
)
.unwrap();
Some(out.len() as u64)
},
);


@@ -1,6 +1,7 @@
mod dictionary_encoded;
mod serialize;
use std::cell::RefCell;
use std::fmt::{self, Debug};
use std::io::Write;
use std::ops::{Range, RangeInclusive};
@@ -19,6 +20,11 @@ use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
use crate::column_values::{ColumnValues, monotonic_map_column};
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
thread_local! {
static ROWS: RefCell<Vec<RowId>> = const { RefCell::new(Vec::new()) };
static DOCS: RefCell<Vec<DocId>> = const { RefCell::new(Vec::new()) };
}
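`ROWS` and `DOCS` are per-thread scratch buffers: callers borrow them, clear them, and reuse their capacity rather than allocating on every call. A self-contained sketch of the pattern (names illustrative):

// Sketch: reusable thread-local scratch, as used by the Optional branch below.
use std::cell::RefCell;

thread_local! {
    static SCRATCH: RefCell<Vec<u32>> = const { RefCell::new(Vec::new()) };
}

fn with_scratch(n: u32) -> usize {
    SCRATCH.with(|cell| {
        let mut buf = cell.borrow_mut();
        buf.clear(); // keeps the capacity from previous calls on this thread
        buf.extend(0..n);
        buf.len()
    })
}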
#[derive(Clone)]
pub struct Column<T = u64> {
pub index: ColumnIndex,
@@ -89,31 +95,6 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
self.values_for_doc(row_id).next()
}
/// Load the first value for each docid in the provided slice.
#[inline]
pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) {
match &self.index {
ColumnIndex::Empty { .. } => {}
ColumnIndex::Full => self.values.get_vals_opt(docids, output),
ColumnIndex::Optional(optional_index) => {
for (i, docid) in docids.iter().enumerate() {
output[i] = optional_index
.rank_if_exists(*docid)
.map(|rowid| self.values.get_val(rowid));
}
}
ColumnIndex::Multivalued(multivalued_index) => {
for (i, docid) in docids.iter().enumerate() {
let range = multivalued_index.range(*docid);
let is_empty = range.start == range.end;
if !is_empty {
output[i] = Some(self.values.get_val(range.start));
}
}
}
}
}
/// Translates a block of docids to row_ids.
///
/// returns the row_ids and the matching docids on the same index
@@ -131,6 +112,8 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
self.index.docids_to_rowids(doc_ids, doc_ids_out, row_ids)
}
/// Get an iterator over the values for the provided docid.
#[inline]
pub fn values_for_doc(&self, doc_id: DocId) -> impl Iterator<Item = T> + '_ {
self.index
.value_row_ids(doc_id)
@@ -141,7 +124,7 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
#[inline]
pub fn get_docids_for_value_range(
&self,
value_range: RangeInclusive<T>,
value_range: ValueRange<T>,
selected_docid_range: Range<u32>,
doc_ids: &mut Vec<u32>,
) {
@@ -158,15 +141,6 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
.select_batch_in_place(selected_docid_range.start, doc_ids);
}
/// Fills the output vector with the (possibly multiple) values that are associated with
/// `row_id`.
///
/// This method clears the `output` vector.
pub fn fill_vals(&self, row_id: RowId, output: &mut Vec<T>) {
output.clear();
output.extend(self.values_for_doc(row_id));
}
pub fn first_or_default_col(self, default_value: T) -> Arc<dyn ColumnValues<T>> {
Arc::new(FirstValueWithDefault {
column: self,
@@ -175,6 +149,181 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
}
}
// Separate impl block for methods requiring `Default` for `T`.
impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static + Default> Column<T> {
/// Load the first value for each docid in `input_docs`.
///
/// Documents whose first value does not match `value_range` are skipped; every matching
/// document is pushed to `output`, with `sort_key: None` for documents without a value
/// when the range treats nulls as matching.
#[inline]
pub fn first_vals_in_value_range(
&self,
input_docs: &[DocId],
output: &mut Vec<crate::ComparableDoc<Option<T>, DocId>>,
value_range: ValueRange<T>,
) {
match (&self.index, value_range) {
(ColumnIndex::Empty { .. }, value_range) => {
let nulls_match = match &value_range {
ValueRange::All => true,
ValueRange::Inclusive(_) => false,
ValueRange::GreaterThan(_, nulls_match) => *nulls_match,
ValueRange::GreaterThanOrEqual(_, nulls_match) => *nulls_match,
ValueRange::LessThan(_, nulls_match) => *nulls_match,
ValueRange::LessThanOrEqual(_, nulls_match) => *nulls_match,
};
if nulls_match {
for &doc in input_docs {
output.push(crate::ComparableDoc {
doc,
sort_key: None,
});
}
}
}
(ColumnIndex::Full, value_range) => {
self.values
.get_vals_in_value_range(input_docs, input_docs, output, value_range);
}
(ColumnIndex::Optional(optional_index), value_range) => {
let nulls_match = match &value_range {
ValueRange::All => true,
ValueRange::Inclusive(_) => false,
ValueRange::GreaterThan(_, nulls_match) => *nulls_match,
ValueRange::GreaterThanOrEqual(_, nulls_match) => *nulls_match,
ValueRange::LessThan(_, nulls_match) => *nulls_match,
ValueRange::LessThanOrEqual(_, nulls_match) => *nulls_match,
};
let fallback_needed = ROWS.with(|rows_cell| {
DOCS.with(|docs_cell| {
let mut rows = rows_cell.borrow_mut();
let mut docs = docs_cell.borrow_mut();
rows.clear();
docs.clear();
let mut has_nulls = false;
for &doc_id in input_docs {
if let Some(row_id) = optional_index.rank_if_exists(doc_id) {
rows.push(row_id);
docs.push(doc_id);
} else {
has_nulls = true;
if nulls_match {
break;
}
}
}
if !has_nulls || !nulls_match {
self.values.get_vals_in_value_range(
&rows,
&docs,
output,
value_range.clone(),
);
return false;
}
true
})
});
if fallback_needed {
for &doc_id in input_docs {
if let Some(row_id) = optional_index.rank_if_exists(doc_id) {
let val = self.values.get_val(row_id);
let value_matches = match &value_range {
ValueRange::All => true,
ValueRange::Inclusive(r) => r.contains(&val),
ValueRange::GreaterThan(t, _) => val > *t,
ValueRange::GreaterThanOrEqual(t, _) => val >= *t,
ValueRange::LessThan(t, _) => val < *t,
ValueRange::LessThanOrEqual(t, _) => val <= *t,
};
if value_matches {
output.push(crate::ComparableDoc {
doc: doc_id,
sort_key: Some(val),
});
}
} else if nulls_match {
output.push(crate::ComparableDoc {
doc: doc_id,
sort_key: None,
});
}
}
}
}
(ColumnIndex::Multivalued(multivalued_index), value_range) => {
let nulls_match = match &value_range {
ValueRange::All => true,
ValueRange::Inclusive(_) => false,
ValueRange::GreaterThan(_, nulls_match) => *nulls_match,
ValueRange::GreaterThanOrEqual(_, nulls_match) => *nulls_match,
ValueRange::LessThan(_, nulls_match) => *nulls_match,
ValueRange::LessThanOrEqual(_, nulls_match) => *nulls_match,
};
for i in 0..input_docs.len() {
let docid = input_docs[i];
let row_range = multivalued_index.range(docid);
let is_empty = row_range.start == row_range.end;
if !is_empty {
let val = self.values.get_val(row_range.start);
let matches = match &value_range {
ValueRange::All => true,
ValueRange::Inclusive(r) => r.contains(&val),
ValueRange::GreaterThan(t, _) => val > *t,
ValueRange::GreaterThanOrEqual(t, _) => val >= *t,
ValueRange::LessThan(t, _) => val < *t,
ValueRange::LessThanOrEqual(t, _) => val <= *t,
};
if matches {
output.push(crate::ComparableDoc {
doc: docid,
sort_key: Some(val),
});
}
} else if nulls_match {
output.push(crate::ComparableDoc {
doc: docid,
sort_key: None,
});
}
}
}
}
}
}
/// A range of values.
///
/// This type is intended to be used in batch APIs, where the cost of unpacking the enum
/// is outweighed by the time spent processing a batch.
///
/// Implementers should pattern match on the variants to use optimized loops for each case.
#[derive(Clone, Debug)]
pub enum ValueRange<T> {
/// A range that includes both start and end.
Inclusive(RangeInclusive<T>),
/// A range that matches all values.
All,
/// A range that matches all values greater than the threshold.
/// The boolean flag indicates if null values should be included.
GreaterThan(T, bool),
/// A range that matches all values greater than or equal to the threshold.
/// The boolean flag indicates if null values should be included.
GreaterThanOrEqual(T, bool),
/// A range that matches all values less than the threshold.
/// The boolean flag indicates if null values should be included.
LessThan(T, bool),
/// A range that matches all values less than or equal to the threshold.
/// The boolean flag indicates if null values should be included.
LessThanOrEqual(T, bool),
}
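The null semantics are uniform across variants: `All` matches nulls, `Inclusive` never does, and the threshold variants carry an explicit flag. A free-standing sketch of the predicate that the call sites above apply (this is not a method on `ValueRange` in this diff):

// Sketch: matching an optional value against a ValueRange; mirrors the match
// arms used by first_vals_in_value_range above.
fn matches<T: PartialOrd>(val: Option<&T>, range: &ValueRange<T>) -> bool {
    match (val, range) {
        (Some(_), ValueRange::All) => true,
        (Some(v), ValueRange::Inclusive(r)) => r.contains(v),
        (Some(v), ValueRange::GreaterThan(t, _)) => *v > *t,
        (Some(v), ValueRange::GreaterThanOrEqual(t, _)) => *v >= *t,
        (Some(v), ValueRange::LessThan(t, _)) => *v < *t,
        (Some(v), ValueRange::LessThanOrEqual(t, _)) => *v <= *t,
        (None, ValueRange::All) => true,
        (None, ValueRange::Inclusive(_)) => false,
        (None, ValueRange::GreaterThan(_, nulls_match))
        | (None, ValueRange::GreaterThanOrEqual(_, nulls_match))
        | (None, ValueRange::LessThan(_, nulls_match))
        | (None, ValueRange::LessThanOrEqual(_, nulls_match)) => *nulls_match,
    }
}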
impl BinarySerializable for Cardinality {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> std::io::Result<()> {
self.to_code().serialize(writer)


@@ -2,7 +2,7 @@ use std::io;
use std::io::Write;
use std::sync::Arc;
use common::OwnedBytes;
use common::file_slice::FileSlice;
use sstable::Dictionary;
use crate::column::{BytesColumn, Column};
@@ -41,12 +41,13 @@ pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64>(
}
pub fn open_column_u64<T: MonotonicallyMappableToU64>(
bytes: OwnedBytes,
file_slice: FileSlice,
format_version: Version,
) -> io::Result<Column<T>> {
let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
let (body, column_index_num_bytes_payload) = file_slice.split_from_end(4);
let column_index_num_bytes = u32::from_le_bytes(
column_index_num_bytes_payload
.read_bytes()?
.as_slice()
.try_into()
.unwrap(),
@@ -61,12 +62,13 @@ pub fn open_column_u64<T: MonotonicallyMappableToU64>(
}
pub fn open_column_u128<T: MonotonicallyMappableToU128>(
bytes: OwnedBytes,
file_slice: FileSlice,
format_version: Version,
) -> io::Result<Column<T>> {
let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
let (body, column_index_num_bytes_payload) = file_slice.split_from_end(4);
let column_index_num_bytes = u32::from_le_bytes(
column_index_num_bytes_payload
.read_bytes()?
.as_slice()
.try_into()
.unwrap(),
@@ -84,12 +86,13 @@ pub fn open_column_u128<T: MonotonicallyMappableToU128>(
///
/// See [`open_u128_as_compact_u64`] for more details.
pub fn open_column_u128_as_compact_u64(
bytes: OwnedBytes,
file_slice: FileSlice,
format_version: Version,
) -> io::Result<Column<u64>> {
let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
let (body, column_index_num_bytes_payload) = file_slice.split_from_end(4);
let column_index_num_bytes = u32::from_le_bytes(
column_index_num_bytes_payload
.read_bytes()?
.as_slice()
.try_into()
.unwrap(),
@@ -103,11 +106,21 @@ pub fn open_column_u128_as_compact_u64(
})
}
pub fn open_column_bytes(data: OwnedBytes, format_version: Version) -> io::Result<BytesColumn> {
let (body, dictionary_len_bytes) = data.rsplit(4);
let dictionary_len = u32::from_le_bytes(dictionary_len_bytes.as_slice().try_into().unwrap());
pub fn open_column_bytes(
file_slice: FileSlice,
format_version: Version,
) -> io::Result<BytesColumn> {
let (body, dictionary_len_bytes) = file_slice.split_from_end(4);
let dictionary_len = u32::from_le_bytes(
dictionary_len_bytes
.read_bytes()?
.as_slice()
.try_into()
.unwrap(),
);
let (dictionary_bytes, column_bytes) = body.split(dictionary_len as usize);
let dictionary = Arc::new(Dictionary::from_bytes(dictionary_bytes)?);
let dictionary = Arc::new(Dictionary::open(dictionary_bytes)?);
let term_ord_column = crate::column::open_column_u64::<u64>(column_bytes, format_version)?;
Ok(BytesColumn {
dictionary,
@@ -115,7 +128,7 @@ pub fn open_column_bytes(data: OwnedBytes, format_version: Version) -> io::Resul
})
}
pub fn open_column_str(data: OwnedBytes, format_version: Version) -> io::Result<StrColumn> {
let bytes_column = open_column_bytes(data, format_version)?;
pub fn open_column_str(file_slice: FileSlice, format_version: Version) -> io::Result<StrColumn> {
let bytes_column = open_column_bytes(file_slice, format_version)?;
Ok(StrColumn::wrap(bytes_column))
}


@@ -95,7 +95,7 @@ pub fn merge_column_index<'a>(
#[cfg(test)]
mod tests {
use common::OwnedBytes;
use common::file_slice::FileSlice;
use crate::column_index::merge::detect_cardinality;
use crate::column_index::multivalued_index::{
@@ -178,7 +178,7 @@ mod tests {
let mut output = Vec::new();
serialize_multivalued_index(&start_index_iterable, &mut output).unwrap();
let multivalue =
open_multivalued_index(OwnedBytes::new(output), crate::Version::V2).unwrap();
open_multivalued_index(FileSlice::from(output), crate::Version::V2).unwrap();
let start_indexes: Vec<RowId> = multivalue.get_start_index_column().iter().collect();
assert_eq!(&start_indexes, &[0, 3, 5]);
}
@@ -216,7 +216,7 @@ mod tests {
let mut output = Vec::new();
serialize_multivalued_index(&start_index_iterable, &mut output).unwrap();
let multivalue =
open_multivalued_index(OwnedBytes::new(output), crate::Version::V2).unwrap();
open_multivalued_index(FileSlice::from(output), crate::Version::V2).unwrap();
let start_indexes: Vec<RowId> = multivalue.get_start_index_column().iter().collect();
assert_eq!(&start_indexes, &[0, 3, 5, 6]);
}


@@ -56,7 +56,7 @@ fn get_doc_ids_with_values<'a>(
ColumnIndex::Full => Box::new(doc_range),
ColumnIndex::Optional(optional_index) => Box::new(
optional_index
.iter_docs()
.iter_non_null_docs()
.map(move |row| row + doc_range.start),
),
ColumnIndex::Multivalued(multivalued_index) => match multivalued_index {
@@ -73,7 +73,7 @@ fn get_doc_ids_with_values<'a>(
MultiValueIndex::MultiValueIndexV2(multivalued_index) => Box::new(
multivalued_index
.optional_index
.iter_docs()
.iter_non_null_docs()
.map(move |row| row + doc_range.start),
),
},
@@ -105,10 +105,11 @@ fn get_num_values_iterator<'a>(
) -> Box<dyn Iterator<Item = u32> + 'a> {
match column_index {
ColumnIndex::Empty { .. } => Box::new(std::iter::empty()),
ColumnIndex::Full => Box::new(std::iter::repeat(1u32).take(num_docs as usize)),
ColumnIndex::Optional(optional_index) => {
Box::new(std::iter::repeat(1u32).take(optional_index.num_non_nulls() as usize))
}
ColumnIndex::Full => Box::new(std::iter::repeat_n(1u32, num_docs as usize)),
ColumnIndex::Optional(optional_index) => Box::new(std::iter::repeat_n(
1u32,
optional_index.num_non_nulls() as usize,
)),
ColumnIndex::Multivalued(multivalued_index) => Box::new(
multivalued_index
.get_start_index_column()
@@ -177,7 +178,7 @@ impl<'a> Iterable<RowId> for StackedOptionalIndex<'a> {
ColumnIndex::Full => Box::new(columnar_row_range),
ColumnIndex::Optional(optional_index) => Box::new(
optional_index
.iter_docs()
.iter_non_null_docs()
.map(move |row_id: RowId| columnar_row_range.start + row_id),
),
ColumnIndex::Multivalued(_) => {


@@ -3,7 +3,8 @@ use std::io::Write;
use std::ops::Range;
use std::sync::Arc;
use common::{CountingWriter, OwnedBytes};
use common::CountingWriter;
use common::file_slice::FileSlice;
use super::optional_index::{open_optional_index, serialize_optional_index};
use super::{OptionalIndex, SerializableOptionalIndex, Set};
@@ -44,21 +45,26 @@ pub fn serialize_multivalued_index(
}
pub fn open_multivalued_index(
bytes: OwnedBytes,
file_slice: FileSlice,
format_version: Version,
) -> io::Result<MultiValueIndex> {
match format_version {
Version::V1 => {
let start_index_column: Arc<dyn ColumnValues<RowId>> =
load_u64_based_column_values(bytes)?;
load_u64_based_column_values(file_slice)?;
Ok(MultiValueIndex::MultiValueIndexV1(MultiValueIndexV1 {
start_index_column,
}))
}
Version::V2 => {
let (body_bytes, optional_index_len) = bytes.rsplit(4);
let optional_index_len =
u32::from_le_bytes(optional_index_len.as_slice().try_into().unwrap());
let (body_bytes, optional_index_len) = file_slice.split_from_end(4);
let optional_index_len = u32::from_le_bytes(
optional_index_len
.read_bytes()?
.as_slice()
.try_into()
.unwrap(),
);
let (optional_index_bytes, start_index_bytes) =
body_bytes.split(optional_index_len as usize);
let optional_index = open_optional_index(optional_index_bytes)?;
@@ -185,8 +191,8 @@ impl MultiValueIndex {
};
let mut buffer = Vec::new();
serialize_multivalued_index(&serializable_multivalued_index, &mut buffer).unwrap();
let bytes = OwnedBytes::new(buffer);
open_multivalued_index(bytes, Version::V2).unwrap()
let file_slice = FileSlice::from(buffer);
open_multivalued_index(file_slice, Version::V2).unwrap()
}
pub fn get_start_index_column(&self) -> &Arc<dyn crate::ColumnValues<RowId>> {
@@ -215,6 +221,32 @@ impl MultiValueIndex {
}
}
/// Returns an iterator over document ids that have at least one value.
pub fn iter_non_null_docs(&self) -> Box<dyn Iterator<Item = DocId> + '_> {
match self {
MultiValueIndex::MultiValueIndexV1(idx) => {
let mut doc: DocId = 0u32;
let num_docs = idx.num_docs();
Box::new(std::iter::from_fn(move || {
// This is not the most efficient way to do this, but the V1 index is legacy.
while doc < num_docs {
let cur = doc;
doc += 1;
let start = idx.start_index_column.get_val(cur);
let end = idx.start_index_column.get_val(cur + 1);
if end > start {
return Some(cur);
}
}
None
}))
}
MultiValueIndex::MultiValueIndexV2(idx) => {
Box::new(idx.optional_index.iter_non_null_docs())
}
}
}
/// Converts a list of ranks (row ids of values) in a 1:n index to the corresponding list of
/// docids. Positions are converted inplace to docids.
///
@@ -307,7 +339,7 @@ mod tests {
use std::ops::Range;
use super::MultiValueIndex;
use crate::{ColumnarReader, DynamicColumn};
use crate::{ColumnarReader, DynamicColumn, ValueRange};
fn index_to_pos_helper(
index: &MultiValueIndex,
@@ -387,7 +419,7 @@ mod tests {
assert_eq!(row_id_range, 0..4);
let check = |range, expected| {
let full_range = 0..=u64::MAX;
let full_range = ValueRange::All;
let mut docids = Vec::new();
column.get_docids_for_value_range(full_range, range, &mut docids);
assert_eq!(docids, expected);
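A worked example of the V1 scan in `iter_non_null_docs` above, with hypothetical start offsets: `[0, 3, 3, 5]` means doc 0 spans rows 0..3, doc 1 spans the empty range 3..3, and doc 2 spans 3..5, so only docs 0 and 2 are yielded:

// Sketch: which docs a start-offset column marks as non-null.
fn non_null_docs(starts: &[u32]) -> Vec<u32> {
    (0..starts.len() as u32 - 1)
        .filter(|&doc| starts[doc as usize + 1] > starts[doc as usize])
        .collect()
}

fn main() {
    assert_eq!(non_null_docs(&[0, 3, 3, 5]), vec![0, 2]);
}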


@@ -1,9 +1,10 @@
use std::io::{self, Write};
use std::io;
use std::sync::Arc;
mod set;
mod set_block;
use common::file_slice::FileSlice;
use common::{BinarySerializable, OwnedBytes, VInt};
pub use set::{SelectCursor, Set, SetCodec};
use set_block::{
@@ -11,7 +12,7 @@ use set_block::{
};
use crate::iterable::Iterable;
use crate::{DocId, InvalidData, RowId};
use crate::{DocId, RowId};
/// The threshold for the number of elements after which we switch to dense block encoding.
///
@@ -88,7 +89,7 @@ pub struct OptionalIndex {
impl Iterable<u32> for &OptionalIndex {
fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
Box::new(self.iter_docs())
Box::new(self.iter_non_null_docs())
}
}
@@ -268,8 +269,8 @@ impl OptionalIndex {
);
let mut buffer = Vec::new();
serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap();
let bytes = OwnedBytes::new(buffer);
open_optional_index(bytes).unwrap()
let file_slice = FileSlice::from(buffer);
open_optional_index(file_slice).unwrap()
}
pub fn num_docs(&self) -> RowId {
@@ -280,8 +281,9 @@ impl OptionalIndex {
self.num_non_null_docs
}
pub fn iter_docs(&self) -> impl Iterator<Item = RowId> + '_ {
// TODO optimize
pub fn iter_non_null_docs(&self) -> impl Iterator<Item = RowId> + '_ {
// TODO optimize. We could iterate over the blocks directly.
// We use the dense value ids and retrieve the doc ids via select.
let mut select_batch = self.select_cursor();
(0..self.num_non_null_docs).map(move |rank| select_batch.select(rank))
}
@@ -334,38 +336,6 @@ enum Block<'a> {
Sparse(SparseBlock<'a>),
}
#[derive(Debug, Copy, Clone)]
enum OptionalIndexCodec {
Dense = 0,
Sparse = 1,
}
impl OptionalIndexCodec {
fn to_code(self) -> u8 {
self as u8
}
fn try_from_code(code: u8) -> Result<Self, InvalidData> {
match code {
0 => Ok(Self::Dense),
1 => Ok(Self::Sparse),
_ => Err(InvalidData),
}
}
}
impl BinarySerializable for OptionalIndexCodec {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
writer.write_all(&[self.to_code()])
}
fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
let optional_codec_code = u8::deserialize(reader)?;
let optional_codec = Self::try_from_code(optional_codec_code)?;
Ok(optional_codec)
}
}
fn serialize_optional_index_block(block_els: &[u16], out: &mut impl io::Write) -> io::Result<()> {
let is_sparse = is_sparse(block_els.len() as u32);
if is_sparse {
@@ -517,10 +487,17 @@ fn deserialize_optional_index_block_metadatas(
(block_metas.into_boxed_slice(), non_null_rows_before_block)
}
pub fn open_optional_index(bytes: OwnedBytes) -> io::Result<OptionalIndex> {
let (mut bytes, num_non_empty_blocks_bytes) = bytes.rsplit(2);
let num_non_empty_block_bytes =
u16::from_le_bytes(num_non_empty_blocks_bytes.as_slice().try_into().unwrap());
pub fn open_optional_index(file_slice: FileSlice) -> io::Result<OptionalIndex> {
let (bytes, num_non_empty_blocks_bytes) = file_slice.split_from_end(2);
let num_non_empty_block_bytes = u16::from_le_bytes(
num_non_empty_blocks_bytes
.read_bytes()?
.as_slice()
.try_into()
.unwrap(),
);
let mut bytes = bytes.read_bytes()?;
let num_docs = VInt::deserialize_u64(&mut bytes)? as u32;
let block_metas_num_bytes =
num_non_empty_block_bytes as usize * SERIALIZED_BLOCK_META_NUM_BYTES;


@@ -59,7 +59,7 @@ fn test_with_random_sets_simple() {
let vals = 10..ELEMENTS_PER_BLOCK * 2;
let mut out: Vec<u8> = Vec::new();
serialize_optional_index(&vals, 100, &mut out).unwrap();
let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
let null_index = open_optional_index(FileSlice::from(out)).unwrap();
let ranks: Vec<u32> = (65_472u32..65_473u32).collect();
let els: Vec<u32> = ranks.iter().copied().map(|rank| rank + 10).collect();
let mut select_cursor = null_index.select_cursor();
@@ -102,7 +102,7 @@ impl<'a> Iterable<RowId> for &'a [bool] {
fn test_null_index(data: &[bool]) {
let mut out: Vec<u8> = Vec::new();
serialize_optional_index(&data, data.len() as RowId, &mut out).unwrap();
let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
let null_index = open_optional_index(FileSlice::from(out)).unwrap();
let orig_idx_with_value: Vec<u32> = data
.iter()
.enumerate()
@@ -164,7 +164,11 @@ fn test_optional_index_large() {
fn test_optional_index_iter_aux(row_ids: &[RowId], num_rows: RowId) {
let optional_index = OptionalIndex::for_test(num_rows, row_ids);
assert_eq!(optional_index.num_docs(), num_rows);
assert!(optional_index.iter_docs().eq(row_ids.iter().copied()));
assert!(
optional_index
.iter_non_null_docs()
.eq(row_ids.iter().copied())
);
}
#[test]
@@ -219,3 +223,170 @@ fn test_optional_index_for_tests() {
assert!(!optional_index.contains(3));
assert_eq!(optional_index.num_docs(), 4);
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use test::Bencher;
use super::*;
const TOTAL_NUM_VALUES: u32 = 1_000_000;
fn gen_bools(fill_ratio: f64) -> OptionalIndex {
let mut out = Vec::new();
let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
let vals: Vec<RowId> = (0..TOTAL_NUM_VALUES)
.map(|_| rng.gen_bool(fill_ratio))
.enumerate()
.filter(|(_pos, val)| *val)
.map(|(pos, _)| pos as RowId)
.collect();
serialize_optional_index(&&vals[..], TOTAL_NUM_VALUES, &mut out).unwrap();
open_optional_index(FileSlice::from(out)).unwrap()
}
fn random_range_iterator(
start: u32,
end: u32,
avg_step_size: u32,
avg_deviation: u32,
) -> impl Iterator<Item = u32> {
let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
let mut current = start;
std::iter::from_fn(move || {
current += rng.gen_range(avg_step_size - avg_deviation..=avg_step_size + avg_deviation);
if current >= end { None } else { Some(current) }
})
}
fn n_percent_step_iterator(percent: f32, num_values: u32) -> impl Iterator<Item = u32> {
let ratio = percent / 100.0;
let step_size = (1f32 / ratio) as u32;
let deviation = step_size - 1;
random_range_iterator(0, num_values, step_size, deviation)
}
fn walk_over_data(codec: &OptionalIndex, avg_step_size: u32) -> Option<u32> {
walk_over_data_from_positions(
codec,
random_range_iterator(0, TOTAL_NUM_VALUES, avg_step_size, 0),
)
}
fn walk_over_data_from_positions(
codec: &OptionalIndex,
positions: impl Iterator<Item = u32>,
) -> Option<u32> {
let mut dense_idx: Option<u32> = None;
for idx in positions {
dense_idx = dense_idx.or(codec.rank_if_exists(idx));
}
dense_idx
}
#[bench]
fn bench_translate_orig_to_codec_1percent_filled_10percent_hit(bench: &mut Bencher) {
let codec = gen_bools(0.01f64);
bench.iter(|| walk_over_data(&codec, 100));
}
#[bench]
fn bench_translate_orig_to_codec_5percent_filled_10percent_hit(bench: &mut Bencher) {
let codec = gen_bools(0.05f64);
bench.iter(|| walk_over_data(&codec, 100));
}
#[bench]
fn bench_translate_orig_to_codec_5percent_filled_1percent_hit(bench: &mut Bencher) {
let codec = gen_bools(0.05f64);
bench.iter(|| walk_over_data(&codec, 1000));
}
#[bench]
fn bench_translate_orig_to_codec_full_scan_1percent_filled(bench: &mut Bencher) {
let codec = gen_bools(0.01f64);
bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
}
#[bench]
fn bench_translate_orig_to_codec_full_scan_10percent_filled(bench: &mut Bencher) {
let codec = gen_bools(0.1f64);
bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
}
#[bench]
fn bench_translate_orig_to_codec_full_scan_90percent_filled(bench: &mut Bencher) {
let codec = gen_bools(0.9f64);
bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
}
#[bench]
fn bench_translate_orig_to_codec_10percent_filled_1percent_hit(bench: &mut Bencher) {
let codec = gen_bools(0.1f64);
bench.iter(|| walk_over_data(&codec, 100));
}
#[bench]
fn bench_translate_orig_to_codec_50percent_filled_1percent_hit(bench: &mut Bencher) {
let codec = gen_bools(0.5f64);
bench.iter(|| walk_over_data(&codec, 100));
}
#[bench]
fn bench_translate_orig_to_codec_90percent_filled_1percent_hit(bench: &mut Bencher) {
let codec = gen_bools(0.9f64);
bench.iter(|| walk_over_data(&codec, 100));
}
#[bench]
fn bench_translate_codec_to_orig_1percent_filled_0comma005percent_hit(bench: &mut Bencher) {
bench_translate_codec_to_orig_util(0.01f64, 0.005f32, bench);
}
#[bench]
fn bench_translate_codec_to_orig_10percent_filled_0comma005percent_hit(bench: &mut Bencher) {
bench_translate_codec_to_orig_util(0.1f64, 0.005f32, bench);
}
#[bench]
fn bench_translate_codec_to_orig_1percent_filled_10percent_hit(bench: &mut Bencher) {
bench_translate_codec_to_orig_util(0.01f64, 10f32, bench);
}
#[bench]
fn bench_translate_codec_to_orig_1percent_filled_full_scan(bench: &mut Bencher) {
bench_translate_codec_to_orig_util(0.01f64, 100f32, bench);
}
fn bench_translate_codec_to_orig_util(
percent_filled: f64,
percent_hit: f32,
bench: &mut Bencher,
) {
let codec = gen_bools(percent_filled);
let num_non_nulls = codec.num_non_nulls();
let idxs: Vec<u32> = if percent_hit == 100.0f32 {
(0..num_non_nulls).collect()
} else {
n_percent_step_iterator(percent_hit, num_non_nulls).collect()
};
let mut output = vec![0u32; idxs.len()];
bench.iter(|| {
output.copy_from_slice(&idxs[..]);
codec.select_batch(&mut output);
});
}
#[bench]
fn bench_translate_codec_to_orig_90percent_filled_0comma005percent_hit(bench: &mut Bencher) {
bench_translate_codec_to_orig_util(0.9f64, 0.005, bench);
}
#[bench]
fn bench_translate_codec_to_orig_90percent_filled_full_scan(bench: &mut Bencher) {
bench_translate_codec_to_orig_util(0.9f64, 100.0f32, bench);
}
}


@@ -1,7 +1,8 @@
use std::io;
use std::io::Write;
use common::{CountingWriter, OwnedBytes};
use common::file_slice::FileSlice;
use common::{CountingWriter, HasLen};
use super::OptionalIndex;
use super::multivalued_index::SerializableMultivalueIndex;
@@ -65,27 +66,28 @@ pub fn serialize_column_index(
/// Open a serialized column index.
pub fn open_column_index(
mut bytes: OwnedBytes,
file_slice: FileSlice,
format_version: Version,
) -> io::Result<ColumnIndex> {
if bytes.is_empty() {
if file_slice.len() == 0 {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"Failed to deserialize column index. Empty buffer.",
));
}
let cardinality_code = bytes[0];
let (header, body) = file_slice.split(1);
let cardinality_code = header.read_bytes()?.as_slice()[0];
let cardinality = Cardinality::try_from_code(cardinality_code)?;
bytes.advance(1);
match cardinality {
Cardinality::Full => Ok(ColumnIndex::Full),
Cardinality::Optional => {
let optional_index = super::optional_index::open_optional_index(bytes)?;
let optional_index = super::optional_index::open_optional_index(body)?;
Ok(ColumnIndex::Optional(optional_index))
}
Cardinality::Multivalued => {
let multivalue_index =
super::multivalued_index::open_multivalued_index(bytes, format_version)?;
super::multivalued_index::open_multivalued_index(body, format_version)?;
Ok(ColumnIndex::Multivalued(multivalue_index))
}
}


@@ -7,13 +7,15 @@
//! - Monotonically map values to u64/u128
use std::fmt::Debug;
use std::ops::{Range, RangeInclusive};
use std::ops::Range;
use std::sync::Arc;
use downcast_rs::DowncastSync;
pub use monotonic_mapping::{MonotonicallyMappableToU64, StrictlyMonotonicFn};
pub use monotonic_mapping_u128::MonotonicallyMappableToU128;
use crate::column::ValueRange;
mod merge;
pub(crate) mod monotonic_mapping;
pub(crate) mod monotonic_mapping_u128;
@@ -27,8 +29,7 @@ mod monotonic_column;
pub(crate) use merge::MergedColumnValues;
pub use stats::ColumnStats;
pub use u64_based::{
ALL_U64_CODEC_TYPES, CodecType, load_u64_based_column_values,
serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
ALL_U64_CODEC_TYPES, CodecType, load_u64_based_column_values, serialize_u64_based_column_values,
};
pub use u128_based::{
CompactSpaceU64Accessor, open_u128_as_compact_u64, open_u128_mapped,
@@ -109,6 +110,307 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
}
}
/// Load the values for the provided docids.
///
/// The values are filtered by the provided value range.
fn get_vals_in_value_range(
&self,
input_indexes: &[u32],
input_doc_ids: &[u32],
output: &mut Vec<crate::ComparableDoc<Option<T>, crate::DocId>>,
value_range: ValueRange<T>,
) {
let len = input_indexes.len();
let mut read_head = 0;
match value_range {
ValueRange::All => {
while read_head + 3 < len {
let idx0 = input_indexes[read_head];
let idx1 = input_indexes[read_head + 1];
let idx2 = input_indexes[read_head + 2];
let idx3 = input_indexes[read_head + 3];
let doc0 = input_doc_ids[read_head];
let doc1 = input_doc_ids[read_head + 1];
let doc2 = input_doc_ids[read_head + 2];
let doc3 = input_doc_ids[read_head + 3];
let val0 = self.get_val(idx0);
let val1 = self.get_val(idx1);
let val2 = self.get_val(idx2);
let val3 = self.get_val(idx3);
output.push(crate::ComparableDoc {
doc: doc0,
sort_key: Some(val0),
});
output.push(crate::ComparableDoc {
doc: doc1,
sort_key: Some(val1),
});
output.push(crate::ComparableDoc {
doc: doc2,
sort_key: Some(val2),
});
output.push(crate::ComparableDoc {
doc: doc3,
sort_key: Some(val3),
});
read_head += 4;
}
}
ValueRange::Inclusive(ref range) => {
while read_head + 3 < len {
let idx0 = input_indexes[read_head];
let idx1 = input_indexes[read_head + 1];
let idx2 = input_indexes[read_head + 2];
let idx3 = input_indexes[read_head + 3];
let doc0 = input_doc_ids[read_head];
let doc1 = input_doc_ids[read_head + 1];
let doc2 = input_doc_ids[read_head + 2];
let doc3 = input_doc_ids[read_head + 3];
let val0 = self.get_val(idx0);
let val1 = self.get_val(idx1);
let val2 = self.get_val(idx2);
let val3 = self.get_val(idx3);
if range.contains(&val0) {
output.push(crate::ComparableDoc {
doc: doc0,
sort_key: Some(val0),
});
}
if range.contains(&val1) {
output.push(crate::ComparableDoc {
doc: doc1,
sort_key: Some(val1),
});
}
if range.contains(&val2) {
output.push(crate::ComparableDoc {
doc: doc2,
sort_key: Some(val2),
});
}
if range.contains(&val3) {
output.push(crate::ComparableDoc {
doc: doc3,
sort_key: Some(val3),
});
}
read_head += 4;
}
}
ValueRange::GreaterThan(ref threshold, _) => {
while read_head + 3 < len {
let idx0 = input_indexes[read_head];
let idx1 = input_indexes[read_head + 1];
let idx2 = input_indexes[read_head + 2];
let idx3 = input_indexes[read_head + 3];
let doc0 = input_doc_ids[read_head];
let doc1 = input_doc_ids[read_head + 1];
let doc2 = input_doc_ids[read_head + 2];
let doc3 = input_doc_ids[read_head + 3];
let val0 = self.get_val(idx0);
let val1 = self.get_val(idx1);
let val2 = self.get_val(idx2);
let val3 = self.get_val(idx3);
if val0 > *threshold {
output.push(crate::ComparableDoc {
doc: doc0,
sort_key: Some(val0),
});
}
if val1 > *threshold {
output.push(crate::ComparableDoc {
doc: doc1,
sort_key: Some(val1),
});
}
if val2 > *threshold {
output.push(crate::ComparableDoc {
doc: doc2,
sort_key: Some(val2),
});
}
if val3 > *threshold {
output.push(crate::ComparableDoc {
doc: doc3,
sort_key: Some(val3),
});
}
read_head += 4;
}
}
ValueRange::GreaterThanOrEqual(ref threshold, _) => {
while read_head + 3 < len {
let idx0 = input_indexes[read_head];
let idx1 = input_indexes[read_head + 1];
let idx2 = input_indexes[read_head + 2];
let idx3 = input_indexes[read_head + 3];
let doc0 = input_doc_ids[read_head];
let doc1 = input_doc_ids[read_head + 1];
let doc2 = input_doc_ids[read_head + 2];
let doc3 = input_doc_ids[read_head + 3];
let val0 = self.get_val(idx0);
let val1 = self.get_val(idx1);
let val2 = self.get_val(idx2);
let val3 = self.get_val(idx3);
if val0 >= *threshold {
output.push(crate::ComparableDoc {
doc: doc0,
sort_key: Some(val0),
});
}
if val1 >= *threshold {
output.push(crate::ComparableDoc {
doc: doc1,
sort_key: Some(val1),
});
}
if val2 >= *threshold {
output.push(crate::ComparableDoc {
doc: doc2,
sort_key: Some(val2),
});
}
if val3 >= *threshold {
output.push(crate::ComparableDoc {
doc: doc3,
sort_key: Some(val3),
});
}
read_head += 4;
}
}
ValueRange::LessThan(ref threshold, _) => {
while read_head + 3 < len {
let idx0 = input_indexes[read_head];
let idx1 = input_indexes[read_head + 1];
let idx2 = input_indexes[read_head + 2];
let idx3 = input_indexes[read_head + 3];
let doc0 = input_doc_ids[read_head];
let doc1 = input_doc_ids[read_head + 1];
let doc2 = input_doc_ids[read_head + 2];
let doc3 = input_doc_ids[read_head + 3];
let val0 = self.get_val(idx0);
let val1 = self.get_val(idx1);
let val2 = self.get_val(idx2);
let val3 = self.get_val(idx3);
if val0 < *threshold {
output.push(crate::ComparableDoc {
doc: doc0,
sort_key: Some(val0),
});
}
if val1 < *threshold {
output.push(crate::ComparableDoc {
doc: doc1,
sort_key: Some(val1),
});
}
if val2 < *threshold {
output.push(crate::ComparableDoc {
doc: doc2,
sort_key: Some(val2),
});
}
if val3 < *threshold {
output.push(crate::ComparableDoc {
doc: doc3,
sort_key: Some(val3),
});
}
read_head += 4;
}
}
ValueRange::LessThanOrEqual(ref threshold, _) => {
while read_head + 3 < len {
let idx0 = input_indexes[read_head];
let idx1 = input_indexes[read_head + 1];
let idx2 = input_indexes[read_head + 2];
let idx3 = input_indexes[read_head + 3];
let doc0 = input_doc_ids[read_head];
let doc1 = input_doc_ids[read_head + 1];
let doc2 = input_doc_ids[read_head + 2];
let doc3 = input_doc_ids[read_head + 3];
let val0 = self.get_val(idx0);
let val1 = self.get_val(idx1);
let val2 = self.get_val(idx2);
let val3 = self.get_val(idx3);
if val0 <= *threshold {
output.push(crate::ComparableDoc {
doc: doc0,
sort_key: Some(val0),
});
}
if val1 <= *threshold {
output.push(crate::ComparableDoc {
doc: doc1,
sort_key: Some(val1),
});
}
if val2 <= *threshold {
output.push(crate::ComparableDoc {
doc: doc2,
sort_key: Some(val2),
});
}
if val3 <= *threshold {
output.push(crate::ComparableDoc {
doc: doc3,
sort_key: Some(val3),
});
}
read_head += 4;
}
}
}
// Process remaining elements (0 to 3)
while read_head < len {
let idx = input_indexes[read_head];
let doc = input_doc_ids[read_head];
let val = self.get_val(idx);
let matches = match value_range {
// `value_range` is not moved here: the arms bind by reference, so the outer
// value can be matched again on every iteration.
ValueRange::All => true,
ValueRange::Inclusive(ref r) => r.contains(&val),
ValueRange::GreaterThan(ref t, _) => val > *t,
ValueRange::GreaterThanOrEqual(ref t, _) => val >= *t,
ValueRange::LessThan(ref t, _) => val < *t,
ValueRange::LessThanOrEqual(ref t, _) => val <= *t,
};
if matches {
output.push(crate::ComparableDoc {
doc,
sort_key: Some(val),
});
}
read_head += 1;
}
}
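The six unrolled loops above share one shape: load four (index, doc) pairs, test each value, push the matches. Unrolling four at a time lets the `match` on `value_range` be hoisted out of the hot loop. A compact scalar equivalent, sketched here for orientation only (tuples stand in for `ComparableDoc`):

// Sketch: the scalar shape each unrolled arm above specializes.
fn push_matching<T: Copy>(
    get_val: impl Fn(u32) -> T,
    pred: impl Fn(&T) -> bool,
    indexes: &[u32],
    doc_ids: &[u32],
    output: &mut Vec<(u32, Option<T>)>,
) {
    for (&idx, &doc) in indexes.iter().zip(doc_ids) {
        let val = get_val(idx);
        if pred(&val) {
            output.push((doc, Some(val)));
        }
    }
}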
/// Fills an output buffer with the fast field values
/// associated with the `DocId` going from
/// `start` to `start + output.len()`.
@@ -129,15 +431,54 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
/// Note that position == docid for single value fast fields
fn get_row_ids_for_value_range(
&self,
value_range: RangeInclusive<T>,
value_range: ValueRange<T>,
row_id_range: Range<RowId>,
row_id_hits: &mut Vec<RowId>,
) {
let row_id_range = row_id_range.start..row_id_range.end.min(self.num_vals());
for idx in row_id_range {
let val = self.get_val(idx);
if value_range.contains(&val) {
row_id_hits.push(idx);
match value_range {
ValueRange::Inclusive(range) => {
for idx in row_id_range {
let val = self.get_val(idx);
if range.contains(&val) {
row_id_hits.push(idx);
}
}
}
ValueRange::GreaterThan(threshold, _) => {
for idx in row_id_range {
let val = self.get_val(idx);
if val > threshold {
row_id_hits.push(idx);
}
}
}
ValueRange::GreaterThanOrEqual(threshold, _) => {
for idx in row_id_range {
let val = self.get_val(idx);
if val >= threshold {
row_id_hits.push(idx);
}
}
}
ValueRange::LessThan(threshold, _) => {
for idx in row_id_range {
let val = self.get_val(idx);
if val < threshold {
row_id_hits.push(idx);
}
}
}
ValueRange::LessThanOrEqual(threshold, _) => {
for idx in row_id_range {
let val = self.get_val(idx);
if val <= threshold {
row_id_hits.push(idx);
}
}
}
ValueRange::All => {
row_id_hits.extend(row_id_range);
}
}
}
@@ -193,6 +534,17 @@ impl<T: PartialOrd + Default> ColumnValues<T> for EmptyColumnValues {
fn num_vals(&self) -> u32 {
0
}
fn get_vals_in_value_range(
&self,
input_indexes: &[u32],
input_doc_ids: &[u32],
output: &mut Vec<crate::ComparableDoc<Option<T>, crate::DocId>>,
value_range: ValueRange<T>,
) {
let _ = (input_indexes, input_doc_ids, output, value_range);
panic!("Internal Error: Called get_vals_in_value_range of empty column.")
}
}
impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnValues<T>> {
@@ -206,6 +558,18 @@ impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnV
self.as_ref().get_vals_opt(indexes, output)
}
#[inline(always)]
fn get_vals_in_value_range(
&self,
input_indexes: &[u32],
input_doc_ids: &[u32],
output: &mut Vec<crate::ComparableDoc<Option<T>, crate::DocId>>,
value_range: ValueRange<T>,
) {
self.as_ref()
.get_vals_in_value_range(input_indexes, input_doc_ids, output, value_range)
}
#[inline(always)]
fn min_value(&self) -> T {
self.as_ref().min_value()
@@ -234,7 +598,7 @@ impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnV
#[inline(always)]
fn get_row_ids_for_value_range(
&self,
range: RangeInclusive<T>,
range: ValueRange<T>,
doc_id_range: Range<u32>,
positions: &mut Vec<u32>,
) {


@@ -1,8 +1,9 @@
use std::fmt::Debug;
use std::marker::PhantomData;
use std::ops::{Range, RangeInclusive};
use std::ops::Range;
use crate::ColumnValues;
use crate::column::ValueRange;
use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
struct MonotonicMappingColumn<C, T, Input> {
@@ -80,16 +81,52 @@ where
fn get_row_ids_for_value_range(
&self,
range: RangeInclusive<Output>,
range: ValueRange<Output>,
doc_id_range: Range<u32>,
positions: &mut Vec<u32>,
) {
self.from_column.get_row_ids_for_value_range(
self.monotonic_mapping.inverse(range.start().clone())
..=self.monotonic_mapping.inverse(range.end().clone()),
doc_id_range,
positions,
)
match range {
ValueRange::Inclusive(range) => self.from_column.get_row_ids_for_value_range(
ValueRange::Inclusive(
self.monotonic_mapping.inverse(range.start().clone())
..=self.monotonic_mapping.inverse(range.end().clone()),
),
doc_id_range,
positions,
),
ValueRange::All => self.from_column.get_row_ids_for_value_range(
ValueRange::All,
doc_id_range,
positions,
),
ValueRange::GreaterThan(threshold, _) => self.from_column.get_row_ids_for_value_range(
ValueRange::GreaterThan(self.monotonic_mapping.inverse(threshold), false),
doc_id_range,
positions,
),
ValueRange::GreaterThanOrEqual(threshold, _) => {
self.from_column.get_row_ids_for_value_range(
ValueRange::GreaterThanOrEqual(
self.monotonic_mapping.inverse(threshold),
false,
),
doc_id_range,
positions,
)
}
ValueRange::LessThan(threshold, _) => self.from_column.get_row_ids_for_value_range(
ValueRange::LessThan(self.monotonic_mapping.inverse(threshold), false),
doc_id_range,
positions,
),
ValueRange::LessThanOrEqual(threshold, _) => {
self.from_column.get_row_ids_for_value_range(
ValueRange::LessThanOrEqual(self.monotonic_mapping.inverse(threshold), false),
doc_id_range,
positions,
)
}
}
}
// We voluntarily do not implement get_range as it yields a regression,
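This delegation works because a strictly monotonic (increasing) mapping preserves ordering in both directions, so a threshold in the output space can be answered by the wrapped column after pushing the threshold through `inverse`. A standalone illustration using a sign-bit-flip mapping of the kind tantivy uses for i64 (the names here are illustrative, not the crate's API):

```rust
// Sign-bit flip: a strictly increasing bijection between i64 and u64.
fn i64_to_u64(val: i64) -> u64 {
    (val as u64) ^ (1u64 << 63)
}

fn main() {
    let threshold = -2i64;
    let candidates = [-10i64, -2, 0, 7];
    // Filtering in i64 space...
    let direct: Vec<i64> = candidates.iter().copied().filter(|v| *v > threshold).collect();
    // ...agrees with filtering in the mapped u64 space, which is exactly why
    // `GreaterThan(t)` can be rewritten as `GreaterThan(inverse(t))`.
    let mapped: Vec<i64> = candidates
        .iter()
        .copied()
        .filter(|v| i64_to_u64(*v) > i64_to_u64(threshold))
        .collect();
    assert_eq!(direct, mapped);
}
```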

View File

@@ -1,7 +1,7 @@
use std::fmt::Debug;
use std::net::Ipv6Addr;
/// Montonic maps a value to u128 value space
/// Monotonically maps a value to the u128 value space.
/// Monotonic mapping enables `PartialOrd` on u128 space without conversion to original space.
pub trait MonotonicallyMappableToU128: 'static + PartialOrd + Copy + Debug + Send + Sync {
/// Converts a value to u128.

View File

@@ -2,7 +2,8 @@ use std::io;
use std::io::Write;
use std::num::NonZeroU64;
use common::{BinarySerializable, VInt};
use common::file_slice::FileSlice;
use common::{BinarySerializable, HasLen, VInt};
use crate::RowId;
@@ -27,6 +28,55 @@ impl ColumnStats {
}
}
impl ColumnStats {
/// Deserialize `ColumnStats` from the front of the given FileSlice, and return the stats and
/// the remaining suffix FileSlice.
pub fn deserialize_from_tail(file_slice: FileSlice) -> io::Result<(Self, FileSlice)> {
// [`deserialize_with_size`] deserializes 4 variable-width encoded u64s, each of which
// can occupy up to 10 bytes in the worst case: this is where the 40 comes from.
let (stats, _) = file_slice.clone().split(40.min(file_slice.len()));
let mut stats = stats.read_bytes()?;
let (stats, stats_nbytes) = ColumnStats::deserialize_with_size(&mut stats)?;
let (_, remainder) = file_slice.split(stats_nbytes);
Ok((stats, remainder))
}
/// Same as [`BinarySerializable::deserialize`] but also returns the number of bytes
/// consumed from the reader `R`
fn deserialize_with_size<R: io::Read>(reader: &mut R) -> io::Result<(Self, usize)> {
let mut nbytes = 0;
let (min_value, len) = VInt::deserialize_with_size(reader)?;
let min_value = min_value.0;
nbytes += len;
let (gcd, len) = VInt::deserialize_with_size(reader)?;
let gcd = gcd.0;
let gcd = NonZeroU64::new(gcd)
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "GCD of 0 is forbidden"))?;
nbytes += len;
let (amplitude, len) = VInt::deserialize_with_size(reader)?;
let amplitude = amplitude.0 * gcd.get();
let max_value = min_value + amplitude;
nbytes += len;
let (num_rows, len) = VInt::deserialize_with_size(reader)?;
let num_rows = num_rows.0 as RowId;
nbytes += len;
Ok((
ColumnStats {
min_value,
max_value,
num_rows,
gcd,
},
nbytes,
))
}
}
impl BinarySerializable for ColumnStats {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
VInt(self.min_value).serialize(writer)?;

View File

@@ -185,10 +185,10 @@ impl CompactSpaceBuilder {
let mut covered_space = Vec::with_capacity(self.blanks.len());
// beginning of the blanks
if let Some(first_blank_start) = self.blanks.first().map(RangeInclusive::start) {
if *first_blank_start != 0 {
covered_space.push(0..=first_blank_start - 1);
}
if let Some(first_blank_start) = self.blanks.first().map(RangeInclusive::start)
&& *first_blank_start != 0
{
covered_space.push(0..=first_blank_start - 1);
}
// Between the blanks
@@ -202,10 +202,10 @@ impl CompactSpaceBuilder {
covered_space.extend(between_blanks);
// end of the blanks
if let Some(last_blank_end) = self.blanks.last().map(RangeInclusive::end) {
if *last_blank_end != u128::MAX {
covered_space.push(last_blank_end + 1..=u128::MAX);
}
if let Some(last_blank_end) = self.blanks.last().map(RangeInclusive::end)
&& *last_blank_end != u128::MAX
{
covered_space.push(last_blank_end + 1..=u128::MAX);
}
if covered_space.is_empty() {

View File

@@ -25,6 +25,7 @@ use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
use tantivy_bitpacker::{BitPacker, BitUnpacker};
use crate::RowId;
use crate::column::ValueRange;
use crate::column_values::ColumnValues;
/// The cost per blank is quite hard actually, since blanks are delta encoded, the actual cost of
@@ -338,14 +339,48 @@ impl ColumnValues<u64> for CompactSpaceU64Accessor {
#[inline]
fn get_row_ids_for_value_range(
&self,
value_range: RangeInclusive<u64>,
value_range: ValueRange<u64>,
position_range: Range<u32>,
positions: &mut Vec<u32>,
) {
let value_range = self.0.compact_to_u128(*value_range.start() as u32)
..=self.0.compact_to_u128(*value_range.end() as u32);
self.0
.get_row_ids_for_value_range(value_range, position_range, positions)
match value_range {
ValueRange::Inclusive(value_range) => {
let value_range = ValueRange::Inclusive(
self.0.compact_to_u128(*value_range.start() as u32)
..=self.0.compact_to_u128(*value_range.end() as u32),
);
self.0
.get_row_ids_for_value_range(value_range, position_range, positions)
}
ValueRange::All => {
let position_range = position_range.start..position_range.end.min(self.num_vals());
positions.extend(position_range);
}
ValueRange::GreaterThan(threshold, _) => {
let value_range =
ValueRange::GreaterThan(self.0.compact_to_u128(threshold as u32), false);
self.0
.get_row_ids_for_value_range(value_range, position_range, positions)
}
ValueRange::GreaterThanOrEqual(threshold, _) => {
let value_range =
ValueRange::GreaterThanOrEqual(self.0.compact_to_u128(threshold as u32), false);
self.0
.get_row_ids_for_value_range(value_range, position_range, positions)
}
ValueRange::LessThan(threshold, _) => {
let value_range =
ValueRange::LessThan(self.0.compact_to_u128(threshold as u32), false);
self.0
.get_row_ids_for_value_range(value_range, position_range, positions)
}
ValueRange::LessThanOrEqual(threshold, _) => {
let value_range =
ValueRange::LessThanOrEqual(self.0.compact_to_u128(threshold as u32), false);
self.0
.get_row_ids_for_value_range(value_range, position_range, positions)
}
}
}
}
@@ -375,10 +410,47 @@ impl ColumnValues<u128> for CompactSpaceDecompressor {
#[inline]
fn get_row_ids_for_value_range(
&self,
value_range: RangeInclusive<u128>,
value_range: ValueRange<u128>,
position_range: Range<u32>,
positions: &mut Vec<u32>,
) {
let value_range = match value_range {
ValueRange::Inclusive(value_range) => value_range,
ValueRange::All => {
let position_range = position_range.start..position_range.end.min(self.num_vals());
positions.extend(position_range);
return;
}
ValueRange::GreaterThan(threshold, _) => {
let max = self.max_value();
if threshold >= max {
return;
}
(threshold + 1)..=max
}
ValueRange::GreaterThanOrEqual(threshold, _) => {
let max = self.max_value();
if threshold > max {
return;
}
threshold..=max
}
ValueRange::LessThan(threshold, _) => {
let min = self.min_value();
if threshold <= min {
return;
}
min..=(threshold - 1)
}
ValueRange::LessThanOrEqual(threshold, _) => {
let min = self.min_value();
if threshold < min {
return;
}
min..=threshold
}
};
if value_range.start() > value_range.end() {
return;
}
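Since u128 is a discrete domain, each open threshold can be rewritten as an inclusive range after clamping against the column's min/max, which is what the block above does. A small standalone check of the `GreaterThan` rewrite:

```rust
// Sanity check for the threshold -> inclusive-range rewrite used above,
// assuming a discrete integer domain (here u128). Standalone sketch.
fn greater_than_as_inclusive(threshold: u128, max: u128) -> Option<std::ops::RangeInclusive<u128>> {
    if threshold >= max {
        // Nothing can be strictly greater than `max`.
        return None;
    }
    Some((threshold + 1)..=max)
}

fn main() {
    let max = 100u128;
    for t in [0u128, 50, 99, 100] {
        let rewritten = greater_than_as_inclusive(t, max);
        for v in 0..=max {
            let direct = v > t;
            let via_range = rewritten.as_ref().map_or(false, |r| r.contains(&v));
            assert_eq!(direct, via_range, "mismatch at t={t}, v={v}");
        }
    }
}
```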
@@ -560,7 +632,7 @@ mod tests {
.collect::<Vec<_>>();
let mut positions = Vec::new();
decompressor.get_row_ids_for_value_range(
range,
ValueRange::Inclusive(range),
0..decompressor.num_vals(),
&mut positions,
);
@@ -604,7 +676,11 @@ mod tests {
let val = *val;
let pos = pos as u32;
let mut positions = Vec::new();
decomp.get_row_ids_for_value_range(val..=val, pos..pos + 1, &mut positions);
decomp.get_row_ids_for_value_range(
ValueRange::Inclusive(val..=val),
pos..pos + 1,
&mut positions,
);
assert_eq!(positions, vec![pos]);
}
@@ -746,7 +822,11 @@ mod tests {
doc_id_range: Range<u32>,
) -> Vec<u32> {
let mut positions = Vec::new();
column.get_row_ids_for_value_range(value_range, doc_id_range, &mut positions);
column.get_row_ids_for_value_range(
ValueRange::Inclusive(value_range),
doc_id_range,
&mut positions,
);
positions
}
@@ -769,7 +849,7 @@ mod tests {
];
let mut out = Vec::new();
serialize_column_values_u128(&&vals[..], &mut out).unwrap();
let decomp = open_u128_mapped(OwnedBytes::new(out)).unwrap();
let decomp = open_u128_mapped(FileSlice::from(out)).unwrap();
let complete_range = 0..vals.len() as u32;
assert_eq!(
@@ -823,6 +903,7 @@ mod tests {
let _data = test_aux_vals(vals);
}
use common::file_slice::FileSlice;
use proptest::prelude::*;
fn num_strategy() -> impl Strategy<Value = u128> {

View File

@@ -5,7 +5,8 @@ use std::sync::Arc;
mod compact_space;
use common::{BinarySerializable, OwnedBytes, VInt};
use common::file_slice::FileSlice;
use common::{BinarySerializable, VInt};
pub use compact_space::{
CompactSpaceCompressor, CompactSpaceDecompressor, CompactSpaceU64Accessor,
};
@@ -101,8 +102,9 @@ impl U128FastFieldCodecType {
/// Returns the correct codec reader wrapped in the `Arc` for the data.
pub fn open_u128_mapped<T: MonotonicallyMappableToU128 + Debug>(
mut bytes: OwnedBytes,
file_slice: FileSlice,
) -> io::Result<Arc<dyn ColumnValues<T>>> {
let mut bytes = file_slice.read_bytes()?;
let header = U128Header::deserialize(&mut bytes)?;
assert_eq!(header.codec_type, U128FastFieldCodecType::CompactSpace);
let reader = CompactSpaceDecompressor::open(bytes)?;
@@ -120,7 +122,8 @@ pub fn open_u128_mapped<T: MonotonicallyMappableToU128 + Debug>(
/// # Notice
/// In case there are new codecs added, check for usages of `CompactSpaceDecompressorU64` and
/// also handle the new codecs.
pub fn open_u128_as_compact_u64(mut bytes: OwnedBytes) -> io::Result<Arc<dyn ColumnValues<u64>>> {
pub fn open_u128_as_compact_u64(file_slice: FileSlice) -> io::Result<Arc<dyn ColumnValues<u64>>> {
let mut bytes = file_slice.read_bytes()?;
let header = U128Header::deserialize(&mut bytes)?;
assert_eq!(header.codec_type, U128FastFieldCodecType::CompactSpace);
let reader = CompactSpaceU64Accessor::open(bytes)?;

View File

@@ -1,11 +1,14 @@
use std::io::{self, Write};
use std::num::NonZeroU64;
use std::ops::{Range, RangeInclusive};
use std::sync::{Arc, OnceLock};
use common::{BinarySerializable, OwnedBytes};
use common::file_slice::FileSlice;
use common::{BinarySerializable, HasLen, OwnedBytes};
use fastdivide::DividerU64;
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
use crate::column::ValueRange;
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
use crate::{ColumnValues, RowId};
@@ -13,9 +16,40 @@ use crate::{ColumnValues, RowId};
/// fast field is required.
#[derive(Clone)]
pub struct BitpackedReader {
data: OwnedBytes,
data: FileSlice,
bit_unpacker: BitUnpacker,
stats: ColumnStats,
blocks: Arc<[OnceLock<Block>]>,
}
impl BitpackedReader {
#[inline(always)]
fn unpack_val(&self, doc: u32) -> u64 {
let block_num = self.bit_unpacker.block_num(doc);
if block_num == 0 && self.blocks.is_empty() {
return 0;
}
let block = self.blocks[block_num].get_or_init(|| {
let block_range = self.bit_unpacker.block(block_num, self.data.len());
let offset = block_range.start;
let data = self
.data
.slice(block_range)
.read_bytes()
.expect("Failed to read column values.");
Block { offset, data }
});
self.bit_unpacker
.get_from_subset(doc, block.offset, &block.data)
}
}
struct Block {
offset: usize,
data: OwnedBytes,
}
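The `blocks` field gives the reader a per-block lazy cache: a block's bytes are fetched from the `FileSlice` only the first time a doc inside it is accessed, and concurrent readers share the result. A minimal sketch of the pattern (`load_block` stands in for the `read_bytes` call; none of these names are the crate's):

```rust
use std::sync::OnceLock;

// Minimal sketch of the per-block lazy-initialization pattern used by
// `BitpackedReader::unpack_val`.
struct LazyBlocks {
    blocks: Vec<OnceLock<Vec<u8>>>,
}

impl LazyBlocks {
    fn get(&self, block_num: usize, load_block: impl Fn(usize) -> Vec<u8>) -> &[u8] {
        // `get_or_init` runs the loader at most once per block, even with
        // concurrent readers; subsequent calls return the cached bytes.
        self.blocks[block_num].get_or_init(|| load_block(block_num))
    }
}

fn main() {
    let lazy = LazyBlocks { blocks: (0..4).map(|_| OnceLock::new()).collect() };
    let loaded = std::cell::Cell::new(0);
    let loader = |n: usize| { loaded.set(loaded.get() + 1); vec![n as u8; 8] };
    lazy.get(2, &loader);
    lazy.get(2, &loader); // cache hit: the loader is not called again
    assert_eq!(loaded.get(), 1);
}
```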
#[inline(always)]
@@ -57,8 +91,9 @@ fn transform_range_before_linear_transformation(
impl ColumnValues for BitpackedReader {
#[inline(always)]
fn get_val(&self, doc: u32) -> u64 {
self.stats.min_value + self.stats.gcd.get() * self.bit_unpacker.get(doc, &self.data)
self.stats.min_value + self.stats.gcd.get() * self.unpack_val(doc)
}
#[inline]
fn min_value(&self) -> u64 {
self.stats.min_value
@@ -72,24 +107,329 @@ impl ColumnValues for BitpackedReader {
self.stats.num_rows
}
fn get_vals_in_value_range(
&self,
input_indexes: &[u32],
input_doc_ids: &[u32],
output: &mut Vec<crate::ComparableDoc<Option<u64>, crate::DocId>>,
value_range: ValueRange<u64>,
) {
match value_range {
ValueRange::All => {
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
output.push(crate::ComparableDoc {
doc,
sort_key: Some(self.get_val(idx)),
});
}
}
ValueRange::Inclusive(range) => {
if let Some(transformed_range) =
transform_range_before_linear_transformation(&self.stats, range)
{
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
let raw_val = self.unpack_val(idx);
if transformed_range.contains(&raw_val) {
output.push(crate::ComparableDoc {
doc,
sort_key: Some(
self.stats.min_value + self.stats.gcd.get() * raw_val,
),
});
}
}
}
}
ValueRange::GreaterThan(threshold, _) => {
if threshold < self.stats.min_value {
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
output.push(crate::ComparableDoc {
doc,
sort_key: Some(self.get_val(idx)),
});
}
} else if threshold >= self.stats.max_value {
// All filtered out
} else {
let raw_threshold = (threshold - self.stats.min_value) / self.stats.gcd.get();
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
let raw_val = self.unpack_val(idx);
if raw_val > raw_threshold {
output.push(crate::ComparableDoc {
doc,
sort_key: Some(
self.stats.min_value + self.stats.gcd.get() * raw_val,
),
});
}
}
}
}
ValueRange::GreaterThanOrEqual(threshold, _) => {
if threshold <= self.stats.min_value {
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
output.push(crate::ComparableDoc {
doc,
sort_key: Some(self.get_val(idx)),
});
}
} else if threshold > self.stats.max_value {
// All filtered out
} else {
let diff = threshold - self.stats.min_value;
let gcd = self.stats.gcd.get();
let raw_threshold = diff.div_ceil(gcd);
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
let raw_val = self.unpack_val(idx);
if raw_val >= raw_threshold {
output.push(crate::ComparableDoc {
doc,
sort_key: Some(
self.stats.min_value + self.stats.gcd.get() * raw_val,
),
});
}
}
}
}
ValueRange::LessThan(threshold, _) => {
if threshold > self.stats.max_value {
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
output.push(crate::ComparableDoc {
doc,
sort_key: Some(self.get_val(idx)),
});
}
} else if threshold <= self.stats.min_value {
// All filtered out
} else {
let diff = threshold - self.stats.min_value;
let gcd = self.stats.gcd.get();
let raw_threshold = diff.div_ceil(gcd);
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
let raw_val = self.unpack_val(idx);
if raw_val < raw_threshold {
output.push(crate::ComparableDoc {
doc,
sort_key: Some(
self.stats.min_value + self.stats.gcd.get() * raw_val,
),
});
}
}
}
}
ValueRange::LessThanOrEqual(threshold, _) => {
if threshold >= self.stats.max_value {
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
output.push(crate::ComparableDoc {
doc,
sort_key: Some(self.get_val(idx)),
});
}
} else if threshold < self.stats.min_value {
// All filtered out
} else {
let diff = threshold - self.stats.min_value;
let gcd = self.stats.gcd.get();
let raw_threshold = diff / gcd;
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
let raw_val = self.unpack_val(idx);
if raw_val <= raw_threshold {
output.push(crate::ComparableDoc {
doc,
sort_key: Some(
self.stats.min_value + self.stats.gcd.get() * raw_val,
),
});
}
}
}
}
}
}
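All of the threshold arms above rely on the same identity: stored values satisfy `val = min_value + gcd * raw`, so a bound on `val` becomes a bound on `raw`, rounding up for `>=`-style bounds and down for `<=`-style bounds. A worked example with assumed numbers:

```rust
// Worked example of the value <-> raw translation used above, assuming the
// bitpacked representation val = min_value + gcd * raw.
fn main() {
    let min_value = 1000u64;
    let gcd = 50u64;
    let raws = [0u64, 1, 2, 3, 4]; // vals: 1000, 1050, 1100, 1150, 1200

    // `val >= 1120` must become `raw >= ceil((1120 - 1000) / 50) = 3`.
    let threshold = 1120u64;
    let raw_ge = (threshold - min_value).div_ceil(gcd);
    assert_eq!(raw_ge, 3);
    for raw in raws {
        let val = min_value + gcd * raw;
        assert_eq!(val >= threshold, raw >= raw_ge);
    }

    // `val <= 1120` rounds the other way: raw <= floor(120 / 50) = 2.
    let raw_le = (threshold - min_value) / gcd;
    assert_eq!(raw_le, 2);
    for raw in raws {
        let val = min_value + gcd * raw;
        assert_eq!(val <= threshold, raw <= raw_le);
    }
}
```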
fn get_row_ids_for_value_range(
&self,
range: RangeInclusive<u64>,
range: ValueRange<u64>,
doc_id_range: Range<u32>,
positions: &mut Vec<u32>,
) {
let Some(transformed_range) =
transform_range_before_linear_transformation(&self.stats, range)
else {
positions.clear();
return;
};
self.bit_unpacker.get_ids_for_value_range(
transformed_range,
doc_id_range,
&self.data,
positions,
);
match range {
ValueRange::All => {
positions.extend(doc_id_range);
return;
}
ValueRange::Inclusive(range) => {
let Some(transformed_range) =
transform_range_before_linear_transformation(&self.stats, range)
else {
positions.clear();
return;
};
// TODO: This does not use the `self.blocks` cache, because callers usually
// perform sequential, fairly dense reads. Fix this to iterate over blocks
// if that assumption turns out to be incorrect!
let data_range = self
.bit_unpacker
.block_oblivious_range(doc_id_range.clone(), self.data.len());
let data_offset = data_range.start;
let data_subset = self
.data
.slice(data_range)
.read_bytes()
.expect("Failed to read column values.");
self.bit_unpacker.get_ids_for_value_range_from_subset(
transformed_range,
doc_id_range,
data_offset,
&data_subset,
positions,
);
}
ValueRange::GreaterThan(threshold, _) => {
if threshold < self.stats.min_value {
positions.extend(doc_id_range);
return;
}
if threshold >= self.stats.max_value {
return;
}
let raw_threshold = (threshold - self.stats.min_value) / self.stats.gcd.get();
// We want raw > raw_threshold, but `get_ids_for_value_range_from_subset`
// takes a RangeInclusive, so we use (raw_threshold + 1)..=max_raw, where
// max_raw = (max_value - min_value) / gcd is the largest encodable raw value.
let max_raw = (self.stats.max_value - self.stats.min_value) / self.stats.gcd.get();
let transformed_range = (raw_threshold + 1)..=max_raw;
let data_range = self
.bit_unpacker
.block_oblivious_range(doc_id_range.clone(), self.data.len());
let data_offset = data_range.start;
let data_subset = self
.data
.slice(data_range)
.read_bytes()
.expect("Failed to read column values.");
self.bit_unpacker.get_ids_for_value_range_from_subset(
transformed_range,
doc_id_range,
data_offset,
&data_subset,
positions,
);
}
ValueRange::GreaterThanOrEqual(threshold, _) => {
if threshold <= self.stats.min_value {
positions.extend(doc_id_range);
return;
}
if threshold > self.stats.max_value {
return;
}
let diff = threshold - self.stats.min_value;
let gcd = self.stats.gcd.get();
let raw_threshold = diff.div_ceil(gcd);
// We want raw >= raw_threshold.
let max_raw = (self.stats.max_value - self.stats.min_value) / self.stats.gcd.get();
let transformed_range = raw_threshold..=max_raw;
let data_range = self
.bit_unpacker
.block_oblivious_range(doc_id_range.clone(), self.data.len());
let data_offset = data_range.start;
let data_subset = self
.data
.slice(data_range)
.read_bytes()
.expect("Failed to read column values.");
self.bit_unpacker.get_ids_for_value_range_from_subset(
transformed_range,
doc_id_range,
data_offset,
&data_subset,
positions,
);
}
ValueRange::LessThan(threshold, _) => {
if threshold > self.stats.max_value {
positions.extend(doc_id_range);
return;
}
if threshold <= self.stats.min_value {
return;
}
let diff = threshold - self.stats.min_value;
let gcd = self.stats.gcd.get();
// We want raw < raw_threshold_limit, i.e. raw <= raw_threshold_limit - 1.
let raw_threshold_limit = diff.div_ceil(gcd);
if raw_threshold_limit == 0 {
return;
}
let transformed_range = 0..=(raw_threshold_limit - 1);
let data_range = self
.bit_unpacker
.block_oblivious_range(doc_id_range.clone(), self.data.len());
let data_offset = data_range.start;
let data_subset = self
.data
.slice(data_range)
.read_bytes()
.expect("Failed to read column values.");
self.bit_unpacker.get_ids_for_value_range_from_subset(
transformed_range,
doc_id_range,
data_offset,
&data_subset,
positions,
);
}
ValueRange::LessThanOrEqual(threshold, _) => {
if threshold >= self.stats.max_value {
positions.extend(doc_id_range);
return;
}
if threshold < self.stats.min_value {
return;
}
let diff = threshold - self.stats.min_value;
let gcd = self.stats.gcd.get();
// We want raw <= raw_threshold.
let raw_threshold = diff / gcd;
let transformed_range = 0..=raw_threshold;
let data_range = self
.bit_unpacker
.block_oblivious_range(doc_id_range.clone(), self.data.len());
let data_offset = data_range.start;
let data_subset = self
.data
.slice(data_range)
.read_bytes()
.expect("Failed to read column values.");
self.bit_unpacker.get_ids_for_value_range_from_subset(
transformed_range,
doc_id_range,
data_offset,
&data_subset,
positions,
);
}
}
}
}
@@ -105,7 +445,7 @@ impl ColumnCodecEstimator for BitpackedCodecEstimator {
fn estimate(&self, stats: &ColumnStats) -> Option<u64> {
let num_bits_per_value = num_bits(stats);
Some(stats.num_bytes() + (stats.num_rows as u64 * (num_bits_per_value as u64) + 7) / 8)
Some(stats.num_bytes() + (stats.num_rows as u64 * (num_bits_per_value as u64)).div_ceil(8))
}
fn serialize(
@@ -133,14 +473,20 @@ impl ColumnCodec for BitpackedCodec {
type Estimator = BitpackedCodecEstimator;
/// Opens a fast field given a file.
fn load(mut data: OwnedBytes) -> io::Result<Self::ColumnValues> {
let stats = ColumnStats::deserialize(&mut data)?;
fn load(file_slice: FileSlice) -> io::Result<Self::ColumnValues> {
let (stats, data) = ColumnStats::deserialize_from_tail(file_slice)?;
let num_bits = num_bits(&stats);
let bit_unpacker = BitUnpacker::new(num_bits);
let block_count = bit_unpacker.block_count(data.len());
Ok(BitpackedReader {
data,
bit_unpacker,
stats,
blocks: (0..block_count).map(|_| OnceLock::new()).collect(),
})
}
}

View File

@@ -1,8 +1,10 @@
use std::io;
use std::io::Write;
use std::sync::Arc;
use std::{io, iter};
use std::ops::{Deref, DerefMut};
use std::sync::{Arc, OnceLock};
use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
use common::file_slice::FileSlice;
use common::{BinarySerializable, CountingWriter, DeserializeFrom, HasLen, OwnedBytes};
use fastdivide::DividerU64;
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
@@ -172,32 +174,63 @@ impl ColumnCodec<u64> for BlockwiseLinearCodec {
type Estimator = BlockwiseLinearEstimator;
fn load(mut bytes: OwnedBytes) -> io::Result<Self::ColumnValues> {
let stats = ColumnStats::deserialize(&mut bytes)?;
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
let footer_offset = bytes.len() - 4 - footer_len as usize;
let (data, mut footer) = bytes.split(footer_offset);
fn load(file_slice: FileSlice) -> io::Result<Self::ColumnValues> {
let (stats, body) = ColumnStats::deserialize_from_tail(file_slice)?;
let (_, footer) = body.clone().split_from_end(4);
let footer_len: u32 = footer.read_bytes()?.as_slice().deserialize()?;
let (data, footer) = body.split_from_end(footer_len as usize + 4);
let mut footer = footer.read_bytes()?;
let num_blocks = compute_num_blocks(stats.num_rows);
let mut blocks: Vec<Block> = iter::repeat_with(|| Block::deserialize(&mut footer))
.take(num_blocks as usize)
.collect::<io::Result<_>>()?;
let mut start_offset = 0;
for block in &mut blocks {
let mut blocks = Vec::with_capacity(num_blocks as usize);
for _ in 0..num_blocks {
let mut block = Block::deserialize(&mut footer)?;
let len = (block.bit_unpacker.bit_width() as usize) * BLOCK_SIZE as usize / 8;
block.data_start_offset = start_offset;
start_offset += (block.bit_unpacker.bit_width() as usize) * BLOCK_SIZE as usize / 8;
blocks.push(BlockWithData {
block,
file_slice: data.slice(start_offset..(start_offset + len).min(data.len())),
data: Default::default(),
});
start_offset += len;
}
Ok(BlockwiseLinearReader {
blocks: blocks.into_boxed_slice().into(),
data,
stats,
})
}
}
struct BlockWithData {
block: Block,
file_slice: FileSlice,
data: OnceLock<OwnedBytes>,
}
impl Deref for BlockWithData {
type Target = Block;
fn deref(&self) -> &Self::Target {
&self.block
}
}
impl DerefMut for BlockWithData {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.block
}
}
#[derive(Clone)]
pub struct BlockwiseLinearReader {
blocks: Arc<[Block]>,
data: OwnedBytes,
blocks: Arc<[BlockWithData]>,
stats: ColumnStats,
}
@@ -208,7 +241,9 @@ impl ColumnValues for BlockwiseLinearReader {
let idx_within_block = idx % BLOCK_SIZE;
let block = &self.blocks[block_id];
let interpoled_val: u64 = block.line.eval(idx_within_block);
let block_bytes = &self.data[block.data_start_offset..];
let block_bytes = block
.data
.get_or_init(|| block.file_slice.read_bytes().unwrap());
let bitpacked_diff = block.bit_unpacker.get(idx_within_block, block_bytes);
// TODO optimize me! the line parameters could be tweaked to include the multiplication and
// remove the dependency.

View File

@@ -8,7 +8,7 @@ use crate::column_values::ColumnValues;
const MID_POINT: u64 = (1u64 << 32) - 1u64;
/// `Line` describes a line function `y: ax + b` using integer
/// arithmetics.
/// arithmetic.
///
/// The slope is in fact a decimal split into a 32 bit integer value,
/// and a 32-bit decimal value.
@@ -94,7 +94,7 @@ impl Line {
// `(i, ys[])`.
//
// The best intercept therefore has the form
// `y[i] - line.eval(i)` (using wrapping arithmetics).
// `y[i] - line.eval(i)` (using wrapping arithmetic).
// In other words, the best intercept is one of the `y - Line::eval(ys[i])`
// and our task is just to pick the one that minimizes our error.
//

View File

@@ -1,5 +1,6 @@
use std::io;
use common::file_slice::FileSlice;
use common::{BinarySerializable, OwnedBytes};
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
@@ -117,7 +118,7 @@ impl ColumnCodecEstimator for LinearCodecEstimator {
Some(
stats.num_bytes()
+ linear_params.num_bytes()
+ (num_bits as u64 * stats.num_rows as u64 + 7) / 8,
+ (num_bits as u64 * stats.num_rows as u64).div_ceil(8),
)
}
@@ -190,7 +191,8 @@ impl ColumnCodec for LinearCodec {
type Estimator = LinearCodecEstimator;
fn load(mut data: OwnedBytes) -> io::Result<Self::ColumnValues> {
fn load(file_slice: FileSlice) -> io::Result<Self::ColumnValues> {
let mut data = file_slice.read_bytes()?;
let stats = ColumnStats::deserialize(&mut data)?;
let linear_params = LinearParams::deserialize(&mut data)?;
Ok(LinearReader {

View File

@@ -8,7 +8,8 @@ use std::io;
use std::io::Write;
use std::sync::Arc;
use common::{BinarySerializable, OwnedBytes};
use common::BinarySerializable;
use common::file_slice::FileSlice;
use crate::column_values::monotonic_mapping::{
StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
@@ -52,7 +53,7 @@ pub trait ColumnCodecEstimator<T = u64>: 'static {
) -> io::Result<()>;
}
/// A column codec describes a colunm serialization format.
/// A column codec describes a column serialization format.
pub trait ColumnCodec<T: PartialOrd = u64> {
/// Specialized `ColumnValues` type.
type ColumnValues: ColumnValues<T> + 'static;
@@ -60,7 +61,7 @@ pub trait ColumnCodec<T: PartialOrd = u64> {
type Estimator: ColumnCodecEstimator + Default;
/// Loads a column that has been serialized using this codec.
fn load(bytes: OwnedBytes) -> io::Result<Self::ColumnValues>;
fn load(file_slice: FileSlice) -> io::Result<Self::ColumnValues>;
/// Returns an estimator.
fn estimator() -> Self::Estimator {
@@ -111,20 +112,22 @@ impl CodecType {
fn load<T: MonotonicallyMappableToU64>(
&self,
bytes: OwnedBytes,
file_slice: FileSlice,
) -> io::Result<Arc<dyn ColumnValues<T>>> {
match self {
CodecType::Bitpacked => load_specific_codec::<BitpackedCodec, T>(bytes),
CodecType::Linear => load_specific_codec::<LinearCodec, T>(bytes),
CodecType::BlockwiseLinear => load_specific_codec::<BlockwiseLinearCodec, T>(bytes),
CodecType::Bitpacked => load_specific_codec::<BitpackedCodec, T>(file_slice),
CodecType::Linear => load_specific_codec::<LinearCodec, T>(file_slice),
CodecType::BlockwiseLinear => {
load_specific_codec::<BlockwiseLinearCodec, T>(file_slice)
}
}
}
}
fn load_specific_codec<C: ColumnCodec, T: MonotonicallyMappableToU64>(
bytes: OwnedBytes,
file_slice: FileSlice,
) -> io::Result<Arc<dyn ColumnValues<T>>> {
let reader = C::load(bytes)?;
let reader = C::load(file_slice)?;
let reader_typed = monotonic_map_column(
reader,
StrictlyMonotonicMappingInverter::from(StrictlyMonotonicMappingToInternal::<T>::new()),
@@ -189,25 +192,28 @@ pub fn serialize_u64_based_column_values<T: MonotonicallyMappableToU64>(
///
/// This method first identifies the codec off the first byte.
pub fn load_u64_based_column_values<T: MonotonicallyMappableToU64>(
mut bytes: OwnedBytes,
file_slice: FileSlice,
) -> io::Result<Arc<dyn ColumnValues<T>>> {
let codec_type: CodecType = bytes
.first()
.copied()
let (header, body) = file_slice.split(1);
let codec_type: CodecType = header
.read_bytes()?
.as_slice()
.first()
.copied()
.and_then(CodecType::try_from_code)
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Failed to read codec type"))?;
bytes.advance(1);
codec_type.load(bytes)
codec_type.load(body)
}
/// Helper function to serialize a column (autodetect from all codecs) and then open it
#[cfg(test)]
pub fn serialize_and_load_u64_based_column_values<T: MonotonicallyMappableToU64>(
vals: &dyn Iterable,
codec_types: &[CodecType],
) -> Arc<dyn ColumnValues<T>> {
let mut buffer = Vec::new();
serialize_u64_based_column_values(vals, codec_types, &mut buffer).unwrap();
load_u64_based_column_values::<T>(OwnedBytes::new(buffer)).unwrap()
load_u64_based_column_values::<T>(FileSlice::from(buffer)).unwrap()
}
#[cfg(test)]

View File

@@ -1,3 +1,4 @@
use common::HasLen;
use proptest::prelude::*;
use proptest::{prop_oneof, proptest};
use rand::Rng;
@@ -13,7 +14,7 @@ fn test_serialize_and_load_simple() {
)
.unwrap();
assert_eq!(buffer.len(), 7);
let col = load_u64_based_column_values::<u64>(OwnedBytes::new(buffer)).unwrap();
let col = load_u64_based_column_values::<u64>(FileSlice::from(buffer)).unwrap();
assert_eq!(col.num_vals(), 3);
assert_eq!(col.get_val(0), 1);
assert_eq!(col.get_val(1), 2);
@@ -30,7 +31,7 @@ fn test_empty_column_i64() {
continue;
}
num_acceptable_codecs += 1;
let col = load_u64_based_column_values::<i64>(OwnedBytes::new(buffer)).unwrap();
let col = load_u64_based_column_values::<i64>(FileSlice::from(buffer)).unwrap();
assert_eq!(col.num_vals(), 0);
assert_eq!(col.min_value(), i64::MIN);
assert_eq!(col.max_value(), i64::MIN);
@@ -48,7 +49,7 @@ fn test_empty_column_u64() {
continue;
}
num_acceptable_codecs += 1;
let col = load_u64_based_column_values::<u64>(OwnedBytes::new(buffer)).unwrap();
let col = load_u64_based_column_values::<u64>(FileSlice::from(buffer)).unwrap();
assert_eq!(col.num_vals(), 0);
assert_eq!(col.min_value(), u64::MIN);
assert_eq!(col.max_value(), u64::MIN);
@@ -66,7 +67,7 @@ fn test_empty_column_f64() {
continue;
}
num_acceptable_codecs += 1;
let col = load_u64_based_column_values::<f64>(OwnedBytes::new(buffer)).unwrap();
let col = load_u64_based_column_values::<f64>(FileSlice::from(buffer)).unwrap();
assert_eq!(col.num_vals(), 0);
// FIXME. f64::MIN would be better!
assert!(col.min_value().is_nan());
@@ -97,7 +98,7 @@ pub(crate) fn create_and_validate<TColumnCodec: ColumnCodec>(
let actual_compression = buffer.len() as u64;
let reader = TColumnCodec::load(OwnedBytes::new(buffer)).unwrap();
let reader = TColumnCodec::load(FileSlice::from(buffer)).unwrap();
assert_eq!(reader.num_vals(), vals.len() as u32);
let mut buffer = Vec::new();
for (doc, orig_val) in vals.iter().copied().enumerate() {
@@ -131,7 +132,7 @@ pub(crate) fn create_and_validate<TColumnCodec: ColumnCodec>(
.collect();
let mut positions = Vec::new();
reader.get_row_ids_for_value_range(
vals[test_rand_idx]..=vals[test_rand_idx],
crate::column::ValueRange::Inclusive(vals[test_rand_idx]..=vals[test_rand_idx]),
0..vals.len() as u32,
&mut positions,
);
@@ -326,7 +327,7 @@ fn test_fastfield_gcd_i64_with_codec(codec_type: CodecType, num_vals: usize) ->
&[codec_type],
&mut buffer,
)?;
let buffer = OwnedBytes::new(buffer);
let buffer = FileSlice::from(buffer);
let column = crate::column_values::load_u64_based_column_values::<i64>(buffer.clone())?;
assert_eq!(column.get_val(0), -4000i64);
assert_eq!(column.get_val(1), -3000i64);
@@ -343,7 +344,7 @@ fn test_fastfield_gcd_i64_with_codec(codec_type: CodecType, num_vals: usize) ->
&[codec_type],
&mut buffer_without_gcd,
)?;
let buffer_without_gcd = OwnedBytes::new(buffer_without_gcd);
let buffer_without_gcd = FileSlice::from(buffer_without_gcd);
assert!(buffer_without_gcd.len() > buffer.len());
Ok(())
@@ -369,7 +370,7 @@ fn test_fastfield_gcd_u64_with_codec(codec_type: CodecType, num_vals: usize) ->
&[codec_type],
&mut buffer,
)?;
let buffer = OwnedBytes::new(buffer);
let buffer = FileSlice::from(buffer);
let column = crate::column_values::load_u64_based_column_values::<u64>(buffer.clone())?;
assert_eq!(column.get_val(0), 1000u64);
assert_eq!(column.get_val(1), 2000u64);
@@ -386,7 +387,7 @@ fn test_fastfield_gcd_u64_with_codec(codec_type: CodecType, num_vals: usize) ->
&[codec_type],
&mut buffer_without_gcd,
)?;
let buffer_without_gcd = OwnedBytes::new(buffer_without_gcd);
let buffer_without_gcd = FileSlice::from(buffer_without_gcd);
assert!(buffer_without_gcd.len() > buffer.len());
Ok(())
}
@@ -405,7 +406,7 @@ fn test_fastfield_gcd_u64() -> io::Result<()> {
#[test]
pub fn test_fastfield2() {
let test_fastfield = crate::column_values::serialize_and_load_u64_based_column_values::<u64>(
let test_fastfield = serialize_and_load_u64_based_column_values::<u64>(
&&[100u64, 200u64, 300u64][..],
&ALL_U64_CODEC_TYPES,
);

View File

@@ -4,6 +4,7 @@ mod term_merger;
use std::collections::{BTreeMap, HashSet};
use std::io;
use std::io::ErrorKind;
use std::net::Ipv6Addr;
use std::sync::Arc;
@@ -78,6 +79,7 @@ pub fn merge_columnar(
required_columns: &[(String, ColumnType)],
merge_row_order: MergeRowOrder,
output: &mut impl io::Write,
cancel: impl Fn() -> bool,
) -> io::Result<()> {
let mut serializer = ColumnarSerializer::new(output);
let num_docs_per_columnar = columnar_readers
@@ -87,6 +89,9 @@ pub fn merge_columnar(
let columns_to_merge = group_columns_for_merge(columnar_readers, required_columns)?;
for res in columns_to_merge {
if cancel() {
return Err(io::Error::new(ErrorKind::Interrupted, "Merge cancelled"));
}
let ((column_name, _column_type_category), grouped_columns) = res;
let grouped_columns = grouped_columns.open(&merge_row_order)?;
if grouped_columns.is_empty() {
@@ -367,7 +372,7 @@ fn is_empty_after_merge(
ColumnIndex::Empty { .. } => true,
ColumnIndex::Full => alive_bitset.len() == 0,
ColumnIndex::Optional(optional_index) => {
for doc in optional_index.iter_docs() {
for doc in optional_index.iter_non_null_docs() {
if alive_bitset.contains(doc) {
return false;
}

View File

@@ -205,6 +205,7 @@ fn test_merge_columnar_numbers() {
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut buffer,
|| false,
)
.unwrap();
let columnar_reader = ColumnarReader::open(buffer).unwrap();
@@ -233,6 +234,7 @@ fn test_merge_columnar_texts() {
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut buffer,
|| false,
)
.unwrap();
let columnar_reader = ColumnarReader::open(buffer).unwrap();
@@ -282,6 +284,7 @@ fn test_merge_columnar_byte() {
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut buffer,
|| false,
)
.unwrap();
let columnar_reader = ColumnarReader::open(buffer).unwrap();
@@ -338,6 +341,7 @@ fn test_merge_columnar_byte_with_missing() {
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut buffer,
|| false,
)
.unwrap();
let columnar_reader = ColumnarReader::open(buffer).unwrap();
@@ -390,6 +394,7 @@ fn test_merge_columnar_different_types() {
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut buffer,
|| false,
)
.unwrap();
let columnar_reader = ColumnarReader::open(buffer).unwrap();
@@ -455,6 +460,7 @@ fn test_merge_columnar_different_empty_cardinality() {
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut buffer,
|| false,
)
.unwrap();
let columnar_reader = ColumnarReader::open(buffer).unwrap();
@@ -565,6 +571,7 @@ proptest! {
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut out,
|| false,
).unwrap();
let merged_reader = ColumnarReader::open(out).unwrap();
@@ -582,6 +589,7 @@ proptest! {
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut out,
|| false,
).unwrap();
}

View File

@@ -244,7 +244,7 @@ impl SymbolValue for UnorderedId {
fn compute_num_bytes_for_u64(val: u64) -> usize {
let msb = (64u32 - val.leading_zeros()) as usize;
(msb + 7) / 8
msb.div_ceil(8)
}
fn encode_zig_zag(n: i64) -> u64 {

View File

@@ -0,0 +1,22 @@
use serde::{Deserialize, Serialize};
/// Contains a feature (field, score, etc.) of a document along with the document address.
///
/// Used only by TopNComputer, which implements the actual comparison via a `Comparator`.
#[derive(Clone, Default, Eq, PartialEq, Serialize, Deserialize)]
pub struct ComparableDoc<T, D> {
/// The sort key (feature) of the document. In practice, this is
/// a type which can be compared with a `Comparator<T>`.
pub sort_key: T,
/// The document address. In practice, this is either a `DocId` or `DocAddress`.
pub doc: D,
}
impl<T: std::fmt::Debug, D: std::fmt::Debug> std::fmt::Debug for ComparableDoc<T, D> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("ComparableDoc")
.field("feature", &self.sort_key)
.field("doc", &self.doc)
.finish()
}
}
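For illustration, here is how such sort-key/doc pairs order under one possible `None` policy. This is a hedged sketch: the real ordering is applied by `TopNComputer` through a `Comparator`, which is not part of this diff, so the comparison below is written out by hand:

```rust
// Illustrative only: ordering a handful of `ComparableDoc`-like pairs by
// sort key, descending, with `None` last (one of several policies the
// crate's `Comparator` implementations encode).
#[derive(Debug, Clone, PartialEq)]
struct ComparableDoc<T, D> {
    sort_key: T,
    doc: D,
}

fn main() {
    let mut docs = vec![
        ComparableDoc { sort_key: Some(30u64), doc: 0u32 },
        ComparableDoc { sort_key: None, doc: 1 },
        ComparableDoc { sort_key: Some(10), doc: 2 },
    ];
    docs.sort_by(|a, b| match (&a.sort_key, &b.sort_key) {
        (Some(x), Some(y)) => y.cmp(x),
        (Some(_), None) => std::cmp::Ordering::Less,
        (None, Some(_)) => std::cmp::Ordering::Greater,
        (None, None) => std::cmp::Ordering::Equal,
    });
    assert_eq!(docs.iter().map(|d| d.doc).collect::<Vec<_>>(), vec![0, 2, 1]);
}
```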

View File

@@ -71,7 +71,14 @@ fn test_format(path: &str) {
let columnar_readers = vec![&reader, &reader2];
let merge_row_order = StackMergeOrder::stack(&columnar_readers[..]);
let mut out = Vec::new();
merge_columnar(&columnar_readers, &[], merge_row_order.into(), &mut out).unwrap();
merge_columnar(
&columnar_readers,
&[],
merge_row_order.into(),
&mut out,
|| false,
)
.unwrap();
let reader = ColumnarReader::open(out).unwrap();
check_columns(&reader);
}

View File

@@ -3,7 +3,8 @@ use std::sync::Arc;
use std::{fmt, io};
use common::file_slice::FileSlice;
use common::{ByteCount, DateTime, HasLen, OwnedBytes};
use common::{ByteCount, DateTime};
use serde::{Deserialize, Serialize};
use crate::column::{BytesColumn, Column, StrColumn};
use crate::column_values::{StrictlyMonotonicFn, monotonic_map_column};
@@ -238,8 +239,7 @@ pub struct DynamicColumnHandle {
impl DynamicColumnHandle {
// TODO rename load
pub fn open(&self) -> io::Result<DynamicColumn> {
let column_bytes: OwnedBytes = self.file_slice.read_bytes()?;
self.open_internal(column_bytes)
self.open_internal(self.file_slice.clone())
}
#[doc(hidden)]
@@ -258,16 +258,15 @@ impl DynamicColumnHandle {
/// If not, the fastfield reader will return the u64 value associated with the original
/// FastValue.
pub fn open_u64_lenient(&self) -> io::Result<Option<Column<u64>>> {
let column_bytes = self.file_slice.read_bytes()?;
match self.column_type {
ColumnType::Str | ColumnType::Bytes => {
let column: BytesColumn =
crate::column::open_column_bytes(column_bytes, self.format_version)?;
crate::column::open_column_bytes(self.file_slice.clone(), self.format_version)?;
Ok(Some(column.term_ord_column))
}
ColumnType::IpAddr => {
let column = crate::column::open_column_u128_as_compact_u64(
column_bytes,
self.file_slice.clone(),
self.format_version,
)?;
Ok(Some(column))
@@ -277,50 +276,129 @@ impl DynamicColumnHandle {
| ColumnType::U64
| ColumnType::F64
| ColumnType::DateTime => {
let column =
crate::column::open_column_u64::<u64>(column_bytes, self.format_version)?;
let column = crate::column::open_column_u64::<u64>(
self.file_slice.clone(),
self.format_version,
)?;
Ok(Some(column))
}
}
}
fn open_internal(&self, column_bytes: OwnedBytes) -> io::Result<DynamicColumn> {
fn open_internal(&self, file_slice: FileSlice) -> io::Result<DynamicColumn> {
let dynamic_column: DynamicColumn = match self.column_type {
ColumnType::Bytes => {
crate::column::open_column_bytes(column_bytes, self.format_version)?.into()
crate::column::open_column_bytes(file_slice, self.format_version)?.into()
}
ColumnType::Str => {
crate::column::open_column_str(column_bytes, self.format_version)?.into()
crate::column::open_column_str(file_slice, self.format_version)?.into()
}
ColumnType::I64 => {
crate::column::open_column_u64::<i64>(column_bytes, self.format_version)?.into()
crate::column::open_column_u64::<i64>(file_slice, self.format_version)?.into()
}
ColumnType::U64 => {
crate::column::open_column_u64::<u64>(column_bytes, self.format_version)?.into()
crate::column::open_column_u64::<u64>(file_slice, self.format_version)?.into()
}
ColumnType::F64 => {
crate::column::open_column_u64::<f64>(column_bytes, self.format_version)?.into()
crate::column::open_column_u64::<f64>(file_slice, self.format_version)?.into()
}
ColumnType::Bool => {
crate::column::open_column_u64::<bool>(column_bytes, self.format_version)?.into()
crate::column::open_column_u64::<bool>(file_slice, self.format_version)?.into()
}
ColumnType::IpAddr => {
crate::column::open_column_u128::<Ipv6Addr>(column_bytes, self.format_version)?
.into()
crate::column::open_column_u128::<Ipv6Addr>(file_slice, self.format_version)?.into()
}
ColumnType::DateTime => {
crate::column::open_column_u64::<DateTime>(column_bytes, self.format_version)?
.into()
crate::column::open_column_u64::<DateTime>(file_slice, self.format_version)?.into()
}
};
Ok(dynamic_column)
}
pub fn num_bytes(&self) -> ByteCount {
self.file_slice.len().into()
self.file_slice.num_bytes()
}
/// Legacy helper returning the column space usage.
pub fn column_and_dictionary_num_bytes(&self) -> io::Result<ColumnSpaceUsage> {
self.space_usage()
}
/// Return the space usage of the column, optionally broken down by dictionary and column
/// values.
///
/// For dictionary encoded columns (strings and bytes), this splits the total footprint into
/// the dictionary and the remaining column data (including index and values).
/// For all other column types, the dictionary size is `None` and the column size
/// equals the total bytes.
pub fn space_usage(&self) -> io::Result<ColumnSpaceUsage> {
let total_num_bytes = self.num_bytes();
let dynamic_column = self.open()?;
let dictionary_num_bytes = match &dynamic_column {
DynamicColumn::Bytes(bytes_column) => bytes_column.dictionary().num_bytes(),
DynamicColumn::Str(str_column) => str_column.dictionary().num_bytes(),
_ => {
return Ok(ColumnSpaceUsage::new(self.num_bytes(), None));
}
};
assert!(dictionary_num_bytes <= total_num_bytes);
let column_num_bytes =
ByteCount::from(total_num_bytes.get_bytes() - dictionary_num_bytes.get_bytes());
Ok(ColumnSpaceUsage::new(
column_num_bytes,
Some(dictionary_num_bytes),
))
}
pub fn column_type(&self) -> ColumnType {
self.column_type
}
}
/// Represents space usage of a column.
///
/// `column_num_bytes` tracks the column payload (index, values and footer).
/// For dictionary encoded columns, `dictionary_num_bytes` captures the dictionary footprint.
/// [`ColumnSpaceUsage::total_num_bytes`] returns the sum of both parts.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ColumnSpaceUsage {
column_num_bytes: ByteCount,
dictionary_num_bytes: Option<ByteCount>,
}
impl ColumnSpaceUsage {
pub(crate) fn new(
column_num_bytes: ByteCount,
dictionary_num_bytes: Option<ByteCount>,
) -> Self {
ColumnSpaceUsage {
column_num_bytes,
dictionary_num_bytes,
}
}
pub fn column_num_bytes(&self) -> ByteCount {
self.column_num_bytes
}
pub fn dictionary_num_bytes(&self) -> Option<ByteCount> {
self.dictionary_num_bytes
}
pub fn total_num_bytes(&self) -> ByteCount {
self.column_num_bytes + self.dictionary_num_bytes.unwrap_or_default()
}
/// Merge two space usage values by summing their components.
pub fn merge(&self, other: &ColumnSpaceUsage) -> ColumnSpaceUsage {
let dictionary_num_bytes = match (self.dictionary_num_bytes, other.dictionary_num_bytes) {
(Some(lhs), Some(rhs)) => Some(lhs + rhs),
(Some(val), None) | (None, Some(val)) => Some(val),
(None, None) => None,
};
ColumnSpaceUsage {
column_num_bytes: self.column_num_bytes + other.column_num_bytes,
dictionary_num_bytes,
}
}
}
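The `merge` above sums the column payloads and combines the optional dictionary sizes without inventing a zero for columns that have no dictionary. A standalone analogue of just that combining rule, using plain `u64` counts in place of `ByteCount`:

```rust
// Standalone analogue of the dictionary-size combination in
// `ColumnSpaceUsage::merge`: present + present sums, present + absent
// keeps the present value, absent + absent stays absent.
fn merge_dict(lhs: Option<u64>, rhs: Option<u64>) -> Option<u64> {
    match (lhs, rhs) {
        (Some(a), Some(b)) => Some(a + b),
        (Some(v), None) | (None, Some(v)) => Some(v),
        (None, None) => None,
    }
}

fn main() {
    // A str column (has a dictionary) merged with a numeric column (no dictionary).
    assert_eq!(merge_dict(Some(1024), None), Some(1024));
    assert_eq!(merge_dict(Some(1024), Some(512)), Some(1536));
    assert_eq!(merge_dict(None, None), None);
}
```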

View File

@@ -29,6 +29,7 @@ mod column;
pub mod column_index;
pub mod column_values;
mod columnar;
mod comparable_doc;
mod dictionary;
mod dynamic_column;
mod iterable;
@@ -36,7 +37,7 @@ pub(crate) mod utils;
mod value;
pub use block_accessor::ColumnBlockAccessor;
pub use column::{BytesColumn, Column, StrColumn};
pub use column::{BytesColumn, Column, StrColumn, ValueRange};
pub use column_index::ColumnIndex;
pub use column_values::{
ColumnValues, EmptyColumnValues, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
@@ -45,10 +46,11 @@ pub use columnar::{
CURRENT_VERSION, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, merge_columnar,
};
pub use comparable_doc::ComparableDoc;
use sstable::VoidSSTable;
pub use value::{NumericalType, NumericalValue};
pub use self::dynamic_column::{DynamicColumn, DynamicColumnHandle};
pub use self::dynamic_column::{ColumnSpaceUsage, DynamicColumn, DynamicColumnHandle};
pub type RowId = u32;
pub type DocId = u32;

View File

@@ -641,7 +641,7 @@ proptest! {
let columnar_readers_arr: Vec<&ColumnarReader> = columnar_readers.iter().collect();
let mut output: Vec<u8> = Vec::new();
let stack_merge_order = StackMergeOrder::stack(&columnar_readers_arr[..]).into();
crate::merge_columnar(&columnar_readers_arr[..], &[], stack_merge_order, &mut output).unwrap();
crate::merge_columnar(&columnar_readers_arr[..], &[], stack_merge_order, &mut output, || false,).unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
let concat_rows: Vec<Vec<(&'static str, ColumnValue)>> = columnar_docs.iter().flatten().cloned().collect();
let expected_merged_columnar = build_columnar(&concat_rows[..]);
@@ -665,6 +665,7 @@ fn test_columnar_merging_empty_columnar() {
&[],
crate::MergeRowOrder::Stack(stack_merge_order),
&mut output,
|| false,
)
.unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
@@ -702,6 +703,7 @@ fn test_columnar_merging_number_columns() {
&[],
crate::MergeRowOrder::Stack(stack_merge_order),
&mut output,
|| false,
)
.unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
@@ -775,6 +777,7 @@ fn test_columnar_merge_and_remap(
&[],
shuffle_merge_order.into(),
&mut output,
|| false,
)
.unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
@@ -817,6 +820,7 @@ fn test_columnar_merge_empty() {
&[],
shuffle_merge_order.into(),
&mut output,
|| false,
)
.unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
@@ -843,6 +847,7 @@ fn test_columnar_merge_single_str_column() {
&[],
shuffle_merge_order.into(),
&mut output,
|| false,
)
.unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();
@@ -875,6 +880,7 @@ fn test_delete_decrease_cardinality() {
&[],
shuffle_merge_order.into(),
&mut output,
|| false,
)
.unwrap();
let merged_columnar = ColumnarReader::open(output).unwrap();

View File

@@ -1,3 +1,5 @@
use std::str::FromStr;
use common::DateTime;
use crate::InvalidData;
@@ -9,6 +11,23 @@ pub enum NumericalValue {
F64(f64),
}
impl FromStr for NumericalValue {
type Err = ();
fn from_str(s: &str) -> Result<Self, ()> {
if let Ok(val_i64) = s.parse::<i64>() {
return Ok(val_i64.into());
}
if let Ok(val_u64) = s.parse::<u64>() {
return Ok(val_u64.into());
}
if let Ok(val_f64) = s.parse::<f64>() {
return Ok(NumericalValue::from(val_f64).normalize());
}
Err(())
}
}
impl NumericalValue {
pub fn numerical_type(&self) -> NumericalType {
match self {
@@ -26,7 +45,7 @@ impl NumericalValue {
if val <= i64::MAX as u64 {
NumericalValue::I64(val as i64)
} else {
NumericalValue::F64(val as f64)
NumericalValue::U64(val)
}
}
NumericalValue::I64(val) => NumericalValue::I64(val),
@@ -141,6 +160,7 @@ impl Coerce for DateTime {
#[cfg(test)]
mod tests {
use super::NumericalType;
use crate::NumericalValue;
#[test]
fn test_numerical_type_code() {
@@ -153,4 +173,58 @@ mod tests {
}
assert_eq!(num_numerical_type, 3);
}
#[test]
fn test_parse_numerical() {
assert_eq!(
"123".parse::<NumericalValue>().unwrap(),
NumericalValue::I64(123)
);
assert_eq!(
"18446744073709551615".parse::<NumericalValue>().unwrap(),
NumericalValue::U64(18446744073709551615u64)
);
assert_eq!(
"1.0".parse::<NumericalValue>().unwrap(),
NumericalValue::I64(1i64)
);
assert_eq!(
"1.1".parse::<NumericalValue>().unwrap(),
NumericalValue::F64(1.1f64)
);
assert_eq!(
"-1.0".parse::<NumericalValue>().unwrap(),
NumericalValue::I64(-1i64)
);
}
#[test]
fn test_normalize_numerical() {
assert_eq!(
NumericalValue::from(1u64).normalize(),
NumericalValue::I64(1i64),
);
let limit_val = i64::MAX as u64 + 1u64;
assert_eq!(
NumericalValue::from(limit_val).normalize(),
NumericalValue::U64(limit_val),
);
assert_eq!(
NumericalValue::from(-1i64).normalize(),
NumericalValue::I64(-1i64),
);
assert_eq!(
NumericalValue::from(-2.0f64).normalize(),
NumericalValue::I64(-2i64),
);
assert_eq!(
NumericalValue::from(-2.1f64).normalize(),
NumericalValue::F64(-2.1f64),
);
let large_float = 2.0f64.powf(70.0f64);
assert_eq!(
NumericalValue::from(large_float).normalize(),
NumericalValue::F64(large_float),
);
}
}

View File

@@ -183,7 +183,7 @@ pub struct BitSet {
}
fn num_buckets(max_val: u32) -> u32 {
(max_val + 63u32) / 64u32
max_val.div_ceil(64u32)
}
impl BitSet {

View File

@@ -0,0 +1,106 @@
use std::cell::RefCell;
use std::cmp::min;
use std::io;
use std::ops::Range;
use super::file_slice::FileSlice;
use super::{HasLen, OwnedBytes};
const DEFAULT_BUFFER_MAX_SIZE: usize = 512 * 1024; // 512K
/// A buffered reader for a FileSlice.
///
/// Reads the underlying `FileSlice` in large, sequential chunks to amortize
/// the cost of `read_bytes` calls, while keeping peak memory usage under control.
///
/// TODO: Rather than wrapping a `FileSlice` in buffering, it will usually be better to adjust a
/// `FileHandle` to directly handle buffering itself.
/// TODO: See: https://github.com/paradedb/paradedb/issues/3374
pub struct BufferedFileSlice {
file_slice: FileSlice,
buffer: RefCell<OwnedBytes>,
buffer_range: RefCell<Range<u64>>,
buffer_max_size: usize,
}
impl BufferedFileSlice {
/// Creates a new `BufferedFileSlice`.
///
/// The `buffer_max_size` is the amount of data that will be read from the
/// `FileSlice` on a buffer miss.
pub fn new(file_slice: FileSlice, buffer_max_size: usize) -> Self {
Self {
file_slice,
buffer: RefCell::new(OwnedBytes::empty()),
buffer_range: RefCell::new(0..0),
buffer_max_size,
}
}
/// Creates a new `BufferedFileSlice` with a default buffer max size.
pub fn new_with_default_buffer_size(file_slice: FileSlice) -> Self {
Self::new(file_slice, DEFAULT_BUFFER_MAX_SIZE)
}
/// Creates an empty `BufferedFileSlice`.
pub fn empty() -> Self {
Self::new(FileSlice::empty(), 0)
}
/// Returns an `OwnedBytes` corresponding to the given `required_range`.
///
/// If the requested range is not in the buffer, this will trigger a read
/// from the underlying `FileSlice`.
///
/// If the requested range is larger than the buffer_max_size, it will be read directly from the
/// source without buffering.
///
/// # Errors
///
/// Returns an `io::Error` if the underlying read fails or the range is
/// out of bounds.
pub fn get_bytes(&self, required_range: Range<u64>) -> io::Result<OwnedBytes> {
let buffer_range = self.buffer_range.borrow();
// Cache miss condition: the required range is not fully contained in the current buffer.
if required_range.start < buffer_range.start || required_range.end > buffer_range.end {
drop(buffer_range); // release borrow before mutating
if required_range.end > self.file_slice.len() as u64 {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"Requested range extends beyond the end of the file slice.",
));
}
if (required_range.end - required_range.start) as usize > self.buffer_max_size {
// This read is larger than our buffer max size.
// Read it directly and bypass the buffer to avoid churning.
return self
.file_slice
.read_bytes_slice(required_range.start as usize..required_range.end as usize);
}
let new_buffer_start = required_range.start;
let new_buffer_end = min(
new_buffer_start + self.buffer_max_size as u64,
self.file_slice.len() as u64,
);
let read_range = new_buffer_start..new_buffer_end;
let new_buffer = self
.file_slice
.read_bytes_slice(read_range.start as usize..read_range.end as usize)?;
self.buffer.replace(new_buffer);
self.buffer_range.replace(read_range);
}
// Now the data is guaranteed to be in the buffer.
let buffer = self.buffer.borrow();
let buffer_range = self.buffer_range.borrow();
let local_start = (required_range.start - buffer_range.start) as usize;
let local_end = (required_range.end - buffer_range.start) as usize;
Ok(buffer.slice(local_start..local_end))
}
}
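A usage sketch, assuming this `common` crate is in scope and using a small buffer so the behavior is easy to trace:

```rust
use common::buffered_file_slice::BufferedFileSlice;
use common::file_slice::FileSlice;

fn main() -> std::io::Result<()> {
    let data: Vec<u8> = (0u8..=255).collect();
    let file_slice = FileSlice::from(data);

    // The first read pulls bytes 10..74 (64 bytes) into the buffer;
    // the second read is served from that buffer.
    let buffered = BufferedFileSlice::new(file_slice, 64);
    let first = buffered.get_bytes(10..14)?;
    assert_eq!(first.as_slice(), &[10, 11, 12, 13]);
    let second = buffered.get_bytes(20..24)?; // buffer hit
    assert_eq!(second.as_slice(), &[20, 21, 22, 23]);

    // Requests larger than the buffer max size bypass the buffer entirely.
    let big = buffered.get_bytes(0..128)?;
    assert_eq!(big.len(), 128);
    Ok(())
}
```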

View File

@@ -1,7 +1,7 @@
use std::fs::File;
use std::ops::{Deref, Range, RangeBounds};
use std::path::Path;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};
use std::{fmt, io};
use async_trait::async_trait;
@@ -339,6 +339,27 @@ impl FileHandle for OwnedBytes {
}
}
pub struct DeferredFileSlice {
opener: Arc<dyn Fn() -> io::Result<FileSlice> + Send + Sync + 'static>,
file_slice: OnceLock<std::io::Result<FileSlice>>,
}
impl DeferredFileSlice {
pub fn new(opener: impl Fn() -> io::Result<FileSlice> + Send + Sync + 'static) -> Self {
DeferredFileSlice {
opener: Arc::new(opener),
file_slice: OnceLock::default(),
}
}
pub fn open(&self) -> io::Result<&FileSlice> {
match self.file_slice.get_or_init(|| (self.opener)()) {
Ok(file_slice) => Ok(file_slice),
Err(e) => Err(io::Error::new(io::ErrorKind::Other, e.to_string())),
}
}
}
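A usage sketch for the lazy opener (again assuming this `common` crate is in scope; that `DeferredFileSlice` is reachable via `file_slice` is an assumption based on where it is defined in this diff):

```rust
use common::file_slice::{DeferredFileSlice, FileSlice};

fn main() -> std::io::Result<()> {
    let deferred = DeferredFileSlice::new(|| {
        // In real usage this might open a file; here we just build a slice.
        Ok(FileSlice::from(vec![1u8, 2, 3]))
    });
    // Nothing has been opened yet; the first `open` runs the closure.
    let slice = deferred.open()?;
    assert_eq!(slice.read_bytes()?.as_slice(), &[1, 2, 3]);
    // Subsequent calls reuse the cached `FileSlice` (success or failure).
    let again = deferred.open()?;
    assert_eq!(again.read_bytes()?.as_slice(), &[1, 2, 3]);
    Ok(())
}
```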
#[cfg(test)]
mod tests {
use std::io;

View File

@@ -6,6 +6,7 @@ pub use byteorder::LittleEndian as Endianness;
mod bitset;
pub mod bounds;
pub mod buffered_file_slice;
mod byte_count;
mod datetime;
pub mod file_slice;

View File

@@ -28,7 +28,9 @@ impl BinarySerializable for VIntU128 {
writer.write_all(&buffer)
}
#[allow(clippy::unbuffered_bytes)]
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
#[allow(clippy::unbuffered_bytes)]
let mut bytes = reader.bytes();
let mut result = 0u128;
let mut shift = 0u64;
@@ -56,6 +58,33 @@ impl BinarySerializable for VIntU128 {
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct VInt(pub u64);
impl VInt {
pub fn deserialize_with_size<R: Read>(reader: &mut R) -> io::Result<(Self, usize)> {
let mut nbytes = 0;
let mut bytes = reader.bytes();
let mut result = 0u64;
let mut shift = 0u64;
loop {
match bytes.next() {
Some(Ok(b)) => {
nbytes += 1;
result |= u64::from(b % 128u8) << shift;
if b >= STOP_BIT {
return Ok((VInt(result), nbytes));
}
shift += 7;
}
_ => {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Reach end of buffer while reading VInt",
));
}
}
}
}
}
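A worked byte-level example matching the decode loop above: each byte carries 7 payload bits, least-significant group first, and the final byte has the stop bit (128) set.

```rust
// Decode 300 by hand with the same stop-bit scheme as `deserialize_with_size`.
fn main() {
    // 300 = 0b1_0010_1100 -> low 7 bits = 44, next group = 2.
    let encoded = [44u8, 2u8 | 128u8];

    let mut result = 0u64;
    let mut shift = 0u64;
    let mut nbytes = 0usize;
    for &b in &encoded {
        nbytes += 1;
        result |= u64::from(b % 128) << shift; // accumulate the 7-bit group
        if b >= 128 {
            break; // stop bit set: this was the final byte
        }
        shift += 7;
    }
    assert_eq!(result, 300);
    assert_eq!(nbytes, 2);
}
```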
const STOP_BIT: u8 = 128;
#[inline]
@@ -195,7 +224,9 @@ impl BinarySerializable for VInt {
writer.write_all(&buffer[0..num_bytes])
}
#[allow(clippy::unbuffered_bytes)]
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
#[allow(clippy::unbuffered_bytes)]
let mut bytes = reader.bytes();
let mut result = 0u64;
let mut shift = 0u64;
@@ -221,7 +252,6 @@ impl BinarySerializable for VInt {
#[cfg(test)]
mod tests {
use super::{BinarySerializable, VInt, serialize_vint_u32};
fn aux_test_vint(val: u64) {

Binary file changed (image, previously 653 KiB); contents not shown.

View File

@@ -208,7 +208,7 @@ fn main() -> tantivy::Result<()> {
// is the role of the `TopDocs` collector.
// We can now perform our query.
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;
// The actual documents still need to be
// retrieved from Tantivy's store.
@@ -226,7 +226,7 @@ fn main() -> tantivy::Result<()> {
let query = query_parser.parse_query("title:sea^20 body:whale^70")?;
let (_score, doc_address) = searcher
.search(&query, &TopDocs::with_limit(1))?
.search(&query, &TopDocs::with_limit(1).order_by_score())?
.into_iter()
.next()
.unwrap();

View File

@@ -100,7 +100,7 @@ fn main() -> tantivy::Result<()> {
// here we want to get a hit on the 'ken' in Frankenstein
let query = query_parser.parse_query("ken")?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;
for (_, doc_address) in top_docs {
let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;

View File

@@ -50,14 +50,14 @@ fn main() -> tantivy::Result<()> {
{
// Simple exact search on the date
let query = query_parser.parse_query("occurred_at:\"2022-06-22T12:53:50.53Z\"")?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(5))?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(5).order_by_score())?;
assert_eq!(count_docs.len(), 1);
}
{
// Range query on the date field
let query = query_parser
.parse_query(r#"occurred_at:[2022-06-22T12:58:00Z TO 2022-06-23T00:00:00Z}"#)?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(4))?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(4).order_by_score())?;
assert_eq!(count_docs.len(), 1);
for (_score, doc_address) in count_docs {
let retrieved_doc = searcher.doc::<TantivyDocument>(doc_address)?;

View File

@@ -28,7 +28,7 @@ fn extract_doc_given_isbn(
// The second argument indicates that we don't care about decoding positions,
// or term frequencies.
let term_query = TermQuery::new(isbn_term.clone(), IndexRecordOption::Basic);
let top_docs = searcher.search(&term_query, &TopDocs::with_limit(1))?;
let top_docs = searcher.search(&term_query, &TopDocs::with_limit(1).order_by_score())?;
if let Some((_score, doc_address)) = top_docs.first() {
let doc = searcher.doc(*doc_address)?;

View File

@@ -0,0 +1,212 @@
// # Filter Aggregation Example
//
// This example demonstrates filter aggregations - creating buckets of documents
// matching specific queries, with nested aggregations computed on each bucket.
//
// Filter aggregations are useful for computing metrics on different subsets of
// your data in a single query, like "average price overall + average price for
// electronics + count of in-stock items".
use serde_json::json;
use tantivy::aggregation::agg_req::Aggregations;
use tantivy::aggregation::AggregationCollector;
use tantivy::query::AllQuery;
use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
use tantivy::{doc, Index};
fn main() -> tantivy::Result<()> {
// Create a simple product schema
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("category", TEXT | FAST);
schema_builder.add_text_field("brand", TEXT | FAST);
schema_builder.add_u64_field("price", FAST);
schema_builder.add_f64_field("rating", FAST);
schema_builder.add_bool_field("in_stock", FAST | INDEXED);
let schema = schema_builder.build();
// Create index and add sample products
let index = Index::create_in_ram(schema.clone());
let mut writer = index.writer(50_000_000)?;
writer.add_document(doc!(
schema.get_field("category")? => "electronics",
schema.get_field("brand")? => "apple",
schema.get_field("price")? => 999u64,
schema.get_field("rating")? => 4.5f64,
schema.get_field("in_stock")? => true
))?;
writer.add_document(doc!(
schema.get_field("category")? => "electronics",
schema.get_field("brand")? => "samsung",
schema.get_field("price")? => 799u64,
schema.get_field("rating")? => 4.2f64,
schema.get_field("in_stock")? => true
))?;
writer.add_document(doc!(
schema.get_field("category")? => "clothing",
schema.get_field("brand")? => "nike",
schema.get_field("price")? => 120u64,
schema.get_field("rating")? => 4.1f64,
schema.get_field("in_stock")? => false
))?;
writer.add_document(doc!(
schema.get_field("category")? => "books",
schema.get_field("brand")? => "penguin",
schema.get_field("price")? => 25u64,
schema.get_field("rating")? => 4.8f64,
schema.get_field("in_stock")? => true
))?;
writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
// Example 1: Basic filter with metric aggregation
println!("=== Example 1: Electronics average price ===");
let agg_req = json!({
"electronics": {
"filter": "category:electronics",
"aggs": {
"avg_price": { "avg": { "field": "price" } }
}
}
});
let agg: Aggregations = serde_json::from_value(agg_req)?;
let collector = AggregationCollector::from_aggs(agg, Default::default());
let result = searcher.search(&AllQuery, &collector)?;
let expected = json!({
"electronics": {
"doc_count": 2,
"avg_price": { "value": 899.0 }
}
});
assert_eq!(serde_json::to_value(&result)?, expected);
println!("{}\n", serde_json::to_string_pretty(&result)?);
// Example 2: Multiple independent filters
println!("=== Example 2: Multiple filters in one query ===");
let agg_req = json!({
"electronics": {
"filter": "category:electronics",
"aggs": { "avg_price": { "avg": { "field": "price" } } }
},
"in_stock": {
"filter": "in_stock:true",
"aggs": { "count": { "value_count": { "field": "brand" } } }
},
"high_rated": {
"filter": "rating:[4.5 TO *]",
"aggs": { "count": { "value_count": { "field": "brand" } } }
}
});
let agg: Aggregations = serde_json::from_value(agg_req)?;
let collector = AggregationCollector::from_aggs(agg, Default::default());
let result = searcher.search(&AllQuery, &collector)?;
let expected = json!({
"electronics": {
"doc_count": 2,
"avg_price": { "value": 899.0 }
},
"in_stock": {
"doc_count": 3,
"count": { "value": 3.0 }
},
"high_rated": {
"doc_count": 2,
"count": { "value": 2.0 }
}
});
assert_eq!(serde_json::to_value(&result)?, expected);
println!("{}\n", serde_json::to_string_pretty(&result)?);
// Example 3: Nested filters - progressive refinement
println!("=== Example 3: Nested filters ===");
let agg_req = json!({
"in_stock": {
"filter": "in_stock:true",
"aggs": {
"electronics": {
"filter": "category:electronics",
"aggs": {
"expensive": {
"filter": "price:[800 TO *]",
"aggs": {
"avg_rating": { "avg": { "field": "rating" } }
}
}
}
}
}
}
});
let agg: Aggregations = serde_json::from_value(agg_req)?;
let collector = AggregationCollector::from_aggs(agg, Default::default());
let result = searcher.search(&AllQuery, &collector)?;
let expected = json!({
"in_stock": {
"doc_count": 3, // apple, samsung, penguin
"electronics": {
"doc_count": 2, // apple, samsung
"expensive": {
"doc_count": 1, // only apple (999)
"avg_rating": { "value": 4.5 }
}
}
}
});
assert_eq!(serde_json::to_value(&result)?, expected);
println!("{}\n", serde_json::to_string_pretty(&result)?);
// Example 4: Filter with sub-aggregation (terms)
println!("=== Example 4: Filter with terms sub-aggregation ===");
let agg_req = json!({
"electronics": {
"filter": "category:electronics",
"aggs": {
"by_brand": {
"terms": { "field": "brand" },
"aggs": {
"avg_price": { "avg": { "field": "price" } }
}
}
}
}
});
let agg: Aggregations = serde_json::from_value(agg_req)?;
let collector = AggregationCollector::from_aggs(agg, Default::default());
let result = searcher.search(&AllQuery, &collector)?;
let expected = json!({
"electronics": {
"doc_count": 2,
"by_brand": {
"buckets": [
{
"key": "samsung",
"doc_count": 1,
"avg_price": { "value": 799.0 }
},
{
"key": "apple",
"doc_count": 1,
"avg_price": { "value": 999.0 }
}
],
"sum_other_doc_count": 0,
"doc_count_error_upper_bound": 0
}
}
});
assert_eq!(serde_json::to_value(&result)?, expected);
println!("{}", serde_json::to_string_pretty(&result)?);
Ok(())
}

View File

@@ -85,7 +85,6 @@ fn main() -> tantivy::Result<()> {
index_writer.add_document(doc!(
title => "The Diary of a Young Girl",
))?;
index_writer.commit()?;
// ### Committing
//
@@ -146,7 +145,7 @@ fn main() -> tantivy::Result<()> {
let query = FuzzyTermQuery::new(term, 2, true);
let (top_docs, count) = searcher
.search(&query, &(TopDocs::with_limit(5), Count))
.search(&query, &(TopDocs::with_limit(5).order_by_score(), Count))
.unwrap();
assert_eq!(count, 3);
assert_eq!(top_docs.len(), 3);

View File

@@ -69,25 +69,25 @@ fn main() -> tantivy::Result<()> {
{
// Inclusive range queries
let query = query_parser.parse_query("ip:[192.168.0.80 TO 192.168.0.100]")?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(5))?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(5).order_by_score())?;
assert_eq!(count_docs.len(), 1);
}
{
// Exclusive range queries
let query = query_parser.parse_query("ip:{192.168.0.80 TO 192.168.1.100]")?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2))?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2).order_by_score())?;
assert_eq!(count_docs.len(), 0);
}
{
// Find docs with IP addresses smaller equal 192.168.1.100
let query = query_parser.parse_query("ip:[* TO 192.168.1.100]")?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2))?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2).order_by_score())?;
assert_eq!(count_docs.len(), 2);
}
{
// Find docs with IP addresses smaller than 192.168.1.100
let query = query_parser.parse_query("ip:[* TO 192.168.1.100}")?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2))?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2).order_by_score())?;
assert_eq!(count_docs.len(), 2);
}

View File

@@ -59,12 +59,12 @@ fn main() -> tantivy::Result<()> {
let query_parser = QueryParser::for_index(&index, vec![event_type, attributes]);
{
let query = query_parser.parse_query("target:submit-button")?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2))?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2).order_by_score())?;
assert_eq!(count_docs.len(), 2);
}
{
let query = query_parser.parse_query("target:submit")?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2))?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2).order_by_score())?;
assert_eq!(count_docs.len(), 2);
}
{
@@ -74,33 +74,33 @@ fn main() -> tantivy::Result<()> {
}
{
let query = query_parser.parse_query("click AND cart.product_id:133")?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2).order_by_score())?;
assert_eq!(hits.len(), 1);
}
{
// The sub-fields in the json field marked as default field still need to be explicitly
// addressed
let query = query_parser.parse_query("click AND 133")?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2).order_by_score())?;
assert_eq!(hits.len(), 0);
}
{
// Default json fields are ignored if they collide with the schema
let query = query_parser.parse_query("event_type:holiday-sale")?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2).order_by_score())?;
assert_eq!(hits.len(), 0);
}
// # Query via full attribute path
{
// This only searches in our schema's `event_type` field
let query = query_parser.parse_query("event_type:click")?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2).order_by_score())?;
assert_eq!(hits.len(), 2);
}
{
// Default json fields can still be accessed by full path
let query = query_parser.parse_query("attributes.event_type:holiday-sale")?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2))?;
let hits = searcher.search(&*query, &TopDocs::with_limit(2).order_by_score())?;
assert_eq!(hits.len(), 1);
}
Ok(())

View File

@@ -0,0 +1,86 @@
// # Multiple Snippets Example
//
// This example demonstrates how to return multiple text fragments
// from a document, useful for long documents with matches in different locations.
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::snippet::SnippetGenerator;
use tantivy::{doc, Index, IndexWriter};
use tempfile::TempDir;
fn main() -> tantivy::Result<()> {
let index_path = TempDir::new()?;
// Define the schema
let mut schema_builder = Schema::builder();
let title = schema_builder.add_text_field("title", TEXT | STORED);
let body = schema_builder.add_text_field("body", TEXT | STORED);
let schema = schema_builder.build();
// Create the index
let index = Index::create_in_dir(&index_path, schema)?;
let mut index_writer: IndexWriter = index.writer(50_000_000)?;
// Index a long document with multiple occurrences of "rust"
index_writer.add_document(doc!(
title => "The Rust Programming Language",
body => "Rust is a systems programming language that runs blazingly fast, prevents \
segfaults, and guarantees thread safety. Lorem ipsum dolor sit amet, \
consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore. \
Rust empowers everyone to build reliable and efficient software. More filler \
text to create distance between matches. Ut enim ad minim veniam, quis nostrud \
exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. \
The Rust compiler is known for its helpful error messages. Duis aute irure \
dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla \
pariatur. Rust has a strong type system and ownership model."
))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let query_parser = QueryParser::for_index(&index, vec![title, body]);
let query = query_parser.parse_query("rust")?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;
// Create snippet generator
let mut snippet_generator = SnippetGenerator::create(&searcher, &*query, body)?;
println!("=== Single Snippet (Default Behavior) ===\n");
for (score, doc_address) in &top_docs {
let doc = searcher.doc::<TantivyDocument>(*doc_address)?;
let snippet = snippet_generator.snippet_from_doc(&doc);
println!("Document score: {}", score);
println!("Title: {}", doc.get_first(title).unwrap().as_str().unwrap());
println!("Single snippet: {}\n", snippet.to_html());
}
println!("\n=== Multiple Snippets (New Feature) ===\n");
// Configure to return multiple snippets
// Get up to 3 snippets
snippet_generator.set_snippets_limit(3);
// Smaller fragments
snippet_generator.set_max_num_chars(80);
// By default, multiple snippets are sorted by score. You can change this to sort by position.
// snippet_generator.set_sort_order(SnippetSortOrder::Position);
for (score, doc_address) in top_docs {
let doc = searcher.doc::<TantivyDocument>(doc_address)?;
let snippets = snippet_generator.snippets_from_doc(&doc);
println!("Document score: {}", score);
println!("Title: {}", doc.get_first(title).unwrap().as_str().unwrap());
println!("Found {} snippets:", snippets.len());
for (i, snippet) in snippets.iter().enumerate() {
println!(" Snippet {}: {}", i + 1, snippet.to_html());
}
println!();
}
Ok(())
}

View File

@@ -63,7 +63,7 @@ fn main() -> Result<()> {
// but not "in the Gulf Stream".
let query = query_parser.parse_query("\"in the su\"*")?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;
let mut titles = top_docs
.into_iter()
.map(|(_score, doc_address)| {

View File

@@ -107,7 +107,8 @@ fn main() -> tantivy::Result<()> {
IndexRecordOption::Basic,
);
let (top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count))?;
let (top_docs, count) =
searcher.search(&query, &(TopDocs::with_limit(2).order_by_score(), Count))?;
assert_eq!(count, 2);
@@ -128,7 +129,8 @@ fn main() -> tantivy::Result<()> {
IndexRecordOption::Basic,
);
let (_top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count))?;
let (_top_docs, count) =
searcher.search(&query, &(TopDocs::with_limit(2).order_by_score(), Count))?;
assert_eq!(count, 0);

View File

@@ -50,7 +50,7 @@ fn main() -> tantivy::Result<()> {
let query_parser = QueryParser::for_index(&index, vec![title, body]);
let query = query_parser.parse_query("sycamore spring")?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;
let snippet_generator = SnippetGenerator::create(&searcher, &*query, body)?;

View File

@@ -102,7 +102,7 @@ fn main() -> tantivy::Result<()> {
// stop words are applied on the query as well.
// The following will be equivalent to `title:frankenstein`
let query = query_parser.parse_query("title:\"the Frankenstein\"")?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;
for (score, doc_address) in top_docs {
let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;

View File

@@ -164,7 +164,7 @@ fn main() -> tantivy::Result<()> {
move |doc_id: DocId| Reverse(price[doc_id as usize])
};
let most_expensive_first = TopDocs::with_limit(10).custom_score(score_by_price);
let most_expensive_first = TopDocs::with_limit(10).order_by(score_by_price);
let hits = searcher.search(&query, &most_expensive_first)?;
assert_eq!(

View File

@@ -15,3 +15,5 @@ edition = "2024"
nom = "7"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
ordered-float = "5.0.0"
fnv = "1.0.7"

View File

@@ -117,6 +117,22 @@ where F: nom::Parser<I, (O, ErrorList), Infallible> {
}
}
pub(crate) fn terminated_infallible<I, O1, O2, F, G>(
mut first: F,
mut second: G,
) -> impl FnMut(I) -> JResult<I, O1>
where
F: nom::Parser<I, (O1, ErrorList), Infallible>,
G: nom::Parser<I, (O2, ErrorList), Infallible>,
{
move |input: I| {
let (input, (o1, mut err)) = first.parse(input)?;
let (input, (_, mut err2)) = second.parse(input)?;
err.append(&mut err2);
Ok((input, (o1, err)))
}
}
pub(crate) fn delimited_infallible<I, O1, O2, O3, F, G, H>(
mut first: F,
mut second: G,

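The combinator added above mirrors nom's `terminated`, but for the crate's lenient parsers: both sub-parsers always succeed structurally, only the first parser's output is kept, and the error lists accumulated by both are concatenated. A self-contained sketch of that shape, with stand-in types for the crate-internal `JResult`/`ErrorList` (assumptions, not the real definitions):

```rust
// Stand-ins for the crate-internal types.
type ErrorList = Vec<String>;
type JResult<I, O> = Result<(I, (O, ErrorList)), ()>;

/// Run two "infallible" parsers in sequence, keep the first output,
/// and merge the lenient errors collected by both.
fn terminated_infallible<I, O1, O2>(
    mut first: impl FnMut(I) -> JResult<I, O1>,
    mut second: impl FnMut(I) -> JResult<I, O2>,
) -> impl FnMut(I) -> JResult<I, O1> {
    move |input: I| {
        let (input, (o1, mut err)) = first(input)?;
        let (input, (_, mut err2)) = second(input)?;
        err.append(&mut err2);
        Ok((input, (o1, err)))
    }
}

fn main() {
    // A toy "parser" that consumes one char, and one that records an error.
    let word = |s: &'static str| Ok((&s[1..], (s.as_bytes()[0] as char, vec![])));
    let space = |s: &'static str| Ok((s, ((), vec!["expected space".to_string()])));
    let mut p = terminated_infallible(word, space);
    let (_rest, (c, errs)) = p("ab").unwrap();
    assert_eq!(c, 'a');
    assert_eq!(errs, vec!["expected space".to_string()]);
}
```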
View File

@@ -31,7 +31,17 @@ pub fn parse_query_lenient(query: &str) -> (UserInputAst, Vec<LenientError>) {
#[cfg(test)]
mod tests {
use crate::{parse_query, parse_query_lenient};
use crate::{UserInputAst, parse_query, parse_query_lenient};
#[test]
fn test_deduplication() {
let ast: UserInputAst = parse_query("a a").unwrap();
let json = serde_json::to_string(&ast).unwrap();
assert_eq!(
json,
r#"{"type":"bool","clauses":[[null,{"type":"literal","field_name":null,"phrase":"a","delimiter":"none","slop":0,"prefix":false}]]}"#
);
}
#[test]
fn test_parse_query_serialization() {

View File

@@ -1,6 +1,7 @@
use std::borrow::Cow;
use std::iter::once;
use fnv::FnvHashSet;
use nom::IResult;
use nom::branch::alt;
use nom::bytes::complete::tag;
@@ -68,7 +69,7 @@ fn interpret_escape(source: &str) -> String {
/// Consume a word outside of any context.
// TODO should support escape sequences
fn word(inp: &str) -> IResult<&str, Cow<str>> {
fn word(inp: &str) -> IResult<&str, Cow<'_, str>> {
map_res(
recognize(tuple((
alt((
@@ -305,15 +306,14 @@ fn term_group_infallible(inp: &str) -> JResult<&str, UserInputAst> {
let (inp, (field_name, _, _, _)) =
tuple((field_name, multispace0, char('('), multispace0))(inp).expect("precondition failed");
let res = delimited_infallible(
delimited_infallible(
nothing,
map(ast_infallible, |(mut ast, errors)| {
ast.set_default_field(field_name.to_string());
(ast, errors)
}),
opt_i_err(char(')'), "expected ')'"),
)(inp);
res
)(inp)
}
fn exists(inp: &str) -> IResult<&str, UserInputLeaf> {
@@ -367,7 +367,10 @@ fn literal(inp: &str) -> IResult<&str, UserInputAst> {
// something (a field name) got parsed before
alt((
map(
tuple((opt(field_name), alt((range, set, exists, term_or_phrase)))),
tuple((
opt(field_name),
alt((range, set, exists, regex, term_or_phrase)),
)),
|(field_name, leaf): (Option<String>, UserInputLeaf)| leaf.set_field(field_name).into(),
),
term_group,
@@ -389,6 +392,10 @@ fn literal_no_group_infallible(inp: &str) -> JResult<&str, Option<UserInputAst>>
value((), peek(one_of("{[><"))),
map(range_infallible, |(range, errs)| (Some(range), errs)),
),
(
value((), peek(one_of("/"))),
map(regex_infallible, |(regex, errs)| (Some(regex), errs)),
),
),
delimited_infallible(space0_infallible, term_or_phrase_infallible, nothing),
),
@@ -689,6 +696,61 @@ fn set_infallible(mut inp: &str) -> JResult<&str, UserInputLeaf> {
}
}
fn regex(inp: &str) -> IResult<&str, UserInputLeaf> {
map(
terminated(
delimited(
char('/'),
many1(alt((preceded(char('\\'), char('/')), none_of("/")))),
char('/'),
),
peek(alt((multispace1, eof))),
),
|elements| UserInputLeaf::Regex {
field: None,
pattern: elements.into_iter().collect::<String>(),
},
)(inp)
}
fn regex_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
match terminated_infallible(
delimited_infallible(
opt_i_err(char('/'), "missing delimiter /"),
opt_i(many1(alt((preceded(char('\\'), char('/')), none_of("/"))))),
opt_i_err(char('/'), "missing delimiter /"),
),
opt_i_err(
peek(alt((multispace1, eof))),
"expected whitespace or end of input",
),
)(inp)
{
Ok((rest, (elements_part, errors))) => {
let pattern = match elements_part {
Some(elements_part) => elements_part.into_iter().collect(),
None => String::new(),
};
let res = UserInputLeaf::Regex {
field: None,
pattern,
};
Ok((rest, (res, errors)))
}
Err(e) => {
let errs = vec![LenientErrorInternal {
pos: inp.len(),
message: e.to_string(),
}];
let res = UserInputLeaf::Regex {
field: None,
pattern: String::new(),
};
Ok((inp, (res, errs)))
}
}
}
fn negate(expr: UserInputAst) -> UserInputAst {
expr.unary(Occur::MustNot)
}
@@ -696,7 +758,17 @@ fn negate(expr: UserInputAst) -> UserInputAst {
fn leaf(inp: &str) -> IResult<&str, UserInputAst> {
alt((
delimited(char('('), ast, char(')')),
map(char('*'), |_| UserInputAst::from(UserInputLeaf::All)),
map(
terminated(
char('*'),
peek(alt((
value((), multispace1),
value((), char(')')),
value((), eof),
))),
),
|_| UserInputAst::from(UserInputLeaf::All),
),
map(preceded(tuple((tag("NOT"), multispace1)), leaf), negate),
literal,
))(inp)
@@ -717,7 +789,17 @@ fn leaf_infallible(inp: &str) -> JResult<&str, Option<UserInputAst>> {
),
),
(
value((), char('*')),
value(
(),
terminated(
char('*'),
peek(alt((
value((), multispace1),
value((), char(')')),
value((), eof),
))),
),
),
map(nothing, |_| {
(Some(UserInputAst::from(UserInputLeaf::All)), Vec::new())
}),
@@ -753,7 +835,7 @@ fn boosted_leaf(inp: &str) -> IResult<&str, UserInputAst> {
tuple((leaf, fallible(boost))),
|(leaf, boost_opt)| match boost_opt {
Some(boost) if (boost - 1.0).abs() > f64::EPSILON => {
UserInputAst::Boost(Box::new(leaf), boost)
UserInputAst::Boost(Box::new(leaf), boost.into())
}
_ => leaf,
},
@@ -765,7 +847,7 @@ fn boosted_leaf_infallible(inp: &str) -> JResult<&str, Option<UserInputAst>> {
tuple_infallible((leaf_infallible, boost)),
|((leaf, boost_opt), error)| match boost_opt {
Some(boost) if (boost - 1.0).abs() > f64::EPSILON => (
leaf.map(|leaf| UserInputAst::Boost(Box::new(leaf), boost)),
leaf.map(|leaf| UserInputAst::Boost(Box::new(leaf), boost.into())),
error,
),
_ => (leaf, error),
@@ -1016,12 +1098,25 @@ pub fn parse_to_ast_lenient(query_str: &str) -> (UserInputAst, Vec<LenientError>
(rewrite_ast(res), errors)
}
/// Removes unnecessary children clauses in AST
///
/// Motivated by [issue #1433](https://github.com/quickwit-oss/tantivy/issues/1433)
fn rewrite_ast(mut input: UserInputAst) -> UserInputAst {
if let UserInputAst::Clause(terms) = &mut input {
for term in terms {
if let UserInputAst::Clause(sub_clauses) = &mut input {
// call rewrite_ast recursively on children clauses if applicable
let mut new_clauses = Vec::with_capacity(sub_clauses.len());
for (occur, clause) in sub_clauses.drain(..) {
let rewritten_clause = rewrite_ast(clause);
new_clauses.push((occur, rewritten_clause));
}
*sub_clauses = new_clauses;
// remove duplicate child clauses
// e.g. (+a +b) OR (+c +d) OR (+a +b) => (+a +b) OR (+c +d)
let mut seen = FnvHashSet::default();
sub_clauses.retain(|term| seen.insert(term.clone()));
// Removes unnecessary children clauses in AST
//
// Motivated by [issue #1433](https://github.com/quickwit-oss/tantivy/issues/1433)
for term in sub_clauses {
rewrite_ast_clause(term);
}
}
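The `retain`/`insert` idiom above drops later duplicates while preserving the order of first occurrences, which is why `Hash`/`Eq` derives are added to the AST types further down in this changeset. The same idiom in isolation:

```rust
use fnv::FnvHashSet;

/// Remove duplicates in place, keeping the first occurrence of each item.
fn dedup_preserving_order<T: std::hash::Hash + Eq + Clone>(items: &mut Vec<T>) {
    let mut seen = FnvHashSet::default();
    // `insert` returns false for values already seen, so `retain` drops them.
    items.retain(|item| seen.insert(item.clone()));
}

fn main() {
    let mut clauses = vec!["(+a +b)", "(+c +d)", "(+a +b)"];
    dedup_preserving_order(&mut clauses);
    assert_eq!(clauses, vec!["(+a +b)", "(+c +d)"]);
}
```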
@@ -1596,6 +1691,21 @@ mod test {
test_parse_query_to_ast_helper("abc:a b", "(*\"abc\":a *b)");
test_parse_query_to_ast_helper("abc:\"a b\"", "\"abc\":\"a b\"");
test_parse_query_to_ast_helper("foo:[1 TO 5]", "\"foo\":[\"1\" TO \"5\"]");
// Phrase prefixed with *
test_parse_query_to_ast_helper("foo:(*A)", "\"foo\":*A");
test_parse_query_to_ast_helper("*A", "*A");
test_parse_query_to_ast_helper("(*A)", "*A");
test_parse_query_to_ast_helper("foo:(A OR B)", "(?\"foo\":A ?\"foo\":B)");
test_parse_query_to_ast_helper("foo:(A* OR B*)", "(?\"foo\":A* ?\"foo\":B*)");
test_parse_query_to_ast_helper("foo:(*A OR *B)", "(?\"foo\":*A ?\"foo\":*B)");
}
#[test]
fn test_parse_query_all() {
test_parse_query_to_ast_helper("*", "*");
test_parse_query_to_ast_helper("(*)", "*");
test_parse_query_to_ast_helper("(* )", "*");
}
#[test]
@@ -1694,6 +1804,63 @@ mod test {
test_is_parse_err(r#"!bc:def"#, "!bc:def");
}
#[test]
fn test_regex_parser() {
let r = parse_to_ast(r#"a:/joh?n(ath[oa]n)/"#);
assert!(r.is_ok(), "Failed to parse custom query: {r:?}");
let (_, input) = r.unwrap();
match input {
UserInputAst::Leaf(leaf) => match leaf.as_ref() {
UserInputLeaf::Regex { field, pattern } => {
assert_eq!(field, &Some("a".to_string()));
assert_eq!(pattern, "joh?n(ath[oa]n)");
}
_ => panic!("Expected a regex leaf, got {leaf:?}"),
},
_ => panic!("Expected a leaf"),
}
let r = parse_to_ast(r#"a:/\\/cgi-bin\\/luci.*/"#);
assert!(r.is_ok(), "Failed to parse custom query: {r:?}");
let (_, input) = r.unwrap();
match input {
UserInputAst::Leaf(leaf) => match leaf.as_ref() {
UserInputLeaf::Regex { field, pattern } => {
assert_eq!(field, &Some("a".to_string()));
assert_eq!(pattern, "\\/cgi-bin\\/luci.*");
}
_ => panic!("Expected a regex leaf, got {leaf:?}"),
},
_ => panic!("Expected a leaf"),
}
}
#[test]
fn test_regex_parser_lenient() {
let literal = |query| literal_infallible(query).unwrap().1;
let (res, errs) = literal(r#"a:/joh?n(ath[oa]n)/"#);
let expected = UserInputLeaf::Regex {
field: Some("a".to_string()),
pattern: "joh?n(ath[oa]n)".to_string(),
}
.into();
assert_eq!(res.unwrap(), expected);
assert!(errs.is_empty(), "Expected no errors, got: {errs:?}");
let (res, errs) = literal("title:/joh?n(ath[oa]n)");
let expected = UserInputLeaf::Regex {
field: Some("title".to_string()),
pattern: "joh?n(ath[oa]n)".to_string(),
}
.into();
assert_eq!(res.unwrap(), expected);
assert_eq!(errs.len(), 1, "Expected 1 error, got: {errs:?}");
assert_eq!(
errs[0].message, "missing delimiter /",
"Unexpected error message",
);
}
#[test]
fn test_space_before_value() {
test_parse_query_to_ast_helper("field : a", r#""field":a"#);

View File

@@ -5,7 +5,7 @@ use serde::Serialize;
use crate::Occur;
#[derive(PartialEq, Clone, Serialize)]
#[derive(PartialEq, Eq, Hash, Clone, Serialize)]
#[serde(tag = "type")]
#[serde(rename_all = "snake_case")]
pub enum UserInputLeaf {
@@ -23,6 +23,10 @@ pub enum UserInputLeaf {
Exists {
field: String,
},
Regex {
field: Option<String>,
pattern: String,
},
}
impl UserInputLeaf {
@@ -46,6 +50,7 @@ impl UserInputLeaf {
UserInputLeaf::Exists { field: _ } => UserInputLeaf::Exists {
field: field.expect("Exist query without a field isn't allowed"),
},
UserInputLeaf::Regex { field: _, pattern } => UserInputLeaf::Regex { field, pattern },
}
}
@@ -103,11 +108,19 @@ impl Debug for UserInputLeaf {
UserInputLeaf::Exists { field } => {
write!(formatter, "$exists(\"{field}\")")
}
UserInputLeaf::Regex { field, pattern } => {
if let Some(field) = field {
// TODO properly escape field (in case of \")
write!(formatter, "\"{field}\":")?;
}
// TODO properly escape pattern (in case of \")
write!(formatter, "/{pattern}/")
}
}
}
}
#[derive(Copy, Clone, Eq, PartialEq, Debug, Serialize)]
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Delimiter {
SingleQuotes,
@@ -115,7 +128,7 @@ pub enum Delimiter {
None,
}
#[derive(PartialEq, Clone, Serialize)]
#[derive(PartialEq, Eq, Hash, Clone, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct UserInputLiteral {
pub field_name: Option<String>,
@@ -154,7 +167,7 @@ impl fmt::Debug for UserInputLiteral {
}
}
#[derive(PartialEq, Debug, Clone, Serialize)]
#[derive(PartialEq, Eq, Hash, Debug, Clone, Serialize)]
#[serde(tag = "type", content = "value")]
#[serde(rename_all = "snake_case")]
pub enum UserInputBound {
@@ -191,11 +204,11 @@ impl UserInputBound {
}
}
#[derive(PartialEq, Clone, Serialize)]
#[derive(PartialEq, Eq, Hash, Clone, Serialize)]
#[serde(into = "UserInputAstSerde")]
pub enum UserInputAst {
Clause(Vec<(Option<Occur>, UserInputAst)>),
Boost(Box<UserInputAst>, f64),
Boost(Box<UserInputAst>, ordered_float::OrderedFloat<f64>),
Leaf(Box<UserInputLeaf>),
}
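Wrapping the boost in `ordered_float::OrderedFloat` is presumably what makes the `Eq`/`Hash` derives above possible: `f64` itself implements neither, because of NaN. A minimal illustration of the pattern:

```rust
use std::collections::HashSet;

use ordered_float::OrderedFloat;

// `f64` is not Eq/Hash (NaN != NaN), but OrderedFloat<f64> defines a
// total order and can therefore be used in derived hashable types.
#[derive(PartialEq, Eq, Hash)]
struct Boosted {
    boost: OrderedFloat<f64>,
}

fn main() {
    let mut seen = HashSet::new();
    assert!(seen.insert(Boosted { boost: 2.5.into() }));
    assert!(!seen.insert(Boosted { boost: 2.5.into() })); // duplicate detected
}
```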
@@ -217,9 +230,10 @@ impl From<UserInputAst> for UserInputAstSerde {
fn from(ast: UserInputAst) -> Self {
match ast {
UserInputAst::Clause(clause) => UserInputAstSerde::Bool { clauses: clause },
UserInputAst::Boost(underlying, boost) => {
UserInputAstSerde::Boost { underlying, boost }
}
UserInputAst::Boost(underlying, boost) => UserInputAstSerde::Boost {
underlying,
boost: boost.into_inner(),
},
UserInputAst::Leaf(leaf) => UserInputAstSerde::Leaf(leaf),
}
}
@@ -378,7 +392,7 @@ mod tests {
#[test]
fn test_boost_serialization() {
let inner_ast = UserInputAst::Leaf(Box::new(UserInputLeaf::All));
let boost_ast = UserInputAst::Boost(Box::new(inner_ast), 2.5);
let boost_ast = UserInputAst::Boost(Box::new(inner_ast), 2.5.into());
let json = serde_json::to_string(&boost_ast).unwrap();
assert_eq!(
json,
@@ -405,7 +419,7 @@ mod tests {
}))),
),
])),
2.5,
2.5.into(),
);
let json = serde_json::to_string(&boost_ast).unwrap();
assert_eq!(

runtests.sh (new executable file, 3 lines)
View File

@@ -0,0 +1,3 @@
#! /bin/bash
cargo +stable nextest run --features quickwit,mmap,stopwords,lz4-compression,zstd-compression,failpoints --verbose --workspace

View File

@@ -20,17 +20,16 @@ Contains all metric aggregations, like average aggregation. Metric aggregations
#### agg_req
agg_req contains the user's aggregation request. Deserialization from JSON is compatible with Elasticsearch aggregation requests.
#### agg_req_with_accessor
agg_req_with_accessor contains the users aggregation request enriched with fast field accessors etc, which are
#### agg_data
agg_data contains the user's aggregation request enriched with fast field accessors etc., which are
used during collection.
#### segment_agg_result
segment_agg_result contains the aggregation result tree, which is used for collection of a segment.
The tree from agg_req_with_accessor is passed during collection.
agg_data is passed during collection.
#### intermediate_agg_result
intermediate_agg_result contains the aggregation tree for merging with other trees.
#### agg_result
agg_result contains the final aggregation tree.
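Taken together, a request flows from agg_req (deserialized JSON) through agg_data (fast field accessors attached per segment) into segment_agg_result during collection, and finally through intermediate_agg_result into agg_result. A sketch of the user-facing end of that pipeline, matching the examples elsewhere in this changeset (assumes an index with a `price` fast field):

```rust
use serde_json::json;
use tantivy::aggregation::agg_req::Aggregations;
use tantivy::aggregation::AggregationCollector;
use tantivy::query::AllQuery;

fn run_aggs(searcher: &tantivy::Searcher) -> tantivy::Result<()> {
    // agg_req: the user's request, deserialized from Elasticsearch-style JSON.
    let agg: Aggregations =
        serde_json::from_value(json!({ "avg_price": { "avg": { "field": "price" } } }))?;
    // Collection enriches the request with fast field accessors (agg_data),
    // builds per-segment results, merges the intermediate trees, and
    // returns the final agg_result tree.
    let collector = AggregationCollector::from_aggs(agg, Default::default());
    let result = searcher.search(&AllQuery, &collector)?;
    println!("{}", serde_json::to_string_pretty(&result)?);
    Ok(())
}
```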

View File

@@ -0,0 +1,105 @@
//! Helpers that enrich the request tree with access to the fast fields and metadata.
use std::io;
use columnar::{Column, ColumnType};
use crate::aggregation::{f64_to_fastfield_u64, Key};
use crate::index::SegmentReader;
/// Get the missing value as an internal u64 representation.
///
/// For terms we use `column_max_value + 1` as a sentinel value.
/// For numerical data we convert the value into the representation
/// we would get from the fast field when we open it as u64_lenient_for_type.
///
/// That way we can use it the same way as if it came from the fast field.
pub(crate) fn get_missing_val_as_u64_lenient(
column_type: ColumnType,
column_max_value: u64,
missing: &Key,
field_name: &str,
) -> crate::Result<Option<u64>> {
let missing_val = match missing {
Key::Str(_) if column_type == ColumnType::Str => Some(column_max_value + 1),
// Allow fallback to number on text fields
Key::F64(_) if column_type == ColumnType::Str => Some(column_max_value + 1),
Key::U64(_) if column_type == ColumnType::Str => Some(column_max_value + 1),
Key::I64(_) if column_type == ColumnType::Str => Some(column_max_value + 1),
Key::F64(val) if column_type.numerical_type().is_some() => {
f64_to_fastfield_u64(*val, &column_type)
}
// NOTE: We may lose precision of the passed missing value by casting i64 and u64 to f64.
Key::I64(val) if column_type.numerical_type().is_some() => {
f64_to_fastfield_u64(*val as f64, &column_type)
}
Key::U64(val) if column_type.numerical_type().is_some() => {
f64_to_fastfield_u64(*val as f64, &column_type)
}
_ => {
return Err(crate::TantivyError::InvalidArgument(format!(
"Missing value {missing:?} for field {field_name} is not supported for column \
type {column_type:?}"
)));
}
};
Ok(missing_val)
}
pub(crate) fn get_numeric_or_date_column_types() -> &'static [ColumnType] {
&[
ColumnType::F64,
ColumnType::U64,
ColumnType::I64,
ColumnType::DateTime,
]
}
/// Get the fast field reader, or an empty column as default.
pub(crate) fn get_ff_reader(
reader: &SegmentReader,
field_name: &str,
allowed_column_types: Option<&[ColumnType]>,
) -> crate::Result<(columnar::Column<u64>, ColumnType)> {
let ff_fields = reader.fast_fields();
let ff_field_with_type = ff_fields
.u64_lenient_for_type(allowed_column_types, field_name)?
.unwrap_or_else(|| {
(
Column::build_empty_column(reader.num_docs()),
ColumnType::U64,
)
});
Ok(ff_field_with_type)
}
pub(crate) fn get_dynamic_columns(
reader: &SegmentReader,
field_name: &str,
) -> crate::Result<Vec<columnar::DynamicColumn>> {
let ff_fields = reader.fast_fields().dynamic_column_handles(field_name)?;
let cols = ff_fields
.iter()
.map(|h| h.open())
.collect::<io::Result<_>>()?;
assert!(!ff_fields.is_empty(), "field {field_name} not found");
Ok(cols)
}
/// Get all fast field readers, or an empty column as default.
///
/// Is guaranteed to return at least one column.
pub(crate) fn get_all_ff_reader_or_empty(
reader: &SegmentReader,
field_name: &str,
allowed_column_types: Option<&[ColumnType]>,
fallback_type: ColumnType,
) -> crate::Result<Vec<(columnar::Column<u64>, ColumnType)>> {
let ff_fields = reader.fast_fields();
let mut ff_field_with_type =
ff_fields.u64_lenient_for_type_all(allowed_column_types, field_name)?;
if ff_field_with_type.is_empty() {
ff_field_with_type.push((Column::build_empty_column(reader.num_docs()), fallback_type));
}
Ok(ff_field_with_type)
}

src/aggregation/agg_data.rs (new file, 1095 lines)

File diff suppressed because it is too large

View File

@@ -35,6 +35,7 @@ pub struct AggregationLimitsGuard {
/// Allocated memory with this guard.
allocated_with_the_guard: u64,
}
impl Clone for AggregationLimitsGuard {
fn clone(&self) -> Self {
Self {
@@ -70,7 +71,7 @@ impl AggregationLimitsGuard {
/// *memory_limit*
/// memory_limit is defined in bytes.
/// Aggregation fails when the estimated memory consumption of the aggregation is higher than
/// memory_limit.
/// memory_limit.
/// memory_limit will default to `DEFAULT_MEMORY_LIMIT` (500MB)
///
/// *bucket_limit*

View File

@@ -26,12 +26,14 @@
//! let _agg_req: Aggregations = serde_json::from_str(elasticsearch_compatible_json_req).unwrap();
//! ```
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use super::bucket::{
DateHistogramAggregationReq, HistogramAggregation, RangeAggregation, TermsAggregation,
DateHistogramAggregationReq, FilterAggregation, HistogramAggregation, RangeAggregation,
TermsAggregation,
};
use super::metric::{
AverageAggregation, CardinalityAggregationReq, CountAggregation, ExtendedStatsAggregation,
@@ -43,7 +45,7 @@ use super::metric::{
/// defined names. It is also used in buckets aggregations to define sub-aggregations.
///
/// The key is the user defined name of the aggregation.
pub type Aggregations = HashMap<String, Aggregation>;
pub type Aggregations = FxHashMap<String, Aggregation>;
/// Aggregation request.
///
@@ -129,6 +131,9 @@ pub enum AggregationVariants {
/// Put data into buckets of terms.
#[serde(rename = "terms")]
Terms(TermsAggregation),
/// Filter documents into a single bucket.
#[serde(rename = "filter")]
Filter(FilterAggregation),
// Metric aggregation types
/// Computes the average of the extracted values.
@@ -174,6 +179,7 @@ impl AggregationVariants {
AggregationVariants::Range(range) => vec![range.field.as_str()],
AggregationVariants::Histogram(histogram) => vec![histogram.field.as_str()],
AggregationVariants::DateHistogram(histogram) => vec![histogram.field.as_str()],
AggregationVariants::Filter(filter) => filter.get_fast_field_names(),
AggregationVariants::Average(avg) => vec![avg.field_name()],
AggregationVariants::Count(count) => vec![count.field_name()],
AggregationVariants::Max(max) => vec![max.field_name()],
@@ -208,13 +214,6 @@ impl AggregationVariants {
_ => None,
}
}
pub(crate) fn as_top_hits(&self) -> Option<&TopHitsAggregationReq> {
match &self {
AggregationVariants::TopHits(top_hits) => Some(top_hits),
_ => None,
}
}
pub(crate) fn as_percentile(&self) -> Option<&PercentilesAggregationReq> {
match &self {
AggregationVariants::Percentiles(percentile_req) => Some(percentile_req),

View File

@@ -1,471 +0,0 @@
//! This will enhance the request tree with access to the fastfield and metadata.
use std::collections::HashMap;
use std::io;
use columnar::{Column, ColumnBlockAccessor, ColumnType, DynamicColumn, StrColumn};
use super::agg_req::{Aggregation, AggregationVariants, Aggregations};
use super::bucket::{
DateHistogramAggregationReq, HistogramAggregation, RangeAggregation, TermsAggregation,
};
use super::metric::{
AverageAggregation, CardinalityAggregationReq, CountAggregation, ExtendedStatsAggregation,
MaxAggregation, MinAggregation, StatsAggregation, SumAggregation,
};
use super::segment_agg_result::AggregationLimitsGuard;
use super::VecWithNames;
use crate::aggregation::{f64_to_fastfield_u64, Key};
use crate::index::SegmentReader;
use crate::SegmentOrdinal;
#[derive(Default)]
pub(crate) struct AggregationsWithAccessor {
pub aggs: VecWithNames<AggregationWithAccessor>,
}
impl AggregationsWithAccessor {
fn from_data(aggs: VecWithNames<AggregationWithAccessor>) -> Self {
Self { aggs }
}
pub fn is_empty(&self) -> bool {
self.aggs.is_empty()
}
}
pub struct AggregationWithAccessor {
pub(crate) segment_ordinal: SegmentOrdinal,
/// In general there can be buckets without fast field access, e.g. buckets that are created
/// based on search terms. That is not that case currently, but eventually this needs to be
/// Option or moved.
pub(crate) accessor: Column<u64>,
/// Load insert u64 for missing use case
pub(crate) missing_value_for_accessor: Option<u64>,
pub(crate) str_dict_column: Option<StrColumn>,
pub(crate) field_type: ColumnType,
pub(crate) sub_aggregation: AggregationsWithAccessor,
pub(crate) limits: AggregationLimitsGuard,
pub(crate) column_block_accessor: ColumnBlockAccessor<u64>,
/// Used for missing term aggregation, which checks all columns for existence.
/// And also for `top_hits` aggregation, which may sort on multiple fields.
/// By convention the missing aggregation is chosen, when this property is set
/// (instead bein set in `agg`).
/// If this needs to used by other aggregations, we need to refactor this.
// NOTE: we can make all other aggregations use this instead of the `accessor` and `field_type`
// (making them obsolete) But will it have a performance impact?
pub(crate) accessors: Vec<(Column<u64>, ColumnType)>,
/// Map field names to all associated column accessors.
/// This field is used for `docvalue_fields`, which is currently only supported for `top_hits`.
pub(crate) value_accessors: HashMap<String, Vec<DynamicColumn>>,
pub(crate) agg: Aggregation,
}
impl AggregationWithAccessor {
/// May return multiple accessors if the aggregation is e.g. on mixed field types.
fn try_from_agg(
agg: &Aggregation,
sub_aggregation: &Aggregations,
reader: &SegmentReader,
segment_ordinal: SegmentOrdinal,
limits: AggregationLimitsGuard,
) -> crate::Result<Vec<AggregationWithAccessor>> {
let mut agg = agg.clone();
let add_agg_with_accessor = |agg: &Aggregation,
accessor: Column<u64>,
column_type: ColumnType,
aggs: &mut Vec<AggregationWithAccessor>|
-> crate::Result<()> {
let res = AggregationWithAccessor {
segment_ordinal,
accessor,
accessors: Default::default(),
value_accessors: Default::default(),
field_type: column_type,
sub_aggregation: get_aggs_with_segment_accessor_and_validate(
sub_aggregation,
reader,
segment_ordinal,
&limits,
)?,
agg: agg.clone(),
limits: limits.clone(),
missing_value_for_accessor: None,
str_dict_column: None,
column_block_accessor: Default::default(),
};
aggs.push(res);
Ok(())
};
let add_agg_with_accessors = |agg: &Aggregation,
accessors: Vec<(Column<u64>, ColumnType)>,
aggs: &mut Vec<AggregationWithAccessor>,
value_accessors: HashMap<String, Vec<DynamicColumn>>|
-> crate::Result<()> {
let (accessor, field_type) = accessors.first().expect("at least one accessor");
let limits = limits.clone();
let res = AggregationWithAccessor {
segment_ordinal,
// TODO: We should do away with the `accessor` field altogether
accessor: accessor.clone(),
value_accessors,
field_type: *field_type,
accessors,
sub_aggregation: get_aggs_with_segment_accessor_and_validate(
sub_aggregation,
reader,
segment_ordinal,
&limits,
)?,
agg: agg.clone(),
limits,
missing_value_for_accessor: None,
str_dict_column: None,
column_block_accessor: Default::default(),
};
aggs.push(res);
Ok(())
};
let mut res: Vec<AggregationWithAccessor> = Vec::new();
use AggregationVariants::*;
match agg.agg {
Range(RangeAggregation {
field: ref field_name,
..
}) => {
let (accessor, column_type) =
get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?;
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
}
Histogram(HistogramAggregation {
field: ref field_name,
..
}) => {
let (accessor, column_type) =
get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?;
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
}
DateHistogram(DateHistogramAggregationReq {
field: ref field_name,
..
}) => {
let (accessor, column_type) =
// Only DateTime is supported for DateHistogram
get_ff_reader(reader, field_name, Some(&[ColumnType::DateTime]))?;
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
}
Terms(TermsAggregation {
field: ref field_name,
ref missing,
..
})
| Cardinality(CardinalityAggregationReq {
field: ref field_name,
ref missing,
..
}) => {
let str_dict_column = reader.fast_fields().str(field_name)?;
let allowed_column_types = [
ColumnType::I64,
ColumnType::U64,
ColumnType::F64,
ColumnType::Str,
ColumnType::DateTime,
ColumnType::Bool,
ColumnType::IpAddr,
// ColumnType::Bytes Unsupported
];
// In case the column is empty we want the shim column to match the missing type
let fallback_type = missing
.as_ref()
.map(|missing| match missing {
Key::Str(_) => ColumnType::Str,
Key::F64(_) => ColumnType::F64,
Key::I64(_) => ColumnType::I64,
Key::U64(_) => ColumnType::U64,
})
.unwrap_or(ColumnType::U64);
let column_and_types = get_all_ff_reader_or_empty(
reader,
field_name,
Some(&allowed_column_types),
fallback_type,
)?;
let missing_and_more_than_one_col = column_and_types.len() > 1 && missing.is_some();
let text_on_non_text_col = column_and_types.len() == 1
&& column_and_types[0].1.numerical_type().is_some()
&& missing
.as_ref()
.map(|m| matches!(m, Key::Str(_)))
.unwrap_or(false);
// Actually we could convert the text to a number and have the fast path, if it is
// provided in Rfc3339 format. But this use case is probably common
// enough to justify the effort.
let text_on_date_col = column_and_types.len() == 1
&& column_and_types[0].1 == ColumnType::DateTime
&& missing
.as_ref()
.map(|m| matches!(m, Key::Str(_)))
.unwrap_or(false);
let use_special_missing_agg =
missing_and_more_than_one_col || text_on_non_text_col || text_on_date_col;
if use_special_missing_agg {
let column_and_types =
get_all_ff_reader_or_empty(reader, field_name, None, fallback_type)?;
let accessors = column_and_types
.iter()
.map(|c_t| (c_t.0.clone(), c_t.1))
.collect();
add_agg_with_accessors(&agg, accessors, &mut res, Default::default())?;
}
for (accessor, column_type) in column_and_types {
let missing_value_term_agg = if use_special_missing_agg {
None
} else {
missing.clone()
};
let missing_value_for_accessor =
if let Some(missing) = missing_value_term_agg.as_ref() {
get_missing_val_as_u64_lenient(
column_type,
missing,
agg.agg.get_fast_field_names()[0],
)?
} else {
None
};
let limits = limits.clone();
let agg = AggregationWithAccessor {
segment_ordinal,
missing_value_for_accessor,
accessor,
accessors: Default::default(),
value_accessors: Default::default(),
field_type: column_type,
sub_aggregation: get_aggs_with_segment_accessor_and_validate(
sub_aggregation,
reader,
segment_ordinal,
&limits,
)?,
agg: agg.clone(),
str_dict_column: str_dict_column.clone(),
limits,
column_block_accessor: Default::default(),
};
res.push(agg);
}
}
Average(AverageAggregation {
field: ref field_name,
..
})
| Max(MaxAggregation {
field: ref field_name,
..
})
| Min(MinAggregation {
field: ref field_name,
..
})
| Stats(StatsAggregation {
field: ref field_name,
..
})
| ExtendedStats(ExtendedStatsAggregation {
field: ref field_name,
..
})
| Sum(SumAggregation {
field: ref field_name,
..
}) => {
let (accessor, column_type) =
get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?;
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
}
Count(CountAggregation {
field: ref field_name,
..
}) => {
let allowed_column_types = [
ColumnType::I64,
ColumnType::U64,
ColumnType::F64,
ColumnType::Str,
ColumnType::DateTime,
ColumnType::Bool,
ColumnType::IpAddr,
// ColumnType::Bytes Unsupported
];
let (accessor, column_type) =
get_ff_reader(reader, field_name, Some(&allowed_column_types))?;
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
}
Percentiles(ref percentiles) => {
let (accessor, column_type) = get_ff_reader(
reader,
percentiles.field_name(),
Some(get_numeric_or_date_column_types()),
)?;
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
}
TopHits(ref mut top_hits) => {
top_hits.validate_and_resolve_field_names(reader.fast_fields().columnar())?;
let accessors: Vec<(Column<u64>, ColumnType)> = top_hits
.field_names()
.iter()
.map(|field| {
get_ff_reader(reader, field, Some(get_numeric_or_date_column_types()))
})
.collect::<crate::Result<_>>()?;
let value_accessors = top_hits
.value_field_names()
.iter()
.map(|field_name| {
Ok((
field_name.to_string(),
get_dynamic_columns(reader, field_name)?,
))
})
.collect::<crate::Result<_>>()?;
add_agg_with_accessors(&agg, accessors, &mut res, value_accessors)?;
}
};
Ok(res)
}
}
/// Get the missing value as internal u64 representation
///
/// For terms we use u64::MAX as sentinel value
/// For numerical data we convert the value into the representation
/// we would get from the fast field, when we open it as u64_lenient_for_type.
///
/// That way we can use it the same way as if it would come from the fastfield.
fn get_missing_val_as_u64_lenient(
column_type: ColumnType,
missing: &Key,
field_name: &str,
) -> crate::Result<Option<u64>> {
let missing_val = match missing {
Key::Str(_) if column_type == ColumnType::Str => Some(u64::MAX),
// Allow fallback to number on text fields
Key::F64(_) if column_type == ColumnType::Str => Some(u64::MAX),
Key::U64(_) if column_type == ColumnType::Str => Some(u64::MAX),
Key::I64(_) if column_type == ColumnType::Str => Some(u64::MAX),
Key::F64(val) if column_type.numerical_type().is_some() => {
f64_to_fastfield_u64(*val, &column_type)
}
// NOTE: We may loose precision of the passed missing value by casting i64 and u64 to f64.
Key::I64(val) if column_type.numerical_type().is_some() => {
f64_to_fastfield_u64(*val as f64, &column_type)
}
Key::U64(val) if column_type.numerical_type().is_some() => {
f64_to_fastfield_u64(*val as f64, &column_type)
}
_ => {
return Err(crate::TantivyError::InvalidArgument(format!(
"Missing value {missing:?} for field {field_name} is not supported for column \
type {column_type:?}"
)));
}
};
Ok(missing_val)
}
fn get_numeric_or_date_column_types() -> &'static [ColumnType] {
&[
ColumnType::F64,
ColumnType::U64,
ColumnType::I64,
ColumnType::DateTime,
]
}
pub(crate) fn get_aggs_with_segment_accessor_and_validate(
aggs: &Aggregations,
reader: &SegmentReader,
segment_ordinal: SegmentOrdinal,
limits: &AggregationLimitsGuard,
) -> crate::Result<AggregationsWithAccessor> {
let mut aggss = Vec::new();
for (key, agg) in aggs.iter() {
let aggs = AggregationWithAccessor::try_from_agg(
agg,
agg.sub_aggregation(),
reader,
segment_ordinal,
limits.clone(),
)?;
for agg in aggs {
aggss.push((key.to_string(), agg));
}
}
Ok(AggregationsWithAccessor::from_data(
VecWithNames::from_entries(aggss),
))
}
/// Get fast field reader or empty as default.
fn get_ff_reader(
reader: &SegmentReader,
field_name: &str,
allowed_column_types: Option<&[ColumnType]>,
) -> crate::Result<(columnar::Column<u64>, ColumnType)> {
let ff_fields = reader.fast_fields();
let ff_field_with_type = ff_fields
.u64_lenient_for_type(allowed_column_types, field_name)?
.unwrap_or_else(|| {
(
Column::build_empty_column(reader.num_docs()),
ColumnType::U64,
)
});
Ok(ff_field_with_type)
}
fn get_dynamic_columns(
reader: &SegmentReader,
field_name: &str,
) -> crate::Result<Vec<columnar::DynamicColumn>> {
let ff_fields = reader.fast_fields().dynamic_column_handles(field_name)?;
let cols = ff_fields
.iter()
.map(|h| h.open())
.collect::<io::Result<_>>()?;
assert!(!ff_fields.is_empty(), "field {field_name} not found");
Ok(cols)
}
/// Get all fast field reader or empty as default.
///
/// Is guaranteed to return at least one column.
fn get_all_ff_reader_or_empty(
reader: &SegmentReader,
field_name: &str,
allowed_column_types: Option<&[ColumnType]>,
fallback_type: ColumnType,
) -> crate::Result<Vec<(columnar::Column<u64>, ColumnType)>> {
let ff_fields = reader.fast_fields();
let mut ff_field_with_type =
ff_fields.u64_lenient_for_type_all(allowed_column_types, field_name)?;
if ff_field_with_type.is_empty() {
ff_field_with_type.push((Column::build_empty_column(reader.num_docs()), fallback_type));
}
Ok(ff_field_with_type)
}

View File

@@ -16,7 +16,7 @@ use super::{AggregationError, Key};
use crate::TantivyError;
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
/// The final aggegation result.
/// The final aggregation result.
pub struct AggregationResults(pub FxHashMap<String, AggregationResult>);
impl AggregationResults {
@@ -156,6 +156,8 @@ pub enum BucketResult {
/// The upper bound error for the doc count of each term.
doc_count_error_upper_bound: Option<u64>,
},
/// This is the filter result - a single bucket with sub-aggregations
Filter(FilterBucketResult),
}
impl BucketResult {
@@ -172,6 +174,11 @@ impl BucketResult {
sum_other_doc_count: _,
doc_count_error_upper_bound: _,
} => buckets.iter().map(|bucket| bucket.get_bucket_count()).sum(),
BucketResult::Filter(filter_result) => {
// Filter doesn't add to bucket count - it's not a user-facing bucket
// Only count sub-aggregation buckets
filter_result.sub_aggregations.get_bucket_count()
}
}
}
}
@@ -308,3 +315,25 @@ impl RangeBucketEntry {
1 + self.sub_aggregation.get_bucket_count()
}
}
/// This is the filter bucket result, which contains the document count and sub-aggregations.
///
/// # JSON Format
/// ```json
/// {
/// "electronics_only": {
/// "doc_count": 2,
/// "avg_price": {
/// "value": 150.0
/// }
/// }
/// }
/// ```
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct FilterBucketResult {
/// Number of documents in the filter bucket
pub doc_count: u64,
/// Sub-aggregation results
#[serde(flatten)]
pub sub_aggregations: AggregationResults,
}

View File

@@ -5,7 +5,6 @@ use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::buf_collector::DOC_BLOCK_SIZE;
use crate::aggregation::collector::AggregationCollector;
use crate::aggregation::intermediate_agg_result::IntermediateAggregationResults;
use crate::aggregation::segment_agg_result::AggregationLimitsGuard;
use crate::aggregation::tests::{get_test_index_2_segments, get_test_index_from_values_and_terms};
use crate::aggregation::DistributedAggregationCollector;
use crate::query::{AllQuery, TermQuery};
@@ -128,10 +127,8 @@ fn test_aggregation_flushing(
.unwrap();
let agg_res: AggregationResults = if use_distributed_collector {
let collector = DistributedAggregationCollector::from_aggs(
agg_req.clone(),
AggregationLimitsGuard::default(),
);
let collector =
DistributedAggregationCollector::from_aggs(agg_req.clone(), Default::default());
let searcher = reader.searcher();
let intermediate_agg_result = searcher.search(&AllQuery, &collector).unwrap();
@@ -155,7 +152,7 @@ fn test_aggregation_flushing(
searcher.search(&AllQuery, &collector).unwrap()
};
let res: Value = serde_json::to_value(&agg_res)?;
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
assert_eq!(res["bucketsL1"]["buckets"][0]["doc_count"], 3);
assert_eq!(
@@ -270,7 +267,7 @@ fn test_aggregation_level1_simple() -> crate::Result<()> {
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();
let res: Value = serde_json::to_value(&agg_res)?;
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
assert_eq!(res["average"]["value"], 12.142857142857142);
assert_eq!(
res["range"]["buckets"],
@@ -304,29 +301,6 @@ fn test_aggregation_level1_simple() -> crate::Result<()> {
Ok(())
}
#[test]
fn test_aggregation_term_truncate_sum_other_doc_count() {
let index = get_test_index_2_segments(true).unwrap();
let reader = index.reader().unwrap();
let count_per_text: Aggregation = serde_json::from_value(json!({ "terms": { "field": "text", "size": 1 } })).unwrap();
let aggs: Aggregations = vec![("group_by_term_truncate".to_string(), count_per_text)]
.into_iter()
.collect();
let collector = get_collector(aggs);
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res: Value = serde_json::to_value(&agg_res).unwrap();
assert_eq!(res, serde_json::json!({
"group_by_term_truncate": {
"buckets": [{ "doc_count": 7, "key": "cool" }],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 2,
},
}));
}
#[test]
fn test_aggregation_level1() -> crate::Result<()> {
let index = get_test_index_2_segments(true)?;
@@ -365,7 +339,7 @@ fn test_aggregation_level1() -> crate::Result<()> {
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();
let res: Value = serde_json::to_value(&agg_res)?;
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
assert_eq!(res["average"]["value"], 12.142857142857142);
assert_eq!(res["average_f64"]["value"], 12.214285714285714);
assert_eq!(res["average_i64"]["value"], 12.142857142857142);
@@ -420,7 +394,7 @@ fn test_aggregation_level2(
IndexRecordOption::Basic,
);
let elasticsearch_compatible_json_req = serde_json::json!(
let elasticsearch_compatible_json_req = r#"
{
"rangef64": {
"range": {
@@ -473,8 +447,9 @@ fn test_aggregation_level2(
"term_agg": { "terms": { "field": "text" } }
}
}
});
let agg_req: Aggregations = serde_json::from_value(elasticsearch_compatible_json_req).unwrap();
}
"#;
let agg_req: Aggregations = serde_json::from_str(elasticsearch_compatible_json_req).unwrap();
let agg_res: AggregationResults = if use_distributed_collector {
let collector =
@@ -491,7 +466,7 @@ fn test_aggregation_level2(
searcher.search(&term_query, &collector).unwrap()
};
let res: Value = serde_json::to_value(agg_res)?;
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
assert_eq!(res["range"]["buckets"][1]["key"], "3-7");
assert_eq!(res["range"]["buckets"][1]["doc_count"], 2u64);

File diff suppressed because it is too large

View File

@@ -1,25 +1,54 @@
use std::cmp::Ordering;
use columnar::{Column, ColumnBlockAccessor, ColumnType};
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use tantivy_bitpacker::minmax;
use crate::aggregation::agg_data::{
build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
};
use crate::aggregation::agg_limits::MemoryConsumption;
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::agg_req_with_accessor::{
AggregationWithAccessor, AggregationsWithAccessor,
};
use crate::aggregation::agg_result::BucketEntry;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
IntermediateHistogramBucketEntry,
};
use crate::aggregation::segment_agg_result::{
build_segment_agg_collector, SegmentAggregationCollector,
};
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::*;
use crate::TantivyError;
/// Contains all information required by the SegmentHistogramCollector to perform the
/// histogram or date_histogram aggregation on a segment.
pub struct HistogramAggReqData {
/// The column accessor to access the fast field values.
pub accessor: Column<u64>,
/// The field type of the fast field.
pub field_type: ColumnType,
/// The column block accessor to access the fast field values.
pub column_block_accessor: ColumnBlockAccessor<u64>,
/// The name of the aggregation.
pub name: String,
/// The sub aggregation blueprint, used to create sub aggregations for each bucket.
/// Will be filled during initialization of the collector.
pub sub_aggregation_blueprint: Option<Box<dyn SegmentAggregationCollector>>,
/// The histogram aggregation request.
pub req: HistogramAggregation,
/// True if this is a date_histogram aggregation.
pub is_date_histogram: bool,
/// The bounds to limit the buckets to.
pub bounds: HistogramBounds,
/// The offset used to calculate the bucket position.
pub offset: f64,
}
impl HistogramAggReqData {
/// Estimate the memory consumption of this struct in bytes.
pub fn get_memory_consumption(&self) -> usize {
std::mem::size_of::<Self>()
}
}
/// Histogram is a bucket aggregation, where buckets are created dynamically for a given `interval`.
/// Each document value is rounded down to its bucket.
///
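The bucketing itself is plain arithmetic: a value lands in bucket `floor((val - offset) / interval)`, and the bucket's key is that position mapped back to `pos * interval + offset`, which is what the `get_bucket_pos_f64`/`get_bucket_key_from_pos` calls below compute. A sketch of the assumed formulas (not the crate's exact implementation):

```rust
/// Bucket position for a value: round down to the containing interval.
fn get_bucket_pos_f64(val: f64, interval: f64, offset: f64) -> f64 {
    ((val - offset) / interval).floor()
}

/// Key (left edge) of the bucket at a given position.
fn get_bucket_key_from_pos(pos: f64, interval: f64, offset: f64) -> f64 {
    pos * interval + offset
}

fn main() {
    // interval = 10, offset = 0: value 27.0 falls into bucket [20, 30).
    let pos = get_bucket_pos_f64(27.0, 10.0, 0.0);
    assert_eq!(pos, 2.0);
    assert_eq!(get_bucket_key_from_pos(pos, 10.0, 0.0), 20.0);
}
```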
@@ -234,12 +263,12 @@ impl SegmentHistogramBucketEntry {
pub(crate) fn into_intermediate_bucket_entry(
self,
sub_aggregation: Option<Box<dyn SegmentAggregationCollector>>,
agg_with_accessor: &AggregationsWithAccessor,
agg_data: &AggregationsSegmentCtx,
) -> crate::Result<IntermediateHistogramBucketEntry> {
let mut sub_aggregation_res = IntermediateAggregationResults::default();
if let Some(sub_aggregation) = sub_aggregation {
sub_aggregation
.add_intermediate_aggregation_result(agg_with_accessor, &mut sub_aggregation_res)?;
.add_intermediate_aggregation_result(agg_data, &mut sub_aggregation_res)?;
}
Ok(IntermediateHistogramBucketEntry {
key: self.key,
@@ -256,24 +285,20 @@ pub struct SegmentHistogramCollector {
/// The buckets containing the aggregation data.
buckets: FxHashMap<i64, SegmentHistogramBucketEntry>,
sub_aggregations: FxHashMap<i64, Box<dyn SegmentAggregationCollector>>,
sub_aggregation_blueprint: Option<Box<dyn SegmentAggregationCollector>>,
column_type: ColumnType,
interval: f64,
offset: f64,
bounds: HistogramBounds,
accessor_idx: usize,
}
impl SegmentAggregationCollector for SegmentHistogramCollector {
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
agg_data: &AggregationsSegmentCtx,
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let agg_with_accessor = &agg_with_accessor.aggs.values[self.accessor_idx];
let bucket = self.into_intermediate_bucket_result(agg_with_accessor)?;
let name = agg_data
.get_histogram_req_data(self.accessor_idx)
.name
.clone();
let bucket = self.into_intermediate_bucket_result(agg_data)?;
results.push(name, IntermediateAggregationResult::Bucket(bucket))?;
Ok(())
@@ -283,56 +308,52 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
self.collect_block(&[doc], agg_with_accessor)
self.collect_block(&[doc], agg_data)
}
#[inline]
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let bucket_agg_accessor = &mut agg_with_accessor.aggs.values[self.accessor_idx];
let mut req = agg_data.take_histogram_req_data(self.accessor_idx);
let mem_pre = self.get_memory_consumption();
let bounds = self.bounds;
let interval = self.interval;
let offset = self.offset;
let get_bucket_pos = |val| (get_bucket_pos_f64(val, interval, offset) as i64);
let bounds = req.bounds;
let interval = req.req.interval;
let offset = req.offset;
let get_bucket_pos = |val| get_bucket_pos_f64(val, interval, offset) as i64;
bucket_agg_accessor
req.column_block_accessor.fetch_block(docs, &req.accessor);
for (doc, val) in req
.column_block_accessor
.fetch_block(docs, &bucket_agg_accessor.accessor);
for (doc, val) in bucket_agg_accessor
.column_block_accessor
.iter_docid_vals(docs, &bucket_agg_accessor.accessor)
.iter_docid_vals(docs, &req.accessor)
{
let val = self.f64_from_fastfield_u64(val);
let val = f64_from_fastfield_u64(val, &req.field_type);
let bucket_pos = get_bucket_pos(val);
if bounds.contains(val) {
let bucket = self.buckets.entry(bucket_pos).or_insert_with(|| {
let key = get_bucket_key_from_pos(bucket_pos as f64, interval, offset);
SegmentHistogramBucketEntry { key, doc_count: 0 }
});
bucket.doc_count += 1;
if let Some(sub_aggregation_blueprint) = self.sub_aggregation_blueprint.as_mut() {
if let Some(sub_aggregation_blueprint) = req.sub_aggregation_blueprint.as_ref() {
self.sub_aggregations
.entry(bucket_pos)
.or_insert_with(|| sub_aggregation_blueprint.clone())
.collect(doc, &mut bucket_agg_accessor.sub_aggregation)?;
.collect(doc, agg_data)?;
}
}
}
agg_data.put_back_histogram_req_data(self.accessor_idx, req);
let mem_delta = self.get_memory_consumption() - mem_pre;
if mem_delta > 0 {
bucket_agg_accessor
agg_data
.context
.limits
.add_memory_consumed(mem_delta as u64)?;
}
@@ -340,12 +361,9 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
Ok(())
}
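For orientation, the bucket math used in collect_block above: a value is shifted by `offset`, divided by `interval`, and floored to a bucket position, and the bucket key is the inverse mapping. A small worked sketch, assuming the usual floor-based formulas behind `get_bucket_pos_f64` and `get_bucket_key_from_pos`:

    // Assumed formulas: pos = floor((val - offset) / interval); key = pos * interval + offset.
    fn get_bucket_pos_f64(val: f64, interval: f64, offset: f64) -> f64 {
        ((val - offset) / interval).floor()
    }

    fn get_bucket_key_from_pos(pos: f64, interval: f64, offset: f64) -> f64 {
        pos * interval + offset
    }

    fn main() {
        // interval = 10.0, offset = 0.0: value 37.0 lands in bucket 3, key 30.0.
        let pos = get_bucket_pos_f64(37.0, 10.0, 0.0);
        assert_eq!(pos, 3.0);
        assert_eq!(get_bucket_key_from_pos(pos, 10.0, 0.0), 30.0);
        // Flooring sends negative values down: -0.5 lands in bucket -1, key -10.0.
        let pos = get_bucket_pos_f64(-0.5, 10.0, 0.0);
        assert_eq!(pos, -1.0);
        assert_eq!(get_bucket_key_from_pos(pos, 10.0, 0.0), -10.0);
    }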
fn flush(&mut self, agg_with_accessor: &mut AggregationsWithAccessor) -> crate::Result<()> {
let sub_aggregation_accessor =
&mut agg_with_accessor.aggs.values[self.accessor_idx].sub_aggregation;
fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
for sub_aggregation in self.sub_aggregations.values_mut() {
sub_aggregation.flush(sub_aggregation_accessor)?;
sub_aggregation.flush(agg_data)?;
}
Ok(())
@@ -362,65 +380,58 @@ impl SegmentHistogramCollector {
/// Converts the collector result into an intermediate bucket result.
pub fn into_intermediate_bucket_result(
self,
agg_with_accessor: &AggregationWithAccessor,
agg_data: &AggregationsSegmentCtx,
) -> crate::Result<IntermediateBucketResult> {
let mut buckets = Vec::with_capacity(self.buckets.len());
for (bucket_pos, bucket) in self.buckets {
let bucket_res = bucket.into_intermediate_bucket_entry(
self.sub_aggregations.get(&bucket_pos).cloned(),
&agg_with_accessor.sub_aggregation,
agg_data,
);
buckets.push(bucket_res?);
}
buckets.sort_unstable_by(|b1, b2| b1.key.total_cmp(&b2.key));
let is_date_agg = agg_data
.get_histogram_req_data(self.accessor_idx)
.field_type
== ColumnType::DateTime;
Ok(IntermediateBucketResult::Histogram {
buckets,
is_date_agg: self.column_type == ColumnType::DateTime,
is_date_agg,
})
}
pub(crate) fn from_req_and_validate(
mut req: HistogramAggregation,
sub_aggregation: &mut AggregationsWithAccessor,
field_type: ColumnType,
accessor_idx: usize,
agg_data: &mut AggregationsSegmentCtx,
node: &AggRefNode,
) -> crate::Result<Self> {
req.validate()?;
if field_type == ColumnType::DateTime {
req.normalize_date_time();
}
let sub_aggregation_blueprint = if sub_aggregation.is_empty() {
None
let blueprint = if !node.children.is_empty() {
Some(build_segment_agg_collectors(agg_data, &node.children)?)
} else {
let sub_aggregation = build_segment_agg_collector(sub_aggregation)?;
Some(sub_aggregation)
None
};
let bounds = req.hard_bounds.unwrap_or(HistogramBounds {
let req_data = agg_data.get_histogram_req_data_mut(node.idx_in_req_data);
req_data.req.validate()?;
if req_data.field_type == ColumnType::DateTime && !req_data.is_date_histogram {
req_data.req.normalize_date_time();
}
req_data.bounds = req_data.req.hard_bounds.unwrap_or(HistogramBounds {
min: f64::MIN,
max: f64::MAX,
});
req_data.offset = req_data.req.offset.unwrap_or(0.0);
req_data.sub_aggregation_blueprint = blueprint;
Ok(Self {
buckets: Default::default(),
column_type: field_type,
interval: req.interval,
offset: req.offset.unwrap_or(0.0),
bounds,
sub_aggregations: Default::default(),
sub_aggregation_blueprint,
accessor_idx,
accessor_idx: node.idx_in_req_data,
})
}
#[inline]
fn f64_from_fastfield_u64(&self, val: u64) -> f64 {
f64_from_fastfield_u64(val, &self.column_type)
}
}
#[inline]


@@ -22,6 +22,7 @@
//! - [Range](RangeAggregation)
//! - [Terms](TermsAggregation)
mod filter;
mod histogram;
mod range;
mod term_agg;
@@ -30,6 +31,7 @@ mod term_missing_agg;
use std::collections::HashMap;
use std::fmt;
pub use filter::*;
pub use histogram::*;
pub use range::*;
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};


@@ -1,20 +1,43 @@
use std::fmt::Debug;
use std::ops::Range;
use columnar::{Column, ColumnBlockAccessor, ColumnType};
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor;
use crate::aggregation::agg_data::{
build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
};
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
IntermediateRangeBucketEntry, IntermediateRangeBucketResult,
};
use crate::aggregation::segment_agg_result::{
build_segment_agg_collector, SegmentAggregationCollector,
};
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::*;
use crate::TantivyError;
/// Contains all information required by the SegmentRangeCollector to perform the
/// range aggregation on a segment.
pub struct RangeAggReqData {
/// The column accessor to access the fast field values.
pub accessor: Column<u64>,
/// The type of the fast field.
pub field_type: ColumnType,
/// The column block accessor to access the fast field values.
pub column_block_accessor: ColumnBlockAccessor<u64>,
/// The range aggregation request.
pub req: RangeAggregation,
/// The name of the aggregation.
pub name: String,
}
impl RangeAggReqData {
/// Estimate the memory consumption of this struct in bytes.
pub fn get_memory_consumption(&self) -> usize {
std::mem::size_of::<Self>()
}
}
/// Provide user-defined buckets to aggregate on.
///
/// Two special buckets will automatically be created to cover the whole range of values.
@@ -161,12 +184,12 @@ impl Debug for SegmentRangeBucketEntry {
impl SegmentRangeBucketEntry {
pub(crate) fn into_intermediate_bucket_entry(
self,
agg_with_accessor: &AggregationsWithAccessor,
agg_data: &AggregationsSegmentCtx,
) -> crate::Result<IntermediateRangeBucketEntry> {
let mut sub_aggregation_res = IntermediateAggregationResults::default();
if let Some(sub_aggregation) = self.sub_aggregation {
sub_aggregation
.add_intermediate_aggregation_result(agg_with_accessor, &mut sub_aggregation_res)?
.add_intermediate_aggregation_result(agg_data, &mut sub_aggregation_res)?
} else {
Default::default()
};
@@ -184,12 +207,14 @@ impl SegmentRangeBucketEntry {
impl SegmentAggregationCollector for SegmentRangeCollector {
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
agg_data: &AggregationsSegmentCtx,
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
let field_type = self.column_type;
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let sub_agg = &agg_with_accessor.aggs.values[self.accessor_idx].sub_aggregation;
let name = agg_data
.get_range_req_data(self.accessor_idx)
.name
.to_string();
let buckets: FxHashMap<SerializedKey, IntermediateRangeBucketEntry> = self
.buckets
@@ -199,7 +224,7 @@ impl SegmentAggregationCollector for SegmentRangeCollector {
range_to_string(&range_bucket.range, &field_type)?,
range_bucket
.bucket
.into_intermediate_bucket_entry(sub_agg)?,
.into_intermediate_bucket_entry(agg_data)?,
))
})
.collect::<crate::Result<_>>()?;
@@ -218,66 +243,70 @@ impl SegmentAggregationCollector for SegmentRangeCollector {
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
self.collect_block(&[doc], agg_with_accessor)
self.collect_block(&[doc], agg_data)
}
#[inline]
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let bucket_agg_accessor = &mut agg_with_accessor.aggs.values[self.accessor_idx];
// Take request data to avoid borrow conflicts during sub-aggregation
let mut req = agg_data.take_range_req_data(self.accessor_idx);
bucket_agg_accessor
.column_block_accessor
.fetch_block(docs, &bucket_agg_accessor.accessor);
req.column_block_accessor.fetch_block(docs, &req.accessor);
for (doc, val) in bucket_agg_accessor
for (doc, val) in req
.column_block_accessor
.iter_docid_vals(docs, &bucket_agg_accessor.accessor)
.iter_docid_vals(docs, &req.accessor)
{
let bucket_pos = self.get_bucket_pos(val);
let bucket = &mut self.buckets[bucket_pos];
bucket.bucket.doc_count += 1;
if let Some(sub_aggregation) = &mut bucket.bucket.sub_aggregation {
sub_aggregation.collect(doc, &mut bucket_agg_accessor.sub_aggregation)?;
if let Some(sub_agg) = bucket.bucket.sub_aggregation.as_mut() {
sub_agg.collect(doc, agg_data)?;
}
}
agg_data.put_back_range_req_data(self.accessor_idx, req);
Ok(())
}
fn flush(&mut self, agg_with_accessor: &mut AggregationsWithAccessor) -> crate::Result<()> {
let sub_aggregation_accessor =
&mut agg_with_accessor.aggs.values[self.accessor_idx].sub_aggregation;
fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
for bucket in self.buckets.iter_mut() {
if let Some(sub_agg) = bucket.bucket.sub_aggregation.as_mut() {
sub_agg.flush(sub_aggregation_accessor)?;
sub_agg.flush(agg_data)?;
}
}
Ok(())
}
}
impl SegmentRangeCollector {
pub(crate) fn from_req_and_validate(
req: &RangeAggregation,
sub_aggregation: &mut AggregationsWithAccessor,
limits: &mut AggregationLimitsGuard,
field_type: ColumnType,
accessor_idx: usize,
req_data: &mut AggregationsSegmentCtx,
node: &AggRefNode,
) -> crate::Result<Self> {
let accessor_idx = node.idx_in_req_data;
let (field_type, ranges) = {
let req_view = req_data.get_range_req_data(node.idx_in_req_data);
(req_view.field_type, req_view.req.ranges.clone())
};
// The range input on the request is f64.
// We need to convert to u64 ranges, because we read the values as u64.
// The mapping from the conversion is monotonic so ordering is preserved.
let buckets: Vec<_> = extend_validate_ranges(&req.ranges, &field_type)?
let sub_agg_prototype = if !node.children.is_empty() {
Some(build_segment_agg_collectors(req_data, &node.children)?)
} else {
None
};
let buckets: Vec<_> = extend_validate_ranges(&ranges, &field_type)?
.iter()
.map(|range| {
let key = range
@@ -295,11 +324,7 @@ impl SegmentRangeCollector {
} else {
Some(f64_from_fastfield_u64(range.range.start, &field_type))
};
let sub_aggregation = if sub_aggregation.is_empty() {
None
} else {
Some(build_segment_agg_collector(sub_aggregation)?)
};
let sub_aggregation = sub_agg_prototype.clone();
Ok(SegmentRangeAndBucketEntry {
range: range.range.clone(),
@@ -314,7 +339,7 @@ impl SegmentRangeCollector {
})
.collect::<crate::Result<_>>()?;
limits.add_memory_consumed(
req_data.context.limits.add_memory_consumed(
buckets.len() as u64 * std::mem::size_of::<SegmentRangeAndBucketEntry>() as u64,
)?;
@@ -467,15 +492,45 @@ mod tests {
ranges,
..Default::default()
};
// Build buckets directly, as from_req_and_validate does, but without an AggregationsSegmentCtx.
let buckets: Vec<_> = extend_validate_ranges(&req.ranges, &field_type)
.expect("unexpected error in extend_validate_ranges")
.iter()
.map(|range| {
let key = range
.key
.clone()
.map(|key| Ok(Key::Str(key)))
.unwrap_or_else(|| range_to_key(&range.range, &field_type))
.expect("unexpected error in range_to_key");
let to = if range.range.end == u64::MAX {
None
} else {
Some(f64_from_fastfield_u64(range.range.end, &field_type))
};
let from = if range.range.start == u64::MIN {
None
} else {
Some(f64_from_fastfield_u64(range.range.start, &field_type))
};
SegmentRangeAndBucketEntry {
range: range.range.clone(),
bucket: SegmentRangeBucketEntry {
doc_count: 0,
sub_aggregation: None,
key,
from,
to,
},
}
})
.collect();
SegmentRangeCollector::from_req_and_validate(
&req,
&mut Default::default(),
&mut AggregationLimitsGuard::default(),
field_type,
0,
)
.expect("unexpected error")
SegmentRangeCollector {
buckets,
column_type: field_type,
accessor_idx: 0,
}
}
#[test]

File diff suppressed because it is too large


@@ -1,13 +1,39 @@
use columnar::{Column, ColumnType};
use rustc_hash::FxHashMap;
use crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor;
use crate::aggregation::agg_data::{
build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
};
use crate::aggregation::bucket::term_agg::TermsAggregation;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
IntermediateKey, IntermediateTermBucketEntry, IntermediateTermBucketResult,
};
use crate::aggregation::segment_agg_result::{
build_segment_agg_collector, SegmentAggregationCollector,
};
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
/// Special aggregation to handle missing values for term aggregations.
/// This missing aggregation will check multiple columns for existence.
///
/// This is needed when:
/// - The field is multi-valued and we therefore have multiple columns
/// - The field is not text and missing is provided as string (we cannot use the numeric missing
/// value optimization)
#[derive(Default)]
pub struct MissingTermAggReqData {
/// The accessors to check for existence of a value.
pub accessors: Vec<(Column<u64>, ColumnType)>,
/// The name of the aggregation.
pub name: String,
/// The original terms aggregation request.
pub req: TermsAggregation,
}
impl MissingTermAggReqData {
/// Estimate the memory consumption of this struct in bytes.
pub fn get_memory_consumption(&self) -> usize {
std::mem::size_of::<Self>()
}
}
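The collect path further down reduces to one existence test across all candidate columns: a document counts as missing only if no column has a value for it. A toy sketch of that test, with plain doc-id lists standing in for `Column::index.has_value`:

    // Toy stand-in: each "column" is the set of doc ids that have a value.
    fn is_missing(doc: u32, columns: &[Vec<u32>]) -> bool {
        !columns.iter().any(|col| col.contains(&doc))
    }

    fn main() {
        let columns = vec![vec![0, 2], vec![2, 3]]; // two columns of the same field
        assert!(!is_missing(2, &columns)); // present in both columns
        assert!(!is_missing(3, &columns)); // present in one column is enough
        assert!(is_missing(1, &columns)); // absent everywhere: counted as missing
    }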
/// The specialized missing term aggregation.
#[derive(Default, Debug, Clone)]
@@ -18,12 +44,13 @@ pub struct TermMissingAgg {
}
impl TermMissingAgg {
pub(crate) fn new(
accessor_idx: usize,
sub_aggregations: &mut AggregationsWithAccessor,
req_data: &mut AggregationsSegmentCtx,
node: &AggRefNode,
) -> crate::Result<Self> {
let has_sub_aggregations = !sub_aggregations.is_empty();
let has_sub_aggregations = !node.children.is_empty();
let accessor_idx = node.idx_in_req_data;
let sub_agg = if has_sub_aggregations {
let sub_aggregation = build_segment_agg_collector(sub_aggregations)?;
let sub_aggregation = build_segment_agg_collectors(req_data, &node.children)?;
Some(sub_aggregation)
} else {
None
@@ -40,16 +67,11 @@ impl TermMissingAgg {
impl SegmentAggregationCollector for TermMissingAgg {
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
agg_data: &AggregationsSegmentCtx,
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let agg_with_accessor = &agg_with_accessor.aggs.values[self.accessor_idx];
let term_agg = agg_with_accessor
.agg
.agg
.as_term()
.expect("TermMissingAgg collector must be term agg req");
let req_data = agg_data.get_missing_term_req_data(self.accessor_idx);
let term_agg = &req_data.req;
let missing = term_agg
.missing
.as_ref()
@@ -64,10 +86,7 @@ impl SegmentAggregationCollector for TermMissingAgg {
};
if let Some(sub_agg) = self.sub_agg {
let mut res = IntermediateAggregationResults::default();
sub_agg.add_intermediate_aggregation_result(
&agg_with_accessor.sub_aggregation,
&mut res,
)?;
sub_agg.add_intermediate_aggregation_result(agg_data, &mut res)?;
missing_entry.sub_aggregation = res;
}
entries.insert(missing.into(), missing_entry);
@@ -80,7 +99,10 @@ impl SegmentAggregationCollector for TermMissingAgg {
},
};
results.push(name, IntermediateAggregationResult::Bucket(bucket))?;
results.push(
req_data.name.to_string(),
IntermediateAggregationResult::Bucket(bucket),
)?;
Ok(())
}
@@ -88,17 +110,17 @@ impl SegmentAggregationCollector for TermMissingAgg {
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let agg = &mut agg_with_accessor.aggs.values[self.accessor_idx];
let has_value = agg
let req_data = agg_data.get_missing_term_req_data(self.accessor_idx);
let has_value = req_data
.accessors
.iter()
.any(|(acc, _)| acc.index.has_value(doc));
if !has_value {
self.missing_count += 1;
if let Some(sub_agg) = self.sub_agg.as_mut() {
sub_agg.collect(doc, &mut agg.sub_aggregation)?;
sub_agg.collect(doc, agg_data)?;
}
}
Ok(())
@@ -107,10 +129,10 @@ impl SegmentAggregationCollector for TermMissingAgg {
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
for doc in docs {
self.collect(*doc, agg_with_accessor)?;
self.collect(*doc, agg_data)?;
}
Ok(())
}


@@ -1,9 +1,14 @@
use super::agg_req_with_accessor::AggregationsWithAccessor;
use super::intermediate_agg_result::IntermediateAggregationResults;
use super::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::agg_data::AggregationsSegmentCtx;
use crate::DocId;
#[cfg(test)]
pub(crate) const DOC_BLOCK_SIZE: usize = 64;
#[cfg(not(test))]
pub(crate) const DOC_BLOCK_SIZE: usize = 256;
pub(crate) type DocBlock = [DocId; DOC_BLOCK_SIZE];
/// BufAggregationCollector buffers documents before calling collect_block().
@@ -15,7 +20,7 @@ pub(crate) struct BufAggregationCollector {
}
impl std::fmt::Debug for BufAggregationCollector {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("SegmentAggregationResultsCollector")
.field("staged_docs", &&self.staged_docs[..self.num_staged_docs])
.field("num_staged_docs", &self.num_staged_docs)
@@ -37,23 +42,23 @@ impl SegmentAggregationCollector for BufAggregationCollector {
#[inline]
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
agg_data: &AggregationsSegmentCtx,
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
Box::new(self.collector).add_intermediate_aggregation_result(agg_with_accessor, results)
Box::new(self.collector).add_intermediate_aggregation_result(agg_data, results)
}
#[inline]
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
self.staged_docs[self.num_staged_docs] = doc;
self.num_staged_docs += 1;
if self.num_staged_docs == self.staged_docs.len() {
self.collector
.collect_block(&self.staged_docs[..self.num_staged_docs], agg_with_accessor)?;
.collect_block(&self.staged_docs[..self.num_staged_docs], agg_data)?;
self.num_staged_docs = 0;
}
Ok(())
@@ -63,20 +68,19 @@ impl SegmentAggregationCollector for BufAggregationCollector {
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
self.collector.collect_block(docs, agg_with_accessor)?;
self.collector.collect_block(docs, agg_data)?;
Ok(())
}
#[inline]
fn flush(&mut self, agg_with_accessor: &mut AggregationsWithAccessor) -> crate::Result<()> {
fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
self.collector
.collect_block(&self.staged_docs[..self.num_staged_docs], agg_with_accessor)?;
.collect_block(&self.staged_docs[..self.num_staged_docs], agg_data)?;
self.num_staged_docs = 0;
self.collector.flush(agg_with_accessor)?;
self.collector.flush(agg_data)?;
Ok(())
}
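The staging contract shown above: buffer single docs until the fixed-size block fills, forward full blocks via collect_block, and drain the remainder on flush before flushing the inner collector. A stripped-down, self-contained version of that logic (block size shrunk for illustration; a plain Vec stands in for the inner collector):

    const DOC_BLOCK_SIZE: usize = 4; // illustration only; the real value is 64 or 256

    struct Buffered {
        staged: [u32; DOC_BLOCK_SIZE],
        len: usize,
        forwarded_blocks: Vec<Vec<u32>>, // stands in for the inner collector
    }

    impl Buffered {
        fn collect(&mut self, doc: u32) {
            self.staged[self.len] = doc;
            self.len += 1;
            if self.len == self.staged.len() {
                self.forwarded_blocks.push(self.staged[..self.len].to_vec());
                self.len = 0;
            }
        }
        fn flush(&mut self) {
            self.forwarded_blocks.push(self.staged[..self.len].to_vec());
            self.len = 0;
        }
    }

    fn main() {
        let mut buf = Buffered { staged: [0; DOC_BLOCK_SIZE], len: 0, forwarded_blocks: Vec::new() };
        for doc in 0..6 {
            buf.collect(doc);
        }
        buf.flush();
        assert_eq!(buf.forwarded_blocks, vec![vec![0, 1, 2, 3], vec![4, 5]]);
    }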


@@ -1,12 +1,12 @@
use super::agg_req::Aggregations;
use super::agg_req_with_accessor::AggregationsWithAccessor;
use super::agg_result::AggregationResults;
use super::buf_collector::BufAggregationCollector;
use super::intermediate_agg_result::IntermediateAggregationResults;
use super::segment_agg_result::{
build_segment_agg_collector, AggregationLimitsGuard, SegmentAggregationCollector,
use super::segment_agg_result::SegmentAggregationCollector;
use super::AggContextParams;
use crate::aggregation::agg_data::{
build_aggregations_data_from_req, build_segment_agg_collectors_root, AggregationsSegmentCtx,
};
use crate::aggregation::agg_req_with_accessor::get_aggs_with_segment_accessor_and_validate;
use crate::collector::{Collector, SegmentCollector};
use crate::index::SegmentReader;
use crate::{DocId, SegmentOrdinal, TantivyError};
@@ -22,7 +22,7 @@ pub const DEFAULT_MEMORY_LIMIT: u64 = 500_000_000;
/// The collector collects all aggregations by the underlying aggregation request.
pub struct AggregationCollector {
agg: Aggregations,
limits: AggregationLimitsGuard,
context: AggContextParams,
}
impl AggregationCollector {
@@ -30,8 +30,8 @@ impl AggregationCollector {
///
/// Aggregation fails when the limits in `AggregationLimits` are exceeded
/// (memory limit and bucket limit).
pub fn from_aggs(agg: Aggregations, limits: AggregationLimitsGuard) -> Self {
Self { agg, limits }
pub fn from_aggs(agg: Aggregations, context: AggContextParams) -> Self {
Self { agg, context }
}
}
@@ -45,7 +45,7 @@ impl AggregationCollector {
/// into the final `AggregationResults` via the `into_final_result()` method.
pub struct DistributedAggregationCollector {
agg: Aggregations,
limits: AggregationLimitsGuard,
context: AggContextParams,
}
impl DistributedAggregationCollector {
@@ -53,8 +53,8 @@ impl DistributedAggregationCollector {
///
/// Aggregation fails when the limits in `AggregationLimits` are exceeded
/// (memory limit and bucket limit).
pub fn from_aggs(agg: Aggregations, limits: AggregationLimitsGuard) -> Self {
Self { agg, limits }
pub fn from_aggs(agg: Aggregations, context: AggContextParams) -> Self {
Self { agg, context }
}
}
@@ -72,7 +72,7 @@ impl Collector for DistributedAggregationCollector {
&self.agg,
reader,
segment_local_id,
&self.limits,
&self.context,
)
}
@@ -102,7 +102,7 @@ impl Collector for AggregationCollector {
&self.agg,
reader,
segment_local_id,
&self.limits,
&self.context,
)
}
@@ -115,7 +115,7 @@ impl Collector for AggregationCollector {
segment_fruits: Vec<<Self::Child as SegmentCollector>::Fruit>,
) -> crate::Result<Self::Fruit> {
let res = merge_fruits(segment_fruits)?;
res.into_final_result(self.agg.clone(), self.limits.clone())
res.into_final_result(self.agg.clone(), self.context.limits.clone())
}
}
@@ -135,7 +135,7 @@ fn merge_fruits(
/// `AggregationSegmentCollector` does the aggregation collection on a segment.
pub struct AggregationSegmentCollector {
aggs_with_accessor: AggregationsWithAccessor,
aggs_with_accessor: AggregationsSegmentCtx,
agg_collector: BufAggregationCollector,
error: Option<TantivyError>,
}
@@ -147,14 +147,15 @@ impl AggregationSegmentCollector {
agg: &Aggregations,
reader: &SegmentReader,
segment_ordinal: SegmentOrdinal,
limits: &AggregationLimitsGuard,
context: &AggContextParams,
) -> crate::Result<Self> {
let mut aggs_with_accessor =
get_aggs_with_segment_accessor_and_validate(agg, reader, segment_ordinal, limits)?;
let mut agg_data =
build_aggregations_data_from_req(agg, reader, segment_ordinal, context.clone())?;
let result =
BufAggregationCollector::new(build_segment_agg_collector(&mut aggs_with_accessor)?);
BufAggregationCollector::new(build_segment_agg_collectors_root(&mut agg_data)?);
Ok(AggregationSegmentCollector {
aggs_with_accessor,
aggs_with_accessor: agg_data,
agg_collector: result,
error: None,
})


@@ -24,7 +24,9 @@ use super::metric::{
};
use super::segment_agg_result::AggregationLimitsGuard;
use super::{format_date, AggregationError, Key, SerializedKey};
use crate::aggregation::agg_result::{AggregationResults, BucketEntries, BucketEntry};
use crate::aggregation::agg_result::{
AggregationResults, BucketEntries, BucketEntry, FilterBucketResult,
};
use crate::aggregation::bucket::TermsAggregationInternal;
use crate::aggregation::metric::CardinalityCollector;
use crate::TantivyError;
@@ -179,12 +181,17 @@ impl IntermediateAggregationResults {
}
/// Merge another intermediate aggregation result into this result.
///
/// The order of the values needs to be the same on both results. This is ensured when the
/// same keys are present on the underlying `VecWithNames` struct.
pub fn merge_fruits(&mut self, other: IntermediateAggregationResults) -> crate::Result<()> {
for (left, right) in self.aggs_res.values_mut().zip(other.aggs_res.into_values()) {
left.merge_fruits(right)?;
pub fn merge_fruits(&mut self, mut other: IntermediateAggregationResults) -> crate::Result<()> {
for (key, left) in self.aggs_res.iter_mut() {
if let Some(key) = other.aggs_res.remove(key) {
left.merge_fruits(key)?;
}
}
// Move remainder of other aggs_res into self.
// Note: Currently we don't expect this to happen, as we create empty intermediate results
// via [IntermediateAggregationResults::empty_from_req].
for (key, value) in other.aggs_res {
self.aggs_res.insert(key, value);
}
Ok(())
}
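The rewritten merge is keyed rather than positional: keys present on both sides merge pairwise, and keys only present in `other` are moved across. A toy illustration of the same semantics over plain maps, with u64 counts standing in for intermediate results:

    use std::collections::HashMap;

    fn merge_fruits(left: &mut HashMap<String, u64>, mut right: HashMap<String, u64>) {
        for (key, left_val) in left.iter_mut() {
            if let Some(right_val) = right.remove(key) {
                *left_val += right_val; // pairwise merge for keys on both sides
            }
        }
        // Remainder of `right` moves into `left` (not expected today, but harmless).
        left.extend(right);
    }

    fn main() {
        let mut left: HashMap<String, u64> = [("avg".into(), 2), ("terms".into(), 5)].into();
        let right: HashMap<String, u64> = [("terms".into(), 3), ("extra".into(), 1)].into();
        merge_fruits(&mut left, right);
        assert_eq!(left["terms"], 8);
        assert_eq!(left["extra"], 1);
    }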
@@ -241,11 +248,16 @@ pub(crate) fn empty_from_req(req: &Aggregation) -> IntermediateAggregationResult
Cardinality(_) => IntermediateAggregationResult::Metric(
IntermediateMetricResult::Cardinality(CardinalityCollector::default()),
),
Filter(_) => IntermediateAggregationResult::Bucket(IntermediateBucketResult::Filter {
doc_count: 0,
sub_aggregations: IntermediateAggregationResults::default(),
}),
}
}
/// An aggregation is either a bucket or a metric.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[allow(clippy::large_enum_variant)]
pub enum IntermediateAggregationResult {
/// Bucket variant
Bucket(IntermediateBucketResult),
@@ -426,6 +438,13 @@ pub enum IntermediateBucketResult {
/// The term buckets
buckets: IntermediateTermBucketResult,
},
/// Filter aggregation - a single bucket with sub-aggregations
Filter {
/// Document count in the filter bucket
doc_count: u64,
/// Sub-aggregation results
sub_aggregations: IntermediateAggregationResults,
},
}
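The new Filter variant models the filter aggregation as a single implicit bucket: a doc count plus nested sub-aggregation results. As the merge arm further down shows, two segments' Filter results combine by summing doc counts and recursively merging the sub-aggregations; a stripped stand-in (sub-aggregation merging elided):

    // Local stand-in for the Filter bucket, without the nested results.
    struct FilterBucket {
        doc_count: u64,
    }

    fn merge(left: &mut FilterBucket, right: FilterBucket) {
        left.doc_count += right.doc_count; // sub_aggregations would merge_fruits here
    }

    fn main() {
        let mut merged = FilterBucket { doc_count: 7 };
        merge(&mut merged, FilterBucket { doc_count: 5 });
        assert_eq!(merged.doc_count, 12);
    }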
impl IntermediateBucketResult {
@@ -509,6 +528,18 @@ impl IntermediateBucketResult {
req.sub_aggregation(),
limits,
),
IntermediateBucketResult::Filter {
doc_count,
sub_aggregations,
} => {
// Convert sub-aggregation results to final format
let final_sub_aggregations = sub_aggregations
.into_final_result(req.sub_aggregation().clone(), limits.clone())?;
Ok(BucketResult::Filter(FilterBucketResult {
doc_count,
sub_aggregations: final_sub_aggregations,
}))
}
}
}
@@ -562,6 +593,19 @@ impl IntermediateBucketResult {
*buckets_left = buckets?;
}
(
IntermediateBucketResult::Filter {
doc_count: doc_count_left,
sub_aggregations: sub_aggs_left,
},
IntermediateBucketResult::Filter {
doc_count: doc_count_right,
sub_aggregations: sub_aggs_right,
},
) => {
*doc_count_left += doc_count_right;
sub_aggs_left.merge_fruits(sub_aggs_right)?;
}
(IntermediateBucketResult::Range(_), _) => {
panic!("try merge on different types")
}
@@ -571,6 +615,9 @@ impl IntermediateBucketResult {
(IntermediateBucketResult::Terms { .. }, _) => {
panic!("try merge on different types")
}
(IntermediateBucketResult::Filter { .. }, _) => {
panic!("try merge on different types")
}
}
Ok(())
}


@@ -2,15 +2,13 @@ use std::collections::hash_map::DefaultHasher;
use std::hash::{BuildHasher, Hasher};
use columnar::column_values::CompactSpaceU64Accessor;
use columnar::Dictionary;
use columnar::{Column, ColumnBlockAccessor, ColumnType, Dictionary, StrColumn};
use common::f64_to_u64;
use hyperloglogplus::{HyperLogLog, HyperLogLogPlus};
use rustc_hash::FxHashSet;
use serde::{Deserialize, Serialize};
use crate::aggregation::agg_req_with_accessor::{
AggregationWithAccessor, AggregationsWithAccessor,
};
use crate::aggregation::agg_data::AggregationsSegmentCtx;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
};
@@ -97,6 +95,32 @@ pub struct CardinalityAggregationReq {
pub missing: Option<Key>,
}
/// Contains all information required by the SegmentCardinalityCollector to perform the
/// cardinality aggregation on a segment.
pub struct CardinalityAggReqData {
/// The column accessor to access the fast field values.
pub accessor: Column<u64>,
/// The column_type of the field.
pub column_type: ColumnType,
/// The string dictionary column if the field is of type string.
pub str_dict_column: Option<StrColumn>,
/// The missing value normalized to the internal u64 representation of the field type.
pub missing_value_for_accessor: Option<u64>,
/// The column block accessor to access the fast field values.
pub(crate) column_block_accessor: ColumnBlockAccessor<u64>,
/// The name of the aggregation.
pub name: String,
/// The aggregation request.
pub req: CardinalityAggregationReq,
}
impl CardinalityAggReqData {
/// Estimate the memory consumption of this struct in bytes.
pub fn get_memory_consumption(&self) -> usize {
std::mem::size_of::<Self>()
}
}
impl CardinalityAggregationReq {
/// Creates a new [`CardinalityAggregationReq`] instance from a field name.
pub fn from_field_name(field_name: String) -> Self {
@@ -115,47 +139,44 @@ impl CardinalityAggregationReq {
pub(crate) struct SegmentCardinalityCollector {
cardinality: CardinalityCollector,
entries: FxHashSet<u64>,
column_type: ColumnType,
accessor_idx: usize,
missing: Option<Key>,
}
impl SegmentCardinalityCollector {
pub fn from_req(column_type: ColumnType, accessor_idx: usize, missing: &Option<Key>) -> Self {
pub fn from_req(column_type: ColumnType, accessor_idx: usize) -> Self {
Self {
cardinality: CardinalityCollector::new(column_type as u8),
entries: Default::default(),
column_type,
accessor_idx,
missing: missing.clone(),
}
}
fn fetch_block_with_field(
&mut self,
docs: &[crate::DocId],
agg_accessor: &mut AggregationWithAccessor,
agg_data: &mut CardinalityAggReqData,
) {
if let Some(missing) = agg_accessor.missing_value_for_accessor {
agg_accessor.column_block_accessor.fetch_block_with_missing(
if let Some(missing) = agg_data.missing_value_for_accessor {
agg_data.column_block_accessor.fetch_block_with_missing(
docs,
&agg_accessor.accessor,
&agg_data.accessor,
missing,
);
} else {
agg_accessor
agg_data
.column_block_accessor
.fetch_block(docs, &agg_accessor.accessor);
.fetch_block(docs, &agg_data.accessor);
}
}
fn into_intermediate_metric_result(
mut self,
agg_with_accessor: &AggregationWithAccessor,
agg_data: &AggregationsSegmentCtx,
) -> crate::Result<IntermediateMetricResult> {
if self.column_type == ColumnType::Str {
let req_data = &agg_data.get_cardinality_req_data(self.accessor_idx);
if req_data.column_type == ColumnType::Str {
let fallback_dict = Dictionary::empty();
let dict = agg_with_accessor
let dict = req_data
.str_dict_column
.as_ref()
.map(|el| el.dictionary())
@@ -180,10 +201,10 @@ impl SegmentCardinalityCollector {
})?;
if has_missing {
// Replace missing with the actual value provided
let missing_key = self
.missing
.as_ref()
.expect("Found sentinel value u64::MAX for term_ord but `missing` is not set");
let missing_key =
req_data.req.missing.as_ref().expect(
"Found sentinel value u64::MAX for term_ord but `missing` is not set",
);
match missing_key {
Key::Str(missing) => {
self.cardinality.sketch.insert_any(&missing);
@@ -209,13 +230,13 @@ impl SegmentCardinalityCollector {
impl SegmentAggregationCollector for SegmentCardinalityCollector {
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
agg_data: &AggregationsSegmentCtx,
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let agg_with_accessor = &agg_with_accessor.aggs.values[self.accessor_idx];
let req_data = &agg_data.get_cardinality_req_data(self.accessor_idx);
let name = req_data.name.to_string();
let intermediate_result = self.into_intermediate_metric_result(agg_with_accessor)?;
let intermediate_result = self.into_intermediate_metric_result(agg_data)?;
results.push(
name,
IntermediateAggregationResult::Metric(intermediate_result),
@@ -227,26 +248,26 @@ impl SegmentAggregationCollector for SegmentCardinalityCollector {
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
self.collect_block(&[doc], agg_with_accessor)
self.collect_block(&[doc], agg_data)
}
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let bucket_agg_accessor = &mut agg_with_accessor.aggs.values[self.accessor_idx];
self.fetch_block_with_field(docs, bucket_agg_accessor);
let req_data = agg_data.get_cardinality_req_data_mut(self.accessor_idx);
self.fetch_block_with_field(docs, req_data);
let col_block_accessor = &bucket_agg_accessor.column_block_accessor;
if self.column_type == ColumnType::Str {
let col_block_accessor = &req_data.column_block_accessor;
if req_data.column_type == ColumnType::Str {
for term_ord in col_block_accessor.iter_vals() {
self.entries.insert(term_ord);
}
} else if self.column_type == ColumnType::IpAddr {
let compact_space_accessor = bucket_agg_accessor
} else if req_data.column_type == ColumnType::IpAddr {
let compact_space_accessor = req_data
.accessor
.values
.clone()


@@ -4,12 +4,11 @@ use std::mem;
use serde::{Deserialize, Serialize};
use super::*;
use crate::aggregation::agg_req_with_accessor::{
AggregationWithAccessor, AggregationsWithAccessor,
};
use crate::aggregation::agg_data::AggregationsSegmentCtx;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
};
use crate::aggregation::metric::MetricAggReqData;
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::*;
use crate::{DocId, TantivyError};
@@ -63,7 +62,7 @@ impl ExtendedStatsAggregation {
/// Extended stats contains a collection of statistics
/// they extend stats by adding variance, standard deviation,
/// and bound informations
/// and bound information
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ExtendedStats {
/// The number of documents.
@@ -348,20 +347,20 @@ impl SegmentExtendedStatsCollector {
pub(crate) fn collect_block_with_field(
&mut self,
docs: &[DocId],
agg_accessor: &mut AggregationWithAccessor,
req_data: &mut MetricAggReqData,
) {
if let Some(missing) = self.missing.as_ref() {
agg_accessor.column_block_accessor.fetch_block_with_missing(
req_data.column_block_accessor.fetch_block_with_missing(
docs,
&agg_accessor.accessor,
&req_data.accessor,
*missing,
);
} else {
agg_accessor
req_data
.column_block_accessor
.fetch_block(docs, &agg_accessor.accessor);
.fetch_block(docs, &req_data.accessor);
}
for val in agg_accessor.column_block_accessor.iter_vals() {
for val in req_data.column_block_accessor.iter_vals() {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.extended_stats.collect(val1);
}
@@ -372,10 +371,10 @@ impl SegmentAggregationCollector for SegmentExtendedStatsCollector {
#[inline]
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
agg_data: &AggregationsSegmentCtx,
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let name = agg_data.get_metric_req_data(self.accessor_idx).name.clone();
results.push(
name,
IntermediateAggregationResult::Metric(IntermediateMetricResult::ExtendedStats(
@@ -390,12 +389,12 @@ impl SegmentAggregationCollector for SegmentExtendedStatsCollector {
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let field = &agg_with_accessor.aggs.values[self.accessor_idx].accessor;
let req_data = agg_data.get_metric_req_data(self.accessor_idx);
if let Some(missing) = self.missing {
let mut has_val = false;
for val in field.values_for_doc(doc) {
for val in req_data.accessor.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.extended_stats.collect(val1);
has_val = true;
@@ -405,7 +404,7 @@ impl SegmentAggregationCollector for SegmentExtendedStatsCollector {
.collect(f64_from_fastfield_u64(missing, &self.field_type));
}
} else {
for val in field.values_for_doc(doc) {
for val in req_data.accessor.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
self.extended_stats.collect(val1);
}
@@ -418,10 +417,10 @@ impl SegmentAggregationCollector for SegmentExtendedStatsCollector {
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let field = &mut agg_with_accessor.aggs.values[self.accessor_idx];
self.collect_block_with_field(docs, field);
let req_data = agg_data.get_metric_req_data_mut(self.accessor_idx);
self.collect_block_with_field(docs, req_data);
Ok(())
}
}


@@ -31,6 +31,7 @@ use std::collections::HashMap;
pub use average::*;
pub use cardinality::*;
use columnar::{Column, ColumnBlockAccessor, ColumnType};
pub use count::*;
pub use extended_stats::*;
pub use max::*;
@@ -44,6 +45,35 @@ pub use top_hits::*;
use crate::schema::OwnedValue;
/// Contains all information required by metric aggregations like avg, min, max, sum, stats,
/// extended_stats, count, percentiles.
#[repr(C)]
pub struct MetricAggReqData {
/// True if the field is of number or date type.
pub is_number_or_date_type: bool,
/// The type of the field.
pub field_type: ColumnType,
/// The missing value normalized to the internal u64 representation of the field type.
pub missing_u64: Option<u64>,
/// The column block accessor to access the fast field values.
pub column_block_accessor: ColumnBlockAccessor<u64>,
/// The column accessor to access the fast field values.
pub accessor: Column<u64>,
/// Used when converting to an intermediate result.
pub collecting_for: StatsType,
/// The missing value
pub missing: Option<f64>,
/// The name of the aggregation.
pub name: String,
}
impl MetricAggReqData {
/// Estimate the memory consumption of this struct in bytes.
pub fn get_memory_consumption(&self) -> usize {
std::mem::size_of::<Self>()
}
}
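One MetricAggReqData record now serves several metric collectors; which intermediate result a collector emits is decided by matching on `collecting_for`, as the stats collector does further down. A reduced sketch of that dispatch with local stand-in types:

    #[derive(Clone, Copy, Debug)]
    enum StatsType { Average, Count, Sum }

    #[derive(Debug, PartialEq)]
    enum MetricResult { Average(f64), Count(u64), Sum(f64) }

    fn into_result(collecting_for: StatsType, count: u64, sum: f64) -> MetricResult {
        match collecting_for {
            StatsType::Average => MetricResult::Average(sum / count.max(1) as f64),
            StatsType::Count => MetricResult::Count(count),
            StatsType::Sum => MetricResult::Sum(sum),
        }
    }

    fn main() {
        assert_eq!(into_result(StatsType::Average, 4, 10.0), MetricResult::Average(2.5));
        assert_eq!(into_result(StatsType::Count, 4, 10.0), MetricResult::Count(4));
    }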
/// Single-metric aggregations use this common result structure.
///
/// Main reason to wrap it in value is to match elasticsearch output structure.


@@ -3,12 +3,11 @@ use std::fmt::Debug;
use serde::{Deserialize, Serialize};
use super::*;
use crate::aggregation::agg_req_with_accessor::{
AggregationWithAccessor, AggregationsWithAccessor,
};
use crate::aggregation::agg_data::AggregationsSegmentCtx;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
};
use crate::aggregation::metric::MetricAggReqData;
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::*;
use crate::{DocId, TantivyError};
@@ -112,7 +111,8 @@ impl PercentilesAggregationReq {
&self.field
}
fn validate(&self) -> crate::Result<()> {
/// Validates the request parameters.
pub fn validate(&self) -> crate::Result<()> {
if let Some(percents) = self.percents.as_ref() {
let all_in_range = percents
.iter()
@@ -133,10 +133,8 @@ impl PercentilesAggregationReq {
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct SegmentPercentilesCollector {
field_type: ColumnType,
pub(crate) percentiles: PercentilesCollector,
pub(crate) accessor_idx: usize,
missing: Option<u64>,
}
#[derive(Clone, Serialize, Deserialize)]
@@ -231,43 +229,32 @@ impl PercentilesCollector {
}
impl SegmentPercentilesCollector {
pub fn from_req_and_validate(
req: &PercentilesAggregationReq,
field_type: ColumnType,
accessor_idx: usize,
) -> crate::Result<Self> {
req.validate()?;
let missing = req
.missing
.and_then(|val| f64_to_fastfield_u64(val, &field_type));
pub fn from_req_and_validate(accessor_idx: usize) -> crate::Result<Self> {
Ok(Self {
field_type,
percentiles: PercentilesCollector::new(),
accessor_idx,
missing,
})
}
#[inline]
pub(crate) fn collect_block_with_field(
&mut self,
docs: &[DocId],
agg_accessor: &mut AggregationWithAccessor,
req_data: &mut MetricAggReqData,
) {
if let Some(missing) = self.missing.as_ref() {
agg_accessor.column_block_accessor.fetch_block_with_missing(
if let Some(missing) = req_data.missing_u64.as_ref() {
req_data.column_block_accessor.fetch_block_with_missing(
docs,
&agg_accessor.accessor,
&req_data.accessor,
*missing,
);
} else {
agg_accessor
req_data
.column_block_accessor
.fetch_block(docs, &agg_accessor.accessor);
.fetch_block(docs, &req_data.accessor);
}
for val in agg_accessor.column_block_accessor.iter_vals() {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
for val in req_data.column_block_accessor.iter_vals() {
let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
self.percentiles.collect(val1);
}
}
@@ -277,10 +264,10 @@ impl SegmentAggregationCollector for SegmentPercentilesCollector {
#[inline]
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
agg_data: &AggregationsSegmentCtx,
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let name = agg_data.get_metric_req_data(self.accessor_idx).name.clone();
let intermediate_metric_result = IntermediateMetricResult::Percentiles(self.percentiles);
results.push(
@@ -295,24 +282,24 @@ impl SegmentAggregationCollector for SegmentPercentilesCollector {
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let field = &agg_with_accessor.aggs.values[self.accessor_idx].accessor;
let req_data = agg_data.get_metric_req_data(self.accessor_idx);
if let Some(missing) = self.missing {
if let Some(missing) = req_data.missing_u64 {
let mut has_val = false;
for val in field.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
for val in req_data.accessor.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
self.percentiles.collect(val1);
has_val = true;
}
if !has_val {
self.percentiles
.collect(f64_from_fastfield_u64(missing, &self.field_type));
.collect(f64_from_fastfield_u64(missing, &req_data.field_type));
}
} else {
for val in field.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
for val in req_data.accessor.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
self.percentiles.collect(val1);
}
}
@@ -324,10 +311,10 @@ impl SegmentAggregationCollector for SegmentPercentilesCollector {
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let field = &mut agg_with_accessor.aggs.values[self.accessor_idx];
self.collect_block_with_field(docs, field);
let req_data = agg_data.get_metric_req_data_mut(self.accessor_idx);
self.collect_block_with_field(docs, req_data);
Ok(())
}
}


@@ -3,12 +3,11 @@ use std::fmt::Debug;
use serde::{Deserialize, Serialize};
use super::*;
use crate::aggregation::agg_req_with_accessor::{
AggregationWithAccessor, AggregationsWithAccessor,
};
use crate::aggregation::agg_data::AggregationsSegmentCtx;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
};
use crate::aggregation::metric::MetricAggReqData;
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::*;
use crate::{DocId, TantivyError};
@@ -166,74 +165,65 @@ impl IntermediateStats {
}
}
#[derive(Clone, Debug, PartialEq)]
pub(crate) enum SegmentStatsType {
/// The type of stats aggregation to perform.
/// Note that not all stats types are supported in the stats aggregation.
#[derive(Clone, Copy, Debug)]
pub enum StatsType {
/// The average of the values.
Average,
/// The count of the values.
Count,
/// The maximum value.
Max,
/// The minimum value.
Min,
/// The stats (count, sum, min, max, avg) of the values.
Stats,
/// The extended stats (count, sum, min, max, avg, sum_of_squares, variance, std_deviation) of the values.
ExtendedStats(Option<f64>), // sigma
/// The sum of the values.
Sum,
/// The percentiles of the values.
Percentiles,
}
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug)]
pub(crate) struct SegmentStatsCollector {
missing: Option<u64>,
field_type: ColumnType,
pub(crate) collecting_for: SegmentStatsType,
pub(crate) stats: IntermediateStats,
pub(crate) accessor_idx: usize,
val_cache: Vec<u64>,
}
impl SegmentStatsCollector {
pub fn from_req(
field_type: ColumnType,
collecting_for: SegmentStatsType,
accessor_idx: usize,
missing: Option<f64>,
) -> Self {
let missing = missing.and_then(|val| f64_to_fastfield_u64(val, &field_type));
pub fn from_req(accessor_idx: usize) -> Self {
Self {
field_type,
collecting_for,
stats: IntermediateStats::default(),
accessor_idx,
missing,
val_cache: Default::default(),
}
}
#[inline]
pub(crate) fn collect_block_with_field(
&mut self,
docs: &[DocId],
agg_accessor: &mut AggregationWithAccessor,
req_data: &mut MetricAggReqData,
) {
if let Some(missing) = self.missing.as_ref() {
agg_accessor.column_block_accessor.fetch_block_with_missing(
if let Some(missing) = req_data.missing_u64.as_ref() {
req_data.column_block_accessor.fetch_block_with_missing(
docs,
&agg_accessor.accessor,
&req_data.accessor,
*missing,
);
} else {
agg_accessor
req_data
.column_block_accessor
.fetch_block(docs, &agg_accessor.accessor);
.fetch_block(docs, &req_data.accessor);
}
if [
ColumnType::I64,
ColumnType::U64,
ColumnType::F64,
ColumnType::DateTime,
]
.contains(&self.field_type)
{
for val in agg_accessor.column_block_accessor.iter_vals() {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
if req_data.is_number_or_date_type {
for val in req_data.column_block_accessor.iter_vals() {
let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
self.stats.collect(val1);
}
} else {
for _val in agg_accessor.column_block_accessor.iter_vals() {
for _val in req_data.column_block_accessor.iter_vals() {
// we ignore the value and simply record that we got something
self.stats.collect(0.0);
}
@@ -245,27 +235,28 @@ impl SegmentAggregationCollector for SegmentStatsCollector {
#[inline]
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &AggregationsWithAccessor,
agg_data: &AggregationsSegmentCtx,
results: &mut IntermediateAggregationResults,
) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let req = agg_data.get_metric_req_data(self.accessor_idx);
let name = req.name.clone();
let intermediate_metric_result = match self.collecting_for {
SegmentStatsType::Average => {
let intermediate_metric_result = match req.collecting_for {
StatsType::Average => {
IntermediateMetricResult::Average(IntermediateAverage::from_collector(*self))
}
SegmentStatsType::Count => {
StatsType::Count => {
IntermediateMetricResult::Count(IntermediateCount::from_collector(*self))
}
SegmentStatsType::Max => {
IntermediateMetricResult::Max(IntermediateMax::from_collector(*self))
}
SegmentStatsType::Min => {
IntermediateMetricResult::Min(IntermediateMin::from_collector(*self))
}
SegmentStatsType::Stats => IntermediateMetricResult::Stats(self.stats),
SegmentStatsType::Sum => {
IntermediateMetricResult::Sum(IntermediateSum::from_collector(*self))
StatsType::Max => IntermediateMetricResult::Max(IntermediateMax::from_collector(*self)),
StatsType::Min => IntermediateMetricResult::Min(IntermediateMin::from_collector(*self)),
StatsType::Stats => IntermediateMetricResult::Stats(self.stats),
StatsType::Sum => IntermediateMetricResult::Sum(IntermediateSum::from_collector(*self)),
_ => {
return Err(TantivyError::InvalidArgument(format!(
"Unsupported stats type for stats aggregation: {:?}",
req.collecting_for
)))
}
};
@@ -281,23 +272,23 @@ impl SegmentAggregationCollector for SegmentStatsCollector {
fn collect(
&mut self,
doc: crate::DocId,
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let field = &agg_with_accessor.aggs.values[self.accessor_idx].accessor;
if let Some(missing) = self.missing {
let req_data = agg_data.get_metric_req_data(self.accessor_idx);
if let Some(missing) = req_data.missing_u64 {
let mut has_val = false;
for val in field.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
for val in req_data.accessor.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
self.stats.collect(val1);
has_val = true;
}
if !has_val {
self.stats
.collect(f64_from_fastfield_u64(missing, &self.field_type));
.collect(f64_from_fastfield_u64(missing, &req_data.field_type));
}
} else {
for val in field.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &self.field_type);
for val in req_data.accessor.values_for_doc(doc) {
let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
self.stats.collect(val1);
}
}
@@ -309,10 +300,10 @@ impl SegmentAggregationCollector for SegmentStatsCollector {
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let field = &mut agg_with_accessor.aggs.values[self.accessor_idx];
self.collect_block_with_field(docs, field);
let req_data = agg_data.get_metric_req_data_mut(self.accessor_idx);
self.collect_block_with_field(docs, req_data);
Ok(())
}
}


@@ -1,7 +1,8 @@
use std::cmp::Ordering;
use std::collections::HashMap;
use std::net::Ipv6Addr;
use columnar::{Column, ColumnType, ColumnarReader, DynamicColumn};
use columnar::{Column, ColumnType, ColumnarReader, DynamicColumn, ValueRange};
use common::json_path_writer::JSON_PATH_SEGMENT_SEP_STR;
use common::DateTime;
use regex::Regex;
@@ -9,15 +10,41 @@ use serde::ser::SerializeMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use super::{TopHitsMetricResult, TopHitsVecEntry};
use crate::aggregation::agg_data::AggregationsSegmentCtx;
use crate::aggregation::bucket::Order;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateMetricResult,
};
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::AggregationError;
use crate::collector::sort_key::{Comparator, ReverseComparator};
use crate::collector::TopNComputer;
use crate::schema::OwnedValue;
use crate::{DocAddress, DocId, SegmentOrdinal};
/// Contains all information required by the TopHitsSegmentCollector to perform the
/// top_hits aggregation on a segment.
#[derive(Default)]
pub struct TopHitsAggReqData {
/// The accessors to access the fast field values.
pub accessors: Vec<(Column<u64>, ColumnType)>,
/// The accessors to access the fast field values for retrieving document fields.
pub value_accessors: HashMap<String, Vec<DynamicColumn>>,
/// The ordinal of the segment this request data is for.
pub segment_ordinal: SegmentOrdinal,
/// The name of the aggregation.
pub name: String,
/// The top_hits aggregation request.
pub req: TopHitsAggregationReq,
}
impl TopHitsAggReqData {
/// Estimate the memory consumption of this struct in bytes.
pub fn get_memory_consumption(&self) -> usize {
std::mem::size_of::<Self>()
}
}
/// # Top Hits
///
@@ -357,7 +384,7 @@ impl From<FastFieldValue> for OwnedValue {
/// Holds a fast field value in its u64 representation, and the order in which it should be sorted.
#[derive(Clone, Serialize, Deserialize, Debug)]
struct DocValueAndOrder {
pub(crate) struct DocValueAndOrder {
/// A fast field value in its u64 representation.
value: Option<u64>,
/// Sort order for the value
@@ -429,11 +456,42 @@ impl PartialEq for DocSortValuesAndFields {
impl Eq for DocSortValuesAndFields {}
impl Comparator<DocSortValuesAndFields> for ReverseComparator {
#[inline(always)]
fn compare(&self, lhs: &DocSortValuesAndFields, rhs: &DocSortValuesAndFields) -> Ordering {
rhs.cmp(lhs)
}
fn threshold_to_valuerange(
&self,
threshold: DocSortValuesAndFields,
) -> ValueRange<DocSortValuesAndFields> {
ValueRange::LessThan(threshold, true)
}
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct TopHitsSegmentSortKey(pub Vec<DocValueAndOrder>);
impl Comparator<TopHitsSegmentSortKey> for ReverseComparator {
#[inline(always)]
fn compare(&self, lhs: &TopHitsSegmentSortKey, rhs: &TopHitsSegmentSortKey) -> Ordering {
rhs.cmp(lhs)
}
fn threshold_to_valuerange(
&self,
threshold: TopHitsSegmentSortKey,
) -> ValueRange<TopHitsSegmentSortKey> {
ValueRange::LessThan(threshold, true)
}
}
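Both threshold_to_valuerange impls answer the same question for TopN pruning: given the current cutoff key, which candidates can still enter the top N? Under the reversed comparator that is everything at or below the threshold in the natural order, hence ValueRange::LessThan(threshold, true). A self-contained sketch of what such a range accepts, using local stand-ins for the columnar crate's types (the inclusive flag is assumed to admit the bound itself):

    // Local stand-in for the ValueRange variant used above.
    enum ValueRange<T> {
        LessThan(T, bool), // (bound, inclusive)
    }

    fn accepts<T: Ord>(range: &ValueRange<T>, candidate: &T) -> bool {
        match range {
            ValueRange::LessThan(bound, inclusive) => {
                candidate < bound || (*inclusive && candidate == bound)
            }
        }
    }

    fn main() {
        let range = ValueRange::LessThan(10, true);
        assert!(accepts(&range, &3)); // strictly better than the cutoff
        assert!(accepts(&range, &10)); // inclusive bound admits ties
        assert!(!accepts(&range, &11)); // cannot improve the top N
    }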
/// The TopHitsCollector used for collecting over segments and merging results.
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct TopHitsTopNComputer {
req: TopHitsAggregationReq,
top_n: TopNComputer<DocSortValuesAndFields, DocAddress, false>,
top_n: TopNComputer<DocSortValuesAndFields, DocAddress, ReverseComparator>,
}
impl std::cmp::PartialEq for TopHitsTopNComputer {
@@ -457,7 +515,7 @@ impl TopHitsTopNComputer {
pub(crate) fn merge_fruits(&mut self, other_fruit: Self) -> crate::Result<()> {
for doc in other_fruit.top_n.into_vec() {
self.collect(doc.feature, doc.doc);
self.collect(doc.sort_key, doc.doc);
}
Ok(())
}
@@ -469,9 +527,9 @@ impl TopHitsTopNComputer {
.into_sorted_vec()
.into_iter()
.map(|doc| TopHitsVecEntry {
sort: doc.feature.sorts.iter().map(|f| f.value).collect(),
sort: doc.sort_key.sorts.iter().map(|f| f.value).collect(),
doc_value_fields: doc
.feature
.sort_key
.doc_value_fields
.into_iter()
.map(|(k, v)| (k, v.into()))
@@ -492,7 +550,7 @@ impl TopHitsTopNComputer {
pub(crate) struct TopHitsSegmentCollector {
segment_ordinal: SegmentOrdinal,
accessor_idx: usize,
top_n: TopNComputer<Vec<DocValueAndOrder>, DocAddress, false>,
top_n: TopNComputer<TopHitsSegmentSortKey, DocAddress, ReverseComparator>,
}
impl TopHitsSegmentCollector {
@@ -513,13 +571,15 @@ impl TopHitsSegmentCollector {
req: &TopHitsAggregationReq,
) -> TopHitsTopNComputer {
let mut top_hits_computer = TopHitsTopNComputer::new(req);
// The segment-level TopNComputer stores TopHitsSegmentSortKey; unwrap it
// back to Vec<DocValueAndOrder> when building DocSortValuesAndFields below.
let top_results = self.top_n.into_vec();
for res in top_results {
let doc_value_fields = req.get_document_field_data(value_accessors, res.doc.doc_id);
top_hits_computer.collect(
DocSortValuesAndFields {
sorts: res.feature,
sorts: res.sort_key.0,
doc_value_fields,
},
res.doc,
@@ -553,7 +613,7 @@ impl TopHitsSegmentCollector {
.collect();
self.top_n.push(
sorts,
TopHitsSegmentSortKey(sorts),
DocAddress {
segment_ord: self.segment_ordinal,
doc_id,
@@ -566,23 +626,18 @@ impl TopHitsSegmentCollector {
impl SegmentAggregationCollector for TopHitsSegmentCollector {
fn add_intermediate_aggregation_result(
self: Box<Self>,
agg_with_accessor: &crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor,
agg_data: &AggregationsSegmentCtx,
results: &mut crate::aggregation::intermediate_agg_result::IntermediateAggregationResults,
) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let req_data = agg_data.get_top_hits_req_data(self.accessor_idx);
let value_accessors = &agg_with_accessor.aggs.values[self.accessor_idx].value_accessors;
let tophits_req = &agg_with_accessor.aggs.values[self.accessor_idx]
.agg
.agg
.as_top_hits()
.expect("aggregation request must be of type top hits");
let value_accessors = &req_data.value_accessors;
let intermediate_result = IntermediateMetricResult::TopHits(
self.into_top_hits_collector(value_accessors, tophits_req),
self.into_top_hits_collector(value_accessors, &req_data.req),
);
results.push(
name,
req_data.name.to_string(),
IntermediateAggregationResult::Metric(intermediate_result),
)
}
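After this refactor, per-aggregation lookups go through `AggregationsSegmentCtx::get_top_hits_req_data` instead of digging through `AggregationsWithAccessor`. The context's actual layout is not shown in this diff; a minimal sketch of what the call sites imply (the struct name, field, and signature here are assumptions, hence the distinct name):

// Assumed shape, inferred from call sites: request data indexed by accessor_idx.
struct AggregationsSegmentCtxSketch {
    top_hits: Vec<TopHitsAggReqData>,
}

impl AggregationsSegmentCtxSketch {
    fn get_top_hits_req_data(&self, accessor_idx: usize) -> &TopHitsAggReqData {
        &self.top_hits[accessor_idx]
    }
}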
@@ -591,32 +646,22 @@ impl SegmentAggregationCollector for TopHitsSegmentCollector {
fn collect(
&mut self,
doc_id: crate::DocId,
agg_with_accessor: &mut crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let tophits_req = &agg_with_accessor.aggs.values[self.accessor_idx]
.agg
.agg
.as_top_hits()
.expect("aggregation request must be of type top hits");
let accessors = &agg_with_accessor.aggs.values[self.accessor_idx].accessors;
self.collect_with(doc_id, tophits_req, accessors)?;
let req_data = agg_data.get_top_hits_req_data(self.accessor_idx);
self.collect_with(doc_id, &req_data.req, &req_data.accessors)?;
Ok(())
}
fn collect_block(
&mut self,
docs: &[crate::DocId],
agg_with_accessor: &mut crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor,
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let tophits_req = &agg_with_accessor.aggs.values[self.accessor_idx]
.agg
.agg
.as_top_hits()
.expect("aggregation request must be of type top hits");
let accessors = &agg_with_accessor.aggs.values[self.accessor_idx].accessors;
let req_data = agg_data.get_top_hits_req_data(self.accessor_idx);
// TODO: Consider getting fields with the column block accessor.
for doc in docs {
self.collect_with(*doc, tophits_req, accessors)?;
self.collect_with(*doc, &req_data.req, &req_data.accessors)?;
}
Ok(())
}
@@ -635,6 +680,7 @@ mod tests {
use crate::aggregation::bucket::tests::get_test_index_from_docs;
use crate::aggregation::tests::get_test_index_from_values;
use crate::aggregation::AggregationCollector;
use crate::collector::sort_key::ReverseComparator;
use crate::collector::ComparableDoc;
use crate::query::AllQuery;
use crate::schema::OwnedValue;
@@ -650,7 +696,7 @@ mod tests {
fn collector_with_capacity(capacity: usize) -> super::TopHitsTopNComputer {
super::TopHitsTopNComputer {
top_n: super::TopNComputer::new(capacity),
top_n: super::TopNComputer::new_with_comparator(capacity, ReverseComparator),
req: Default::default(),
}
}
@@ -764,12 +810,12 @@ mod tests {
#[test]
fn test_top_hits_collector_single_feature() -> crate::Result<()> {
let docs = vec![
ComparableDoc::<_, _, false> {
ComparableDoc::<_, _> {
doc: crate::DocAddress {
segment_ord: 0,
doc_id: 0,
},
feature: DocSortValuesAndFields {
sort_key: DocSortValuesAndFields {
sorts: vec![DocValueAndOrder {
value: Some(1),
order: Order::Asc,
@@ -782,7 +828,7 @@ mod tests {
segment_ord: 0,
doc_id: 2,
},
feature: DocSortValuesAndFields {
sort_key: DocSortValuesAndFields {
sorts: vec![DocValueAndOrder {
value: Some(3),
order: Order::Asc,
@@ -795,7 +841,7 @@ mod tests {
segment_ord: 0,
doc_id: 1,
},
feature: DocSortValuesAndFields {
sort_key: DocSortValuesAndFields {
sorts: vec![DocValueAndOrder {
value: Some(5),
order: Order::Asc,
@@ -807,7 +853,7 @@ mod tests {
let mut collector = collector_with_capacity(3);
for doc in docs.clone() {
collector.collect(doc.feature, doc.doc);
collector.collect(doc.sort_key, doc.doc);
}
let res = collector.into_final_result();
@@ -817,15 +863,15 @@ mod tests {
super::TopHitsMetricResult {
hits: vec![
super::TopHitsVecEntry {
sort: vec![docs[0].feature.sorts[0].value],
sort: vec![docs[0].sort_key.sorts[0].value],
doc_value_fields: Default::default(),
},
super::TopHitsVecEntry {
sort: vec![docs[1].feature.sorts[0].value],
sort: vec![docs[1].sort_key.sorts[0].value],
doc_value_fields: Default::default(),
},
super::TopHitsVecEntry {
sort: vec![docs[2].feature.sorts[0].value],
sort: vec![docs[2].sort_key.sorts[0].value],
doc_value_fields: Default::default(),
},
]


@@ -127,9 +127,10 @@
//! [`AggregationResults`](agg_result::AggregationResults) via the
//! [`into_final_result`](intermediate_agg_result::IntermediateAggregationResults::into_final_result) method.
mod accessor_helpers;
mod agg_data;
mod agg_limits;
pub mod agg_req;
mod agg_req_with_accessor;
pub mod agg_result;
pub mod bucket;
mod buf_collector;
@@ -140,7 +141,6 @@ pub mod intermediate_agg_result;
pub mod metric;
mod segment_agg_result;
use std::collections::HashMap;
use std::fmt::Display;
#[cfg(test)]
@@ -160,6 +160,28 @@ use itertools::Itertools;
use serde::de::{self, Visitor};
use serde::{Deserialize, Deserializer, Serialize};
use crate::tokenizer::TokenizerManager;
/// Context parameters for aggregation execution
///
/// This struct holds shared resources needed during aggregation execution:
/// - `limits`: Memory and bucket limits for the aggregation
/// - `tokenizers`: TokenizerManager for parsing query strings in filter aggregations
#[derive(Clone, Default)]
pub struct AggContextParams {
/// Aggregation limits (memory and bucket count)
pub limits: AggregationLimitsGuard,
/// Tokenizer manager for query string parsing
pub tokenizers: TokenizerManager,
}
impl AggContextParams {
/// Create new aggregation context parameters
pub fn new(limits: AggregationLimitsGuard, tokenizers: TokenizerManager) -> Self {
Self { limits, tokenizers }
}
}
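As the test change at the end of this diff shows, `AggregationCollector::from_aggs` now takes these params instead of bare limits. A usage sketch, assuming an `Index` and an `Aggregations` request in scope:

fn make_collector(
    index: &Index,
    agg_req: Aggregations,
    limits: AggregationLimitsGuard,
) -> AggregationCollector {
    // Bundle limits and the index's tokenizers into the shared context.
    let params = AggContextParams::new(limits, index.tokenizers().clone());
    AggregationCollector::from_aggs(agg_req, params)
}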
fn parse_str_into_f64<E: de::Error>(value: &str) -> Result<f64, E> {
let parsed = value
.parse::<f64>()
@@ -257,80 +279,6 @@ where D: Deserializer<'de> {
deserializer.deserialize_any(StringOrFloatVisitor)
}
/// Represents an associative array `(key => values)` backed by two parallel, key-sorted
/// `Vec`s: iteration stays cheap and element order is deterministic, at the cost of
/// linear-scan lookups.
#[derive(PartialEq, Serialize, Deserialize)]
pub(crate) struct VecWithNames<T> {
pub(crate) values: Vec<T>,
keys: Vec<String>,
}
impl<T: Clone> Clone for VecWithNames<T> {
fn clone(&self) -> Self {
Self {
values: self.values.clone(),
keys: self.keys.clone(),
}
}
}
impl<T> Default for VecWithNames<T> {
fn default() -> Self {
Self {
values: Default::default(),
keys: Default::default(),
}
}
}
impl<T: std::fmt::Debug> std::fmt::Debug for VecWithNames<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_map().entries(self.iter()).finish()
}
}
impl<T> From<HashMap<String, T>> for VecWithNames<T> {
fn from(map: HashMap<String, T>) -> Self {
VecWithNames::from_entries(map.into_iter().collect_vec())
}
}
impl<T> VecWithNames<T> {
fn from_entries(mut entries: Vec<(String, T)>) -> Self {
// Sort to ensure the order of elements matches across multiple instances
entries.sort_by(|left, right| left.0.cmp(&right.0));
let mut data = Vec::with_capacity(entries.len());
let mut data_names = Vec::with_capacity(entries.len());
for entry in entries {
data_names.push(entry.0);
data.push(entry.1);
}
VecWithNames {
values: data,
keys: data_names,
}
}
fn iter(&self) -> impl Iterator<Item = (&str, &T)> + '_ {
self.keys().zip(self.values.iter())
}
fn keys(&self) -> impl Iterator<Item = &str> + '_ {
self.keys.iter().map(|key| key.as_str())
}
fn values_mut(&mut self) -> impl Iterator<Item = &mut T> + '_ {
self.values.iter_mut()
}
fn is_empty(&self) -> bool {
self.keys.is_empty()
}
fn len(&self) -> usize {
self.keys.len()
}
fn get(&self, name: &str) -> Option<&T> {
self.keys()
.position(|key| key == name)
.map(|pos| &self.values[pos])
}
}
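Because `from_entries` sorts by key up front, iteration order is deterministic regardless of insertion order, while `get` pays a linear scan. A small crate-internal usage sketch (the test name is hypothetical):

#[test]
fn vec_with_names_sketch() {
    let v = VecWithNames::from_entries(vec![
        ("b".to_string(), 2u32),
        ("a".to_string(), 1u32),
    ]);
    assert_eq!(v.len(), 2);
    assert_eq!(v.get("a"), Some(&1));
    // Keys come back sorted, independent of insertion order.
    assert_eq!(v.keys().collect::<Vec<_>>(), vec!["a", "b"]);
}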
/// The serialized key is used in a `HashMap`.
pub type SerializedKey = String;
@@ -464,7 +412,10 @@ mod tests {
query: Option<(&str, &str)>,
limits: AggregationLimitsGuard,
) -> crate::Result<Value> {
let collector = AggregationCollector::from_aggs(agg_req, limits);
let collector = AggregationCollector::from_aggs(
agg_req,
AggContextParams::new(limits, index.tokenizers().clone()),
);
let reader = index.reader()?;
let searcher = reader.searcher();

Some files were not shown because too many files have changed in this diff.