chore: Release 0.26.1

perf(agg): only measure active parent bucket in composite collect
Same change as 26a589e for SegmentCompositeCollector: get_memory_consumption summed across all parent_buckets on every block, scaling with outer bucket cardinality. Pass parent_bucket_id and index the single bucket.
2026-06-22 18:30:41 +00:00 · 2026-04-21 07:30:14 +02:00 · 2026-04-21 07:29:35 +02:00 · 2026-04-21 07:29:35 +02:00 · 2026-04-21 07:29:35 +02:00 · 2026-03-31 15:10:59 +08:00
112 changed files with 1601 additions and 9775 deletions
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -6,8 +6,6 @@ updates:
    interval: daily
    time: "20:00"
  open-pull-requests-limit: 10
-  cooldown:
-    default-days: 2

 - package-ecosystem: "github-actions"
  directory: "/"
@@ -15,5 +13,3 @@ updates:
    interval: daily
    time: "20:00"
  open-pull-requests-limit: 10
-  cooldown:
-    default-days: 2
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -4,9 +4,6 @@ on:
  push:
    branches: [main]

-permissions:
-  contents: read
-
 # Ensures that we cancel running jobs for the same PR / same workflow.
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -15,20 +12,16 @@ concurrency:
 jobs:
  coverage:
    runs-on: ubuntu-latest
-
-    permissions:
-      contents: read
-
    steps:
-      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+      - uses: actions/checkout@v4
      - name: Install Rust
        run: rustup toolchain install nightly-2025-12-01 --profile minimal --component llvm-tools-preview
-      - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
-      - uses: taiki-e/install-action@e4b3a0453201addddc06d3a72db90326aad87084 # cargo-llvm-cov
+      - uses: Swatinem/rust-cache@v2
+      - uses: taiki-e/install-action@cargo-llvm-cov
      - name: Generate code coverage
        run: cargo +nightly-2025-12-01 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0
+        uses: codecov/codecov-action@v3
        continue-on-error: true
        with:
          token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
--- a/.github/workflows/long_running.yml
+++ b/.github/workflows/long_running.yml
@@ -8,9 +8,6 @@ env:
  CARGO_TERM_COLOR: always
  NUM_FUNCTIONAL_TEST_ITERATIONS: 20000

-permissions:
-  contents: read
-
 # Ensures that we cancel running jobs for the same PR / same workflow.
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -21,13 +18,10 @@ jobs:

    runs-on: ubuntu-latest

-    permissions:
-      contents: read
-
    steps:
-    - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+    - uses: actions/checkout@v4
    - name: Install stable
-      uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
+      uses: actions-rs/toolchain@v1
      with:
          toolchain: stable
          profile: minimal
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -1,49 +0,0 @@
-name: OpenSSF Scorecard
-
-on:
-  schedule:
-    - cron: '0 0 * * 0'
-  push:
-    branches:
-      - main
-
-permissions:
-  contents: read
-
-jobs:
-  analysis:
-    name: Scorecards analysis
-    runs-on: ubuntu-latest
-    permissions:
-      # Needed to upload the results to code-scanning dashboard.
-      security-events: write
-      # Needed to publish results
-      id-token: write
-
-    steps:
-      - name: 'Checkout code'
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
-        with:
-          persist-credentials: false
-
-      - name: 'Run analysis'
-        uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3
-        with:
-          results_file: results.sarif
-          results_format: sarif
-          repo_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_results: true
-
-      # Upload the results as artifacts.
-      - name: 'Upload artifact'
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
-        with:
-          name: SARIF file
-          path: results.sarif
-          retention-days: 5
-
-      # Upload the results to GitHub's code scanning dashboard.
-      - name: 'Upload to code-scanning'
-        uses: github/codeql-action/upload-sarif@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4.36.1
-        with:
-          sarif_file: results.sarif
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,9 +9,6 @@ on:
 env:
  CARGO_TERM_COLOR: always

-permissions:
-  contents: read
-
 # Ensures that we cancel running jobs for the same PR / same workflow.
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -22,27 +19,23 @@ jobs:

    runs-on: ubuntu-latest

-    permissions:
-      contents: read
-      checks: write
-
    steps:
-    - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+    - uses: actions/checkout@v4

    - name: Install nightly
-      uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
+      uses: actions-rs/toolchain@v1
      with:
            toolchain: nightly
            profile: minimal
            components: rustfmt
    - name: Install stable
-      uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
+      uses: actions-rs/toolchain@v1
      with:
            toolchain: stable
            profile: minimal
            components: clippy

-    - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
+    - uses: Swatinem/rust-cache@v2

    - name: Check Formatting
      run: cargo +nightly fmt --all -- --check
@@ -54,7 +47,7 @@ jobs:
    - name: Check Bench Compilation
      run: cargo +nightly bench --no-run --profile=dev --all-features

-    - uses: actions-rs/clippy-check@b5b5f21f4797c02da247df37026fcd0a5024aa4d # v1.0.7
+    - uses: actions-rs/clippy-check@v1
      with:
        toolchain: stable
        token: ${{ secrets.GITHUB_TOKEN }}
@@ -64,9 +57,6 @@ jobs:

    runs-on: ubuntu-latest

-    permissions:
-      contents: read
-
    strategy:
      matrix:
        features:
@@ -77,17 +67,17 @@ jobs:
    name: test-${{ matrix.features.label}}

    steps:
-    - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+    - uses: actions/checkout@v4

    - name: Install stable
-      uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
+      uses: actions-rs/toolchain@v1
      with:
            toolchain: stable
            profile: minimal
            override: true

-    - uses: taiki-e/install-action@56cc9adf3a3e2c23eafb56e8acaf9d0373cb845a # nextest
-    - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
+    - uses: taiki-e/install-action@nextest
+    - uses: Swatinem/rust-cache@v2

    - name: Run tests
      run: |
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,9 @@
 Tantivy 0.26.1
 ================================

-## Performance
- Fix quadratic runtime in nested term and composite aggregations: memory accounting scanned all parent buckets on every collect instead of just the current parent (@PSeitz @fulmicoton)
+## Bugfixes
+- Fix memory consumption accounting in nested term aggregation to only scan the active parent bucket (@PSeitz)
+- Fix memory consumption accounting in composite aggregation to only scan the active parent bucket (@PSeitz)

 Tantivy 0.26 (Unreleased)
 ================================
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.26.0"
+version = "0.26.1"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -65,7 +65,7 @@ tantivy-bitpacker = { version = "0.10", path = "./bitpacker" }
 common = { version = "0.11", path = "./common/", package = "tantivy-common" }
 tokenizer-api = { version = "0.7", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
 sketches-ddsketch = { version = "0.4", features = ["use_serde"] }
-datasketches = { version = "0.3.0", features = ["hll"] }
+datasketches = "0.2.0"
 futures-util = { version = "0.3.28", optional = true }
 futures-channel = { version = "0.3.28", optional = true }
 fnv = "1.0.7"
@@ -75,7 +75,7 @@ typetag = "0.2.21"
 winapi = "0.3.9"

 [dev-dependencies]
-binggan = "0.17.0"
+binggan = "0.15.3"
 rand = "0.9"
 maplit = "1.0.2"
 matches = "0.1.9"
@@ -92,7 +92,7 @@ postcard = { version = "1.0.4", features = [
 ], default-features = false }

 [target.'cfg(not(windows))'.dev-dependencies]
-criterion = { version = "0.8", default-features = false }
+criterion = { version = "0.5", default-features = false }

 [dev-dependencies.fail]
 version = "0.5.0"
@@ -201,11 +201,3 @@ harness = false
 [[bench]]
 name = "regex_all_terms"
 harness = false
-
-[[bench]]
-name = "query_parser_nested"
-harness = false
-
-[[bench]]
-name = "intersection_bench"
-harness = false
--- a/README.md
+++ b/README.md
@@ -1,7 +1,6 @@
 [![Docs](https://docs.rs/tantivy/badge.svg)](https://docs.rs/crate/tantivy/)
 [![Build Status](https://github.com/quickwit-oss/tantivy/actions/workflows/test.yml/badge.svg)](https://github.com/quickwit-oss/tantivy/actions/workflows/test.yml)
 [![codecov](https://codecov.io/gh/quickwit-oss/tantivy/branch/main/graph/badge.svg)](https://codecov.io/gh/quickwit-oss/tantivy)
-[![OpenSSF Scorecard](https://api.scorecard.dev/projects/github.com/quickwit-oss/tantivy/badge)](https://scorecard.dev/viewer/?uri=github.com/quickwit-oss/tantivy)
 [![Join the chat at https://discord.gg/MT27AG5EVE](https://shields.io/discord/908281611840282624?label=chat%20on%20discord)](https://discord.gg/MT27AG5EVE)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Crates.io](https://img.shields.io/crates/v/tantivy.svg)](https://crates.io/crates/tantivy)
--- a/benches/agg_bench.rs
+++ b/benches/agg_bench.rs
@@ -63,12 +63,7 @@ fn bench_agg(mut group: InputGroup<Index>) {
    register!(group, terms_all_unique_with_avg_sub_agg);
    register!(group, terms_many_with_avg_sub_agg);
    register!(group, terms_status_with_avg_sub_agg);
-    register!(group, terms_status_with_terms_zipf_1000_sub_agg);
-    register!(group, terms_zipf_1000_with_terms_status_sub_agg);
    register!(group, terms_status_with_histogram);
-    register!(group, terms_status_with_date_histogram);
-    register!(group, terms_status_with_date_histogram_hard_bounds);
-    register!(group, terms_status_with_date_histogram_and_sibling_terms);
    register!(group, terms_zipf_1000);
    register!(group, terms_zipf_1000_with_histogram);
    register!(group, terms_zipf_1000_with_avg_sub_agg);
@@ -82,12 +77,7 @@ fn bench_agg(mut group: InputGroup<Index>) {
    register!(group, composite_histogram_calendar);

    register!(group, cardinality_agg);
-    register!(group, cardinality_agg_high_card);
-    register!(group, cardinality_agg_low_card);
    register!(group, terms_status_with_cardinality_agg);
-    register!(group, terms_100_buckets_with_cardinality_agg);
-    register!(group, terms_many_with_single_term_order_by_card);
-    register!(group, terms_many_with_single_term_2_order_by_card);

    register!(group, range_agg);
    register!(group, range_agg_with_avg_sub_agg);
@@ -175,52 +165,10 @@ fn cardinality_agg(index: &Index) {
    });
    execute_agg(index, agg_req);
 }
-// Full-scan cardinality on a near-1M-cardinality string field.
-// Hits the dense (PagedBitset) path: every doc has a unique term,
-// so the bucket promotes from FxHashSet shortly into the scan.
-fn cardinality_agg_high_card(index: &Index) {
-    let agg_req = json!({
-        "cardinality": {
-            "cardinality": {
-                "field": "text_all_unique_terms"
-            },
-        }
-    });
-    execute_agg(index, agg_req);
-}
-// Full-scan cardinality on a tiny-cardinality string field (7 distinct
-// values). Stays on the FxHashSet path — the promotion threshold is
-// never crossed. Validates no regression on the sparse path.
-fn cardinality_agg_low_card(index: &Index) {
-    let agg_req = json!({
-        "cardinality": {
-            "cardinality": {
-                "field": "text_few_terms_status"
-            },
-        }
-    });
-    execute_agg(index, agg_req);
-}
 fn terms_status_with_cardinality_agg(index: &Index) {
    let agg_req = json!({
        "my_texts": {
            "terms": { "field": "text_few_terms_status" },
-            "aggs": {
-                "cardinality": {
-                    "cardinality": {
-                        "field": "text_few_terms_status"
-                    },
-                }
-            }
-        },
-    });
-    execute_agg(index, agg_req);
-}
-
-fn terms_100_buckets_with_cardinality_agg(index: &Index) {
-    let agg_req = json!({
-        "my_texts": {
-            "terms": { "field": "text_1000_terms_zipf", "size": 100 },
            "aggs": {
                "cardinality": {
                    "cardinality": {
@@ -233,58 +181,6 @@ fn terms_100_buckets_with_cardinality_agg(index: &Index) {
    execute_agg(index, agg_req);
 }

-fn terms_many_with_single_term_order_by_card(index: &Index) {
-    let agg_req = json!({
-        "my_texts": {
-            "terms": { "field": "text_many_terms" },
-            "aggs": {
-                "nested_terms": {
-                    "terms": {
-                        "field": "single_term",
-                        "order": { "cardinality": "desc" }
-                    },
-                    "aggs": {
-                        "cardinality": {
-                            "cardinality": { "field": "text_few_terms" }
-                        }
-                    }
-                }
-            }
-        },
-    });
-    execute_agg(index, agg_req);
-}
-
-// Two-level terms ordered by cardinality at each level: a high-card outer terms
-// (text_many_terms) ordered by a cardinality sub-agg, with a nested low-card terms
-// (text_few_terms_status) also ordered by a cardinality sub-agg, plus an avg.
-fn terms_many_with_single_term_2_order_by_card(index: &Index) {
-    let agg_req = json!({
-        "by_ip": {
-            "terms": {
-                "field": "text_many_terms",
-                "order": { "card_few_terms": "desc" }
-            },
-            "aggs": {
-                "card_few_terms": {
-                    "cardinality": { "field": "text_few_terms" }
-                },
-                "nested_terms": {
-                    "terms": {
-                        "field": " single_term",
-                        "order": { "distinct_path2": "desc" }
-                    },
-                    "aggs": {
-                        "avg_botscore": { "avg": { "field": "score" } },
-                        "distinct_path2": { "cardinality": { "field": "text_few_terms" } }
-                    }
-                }
-            }
-        }
-    });
-    execute_agg(index, agg_req);
-}
-
 fn terms_7(index: &Index) {
    let agg_req = json!({
        "my_texts": { "terms": { "field": "text_few_terms_status" } },
@@ -357,30 +253,6 @@ fn terms_all_unique_with_avg_sub_agg(index: &Index) {
    });
    execute_agg(index, agg_req);
 }
-fn terms_status_with_terms_zipf_1000_sub_agg(index: &Index) {
-    let agg_req = json!({
-        "my_texts": {
-            "terms": { "field": "text_few_terms_status" },
-            "aggs": {
-                "nested_terms": { "terms": { "field": "text_1000_terms_zipf" } }
-            }
-        }
-    });
-    execute_agg(index, agg_req);
-}
-
-fn terms_zipf_1000_with_terms_status_sub_agg(index: &Index) {
-    let agg_req = json!({
-        "my_texts": {
-            "terms": { "field": "text_1000_terms_zipf" },
-            "aggs": {
-                "nested_terms": { "terms": { "field": "text_few_terms_status" } }
-            }
-        }
-    });
-    execute_agg(index, agg_req);
-}
-
 fn terms_status_with_histogram(index: &Index) {
    let agg_req = json!({
        "my_texts": {
@@ -393,57 +265,6 @@ fn terms_status_with_histogram(index: &Index) {
    execute_agg(index, agg_req);
 }

-fn terms_status_with_date_histogram(index: &Index) {
-    let agg_req = json!({
-        "my_texts": {
-            "terms": { "field": "text_few_terms_status" },
-            "aggs": {
-                "over_time": { "date_histogram": { "field": "timestamp", "fixed_interval": "1h" } }
-            }
-        }
-    });
-    execute_agg(index, agg_req);
-}
-
-/// Same fused terms × date_histogram, but with `hard_bounds`. The timestamps span 0..120h; the
-/// bounds drop only the first and last hour (ms: 1h=3_600_000, 119h=428_400_000), so almost every
-/// doc is in-bounds. This exercises the collector's hard-bounds path: `bounds.contains` runs per
-/// doc (the `all_docs_in_bounds` short-circuit is off) and the rare out-of-bounds doc takes the
-/// `term_counts` branch.
-fn terms_status_with_date_histogram_hard_bounds(index: &Index) {
-    let agg_req = json!({
-        "my_texts": {
-            "terms": { "field": "text_few_terms_status" },
-            "aggs": {
-                "over_time": {
-                    "date_histogram": {
-                        "field": "timestamp",
-                        "fixed_interval": "1h",
-                        "hard_bounds": { "min": 3_600_000, "max": 428_400_000 }
-                    }
-                }
-            }
-        }
-    });
-    execute_agg(index, agg_req);
-}
-
-/// Same fused terms × date_histogram, but with a sibling terms aggregation next to it. The fused
-/// fast path should still trigger for `my_texts` (sibling aggregations are independent top-level
-/// aggregations, so they don't change its eligibility).
-fn terms_status_with_date_histogram_and_sibling_terms(index: &Index) {
-    let agg_req = json!({
-        "my_texts": {
-            "terms": { "field": "text_few_terms_status" },
-            "aggs": {
-                "over_time": { "date_histogram": { "field": "timestamp", "fixed_interval": "1h" } }
-            }
-        },
-        "other_texts": { "terms": { "field": "text_few_terms" } }
-    });
-    execute_agg(index, agg_req);
-}
-
 fn terms_zipf_1000_with_histogram(index: &Index) {
    let agg_req = json!({
        "my_texts": {
@@ -745,8 +566,7 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
            TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
        )
        .set_stored();
-    let text_field = schema_builder.add_text_field("text", text_fieldtype.clone());
-    let single_term = schema_builder.add_text_field("single_term", FAST);
+    let text_field = schema_builder.add_text_field("text", text_fieldtype);
    let json_field = schema_builder.add_json_field("json", FAST);
    let text_field_all_unique_terms =
        schema_builder.add_text_field("text_all_unique_terms", STRING | FAST);
@@ -810,8 +630,6 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
            index_writer.add_document(doc!(
                json_field => json!({"mixed_type": 10.0}),
                json_field => json!({"mixed_type": 10.0}),
-                single_term => "single_term",
-                single_term => "single_term",
                text_field => "cool",
                text_field => "cool",
                text_field_all_unique_terms => "cool",
@@ -837,9 +655,7 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
            doc_with_value /= 20;
        }
        let _val_max = 1_000_000.0;
-        const SPAN_MS: i64 = 120 * 3600 * 1000; // 120 hours in ms
-        const NOISE_MS: i64 = 2 * 3600 * 1000; // ±2h noise
-        for i in 0..doc_with_value {
+        for _ in 0..doc_with_value {
            let val: f64 = rng.random_range(0.0..1_000_000.0);
            let json = if rng.random_bool(0.1) {
                // 10% are numeric values
@@ -847,11 +663,7 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
            } else {
                json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
            };
-            let base_ms = (i as i64 * SPAN_MS) / doc_with_value as i64;
-            let noise_ms = rng.random_range(-NOISE_MS..NOISE_MS);
-            let ts_ms = (base_ms + noise_ms).clamp(0, SPAN_MS);
            index_writer.add_document(doc!(
-                single_term => "single_term",
                text_field => "cool",
                json_field => json,
                text_field_all_unique_terms => format!("unique_term_{}", rng.random::<u64>()),
@@ -862,7 +674,7 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
                score_field => val as u64,
                score_field_f64 => lg_norm.sample(&mut rng),
                score_field_i64 => val as i64,
-                date_field => DateTime::from_timestamp_millis(ts_ms),
+                date_field => DateTime::from_timestamp_millis((val * 1_000_000.) as i64),
            ))?;
            if cardinality == Cardinality::OptionalSparse {
                for _ in 0..20 {
--- a/benches/intersection_bench.rs
+++ b/benches/intersection_bench.rs
@@ -1,149 +0,0 @@
-// Benchmarks top-K intersection of term scorers (block_wand_intersection).
-//
-// What's measured:
-// - Conjunctive queries (+a +b, +a +b +c) with top-10 by score
-// - Varying doc-frequency balance between terms (balanced, skewed, very skewed)
-// - Realistic term frequencies (geometric distribution, mostly low)
-// - 1M-doc single segment
-//
-// Run with: cargo bench --bench intersection_bench
-
-use binggan::{black_box, BenchRunner};
-use rand::prelude::*;
-use rand::rngs::StdRng;
-use rand::SeedableRng;
-use tantivy::collector::TopDocs;
-use tantivy::query::QueryParser;
-use tantivy::schema::{Schema, TEXT};
-use tantivy::{doc, Index, ReloadPolicy, Searcher};
-
-const NUM_DOCS: usize = 1_000_000;
-
-struct BenchIndex {
-    searcher: Searcher,
-    query_parser: QueryParser,
-}
-
-/// Generate term frequency from a geometric-like distribution.
-/// Most values are 1, a few are 2-3, rarely higher.
-/// p controls the decay: higher p → more weight on tf=1.
-fn random_term_freq(rng: &mut StdRng, p: f64) -> u32 {
-    let mut tf = 1u32;
-    while tf < 10 && rng.random_bool(1.0 - p) {
-        tf += 1;
-    }
-    tf
-}
-
-/// Build an index with three terms (a, b, c) with given doc-frequency probabilities.
-/// Each term occurrence has a realistic term frequency (geometric distribution).
-/// Field length is padded with filler tokens to create varied fieldnorms.
-fn build_index(p_a: f64, p_b: f64, p_c: f64) -> BenchIndex {
-    let mut schema_builder = Schema::builder();
-    let body = schema_builder.add_text_field("body", TEXT);
-    let schema = schema_builder.build();
-    let index = Index::create_in_ram(schema);
-
-    let mut rng = StdRng::from_seed([42u8; 32]);
-
-    {
-        let mut writer = index.writer_with_num_threads(1, 500_000_000).unwrap();
-        for _ in 0..NUM_DOCS {
-            let mut tokens: Vec<String> = Vec::new();
-
-            if rng.random_bool(p_a) {
-                let tf = random_term_freq(&mut rng, 0.7);
-                for _ in 0..tf {
-                    tokens.push("aaa".to_string());
-                }
-            }
-            if rng.random_bool(p_b) {
-                let tf = random_term_freq(&mut rng, 0.7);
-                for _ in 0..tf {
-                    tokens.push("bbb".to_string());
-                }
-            }
-            if rng.random_bool(p_c) {
-                let tf = random_term_freq(&mut rng, 0.7);
-                for _ in 0..tf {
-                    tokens.push("ccc".to_string());
-                }
-            }
-
-            // Pad with filler to create varied field lengths (5-30 tokens).
-            let filler_count = rng.random_range(5u32..30u32);
-            for _ in 0..filler_count {
-                tokens.push("filler".to_string());
-            }
-
-            let text = tokens.join(" ");
-            writer.add_document(doc!(body => text)).unwrap();
-        }
-        writer.commit().unwrap();
-    }
-
-    let reader = index
-        .reader_builder()
-        .reload_policy(ReloadPolicy::Manual)
-        .try_into()
-        .unwrap();
-    let searcher = reader.searcher();
-    let query_parser = QueryParser::for_index(&index, vec![body]);
-
-    BenchIndex {
-        searcher,
-        query_parser,
-    }
-}
-
-fn main() {
-    // Scenarios: (label, p_a, p_b, p_c)
-    //
-    // "balanced":    all terms ~10% → intersection ~1% of docs
-    // "skewed":      one common (50%), one rare (2%) → intersection ~1%
-    // "very_skewed": one very common (80%), one very rare (0.5%) → intersection ~0.4%
-    // "three_balanced": three terms ~20% each → intersection ~0.8%
-    // "three_skewed":   50% / 10% / 2% → intersection ~0.1%
-    let scenarios: Vec<(&str, f64, f64, f64)> = vec![
-        ("balanced_10%_10%", 0.10, 0.10, 0.0),
-        ("skewed_50%_2%", 0.50, 0.02, 0.0),
-        ("very_skewed_80%_0.5%", 0.80, 0.005, 0.0),
-        ("three_balanced_20%_20%_20%", 0.20, 0.20, 0.20),
-        ("three_skewed_50%_10%_2%", 0.50, 0.10, 0.02),
-    ];
-
-    let mut runner = BenchRunner::new();
-
-    for (label, p_a, p_b, p_c) in &scenarios {
-        let bench_index = build_index(*p_a, *p_b, *p_c);
-
-        let mut group = runner.new_group();
-        group.set_name(format!("intersection — {label}"));
-
-        // Two-term intersection
-        if *p_a > 0.0 && *p_b > 0.0 {
-            let query_str = "+aaa +bbb";
-            let query = bench_index.query_parser.parse_query(query_str).unwrap();
-            let searcher = bench_index.searcher.clone();
-            group.register(format!("{query_str} top10"), move |_| {
-                let collector = TopDocs::with_limit(10).order_by_score();
-                black_box(searcher.search(&query, &collector).unwrap());
-                1usize
-            });
-        }
-
-        // Three-term intersection
-        if *p_c > 0.0 {
-            let query_str = "+aaa +bbb +ccc";
-            let query = bench_index.query_parser.parse_query(query_str).unwrap();
-            let searcher = bench_index.searcher.clone();
-            group.register(format!("{query_str} top10"), move |_| {
-                let collector = TopDocs::with_limit(10).order_by_score();
-                black_box(searcher.search(&query, &collector).unwrap());
-                1usize
-            });
-        }
-
-        group.run();
-    }
-}
--- a/benches/query_parser_nested.rs
+++ b/benches/query_parser_nested.rs
@@ -1,35 +0,0 @@
-// Benchmark for the query grammar parsing deeply nested queries.
-//
-// Regression guard for https://github.com/quickwit-oss/tantivy/issues/2498:
-// at depth 20/21 the old parser took 0.87 s / 1.72 s respectively because
-// `ast()` retried `occur_leaf` on backtrack, giving O(2^n) time. With the
-// fix parsing is linear and completes in microseconds.
-//
-// Run with: `cargo bench --bench query_parser_nested`.
-
-use binggan::{black_box, BenchRunner};
-use tantivy::query_grammar::parse_query;
-
-fn nested_query(depth: usize, leading_plus: bool) -> String {
-    let leading = "(".repeat(depth);
-    let trailing = ")".repeat(depth);
-    let prefix = if leading_plus { "+" } else { "" };
-    format!("{prefix}{leading}title:test{trailing}")
-}
-
-fn main() {
-    let mut runner = BenchRunner::new();
-
-    for depth in [20, 21] {
-        for leading_plus in [false, true] {
-            let query = nested_query(depth, leading_plus);
-            let label = format!(
-                "parse_nested_depth_{depth}_{}",
-                if leading_plus { "plus" } else { "plain" },
-            );
-            runner.bench_function(&label, move |_| {
-                black_box(parse_query(black_box(&query)).unwrap());
-            });
-        }
-    }
-}
--- a/bitpacker/Cargo.toml
+++ b/bitpacker/Cargo.toml
@@ -18,10 +18,5 @@ homepage = "https://github.com/quickwit-oss/tantivy"
 bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker1x"] }

 [dev-dependencies]
-binggan = "0.17.0"
 rand = "0.9"
 proptest = "1"
-
-[[bench]]
-name = "bench"
-harness = false
--- a/bitpacker/benches/bench.rs
+++ b/bitpacker/benches/bench.rs
@@ -1,110 +1,65 @@
-use std::cell::RefCell;
+#![feature(test)]

-use binggan::{BenchRunner, black_box};
-use rand::rng;
-use rand::seq::IteratorRandom;
-use tantivy_bitpacker::{BitPacker, BitUnpacker, BlockedBitpacker};
+extern crate test;

-fn create_bitpacked_data(bit_width: u8, num_els: u32) -> Vec<u8> {
-    let mut bitpacker = BitPacker::new();
-    let mut buffer = Vec::new();
-    for _ in 0..num_els {
-        bitpacker.write(0u64, bit_width, &mut buffer).unwrap();
-        bitpacker.flush(&mut buffer).unwrap();
-    }
-    buffer
-}
+#[cfg(test)]
+mod tests {
+    use rand::rng;
+    use rand::seq::IteratorRandom;
+    use tantivy_bitpacker::{BitPacker, BitUnpacker, BlockedBitpacker};
+    use test::Bencher;

-const N: usize = 100_000;
-const MAX_VAL: u64 = 1_000;
-const BIT_WIDTH: u8 = 10; // 2^10 = 1024 > MAX_VAL
-
-fn create_packed_data() -> (BitUnpacker, Vec<u8>) {
-    let mut bitpacker = BitPacker::new();
-    let mut data = Vec::new();
-    for i in 0..N as u64 {
-        let val = i * MAX_VAL / N as u64;
-        bitpacker.write(val, BIT_WIDTH, &mut data).unwrap();
-    }
-    bitpacker.close(&mut data).unwrap();
-    (BitUnpacker::new(BIT_WIDTH), data)
-}
-
-fn bench_bitpacking() {
-    let mut runner = BenchRunner::new();
-    let bit_width = 3;
-    let num_els = 1_000_000u32;
-    let bit_unpacker = BitUnpacker::new(bit_width);
-    let data = create_bitpacked_data(bit_width, num_els);
-    let idxs: Vec<u32> = (0..num_els).choose_multiple(&mut rng(), 100_000);
-    runner.bench_function("bitpacking_read", move |_| {
-        let mut out = 0u64;
-        for &idx in &idxs {
-            out = out.wrapping_add(bit_unpacker.get(idx, &data[..]));
+    #[inline(never)]
+    fn create_bitpacked_data(bit_width: u8, num_els: u32) -> Vec<u8> {
+        let mut bitpacker = BitPacker::new();
+        let mut buffer = Vec::new();
+        for _ in 0..num_els {
+            // the values do not matter.
+            bitpacker.write(0u64, bit_width, &mut buffer).unwrap();
+            bitpacker.flush(&mut buffer).unwrap();
        }
-        black_box(out);
-    });
-}
-
-fn bench_blocked_bitpacker() {
-    let mut runner = BenchRunner::new();
-    let mut blocked_bitpacker = BlockedBitpacker::new();
-    for val in 0..=21500 {
-        blocked_bitpacker.add(val * val);
+        buffer
    }
-    runner.bench_function("blockedbitp_read", move |_| {
-        let mut out = 0u64;
-        for val in 0..=21500 {
-            out = out.wrapping_add(blocked_bitpacker.get(val));
-        }
-        black_box(out);
-    });
-    runner.bench_function("blockedbitp_create", |_| {
+
+    #[bench]
+    fn bench_bitpacking_read(b: &mut Bencher) {
+        let bit_width = 3;
+        let num_els = 1_000_000u32;
+        let bit_unpacker = BitUnpacker::new(bit_width);
+        let data = create_bitpacked_data(bit_width, num_els);
+        let idxs: Vec<u32> = (0..num_els).choose_multiple(&mut rng(), 100_000);
+        b.iter(|| {
+            let mut out = 0u64;
+            for &idx in &idxs {
+                out = out.wrapping_add(bit_unpacker.get(idx, &data[..]));
+            }
+            out
+        });
+    }
+
+    #[bench]
+    fn bench_blockedbitp_read(b: &mut Bencher) {
        let mut blocked_bitpacker = BlockedBitpacker::new();
        for val in 0..=21500 {
            blocked_bitpacker.add(val * val);
        }
-        black_box(blocked_bitpacker);
-    });
-}
-
-fn bench_filter_vec() {
-    let mut runner = BenchRunner::new();
-
-    let (unpacker, data) = create_packed_data();
-    let positions = RefCell::new(Vec::with_capacity(N));
-    runner.bench_function("filter_vec_dense", move |_| {
-        unpacker.get_ids_for_value_range(
-            250..=750,
-            0..N as u32,
-            &data,
-            &mut positions.borrow_mut(),
-        );
-        black_box(positions.borrow().len());
-    });
-
-    let (unpacker, data) = create_packed_data();
-    let positions = RefCell::new(Vec::with_capacity(N));
-    runner.bench_function("filter_vec_sparse", move |_| {
-        unpacker.get_ids_for_value_range(0..=50, 0..N as u32, &data, &mut positions.borrow_mut());
-        black_box(positions.borrow().len());
-    });
-
-    let (unpacker, data) = create_packed_data();
-    let positions = RefCell::new(Vec::with_capacity(N));
-    runner.bench_function("filter_vec_full", move |_| {
-        unpacker.get_ids_for_value_range(
-            0..=MAX_VAL,
-            0..N as u32,
-            &data,
-            &mut positions.borrow_mut(),
-        );
-        black_box(positions.borrow().len());
-    });
-}
-
-fn main() {
-    bench_bitpacking();
-    bench_blocked_bitpacker();
-    bench_filter_vec();
+        b.iter(|| {
+            let mut out = 0u64;
+            for val in 0..=21500 {
+                out = out.wrapping_add(blocked_bitpacker.get(val));
+            }
+            out
+        });
+    }
+
+    #[bench]
+    fn bench_blockedbitp_create(b: &mut Bencher) {
+        b.iter(|| {
+            let mut blocked_bitpacker = BlockedBitpacker::new();
+            for val in 0..=21500 {
+                blocked_bitpacker.add(val * val);
+            }
+            blocked_bitpacker
+        });
+    }
 }
--- a/bitpacker/src/filter_vec/mod.rs
+++ b/bitpacker/src/filter_vec/mod.rs
@@ -1,17 +1,8 @@
-#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
-use std::arch::is_aarch64_feature_detected;
 use std::ops::RangeInclusive;

 #[cfg(target_arch = "x86_64")]
 mod avx2;

-#[cfg(target_arch = "aarch64")]
-mod neon;
-
-// SVE intrinsics are not exposed on aarch64-apple-darwin.
-#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
-mod sve;
-
 mod scalar;

 #[derive(Clone, Copy, Eq, PartialEq, Debug)]
@@ -19,10 +10,6 @@ mod scalar;
 enum FilterImplPerInstructionSet {
    #[cfg(target_arch = "x86_64")]
    AVX2 = 0u8,
-    #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
-    SVE = 3u8,
-    #[cfg(target_arch = "aarch64")]
-    Neon = 2u8,
    Scalar = 1u8,
 }

@@ -32,57 +19,29 @@ impl FilterImplPerInstructionSet {
        match *self {
            #[cfg(target_arch = "x86_64")]
            FilterImplPerInstructionSet::AVX2 => is_x86_feature_detected!("avx2"),
-            #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
-            FilterImplPerInstructionSet::SVE => is_aarch64_feature_detected!("sve"),
-            // TIL Neon is required on aarch 64.
-            #[cfg(target_arch = "aarch64")]
-            FilterImplPerInstructionSet::Neon => true,
            FilterImplPerInstructionSet::Scalar => true,
        }
    }
 }

-// List of available implementations in preferred order.
+// List of available implementation in preferred order.
 #[cfg(target_arch = "x86_64")]
 const IMPLS: [FilterImplPerInstructionSet; 2] = [
    FilterImplPerInstructionSet::AVX2,
    FilterImplPerInstructionSet::Scalar,
 ];

-// Non-Apple aarch64: try SVE, NEON, Scalar.
-#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
-const IMPLS: [FilterImplPerInstructionSet; 3] = [
-    FilterImplPerInstructionSet::SVE,
-    FilterImplPerInstructionSet::Neon,
-    FilterImplPerInstructionSet::Scalar,
-];
-
-// Apple aarch64 (M-series): SVE not available; use NEON or Scalar.
-#[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
-const IMPLS: [FilterImplPerInstructionSet; 2] = [
-    FilterImplPerInstructionSet::Neon,
-    FilterImplPerInstructionSet::Scalar,
-];
-
-#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
+#[cfg(not(target_arch = "x86_64"))]
 const IMPLS: [FilterImplPerInstructionSet; 1] = [FilterImplPerInstructionSet::Scalar];

 impl FilterImplPerInstructionSet {
    #[inline]
-    #[allow(unused_variables)]
+    #[allow(unused_variables)] // on non-x86_64, code is unused.
    fn from(code: u8) -> FilterImplPerInstructionSet {
        #[cfg(target_arch = "x86_64")]
        if code == FilterImplPerInstructionSet::AVX2 as u8 {
            return FilterImplPerInstructionSet::AVX2;
        }
-        #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
-        if code == FilterImplPerInstructionSet::SVE as u8 {
-            return FilterImplPerInstructionSet::SVE;
-        }
-        #[cfg(target_arch = "aarch64")]
-        if code == FilterImplPerInstructionSet::Neon as u8 {
-            return FilterImplPerInstructionSet::Neon;
-        }
        FilterImplPerInstructionSet::Scalar
    }

@@ -91,13 +50,6 @@ impl FilterImplPerInstructionSet {
        match self {
            #[cfg(target_arch = "x86_64")]
            FilterImplPerInstructionSet::AVX2 => avx2::filter_vec_in_place(range, offset, output),
-            #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
-            // SAFETY: SVE availability was verified by is_available() before selecting this impl.
-            FilterImplPerInstructionSet::SVE => unsafe {
-                sve::filter_vec_in_place(range, offset, output)
-            },
-            #[cfg(target_arch = "aarch64")]
-            FilterImplPerInstructionSet::Neon => neon::filter_vec_in_place(range, offset, output),
            FilterImplPerInstructionSet::Scalar => {
                scalar::filter_vec_in_place(range, offset, output)
            }
@@ -105,12 +57,6 @@ impl FilterImplPerInstructionSet {
    }
 }

-fn available_impls() -> impl Iterator<Item = FilterImplPerInstructionSet> {
-    IMPLS
-        .into_iter()
-        .filter(FilterImplPerInstructionSet::is_available)
-}
-
 #[inline]
 fn get_best_available_instruction_set() -> FilterImplPerInstructionSet {
    use std::sync::atomic::{AtomicU8, Ordering};
@@ -118,7 +64,10 @@ fn get_best_available_instruction_set() -> FilterImplPerInstructionSet {
    let instruction_set_byte: u8 = INSTRUCTION_SET_BYTE.load(Ordering::Relaxed);
    if instruction_set_byte == u8::MAX {
        // Let's initialize the instruction set and cache it.
-        let instruction_set = available_impls().next().unwrap();
+        let instruction_set = IMPLS
+            .into_iter()
+            .find(FilterImplPerInstructionSet::is_available)
+            .unwrap();
        INSTRUCTION_SET_BYTE.store(instruction_set as u8, Ordering::Relaxed);
        return instruction_set;
    }
@@ -131,12 +80,12 @@ pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut

 #[cfg(test)]
 mod tests {
-    use proptest::strategy::Strategy;
-
    use super::*;

    #[test]
    fn test_get_best_available_instruction_set() {
+        // This does not test much unfortunately.
+        // We just make sure the function returns without crashing and returns the same result.
        let instruction_set = get_best_available_instruction_set();
        assert_eq!(get_best_available_instruction_set(), instruction_set);
    }
@@ -153,31 +102,6 @@ mod tests {
        }
    }

-    #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
-    #[test]
-    fn test_instruction_set_to_code_from_code() {
-        for instruction_set in [
-            FilterImplPerInstructionSet::SVE,
-            FilterImplPerInstructionSet::Neon,
-            FilterImplPerInstructionSet::Scalar,
-        ] {
-            let code = instruction_set as u8;
-            assert_eq!(instruction_set, FilterImplPerInstructionSet::from(code));
-        }
-    }
-
-    #[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
-    #[test]
-    fn test_instruction_set_to_code_from_code() {
-        for instruction_set in [
-            FilterImplPerInstructionSet::Neon,
-            FilterImplPerInstructionSet::Scalar,
-        ] {
-            let code = instruction_set as u8;
-            assert_eq!(instruction_set, FilterImplPerInstructionSet::from(code));
-        }
-    }
-
    fn test_filter_impl_empty_aux(filter_impl: FilterImplPerInstructionSet) {
        let mut output = vec![];
        filter_impl.filter_vec_in_place(0..=u32::MAX, 0, &mut output);
@@ -202,20 +126,11 @@ mod tests {
        assert_eq!(&output, &[1, 3, 4, 5, 6, 7, 8]);
    }

-    fn test_filter_impl_empty_range_aux(filter_impl: FilterImplPerInstructionSet) {
-        // start > end: RangeInclusive::contains always returns false; output must be empty.
-        // The SVE path's wrapping_sub would otherwise produce a huge range_width.
-        let mut output = vec![3, 2, 1, 5, 11, 2, 5, 10, 2];
-        filter_impl.filter_vec_in_place(10..=5, 0, &mut output);
-        assert_eq!(&output, &[]);
-    }
-
    fn test_filter_impl_test_suite(filter_impl: FilterImplPerInstructionSet) {
        test_filter_impl_empty_aux(filter_impl);
        test_filter_impl_simple_aux(filter_impl);
        test_filter_impl_simple_aux_shifted(filter_impl);
        test_filter_impl_simple_outside_i32_range(filter_impl);
-        test_filter_impl_empty_range_aux(filter_impl);
    }

    #[test]
@@ -226,60 +141,25 @@ mod tests {
        }
    }

-    #[test]
-    #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
-    fn test_filter_implementation_sve() {
-        if FilterImplPerInstructionSet::SVE.is_available() {
-            test_filter_impl_test_suite(FilterImplPerInstructionSet::SVE);
-        }
-    }
-
-    #[test]
-    #[cfg(target_arch = "aarch64")]
-    fn test_filter_implementation_neon() {
-        test_filter_impl_test_suite(FilterImplPerInstructionSet::Neon);
-    }
-
    #[test]
    fn test_filter_implementation_scalar() {
        test_filter_impl_test_suite(FilterImplPerInstructionSet::Scalar);
    }

-    fn max_val_strategy() -> impl proptest::strategy::Strategy<Value = u32> {
-        proptest::prop_oneof![
-            0u32..10u32,
-            255u32..258u32,
-            proptest::prelude::Just(1u32 << 25),
-            proptest::prelude::Just(u32::MAX - 1),
-            proptest::prelude::Just(u32::MAX),
-        ]
-    }
-
-    fn vals_strategy() -> impl proptest::strategy::Strategy<Value = Vec<u32>> {
-        proptest::prop_oneof![
-            proptest::collection::vec(proptest::prelude::any::<u32>(), 0..300),
-            max_val_strategy()
-                .prop_flat_map(|max_val| { proptest::collection::vec(0..=max_val, 0..300) })
-        ]
-    }
-
+    #[cfg(target_arch = "x86_64")]
    proptest::proptest! {
        #[test]
-        fn test_filter_compare_scalar_and_impls_impl_proptest(
-            start in 0u32..400u32,
-            end in 0u32..400u32,
+        fn test_filter_compare_scalar_and_avx2_impl_proptest(
+            start in proptest::prelude::any::<u32>(),
+            end in proptest::prelude::any::<u32>(),
            offset in 0u32..2u32,
-            vals in vals_strategy()) {
-                for implementation in available_impls() {
-                    if implementation == FilterImplPerInstructionSet::Scalar {
-                        continue;
-                    }
-                    let mut impl_output = vals.clone();
-                    let mut scalar_output = vals.clone();
-                    implementation.filter_vec_in_place(start..=end, offset, &mut impl_output);
-                    FilterImplPerInstructionSet::Scalar.filter_vec_in_place(start..=end, offset, &mut scalar_output);
-                    assert_eq!(&impl_output, &scalar_output);
-                }
+            mut vals in proptest::collection::vec(0..u32::MAX, 0..30)) {
+            if FilterImplPerInstructionSet::AVX2.is_available() {
+                let mut vals_clone = vals.clone();
+                FilterImplPerInstructionSet::AVX2.filter_vec_in_place(start..=end, offset, &mut vals);
+                FilterImplPerInstructionSet::Scalar.filter_vec_in_place(start..=end, offset, &mut vals_clone);
+                assert_eq!(&vals, &vals_clone);
+            }
       }
    }
 }
--- a/bitpacker/src/filter_vec/neon.rs
+++ b/bitpacker/src/filter_vec/neon.rs
@@ -1,118 +0,0 @@
-use std::arch::aarch64::*;
-use std::ops::RangeInclusive;
-
-const NUM_LANES: usize = 4;
-
-// Compacts matching lanes to the front using a byte-level shuffle.
-// `mask` is a 4-bit value: bit k=1 means lane k should appear in the output.
-#[inline]
-#[target_feature(enable = "neon")]
-unsafe fn compact(data: uint32x4_t, mask: u8) -> uint32x4_t {
-    unsafe {
-        // SAFETY: mask is always in [0, 15] by construction (max sum of [1,2,4,8]).
-        // BYTE_SHUFFLE_TABLE has 16 entries, so this is always in bounds.
-        let shuffle = BYTE_SHUFFLE_TABLE.get_unchecked(mask as usize);
-        let shuffle_vec = vld1q_u8(shuffle.as_ptr());
-        vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(data), shuffle_vec))
-    }
-}
-
-// Safe (not unsafe) because NEON is mandatory on aarch64: no runtime feature check needed.
-#[inline(never)]
-pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
-    let num_words = output.len() / NUM_LANES;
-    let mut output_len = unsafe {
-        filter_vec_neon_aux(
-            output.as_ptr(),
-            range.clone(),
-            output.as_mut_ptr(),
-            offset,
-            num_words,
-        )
-    };
-    let remainder_start = num_words * NUM_LANES;
-    for i in remainder_start..output.len() {
-        let val = output[i];
-        output[output_len] = offset + i as u32;
-        output_len += if range.contains(&val) { 1 } else { 0 };
-    }
-    output.truncate(output_len);
-}
-
-#[target_feature(enable = "neon")]
-unsafe fn filter_vec_neon_aux(
-    input: *const u32,
-    range: RangeInclusive<u32>,
-    output: *mut u32,
-    offset: u32,
-    num_words: usize,
-) -> usize {
-    unsafe {
-        let mut input = input;
-        let mut output_tail = output;
-        let range_start_simd = vdupq_n_u32(*range.start());
-        let range_end_simd = vdupq_n_u32(*range.end());
-        let mut ids = vld1q_u32([offset, offset + 1, offset + 2, offset + 3].as_ptr());
-        let shift = vdupq_n_u32(NUM_LANES as u32);
-        let bit_weights = vld1q_u32([1u32, 2, 4, 8].as_ptr());
-
-        for _ in 0..num_words {
-            let word = vld1q_u32(input);
-
-            // Unsigned compares: CMHS (compare higher or same) tests `word >= start`
-            // and `end >= word`. ANDing both gives the inside-range mask directly,
-            // which is cheaper than computing `outside` and then negating.
-            let ge_start = vcgeq_u32(word, range_start_simd);
-            let le_end = vcleq_u32(word, range_end_simd);
-            // inside[k] = 0xFFFFFFFF if val[k] is in range, 0 otherwise.
-            let inside = vandq_u32(ge_start, le_end);
-
-            // Build the 4-bit mask: AND bit_weights with the inside lane mask, so each
-            // inside lane contributes its bit_weight (1, 2, 4, or 8). Summing yields the
-            // 4-bit mask in one addv.
-            let inside_bits = vandq_u32(bit_weights, inside);
-            let mask = vaddvq_u32(inside_bits) as u8;
-            // mask is mathematically bounded: max value is 1+2+4+8=15 (all lanes match)
-            debug_assert!(mask <= 15, "mask must fit in 4 bits: {}", mask);
-
-            // Count of matching lanes = popcount(mask). Derives the count directly from
-            // the mask instead of running a parallel SIMD reduction over `outside`.
-            let added_len = mask.count_ones() as usize;
-
-            // Safe because mask is guaranteed to be in [0, 15]
-            let filtered_ids = compact(ids, mask);
-            vst1q_u32(output_tail, filtered_ids);
-            output_tail = output_tail.add(added_len);
-            ids = vaddq_u32(ids, shift);
-            input = input.add(NUM_LANES);
-        }
-
-        output_tail.offset_from(output) as usize
-    }
-}
-
-// Byte shuffle patterns to compact matching lanes to the front of the vector.
-// Index is a 4-bit mask: bit k=1 means lane k (bytes 4k..4k+3) is in-range.
-// The j-th set bit determines which input lane goes to output position j.
-const BYTE_SHUFFLE_TABLE: [[u8; 16]; 16] = [
-    [
-        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    ], // 0b0000: none
-    [0, 1, 2, 3, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0001: lane 0
-    [4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0010: lane 1
-    [0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16],     // 0b0011: lanes 0,1
-    [8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0100: lane 2
-    [0, 1, 2, 3, 8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16],   // 0b0101: lanes 0,2
-    [4, 5, 6, 7, 8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16],   // 0b0110: lanes 1,2
-    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 16, 16, 16],       // 0b0111: lanes 0,1,2
-    [
-        12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    ], // 0b1000: lane 3
-    [0, 1, 2, 3, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1001: lanes 0,3
-    [4, 5, 6, 7, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1010: lanes 1,3
-    [0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 16, 16, 16],     // 0b1011: lanes 0,1,3
-    [8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1100: lanes 2,3
-    [0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16],   // 0b1101: lanes 0,2,3
-    [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16],   // 0b1110: lanes 1,2,3
-    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],       // 0b1111: all lanes
-];
--- a/bitpacker/src/filter_vec/sve.rs
+++ b/bitpacker/src/filter_vec/sve.rs
@@ -1,260 +0,0 @@
-use std::ops::RangeInclusive;
-
-// SVE vector length (in u32 lanes) is not a compile-time constant; query at runtime.
-// Safe to call only when SVE is confirmed available via is_aarch64_feature_detected!("sve").
-#[target_feature(enable = "sve")]
-unsafe fn num_lanes() -> usize {
-    let vl: usize;
-    unsafe {
-        core::arch::asm!(
-            "cntw {vl}",
-            vl = out(reg) vl,
-            options(nostack, nomem, preserves_flags),
-        );
-    }
-    vl
-}
-
-// SAFETY: caller must ensure SVE is available (checked via is_aarch64_feature_detected!("sve")).
-// Unlike NEON, SVE is optional on aarch64 and not guaranteed by the target architecture.
-pub unsafe fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
-    if range.start() > range.end() {
-        output.clear();
-        return;
-    }
-    let vl = unsafe { num_lanes() };
-    let num_words = output.len() / vl;
-    let range_start = *range.start();
-    // Unsigned subtraction trick: val ∈ [lo, hi] ↔ (val - lo) ≤ᵤ (hi - lo).
-    // Values below lo wrap around to large u32, so the single unsigned ≤ excludes them.
-    let range_width = range.end().wrapping_sub(range_start);
-    let mut output_len = unsafe {
-        filter_vec_sve_aux(
-            output.as_ptr(),
-            range_start,
-            range_width,
-            output.as_mut_ptr(),
-            offset,
-            num_words,
-            vl,
-        )
-    };
-    let remainder_start = num_words * vl;
-    for i in remainder_start..output.len() {
-        let val = output[i];
-        output[output_len] = offset + i as u32;
-        output_len += if range.contains(&val) { 1 } else { 0 };
-    }
-    output.truncate(output_len);
-}
-
-// Register allocation for the asm! blocks:
-//   z0        ids_a (index vector for first half of each pair, advances by step2 each iter)
-//   z1        range_width broadcast
-//   z2        range_start broadcast
-//   z3        step2 broadcast (2 * vl)
-//   z4        ids_b (index vector for second half, = ids_a + step, advances by step2)
-//   z5        scratch: loaded word_a, then compacted_a
-//   z6        scratch: loaded word_b, then compacted_b
-//   p0        all-true predicate (ptrue p0.s)
-//   p1        in-range mask for word_a
-//   p2        in-range mask for word_b
-#[target_feature(enable = "sve")]
-unsafe fn filter_vec_sve_aux(
-    input: *const u32,
-    range_start: u32,
-    range_width: u32,
-    output: *mut u32,
-    offset: u32,
-    num_words: usize,
-    vl: usize,
-) -> usize {
-    let num_pairs = num_words / 2;
-    let mut input_ptr = input;
-    let mut output_tail = output;
-
-    if num_pairs > 0 {
-        unsafe {
-            // We rely on asm! because the SVE intrinsics are not available in stable Rust.
-            // The code that follows was generated by Rustc nightly based on the intrinsics version
-            // at the bottom of this file.
-            core::arch::asm!(
-                // --- Setup ---
-                // All-true predicate for 32-bit lanes.
-                "ptrue p0.s",
-                // ids_a = [offset, offset+1, offset+2, ...]
-                "index z0.s, {offset:w}, #1",
-                // Broadcast scalars into SVE vectors.
-                "mov z1.s, {range_width:w}",
-                "mov z2.s, {range_start:w}",
-                // vl_gpr = number of 32-bit lanes (cntw).
-                "cntw {vl_gpr}",
-                // step2_bytes will first hold 2*vl (for the step2 vector), then 2*VL in bytes.
-                "lsl {step2_bytes}, {vl_gpr}, #1",
-                // z4 = step = [vl, vl, ...]; will become ids_b after the add below.
-                "mov z4.s, {vl_gpr:w}",
-                // z3 = step2 = [2*vl, 2*vl, ...], used to advance both id vectors each iter.
-                "mov z3.s, {step2_bytes:w}",
-                // Repurpose step2_bytes to hold the byte stride for advancing the input pointer
-                // by two full SVE vectors per iteration.
-                "rdvl {step2_bytes}, #2",
-                // ids_b = ids_a + step = [offset+vl, offset+vl+1, ...]
-                "add z4.s, z0.s, z4.s",
-
-                // --- Main loop: process two SVE vectors (ids_a and ids_b) per iteration ---
-                "0:",
-                // Load two consecutive SVE vectors from input.
-                "ld1w {{z5.s}}, p0/z, [{input}]",
-                "ld1w {{z6.s}}, p0/z, [{input}, #1, mul vl]",
-                // Advance input pointer by 2 * VL bytes.
-                "add {input}, {input}, {step2_bytes}",
-                // Unsigned shift: subtract range_start so in-range check becomes a single cmpu ≤.
-                "sub z5.s, z5.s, z2.s",
-                "sub z6.s, z6.s, z2.s",
-                // in_range: shifted value ≤ range_width  (unsigned, so values below lo also fail).
-                "cmphs p1.s, p0/z, z1.s, z5.s",
-                "cmphs p2.s, p0/z, z1.s, z6.s",
-                // Count matching lanes; both cntp calls have independent inputs for OOO parallelism.
-                "cntp {cnt_a}, p0, p1.s",
-                "compact z5.s, p1, z0.s",
-                "compact z6.s, p2, z4.s",
-                "cntp {cnt_b}, p0, p2.s",
-                // Advance id vectors for the next iteration.
-                "add z0.s, z0.s, z3.s",
-                "add z4.s, z4.s, z3.s",
-                // Store compacted ids. Only the first cnt_a / cnt_b slots are valid; the rest
-                // will be overwritten by subsequent iterations before the final truncate.
-                "str z5, [{out}]",
-                "st1w {{z6.s}}, p0, [{out}, {cnt_a}, lsl #2]",
-                "add {out}, {out}, {cnt_a}, lsl #2",
-                "add {out}, {out}, {cnt_b}, lsl #2",
-                "subs {pairs}, {pairs}, #1",
-                "b.ne 0b",
-
-                // --- Operands ---
-                input       = inout(reg) input_ptr,
-                out         = inout(reg) output_tail,
-                pairs       = inout(reg) num_pairs => _,
-                offset      = in(reg) offset,
-                range_start = in(reg) range_start,
-                range_width = in(reg) range_width,
-                vl_gpr      = out(reg) _,
-                step2_bytes = out(reg) _,
-                cnt_a       = out(reg) _,
-                cnt_b       = out(reg) _,
-                out("p0") _, out("p1") _, out("p2") _,
-                out("v0") _, out("v1") _, out("v2") _, out("v3") _,
-                out("v4") _, out("v5") _, out("v6") _,
-                options(nostack),
-            );
-        }
-    }
-
-    // Handle an odd trailing vector.
-    if num_words % 2 == 1 {
-        // ids_a for the odd word starts at offset + num_pairs * 2 * vl.
-        // input_ptr was advanced by the main loop and now points at the odd word.
-        let odd_offset =
-            offset.wrapping_add((num_pairs as u32).wrapping_mul(2).wrapping_mul(vl as u32));
-        unsafe {
-            core::arch::asm!(
-                "ptrue p0.s",
-                "index z0.s, {odd_offset:w}, #1",
-                "mov z1.s, {range_width:w}",
-                "mov z2.s, {range_start:w}",
-                "ld1w {{z3.s}}, p0/z, [{input}]",
-                "sub z3.s, z3.s, z2.s",
-                "cmphs p1.s, p0/z, z1.s, z3.s",
-                "cntp {cnt}, p0, p1.s",
-                "compact z0.s, p1, z0.s",
-                "str z0, [{out}]",
-                "add {out}, {out}, {cnt}, lsl #2",
-                odd_offset  = in(reg) odd_offset,
-                range_width = in(reg) range_width,
-                range_start = in(reg) range_start,
-                input       = in(reg) input_ptr,
-                out         = inout(reg) output_tail,
-                cnt         = out(reg) _,
-                out("p0") _, out("p1") _,
-                out("v0") _, out("v1") _, out("v2") _, out("v3") _,
-                options(nostack),
-            );
-        }
-    }
-
-    unsafe { output_tail.offset_from(output) as usize }
-}
-
-// SVE implements with intrinsics.
-//
-// #[target_feature(enable = "sve")]
-// unsafe fn filter_vec_sve_aux(
-//     input: *const u32,
-//     range_start: u32,
-//     range_width: u32,
-//     output: *mut u32,
-//     offset: u32,
-//     num_words: usize,
-//     vl: usize,
-// ) -> usize {
-//     unsafe {
-//         let all_true = svptrue_b32();
-//         let range_start_simd = svdup_n_u32(range_start);
-//         let range_width_simd = svdup_n_u32(range_width);
-//         // ids_a covers [offset .. offset+vl), ids_b covers the next vl ids.
-//         // Keeping them separate breaks the loop-carried dependency through ids so
-//         // both compact/cntp chains are fully independent within each unrolled body.
-//         let mut ids_a = svindex_u32(offset, 1);
-//         let step = svdup_n_u32(vl as u32);
-//         let step2 = svdup_n_u32(2 * vl as u32);
-//         let mut ids_b = svadd_u32_x(all_true, ids_a, step);
-
-//         let mut input = input;
-//         let mut output_tail = output;
-
-//         // Unrolled ×2: both cntp calls have independent inputs and execute in parallel.
-//         // The two output_tail updates are sequential but together cost 4+1+1=6 cy per
-//         // pair vs 5+5=10 cy for two scalar iterations, breaking the cntp latency chain.
-//         let num_pairs = num_words / 2;
-//         for _ in 0..num_pairs {
-//             let word_a = svld1_u32(all_true, input);
-//             let word_b = svld1_u32(all_true, input.add(vl));
-
-//             let shifted_a = svsub_u32_x(all_true, word_a, range_start_simd);
-//             let shifted_b = svsub_u32_x(all_true, word_b, range_start_simd);
-
-//             let in_range_a = svcmple_u32(all_true, shifted_a, range_width_simd);
-//             let in_range_b = svcmple_u32(all_true, shifted_b, range_width_simd);
-
-//             let compacted_a = svcompact_u32(in_range_a, ids_a);
-//             let compacted_b = svcompact_u32(in_range_b, ids_b);
-//             // cntp_a and cntp_b have independent inputs: OOO engine issues them in parallel.
-//             let added_len_a = svcntp_b32(all_true, in_range_a) as usize;
-//             let added_len_b = svcntp_b32(all_true, in_range_b) as usize;
-
-//             // Write the full vector — only the first added_len slots are valid.
-//             // Subsequent iterations overwrite the trailing zeros before truncate.
-//             svst1_u32(all_true, output_tail, compacted_a);
-//             output_tail = output_tail.add(added_len_a);
-//             svst1_u32(all_true, output_tail, compacted_b);
-//             output_tail = output_tail.add(added_len_b);
-
-//             ids_a = svadd_u32_x(all_true, ids_a, step2);
-//             ids_b = svadd_u32_x(all_true, ids_b, step2);
-//             input = input.add(2 * vl);
-//         }
-
-//         // Handle an odd trailing word.
-//         if num_words % 2 == 1 {
-//             let word = svld1_u32(all_true, input);
-//             let shifted = svsub_u32_x(all_true, word, range_start_simd);
-//             let in_range = svcmple_u32(all_true, shifted, range_width_simd);
-//             let added_len = svcntp_b32(all_true, in_range) as usize;
-//             let compacted_ids = svcompact_u32(in_range, ids_a);
-//             svst1_u32(all_true, output_tail, compacted_ids);
-//             output_tail = output_tail.add(added_len);
-//         }
-
-//         output_tail.offset_from(output) as usize
-//     }
-// }
--- a/columnar/Cargo.toml
+++ b/columnar/Cargo.toml
@@ -23,7 +23,7 @@ downcast-rs = "2.0.1"
 proptest = "1"
 more-asserts = "0.3.1"
 rand = "0.9"
-binggan = "0.17.0"
+binggan = "0.15.3"

 [[bench]]
 name = "bench_merge"
--- a/columnar/benches/bench_first_vals.rs
+++ b/columnar/benches/bench_first_vals.rs
@@ -28,7 +28,7 @@ fn get_test_columns() -> Columns {
    }
    let mut buffer: Vec<u8> = Vec::new();
    dataframe_writer
-        .serialize(data.len() as u32, None, &mut buffer)
+        .serialize(data.len() as u32, &mut buffer)
        .unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();

--- a/columnar/benches/common.rs
+++ b/columnar/benches/common.rs
@@ -54,6 +54,6 @@ pub fn generate_columnar_with_name(card: Card, num_docs: u32, column_name: &str)
    }

    let mut wrt: Vec<u8> = Vec::new();
-    columnar_writer.serialize(num_docs, None, &mut wrt).unwrap();
+    columnar_writer.serialize(num_docs, &mut wrt).unwrap();
    ColumnarReader::open(wrt).unwrap()
 }
--- a/columnar/src/block_accessor.rs
+++ b/columnar/src/block_accessor.rs
@@ -15,37 +15,9 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
 {
    #[inline]
    pub fn fetch_block<'a>(&'a mut self, docs: &'a [u32], accessor: &Column<T>) {
-        self.fetch_block_with_is_full(docs, accessor, accessor.index.get_cardinality().is_full());
-    }
-
-    /// Like [`Self::fetch_block`] but takes the column's fullness instead of querying
-    /// `accessor.index.get_cardinality()` each call — for callers that know it up front (e.g.
-    /// checked once at construction). `is_full` must equal
-    /// `accessor.index.get_cardinality().is_full()`.
-    #[inline]
-    pub fn fetch_block_with_is_full<'a>(
-        &'a mut self,
-        docs: &'a [u32],
-        accessor: &Column<T>,
-        is_full: bool,
-    ) {
-        if is_full {
-            // Skip the resize when already the right length (common case: fixed-size blocks).
-            if self.val_cache.len() != docs.len() {
-                self.val_cache.resize(docs.len(), T::default());
-            }
-            // When the docs form a contiguous ascending run we can fetch the values
-            // as a single range. This lets codecs (e.g. bitpacked) bulk-decode the
-            // slice instead of gathering value-by-value, and avoids per-value dynamic
-            // dispatch. `docs` is always sorted ascending and free of duplicates here,
-            // so comparing the endpoints is enough to detect contiguity.
-            if is_contiguous(docs) {
-                accessor
-                    .values
-                    .get_range(docs[0] as u64, &mut self.val_cache);
-            } else {
-                accessor.values.get_vals(docs, &mut self.val_cache);
-            }
+        if accessor.index.get_cardinality().is_full() {
+            self.val_cache.resize(docs.len(), T::default());
+            accessor.values.get_vals(docs, &mut self.val_cache);
        } else {
            self.docid_cache.clear();
            self.row_id_cache.clear();
@@ -61,14 +33,14 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
        &mut self,
        docs: &[u32],
        accessor: &Column<T>,
-        missing_opt: Option<T>,
+        missing: Option<T>,
    ) {
        self.fetch_block(docs, accessor);
        // no missing values
        if accessor.index.get_cardinality().is_full() {
            return;
        }
-        let Some(missing) = missing_opt else {
+        let Some(missing) = missing else {
            return;
        };

@@ -186,22 +158,6 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
    }
 }

-/// Returns true if `docs` is a contiguous ascending run `[d, d + 1, ..., d + n - 1]`.
-///
-/// Assumes `docs` is sorted ascending and free of duplicates (the invariant for the
-/// doc blocks passed to `fetch_block`), so comparing the endpoints is sufficient.
-#[inline]
-fn is_contiguous(docs: &[u32]) -> bool {
-    let (Some(&first), Some(&last)) = (docs.first(), docs.last()) else {
-        return false;
-    };
-    debug_assert!(
-        docs.windows(2).all(|w| w[0] < w[1]),
-        "fetch_block requires docs sorted ascending without duplicates"
-    );
-    (last - first) as usize + 1 == docs.len()
-}
-
 /// Given two sorted lists of docids `docs` and `hits`, hits is a subset of `docs`.
 /// Return all docs that are not in `hits`.
 fn find_missing_docs<F>(docs: &[u32], hits: &[u32], mut callback: F)
@@ -235,7 +191,6 @@ where F: FnMut(u32) {
 }

 #[cfg(test)]
-#[allow(clippy::field_reassign_with_default)]
 mod tests {
    use super::*;

@@ -332,46 +287,4 @@ mod tests {
        assert_eq!(accessor.docid_cache, vec![0]);
        assert_eq!(accessor.val_cache, vec![1]);
    }
-
-    #[test]
-    fn test_is_contiguous() {
-        assert!(!is_contiguous(&[]));
-        assert!(is_contiguous(&[5]));
-        assert!(is_contiguous(&[5, 6, 7, 8]));
-        assert!(is_contiguous(&[0, 1, 2]));
-        assert!(!is_contiguous(&[5, 7, 8]));
-        assert!(!is_contiguous(&[0, 1, 3]));
-    }
-
-    #[test]
-    fn test_fetch_block_contiguous_and_gather_match() {
-        use crate::column_index::ColumnIndex;
-        use crate::column_values::{
-            ALL_U64_CODEC_TYPES, serialize_and_load_u64_based_column_values,
-        };
-
-        let vals: Vec<u64> = (0..200u64).map(|i| i * 7 + 3).collect();
-        let values =
-            serialize_and_load_u64_based_column_values::<u64>(&&vals[..], &ALL_U64_CODEC_TYPES);
-        let column = Column {
-            index: ColumnIndex::Full,
-            values,
-        };
-
-        let check = |accessor: &mut ColumnBlockAccessor<u64>, docs: &[u32]| {
-            accessor.fetch_block(docs, &column);
-            let got: Vec<(u32, u64)> = accessor.iter_docid_vals(docs, &column).collect();
-            let expected: Vec<(u32, u64)> = docs.iter().map(|&d| (d, vals[d as usize])).collect();
-            assert_eq!(got, expected);
-        };
-
-        let mut accessor = ColumnBlockAccessor::<u64>::default();
-        // Contiguous block -> get_range fast path.
-        check(&mut accessor, &(10..74).collect::<Vec<u32>>());
-        // Non-contiguous block -> get_vals gather path.
-        check(&mut accessor, &[0, 5, 9, 100, 199]);
-        // Single doc and full span.
-        check(&mut accessor, &[42]);
-        check(&mut accessor, &(0..200).collect::<Vec<u32>>());
-    }
 }
--- a/columnar/src/column_index/multivalued_index.rs
+++ b/columnar/src/column_index/multivalued_index.rs
@@ -375,7 +375,7 @@ mod tests {
        columnar_writer.record_numerical(5, "full", u64::MAX);

        let mut wrt: Vec<u8> = Vec::new();
-        columnar_writer.serialize(7, None, &mut wrt).unwrap();
+        columnar_writer.serialize(7, &mut wrt).unwrap();

        let reader = ColumnarReader::open(wrt).unwrap();
        // Open the column as u64
--- a/columnar/src/column_index/optional_index/tests.rs
+++ b/columnar/src/column_index/optional_index/tests.rs
@@ -15,9 +15,7 @@ fn test_optional_index_with_num_docs(num_docs: u32) {
    let mut dataframe_writer = ColumnarWriter::default();
    dataframe_writer.record_numerical(100, "score", 80i64);
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer
-        .serialize(num_docs, None, &mut buffer)
-        .unwrap();
+    dataframe_writer.serialize(num_docs, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("score").unwrap();
--- a/columnar/src/column_values/mod.rs
+++ b/columnar/src/column_values/mod.rs
@@ -119,18 +119,8 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
    /// the segment's `maxdoc`.
    #[inline(always)]
    fn get_range(&self, start: u64, output: &mut [T]) {
-        let mut out_chunks = output.chunks_exact_mut(4);
-        let mut idx = start;
-        for out_x4 in out_chunks.by_ref() {
-            out_x4[0] = self.get_val(idx as u32);
-            out_x4[1] = self.get_val((idx + 1) as u32);
-            out_x4[2] = self.get_val((idx + 2) as u32);
-            out_x4[3] = self.get_val((idx + 3) as u32);
-            idx += 4;
-        }
-        for out in out_chunks.into_remainder() {
+        for (out, idx) in output.iter_mut().zip(start..) {
            *out = self.get_val(idx as u32);
-            idx += 1;
        }
    }

--- a/columnar/src/column_values/u64_based/tests.rs
+++ b/columnar/src/column_values/u64_based/tests.rs
@@ -121,22 +121,6 @@ pub(crate) fn create_and_validate<TColumnCodec: ColumnCodec>(
    reader.get_vals(&all_docs, &mut buffer);
    assert_eq!(vals, buffer);

-    // Validate `get_range` over the full column and a sub-range. The sub-range starts
-    // at a non-zero offset to exercise the entrance-ramp alignment of the batch decode.
-    buffer.resize(all_docs.len(), 0);
-    reader.get_range(0, &mut buffer);
-    assert_eq!(vals, buffer, "get_range (full) mismatch in data set {name}");
-    if vals.len() >= 2 {
-        let start = 1usize;
-        buffer.resize(vals.len() - start, 0);
-        reader.get_range(start as u64, &mut buffer);
-        assert_eq!(
-            &vals[start..],
-            &buffer[..],
-            "get_range (sub-range) mismatch in data set {name}"
-        );
-    }
-
    if !vals.is_empty() {
        let test_rand_idx = rand::rng().random_range(0..=vals.len() - 1);
        let expected_positions: Vec<u32> = vals
--- a/columnar/src/columnar/merge/merge_dict_column.rs
+++ b/columnar/src/columnar/merge/merge_dict_column.rs
@@ -33,25 +33,6 @@ pub fn merge_bytes_or_str_column(
    Ok(())
 }

-/// Computes a per-segment mapping from old term ordinal to merged term ordinal.
-///
-/// Performs a streaming k-way merge of per-segment term dictionaries (SSTable-backed) to build
-/// a unified ordering. For each segment, the output is a `Vec<TermOrdinal>` where index `i`
-/// holds the merged global ordinal corresponding to segment-local ordinal `i`.
-///
-/// This is used by index sorting to compare terms from different segments without materializing
-/// term bytes in memory — only ordinals are compared.
-#[doc(hidden)]
-pub fn compute_merged_term_ord_mapping(
-    bytes_columns: &[BytesColumn],
-) -> io::Result<Vec<Vec<TermOrdinal>>> {
-    let bytes_columns_opt: Vec<Option<BytesColumn>> =
-        bytes_columns.iter().cloned().map(Some).collect();
-    let term_ord_mapping =
-        merge_dict_and_compute_term_ord_mapping(&bytes_columns_opt, |_| true, |_| Ok(()))?;
-    Ok(term_ord_mapping.into_per_segment_new_term_ordinals())
-}
-
 struct RemappedTermOrdinalsValues<'a> {
    bytes_columns: &'a [Option<BytesColumn>],
    term_ord_mapping: &'a TermOrdinalMapping,
@@ -137,14 +118,14 @@ fn is_term_present(bitsets: &[Option<BitSet>], term_merger: &TermMerger) -> bool
    false
 }

-fn merge_dict_and_compute_term_ord_mapping(
+fn serialize_merged_dict(
    bytes_columns: &[Option<BytesColumn>],
-    mut should_keep_term: impl FnMut(&TermMerger) -> bool,
-    mut emit_term: impl FnMut(&[u8]) -> io::Result<()>,
+    merge_row_order: &MergeRowOrder,
+    output: &mut impl Write,
 ) -> io::Result<TermOrdinalMapping> {
    let mut term_ord_mapping = TermOrdinalMapping::default();

-    let mut field_term_streams = Vec::with_capacity(bytes_columns.len());
+    let mut field_term_streams = Vec::new();
    for (segment_ord, column_opt) in bytes_columns.iter().enumerate() {
        if let Some(column) = column_opt {
            term_ord_mapping.add_segment(column.dictionary.num_terms());
@@ -160,33 +141,21 @@ fn merge_dict_and_compute_term_ord_mapping(
    }

    let mut merged_terms = TermMerger::new(field_term_streams);
-    let mut current_term_ord = 0;
-    while merged_terms.advance() {
-        if !should_keep_term(&merged_terms) {
-            continue;
-        }
-        emit_term(merged_terms.key())?;
-        for (segment_ord, from_term_ord) in merged_terms.matching_segments() {
-            term_ord_mapping.register_from_to(segment_ord, from_term_ord, current_term_ord);
-        }
-        current_term_ord += 1;
-    }
-
-    Ok(term_ord_mapping)
-}
-
-fn serialize_merged_dict(
-    bytes_columns: &[Option<BytesColumn>],
-    merge_row_order: &MergeRowOrder,
-    output: &mut impl Write,
-) -> io::Result<TermOrdinalMapping> {
    let mut sstable_builder = sstable::VoidSSTable::writer(output);
-    let term_ord_mapping = match merge_row_order {
-        MergeRowOrder::Stack(_) => merge_dict_and_compute_term_ord_mapping(
-            bytes_columns,
-            |_| true,
-            |term_bytes| sstable_builder.insert(term_bytes, &()),
-        )?,
+
+    match merge_row_order {
+        MergeRowOrder::Stack(_) => {
+            let mut current_term_ord = 0;
+            while merged_terms.advance() {
+                let term_bytes: &[u8] = merged_terms.key();
+                sstable_builder.insert(term_bytes, &())?;
+                for (segment_ord, from_term_ord) in merged_terms.matching_segments() {
+                    term_ord_mapping.register_from_to(segment_ord, from_term_ord, current_term_ord);
+                }
+                current_term_ord += 1;
+            }
+            sstable_builder.finish()?;
+        }
        MergeRowOrder::Shuffled(shuffle_merge_order) => {
            assert_eq!(shuffle_merge_order.alive_bitsets.len(), bytes_columns.len());
            let mut term_bitsets: Vec<Option<BitSet>> = Vec::with_capacity(bytes_columns.len());
@@ -205,14 +174,21 @@ fn serialize_merged_dict(
                    }
                }
            }
-            merge_dict_and_compute_term_ord_mapping(
-                bytes_columns,
-                |merged_terms| is_term_present(&term_bitsets[..], merged_terms),
-                |term_bytes| sstable_builder.insert(term_bytes, &()),
-            )?
+            let mut current_term_ord = 0;
+            while merged_terms.advance() {
+                let term_bytes: &[u8] = merged_terms.key();
+                if !is_term_present(&term_bitsets[..], &merged_terms) {
+                    continue;
+                }
+                sstable_builder.insert(term_bytes, &())?;
+                for (segment_ord, from_term_ord) in merged_terms.matching_segments() {
+                    term_ord_mapping.register_from_to(segment_ord, from_term_ord, current_term_ord);
+                }
+                current_term_ord += 1;
+            }
+            sstable_builder.finish()?;
        }
-    };
-    sstable_builder.finish()?;
+    }
    Ok(term_ord_mapping)
 }

@@ -235,8 +211,4 @@ impl TermOrdinalMapping {
    fn get_segment(&self, segment_ord: u32) -> &[TermOrdinal] {
        &self.per_segment_new_term_ordinals[segment_ord as usize]
    }
-
-    fn into_per_segment_new_term_ordinals(self) -> Vec<Vec<TermOrdinal>> {
-        self.per_segment_new_term_ordinals
-    }
 }
--- a/columnar/src/columnar/merge/mod.rs
+++ b/columnar/src/columnar/merge/mod.rs
@@ -7,7 +7,6 @@ use std::io;
 use std::net::Ipv6Addr;
 use std::sync::Arc;

-pub use merge_dict_column::compute_merged_term_ord_mapping;
 pub use merge_mapping::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};

 use super::writer::ColumnarSerializer;
--- a/columnar/src/columnar/merge/tests.rs
+++ b/columnar/src/columnar/merge/tests.rs
@@ -17,7 +17,7 @@ fn make_columnar<T: Into<NumericalValue> + HasAssociatedColumnType + Copy>(
    }
    let mut buffer: Vec<u8> = Vec::new();
    dataframe_writer
-        .serialize(vals.len() as RowId, None, &mut buffer)
+        .serialize(vals.len() as RowId, &mut buffer)
        .unwrap();
    ColumnarReader::open(buffer).unwrap()
 }
@@ -143,9 +143,7 @@ fn make_numerical_columnar_multiple_columns(
        .max()
        .unwrap_or(0u32);
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer
-        .serialize(num_rows, None, &mut buffer)
-        .unwrap();
+    dataframe_writer.serialize(num_rows, &mut buffer).unwrap();
    ColumnarReader::open(buffer).unwrap()
 }

@@ -168,9 +166,7 @@ fn make_byte_columnar_multiple_columns(
        }
    }
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer
-        .serialize(num_rows, None, &mut buffer)
-        .unwrap();
+    dataframe_writer.serialize(num_rows, &mut buffer).unwrap();
    ColumnarReader::open(buffer).unwrap()
 }

@@ -189,9 +185,7 @@ fn make_text_columnar_multiple_columns(columns: &[(&str, &[&[&str]])]) -> Column
        .max()
        .unwrap_or(0u32);
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer
-        .serialize(num_rows, None, &mut buffer)
-        .unwrap();
+    dataframe_writer.serialize(num_rows, &mut buffer).unwrap();
    ColumnarReader::open(buffer).unwrap()
 }

@@ -550,7 +544,7 @@ fn build_columnar(spec: &ColumnarSpec) -> ColumnarReader {
    }

    let mut buffer = Vec::new();
-    writer.serialize(max_row_id + 1, None, &mut buffer).unwrap();
+    writer.serialize(max_row_id + 1, &mut buffer).unwrap();
    ColumnarReader::open(buffer).unwrap()
 }

--- a/columnar/src/columnar/mod.rs
+++ b/columnar/src/columnar/mod.rs
@@ -8,9 +8,6 @@ pub use column_type::{ColumnType, HasAssociatedColumnType};
 pub use format_version::{CURRENT_VERSION, Version};
 #[cfg(test)]
 pub(crate) use merge::ColumnTypeCategory;
-pub use merge::{
-    MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, compute_merged_term_ord_mapping,
-    merge_columnar,
-};
+pub use merge::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, merge_columnar};
 pub use reader::ColumnarReader;
 pub use writer::ColumnarWriter;
--- a/columnar/src/columnar/reader/mod.rs
+++ b/columnar/src/columnar/reader/mod.rs
@@ -226,7 +226,7 @@ mod tests {
        columnar_writer.record_column_type("col1", ColumnType::Str, false);
        columnar_writer.record_column_type("col2", ColumnType::U64, false);
        let mut buffer = Vec::new();
-        columnar_writer.serialize(1, None, &mut buffer).unwrap();
+        columnar_writer.serialize(1, &mut buffer).unwrap();
        let columnar = ColumnarReader::open(buffer).unwrap();
        let columns = columnar.list_columns().unwrap();
        assert_eq!(columns.len(), 2);
@@ -242,7 +242,7 @@ mod tests {
        columnar_writer.record_column_type("count", ColumnType::U64, false);
        columnar_writer.record_numerical(1, "count", 1u64);
        let mut buffer = Vec::new();
-        columnar_writer.serialize(2, None, &mut buffer).unwrap();
+        columnar_writer.serialize(2, &mut buffer).unwrap();
        let columnar = ColumnarReader::open(buffer).unwrap();
        let columns = columnar.list_columns().unwrap();
        assert_eq!(columns.len(), 1);
@@ -256,7 +256,7 @@ mod tests {
        columnar_writer.record_column_type("col", ColumnType::U64, false);
        columnar_writer.record_numerical(1, "col", 1u64);
        let mut buffer = Vec::new();
-        columnar_writer.serialize(2, None, &mut buffer).unwrap();
+        columnar_writer.serialize(2, &mut buffer).unwrap();
        let columnar = ColumnarReader::open(buffer).unwrap();
        {
            let columns = columnar.read_columns("col").unwrap();
@@ -285,7 +285,7 @@ mod tests {
        columnar_writer.record_str(1, "col1", "hello");
        columnar_writer.record_str(0, "col2", "hello");
        let mut buffer = Vec::new();
-        columnar_writer.serialize(2, None, &mut buffer).unwrap();
+        columnar_writer.serialize(2, &mut buffer).unwrap();

        let columnar = ColumnarReader::open(buffer).unwrap();
        {
--- a/columnar/src/columnar/writer/column_writers.rs
+++ b/columnar/src/columnar/writer/column_writers.rs
@@ -41,31 +41,10 @@ impl ColumnWriter {
    pub(super) fn operation_iterator<'a, V: SymbolValue>(
        &self,
        arena: &MemoryArena,
-        old_to_new_ids_opt: Option<&[RowId]>,
        buffer: &'a mut Vec<u8>,
    ) -> impl Iterator<Item = ColumnOperation<V>> + 'a + use<'a, V> {
        buffer.clear();
        self.values.read_to_end(arena, buffer);
-        if let Some(old_to_new_ids) = old_to_new_ids_opt {
-            // TODO avoid the extra deserialization / serialization.
-            let mut sorted_ops: Vec<(RowId, ColumnOperation<V>)> = Vec::new();
-            let mut new_doc = 0u32;
-            let mut cursor = &buffer[..];
-            for op in std::iter::from_fn(|| ColumnOperation::<V>::deserialize(&mut cursor)) {
-                if let ColumnOperation::NewDoc(doc) = &op {
-                    new_doc = old_to_new_ids[*doc as usize];
-                    sorted_ops.push((new_doc, ColumnOperation::NewDoc(new_doc)));
-                } else {
-                    sorted_ops.push((new_doc, op));
-                }
-            }
-            // stable sort is crucial here.
-            sorted_ops.sort_by_key(|(new_doc_id, _)| *new_doc_id);
-            buffer.clear();
-            for (_, op) in sorted_ops {
-                buffer.extend_from_slice(op.serialize().as_ref());
-            }
-        }
        let mut cursor: &[u8] = &buffer[..];
        std::iter::from_fn(move || ColumnOperation::deserialize(&mut cursor))
    }
@@ -232,11 +211,9 @@ impl NumericalColumnWriter {
    pub(super) fn operation_iterator<'a>(
        self,
        arena: &MemoryArena,
-        old_to_new_ids: Option<&[RowId]>,
        buffer: &'a mut Vec<u8>,
    ) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a + use<'a> {
-        self.column_writer
-            .operation_iterator(arena, old_to_new_ids, buffer)
+        self.column_writer.operation_iterator(arena, buffer)
    }
 }

@@ -278,11 +255,9 @@ impl StrOrBytesColumnWriter {
    pub(super) fn operation_iterator<'a>(
        &self,
        arena: &MemoryArena,
-        old_to_new_ids: Option<&[RowId]>,
        byte_buffer: &'a mut Vec<u8>,
    ) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a + use<'a> {
-        self.column_writer
-            .operation_iterator(arena, old_to_new_ids, byte_buffer)
+        self.column_writer.operation_iterator(arena, byte_buffer)
    }
 }

--- a/columnar/src/columnar/writer/mod.rs
+++ b/columnar/src/columnar/writer/mod.rs
@@ -44,7 +44,7 @@ struct SpareBuffers {
 /// columnar_writer.record_str(1u32 /* doc id */, "product_name", "Apple");
 /// columnar_writer.record_numerical(0u32 /* doc id */, "price", 10.5f64); //< uh oh we ended up mixing integer and floats.
 /// let mut wrt: Vec<u8> =  Vec::new();
-/// columnar_writer.serialize(2u32, None, &mut wrt).unwrap();
+/// columnar_writer.serialize(2u32, &mut wrt).unwrap();
 /// ```
 #[derive(Default)]
 pub struct ColumnarWriter {
@@ -76,75 +76,6 @@ impl ColumnarWriter {
                .sum::<usize>()
    }

-    /// Returns the list of doc ids from 0..num_docs sorted by the `sort_field`
-    /// column.
-    ///
-    /// If the column is multivalued, use the first value for scoring.
-    /// If no value is associated to a specific row, the document is assigned
-    /// the lowest possible score.
-    ///
-    /// The sort applied is stable.
-    pub fn sort_order(&self, sort_field: &str, num_docs: RowId, reversed: bool) -> Vec<u32> {
-        let Some(numerical_col_writer) = self
-            .numerical_field_hash_map
-            .get::<NumericalColumnWriter>(sort_field.as_bytes())
-            .or_else(|| {
-                self.datetime_field_hash_map
-                    .get::<NumericalColumnWriter>(sort_field.as_bytes())
-            })
-        else {
-            let str_or_bytes_column_opt = self
-                .str_field_hash_map
-                .get::<StrOrBytesColumnWriter>(sort_field.as_bytes())
-                .or_else(|| {
-                    self.bytes_field_hash_map
-                        .get::<StrOrBytesColumnWriter>(sort_field.as_bytes())
-                });
-            let Some(str_or_bytes_column) = str_or_bytes_column_opt else {
-                return Vec::new();
-            };
-
-            let dictionary_builder = &self.dictionaries[str_or_bytes_column.dictionary_id as usize];
-            let term_id_mapping = dictionary_builder.build_term_id_mapping(&self.arena);
-            let mut symbols_buffer = Vec::new();
-
-            return collect_sort_order_from_ops(
-                str_or_bytes_column.operation_iterator(&self.arena, None, &mut symbols_buffer),
-                num_docs,
-                reversed,
-                |uid| Some(term_id_mapping.to_ord(uid).0),
-                None,
-                |a, b| a.cmp(b),
-            );
-        };
-        let mut symbols_buffer = Vec::new();
-        collect_sort_order_from_ops(
-            numerical_col_writer.operation_iterator(&self.arena, None, &mut symbols_buffer),
-            num_docs,
-            reversed,
-            // MonotonicallyMappableToU64 converts each value to u64 in an
-            // order-preserving way (u64: identity, i64: XOR sign bit, f64: bit
-            // manipulation). Converting once per document lets the comparator be
-            // a simple u64 cmp instead of unwrapping the NumericalValue variant
-            // on every comparison.
-            //
-            // For f64, NaN maps to a deterministic u64 via raw bit manipulation,
-            // so it sorts to a consistent position. Sorting only requires total
-            // ordering, not IEEE 754 equality semantics where NaN != NaN.
-            |nv| {
-                Some(match nv {
-                    NumericalValue::U64(v) => v.to_u64(),
-                    NumericalValue::I64(v) => v.to_u64(),
-                    NumericalValue::F64(v) => v.to_u64(),
-                })
-            },
-            // None for missing values. Option<u64> sorts None < Some(_),
-            // placing nulls before non-null values.
-            None,
-            |a, b| a.cmp(b),
-        )
-    }
-
    /// Records a column type. This is useful to bypass the coercion process,
    /// makes sure the empty is present in the resulting columnar, or set
    /// the `sort_values_within_row`.
@@ -315,12 +246,7 @@ impl ColumnarWriter {
            },
        );
    }
-    pub fn serialize(
-        &mut self,
-        num_docs: RowId,
-        old_to_new_row_ids: Option<&[RowId]>,
-        wrt: &mut dyn io::Write,
-    ) -> io::Result<()> {
+    pub fn serialize(&mut self, num_docs: RowId, wrt: &mut dyn io::Write) -> io::Result<()> {
        let mut serializer = ColumnarSerializer::new(wrt);

        let mut columns: Vec<(&[u8], ColumnType, Addr)> = self
@@ -377,11 +303,7 @@ impl ColumnarWriter {
                    serialize_bool_column(
                        cardinality,
                        num_docs,
-                        column_writer.operation_iterator(
-                            arena,
-                            old_to_new_row_ids,
-                            &mut symbol_byte_buffer,
-                        ),
+                        column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
                        buffers,
                        &mut column_serializer,
                    )?;
@@ -395,11 +317,7 @@ impl ColumnarWriter {
                    serialize_ip_addr_column(
                        cardinality,
                        num_docs,
-                        column_writer.operation_iterator(
-                            arena,
-                            old_to_new_row_ids,
-                            &mut symbol_byte_buffer,
-                        ),
+                        column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
                        buffers,
                        &mut column_serializer,
                    )?;
@@ -424,11 +342,8 @@ impl ColumnarWriter {
                        num_docs,
                        str_or_bytes_column_writer.sort_values_within_row,
                        dictionary_builder,
-                        str_or_bytes_column_writer.operation_iterator(
-                            arena,
-                            old_to_new_row_ids,
-                            &mut symbol_byte_buffer,
-                        ),
+                        str_or_bytes_column_writer
+                            .operation_iterator(arena, &mut symbol_byte_buffer),
                        buffers,
                        &self.arena,
                        &mut column_serializer,
@@ -446,11 +361,7 @@ impl ColumnarWriter {
                        cardinality,
                        num_docs,
                        numerical_type,
-                        numerical_column_writer.operation_iterator(
-                            arena,
-                            old_to_new_row_ids,
-                            &mut symbol_byte_buffer,
-                        ),
+                        numerical_column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
                        buffers,
                        &mut column_serializer,
                    )?;
@@ -465,11 +376,7 @@ impl ColumnarWriter {
                        cardinality,
                        num_docs,
                        NumericalType::I64,
-                        column_writer.operation_iterator(
-                            arena,
-                            old_to_new_row_ids,
-                            &mut symbol_byte_buffer,
-                        ),
+                        column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
                        buffers,
                        &mut column_serializer,
                    )?;
@@ -482,56 +389,6 @@ impl ColumnarWriter {
    }
 }

-/// Shared sorting pattern for both numeric and Str/Bytes sort fields.
-///
-/// Iterates column operations, fills gaps for missing docs with `default_key`, converts each value
-/// to a sort key via `value_to_key`, then sorts by the key using `cmp_keys`. Returns the doc ids
-/// in sorted order.
-fn collect_sort_order_from_ops<V, K: Clone>(
-    ops: impl Iterator<Item = ColumnOperation<V>>,
-    num_docs: RowId,
-    reversed: bool,
-    value_to_key: impl Fn(V) -> K,
-    default_key: K,
-    cmp_keys: impl Fn(&K, &K) -> std::cmp::Ordering,
-) -> Vec<u32> {
-    let mut doc_sort_keys: Vec<(K, RowId)> = Vec::with_capacity(num_docs as usize);
-    let mut start_doc_check_fill: RowId = 0;
-    let mut current_doc_opt: Option<RowId> = None;
-
-    for op in ops {
-        match op {
-            ColumnOperation::NewDoc(doc) => {
-                current_doc_opt = Some(doc);
-            }
-            ColumnOperation::Value(val) => {
-                if let Some(current_doc) = current_doc_opt {
-                    // Fill gaps since the last doc with the default key.
-                    doc_sort_keys.extend(
-                        (start_doc_check_fill..current_doc).map(|doc| (default_key.clone(), doc)),
-                    );
-                    start_doc_check_fill = current_doc + 1;
-                    // For multivalued fields, only the first value is used.
-                    current_doc_opt = None;
-
-                    doc_sort_keys.push((value_to_key(val), current_doc));
-                }
-            }
-        }
-    }
-    // Fill remaining docs at the tail.
-    doc_sort_keys.extend((start_doc_check_fill..num_docs).map(|doc| (default_key.clone(), doc)));
-
-    doc_sort_keys.sort_by(|(left_key, _), (right_key, _)| {
-        let cmp = cmp_keys(left_key, right_key);
-        if reversed { cmp.reverse() } else { cmp }
-    });
-    doc_sort_keys
-        .into_iter()
-        .map(|(_sort_key, doc)| doc)
-        .collect()
-}
-
 // Serialize [Dictionary, Column, dictionary num bytes U32::LE]
 // Column: [Column Index, Column Values, column index num bytes U32::LE]
 #[expect(clippy::too_many_arguments)]
@@ -832,7 +689,7 @@ mod tests {
        assert_eq!(column_writer.get_cardinality(3), Cardinality::Full);
        let mut buffer = Vec::new();
        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
-            .operation_iterator(&arena, None, &mut buffer)
+            .operation_iterator(&arena, &mut buffer)
            .collect();
        assert_eq!(symbols.len(), 6);
        assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
@@ -861,7 +718,7 @@ mod tests {
        assert_eq!(column_writer.get_cardinality(3), Cardinality::Optional);
        let mut buffer = Vec::new();
        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
-            .operation_iterator(&arena, None, &mut buffer)
+            .operation_iterator(&arena, &mut buffer)
            .collect();
        assert_eq!(symbols.len(), 4);
        assert!(matches!(symbols[0], ColumnOperation::NewDoc(1u32)));
@@ -884,7 +741,7 @@ mod tests {
        assert_eq!(column_writer.get_cardinality(2), Cardinality::Optional);
        let mut buffer = Vec::new();
        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
-            .operation_iterator(&arena, None, &mut buffer)
+            .operation_iterator(&arena, &mut buffer)
            .collect();
        assert_eq!(symbols.len(), 2);
        assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
@@ -903,7 +760,7 @@ mod tests {
        assert_eq!(column_writer.get_cardinality(1), Cardinality::Multivalued);
        let mut buffer = Vec::new();
        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
-            .operation_iterator(&arena, None, &mut buffer)
+            .operation_iterator(&arena, &mut buffer)
            .collect();
        assert_eq!(symbols.len(), 3);
        assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
--- a/columnar/src/compat_tests.rs
+++ b/columnar/src/compat_tests.rs
@@ -27,7 +27,7 @@ fn generate_columnar(num_docs: u32, value_offset: u64) -> Vec<u8> {
    }

    let mut wrt: Vec<u8> = Vec::new();
-    columnar_writer.serialize(num_docs, None, &mut wrt).unwrap();
+    columnar_writer.serialize(num_docs, &mut wrt).unwrap();

    wrt
 }
--- a/columnar/src/dictionary.rs
+++ b/columnar/src/dictionary.rs
@@ -51,16 +51,6 @@ impl DictionaryBuilder {
        UnorderedId(unordered_id)
    }

-    fn build_sorted_terms<'a>(&'a self, arena: &'a MemoryArena) -> Vec<(&'a [u8], UnorderedId)> {
-        let mut terms: Vec<(&[u8], UnorderedId)> = self
-            .dict
-            .iter(arena)
-            .map(|(k, v)| (k, arena.read(v)))
-            .collect();
-        terms.sort_unstable_by_key(|(key, _)| *key);
-        terms
-    }
-
    /// Serialize the dictionary into an fst, and returns the
    /// `UnorderedId -> TermOrdinal` map.
    pub fn serialize<'a, W: io::Write + 'a>(
@@ -68,7 +58,12 @@ impl DictionaryBuilder {
        arena: &MemoryArena,
        wrt: &mut W,
    ) -> io::Result<TermIdMapping> {
-        let terms = self.build_sorted_terms(arena);
+        let mut terms: Vec<(&[u8], UnorderedId)> = self
+            .dict
+            .iter(arena)
+            .map(|(k, v)| (k, arena.read(v)))
+            .collect();
+        terms.sort_unstable_by_key(|(key, _)| *key);
        // TODO Remove the allocation.
        let mut unordered_to_ord: Vec<OrderedId> = vec![OrderedId(0u32); terms.len()];
        let mut sstable_builder = sstable::VoidSSTable::writer(wrt);
@@ -81,16 +76,6 @@ impl DictionaryBuilder {
        Ok(TermIdMapping { unordered_to_ord })
    }

-    /// Build the `UnorderedId -> OrderedId` mapping in memory without serializing.
-    pub fn build_term_id_mapping(&self, arena: &MemoryArena) -> TermIdMapping {
-        let terms = self.build_sorted_terms(arena);
-        let mut unordered_to_ord: Vec<OrderedId> = vec![OrderedId(0u32); terms.len()];
-        for (ord, (_key, unordered_id)) in terms.into_iter().enumerate() {
-            unordered_to_ord[unordered_id.0 as usize] = OrderedId(ord as u32);
-        }
-        TermIdMapping { unordered_to_ord }
-    }
-
    pub(crate) fn mem_usage(&self) -> usize {
        self.dict.mem_usage()
    }
--- a/columnar/src/lib.rs
+++ b/columnar/src/lib.rs
@@ -43,8 +43,7 @@ pub use column_values::{
 };
 pub use columnar::{
    CURRENT_VERSION, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
-    MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, compute_merged_term_ord_mapping,
-    merge_columnar,
+    MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, merge_columnar,
 };
 use sstable::VoidSSTable;
 pub use value::{NumericalType, NumericalValue};
--- a/columnar/src/tests.rs
+++ b/columnar/src/tests.rs
@@ -21,7 +21,7 @@ fn test_dataframe_writer_str() {
    dataframe_writer.record_str(1u32, "my_string", "hello");
    dataframe_writer.record_str(3u32, "my_string", "helloeee");
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(5, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(5, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
@@ -35,7 +35,7 @@ fn test_dataframe_writer_bytes() {
    dataframe_writer.record_bytes(1u32, "my_string", b"hello");
    dataframe_writer.record_bytes(3u32, "my_string", b"helloeee");
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(5, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(5, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
@@ -49,7 +49,7 @@ fn test_dataframe_writer_bool() {
    dataframe_writer.record_bool(1u32, "bool.value", false);
    dataframe_writer.record_bool(3u32, "bool.value", true);
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(5, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(5, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("bool.value").unwrap();
@@ -74,7 +74,7 @@ fn test_dataframe_writer_u64_multivalued() {
    dataframe_writer.record_numerical(6u32, "divisor", 2u64);
    dataframe_writer.record_numerical(6u32, "divisor", 3u64);
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(7, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(7, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("divisor").unwrap();
@@ -97,7 +97,7 @@ fn test_dataframe_writer_ip_addr() {
    dataframe_writer.record_ip_addr(1, "ip_addr", Ipv6Addr::from_u128(1001));
    dataframe_writer.record_ip_addr(3, "ip_addr", Ipv6Addr::from_u128(1050));
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(5, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(5, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("ip_addr").unwrap();
@@ -128,7 +128,7 @@ fn test_dataframe_writer_numerical() {
    dataframe_writer.record_numerical(2u32, "srical.value", NumericalValue::U64(13u64));
    dataframe_writer.record_numerical(4u32, "srical.value", NumericalValue::U64(15u64));
    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(6, None, &mut buffer).unwrap();
+    dataframe_writer.serialize(6, &mut buffer).unwrap();
    let columnar = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar.num_columns(), 1);
    let cols: Vec<DynamicColumnHandle> = columnar.read_columns("srical.value").unwrap();
@@ -153,46 +153,6 @@ fn test_dataframe_writer_numerical() {
    assert_eq!(column_i64.first(6), None); //< we can change the spec for that one.
 }

-#[test]
-fn test_dataframe_sort_by_full() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_numerical(0u32, "value", NumericalValue::U64(1));
-    dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(2));
-    let data = dataframe_writer.sort_order("value", 2, false);
-    assert_eq!(data, vec![0, 1]);
-}
-
-#[test]
-fn test_dataframe_sort_by_opt() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(3));
-    dataframe_writer.record_numerical(3u32, "value", NumericalValue::U64(2));
-    let data = dataframe_writer.sort_order("value", 5, false);
-    // 0, 2, 4 is 0.0
-    assert_eq!(data, vec![0, 2, 4, 3, 1]);
-    let data = dataframe_writer.sort_order("value", 5, true);
-    assert_eq!(
-        data,
-        vec![4, 2, 0, 3, 1].into_iter().rev().collect::<Vec<_>>()
-    );
-}
-
-#[test]
-fn test_dataframe_sort_by_multi() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    // valid for sort
-    dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(2));
-    // those are ignored for sort
-    dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(4));
-    dataframe_writer.record_numerical(1u32, "value", NumericalValue::U64(4));
-    // valid for sort
-    dataframe_writer.record_numerical(3u32, "value", NumericalValue::U64(3));
-    // ignored, would change sort order
-    dataframe_writer.record_numerical(3u32, "value", NumericalValue::U64(1));
-    let data = dataframe_writer.sort_order("value", 4, false);
-    assert_eq!(data, vec![0, 2, 1, 3]);
-}
-
 #[test]
 fn test_dictionary_encoded_str() {
    let mut buffer = Vec::new();
@@ -201,7 +161,7 @@ fn test_dictionary_encoded_str() {
    columnar_writer.record_str(3, "my.column", "c");
    columnar_writer.record_str(3, "my.column2", "different_column!");
    columnar_writer.record_str(4, "my.column", "b");
-    columnar_writer.serialize(5, None, &mut buffer).unwrap();
+    columnar_writer.serialize(5, &mut buffer).unwrap();
    let columnar_reader = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar_reader.num_columns(), 2);
    let col_handles = columnar_reader.read_columns("my.column").unwrap();
@@ -235,7 +195,7 @@ fn test_dictionary_encoded_bytes() {
    columnar_writer.record_bytes(3, "my.column", b"c");
    columnar_writer.record_bytes(3, "my.column2", b"different_column!");
    columnar_writer.record_bytes(4, "my.column", b"b");
-    columnar_writer.serialize(5, None, &mut buffer).unwrap();
+    columnar_writer.serialize(5, &mut buffer).unwrap();
    let columnar_reader = ColumnarReader::open(buffer).unwrap();
    assert_eq!(columnar_reader.num_columns(), 2);
    let col_handles = columnar_reader.read_columns("my.column").unwrap();
@@ -272,93 +232,6 @@ fn test_dictionary_encoded_bytes() {
    assert_eq!(term_buffer, b"b");
 }

-#[test]
-fn test_sort_order_str_asc_desc() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_str(0, "s", "z");
-    dataframe_writer.record_str(2, "s", "a");
-    dataframe_writer.record_str(3, "s", "m");
-
-    let asc = dataframe_writer.sort_order("s", 4, false);
-    assert_eq!(asc, vec![1, 2, 3, 0]); // None, a, m, z
-
-    let desc = dataframe_writer.sort_order("s", 4, true);
-    assert_eq!(desc, vec![0, 3, 2, 1]); // z, m, a, None
-}
-
-#[test]
-fn test_sort_order_str_empty_vs_missing() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_str(0, "s", "");
-
-    let asc = dataframe_writer.sort_order("s", 2, false);
-    assert_eq!(asc, vec![1, 0]); // None first, then empty string
-}
-
-#[test]
-fn test_sort_order_str_multivalued_stable() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_str(0, "s", "z");
-    dataframe_writer.record_str(0, "s", "a"); // multivalued; first value wins
-    dataframe_writer.record_str(1, "s", "b");
-    dataframe_writer.record_str(2, "s", "b");
-
-    let asc = dataframe_writer.sort_order("s", 3, false);
-    assert_eq!(asc, vec![1, 2, 0]); // b, b (stable), z
-}
-
-#[test]
-fn test_sort_order_bytes_asc() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_bytes(1, "b", &[0x01]);
-    dataframe_writer.record_bytes(3, "b", &[0x00]);
-
-    let asc = dataframe_writer.sort_order("b", 4, false);
-    assert_eq!(asc, vec![0, 2, 3, 1]); // None, None, 0x00, 0x01
-}
-
-#[test]
-fn test_sort_order_numeric_u64_above_2_24() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_numerical(0, "n", 16_777_217u64);
-    dataframe_writer.record_numerical(1, "n", 16_777_216u64);
-
-    let asc = dataframe_writer.sort_order("n", 2, false);
-    assert_eq!(asc, vec![1, 0]); // 16,777,216 then 16,777,217
-}
-
-#[test]
-fn test_sort_order_numeric_u64_above_2_53() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_numerical(0, "n", 9_007_199_254_740_993u64);
-    dataframe_writer.record_numerical(1, "n", 9_007_199_254_740_992u64);
-
-    let asc = dataframe_writer.sort_order("n", 2, false);
-    assert_eq!(asc, vec![1, 0]); // smaller value first
-}
-
-#[test]
-fn test_sort_order_numeric_null_vs_zero() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_numerical(0, "n", 0u64);
-
-    let asc = dataframe_writer.sort_order("n", 2, false);
-    assert_eq!(asc, vec![1, 0]); // None first, then 0
-}
-
-#[test]
-fn test_sort_order_datetime_close_timestamps() {
-    let mut dataframe_writer = ColumnarWriter::default();
-    // Two timestamps 1 nanosecond apart. As f32, both round to the same value.
-    let dt1 = DateTime::from_timestamp_nanos(1_700_000_000_000_000_001);
-    let dt2 = DateTime::from_timestamp_nanos(1_700_000_000_000_000_000);
-    dataframe_writer.record_datetime(0, "ts", dt1);
-    dataframe_writer.record_datetime(1, "ts", dt2);
-
-    let asc = dataframe_writer.sort_order("ts", 2, false);
-    assert_eq!(asc, vec![1, 0]); // smaller timestamp first
-}
-
 fn num_strategy() -> impl Strategy<Value = NumericalValue> {
    prop_oneof![
        3 => Just(NumericalValue::U64(0u64)),
@@ -456,26 +329,12 @@ fn columnar_docs_strategy() -> impl Strategy<Value = Vec<Vec<(&'static str, Colu
        .prop_flat_map(|num_docs| proptest::collection::vec(doc_strategy(), num_docs))
 }

-fn columnar_docs_and_mapping_strategy()
-> impl Strategy<Value = (Vec<Vec<(&'static str, ColumnValue)>>, Vec<RowId>)> {
-    columnar_docs_strategy().prop_flat_map(|docs| {
-        permutation_strategy(docs.len()).prop_map(move |permutation| (docs.clone(), permutation))
-    })
-}
-
-fn permutation_strategy(n: usize) -> impl Strategy<Value = Vec<RowId>> {
-    Just((0u32..n as RowId).collect()).prop_shuffle()
-}
-
 fn permutation_and_subset_strategy(n: usize) -> impl Strategy<Value = Vec<usize>> {
    let vals: Vec<usize> = (0..n).collect();
    subsequence(vals, 0..=n).prop_shuffle()
 }

-fn build_columnar_with_mapping(
-    docs: &[Vec<(&'static str, ColumnValue)>],
-    old_to_new_row_ids_opt: Option<&[RowId]>,
-) -> ColumnarReader {
+fn build_columnar_with_mapping(docs: &[Vec<(&'static str, ColumnValue)>]) -> ColumnarReader {
    let num_docs = docs.len() as u32;
    let mut buffer = Vec::new();
    let mut columnar_writer = ColumnarWriter::default();
@@ -503,15 +362,13 @@ fn build_columnar_with_mapping(
            }
        }
    }
-    columnar_writer
-        .serialize(num_docs, old_to_new_row_ids_opt, &mut buffer)
-        .unwrap();
+    columnar_writer.serialize(num_docs, &mut buffer).unwrap();

    ColumnarReader::open(buffer).unwrap()
 }

 fn build_columnar(docs: &[Vec<(&'static str, ColumnValue)>]) -> ColumnarReader {
-    build_columnar_with_mapping(docs, None)
+    build_columnar_with_mapping(docs)
 }

 fn assert_columnar_eq_strict(left: &ColumnarReader, right: &ColumnarReader) {
@@ -771,54 +628,6 @@ proptest! {
    }
 }

-// Same as `test_single_columnar_builder_proptest` but with a shuffling mapping.
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(500))]
-    #[test]
-    fn test_single_columnar_builder_with_shuffle_proptest((docs, mapping) in columnar_docs_and_mapping_strategy()) {
-        let columnar = build_columnar_with_mapping(&docs[..], Some(&mapping));
-        assert_eq!(columnar.num_docs() as usize, docs.len());
-        let mut expected_columns: HashMap<(&str, ColumnTypeCategory), HashMap<u32, Vec<&ColumnValue>> > = Default::default();
-        for (doc_id, doc_vals) in docs.iter().enumerate() {
-            for (col_name, col_val) in doc_vals {
-                expected_columns
-                    .entry((col_name, col_val.column_type_category()))
-                    .or_default()
-                    .entry(mapping[doc_id])
-                    .or_default()
-                    .push(col_val);
-            }
-        }
-        let column_list = columnar.list_columns().unwrap();
-        assert_eq!(expected_columns.len(), column_list.len());
-        for (column_name, column) in column_list {
-            let dynamic_column = column.open().unwrap();
-            let col_category: ColumnTypeCategory = dynamic_column.column_type().into();
-            let expected_col_values: &HashMap<u32, Vec<&ColumnValue>> = expected_columns.get(&(column_name.as_str(), col_category)).unwrap();
-            for _doc_id in 0..columnar.num_docs() {
-                match &dynamic_column {
-                    DynamicColumn::Bool(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::I64(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::U64(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::F64(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::IpAddr(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::DateTime(col) =>
-                        assert_column_values(col, expected_col_values),
-                    DynamicColumn::Bytes(col) =>
-                        assert_bytes_column_values(col, expected_col_values, false),
-                    DynamicColumn::Str(col) =>
-                        assert_bytes_column_values(col, expected_col_values, true),
-                }
-            }
-        }
-    }
-}
-
 // This tests create 2 or 3 random small columnar and attempts to merge them.
 // It compares the resulting merged dataframe with what would have been obtained by building the
 // dataframe from the concatenated rows to begin with.
--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@@ -19,6 +19,6 @@ time = { version = "0.3.47", features = ["serde-well-known"] }
 serde = { version = "1.0.136", features = ["derive"] }

 [dev-dependencies]
-binggan = "0.17.0"
+binggan = "0.15.3"
 proptest = "1.0.0"
 rand = "0.9"
--- a/common/src/bitset.rs
+++ b/common/src/bitset.rs
@@ -47,9 +47,6 @@ impl TinySet {
        TinySet(val)
    }

-    /// An empty `TinySet` constant.
-    pub const EMPTY: TinySet = TinySet(0u64);
-
    /// Returns an empty `TinySet`.
    #[inline]
    pub fn empty() -> TinySet {
--- a/common/src/file_slice.rs
+++ b/common/src/file_slice.rs
@@ -121,7 +121,7 @@ pub struct FileSlice {

 impl fmt::Debug for FileSlice {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "FileSlice({:?}, {:?})", self.data, self.range)
+        write!(f, "FileSlice({:?}, {:?})", &self.data, self.range)
    }
 }

--- a/doc/src/index_sorting.md
+++ b/doc/src/index_sorting.md
@@ -7,6 +7,11 @@
    - [Other](#other)
  - [Usage](#usage)

+# Index Sorting has been removed!
+More infos here:
+
+https://github.com/quickwit-oss/tantivy/issues/2352
+
 # Index Sorting

 Tantivy allows you to sort the index according to a property.
--- a/query-grammar/src/query_grammar.rs
+++ b/query-grammar/src/query_grammar.rs
@@ -327,9 +327,7 @@ fn exists(inp: &str) -> IResult<&str, UserInputLeaf> {
            peek(alt((
                value(
                    "",
-                    satisfy(|c: char| {
-                        c.is_whitespace() || (ESCAPE_IN_WORD.contains(&c) && c != '\\')
-                    }),
+                    satisfy(|c: char| c.is_whitespace() || ESCAPE_IN_WORD.contains(&c)),
                ),
                eof,
            ))),
@@ -347,9 +345,7 @@ fn exists_precond(inp: &str) -> IResult<&str, (), ()> {
            peek(alt((
                value(
                    "",
-                    satisfy(|c: char| {
-                        c.is_whitespace() || (ESCAPE_IN_WORD.contains(&c) && c != '\\')
-                    }),
+                    satisfy(|c: char| c.is_whitespace() || ESCAPE_IN_WORD.contains(&c)),
                ),
                eof,
            ))), // we need to check this isn't a wildcard query
@@ -711,7 +707,6 @@ fn regex(inp: &str) -> IResult<&str, UserInputLeaf> {
            peek(alt((
                value((), multispace1),
                value((), char(')')),
-                value((), char('^')),
                value((), eof),
            ))),
        ),
@@ -733,10 +728,9 @@ fn regex_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
            peek(alt((
                value((), multispace1),
                value((), char(')')),
-                value((), char('^')),
                value((), eof),
            ))),
-            "expected whitespace, closing parenthesis, boost, or end of input",
+            "expected whitespace, closing parenthesis, or end of input",
        ),
    )(inp)
    {
@@ -779,10 +773,6 @@ fn leaf(inp: &str) -> IResult<&str, UserInputAst> {
                    value((), multispace1),
                    value((), char(')')),
                    value((), eof),
-                    value(
-                        (),
-                        satisfy(|c: char| ESCAPE_IN_WORD.contains(&c) && c != '\\'),
-                    ),
                ))),
            ),
            |_| UserInputAst::from(UserInputLeaf::All),
@@ -815,10 +805,6 @@ fn leaf_infallible(inp: &str) -> JResult<&str, Option<UserInputAst>> {
                            value((), multispace1),
                            value((), char(')')),
                            value((), eof),
-                            value(
-                                (),
-                                satisfy(|c: char| ESCAPE_IN_WORD.contains(&c) && c != '\\'),
-                            ),
                        ))),
                    ),
                ),
@@ -1059,43 +1045,18 @@ fn operand_leaf(inp: &str) -> IResult<&str, (Option<BinaryOperand>, Option<Occur
 }

 fn ast(inp: &str) -> IResult<&str, UserInputAst> {
-    // Parse `occur_leaf` once, then conditionally extend into a boolean
-    // expression. The previous implementation used `alt((boolean_expr,
-    // single_leaf))` which, when the input was a single leaf with no
-    // following operand, would parse `occur_leaf` once for `boolean_expr`,
-    // fail at `multispace1`, backtrack, then re-parse `occur_leaf` for
-    // `single_leaf`. With recursively-nested groups like `(+(+(+a)))`, that
-    // doubling at every level produced O(2^n) parse time. Parsing once and
-    // peeking ahead for the operand keeps it O(n).
-    delimited(
-        multispace0,
-        |inp| {
-            let (rest, first) = occur_leaf(inp)?;
-            // Only fall back on `Err::Error` (recoverable), mirroring
-            // `alt`'s behaviour. `Err::Failure` and `Err::Incomplete`
-            // must propagate so cut points and streaming needs are not
-            // accidentally swallowed if they are ever introduced in the
-            // operand parsers.
-            match preceded(multispace1, many1(operand_leaf))(rest) {
-                Ok((rest, more)) => {
-                    let combined = aggregate_binary_expressions(first, more)
-                        .map_err(|_| nom::Err::Error(Error::new(inp, ErrorKind::MapRes)))?;
-                    Ok((rest, combined))
-                }
-                Err(nom::Err::Error(_)) => {
-                    let (occur, ast) = first;
-                    let single = if occur == Some(Occur::MustNot) {
-                        ast.unary(Occur::MustNot)
-                    } else {
-                        ast
-                    };
-                    Ok((rest, single))
-                }
-                Err(e) => Err(e),
-            }
-        },
-        multispace0,
-    )(inp)
+    let boolean_expr = map_res(
+        separated_pair(occur_leaf, multispace1, many1(operand_leaf)),
+        |(left, right)| aggregate_binary_expressions(left, right),
+    );
+    let single_leaf = map(occur_leaf, |(occur, ast)| {
+        if occur == Some(Occur::MustNot) {
+            ast.unary(Occur::MustNot)
+        } else {
+            ast
+        }
+    });
+    delimited(multispace0, alt((boolean_expr, single_leaf)), multispace0)(inp)
 }

 fn ast_infallible(inp: &str) -> JResult<&str, UserInputAst> {
@@ -1765,8 +1726,6 @@ mod test {
        test_parse_query_to_ast_helper("*", "*");
        test_parse_query_to_ast_helper("(*)", "*");
        test_parse_query_to_ast_helper("(* )", "*");
-        // All query with boost
-        test_parse_query_to_ast_helper("*^2", "(*)^2");
    }

    #[test]
@@ -1829,7 +1788,6 @@ mod test {
        test_parse_query_to_ast_helper("a:b*", "\"a\":b*");
        test_parse_query_to_ast_helper("a:*b", "\"a\":*b");
        test_parse_query_to_ast_helper(r#"a:*def*"#, "\"a\":*def*");
-        test_parse_query_to_ast_helper("a:*\\:foo", "\"a\":*:foo");
    }

    #[test]
@@ -1894,8 +1852,6 @@ mod test {
            },
            _ => panic!("Expected a leaf"),
        }
-        // Regex followed by `^boost`
-        test_parse_query_to_ast_helper(r#"foo:/bar/^2"#, r#"("foo":/bar/)^2"#);
    }

    #[test]
@@ -1935,23 +1891,4 @@ mod test {
            r#"(+"field":'happy tax payer' +"other_field":1)"#,
        );
    }
-
-    // Regression test for https://github.com/quickwit-oss/tantivy/issues/2498:
-    // deeply nested parenthesized queries used to take O(2^n) time because the
-    // top-level `ast()` parser tried `boolean_expr` first and re-parsed the
-    // inner `occur_leaf` when it backtracked to `single_leaf`. Depth 60 would
-    // take ~10^18 operations under the regression; with the fix it parses
-    // instantly. We use `test_parse_query_to_ast_helper` so this test would
-    // never finish if the regression returned.
-    #[test]
-    fn test_parse_deeply_nested_query() {
-        let depth = 60;
-        let leading: String = "(".repeat(depth);
-        let trailing: String = ")".repeat(depth);
-        let query = format!("{leading}title:test{trailing}");
-        test_parse_query_to_ast_helper(&query, r#""title":test"#);
-
-        let query_with_plus = format!("+{leading}title:test{trailing}");
-        test_parse_query_to_ast_helper(&query_with_plus, r#""title":test"#);
-    }
 }
--- a/src/aggregation/agg_data.rs
+++ b/src/aggregation/agg_data.rs
@@ -10,18 +10,18 @@ use crate::aggregation::accessor_helpers::{
 };
 use crate::aggregation::agg_req::{Aggregation, AggregationVariants, Aggregations};
 use crate::aggregation::bucket::{
-    build_segment_filter_collector, build_segment_histogram_collector,
-    build_segment_range_collector, CompositeAggReqData, CompositeAggregation,
-    CompositeSourceAccessors, FilterAggReqData, HistogramAggReqData, HistogramBounds,
-    IncludeExcludeParam, MissingTermAggReqData, RangeAggReqData, TermMissingAgg, TermsAggReqData,
-    TermsAggregation, TermsAggregationInternal,
+    build_segment_filter_collector, build_segment_range_collector, CompositeAggReqData,
+    CompositeAggregation, CompositeSourceAccessors, FilterAggReqData, HistogramAggReqData,
+    HistogramBounds, IncludeExcludeParam, MissingTermAggReqData, RangeAggReqData,
+    SegmentHistogramCollector, TermMissingAgg, TermsAggReqData, TermsAggregation,
+    TermsAggregationInternal,
 };
 use crate::aggregation::metric::{
    build_segment_stats_collector, AverageAggregation, CardinalityAggReqData,
    CardinalityAggregationReq, CountAggregation, ExtendedStatsAggregation, MaxAggregation,
    MetricAggReqData, MinAggregation, SegmentCardinalityCollector, SegmentExtendedStatsCollector,
-    SegmentPercentilesCollector, StatsAggregation, StatsType, SumAggregation, TermOrdSet,
-    TopHitsAggReqData, TopHitsSegmentCollector, BITSET_MAX_TERM_ORD,
+    SegmentPercentilesCollector, StatsAggregation, StatsType, SumAggregation, TopHitsAggReqData,
+    TopHitsSegmentCollector,
 };
 use crate::aggregation::segment_agg_result::{
    GenericSegmentAggregationResultsCollector, SegmentAggregationCollector,
@@ -41,7 +41,7 @@ pub struct AggregationsSegmentCtx {

 impl AggregationsSegmentCtx {
    pub(crate) fn push_term_req_data(&mut self, data: TermsAggReqData) -> usize {
-        self.per_request.term_req_data.push(data);
+        self.per_request.term_req_data.push(Some(Box::new(data)));
        self.per_request.term_req_data.len() - 1
    }
    pub(crate) fn push_cardinality_req_data(&mut self, data: CardinalityAggReqData) -> usize {
@@ -61,25 +61,31 @@ impl AggregationsSegmentCtx {
        self.per_request.missing_term_req_data.len() - 1
    }
    pub(crate) fn push_histogram_req_data(&mut self, data: HistogramAggReqData) -> usize {
-        self.per_request.histogram_req_data.push(data);
+        self.per_request
+            .histogram_req_data
+            .push(Some(Box::new(data)));
        self.per_request.histogram_req_data.len() - 1
    }
    pub(crate) fn push_range_req_data(&mut self, data: RangeAggReqData) -> usize {
-        self.per_request.range_req_data.push(data);
+        self.per_request.range_req_data.push(Some(Box::new(data)));
        self.per_request.range_req_data.len() - 1
    }
    pub(crate) fn push_filter_req_data(&mut self, data: FilterAggReqData) -> usize {
-        self.per_request.filter_req_data.push(data);
+        self.per_request.filter_req_data.push(Some(Box::new(data)));
        self.per_request.filter_req_data.len() - 1
    }
    pub(crate) fn push_composite_req_data(&mut self, data: CompositeAggReqData) -> usize {
-        self.per_request.composite_req_data.push(data);
+        self.per_request
+            .composite_req_data
+            .push(Some(Box::new(data)));
        self.per_request.composite_req_data.len() - 1
    }

    #[inline]
    pub(crate) fn get_term_req_data(&self, idx: usize) -> &TermsAggReqData {
-        &self.per_request.term_req_data[idx]
+        self.per_request.term_req_data[idx]
+            .as_deref()
+            .expect("term_req_data slot is empty (taken)")
    }
    #[inline]
    pub(crate) fn get_cardinality_req_data(&self, idx: usize) -> &CardinalityAggReqData {
@@ -97,6 +103,116 @@ impl AggregationsSegmentCtx {
    pub(crate) fn get_missing_term_req_data(&self, idx: usize) -> &MissingTermAggReqData {
        &self.per_request.missing_term_req_data[idx]
    }
+    #[inline]
+    pub(crate) fn get_histogram_req_data(&self, idx: usize) -> &HistogramAggReqData {
+        self.per_request.histogram_req_data[idx]
+            .as_deref()
+            .expect("histogram_req_data slot is empty (taken)")
+    }
+    #[inline]
+    pub(crate) fn get_range_req_data(&self, idx: usize) -> &RangeAggReqData {
+        self.per_request.range_req_data[idx]
+            .as_deref()
+            .expect("range_req_data slot is empty (taken)")
+    }
+    #[inline]
+    pub(crate) fn get_composite_req_data(&self, idx: usize) -> &CompositeAggReqData {
+        self.per_request.composite_req_data[idx]
+            .as_deref()
+            .expect("composite_req_data slot is empty (taken)")
+    }
+
+    // ---------- mutable getters ----------
+
+    #[inline]
+    pub(crate) fn get_metric_req_data_mut(&mut self, idx: usize) -> &mut MetricAggReqData {
+        &mut self.per_request.stats_metric_req_data[idx]
+    }
+
+    #[inline]
+    pub(crate) fn get_cardinality_req_data_mut(
+        &mut self,
+        idx: usize,
+    ) -> &mut CardinalityAggReqData {
+        &mut self.per_request.cardinality_req_data[idx]
+    }
+
+    #[inline]
+    pub(crate) fn get_histogram_req_data_mut(&mut self, idx: usize) -> &mut HistogramAggReqData {
+        self.per_request.histogram_req_data[idx]
+            .as_deref_mut()
+            .expect("histogram_req_data slot is empty (taken)")
+    }
+
+    // ---------- take / put (terms, histogram, range) ----------
+
+    /// Move out the boxed Histogram request at `idx`, leaving `None`.
+    #[inline]
+    pub(crate) fn take_histogram_req_data(&mut self, idx: usize) -> Box<HistogramAggReqData> {
+        self.per_request.histogram_req_data[idx]
+            .take()
+            .expect("histogram_req_data slot is empty (taken)")
+    }
+
+    /// Put back a Histogram request into an empty slot at `idx`.
+    #[inline]
+    pub(crate) fn put_back_histogram_req_data(
+        &mut self,
+        idx: usize,
+        value: Box<HistogramAggReqData>,
+    ) {
+        debug_assert!(self.per_request.histogram_req_data[idx].is_none());
+        self.per_request.histogram_req_data[idx] = Some(value);
+    }
+
+    /// Move out the boxed Range request at `idx`, leaving `None`.
+    #[inline]
+    pub(crate) fn take_range_req_data(&mut self, idx: usize) -> Box<RangeAggReqData> {
+        self.per_request.range_req_data[idx]
+            .take()
+            .expect("range_req_data slot is empty (taken)")
+    }
+
+    /// Put back a Range request into an empty slot at `idx`.
+    #[inline]
+    pub(crate) fn put_back_range_req_data(&mut self, idx: usize, value: Box<RangeAggReqData>) {
+        debug_assert!(self.per_request.range_req_data[idx].is_none());
+        self.per_request.range_req_data[idx] = Some(value);
+    }
+
+    /// Move out the boxed Filter request at `idx`, leaving `None`.
+    #[inline]
+    pub(crate) fn take_filter_req_data(&mut self, idx: usize) -> Box<FilterAggReqData> {
+        self.per_request.filter_req_data[idx]
+            .take()
+            .expect("filter_req_data slot is empty (taken)")
+    }
+
+    /// Put back a Filter request into an empty slot at `idx`.
+    #[inline]
+    pub(crate) fn put_back_filter_req_data(&mut self, idx: usize, value: Box<FilterAggReqData>) {
+        debug_assert!(self.per_request.filter_req_data[idx].is_none());
+        self.per_request.filter_req_data[idx] = Some(value);
+    }
+
+    /// Move out the Composite request at `idx`.
+    #[inline]
+    pub(crate) fn take_composite_req_data(&mut self, idx: usize) -> Box<CompositeAggReqData> {
+        self.per_request.composite_req_data[idx]
+            .take()
+            .expect("composite_req_data slot is empty (taken)")
+    }
+
+    /// Put back a Composite request into an empty slot at `idx`.
+    #[inline]
+    pub(crate) fn put_back_composite_req_data(
+        &mut self,
+        idx: usize,
+        value: Box<CompositeAggReqData>,
+    ) {
+        debug_assert!(self.per_request.composite_req_data[idx].is_none());
+        self.per_request.composite_req_data[idx] = Some(value);
+    }
 }

 /// Each type of aggregation has its own request data struct. This struct holds
@@ -107,14 +223,15 @@ impl AggregationsSegmentCtx {
 /// for a node with [AggKind::Terms]).
 #[derive(Default)]
 pub struct PerRequestAggSegCtx {
+    // Box for cheap take/put - Only necessary for bucket aggs that have sub-aggregations
    /// TermsAggReqData contains the request data for a terms aggregation.
-    pub term_req_data: Vec<TermsAggReqData>,
+    pub term_req_data: Vec<Option<Box<TermsAggReqData>>>,
    /// HistogramAggReqData contains the request data for a histogram aggregation.
-    pub histogram_req_data: Vec<HistogramAggReqData>,
+    pub histogram_req_data: Vec<Option<Box<HistogramAggReqData>>>,
    /// RangeAggReqData contains the request data for a range aggregation.
-    pub range_req_data: Vec<RangeAggReqData>,
+    pub range_req_data: Vec<Option<Box<RangeAggReqData>>>,
    /// FilterAggReqData contains the request data for a filter aggregation.
-    pub filter_req_data: Vec<FilterAggReqData>,
+    pub filter_req_data: Vec<Option<Box<FilterAggReqData>>>,
    /// Shared by avg, min, max, sum, stats, extended_stats, count
    pub stats_metric_req_data: Vec<MetricAggReqData>,
    /// CardinalityAggReqData contains the request data for a cardinality aggregation.
@@ -124,7 +241,7 @@ pub struct PerRequestAggSegCtx {
    /// MissingTermAggReqData contains the request data for a missing term aggregation.
    pub missing_term_req_data: Vec<MissingTermAggReqData>,
    /// CompositeAggReqData contains the request data for a composite aggregation.
-    pub composite_req_data: Vec<CompositeAggReqData>,
+    pub composite_req_data: Vec<Option<Box<CompositeAggReqData>>>,

    /// Request tree used to build collectors.
    pub agg_tree: Vec<AggRefNode>,
@@ -135,22 +252,22 @@ impl PerRequestAggSegCtx {
    fn get_memory_consumption(&self) -> usize {
        self.term_req_data
            .iter()
-            .map(|t| t.get_memory_consumption())
+            .map(|b| b.as_ref().unwrap().get_memory_consumption())
            .sum::<usize>()
            + self
                .histogram_req_data
                .iter()
-                .map(|t| t.get_memory_consumption())
+                .map(|b| b.as_ref().unwrap().get_memory_consumption())
                .sum::<usize>()
            + self
                .range_req_data
                .iter()
-                .map(|t| t.get_memory_consumption())
+                .map(|b| b.as_ref().unwrap().get_memory_consumption())
                .sum::<usize>()
            + self
                .filter_req_data
                .iter()
-                .map(|t| t.get_memory_consumption())
+                .map(|b| b.as_ref().unwrap().get_memory_consumption())
                .sum::<usize>()
            + self
                .stats_metric_req_data
@@ -175,7 +292,7 @@ impl PerRequestAggSegCtx {
            + self
                .composite_req_data
                .iter()
-                .map(|t| t.get_memory_consumption())
+                .map(|b| b.as_ref().map(|d| d.get_memory_consumption()).unwrap_or(0))
                .sum::<usize>()
            + self.agg_tree.len() * std::mem::size_of::<AggRefNode>()
    }
@@ -184,16 +301,40 @@ impl PerRequestAggSegCtx {
        let idx = node.idx_in_req_data;
        let kind = node.kind;
        match kind {
-            AggKind::Terms => self.term_req_data[idx].name.as_str(),
+            AggKind::Terms => self.term_req_data[idx]
+                .as_deref()
+                .expect("term_req_data slot is empty (taken)")
+                .name
+                .as_str(),
            AggKind::Cardinality => &self.cardinality_req_data[idx].name,
            AggKind::StatsKind(_) => &self.stats_metric_req_data[idx].name,
            AggKind::TopHits => &self.top_hits_req_data[idx].name,
            AggKind::MissingTerm => &self.missing_term_req_data[idx].name,
-            AggKind::Histogram => self.histogram_req_data[idx].name.as_str(),
-            AggKind::DateHistogram => self.histogram_req_data[idx].name.as_str(),
-            AggKind::Range => self.range_req_data[idx].name.as_str(),
-            AggKind::Filter => self.filter_req_data[idx].name.as_str(),
-            AggKind::Composite => self.composite_req_data[idx].name.as_str(),
+            AggKind::Histogram => self.histogram_req_data[idx]
+                .as_deref()
+                .expect("histogram_req_data slot is empty (taken)")
+                .name
+                .as_str(),
+            AggKind::DateHistogram => self.histogram_req_data[idx]
+                .as_deref()
+                .expect("histogram_req_data slot is empty (taken)")
+                .name
+                .as_str(),
+            AggKind::Range => self.range_req_data[idx]
+                .as_deref()
+                .expect("range_req_data slot is empty (taken)")
+                .name
+                .as_str(),
+            AggKind::Filter => self.filter_req_data[idx]
+                .as_deref()
+                .expect("filter_req_data slot is empty (taken)")
+                .name
+                .as_str(),
+            AggKind::Composite => self.composite_req_data[idx]
+                .as_deref()
+                .expect("composite_req_data slot is empty (taken)")
+                .name
+                .as_str(),
        }
    }

@@ -271,39 +412,13 @@ pub(crate) fn build_segment_agg_collector(
            Ok(Box::new(TermMissingAgg::new(req, node)?))
        }
        AggKind::Cardinality => {
-            let req_data = req.get_cardinality_req_data(node.idx_in_req_data);
-            // For str columns, choose the per-bucket entries representation
-            // based on the segment's column.max_value():
-            //   * small (< BITSET_MAX_TERM_ORD): `BitSet`, pre-allocated, no promotion machinery.
-            //   * large: `TermOrdSet` (sparse FxHashSet that promotes to a paged bitset).
-            // For non-str columns the `entries` field is unused (values go
-            // straight into the HLL sketch); we still pick `TermOrdSet`
-            // because its empty Sparse(FxHashSet) costs nothing.
-            let is_str = req_data.column_type == ColumnType::Str;
-            let max_term_ord_inclusive = if is_str {
-                req_data.accessor.max_value()
-            } else {
-                0
-            };
-            let collector: Box<dyn SegmentAggregationCollector> =
-                if is_str && max_term_ord_inclusive < BITSET_MAX_TERM_ORD {
-                    Box::new(SegmentCardinalityCollector::<BitSet>::from_req(
-                        req_data.column_type,
-                        node.idx_in_req_data,
-                        req_data.accessor.clone(),
-                        req_data.missing_value_for_accessor,
-                        max_term_ord_inclusive,
-                    ))
-                } else {
-                    Box::new(SegmentCardinalityCollector::<TermOrdSet>::from_req(
-                        req_data.column_type,
-                        node.idx_in_req_data,
-                        req_data.accessor.clone(),
-                        req_data.missing_value_for_accessor,
-                        max_term_ord_inclusive,
-                    ))
-                };
-            Ok(collector)
+            let req_data = &mut req.get_cardinality_req_data_mut(node.idx_in_req_data);
+            Ok(Box::new(SegmentCardinalityCollector::from_req(
+                req_data.column_type,
+                node.idx_in_req_data,
+                req_data.accessor.clone(),
+                req_data.missing_value_for_accessor,
+            )))
        }
        AggKind::StatsKind(stats_type) => {
            let req_data = &mut req.per_request.stats_metric_req_data[node.idx_in_req_data];
@@ -318,7 +433,7 @@ pub(crate) fn build_segment_agg_collector(
                    SegmentExtendedStatsCollector::from_req(req_data, sigma),
                )),
                StatsType::Percentiles => {
-                    let req_data = req.get_metric_req_data(node.idx_in_req_data);
+                    let req_data = req.get_metric_req_data_mut(node.idx_in_req_data);
                    Ok(Box::new(
                        SegmentPercentilesCollector::from_req_and_validate(
                            req_data.field_type,
@@ -338,8 +453,12 @@ pub(crate) fn build_segment_agg_collector(
                req_data.segment_ordinal,
            )))
        }
-        AggKind::Histogram => build_segment_histogram_collector(req, node),
-        AggKind::DateHistogram => build_segment_histogram_collector(req, node),
+        AggKind::Histogram => Ok(Box::new(SegmentHistogramCollector::from_req_and_validate(
+            req, node,
+        )?)),
+        AggKind::DateHistogram => Ok(Box::new(SegmentHistogramCollector::from_req_and_validate(
+            req, node,
+        )?)),
        AggKind::Range => Ok(build_segment_range_collector(req, node)?),
        AggKind::Filter => build_segment_filter_collector(req, node),
        AggKind::Composite => Ok(Box::new(
@@ -654,18 +773,23 @@ fn build_nodes(
            let schema = reader.schema();
            let tokenizers = &data.context.tokenizers;
            let query = filter_req.parse_query(schema, tokenizers)?;
-            let evaluator =
-                std::rc::Rc::new(crate::aggregation::bucket::DocumentQueryEvaluator::new(
-                    query,
-                    schema.clone(),
-                    reader,
-                )?);
+            let evaluator = crate::aggregation::bucket::DocumentQueryEvaluator::new(
+                query,
+                schema.clone(),
+                reader,
+            )?;
+
+            // Pre-allocate buffer for batch filtering
+            let max_doc = reader.max_doc();
+            let buffer_capacity = crate::docset::COLLECT_BLOCK_BUFFER_LEN.min(max_doc as usize);
+            let matching_docs_buffer = Vec::with_capacity(buffer_capacity);

            let idx_in_req_data = data.push_filter_req_data(FilterAggReqData {
                name: agg_name.to_string(),
                req: filter_req.clone(),
                segment_reader: reader.clone(),
                evaluator,
+                matching_docs_buffer,
                is_top_level,
            });
            let children = build_children(&req.sub_aggregation, reader, segment_ordinal, data)?;
@@ -861,12 +985,8 @@ fn build_terms_or_cardinality_nodes(
                    let str_col = str_dict_column
                        .as_ref()
                        .expect("str_dict_column must exist for string column");
-                    allowed_term_ids = build_allowed_term_ids_for_str(
-                        str_col,
-                        &req.include,
-                        &req.exclude,
-                        missing.is_some(),
-                    )?;
+                    allowed_term_ids =
+                        build_allowed_term_ids_for_str(str_col, &req.include, &req.exclude)?;
                };
                let idx_in_req_data = data.push_term_req_data(TermsAggReqData {
                    accessor,
@@ -882,20 +1002,10 @@ fn build_terms_or_cardinality_nodes(
                (idx_in_req_data, AggKind::Terms)
            }
            TermsOrCardinalityRequest::Cardinality(ref req) => {
-                // `str_dict_column` is computed once per field; for JSON paths
-                // with mixed types it's `Some` even on the numeric req_data.
-                // Cardinality only consults it for the str column path, so
-                // gate by column_type to avoid driving non-str collectors
-                // through the coupon-cache path.
-                let str_dict_column_for_req = if column_type == ColumnType::Str {
-                    str_dict_column.clone()
-                } else {
-                    None
-                };
                let idx_in_req_data = data.push_cardinality_req_data(CardinalityAggReqData {
                    accessor,
                    column_type,
-                    str_dict_column: str_dict_column_for_req,
+                    str_dict_column: str_dict_column.clone(),
                    missing_value_for_accessor,
                    name: agg_name.to_string(),
                    req: req.clone(),
@@ -915,21 +1025,16 @@ fn build_terms_or_cardinality_nodes(

 /// Builds a single BitSet of allowed term ordinals for a string dictionary column according to
 /// include/exclude parameters.
-///
-/// When `reserve_missing_sentinel` is true, the bitset will have 1 additional slot for the missing
-/// term ordinal
 fn build_allowed_term_ids_for_str(
    str_col: &StrColumn,
    include: &Option<IncludeExcludeParam>,
    exclude: &Option<IncludeExcludeParam>,
-    reserve_missing_sentinel: bool,
 ) -> crate::Result<Option<BitSet>> {
    let mut allowed: Option<BitSet> = None;
-    let missing_sentinel_adjustment = if reserve_missing_sentinel { 1 } else { 0 };
-    let allowed_capacity = str_col.dictionary().num_terms() as u32 + missing_sentinel_adjustment;
+    let num_terms = str_col.dictionary().num_terms() as u32;
    if let Some(include) = include {
        // add matches
-        allowed = Some(BitSet::with_max_value(allowed_capacity));
+        allowed = Some(BitSet::with_max_value(num_terms));
        let allowed = allowed.as_mut().unwrap();
        for_each_matching_term_ord(str_col, include, |ord| allowed.insert(ord))?;
    };
@@ -937,7 +1042,7 @@ fn build_allowed_term_ids_for_str(
    if let Some(exclude) = exclude {
        if allowed.is_none() {
            // Start with all terms allowed
-            allowed = Some(BitSet::with_max_value_and_full(allowed_capacity));
+            allowed = Some(BitSet::with_max_value_and_full(num_terms));
        }
        let allowed = allowed.as_mut().unwrap();
        for_each_matching_term_ord(str_col, exclude, |ord| allowed.remove(ord))?;
--- a/src/aggregation/agg_req.rs
+++ b/src/aggregation/agg_req.rs
@@ -115,71 +115,6 @@ pub fn get_fast_field_names(aggs: &Aggregations) -> HashSet<String> {
    fast_field_names
 }

-/// Validates that all fields referenced in the aggregation request exist in the schema
-/// and are configured as fast fields.
-///
-/// This is a convenience function for upfront validation before executing aggregations.
-/// Returns an error if any field doesn't exist or is not a fast field.
-///
-/// Validation is intentionally opt-in rather than baked into aggregation execution: the
-/// default lenient behavior (returning empty results for missing fields) supports
-/// schema evolution and federated queries where the same request runs against segments
-/// or indices with different schemas.
-///
-/// # Example
-/// ```
-/// use tantivy::aggregation::agg_req::{Aggregations, validate_aggregation_fields_exist};
-/// use tantivy::schema::{Schema, FAST};
-/// use tantivy::Index;
-///
-/// # fn main() -> tantivy::Result<()> {
-/// // Create a simple index
-/// let mut schema_builder = Schema::builder();
-/// schema_builder.add_f64_field("price", FAST);
-/// let schema = schema_builder.build();
-/// let index = Index::create_in_ram(schema);
-///
-/// // Parse aggregation request
-/// let agg_req: Aggregations = serde_json::from_str(r#"{
-///     "avg_price": { "avg": { "field": "price" } }
-/// }"#)?;
-///
-/// let reader = index.reader()?;
-/// let searcher = reader.searcher();
-///
-/// // Validate fields before executing
-/// for segment_reader in searcher.segment_readers() {
-///     validate_aggregation_fields_exist(&agg_req, segment_reader)?;
-/// }
-/// # Ok(())
-/// # }
-/// ```
-pub fn validate_aggregation_fields_exist(
-    aggs: &Aggregations,
-    reader: &crate::SegmentReader,
-) -> crate::Result<()> {
-    let field_names = get_fast_field_names(aggs);
-    let schema = reader.schema();
-
-    for field_name in field_names {
-        // Check if the field is either directly in the schema or could be part of a json field
-        // present in the schema, and verify it's a fast field.
-        if let Some((field, _path)) = schema.find_field(&field_name) {
-            let field_type = schema.get_field_entry(field).field_type();
-            if !field_type.is_fast() {
-                return Err(crate::TantivyError::SchemaError(format!(
-                    "Field '{}' is not a fast field. Aggregations require fast fields.",
-                    field_name
-                )));
-            }
-        } else {
-            return Err(crate::TantivyError::FieldNotFound(field_name));
-        }
-    }
-
-    Ok(())
-}
-
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 /// All aggregation types.
 pub enum AggregationVariants {
@@ -299,12 +234,6 @@ impl AggregationVariants {
            _ => None,
        }
    }
-    pub(crate) fn as_sum(&self) -> Option<&SumAggregation> {
-        match &self {
-            AggregationVariants::Sum(sum) => Some(sum),
-            _ => None,
-        }
-    }
 }

 #[cfg(test)]
--- a/src/aggregation/agg_result.rs
+++ b/src/aggregation/agg_result.rs
@@ -208,8 +208,7 @@ pub enum BucketEntries<T> {
 }

 impl<T> BucketEntries<T> {
-    /// Iterate over all bucket entries.
-    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = &'a T> + 'a> {
+    fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = &'a T> + 'a> {
        match self {
            BucketEntries::Vec(vec) => Box::new(vec.iter()),
            BucketEntries::HashMap(map) => Box::new(map.values()),
--- a/src/aggregation/agg_tests.rs
+++ b/src/aggregation/agg_tests.rs
@@ -1436,46 +1436,3 @@ fn test_aggregation_on_json_object_mixed_numerical_segments() {
        )
    );
 }
-
-#[test]
-fn test_aggregation_field_validation_helper() {
-    // Test the standalone validation helper function for field validation
-    let index = get_test_index_2_segments(false).unwrap();
-    let reader = index.reader().unwrap();
-    let searcher = reader.searcher();
-    let segment_reader = searcher.segment_reader(0);
-
-    // Test with invalid field
-    let agg_req: Aggregations = serde_json::from_str(
-        r#"{
-        "avg_test": {
-            "avg": { "field": "nonexistent_field" }
-        }
-    }"#,
-    )
-    .unwrap();
-
-    let result =
-        crate::aggregation::agg_req::validate_aggregation_fields_exist(&agg_req, segment_reader);
-    assert!(result.is_err());
-    match result {
-        Err(crate::TantivyError::FieldNotFound(field_name)) => {
-            assert_eq!(field_name, "nonexistent_field");
-        }
-        _ => panic!("Expected FieldNotFound error, got: {:?}", result),
-    }
-
-    // Test with valid field
-    let agg_req: Aggregations = serde_json::from_str(
-        r#"{
-        "avg_test": {
-            "avg": { "field": "score" }
-        }
-    }"#,
-    )
-    .unwrap();
-
-    let result =
-        crate::aggregation::agg_req::validate_aggregation_fields_exist(&agg_req, segment_reader);
-    assert!(result.is_ok());
-}
--- a/src/aggregation/bucket/composite/accessors.rs
+++ b/src/aggregation/bucket/composite/accessors.rs
@@ -16,7 +16,6 @@ use crate::{SegmentReader, TantivyError};

 /// Contains all information required by the SegmentCompositeCollector to perform the
 /// composite aggregation on a segment.
-#[derive(Debug, Clone)]
 pub struct CompositeAggReqData {
    /// The name of the aggregation.
    pub name: String,
@@ -35,7 +34,6 @@ impl CompositeAggReqData {
 }

 /// Accessors for a single column in a composite source.
-#[derive(Debug, Clone)]
 pub struct CompositeAccessor {
    /// The fast field column
    pub column: Column<u64>,
@@ -50,7 +48,6 @@ pub struct CompositeAccessor {
 }

 /// Accessors to all the columns that belong to the field of a composite source.
-#[derive(Debug, Clone)]
 pub struct CompositeSourceAccessors {
    /// The accessors for this source
    pub accessors: Vec<CompositeAccessor>,
@@ -361,7 +358,7 @@ impl PrecomputedDateInterval {
 ///
 /// Some column types (term, IP) might not have an exact representation of the
 /// specified after key
-#[derive(Debug, Clone)]
+#[derive(Debug)]
 pub enum PrecomputedAfterKey {
    /// The after key could be exactly represented in the column space.
    Exact(u64),
--- a/src/aggregation/bucket/composite/collector.rs
+++ b/src/aggregation/bucket/composite/collector.rs
@@ -21,7 +21,7 @@ use crate::aggregation::bucket::composite::map::{DynArrayHeapMap, MAX_DYN_ARRAY_
 use crate::aggregation::bucket::{
    CalendarInterval, CompositeAggregationSource, MissingOrder, Order,
 };
-use crate::aggregation::buffered_sub_aggs::{BufferedSubAggs, HighCardSubAggBuffer};
+use crate::aggregation::cached_sub_aggs::{CachedSubAggs, HighCardSubAggCache};
 use crate::aggregation::intermediate_agg_result::{
    CompositeIntermediateKey, IntermediateAggregationResult, IntermediateAggregationResults,
    IntermediateBucketResult, IntermediateCompositeBucketEntry, IntermediateCompositeBucketResult,
@@ -118,8 +118,8 @@ impl InternalValueRepr {
 pub struct SegmentCompositeCollector {
    /// One DynArrayHeapMap per parent bucket.
    parent_buckets: Vec<DynArrayHeapMap<InternalValueRepr, CompositeBucketCollector>>,
-    req_data: CompositeAggReqData,
-    sub_agg: Option<BufferedSubAggs<HighCardSubAggBuffer>>,
+    accessor_idx: usize,
+    sub_agg: Option<CachedSubAggs<HighCardSubAggCache>>,
    bucket_id_provider: BucketIdProvider,
    /// Number of sources, needed when creating new DynArrayHeapMaps.
    num_sources: usize,
@@ -132,7 +132,10 @@ impl SegmentAggregationCollector for SegmentCompositeCollector {
        results: &mut IntermediateAggregationResults,
        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
-        let name = self.req_data.name.clone();
+        let name = agg_data
+            .get_composite_req_data(self.accessor_idx)
+            .name
+            .clone();

        let buckets = self.add_intermediate_bucket_result(agg_data, parent_bucket_id)?;
        results.push(
@@ -150,11 +153,12 @@ impl SegmentAggregationCollector for SegmentCompositeCollector {
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
        let mem_pre = self.get_memory_consumption(parent_bucket_id);
+        let composite_agg_data = agg_data.take_composite_req_data(self.accessor_idx);

        for doc in docs {
            let mut visitor = CompositeKeyVisitor {
                doc_id: *doc,
-                composite_agg_data: &self.req_data,
+                composite_agg_data: &composite_agg_data,
                buckets: &mut self.parent_buckets[parent_bucket_id as usize],
                sub_agg: &mut self.sub_agg,
                bucket_id_provider: &mut self.bucket_id_provider,
@@ -162,6 +166,7 @@ impl SegmentAggregationCollector for SegmentCompositeCollector {
            };
            visitor.visit(0, true)?;
        }
+        agg_data.put_back_composite_req_data(self.accessor_idx, composite_agg_data);

        if let Some(sub_agg) = &mut self.sub_agg {
            sub_agg.check_flush_local(agg_data)?;
@@ -194,17 +199,6 @@ impl SegmentAggregationCollector for SegmentCompositeCollector {
        }
        Ok(())
    }
-
-    fn compute_metric_value(
-        &self,
-        _bucket_id: BucketId,
-        _sub_agg_name: &str,
-        _sub_agg_property: &str,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        // Composite is a multi-bucket agg with no single value to extract.
-        None
-    }
 }

 impl SegmentCompositeCollector {
@@ -216,27 +210,22 @@ impl SegmentCompositeCollector {
        req_data: &mut AggregationsSegmentCtx,
        node: &AggRefNode,
    ) -> crate::Result<Self> {
-        let composite_req_data =
-            req_data.per_request.composite_req_data[node.idx_in_req_data].clone();
-        validate_req(&composite_req_data)?;
-        req_data
-            .context
-            .limits
-            .add_memory_consumed(composite_req_data.get_memory_consumption() as u64)?;
+        validate_req(req_data, node.idx_in_req_data)?;

        let has_sub_aggregations = !node.children.is_empty();
        let sub_agg = if has_sub_aggregations {
            let sub_agg_collector = build_segment_agg_collectors(req_data, &node.children)?;
-            Some(BufferedSubAggs::new(sub_agg_collector))
+            Some(CachedSubAggs::new(sub_agg_collector))
        } else {
            None
        };

+        let composite_req_data = req_data.get_composite_req_data(node.idx_in_req_data);
        let num_sources = composite_req_data.req.sources.len();

        Ok(SegmentCompositeCollector {
            parent_buckets: vec![DynArrayHeapMap::try_new(num_sources)?],
-            req_data: composite_req_data,
+            accessor_idx: node.idx_in_req_data,
            sub_agg,
            bucket_id_provider: BucketIdProvider::default(),
            num_sources,
@@ -258,7 +247,7 @@ impl SegmentCompositeCollector {
        let mut dict: FxHashMap<Vec<CompositeIntermediateKey>, IntermediateCompositeBucketEntry> =
            Default::default();
        dict.reserve(heap_map.size());
-        let composite_data = &self.req_data;
+        let composite_data = agg_data.get_composite_req_data(self.accessor_idx);
        for (key_internal_repr, agg) in heap_map.into_iter() {
            let key = resolve_key(&key_internal_repr, composite_data)?;
            let mut sub_aggregation_res = IntermediateAggregationResults::default();
@@ -298,7 +287,8 @@ impl SegmentCompositeCollector {
    }
 }

-fn validate_req(composite_data: &CompositeAggReqData) -> crate::Result<()> {
+fn validate_req(req_data: &mut AggregationsSegmentCtx, accessor_idx: usize) -> crate::Result<()> {
+    let composite_data = req_data.get_composite_req_data(accessor_idx);
    let req = &composite_data.req;
    if req.sources.is_empty() {
        return Err(TantivyError::InvalidArgument(
@@ -339,7 +329,7 @@ fn collect_bucket_with_limit(
    limit_num_buckets: usize,
    buckets: &mut DynArrayHeapMap<InternalValueRepr, CompositeBucketCollector>,
    key: &[InternalValueRepr],
-    sub_agg: &mut Option<BufferedSubAggs<HighCardSubAggBuffer>>,
+    sub_agg: &mut Option<CachedSubAggs<HighCardSubAggCache>>,
    bucket_id_provider: &mut BucketIdProvider,
 ) {
    let mut record_in_bucket = |bucket: &mut CompositeBucketCollector| {
@@ -495,7 +485,7 @@ struct CompositeKeyVisitor<'a> {
    doc_id: crate::DocId,
    composite_agg_data: &'a CompositeAggReqData,
    buckets: &'a mut DynArrayHeapMap<InternalValueRepr, CompositeBucketCollector>,
-    sub_agg: &'a mut Option<BufferedSubAggs<HighCardSubAggBuffer>>,
+    sub_agg: &'a mut Option<CachedSubAggs<HighCardSubAggCache>>,
    bucket_id_provider: &'a mut BucketIdProvider,
    sub_level_values: SmallVec<[InternalValueRepr; MAX_DYN_ARRAY_SIZE]>,
 }
--- a/src/aggregation/bucket/composite/mod.rs
+++ b/src/aggregation/bucket/composite/mod.rs
@@ -511,14 +511,14 @@ mod tests {

    fn datetime_from_iso_str(date_str: &str) -> common::DateTime {
        let dt = OffsetDateTime::parse(date_str, &Rfc3339)
-            .unwrap_or_else(|_| panic!("Failed to parse date: {}", date_str));
+            .expect(&format!("Failed to parse date: {}", date_str));
        let timestamp_secs = dt.unix_timestamp_nanos();
        common::DateTime::from_timestamp_nanos(timestamp_secs as i64)
    }

    fn ms_timestamp_from_iso_str(date_str: &str) -> i64 {
        let dt = OffsetDateTime::parse(date_str, &Rfc3339)
-            .unwrap_or_else(|_| panic!("Failed to parse date: {}", date_str));
+            .expect(&format!("Failed to parse date: {}", date_str));
        (dt.unix_timestamp_nanos() / 1_000_000) as i64
    }

@@ -548,7 +548,7 @@ mod tests {
                    agg_req_json["my_composite"]["composite"]["after"] = after_key.take().unwrap();
                }
                let agg_req: Aggregations = serde_json::from_value(agg_req_json).unwrap();
-                let res = exec_request(agg_req.clone(), index).unwrap();
+                let res = exec_request(agg_req.clone(), &index).unwrap();
                let expected_page_buckets = &expected_buckets_vec[page_idx * page_size
                    ..std::cmp::min((page_idx + 1) * page_size, expected_buckets_vec.len())];
                assert_eq!(
@@ -559,30 +559,34 @@ mod tests {
                    page_size,
                    agg_req,
                );
-                assert!(
-                    res["my_composite"].get("after_key").is_some(),
-                    "expected after_key on every non-empty page"
-                );
-                after_key = Some(res["my_composite"]["after_key"].clone());
-            }
-            // Using the after_key from the last page must yield an empty page.
-            let agg_req_json = json!({
-                "my_composite": {
-                    "composite": {
-                        "sources": composite_agg_sources,
-                        "size": page_size,
-                        "after": after_key,
-                    }
+                if page_idx + 1 < page_count {
+                    assert!(
+                        res["my_composite"].get("after_key").is_some(),
+                        "expected after_key on all but last page"
+                    );
+                    after_key = Some(res["my_composite"]["after_key"].clone());
+                } else if res["my_composite"].get("after_key").is_some() {
+                    // currently we sometime have an after_key on the last page,
+                    // check that the next "page" is empty
+                    let agg_req_json = json!({
+                        "my_composite": {
+                            "composite": {
+                                "sources": composite_agg_sources,
+                                "size": page_size,
+                                "after": res["my_composite"]["after_key"].clone(),
+                            }
+                        }
+                    });
+                    let agg_req: Aggregations = serde_json::from_value(agg_req_json).unwrap();
+                    let res = exec_request(agg_req.clone(), &index).unwrap();
+                    assert_eq!(
+                        res["my_composite"]["buckets"],
+                        json!([]),
+                        "expected no buckets when using after_key from last page, query: {:?}",
+                        agg_req
+                    );
                }
-            });
-            let agg_req: Aggregations = serde_json::from_value(agg_req_json).unwrap();
-            let res = exec_request(agg_req.clone(), index).unwrap();
-            assert_eq!(
-                res["my_composite"]["buckets"],
-                json!([]),
-                "expected no buckets when using after_key from last page, query: {:?}",
-                agg_req
-            );
+            }
        }
    }

@@ -707,28 +711,8 @@ mod tests {
                {"key": {"myterm": "terme"}, "doc_count": 1}
            ])
        );
-
-        // paginating past last page should be empty
-        let agg_req_json = json!({
-            "my_composite": {
-                "composite": {
-                    "sources": [
-                        {"myterm": {"terms": {"field": "string_id"}}}
-                    ],
-                    "size": 3,
-                    "after":  &res["my_composite"]["after_key"]
-                }
-            }
-        });
-        let agg_req: Aggregations = serde_json::from_value(agg_req_json).unwrap();
-        let res = exec_request(agg_req.clone(), &index).unwrap();
        assert!(res["my_composite"].get("after_key").is_none());
-        assert_eq!(
-            res["my_composite"]["buckets"],
-            json!([]),
-            "expected no buckets when using after_key from last page, query: {:?}",
-            agg_req
-        );
+
        Ok(())
    }

@@ -836,10 +820,7 @@ mod tests {
                {"key": {"myterm": "apple"}, "doc_count": 1}
            ])
        );
-        assert_eq!(
-            res["fruity_aggreg"]["after_key"],
-            json!({"myterm": "str:apple"})
-        );
+        assert!(res["fruity_aggreg"].get("after_key").is_none());

        Ok(())
    }
@@ -1811,14 +1792,7 @@ mod tests {
                {"key": {"month": ms_timestamp_from_iso_str("2021-02-01T00:00:00Z"), "category": "books"}, "doc_count": 1},
            ]),
        );
-        let feb_2021_ns = ms_timestamp_from_iso_str("2021-02-01T00:00:00Z") * 1_000_000;
-        assert_eq!(
-            res["my_composite"]["after_key"],
-            json!({
-                "month": format!("dt:{}", feb_2021_ns),
-                "category": "str:books"
-            })
-        );
+        assert!(res["my_composite"].get("after_key").is_none());

        Ok(())
    }
--- a/src/aggregation/bucket/filter.rs
+++ b/src/aggregation/bucket/filter.rs
@@ -1,5 +1,4 @@
 use std::fmt::Debug;
-use std::rc::Rc;

 use common::BitSet;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
@@ -7,8 +6,8 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use crate::aggregation::agg_data::{
    build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
 };
-use crate::aggregation::buffered_sub_aggs::{
-    BufferedSubAggs, HighCardSubAggBuffer, LowCardSubAggBuffer, SubAggBuffer,
+use crate::aggregation::cached_sub_aggs::{
+    CachedSubAggs, HighCardSubAggCache, LowCardSubAggCache, SubAggCache,
 };
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
@@ -397,7 +396,6 @@ impl PartialEq for FilterAggregation {

 /// Request data for filter aggregation
 /// This struct holds the per-segment data needed to execute a filter aggregation
-#[derive(Clone)]
 pub struct FilterAggReqData {
    /// The name of the filter aggregation
    pub name: String,
@@ -405,20 +403,22 @@ pub struct FilterAggReqData {
    pub req: FilterAggregation,
    /// The segment reader
    pub segment_reader: SegmentReader,
-    /// Document evaluator for the filter query (precomputed BitSet).
-    /// Wrapped in `Rc` so cloning the request data does not duplicate the (potentially large)
-    /// underlying BitSet.
-    pub evaluator: Rc<DocumentQueryEvaluator>,
+    /// Document evaluator for the filter query (precomputed BitSet)
+    /// This is built once when the request data is created
+    pub evaluator: DocumentQueryEvaluator,
+    /// Reusable buffer for matching documents to minimize allocations during collection
+    pub matching_docs_buffer: Vec<DocId>,
    /// True if this filter aggregation is at the top level of the aggregation tree (not nested).
    pub is_top_level: bool,
 }

 impl FilterAggReqData {
    pub(crate) fn get_memory_consumption(&self) -> usize {
-        // Estimate: name + segment reader reference + bitset
+        // Estimate: name + segment reader reference + bitset + buffer capacity
        self.name.len()
        + std::mem::size_of::<SegmentReader>()
        + self.evaluator.bitset.len() / 8 // BitSet memory (bits to bytes)
+        + self.matching_docs_buffer.capacity() * std::mem::size_of::<DocId>()
        + std::mem::size_of::<bool>()
    }
 }
@@ -503,24 +503,21 @@ struct DocCount {
 }

 /// Segment collector for filter aggregation
-pub struct SegmentFilterCollector<B: SubAggBuffer> {
+pub struct SegmentFilterCollector<C: SubAggCache> {
    /// Document counts per parent bucket
    parent_buckets: Vec<DocCount>,
    /// Sub-aggregation collectors
-    sub_aggregations: Option<BufferedSubAggs<B>>,
+    sub_aggregations: Option<CachedSubAggs<C>>,
    bucket_id_provider: BucketIdProvider,
-    /// Per-segment filter request data, owned by this collector.
-    req_data: FilterAggReqData,
-    /// Reusable buffer for matching documents to minimize allocations during collection.
-    matching_docs_buffer: Vec<DocId>,
+    /// Accessor index for this filter aggregation (to access FilterAggReqData)
+    accessor_idx: usize,
 }

-impl<B: SubAggBuffer> SegmentFilterCollector<B> {
+impl<C: SubAggCache> SegmentFilterCollector<C> {
    /// Create a new filter segment collector following the new agg_data pattern
    pub(crate) fn from_req_and_validate(
        req: &mut AggregationsSegmentCtx,
        node: &AggRefNode,
-        req_data: FilterAggReqData,
    ) -> crate::Result<Self> {
        // Build sub-aggregation collectors if any
        let sub_agg_collector = if !node.children.is_empty() {
@@ -528,17 +525,13 @@ impl<B: SubAggBuffer> SegmentFilterCollector<B> {
        } else {
            None
        };
-        let sub_agg_collector = sub_agg_collector.map(BufferedSubAggs::new);
-
-        let max_doc = req_data.segment_reader.max_doc();
-        let buffer_capacity = crate::docset::COLLECT_BLOCK_BUFFER_LEN.min(max_doc as usize);
+        let sub_agg_collector = sub_agg_collector.map(CachedSubAggs::new);

        Ok(SegmentFilterCollector {
            parent_buckets: Vec::new(),
            sub_aggregations: sub_agg_collector,
-            req_data,
+            accessor_idx: node.idx_in_req_data,
            bucket_id_provider: BucketIdProvider::default(),
-            matching_docs_buffer: Vec::with_capacity(buffer_capacity),
        })
    }
 }
@@ -547,38 +540,33 @@ pub(crate) fn build_segment_filter_collector(
    req: &mut AggregationsSegmentCtx,
    node: &AggRefNode,
 ) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
-    let req_data = req.per_request.filter_req_data[node.idx_in_req_data].clone();
-    req.context
-        .limits
-        .add_memory_consumed(req_data.get_memory_consumption() as u64)?;
-    let is_top_level = req_data.is_top_level;
+    let is_top_level = req.per_request.filter_req_data[node.idx_in_req_data]
+        .as_ref()
+        .expect("filter_req_data slot is empty")
+        .is_top_level;

    if is_top_level {
        Ok(Box::new(
-            SegmentFilterCollector::<LowCardSubAggBuffer>::from_req_and_validate(
-                req, node, req_data,
-            )?,
+            SegmentFilterCollector::<LowCardSubAggCache>::from_req_and_validate(req, node)?,
        ))
    } else {
        Ok(Box::new(
-            SegmentFilterCollector::<HighCardSubAggBuffer>::from_req_and_validate(
-                req, node, req_data,
-            )?,
+            SegmentFilterCollector::<HighCardSubAggCache>::from_req_and_validate(req, node)?,
        ))
    }
 }

-impl<B: SubAggBuffer> Debug for SegmentFilterCollector<B> {
+impl<C: SubAggCache> Debug for SegmentFilterCollector<C> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SegmentFilterCollector")
            .field("buckets", &self.parent_buckets)
            .field("has_sub_aggs", &self.sub_aggregations.is_some())
-            .field("name", &self.req_data.name)
+            .field("accessor_idx", &self.accessor_idx)
            .finish()
    }
 }

-impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentFilterCollector<B> {
+impl<C: SubAggCache> SegmentAggregationCollector for SegmentFilterCollector<C> {
    fn add_intermediate_aggregation_result(
        &mut self,
        agg_data: &AggregationsSegmentCtx,
@@ -610,7 +598,11 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentFilterCollector<B>
        };

        // Get the name of this filter aggregation
-        let name = self.req_data.name.clone();
+        let name = agg_data.per_request.filter_req_data[self.accessor_idx]
+            .as_ref()
+            .expect("filter_req_data slot is empty")
+            .name
+            .clone();

        results.push(
            name,
@@ -631,24 +623,27 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentFilterCollector<B>
        }

        let mut bucket = self.parent_buckets[parent_bucket_id as usize];
+        // Take the request data to avoid borrow checker issues with sub-aggregations
+        let mut req = agg_data.take_filter_req_data(self.accessor_idx);

        // Use batch filtering with O(1) BitSet lookups
-        self.matching_docs_buffer.clear();
-        self.req_data
-            .evaluator
-            .filter_batch(docs, &mut self.matching_docs_buffer);
+        req.matching_docs_buffer.clear();
+        req.evaluator
+            .filter_batch(docs, &mut req.matching_docs_buffer);

-        bucket.doc_count += self.matching_docs_buffer.len() as u64;
+        bucket.doc_count += req.matching_docs_buffer.len() as u64;

        // Batch process sub-aggregations if we have matches
-        if !self.matching_docs_buffer.is_empty() {
+        if !req.matching_docs_buffer.is_empty() {
            if let Some(sub_aggs) = &mut self.sub_aggregations {
-                for &doc_id in &self.matching_docs_buffer {
+                for &doc_id in &req.matching_docs_buffer {
                    sub_aggs.push(bucket.bucket_id, doc_id);
                }
            }
        }

+        // Put the request data back
+        agg_data.put_back_filter_req_data(self.accessor_idx, req);
        if let Some(sub_aggs) = &mut self.sub_aggregations {
            sub_aggs.check_flush_local(agg_data)?;
        }
@@ -679,17 +674,6 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentFilterCollector<B>
        }
        Ok(())
    }
-
-    fn compute_metric_value(
-        &self,
-        _bucket_id: BucketId,
-        _sub_agg_name: &str,
-        _sub_agg_property: &str,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        // TODO: forward into the inner `sub_agg` for nested order paths (`filter.metric`).
-        None
-    }
 }

 /// Intermediate result for filter aggregation
--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -10,7 +10,7 @@ use crate::aggregation::agg_data::{
 };
 use crate::aggregation::agg_req::Aggregations;
 use crate::aggregation::agg_result::BucketEntry;
-use crate::aggregation::buffered_sub_aggs::{BufferedSubAggs, HighCardBufferedSubAggs};
+use crate::aggregation::cached_sub_aggs::{CachedSubAggs, HighCardCachedSubAggs};
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
    IntermediateHistogramBucketEntry,
@@ -21,7 +21,6 @@ use crate::TantivyError;

 /// Contains all information required by the SegmentHistogramCollector to perform the
 /// histogram or date_histogram aggregation on a segment.
-#[derive(Debug, Clone)]
 pub struct HistogramAggReqData {
    /// The column accessor to access the fast field values.
    pub accessor: Column<u64>,
@@ -244,55 +243,22 @@ impl Display for HistogramBounds {
 }

 impl HistogramBounds {
-    pub(crate) fn contains(&self, val: f64) -> bool {
+    fn contains(&self, val: f64) -> bool {
        val >= self.min && val <= self.max
    }
 }

-/// The per-bucket identifier stored in a [`SegmentHistogramBucketEntry`].
-///
-/// It is [`BucketId`] when the histogram has sub aggregations (which key their state by it), and
-/// the zero-sized `()` when it does not. Without sub aggregations the id is never read, so storing
-/// `()` drops 8 bytes per bucket (24 -> 16) and turns id assignment into a no-op.
-pub trait BucketIdSlot: Copy + Default + std::fmt::Debug + PartialEq {
-    /// Assigns the next id from the provider, called once when a bucket is first filled.
-    fn assign(provider: &mut BucketIdProvider) -> Self;
-    /// Resolves to the `BucketId` for sub-aggregation bookkeeping.
-    ///
-    /// Only ever called for the [`BucketId`] slot: the `()` slot is used exactly when there are no
-    /// sub aggregations, so every call site is guarded by `sub_agg.is_some()` and is dead for `()`.
-    fn to_bucket_id(self) -> BucketId;
-}
-impl BucketIdSlot for BucketId {
-    #[inline(always)]
-    fn assign(provider: &mut BucketIdProvider) -> Self {
-        provider.next_bucket_id()
-    }
-    #[inline(always)]
-    fn to_bucket_id(self) -> BucketId {
-        self
-    }
-}
-impl BucketIdSlot for () {
-    #[inline(always)]
-    fn assign(_provider: &mut BucketIdProvider) -> Self {}
-    #[inline(always)]
-    fn to_bucket_id(self) -> BucketId {
-        unreachable!("bucket ids are only resolved when sub aggregations are present")
-    }
-}
-
 #[derive(Default, Clone, Debug, PartialEq)]
-pub(crate) struct SegmentHistogramBucketEntry<B> {
+pub(crate) struct SegmentHistogramBucketEntry {
    pub key: f64,
    pub doc_count: u64,
-    pub bucket_id: B,
+    pub bucket_id: BucketId,
 }

-impl<B: BucketIdSlot> SegmentHistogramBucketEntry<B> {
+impl SegmentHistogramBucketEntry {
    pub(crate) fn into_intermediate_bucket_entry(
        self,
-        sub_aggregation: &mut Option<HighCardBufferedSubAggs>,
+        sub_aggregation: &mut Option<HighCardCachedSubAggs>,
        agg_data: &AggregationsSegmentCtx,
    ) -> crate::Result<IntermediateHistogramBucketEntry> {
        let mut sub_aggregation_res = IntermediateAggregationResults::default();
@@ -302,7 +268,7 @@ impl<B: BucketIdSlot> SegmentHistogramBucketEntry<B> {
                .add_intermediate_aggregation_result(
                    agg_data,
                    &mut sub_aggregation_res,
-                    self.bucket_id.to_bucket_id(),
+                    self.bucket_id,
                )?;
        }
        Ok(IntermediateHistogramBucketEntry {
@@ -313,147 +279,34 @@ impl<B: BucketIdSlot> SegmentHistogramBucketEntry<B> {
    }
 }

-/// The contiguous bucket range a histogram can span, derived from the column min/max (clamped to
-/// the histogram bounds). Buckets in `[base_pos, base_pos + len)` can be stored in a flat `Vec`
-/// indexed by `bucket_pos - base_pos`, avoiding the hash map on the hot path.
-#[derive(Clone, Copy, Debug)]
-pub(crate) struct DenseRange {
-    /// `bucket_pos` mapped to index 0 of the dense `Vec`.
-    pub(crate) base_pos: i64,
-    /// Number of bucket positions in the range.
-    pub(crate) len: usize,
-}
-
-/// Storage for the histogram buckets of a single parent bucket.
-///
-/// Starts out sparse (a hash map keyed by `bucket_pos`). Once enough distinct buckets have been
-/// filled that we are clearly going to cover most of the column's theoretical range, it switches
-/// to a dense `Vec` indexed by `bucket_pos - base_pos`, which removes hashing from the hot loop.
-#[derive(Clone, Debug)]
-enum HistogramBuckets<B> {
-    Sparse(FxHashMap<i64, SegmentHistogramBucketEntry<B>>),
-    Dense {
-        base_pos: i64,
-        /// One slot per bucket position; a slot with `doc_count == 0` has not been hit yet.
-        buckets: Vec<SegmentHistogramBucketEntry<B>>,
-    },
-}
-impl<B> Default for HistogramBuckets<B> {
-    fn default() -> Self {
-        HistogramBuckets::Sparse(FxHashMap::default())
-    }
-}
-impl<B: BucketIdSlot> HistogramBuckets<B> {
-    fn memory_consumption(&self) -> u64 {
-        let num_slots = match self {
-            HistogramBuckets::Sparse(map) => map.capacity(),
-            HistogramBuckets::Dense { buckets, .. } => buckets.capacity(),
-        };
-        num_slots as u64 * std::mem::size_of::<SegmentHistogramBucketEntry<B>>() as u64
-    }
-
-    /// Switches from sparse to dense storage once the dense `Vec` would use no more memory than the
-    /// hash map does now, so the switch never increases memory. Called at block boundaries.
-    ///
-    /// The `Vec` holds one `Entry` per bucket position in the range. The map additionally stores
-    /// the key and a control byte per slot, at a load factor of 7/16..7/8, so for a dense histogram
-    /// its footprint grows past the `Vec` well before full coverage. And since the `Vec` never
-    /// grows afterwards while the map would keep growing, dense only gets relatively cheaper — so
-    /// no upper bound on the range is needed: a large but sparse range simply never crosses over.
-    #[inline]
-    fn maybe_densify(&mut self, dense_range: Option<DenseRange>) {
-        let Some(range) = dense_range else { return };
-        let HistogramBuckets::Sparse(map) = self else {
-            return;
-        };
-        let dense_bytes = range
-            .len
-            .saturating_mul(std::mem::size_of::<SegmentHistogramBucketEntry<B>>());
-        let sparse_bytes = map
-            .capacity()
-            .saturating_mul(std::mem::size_of::<(i64, SegmentHistogramBucketEntry<B>)>() + 1);
-        if dense_bytes > sparse_bytes {
-            return;
-        }
-        let map = std::mem::take(map);
-        let mut buckets = vec![SegmentHistogramBucketEntry::<B>::default(); range.len];
-        for (bucket_pos, entry) in map {
-            buckets[(bucket_pos - range.base_pos) as usize] = entry;
-        }
-        *self = HistogramBuckets::Dense {
-            base_pos: range.base_pos,
-            buckets,
-        };
-    }
-
-    /// Returns the bucket entry for `bucket_pos`, setting its key (and `bucket_id`, when `B` is
-    /// [`BucketId`]) on first use.
-    ///
-    /// For the dense variant `bucket_pos` is guaranteed to be inside the range, since it is
-    /// derived from the column min/max that bounds every value (see [`compute_dense_range`]).
-    #[inline]
-    fn get_or_create(
-        &mut self,
-        bucket_pos: i64,
-        bucket_id_provider: &mut BucketIdProvider,
-        key_from_pos: impl FnOnce(i64) -> f64,
-    ) -> &mut SegmentHistogramBucketEntry<B> {
-        match self {
-            HistogramBuckets::Sparse(map) => {
-                map.entry(bucket_pos)
-                    .or_insert_with(|| SegmentHistogramBucketEntry {
-                        key: key_from_pos(bucket_pos),
-                        doc_count: 0,
-                        bucket_id: B::assign(bucket_id_provider),
-                    })
-            }
-            HistogramBuckets::Dense { base_pos, buckets } => {
-                let idx = (bucket_pos - *base_pos) as usize;
-                debug_assert!(idx < buckets.len(), "bucket_pos outside the dense range");
-                let entry = &mut buckets[idx];
-                if entry.doc_count == 0 {
-                    entry.key = key_from_pos(bucket_pos);
-                    entry.bucket_id = B::assign(bucket_id_provider);
-                }
-                entry
-            }
-        }
-    }
-
-    /// Consumes the storage, yielding all non-empty bucket entries.
-    fn into_filled_entries(self) -> Vec<SegmentHistogramBucketEntry<B>> {
-        match self {
-            HistogramBuckets::Sparse(map) => map.into_values().collect(),
-            HistogramBuckets::Dense { buckets, .. } => {
-                buckets.into_iter().filter(|b| b.doc_count > 0).collect()
-            }
-        }
-    }
+#[derive(Clone, Debug, Default)]
+struct HistogramBuckets {
+    pub buckets: FxHashMap<i64, SegmentHistogramBucketEntry>,
 }

 /// The collector puts values from the fast field into the correct buckets and does a conversion to
 /// the correct datatype.
 #[derive(Debug)]
-pub struct SegmentHistogramCollector<B> {
+pub struct SegmentHistogramCollector {
    /// The buckets containing the aggregation data.
    /// One Histogram bucket per parent bucket id.
-    parent_buckets: Vec<HistogramBuckets<B>>,
-    sub_agg: Option<HighCardBufferedSubAggs>,
-    req_data: HistogramAggReqData,
+    parent_buckets: Vec<HistogramBuckets>,
+    sub_agg: Option<HighCardCachedSubAggs>,
+    accessor_idx: usize,
    bucket_id_provider: BucketIdProvider,
-    /// Theoretical bucket range derived from the column min/max, if dense `Vec` storage is
-    /// viable. `None` keeps every parent bucket in the sparse hash map.
-    dense_range: Option<DenseRange>,
 }

-impl<B: BucketIdSlot> SegmentAggregationCollector for SegmentHistogramCollector<B> {
+impl SegmentAggregationCollector for SegmentHistogramCollector {
    fn add_intermediate_aggregation_result(
        &mut self,
        agg_data: &AggregationsSegmentCtx,
        results: &mut IntermediateAggregationResults,
        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
-        let name = self.req_data.name.clone();
+        let name = agg_data
+            .get_histogram_req_data(self.accessor_idx)
+            .name
+            .clone();
        // TODO: avoid prepare_max_bucket here and handle empty buckets.
        self.prepare_max_bucket(parent_bucket_id, agg_data)?;
        let histogram = std::mem::take(&mut self.parent_buckets[parent_bucket_id as usize]);
@@ -470,13 +323,10 @@ impl<B: BucketIdSlot> SegmentAggregationCollector for SegmentHistogramCollector<
        docs: &[crate::DocId],
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        let mem_pre = self.get_memory_consumption(parent_bucket_id);
-        let dense_range = self.dense_range;
-        let store = &mut self.parent_buckets[parent_bucket_id as usize];
-        // Upgrade to dense storage before processing the block if the buckets are dense enough.
-        store.maybe_densify(dense_range);
+        let req = agg_data.take_histogram_req_data(self.accessor_idx);
+        let mem_pre = self.get_memory_consumption();
+        let buckets = &mut self.parent_buckets[parent_bucket_id as usize].buckets;

-        let req = &self.req_data;
        let bounds = req.bounds;
        let interval = req.req.interval;
        let offset = req.offset;
@@ -485,43 +335,35 @@ impl<B: BucketIdSlot> SegmentAggregationCollector for SegmentHistogramCollector<
        agg_data
            .column_block_accessor
            .fetch_block(docs, &req.accessor);
-        // special path for nested buckets
-        if let Some(sub_agg) = &mut self.sub_agg {
-            for (doc, val) in agg_data
-                .column_block_accessor
-                .iter_docid_vals(docs, &req.accessor)
-            {
-                let val = f64_from_fastfield_u64(val, req.field_type);
-                if bounds.contains(val) {
-                    let bucket = store.get_or_create(
-                        get_bucket_pos(val),
-                        &mut self.bucket_id_provider,
-                        |pos| get_bucket_key_from_pos(pos as f64, interval, offset),
-                    );
-                    bucket.doc_count += 1;
-                    sub_agg.push(bucket.bucket_id.to_bucket_id(), doc);
-                }
-            }
-        } else {
-            for val in agg_data.column_block_accessor.iter_vals() {
-                let val = f64_from_fastfield_u64(val, req.field_type);
-                if bounds.contains(val) {
-                    let bucket = store.get_or_create(
-                        get_bucket_pos(val),
-                        &mut self.bucket_id_provider,
-                        |pos| get_bucket_key_from_pos(pos as f64, interval, offset),
-                    );
-                    bucket.doc_count += 1;
+        for (doc, val) in agg_data
+            .column_block_accessor
+            .iter_docid_vals(docs, &req.accessor)
+        {
+            let val = f64_from_fastfield_u64(val, req.field_type);
+            let bucket_pos = get_bucket_pos(val);
+            if bounds.contains(val) {
+                let bucket = buckets.entry(bucket_pos).or_insert_with(|| {
+                    let key = get_bucket_key_from_pos(bucket_pos as f64, interval, offset);
+                    SegmentHistogramBucketEntry {
+                        key,
+                        doc_count: 0,
+                        bucket_id: self.bucket_id_provider.next_bucket_id(),
+                    }
+                });
+                bucket.doc_count += 1;
+                if let Some(sub_agg) = &mut self.sub_agg {
+                    sub_agg.push(bucket.bucket_id, doc);
                }
            }
        }
+        agg_data.put_back_histogram_req_data(self.accessor_idx, req);

-        // `checked_sub` is `None` when densifying shrank the accounted memory; only account growth.
-        if let Some(mem_delta) = self
-            .get_memory_consumption(parent_bucket_id)
-            .checked_sub(mem_pre)
-        {
-            agg_data.context.limits.add_memory_consumed(mem_delta)?;
+        let mem_delta = self.get_memory_consumption() - mem_pre;
+        if mem_delta > 0 {
+            agg_data
+                .context
+                .limits
+                .add_memory_consumed(mem_delta as u64)?;
        }

        if let Some(sub_agg) = &mut self.sub_agg {
@@ -544,45 +386,39 @@ impl<B: BucketIdSlot> SegmentAggregationCollector for SegmentHistogramCollector<
        _agg_data: &AggregationsSegmentCtx,
    ) -> crate::Result<()> {
        while self.parent_buckets.len() <= max_bucket as usize {
-            self.parent_buckets.push(HistogramBuckets::default());
+            self.parent_buckets.push(HistogramBuckets {
+                buckets: FxHashMap::default(),
+            });
        }
        Ok(())
    }
-
-    fn compute_metric_value(
-        &self,
-        _bucket_id: BucketId,
-        _sub_agg_name: &str,
-        _sub_agg_property: &str,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        // Histogram is a multi-bucket agg with no single value to extract.
-        None
-    }
 }

-impl<B: BucketIdSlot> SegmentHistogramCollector<B> {
-    fn get_memory_consumption(&self, parent_bucket_id: BucketId) -> u64 {
-        self.parent_buckets[parent_bucket_id as usize].memory_consumption()
+impl SegmentHistogramCollector {
+    fn get_memory_consumption(&self) -> usize {
+        let self_mem = std::mem::size_of::<Self>();
+        let buckets_mem = self.parent_buckets.len() * std::mem::size_of::<HistogramBuckets>();
+        self_mem + buckets_mem
    }
-
    /// Converts the collector result into a intermediate bucket result.
    fn add_intermediate_bucket_result(
        &mut self,
        agg_data: &AggregationsSegmentCtx,
-        histogram: HistogramBuckets<B>,
+        histogram: HistogramBuckets,
    ) -> crate::Result<IntermediateBucketResult> {
-        let filled = histogram.into_filled_entries();
-        let mut buckets = Vec::with_capacity(filled.len());
+        let mut buckets = Vec::with_capacity(histogram.buckets.len());

-        for bucket in filled {
+        for bucket in histogram.buckets.into_values() {
            let bucket_res = bucket.into_intermediate_bucket_entry(&mut self.sub_agg, agg_data);

            buckets.push(bucket_res?);
        }
        buckets.sort_unstable_by(|b1, b2| b1.key.total_cmp(&b2.key));

-        let is_date_agg = self.req_data.field_type == ColumnType::DateTime;
+        let is_date_agg = agg_data
+            .get_histogram_req_data(self.accessor_idx)
+            .field_type
+            == ColumnType::DateTime;
        Ok(IntermediateBucketResult::Histogram {
            buckets,
            is_date_agg,
@@ -598,175 +434,32 @@ impl<B: BucketIdSlot> SegmentHistogramCollector<B> {
        } else {
            None
        };
-        let mut req_data = agg_data.per_request.histogram_req_data[node.idx_in_req_data].clone();
-        normalize_histogram_req(&mut req_data)?;
-        agg_data
-            .context
-            .limits
-            .add_memory_consumed(req_data.get_memory_consumption() as u64)?;
-        let dense_range = compute_dense_range(
-            &req_data.accessor,
-            req_data.field_type,
-            req_data.req.interval,
-            req_data.offset,
-            req_data.bounds,
-        );
-        let sub_agg = sub_agg.map(BufferedSubAggs::new);
+        let req_data = agg_data.get_histogram_req_data_mut(node.idx_in_req_data);
+        req_data.req.validate()?;
+        if req_data.field_type == ColumnType::DateTime && !req_data.is_date_histogram {
+            req_data.req.normalize_date_time();
+        }
+        req_data.bounds = req_data.req.hard_bounds.unwrap_or(HistogramBounds {
+            min: f64::MIN,
+            max: f64::MAX,
+        });
+        req_data.offset = req_data.req.offset.unwrap_or(0.0);
+        let sub_agg = sub_agg.map(CachedSubAggs::new);

        Ok(Self {
            parent_buckets: Default::default(),
            sub_agg,
-            req_data,
+            accessor_idx: node.idx_in_req_data,
            bucket_id_provider: BucketIdProvider::default(),
-            dense_range,
        })
    }
 }

-impl SegmentHistogramCollector<()> {
-    /// Builds a histogram collector whose parent `t` is a dense histogram filled from
-    /// `counts[t * num_time_buckets .. (t + 1) * num_time_buckets]` (row-major). Used by the fused
-    /// terms×histogram collector to turn its flat 2D counters into the regular intermediate result,
-    /// so cross-segment merging is shared with the general path.
-    pub(crate) fn from_dense_rows(
-        req_data: HistogramAggReqData,
-        base_pos: i64,
-        num_time_buckets: usize,
-        counts: &[u32],
-    ) -> Self {
-        let interval = req_data.req.interval;
-        let offset = req_data.offset;
-        let num_parents = counts.len().checked_div(num_time_buckets).unwrap_or(0);
-        let parent_buckets = (0..num_parents)
-            .map(|t| {
-                let row = &counts[t * num_time_buckets..(t + 1) * num_time_buckets];
-                let buckets = row
-                    .iter()
-                    .enumerate()
-                    .map(|(b, &doc_count)| SegmentHistogramBucketEntry {
-                        key: get_bucket_key_from_pos(
-                            (base_pos + b as i64) as f64,
-                            interval,
-                            offset,
-                        ),
-                        doc_count: doc_count as u64,
-                        bucket_id: (),
-                    })
-                    .collect();
-                HistogramBuckets::Dense { base_pos, buckets }
-            })
-            .collect();
-        Self {
-            parent_buckets,
-            sub_agg: None,
-            req_data,
-            bucket_id_provider: BucketIdProvider::default(),
-            dense_range: None,
-        }
-    }
-}
-
-/// Validates and normalizes a histogram request in place: applies date ns-normalization (for a
-/// `histogram` on a date column) and resolves `bounds`/`offset` from the request.
-fn normalize_histogram_req(req_data: &mut HistogramAggReqData) -> crate::Result<()> {
-    req_data.req.validate()?;
-    if req_data.field_type == ColumnType::DateTime && !req_data.is_date_histogram {
-        req_data.req.normalize_date_time();
-    }
-    req_data.bounds = req_data.req.hard_bounds.unwrap_or(HistogramBounds {
-        min: f64::MIN,
-        max: f64::MAX,
-    });
-    req_data.offset = req_data.req.offset.unwrap_or(0.0);
-    // Drop `hard_bounds` that can't exclude any value (the column's range already sits inside
-    // them): the per-doc `bounds.contains` check is then a no-op, so collapsing to the unbounded
-    // sentinel lets the histogram hot loop skip it and the fused term×histogram path derive
-    // per-term counts from the grid. Only this collect-time filter is touched — empty-bucket
-    // emission reads `req.hard_bounds` directly (see `get_req_min_max`), and `hard_bounds` only
-    // ever clips that range, so a wider-than-data bound leaves the result unchanged.
-    if req_data.req.hard_bounds.is_some() {
-        let col_min = f64_from_fastfield_u64(req_data.accessor.min_value(), req_data.field_type);
-        let col_max = f64_from_fastfield_u64(req_data.accessor.max_value(), req_data.field_type);
-        if col_min >= req_data.bounds.min && col_max <= req_data.bounds.max {
-            req_data.bounds = HistogramBounds {
-                min: f64::MIN,
-                max: f64::MAX,
-            };
-        }
-    }
-    Ok(())
-}
-
-/// Clones and normalizes (resolving interval/offset/bounds) the histogram request at `node`, and
-/// returns it together with its dense bucket range — or `None` if the column has no usable range.
-/// Used by the fused terms×histogram collector, which then owns the normalized request.
-pub(crate) fn prepare_histogram_dense_range(
-    agg_data: &AggregationsSegmentCtx,
-    node: &AggRefNode,
-) -> crate::Result<Option<(HistogramAggReqData, DenseRange)>> {
-    let mut req_data = agg_data.per_request.histogram_req_data[node.idx_in_req_data].clone();
-    normalize_histogram_req(&mut req_data)?;
-    let dense_range = compute_dense_range(
-        &req_data.accessor,
-        req_data.field_type,
-        req_data.req.interval,
-        req_data.offset,
-        req_data.bounds,
-    );
-    Ok(dense_range.map(|range| (req_data, range)))
-}
-
-/// Builds a boxed histogram (or date histogram) segment collector, picking the bucket-id storage
-/// based on whether there are sub aggregations: `()` (no id stored) when there are none, otherwise
-/// [`BucketId`].
-pub(crate) fn build_segment_histogram_collector(
-    agg_data: &mut AggregationsSegmentCtx,
-    node: &AggRefNode,
-) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
-    if node.children.is_empty() {
-        Ok(Box::new(
-            SegmentHistogramCollector::<()>::from_req_and_validate(agg_data, node)?,
-        ))
-    } else {
-        Ok(Box::new(
-            SegmentHistogramCollector::<BucketId>::from_req_and_validate(agg_data, node)?,
-        ))
-    }
-}
-
 #[inline]
-pub(crate) fn get_bucket_pos_f64(val: f64, interval: f64, offset: f64) -> f64 {
+fn get_bucket_pos_f64(val: f64, interval: f64, offset: f64) -> f64 {
    ((val - offset) / interval).floor()
 }

-/// Computes the dense bucket range for a column from its min/max value (clamped to the histogram
-/// bounds), or `None` if there are no values within bounds (or the range overflows `usize`).
-///
-/// There is no upper bound on the range: whether dense storage is actually used is decided later,
-/// per parent bucket, by [`HistogramBuckets::maybe_densify`] based on the memory it would save.
-///
-/// The column min/max bound every value the collector can see, so a `Vec` sized to this range can
-/// be indexed by `bucket_pos - base_pos` without any out-of-bounds check on the hot path.
-fn compute_dense_range(
-    accessor: &Column<u64>,
-    field_type: ColumnType,
-    interval: f64,
-    offset: f64,
-    bounds: HistogramBounds,
-) -> Option<DenseRange> {
-    let col_min = f64_from_fastfield_u64(accessor.min_value(), field_type);
-    let col_max = f64_from_fastfield_u64(accessor.max_value(), field_type);
-    let lo = col_min.max(bounds.min);
-    let hi = col_max.min(bounds.max);
-    if lo > hi {
-        return None;
-    }
-    let base_pos = get_bucket_pos_f64(lo, interval, offset) as i64;
-    let top_pos = get_bucket_pos_f64(hi, interval, offset) as i64;
-    let len = usize::try_from(top_pos.checked_sub(base_pos)?.checked_add(1)?).ok()?;
-    (len > 0).then_some(DenseRange { base_pos, len })
-}
-
 #[inline]
 fn get_bucket_key_from_pos(bucket_pos: f64, interval: f64, offset: f64) -> f64 {
    bucket_pos * interval + offset
@@ -1071,62 +764,6 @@ mod tests {
        Ok(())
    }

-    #[test]
-    fn histogram_dense_storage_test() -> crate::Result<()> {
-        histogram_dense_storage_test_with_opt(false)?;
-        histogram_dense_storage_test_with_opt(true)?;
-        Ok(())
-    }
-
-    /// Exercises the switch from sparse hash map to dense `Vec` storage. The switch happens at a
-    /// block boundary (a block is `COLLECT_BLOCK_BUFFER_LEN` = 64 docs), so we need many docs in a
-    /// single segment, densely covering the bucket range. `with_sub_agg` toggles the `iter_vals`
-    /// fast path vs. the `iter_docid_vals` path used when there is a sub aggregation.
-    fn histogram_dense_storage_test_with_opt(with_sub_agg: bool) -> crate::Result<()> {
-        let num_buckets = 50usize;
-        let docs_per_bucket = 10usize;
-        // Value `k` repeated `docs_per_bucket` times for each bucket `k`, so every value in bucket
-        // `k` equals `k` and the per-bucket average is exactly `k`.
-        let values: Vec<f64> = (0..num_buckets * docs_per_bucket)
-            .map(|i| (i % num_buckets) as f64)
-            .collect();
-        // `merge_segments = true` collapses the per-value segments into a single segment with all
-        // the docs, which is collected in 64-doc blocks and therefore switches to dense storage.
-        let index = get_test_index_from_values(true, &values)?;
-
-        let agg_req: Aggregations = serde_json::from_value(if with_sub_agg {
-            json!({
-                "histogram": {
-                    "histogram": { "field": "score_f64", "interval": 1.0 },
-                    "aggs": { "avg": { "avg": { "field": "score_f64" } } }
-                }
-            })
-        } else {
-            json!({
-                "histogram": {
-                    "histogram": { "field": "score_f64", "interval": 1.0 }
-                }
-            })
-        })
-        .unwrap();
-
-        let res = exec_request(agg_req, &index)?;
-
-        for k in 0..num_buckets {
-            assert_eq!(res["histogram"]["buckets"][k]["key"], k as f64);
-            assert_eq!(
-                res["histogram"]["buckets"][k]["doc_count"],
-                docs_per_bucket as u64
-            );
-            if with_sub_agg {
-                assert_eq!(res["histogram"]["buckets"][k]["avg"]["value"], k as f64);
-            }
-        }
-        assert_eq!(res["histogram"]["buckets"][num_buckets], Value::Null);
-
-        Ok(())
-    }
-
    #[test]
    fn histogram_memory_limit() -> crate::Result<()> {
        let index = get_test_index_with_num_docs(true, 100)?;
@@ -1421,55 +1058,6 @@ mod tests {
        Ok(())
    }

-    #[test]
-    fn histogram_non_binding_hard_bounds_test_multi_segment() -> crate::Result<()> {
-        histogram_non_binding_hard_bounds_test_with_opt(false)
-    }
-    #[test]
-    fn histogram_non_binding_hard_bounds_test_single_segment() -> crate::Result<()> {
-        histogram_non_binding_hard_bounds_test_with_opt(true)
-    }
-    /// `hard_bounds` wider than the data (here with mid-interval edges, to cover the "bound cuts a
-    /// bucket" case) can't exclude any value, so the result must be identical to the same request
-    /// without bounds. Guards the normalization that collapses such bounds to the unbounded
-    /// sentinel so the hot loop / fused path can skip the per-doc bounds check.
-    fn histogram_non_binding_hard_bounds_test_with_opt(merge_segments: bool) -> crate::Result<()> {
-        let values = vec![10.0, 12.0, 14.0, 16.0, 10.0, 13.0, 10.0, 12.0];
-        let index = get_test_index_from_values(merge_segments, &values)?;
-
-        // Mid-interval edges, but wider than the data range [10, 16] -> they exclude nothing.
-        let with_bounds: Aggregations = serde_json::from_value(json!({
-            "histogram": {
-                "histogram": {
-                    "field": "score_f64",
-                    "interval": 1.0,
-                    "hard_bounds": { "min": 9.5, "max": 16.5 }
-                }
-            }
-        }))
-        .unwrap();
-        let no_bounds: Aggregations = serde_json::from_value(json!({
-            "histogram": {
-                "histogram": { "field": "score_f64", "interval": 1.0 }
-            }
-        }))
-        .unwrap();
-
-        let res_bounds = exec_request(with_bounds, &index)?;
-        let res_plain = exec_request(no_bounds, &index)?;
-        // Dropping a non-binding bound must not change anything.
-        assert_eq!(res_bounds, res_plain);
-
-        // Sanity: buckets span the data range with gaps filled (min_doc_count defaults to 0).
-        assert_eq!(res_bounds["histogram"]["buckets"][0]["key"], 10.0);
-        assert_eq!(res_bounds["histogram"]["buckets"][0]["doc_count"], 3);
-        assert_eq!(res_bounds["histogram"]["buckets"][6]["key"], 16.0);
-        assert_eq!(res_bounds["histogram"]["buckets"][6]["doc_count"], 1);
-        assert_eq!(res_bounds["histogram"]["buckets"][7], Value::Null);
-
-        Ok(())
-    }
-
    #[test]
    fn histogram_empty_result_behaviour_test_single_segment() -> crate::Result<()> {
        histogram_empty_result_behaviour_test_with_opt(true)
--- a/src/aggregation/bucket/range.rs
+++ b/src/aggregation/bucket/range.rs
@@ -9,9 +9,8 @@ use crate::aggregation::agg_data::{
    build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
 };
 use crate::aggregation::agg_limits::AggregationLimitsGuard;
-use crate::aggregation::buffered_sub_aggs::{
-    BufferedSubAggs, HighCardSubAggBuffer, LowCardBufferedSubAggs, LowCardSubAggBuffer,
-    SubAggBuffer,
+use crate::aggregation::cached_sub_aggs::{
+    CachedSubAggs, HighCardSubAggCache, LowCardCachedSubAggs, LowCardSubAggCache, SubAggCache,
 };
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
@@ -23,7 +22,6 @@ use crate::TantivyError;

 /// Contains all information required by the SegmentRangeCollector to perform the
 /// range aggregation on a segment.
-#[derive(Debug, Clone)]
 pub struct RangeAggReqData {
    /// The column accessor to access the fast field values.
    pub accessor: Column<u64>,
@@ -157,13 +155,13 @@ pub(crate) struct SegmentRangeAndBucketEntry {

 /// The collector puts values from the fast field into the correct buckets and does a conversion to
 /// the correct datatype.
-pub struct SegmentRangeCollector<B: SubAggBuffer> {
+pub struct SegmentRangeCollector<C: SubAggCache> {
    /// The buckets containing the aggregation data.
    /// One for each ParentBucketId
    parent_buckets: Vec<Vec<SegmentRangeAndBucketEntry>>,
    column_type: ColumnType,
-    pub(crate) req_data: RangeAggReqData,
-    sub_agg: Option<BufferedSubAggs<B>>,
+    pub(crate) accessor_idx: usize,
+    sub_agg: Option<CachedSubAggs<C>>,
    /// Here things get a bit weird. We need to assign unique bucket ids across all
    /// parent buckets. So we keep track of the next available bucket id here.
    /// This allows a kind of flattening of the bucket ids across all parent buckets.
@@ -180,12 +178,12 @@ pub struct SegmentRangeCollector<B: SubAggBuffer> {
    limits: AggregationLimitsGuard,
 }

-impl<B: SubAggBuffer> Debug for SegmentRangeCollector<B> {
+impl<C: SubAggCache> Debug for SegmentRangeCollector<C> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SegmentRangeCollector")
            .field("parent_buckets_len", &self.parent_buckets.len())
            .field("column_type", &self.column_type)
-            .field("name", &self.req_data.name)
+            .field("accessor_idx", &self.accessor_idx)
            .field("has_sub_agg", &self.sub_agg.is_some())
            .finish()
    }
@@ -231,7 +229,7 @@ impl SegmentRangeBucketEntry {
    }
 }

-impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentRangeCollector<B> {
+impl<C: SubAggCache> SegmentAggregationCollector for SegmentRangeCollector<C> {
    fn add_intermediate_aggregation_result(
        &mut self,
        agg_data: &AggregationsSegmentCtx,
@@ -240,7 +238,10 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentRangeCollector<B> {
    ) -> crate::Result<()> {
        self.prepare_max_bucket(parent_bucket_id, agg_data)?;
        let field_type = self.column_type;
-        let name = self.req_data.name.to_string();
+        let name = agg_data
+            .get_range_req_data(self.accessor_idx)
+            .name
+            .to_string();

        let buckets = std::mem::take(&mut self.parent_buckets[parent_bucket_id as usize]);

@@ -279,15 +280,17 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentRangeCollector<B> {
        docs: &[crate::DocId],
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
+        let req = agg_data.take_range_req_data(self.accessor_idx);
+
        agg_data
            .column_block_accessor
-            .fetch_block(docs, &self.req_data.accessor);
+            .fetch_block(docs, &req.accessor);

        let buckets = &mut self.parent_buckets[parent_bucket_id as usize];

        for (doc, val) in agg_data
            .column_block_accessor
-            .iter_docid_vals(docs, &self.req_data.accessor)
+            .iter_docid_vals(docs, &req.accessor)
        {
            let bucket_pos = get_bucket_pos(val, buckets);
            let bucket = &mut buckets[bucket_pos];
@@ -297,6 +300,7 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentRangeCollector<B> {
            }
        }

+        agg_data.put_back_range_req_data(self.accessor_idx, req);
        if let Some(sub_agg) = self.sub_agg.as_mut() {
            sub_agg.check_flush_local(agg_data)?;
        }
@@ -314,26 +318,15 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentRangeCollector<B> {
    fn prepare_max_bucket(
        &mut self,
        max_bucket: BucketId,
-        _agg_data: &AggregationsSegmentCtx,
+        agg_data: &AggregationsSegmentCtx,
    ) -> crate::Result<()> {
        while self.parent_buckets.len() <= max_bucket as usize {
-            let new_buckets = self.create_new_buckets()?;
+            let new_buckets = self.create_new_buckets(agg_data)?;
            self.parent_buckets.push(new_buckets);
        }

        Ok(())
    }
-
-    fn compute_metric_value(
-        &self,
-        _bucket_id: BucketId,
-        _sub_agg_name: &str,
-        _sub_agg_property: &str,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        // Range is a multi-bucket agg with no single value to extract.
-        None
-    }
 }
 /// Build a concrete `SegmentRangeCollector` with either a Vec- or HashMap-backed
 /// bucket storage, depending on the column type and aggregation level.
@@ -341,11 +334,8 @@ pub(crate) fn build_segment_range_collector(
    agg_data: &mut AggregationsSegmentCtx,
    node: &AggRefNode,
 ) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
-    let req_data = agg_data.per_request.range_req_data[node.idx_in_req_data].clone();
-    agg_data
-        .context
-        .limits
-        .add_memory_consumed(req_data.get_memory_consumption() as u64)?;
+    let accessor_idx = node.idx_in_req_data;
+    let req_data = agg_data.get_range_req_data(node.idx_in_req_data);
    let field_type = req_data.field_type;

    // TODO: A better metric instead of is_top_level would be the number of buckets expected.
@@ -360,19 +350,19 @@ pub(crate) fn build_segment_range_collector(
    };

    if is_low_card {
-        Ok(Box::new(SegmentRangeCollector::<LowCardSubAggBuffer> {
-            sub_agg: sub_agg.map(LowCardBufferedSubAggs::new),
+        Ok(Box::new(SegmentRangeCollector::<LowCardSubAggCache> {
+            sub_agg: sub_agg.map(LowCardCachedSubAggs::new),
            column_type: field_type,
-            req_data,
+            accessor_idx,
            parent_buckets: Vec::new(),
            bucket_id_provider: BucketIdProvider::default(),
            limits: agg_data.context.limits.clone(),
        }))
    } else {
-        Ok(Box::new(SegmentRangeCollector::<HighCardSubAggBuffer> {
-            sub_agg: sub_agg.map(BufferedSubAggs::new),
+        Ok(Box::new(SegmentRangeCollector::<HighCardSubAggCache> {
+            sub_agg: sub_agg.map(CachedSubAggs::new),
            column_type: field_type,
-            req_data,
+            accessor_idx,
            parent_buckets: Vec::new(),
            bucket_id_provider: BucketIdProvider::default(),
            limits: agg_data.context.limits.clone(),
@@ -380,10 +370,13 @@ pub(crate) fn build_segment_range_collector(
    }
 }

-impl<B: SubAggBuffer> SegmentRangeCollector<B> {
-    pub(crate) fn create_new_buckets(&mut self) -> crate::Result<Vec<SegmentRangeAndBucketEntry>> {
+impl<C: SubAggCache> SegmentRangeCollector<C> {
+    pub(crate) fn create_new_buckets(
+        &mut self,
+        agg_data: &AggregationsSegmentCtx,
+    ) -> crate::Result<Vec<SegmentRangeAndBucketEntry>> {
        let field_type = self.column_type;
-        let req_data = &self.req_data;
+        let req_data = agg_data.get_range_req_data(self.accessor_idx);
        // The range input on the request is f64.
        // We need to convert to u64 ranges, because we read the values as u64.
        // The mapping from the conversion is monotonic so ordering is preserved.
@@ -558,16 +551,17 @@ mod tests {
        get_test_index_with_num_docs,
    };

-    pub fn build_test_buckets(
-        ranges: &[RangeAggregationRange],
+    pub fn get_collector_from_ranges(
+        ranges: Vec<RangeAggregationRange>,
        field_type: ColumnType,
-    ) -> Vec<SegmentRangeAndBucketEntry> {
+    ) -> SegmentRangeCollector<HighCardSubAggCache> {
        let req = RangeAggregation {
            field: "dummy".to_string(),
-            ranges: ranges.to_vec(),
+            ranges,
            ..Default::default()
        };
-        extend_validate_ranges(&req.ranges, &field_type)
+        // Build buckets directly as in from_req_and_validate without AggregationsData
+        let buckets: Vec<_> = extend_validate_ranges(&req.ranges, &field_type)
            .expect("unexpected error in extend_validate_ranges")
            .iter()
            .map(|range| {
@@ -598,7 +592,16 @@ mod tests {
                    },
                }
            })
-            .collect()
+            .collect();
+
+        SegmentRangeCollector {
+            parent_buckets: vec![buckets],
+            column_type: field_type,
+            accessor_idx: 0,
+            sub_agg: None,
+            bucket_id_provider: Default::default(),
+            limits: AggregationLimitsGuard::default(),
+        }
    }

    #[test]
@@ -841,10 +844,10 @@ mod tests {

    #[test]
    fn bucket_test_extend_range_hole() {
-        let buckets = [(10f64..20f64).into(), (30f64..40f64).into()];
-        let parent_buckets = [build_test_buckets(&buckets, ColumnType::F64)];
+        let buckets = vec![(10f64..20f64).into(), (30f64..40f64).into()];
+        let collector = get_collector_from_ranges(buckets, ColumnType::F64);

-        let buckets = parent_buckets[0].clone();
+        let buckets = collector.parent_buckets[0].clone();
        assert_eq!(buckets[0].range.start, u64::MIN);
        assert_eq!(buckets[0].range.end, 10f64.to_u64());
        assert_eq!(buckets[1].range.start, 10f64.to_u64());
@@ -860,14 +863,14 @@ mod tests {
    fn bucket_test_range_conversion_special_case() {
        // the monotonic conversion between f64 and u64, does not map f64::MIN.to_u64() ==
        // u64::MIN, but the into trait converts f64::MIN/MAX to None
-        let buckets = [
+        let buckets = vec![
            (f64::MIN..10f64).into(),
            (10f64..20f64).into(),
            (20f64..f64::MAX).into(),
        ];
-        let parent_buckets = [build_test_buckets(&buckets, ColumnType::F64)];
+        let collector = get_collector_from_ranges(buckets, ColumnType::F64);

-        let buckets = parent_buckets[0].clone();
+        let buckets = collector.parent_buckets[0].clone();
        assert_eq!(buckets[0].range.start, u64::MIN);
        assert_eq!(buckets[0].range.end, 10f64.to_u64());
        assert_eq!(buckets[1].range.start, 10f64.to_u64());
@@ -879,28 +882,28 @@ mod tests {

    #[test]
    fn bucket_range_test_negative_vals() {
-        let buckets = [(-10f64..-1f64).into()];
-        let parent_buckets = [build_test_buckets(&buckets, ColumnType::F64)];
+        let buckets = vec![(-10f64..-1f64).into()];
+        let collector = get_collector_from_ranges(buckets, ColumnType::F64);

-        let buckets = parent_buckets[0].clone();
+        let buckets = collector.parent_buckets[0].clone();
        assert_eq!(&buckets[0].bucket.key.to_string(), "*--10");
        assert_eq!(&buckets[buckets.len() - 1].bucket.key.to_string(), "-1-*");
    }
    #[test]
    fn bucket_range_test_positive_vals() {
-        let buckets = [(0f64..10f64).into()];
-        let parent_buckets = [build_test_buckets(&buckets, ColumnType::F64)];
+        let buckets = vec![(0f64..10f64).into()];
+        let collector = get_collector_from_ranges(buckets, ColumnType::F64);

-        let buckets = parent_buckets[0].clone();
+        let buckets = collector.parent_buckets[0].clone();
        assert_eq!(&buckets[0].bucket.key.to_string(), "*-0");
        assert_eq!(&buckets[buckets.len() - 1].bucket.key.to_string(), "10-*");
    }

    #[test]
    fn range_binary_search_test_u64() {
-        let check_ranges = |ranges: &[RangeAggregationRange]| {
-            let parent_buckets = [build_test_buckets(ranges, ColumnType::U64)];
-            let search = |val: u64| get_bucket_pos(val, &parent_buckets[0]);
+        let check_ranges = |ranges: Vec<RangeAggregationRange>| {
+            let collector = get_collector_from_ranges(ranges, ColumnType::U64);
+            let search = |val: u64| get_bucket_pos(val, &collector.parent_buckets[0]);

            assert_eq!(search(u64::MIN), 0);
            assert_eq!(search(9), 0);
@@ -913,7 +916,7 @@ mod tests {
        };

        let ranges = vec![(10.0..100.0).into()];
-        check_ranges(&ranges);
+        check_ranges(ranges);

        let ranges = vec![
            RangeAggregationRange {
@@ -923,7 +926,7 @@ mod tests {
            },
            (10.0..100.0).into(),
        ];
-        check_ranges(&ranges);
+        check_ranges(ranges);

        let ranges = vec![
            RangeAggregationRange {
@@ -938,15 +941,15 @@ mod tests {
                from: Some(100.0),
            },
        ];
-        check_ranges(&ranges);
+        check_ranges(ranges);
    }

    #[test]
    fn range_binary_search_test_f64() {
-        let ranges = [(10.0..100.0).into()];
+        let ranges = vec![(10.0..100.0).into()];

-        let parent_buckets = [build_test_buckets(&ranges, ColumnType::F64)];
-        let search = |val: u64| get_bucket_pos(val, &parent_buckets[0]);
+        let collector = get_collector_from_ranges(ranges, ColumnType::F64);
+        let search = |val: u64| get_bucket_pos(val, &collector.parent_buckets[0]);

        assert_eq!(search(u64::MIN), 0);
        assert_eq!(search(9f64.to_u64()), 0);
--- a/src/aggregation/bucket/term_agg/mod.rs
+++ b/src/aggregation/bucket/term_agg/mod.rs
@@ -1,4 +1,5 @@
 use std::fmt::Debug;
+use std::io;
 use std::net::Ipv6Addr;

 use columnar::column_values::CompactSpaceU64Accessor;
@@ -16,9 +17,8 @@ use crate::aggregation::agg_data::{
 };
 use crate::aggregation::agg_limits::MemoryConsumption;
 use crate::aggregation::agg_req::Aggregations;
-use crate::aggregation::buffered_sub_aggs::{
-    BufferedSubAggs, HighCardSubAggBuffer, LowCardBufferedSubAggs, LowCardSubAggBuffer,
-    SubAggBuffer,
+use crate::aggregation::cached_sub_aggs::{
+    CachedSubAggs, HighCardSubAggCache, LowCardCachedSubAggs, LowCardSubAggCache, SubAggCache,
 };
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
@@ -29,8 +29,6 @@ use crate::aggregation::{format_date, BucketId, Key};
 use crate::error::DataCorruption;
 use crate::TantivyError;

-mod term_histogram;
-
 /// Contains all information required by the SegmentTermCollector to perform the
 /// terms aggregation on a segment.
 #[derive(Debug, Clone)]
@@ -354,15 +352,19 @@ pub(crate) fn build_segment_term_collector(
        )));
    }

-    // Validate that the referenced sub-aggregation exists when ordering by one.
-    if let OrderTarget::SubAggregation(sub_agg_name) = &terms_req_data.req.order.target {
-        let (agg_name, _agg_property) = get_agg_name_and_property(sub_agg_name);
-        node.get_sub_agg(agg_name, &req_data.per_request)
-            .ok_or_else(|| {
-                TantivyError::InvalidArgument(format!(
-                    "could not find aggregation with name {agg_name} in metric sub_aggregations"
-                ))
-            })?;
+    // Validate sub aggregation exists when ordering by sub-aggregation.
+    {
+        if let OrderTarget::SubAggregation(sub_agg_name) = &terms_req_data.req.order.target {
+            let (agg_name, _agg_property) = get_agg_name_and_property(sub_agg_name);
+
+            node.get_sub_agg(agg_name, &req_data.per_request)
+                .ok_or_else(|| {
+                    TantivyError::InvalidArgument(format!(
+                        "could not find aggregation with name {agg_name} in metric \
+                         sub_aggregations"
+                    ))
+                })?;
+        }
    }

    // Build sub-aggregation blueprint if there are children.
@@ -376,21 +378,9 @@ pub(crate) fn build_segment_term_collector(
    // Let's see if we can use a vec to aggregate our data
    // instead of a hashmap.
    let col_max_value = terms_req_data.accessor.max_value();
-    let max_column_val: u64 =
+    let max_term_id: u64 =
        col_max_value.max(terms_req_data.missing_value_for_accessor.unwrap_or(0u64));

-    // Fused fast path: low-cardinality terms × a single `histogram`/`date_histogram` leaf over full
-    // columns with a small enough bucket grid. Anything else falls through to the general path.
-    if let Some(collector) = term_histogram::maybe_build_collector(
-        req_data,
-        node,
-        &terms_req_data,
-        max_column_val,
-        is_top_level,
-    )? {
-        return Ok(collector);
-    }
-
    let sub_agg_collector = if has_sub_aggregations {
        Some(build_segment_agg_collectors(req_data, &node.children)?)
    } else {
@@ -399,51 +389,51 @@ pub(crate) fn build_segment_term_collector(

    let mut bucket_id_provider = BucketIdProvider::default();
    // Decide which bucket storage is best suited for this aggregation.
-    if is_top_level && max_column_val < MAX_NUM_TERMS_FOR_VEC && !has_sub_aggregations {
-        let term_buckets = VecTermBucketsNoAgg::new(max_column_val + 1, &mut bucket_id_provider);
-        let collector: SegmentTermCollector<_, HighCardSubAggBuffer> = SegmentTermCollector {
+    if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC && !has_sub_aggregations {
+        let term_buckets = VecTermBucketsNoAgg::new(max_term_id + 1, &mut bucket_id_provider);
+        let collector: SegmentTermCollector<_, HighCardSubAggCache> = SegmentTermCollector {
            parent_buckets: vec![term_buckets],
            sub_agg: None,
            bucket_id_provider,
-            max_term_id: max_column_val,
+            max_term_id,
            terms_req_data,
        };
        Ok(Box::new(collector))
-    } else if is_top_level && max_column_val < MAX_NUM_TERMS_FOR_VEC {
-        let term_buckets = VecTermBuckets::new(max_column_val + 1, &mut bucket_id_provider);
-        let sub_agg = sub_agg_collector.map(LowCardBufferedSubAggs::new);
-        let collector: SegmentTermCollector<_, LowCardSubAggBuffer> = SegmentTermCollector {
+    } else if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC {
+        let term_buckets = VecTermBuckets::new(max_term_id + 1, &mut bucket_id_provider);
+        let sub_agg = sub_agg_collector.map(LowCardCachedSubAggs::new);
+        let collector: SegmentTermCollector<_, LowCardSubAggCache> = SegmentTermCollector {
            parent_buckets: vec![term_buckets],
            sub_agg,
            bucket_id_provider,
-            max_term_id: max_column_val,
+            max_term_id,
            terms_req_data,
        };
        Ok(Box::new(collector))
-    } else if max_column_val < 8_000_000 && is_top_level {
+    } else if max_term_id < 8_000_000 && is_top_level {
        let term_buckets: PagedTermMap =
-            PagedTermMap::new(max_column_val + 1, &mut bucket_id_provider);
+            PagedTermMap::new(max_term_id + 1, &mut bucket_id_provider);
        // Build sub-aggregation blueprint (flat pairs)
-        let sub_agg = sub_agg_collector.map(BufferedSubAggs::new);
-        let collector: SegmentTermCollector<PagedTermMap, HighCardSubAggBuffer> =
+        let sub_agg = sub_agg_collector.map(CachedSubAggs::new);
+        let collector: SegmentTermCollector<PagedTermMap, HighCardSubAggCache> =
            SegmentTermCollector {
                parent_buckets: vec![term_buckets],
                sub_agg,
                bucket_id_provider,
-                max_term_id: max_column_val,
+                max_term_id,
                terms_req_data,
            };
        Ok(Box::new(collector))
    } else {
        let term_buckets: HashMapTermBuckets = HashMapTermBuckets::default();
        // Build sub-aggregation blueprint (flat pairs)
-        let sub_agg = sub_agg_collector.map(BufferedSubAggs::new);
-        let collector: SegmentTermCollector<HashMapTermBuckets, HighCardSubAggBuffer> =
+        let sub_agg = sub_agg_collector.map(CachedSubAggs::new);
+        let collector: SegmentTermCollector<HashMapTermBuckets, HighCardSubAggCache> =
            SegmentTermCollector {
                parent_buckets: vec![term_buckets],
                sub_agg,
                bucket_id_provider,
-                max_term_id: max_column_val,
+                max_term_id,
                terms_req_data,
            };
        Ok(Box::new(collector))
@@ -768,10 +758,10 @@ impl TermAggregationMap for VecTermBuckets {
 /// The collector puts values from the fast field into the correct buckets and does a conversion to
 /// the correct datatype.
 #[derive(Debug)]
-struct SegmentTermCollector<TermMap: TermAggregationMap, B: SubAggBuffer> {
+struct SegmentTermCollector<TermMap: TermAggregationMap, C: SubAggCache> {
    /// The buckets containing the aggregation data.
    parent_buckets: Vec<TermMap>,
-    sub_agg: Option<BufferedSubAggs<B>>,
+    sub_agg: Option<CachedSubAggs<C>>,
    bucket_id_provider: BucketIdProvider,
    max_term_id: u64,
    terms_req_data: TermsAggReqData,
@@ -782,8 +772,8 @@ pub(crate) fn get_agg_name_and_property(name: &str) -> (&str, &str) {
    (agg_name, agg_property)
 }

-impl<TermMap: TermAggregationMap, B: SubAggBuffer> SegmentAggregationCollector
-    for SegmentTermCollector<TermMap, B>
+impl<TermMap: TermAggregationMap, C: SubAggCache> SegmentAggregationCollector
+    for SegmentTermCollector<TermMap, C>
 {
    fn add_intermediate_aggregation_result(
        &mut self,
@@ -800,14 +790,8 @@ impl<TermMap: TermAggregationMap, B: SubAggBuffer> SegmentAggregationCollector
        let term_req = &self.terms_req_data;
        let name = term_req.name.clone();

-        let bucket = Self::into_intermediate_bucket_result(
-            term_req,
-            self.sub_agg
-                .as_mut()
-                .map(BufferedSubAggs::get_sub_agg_collector),
-            bucket,
-            agg_data,
-        )?;
+        let bucket =
+            Self::into_intermediate_bucket_result(term_req, &mut self.sub_agg, bucket, agg_data)?;
        results.push(name, IntermediateAggregationResult::Bucket(bucket))?;
        Ok(())
    }
@@ -897,17 +881,6 @@ impl<TermMap: TermAggregationMap, B: SubAggBuffer> SegmentAggregationCollector
        }
        Ok(())
    }
-
-    fn compute_metric_value(
-        &self,
-        _bucket_id: BucketId,
-        _sub_agg_name: &str,
-        _sub_agg_property: &str,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        // Terms is a multi-bucket agg with no single value to extract.
-        None
-    }
 }

 /// Missing value are represented as a sentinel value in the column.
@@ -934,38 +907,10 @@ fn extract_missing_value<T>(
    Some((key, bucket))
 }

-fn reborrow_opt_collector<'a>(
-    opt: &'a mut Option<&mut dyn SegmentAggregationCollector>,
-) -> Option<&'a mut dyn SegmentAggregationCollector> {
-    match opt {
-        Some(inner) => Some(*inner),
-        None => None,
-    }
-}
-
-fn into_intermediate_bucket_entry(
-    bucket: Bucket,
-    sub_agg_collector: Option<&mut dyn SegmentAggregationCollector>,
-    agg_data: &AggregationsSegmentCtx,
-) -> crate::Result<IntermediateTermBucketEntry> {
-    let mut sub_aggregation_res = IntermediateAggregationResults::default();
-    if let Some(sub_agg_collector) = sub_agg_collector {
-        sub_agg_collector.add_intermediate_aggregation_result(
-            agg_data,
-            &mut sub_aggregation_res,
-            bucket.bucket_id,
-        )?;
-    }
-    Ok(IntermediateTermBucketEntry {
-        doc_count: bucket.count,
-        sub_aggregation: sub_aggregation_res,
-    })
-}
-
-impl<TermMap, B> SegmentTermCollector<TermMap, B>
+impl<TermMap, C> SegmentTermCollector<TermMap, C>
 where
    TermMap: TermAggregationMap,
-    B: SubAggBuffer,
+    C: SubAggCache,
 {
    #[inline]
    fn get_memory_consumption(&self, parent_bucket_id: BucketId) -> usize {
@@ -975,12 +920,15 @@ where
    #[inline]
    pub(crate) fn into_intermediate_bucket_result(
        term_req: &TermsAggReqData,
-        mut sub_agg_collector: Option<&mut dyn SegmentAggregationCollector>,
+        sub_agg: &mut Option<CachedSubAggs<C>>,
        term_buckets: TermMap,
        agg_data: &AggregationsSegmentCtx,
    ) -> crate::Result<IntermediateBucketResult> {
        let mut entries: Vec<(u64, Bucket)> = term_buckets.into_vec();

+        let order_by_sub_aggregation =
+            matches!(term_req.req.order.target, OrderTarget::SubAggregation(_));
+
        match &term_req.req.order.target {
            OrderTarget::Key => {
                // We rely on the fact, that term ordinals match the order of the strings
@@ -992,37 +940,10 @@ where
                    entries.sort_unstable_by_key(|bucket| bucket.0);
                }
            }
-            OrderTarget::SubAggregation(sub_agg_path) => {
-                // Peek segment-level metric values, sort, then fall through to
-                // `cut_off_buckets`. Like Elasticsearch, we always cut off when ordering
-                // by a sub-agg: top-K results are approximate and may differ from the
-                // global ordering, especially for non-monotonic metrics like avg/min.
-                let coll = sub_agg_collector.as_deref().ok_or_else(|| {
-                    TantivyError::InvalidArgument(format!(
-                        "Could not find sub-aggregation collector for path {sub_agg_path}"
-                    ))
-                })?;
-                let (agg_name, agg_prop) = get_agg_name_and_property(sub_agg_path);
-                // Fetch values up-front; otherwise sort would re-compute per comparison
-                let mut keyed: Vec<(f64, (u64, Bucket))> = entries
-                    .into_iter()
-                    .map(|bucket| {
-                        let metric_value = coll
-                            .compute_metric_value(bucket.1.bucket_id, agg_name, agg_prop, agg_data)
-                            .unwrap_or(0.0);
-                        (metric_value, bucket)
-                    })
-                    .collect();
-                if term_req.req.order.order == Order::Desc {
-                    keyed.sort_unstable_by(|a, b| {
-                        b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)
-                    });
-                } else {
-                    keyed.sort_unstable_by(|a, b| {
-                        a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal)
-                    });
-                }
-                entries = keyed.into_iter().map(|(_, e)| e).collect();
+            OrderTarget::SubAggregation(_name) => {
+                // don't sort and cut off since it's hard to make assumptions on the quality of the
+                // results when cutting off du to unknown nature of the sub_aggregation (possible
+                // to check).
            }
            OrderTarget::Count => {
                if term_req.req.order.order == Order::Desc {
@@ -1033,12 +954,40 @@ where
            }
        }

-        let (term_doc_count_before_cutoff, sum_other_doc_count) =
-            cut_off_buckets(&mut entries, term_req.req.segment_size as usize);
+        let (term_doc_count_before_cutoff, sum_other_doc_count) = if order_by_sub_aggregation {
+            (0, 0)
+        } else {
+            cut_off_buckets(&mut entries, term_req.req.segment_size as usize)
+        };

        let mut dict: FxHashMap<IntermediateKey, IntermediateTermBucketEntry> = Default::default();
        dict.reserve(entries.len());

+        let into_intermediate_bucket_entry =
+            |bucket: Bucket,
+             sub_agg: &mut Option<CachedSubAggs<C>>|
+             -> crate::Result<IntermediateTermBucketEntry> {
+                if let Some(sub_agg) = sub_agg {
+                    let mut sub_aggregation_res = IntermediateAggregationResults::default();
+                    sub_agg
+                        .get_sub_agg_collector()
+                        .add_intermediate_aggregation_result(
+                            agg_data,
+                            &mut sub_aggregation_res,
+                            bucket.bucket_id,
+                        )?;
+                    Ok(IntermediateTermBucketEntry {
+                        doc_count: bucket.count,
+                        sub_aggregation: sub_aggregation_res,
+                    })
+                } else {
+                    Ok(IntermediateTermBucketEntry {
+                        doc_count: bucket.count,
+                        sub_aggregation: Default::default(),
+                    })
+                }
+            };
+
        if term_req.column_type == ColumnType::Str {
            let fallback_dict = Dictionary::empty();
            let term_dict = term_req
@@ -1049,11 +998,7 @@ where

            if let Some((intermediate_key, bucket)) = extract_missing_value(&mut entries, term_req)
            {
-                let intermediate_entry = into_intermediate_bucket_entry(
-                    bucket,
-                    reborrow_opt_collector(&mut sub_agg_collector),
-                    agg_data,
-                )?;
+                let intermediate_entry = into_intermediate_bucket_entry(bucket, sub_agg)?;
                dict.insert(intermediate_key, intermediate_entry);
            }

@@ -1061,28 +1006,19 @@ where
            entries.sort_unstable_by_key(|bucket| bucket.0);

            let (term_ids, buckets): (Vec<u64>, Vec<Bucket>) = entries.into_iter().unzip();
+            let mut buckets_it = buckets.into_iter();

-            let intermediate_entries: Vec<IntermediateTermBucketEntry> = buckets
-                .into_iter()
-                .map(|bucket| {
-                    into_intermediate_bucket_entry(
-                        bucket,
-                        reborrow_opt_collector(&mut sub_agg_collector),
-                        agg_data,
-                    )
-                })
-                .collect::<crate::Result<_>>()?;
-
-            let mut intermediate_entry_it = intermediate_entries.into_iter();
-
-            term_dict.sorted_ords_to_term_cb(&term_ids[..], |term| {
-                let intermediate_entry = intermediate_entry_it.next().unwrap();
+            term_dict.sorted_ords_to_term_cb(term_ids.into_iter(), |term| {
+                let bucket = buckets_it.next().unwrap();
+                let intermediate_entry =
+                    into_intermediate_bucket_entry(bucket, sub_agg).map_err(io::Error::other)?;
                dict.insert(
                    IntermediateKey::Str(
                        String::from_utf8(term.to_vec()).expect("could not convert to String"),
                    ),
                    intermediate_entry,
                );
+                Ok(())
            })?;

            if term_req.req.min_doc_count == 0 {
@@ -1117,22 +1053,14 @@ where
            }
        } else if term_req.column_type == ColumnType::DateTime {
            for (val, doc_count) in entries {
-                let intermediate_entry = into_intermediate_bucket_entry(
-                    doc_count,
-                    reborrow_opt_collector(&mut sub_agg_collector),
-                    agg_data,
-                )?;
+                let intermediate_entry = into_intermediate_bucket_entry(doc_count, sub_agg)?;
                let val = i64::from_u64(val);
                let date = format_date(val)?;
                dict.insert(IntermediateKey::Str(date), intermediate_entry);
            }
        } else if term_req.column_type == ColumnType::Bool {
            for (val, doc_count) in entries {
-                let intermediate_entry = into_intermediate_bucket_entry(
-                    doc_count,
-                    reborrow_opt_collector(&mut sub_agg_collector),
-                    agg_data,
-                )?;
+                let intermediate_entry = into_intermediate_bucket_entry(doc_count, sub_agg)?;
                let val = bool::from_u64(val);
                dict.insert(IntermediateKey::Bool(val), intermediate_entry);
            }
@@ -1152,22 +1080,14 @@ where
                })?;

            for (val, doc_count) in entries {
-                let intermediate_entry = into_intermediate_bucket_entry(
-                    doc_count,
-                    reborrow_opt_collector(&mut sub_agg_collector),
-                    agg_data,
-                )?;
+                let intermediate_entry = into_intermediate_bucket_entry(doc_count, sub_agg)?;
                let val: u128 = compact_space_accessor.compact_to_u128(val as u32);
                let val = Ipv6Addr::from_u128(val);
                dict.insert(IntermediateKey::IpAddr(val), intermediate_entry);
            }
        } else {
            for (val, doc_count) in entries {
-                let intermediate_entry = into_intermediate_bucket_entry(
-                    doc_count,
-                    reborrow_opt_collector(&mut sub_agg_collector),
-                    agg_data,
-                )?;
+                let intermediate_entry = into_intermediate_bucket_entry(doc_count, sub_agg)?;
                if term_req.column_type == ColumnType::U64 {
                    dict.insert(IntermediateKey::U64(val), intermediate_entry);
                } else if term_req.column_type == ColumnType::I64 {
@@ -1201,13 +1121,13 @@ where
    }
 }

-impl<TermMap: TermAggregationMap, B: SubAggBuffer> SegmentTermCollector<TermMap, B> {
+impl<TermMap: TermAggregationMap, C: SubAggCache> SegmentTermCollector<TermMap, C> {
    #[inline]
    fn collect_terms_with_docs(
        iter: impl Iterator<Item = (crate::DocId, u64)>,
        term_buckets: &mut TermMap,
        bucket_id_provider: &mut BucketIdProvider,
-        sub_agg: &mut BufferedSubAggs<B>,
+        sub_agg: &mut CachedSubAggs<C>,
    ) {
        for (doc, term_id) in iter {
            let bucket_id = term_buckets.term_entry(term_id, bucket_id_provider);
@@ -1280,7 +1200,7 @@ mod tests {
    use crate::aggregation::{AggregationLimitsGuard, DistributedAggregationCollector};
    use crate::indexer::NoMergePolicy;
    use crate::query::AllQuery;
-    use crate::schema::{IntoIpv6Addr, Schema, FAST, INDEXED, STRING, TEXT};
+    use crate::schema::{IntoIpv6Addr, Schema, FAST, STRING};
    use crate::{Index, IndexWriter};

    #[test]
@@ -1809,263 +1729,6 @@ mod tests {
        Ok(())
    }

-    #[test]
-    fn terms_aggregation_order_by_cardinality_desc_single_segment() -> crate::Result<()> {
-        terms_aggregation_order_by_cardinality_desc(true)
-    }
-    #[test]
-    fn terms_aggregation_order_by_cardinality_desc_multi_segment() -> crate::Result<()> {
-        terms_aggregation_order_by_cardinality_desc(false)
-    }
-    fn terms_aggregation_order_by_cardinality_desc(merge_segments: bool) -> crate::Result<()> {
-        // Distinct score values per bucket key: A→5, B→1, C→3.
-        // Order by cardinality desc must yield A, C, B.
-        let segment_and_terms = vec![vec![
-            (1.0, "A".to_string()),
-            (2.0, "A".to_string()),
-            (3.0, "A".to_string()),
-            (4.0, "A".to_string()),
-            (5.0, "A".to_string()),
-            (1.0, "B".to_string()),
-            (1.0, "B".to_string()),
-            (1.0, "B".to_string()),
-            (1.0, "C".to_string()),
-            (2.0, "C".to_string()),
-            (3.0, "C".to_string()),
-        ]];
-        let index = get_test_index_from_values_and_terms(merge_segments, &segment_and_terms)?;
-
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "my_texts": {
-                "terms": {
-                    "field": "string_id",
-                    "order": { "card": "desc" }
-                },
-                "aggs": {
-                    "card": { "cardinality": { "field": "score" } }
-                }
-            }
-        }))
-        .unwrap();
-
-        let res = exec_request(agg_req, &index)?;
-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
-        assert_eq!(res["my_texts"]["buckets"][0]["card"]["value"], 5.0);
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
-        assert_eq!(res["my_texts"]["buckets"][1]["card"]["value"], 3.0);
-        assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
-        assert_eq!(res["my_texts"]["buckets"][2]["card"]["value"], 1.0);
-
-        // Asc engages the segment-cutoff path too (monotonic-safe: discarded buckets had
-        // local card >= cutoff, so merged card >= cutoff and they cannot be globally smallest).
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "my_texts": {
-                "terms": {
-                    "field": "string_id",
-                    "order": { "card": "asc" }
-                },
-                "aggs": {
-                    "card": { "cardinality": { "field": "score" } }
-                }
-            }
-        }))
-        .unwrap();
-        let res = exec_request(agg_req, &index)?;
-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "B");
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
-        assert_eq!(res["my_texts"]["buckets"][2]["key"], "A");
-
-        // size=2 with desc engages the segment cutoff: must keep top-2 by cardinality (A, C),
-        // and `sum_other_doc_count` reflects the dropped B (3 docs).
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "my_texts": {
-                "terms": {
-                    "field": "string_id",
-                    "size": 2,
-                    "order": { "card": "desc" }
-                },
-                "aggs": {
-                    "card": { "cardinality": { "field": "score" } }
-                }
-            }
-        }))
-        .unwrap();
-        let res = exec_request(agg_req, &index)?;
-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
-        assert_eq!(res["my_texts"]["buckets"].as_array().unwrap().len(), 2);
-
-        // size=2 with asc engages the segment cutoff: must keep bottom-2 by cardinality (B, C).
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "my_texts": {
-                "terms": {
-                    "field": "string_id",
-                    "size": 2,
-                    "order": { "card": "asc" }
-                },
-                "aggs": {
-                    "card": { "cardinality": { "field": "score" } }
-                }
-            }
-        }))
-        .unwrap();
-        let res = exec_request(agg_req, &index)?;
-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "B");
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
-        assert_eq!(res["my_texts"]["buckets"].as_array().unwrap().len(), 2);
-
-        Ok(())
-    }
-
-    #[test]
-    fn terms_aggregation_order_by_sum_single_segment() -> crate::Result<()> {
-        terms_aggregation_order_by_sum(true)
-    }
-    #[test]
-    fn terms_aggregation_order_by_sum_multi_segment() -> crate::Result<()> {
-        terms_aggregation_order_by_sum(false)
-    }
-    fn terms_aggregation_order_by_sum(merge_segments: bool) -> crate::Result<()> {
-        // Per-bucket sums on the U64 `score` column (non-negative => sum is monotonic):
-        //   A → 1+2+3+4+5 = 15, B → 1+1+1 = 3, C → 1+2+3 = 6.
-        let segment_and_terms = vec![
-            vec![
-                (1.0, "A".to_string()),
-                (2.0, "A".to_string()),
-                (3.0, "A".to_string()),
-                (1.0, "B".to_string()),
-                (1.0, "C".to_string()),
-            ],
-            vec![
-                (4.0, "A".to_string()),
-                (5.0, "A".to_string()),
-                (1.0, "B".to_string()),
-                (1.0, "B".to_string()),
-                (2.0, "C".to_string()),
-                (3.0, "C".to_string()),
-            ],
-        ];
-        let index = get_test_index_from_values_and_terms(merge_segments, &segment_and_terms)?;
-
-        // Desc on a Sum metric engages the fast path (column is U64).
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "my_texts": {
-                "terms": {
-                    "field": "string_id",
-                    "order": { "total": "desc" }
-                },
-                "aggs": {
-                    "total": { "sum": { "field": "score" } }
-                }
-            }
-        }))
-        .unwrap();
-        let res = exec_request(agg_req, &index)?;
-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
-        assert_eq!(res["my_texts"]["buckets"][0]["total"]["value"], 15.0);
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
-        assert_eq!(res["my_texts"]["buckets"][1]["total"]["value"], 6.0);
-        assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
-        assert_eq!(res["my_texts"]["buckets"][2]["total"]["value"], 3.0);
-
-        // Asc engages the fast path too — discarded buckets had local sum >= cutoff,
-        // and merged sum >= local (non-negative addends), so they cannot be globally smallest.
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "my_texts": {
-                "terms": {
-                    "field": "string_id",
-                    "order": { "total": "asc" }
-                },
-                "aggs": {
-                    "total": { "sum": { "field": "score" } }
-                }
-            }
-        }))
-        .unwrap();
-        let res = exec_request(agg_req, &index)?;
-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "B");
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
-        assert_eq!(res["my_texts"]["buckets"][2]["key"], "A");
-
-        // size=2 desc with cutoff: top-2 by sum (A, C).
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "my_texts": {
-                "terms": {
-                    "field": "string_id",
-                    "size": 2,
-                    "order": { "total": "desc" }
-                },
-                "aggs": {
-                    "total": { "sum": { "field": "score" } }
-                }
-            }
-        }))
-        .unwrap();
-        let res = exec_request(agg_req, &index)?;
-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
-        assert_eq!(res["my_texts"]["buckets"].as_array().unwrap().len(), 2);
-
-        // Stats sub-property: ordering by `mystats.sum` on a U64 column also engages.
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "my_texts": {
-                "terms": {
-                    "field": "string_id",
-                    "order": { "mystats.sum": "desc" }
-                },
-                "aggs": {
-                    "mystats": { "stats": { "field": "score" } }
-                }
-            }
-        }))
-        .unwrap();
-        let res = exec_request(agg_req, &index)?;
-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
-        assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
-
-        // Sum on a signed column (I64) takes the same cutoff path. Results may be
-        // approximate near the boundary on adversarial data, but for this dataset the
-        // top-K is unambiguous.
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "my_texts": {
-                "terms": {
-                    "field": "string_id",
-                    "order": { "total": "desc" }
-                },
-                "aggs": {
-                    "total": { "sum": { "field": "score_i64" } }
-                }
-            }
-        }))
-        .unwrap();
-        let res = exec_request(agg_req, &index)?;
-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
-        assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
-
-        // Order by extended_stats sub-property exercises compute_metric_value on the
-        // ExtendedStats collector. A→max=5, B→max=1, C→max=3, so desc by max → A, C, B.
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "my_texts": {
-                "terms": {
-                    "field": "string_id",
-                    "order": { "ext.max": "desc" }
-                },
-                "aggs": {
-                    "ext": { "extended_stats": { "field": "score" } }
-                }
-            }
-        }))
-        .unwrap();
-        let res = exec_request(agg_req, &index)?;
-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
-        assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
-
-        Ok(())
-    }
-
    #[test]
    fn terms_aggregation_test_order_key_single_segment() -> crate::Result<()> {
        terms_aggregation_test_order_key_merge_segment(true)
@@ -3231,101 +2894,4 @@ mod tests {

        Ok(())
    }
-
-    fn prep_index_with_n_unique_terms_plus_one_null(n: u64) -> crate::Result<Index> {
-        let mut schema_builder = Schema::builder();
-        let id_field = schema_builder.add_u64_field("id", INDEXED);
-        let title_field = schema_builder.add_text_field("title", TEXT | FAST);
-        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema.clone());
-        // set to one thread to guarantee all docs end up in the same segment
-        let mut writer = index.writer_with_num_threads(1, 50_000_000)?;
-
-        writer.add_document(doc!(
-            id_field => 0u64,
-        ))?;
-        for i in 1u64..=n {
-            let title = format!("foo{i}");
-            writer.add_document(doc!(
-                id_field => i,
-                title_field => title,
-            ))?;
-        }
-
-        writer.commit()?;
-
-        Ok(index)
-    }
-
-    #[test]
-    fn null_bitset_bounds_check_regression() -> crate::Result<()> {
-        // include cases
-        for i in 0..=4 {
-            let index = prep_index_with_n_unique_terms_plus_one_null(i * 64)?;
-            let normal_req: Aggregations = serde_json::from_value(json!({
-                "my_bool": {
-                    "terms": {
-                        "field": "title",
-                        "missing": "__NULL__",
-                        "size": 1000,
-                    }
-                }
-            }))?;
-            let include_req: Aggregations = serde_json::from_value(json!({
-                "my_bool": {
-                    "terms": {
-                        "field": "title",
-                        "include": "foo(.*)",
-                        "missing": "__NULL__",
-                        "size": 1000,
-                    }
-                }
-            }))?;
-            let exclude_req: Aggregations = serde_json::from_value(json!({
-                "my_bool": {
-                    "terms": {
-                        "field": "title",
-                        "exclude": "foo(.*)",
-                        "missing": "__NULL__",
-                        "size": 1000,
-                    }
-                }
-            }))?;
-
-            let normal_res = exec_request(normal_req, &index)?;
-            let normal_buckets = normal_res["my_bool"]["buckets"].as_array().unwrap();
-            assert_eq!(
-                normal_buckets.len(),
-                (i * 64) as usize + 1,
-                "The normal request should return all 'foo' buckets, plus the missing term bucket",
-            );
-
-            let include_res = exec_request(include_req, &index)?;
-            eprintln!("include_res: {include_res:?}");
-            let include_buckets = include_res["my_bool"]["buckets"].as_array().unwrap();
-            assert_eq!(
-                include_buckets.len(),
-                (i * 64) as usize,
-                "The include request should return all 'foo' buckets, and not the missing term \
-                 bucket",
-            );
-            assert!(include_buckets
-                .iter()
-                .all(|b| b["key"].as_str().unwrap().starts_with("foo")));
-
-            let exclude_res = exec_request(exclude_req, &index)?;
-            let exclude_buckets = exclude_res["my_bool"]["buckets"].as_array().unwrap();
-            if i != 0 {
-                // TODO: Remove this if after fixing exclude + missing bug
-                assert_eq!(
-                    exclude_buckets.len(),
-                    1,
-                    "The exclude request should exclude all 'foo' buckets, and only the missing \
-                     term bucket",
-                );
-                assert_eq!(exclude_buckets[0]["key"], "__NULL__");
-            }
-        }
-        Ok(())
-    }
 }
--- a/src/aggregation/bucket/term_agg/term_histogram.rs
+++ b/src/aggregation/bucket/term_agg/term_histogram.rs
@@ -1,585 +0,0 @@
-//! Fused collector for the very common shape `terms` (low cardinality) × a single
-//! `histogram`/`date_histogram` sub-aggregation with nothing nested below it.
-//!
-//! See [`SegmentTermHistogramCollector`] for the approach and [`maybe_build_collector`] for the
-//! conditions under which it is used.
-
-use columnar::ColumnBlockAccessor;
-
-use super::{Bucket, SegmentTermCollector, TermsAggReqData, VecTermBuckets};
-use crate::aggregation::agg_data::{AggKind, AggRefNode, AggregationsSegmentCtx};
-use crate::aggregation::bucket::{
-    get_bucket_pos_f64, prepare_histogram_dense_range, HistogramAggReqData,
-    SegmentHistogramCollector,
-};
-use crate::aggregation::buffered_sub_aggs::LowCardSubAggBuffer;
-use crate::aggregation::intermediate_agg_result::{
-    IntermediateAggregationResult, IntermediateAggregationResults,
-};
-use crate::aggregation::segment_agg_result::{BucketIdProvider, SegmentAggregationCollector};
-use crate::aggregation::{f64_from_fastfield_u64, BucketId};
-
-/// Maximum number of cells (`num_terms × num_time_buckets`) in the fused flat 2D grid. Above this
-/// the grid would be too large/cache-unfriendly, so we fall back to the general buffered path.
-/// `1 << 14` cells = 128 KB of `u64` counters, comfortably L2-resident.
-///
-/// Since we are only at the top-level, this won't be multiplied by any parent buckets.
-const MAX_FUSED_GRID_BUCKETS: usize = 16384;
-
-/// Fused collector for `terms` (low cardinality) × a single `histogram`/`date_histogram` leaf with
-/// nothing nested below it, when the resulting `num_terms × num_time_buckets` grid is small (see
-/// [`MAX_FUSED_GRID_BUCKETS`]).
-///
-/// It keeps a flat, fully dense 2D counter grid (`counts[term * num_time_buckets + bucket]`) and a
-/// per-term total. A single pass reads both the term and histogram columns in document order and
-/// bumps the counters directly — no doc-id buffering, no per-term scattered re-fetch, no dynamic
-/// dispatch on flush, no per-bucket key/id storage during collection (keys are derived from the
-/// index at the end).
-///
-/// At result time the flat grid is expanded back into the regular term map + histogram storage and
-/// handed to the shared intermediate-result builders, so cross-segment merging is identical to the
-/// general path.
-#[derive(Debug)]
-pub(crate) struct SegmentTermHistogramCollector {
-    /// Per-term count of docs *outside* `hard_bounds` (still in `doc_count`, but in no bucket).
-    /// Per-term total = this + the term's `counts` row-sum; left empty when there are no hard
-    /// bounds (every doc is in-bounds, so there's no remainder to track).
-    term_counts: Vec<u32>,
-    /// Flattened `[num_terms * num_time_buckets]` histogram counters (`u32`, see
-    /// `term_counts`).
-    ///
-    /// Each term id get its own contiguous slice of `num_time_buckets` histogram counter.
-    /// When we count all docs (#nofilter), we can derive the per-term total as the sum over that
-    /// term's slice.
-    counts: Vec<u32>,
-    /// Histogram buckets per term (the dense time-range length).
-    num_time_buckets: usize,
-    /// `bucket_pos` mapped to time-bucket index 0.
-    base_pos: i64,
-    terms_req_data: TermsAggReqData,
-    /// The (cloned, normalized) histogram request: its column + interval/offset/bounds.
-    hist_req_data: HistogramAggReqData,
-    /// Private block accessors for both columns. We read them together, so each needs its own
-    /// (the shared `agg_data` scratch accessor only holds one block at a time). Owning them keeps
-    /// `collect` independent of `agg_data`.
-    term_block: ColumnBlockAccessor<u64>,
-    hist_block: ColumnBlockAccessor<u64>,
-    /// No hard bounds, so every doc is in-bounds.
-    all_docs_in_bounds: bool,
-    /// Both columns are full (fused-path precondition); cached so `collect` skips the per-block
-    /// cardinality lookup in `fetch_block`.
-    is_full: bool,
-}
-
-impl SegmentAggregationCollector for SegmentTermHistogramCollector {
-    fn add_intermediate_aggregation_result(
-        &mut self,
-        agg_data: &AggregationsSegmentCtx,
-        results: &mut IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
-    ) -> crate::Result<()> {
-        debug_assert_eq!(
-            parent_bucket_id, 0,
-            "fused term-histogram collector is top-level only"
-        );
-        // Expand the flat grid back into the regular structures and reuse the shared builders, so
-        // ordering/cut-off/dict handling and cross-segment merging match the general path exactly.
-        let mut bucket_id_provider = BucketIdProvider::default();
-        // Per-term total = histogram row-sum (in-bounds) + `term_counts` (out-of-bounds remainder,
-        // empty when there are no hard bounds).
-        let term_buckets = VecTermBuckets {
-            buckets: self
-                .counts
-                .chunks_exact(self.num_time_buckets)
-                .enumerate()
-                .map(|(term_id, row)| {
-                    let in_bounds: u32 = row.iter().sum();
-                    let out_of_bounds = self.term_counts.get(term_id).copied().unwrap_or(0);
-                    Bucket {
-                        count: in_bounds + out_of_bounds,
-                        bucket_id: bucket_id_provider.next_bucket_id(),
-                    }
-                })
-                .collect(),
-        };
-        let mut histogram = SegmentHistogramCollector::<()>::from_dense_rows(
-            self.hist_req_data.clone(),
-            self.base_pos,
-            self.num_time_buckets,
-            &self.counts,
-        );
-        let name = self.terms_req_data.name.clone();
-        let bucket = SegmentTermCollector::<VecTermBuckets, LowCardSubAggBuffer>::into_intermediate_bucket_result(
-            &self.terms_req_data,
-            Some(&mut histogram as &mut dyn SegmentAggregationCollector),
-            term_buckets,
-            agg_data,
-        )?;
-        results.push(name, IntermediateAggregationResult::Bucket(bucket))?;
-        Ok(())
-    }
-
-    #[inline]
-    fn collect(
-        &mut self,
-        parent_bucket_id: BucketId,
-        docs: &[crate::DocId],
-        _agg_data: &mut AggregationsSegmentCtx,
-    ) -> crate::Result<()> {
-        debug_assert_eq!(
-            parent_bucket_id, 0,
-            "fused term-histogram collector is top-level only"
-        );
-
-        // Fetch both columns into our own accessors (we read them together, so they can't share the
-        // single `agg_data` scratch accessor). The collector owns all its inputs, so `collect`
-        // doesn't touch `agg_data`.
-        self.term_block
-            .fetch_block_with_is_full(docs, &self.terms_req_data.accessor, self.is_full);
-        self.hist_block
-            .fetch_block_with_is_full(docs, &self.hist_req_data.accessor, self.is_full);
-
-        // Hoist the loop-invariant fields into locals: the optimizer can't prove the
-        // `self.counts`/`self.term_counts` writes don't alias these `self` fields, so it can't keep
-        // them in registers and re-reads them from memory every iteration — ~15% slower on
-        // `terms_status_with_date_histogram` when read straight from `self`.
-        // Note: check which are actually relevant.
-        let field_type = self.hist_req_data.field_type;
-        let bounds = self.hist_req_data.bounds;
-        let interval = self.hist_req_data.req.interval;
-        let offset = self.hist_req_data.offset;
-        let base_pos = self.base_pos;
-        let num_time_buckets = self.num_time_buckets;
-        let all_docs_in_bounds = self.all_docs_in_bounds;
-        let term_counts = &mut self.term_counts;
-        let counts = &mut self.counts;
-
-        // Both columns are full (checked at construction), so values align with `docs` positionally
-        // and are read together in one pass.
-        // In-bounds docs bump the `counts` grid, out-of-bounds bump `term_counts`; deriving the
-        // total at flush avoids a per-doc `term_counts` RMW that serializes on
-        // store-to-load forwarding.
-        for (term_id, hist_raw) in self.term_block.iter_vals().zip(self.hist_block.iter_vals()) {
-            let term_id = term_id as usize;
-            let val = f64_from_fastfield_u64(hist_raw, field_type);
-            if all_docs_in_bounds || bounds.contains(val) {
-                let bucket = (get_bucket_pos_f64(val, interval, offset) as i64 - base_pos) as usize;
-                debug_assert!(
-                    bucket < num_time_buckets,
-                    "histogram bucket outside dense range"
-                );
-                counts[term_id * num_time_buckets + bucket] += 1;
-            } else {
-                term_counts[term_id] += 1;
-            }
-        }
-        Ok(())
-    }
-
-    fn flush(&mut self, _agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
-        // Nothing is buffered: `collect` writes the flat grid directly.
-        Ok(())
-    }
-
-    fn prepare_max_bucket(
-        &mut self,
-        _max_bucket: BucketId,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> crate::Result<()> {
-        // Top-level: the flat grid is allocated up front.
-        Ok(())
-    }
-
-    fn compute_metric_value(
-        &self,
-        _bucket_id: BucketId,
-        _sub_agg_name: &str,
-        _sub_agg_property: &str,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        None
-    }
-}
-
-/// Builds the fused terms×histogram collector for a single top-level parent, when the shape is
-/// eligible. Returns `Ok(None)` to fall back to the general buffered terms path.
-///
-/// Eligibility: top-level, low-cardinality terms over a full column with no missing/include-exclude
-/// handling; a single `histogram`/`date_histogram` leaf (no nesting below it) over a full column;
-/// and a `num_terms × num_time_buckets` grid no larger than [`MAX_FUSED_GRID_BUCKETS`].
-pub(super) fn maybe_build_collector(
-    agg_data: &mut AggregationsSegmentCtx,
-    node: &AggRefNode,
-    terms_req_data: &TermsAggReqData,
-    col_max_val: u64,
-    is_top_level: bool,
-) -> crate::Result<Option<Box<dyn SegmentAggregationCollector>>> {
-    // Both columns must be full (one value per doc) so their values align positionally with `docs`
-    // and we can zip them. Requiring full columns also makes the terms agg's `missing` config a
-    // no-op (`fetch_block_with_missing` early-returns on full columns), so we needn't check for it.
-    //
-    // We don't cap the term cardinality here: the flat grid is bounded by the total cell count
-    // (`num_terms * num_time_buckets <= MAX_FUSED_GRID_BUCKETS`) checked below, which subsumes it.
-    //
-    // We only allow this at the top-level, since we don't know how many buckets are created. We
-    // are less likely to get enough docs for the preallocation to be worth and there's a risk of
-    // using too much memory. We could check the maximum theoretical buckets up-front and pass
-    // them down.
-    let fuseable = is_top_level
-        // TODO: We can easily support this
-        && terms_req_data.allowed_term_ids.is_none()
-        && terms_req_data.accessor.get_cardinality().is_full()
-        // The flat counters are `u32`, bumped once per value, so no count can exceed the column's
-        // value count. (Essentially always true here: the column is full, so its value count
-        // equals the doc count, and `DocId` is `u32`.)
-        && terms_req_data.accessor.values.num_vals() < u32::MAX
-        && node.children.len() == 1
-        && matches!(
-            node.children[0].kind,
-            AggKind::Histogram | AggKind::DateHistogram
-        )
-        && node.children[0].children.is_empty()
-        && agg_data.per_request.histogram_req_data[node.children[0].idx_in_req_data]
-            .accessor
-            .get_cardinality()
-            .is_full();
-    if !fuseable {
-        return Ok(None);
-    }
-
-    // Clone + normalize the histogram request and get its dense bucket range; only take the fused
-    // path when the flat `num_terms × num_time_buckets` grid is small enough.
-    let Some((hist_req_data, range)) = prepare_histogram_dense_range(agg_data, &node.children[0])?
-    else {
-        return Ok(None);
-    };
-    let num_terms = col_max_val.saturating_add(1) as usize;
-    if num_terms.saturating_mul(range.len) > MAX_FUSED_GRID_BUCKETS {
-        return Ok(None);
-    }
-
-    // No hard bounds means every doc is in-bounds, letting `collect` short-circuit the bounds
-    // check — and leaving `term_counts` (the out-of-bounds remainder) unused, so we skip allocating
-    // it.
-    let all_docs_in_bounds =
-        hist_req_data.bounds.min == f64::MIN && hist_req_data.bounds.max == f64::MAX;
-    let counts = vec![0u32; num_terms * range.len];
-    let term_counts = if all_docs_in_bounds {
-        Vec::new()
-    } else {
-        vec![0u32; num_terms]
-    };
-    // Charge both grids to the aggregation memory limit.
-    agg_data.context.limits.add_memory_consumed(
-        ((counts.len() + term_counts.len()) * std::mem::size_of::<u32>()) as u64,
-    )?;
-    Ok(Some(Box::new(SegmentTermHistogramCollector {
-        term_counts,
-        counts,
-        num_time_buckets: range.len,
-        base_pos: range.base_pos,
-        terms_req_data: terms_req_data.clone(),
-        hist_req_data,
-        term_block: ColumnBlockAccessor::default(),
-        hist_block: ColumnBlockAccessor::default(),
-        all_docs_in_bounds,
-        is_full: terms_req_data.accessor.get_cardinality().is_full(),
-    })))
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::aggregation::agg_req::Aggregations;
-    use crate::aggregation::tests::{
-        exec_request, exec_request_with_query_and_memory_limit,
-        get_test_index_from_values_and_terms,
-    };
-    use crate::aggregation::AggregationLimitsGuard;
-
-    /// Hand-computed correctness check for the fused terms×histogram fast path
-    /// ([`super::SegmentTermHistogramCollector`]): low-cardinality terms × a histogram leaf over
-    /// full columns, exercised single- and multi-segment.
-    #[test]
-    fn fused_term_histogram_test() -> crate::Result<()> {
-        fused_term_histogram_with_opt(false)?;
-        fused_term_histogram_with_opt(true)?;
-        Ok(())
-    }
-
-    fn fused_term_histogram_with_opt(merge_segments: bool) -> crate::Result<()> {
-        // 300 docs: term = {a, b, c} by i % 3, histogram value = i % 20 (interval 1 => buckets
-        // 0..19). gcd(3, 20) = 1, so every (term, bucket) pair occurs exactly 300 / 60 = 5 times.
-        let docs: Vec<(f64, String)> = (0..300u64)
-            .map(|i| {
-                (
-                    (i % 20) as f64,
-                    ["a", "b", "c"][(i % 3) as usize].to_string(),
-                )
-            })
-            .collect();
-        // Two segments, to also exercise cross-segment merging of the fused per-term histograms.
-        let segments = vec![docs[..150].to_vec(), docs[150..].to_vec()];
-        let index = get_test_index_from_values_and_terms(merge_segments, &segments)?;
-
-        let agg_req: Aggregations = serde_json::from_value(serde_json::json!({
-            "by_term": {
-                "terms": { "field": "string_id", "order": { "_key": "asc" } },
-                "aggs": {
-                    "histo": { "histogram": { "field": "score_f64", "interval": 1.0 } }
-                }
-            }
-        }))
-        .unwrap();
-
-        let res = exec_request(agg_req, &index)?;
-
-        for (term_idx, term) in ["a", "b", "c"].iter().enumerate() {
-            assert_eq!(res["by_term"]["buckets"][term_idx]["key"], *term);
-            assert_eq!(res["by_term"]["buckets"][term_idx]["doc_count"], 100);
-            let histo = &res["by_term"]["buckets"][term_idx]["histo"]["buckets"];
-            for b in 0..20usize {
-                assert_eq!(histo[b]["key"], b as f64, "term {term} bucket {b}");
-                assert_eq!(histo[b]["doc_count"], 5, "term {term} bucket {b}");
-            }
-            assert_eq!(histo[20], serde_json::Value::Null);
-        }
-        assert_eq!(res["by_term"]["buckets"][3], serde_json::Value::Null);
-
-        Ok(())
-    }
-
-    /// A `missing` config on a *full* term column still takes the fused path (the string sentinel
-    /// is just `col_max + 1`, so the column stays low-cardinality). Since no doc is missing, the
-    /// real term buckets must be exactly as without `missing`.
-    #[test]
-    fn fused_term_histogram_with_missing_on_full_column() -> crate::Result<()> {
-        let docs: Vec<(f64, String)> = (0..300u64)
-            .map(|i| {
-                (
-                    (i % 20) as f64,
-                    ["a", "b", "c"][(i % 3) as usize].to_string(),
-                )
-            })
-            .collect();
-        let index = get_test_index_from_values_and_terms(true, &[docs])?;
-
-        let agg_req: Aggregations = serde_json::from_value(serde_json::json!({
-            "by_term": {
-                "terms": { "field": "string_id", "missing": "MISSING", "order": { "_key": "asc" } },
-                "aggs": {
-                    "histo": { "histogram": { "field": "score_f64", "interval": 1.0 } }
-                }
-            }
-        }))
-        .unwrap();
-
-        let res = exec_request(agg_req, &index)?;
-
-        // Column is full, so "MISSING" never applies: a, b, c are unchanged (100 docs, 5 per
-        // bucket).
-        for (term_idx, term) in ["a", "b", "c"].iter().enumerate() {
-            assert_eq!(res["by_term"]["buckets"][term_idx]["key"], *term);
-            assert_eq!(res["by_term"]["buckets"][term_idx]["doc_count"], 100);
-            let histo = &res["by_term"]["buckets"][term_idx]["histo"]["buckets"];
-            for b in 0..20usize {
-                assert_eq!(histo[b]["doc_count"], 5, "term {term} bucket {b}");
-            }
-        }
-
-        Ok(())
-    }
-
-    /// Term cardinality above the general path's `MAX_NUM_TERMS_FOR_VEC` (100) still fuses: the
-    /// flat grid is bounded by the total cell count (`num_terms * num_time_buckets`), not the
-    /// term count.
-    #[test]
-    fn fused_term_histogram_many_terms() -> crate::Result<()> {
-        let num_terms = 150usize;
-        let docs_per_term = 2usize;
-        // All docs share histogram value 0 (a single bucket), so the grid is 150 x 1 = 150 cells.
-        let docs: Vec<(f64, String)> = (0..num_terms * docs_per_term)
-            .map(|i| (0.0, format!("t{:03}", i % num_terms)))
-            .collect();
-        let index = get_test_index_from_values_and_terms(true, &[docs])?;
-
-        let agg_req: Aggregations = serde_json::from_value(serde_json::json!({
-            "by_term": {
-                "terms": { "field": "string_id", "size": 1000, "order": { "_key": "asc" } },
-                "aggs": {
-                    "histo": { "histogram": { "field": "score_f64", "interval": 1.0 } }
-                }
-            }
-        }))
-        .unwrap();
-
-        let res = exec_request(agg_req, &index)?;
-
-        let buckets = res["by_term"]["buckets"].as_array().unwrap();
-        assert_eq!(buckets.len(), num_terms);
-        for (i, bucket) in buckets.iter().enumerate() {
-            assert_eq!(bucket["key"], format!("t{i:03}"));
-            assert_eq!(bucket["doc_count"], docs_per_term as u64);
-            assert_eq!(bucket["histo"]["buckets"][0]["key"], 0.0);
-            assert_eq!(
-                bucket["histo"]["buckets"][0]["doc_count"],
-                docs_per_term as u64
-            );
-        }
-
-        Ok(())
-    }
-
-    /// `hard_bounds` exercises the non-derived `term_counts` branch: a term's `doc_count` must
-    /// count *every* doc with that term, including docs whose histogram value is outside the
-    /// bounds (those are excluded from the histogram buckets but still counted for the term). This
-    /// is the case where the per-doc `term_counts` increment cannot be replaced by the grid
-    /// row-sum.
-    #[test]
-    fn fused_term_histogram_with_hard_bounds() -> crate::Result<()> {
-        // 300 docs: term = {a, b, c} by i % 3, value = i % 20. Per term: 100 docs, each value in
-        // 0..=19 occurring 5 times.
-        let docs: Vec<(f64, String)> = (0..300u64)
-            .map(|i| {
-                (
-                    (i % 20) as f64,
-                    ["a", "b", "c"][(i % 3) as usize].to_string(),
-                )
-            })
-            .collect();
-        let index = get_test_index_from_values_and_terms(true, &[docs])?;
-
-        // hard_bounds [5, 14] (inclusive) keeps only values 5..=14 in the histogram (10 buckets);
-        // values 0..=4 and 15..=19 are out of bounds.
-        let agg_req: Aggregations = serde_json::from_value(serde_json::json!({
-            "by_term": {
-                "terms": { "field": "string_id", "order": { "_key": "asc" } },
-                "aggs": {
-                    "histo": {
-                        "histogram": {
-                            "field": "score_f64",
-                            "interval": 1.0,
-                            "hard_bounds": { "min": 5.0, "max": 14.0 }
-                        }
-                    }
-                }
-            }
-        }))
-        .unwrap();
-
-        let res = exec_request(agg_req, &index)?;
-
-        for (term_idx, term) in ["a", "b", "c"].iter().enumerate() {
-            assert_eq!(res["by_term"]["buckets"][term_idx]["key"], *term);
-            // doc_count includes the 50 per-term docs whose value is outside [5, 14].
-            assert_eq!(res["by_term"]["buckets"][term_idx]["doc_count"], 100);
-            let histo = &res["by_term"]["buckets"][term_idx]["histo"]["buckets"];
-            for b in 0..10usize {
-                let key = 5 + b;
-                assert_eq!(histo[b]["key"], key as f64, "term {term} bucket key {key}");
-                assert_eq!(histo[b]["doc_count"], 5, "term {term} bucket {key}");
-            }
-            // Only the 10 in-bounds buckets exist.
-            assert_eq!(histo[10], serde_json::Value::Null);
-        }
-
-        Ok(())
-    }
-
-    /// Non-binding `hard_bounds` (wider than the data, with mid-interval edges) must still produce
-    /// exact results via the derive-from-grid path: since no doc is out of bounds, normalization
-    /// drops the bound, every doc lands in the dense range, and each term's total equals its
-    /// histogram row-sum. This is the case that previously fell back to the per-doc counter only
-    /// because `bounds != [MIN, MAX]`.
-    #[test]
-    fn fused_term_histogram_with_non_binding_hard_bounds() -> crate::Result<()> {
-        // 300 docs: term = {a, b, c} by i % 3, value = i % 20. Data values span [0, 19].
-        let docs: Vec<(f64, String)> = (0..300u64)
-            .map(|i| {
-                (
-                    (i % 20) as f64,
-                    ["a", "b", "c"][(i % 3) as usize].to_string(),
-                )
-            })
-            .collect();
-        let index = get_test_index_from_values_and_terms(true, &[docs])?;
-
-        // Bounds wider than [0, 19], with mid-interval edges -> they exclude nothing.
-        let agg_req: Aggregations = serde_json::from_value(serde_json::json!({
-            "by_term": {
-                "terms": { "field": "string_id", "order": { "_key": "asc" } },
-                "aggs": {
-                    "histo": {
-                        "histogram": {
-                            "field": "score_f64",
-                            "interval": 1.0,
-                            "hard_bounds": { "min": -0.5, "max": 19.5 }
-                        }
-                    }
-                }
-            }
-        }))
-        .unwrap();
-
-        let res = exec_request(agg_req, &index)?;
-
-        for (term_idx, term) in ["a", "b", "c"].iter().enumerate() {
-            assert_eq!(res["by_term"]["buckets"][term_idx]["key"], *term);
-            // Every doc is in-bounds, so the per-term total is the full 100 (as without bounds).
-            assert_eq!(res["by_term"]["buckets"][term_idx]["doc_count"], 100);
-            let histo = &res["by_term"]["buckets"][term_idx]["histo"]["buckets"];
-            for b in 0..20usize {
-                assert_eq!(histo[b]["key"], b as f64, "term {term} bucket {b}");
-                assert_eq!(histo[b]["doc_count"], 5, "term {term} bucket {b}");
-            }
-            assert_eq!(histo[20], serde_json::Value::Null);
-        }
-
-        Ok(())
-    }
-
-    /// Regression: with hard bounds the fused path allocates `term_counts` (one `u32`/term) on top
-    /// of the grid, and that allocation must be charged to the memory limit. With many terms and a
-    /// single time bucket the two are equal in size, so a limit admitting the grid alone but not
-    /// grid + `term_counts` must fail.
-    #[test]
-    fn fused_term_histogram_hard_bounds_charges_term_counts() -> crate::Result<()> {
-        // 16k distinct terms, one doc each; values alternate in/out of the single-bucket bounds
-        // [5, 5] so the bounds bind and `term_counts` is allocated. num_terms=16000,
-        // num_time_buckets=1 => `counts` and `term_counts` are ~64 KB each.
-        let docs: Vec<(f64, String)> = (0..16_000u64)
-            .map(|i| (if i % 2 == 0 { 5.0 } else { 10.0 }, format!("t{i:05}")))
-            .collect();
-        let index = get_test_index_from_values_and_terms(true, &[docs])?;
-
-        let agg_req: Aggregations = serde_json::from_value(serde_json::json!({
-            "by_term": {
-                "terms": { "field": "string_id" },
-                "aggs": {
-                    "histo": {
-                        "histogram": {
-                            "field": "score_f64",
-                            "interval": 1.0,
-                            "hard_bounds": { "min": 5.0, "max": 5.0 }
-                        }
-                    }
-                }
-            }
-        }))
-        .unwrap();
-
-        // ~96 KB admits the grid (~64 KB) but not grid + `term_counts` (~128 KB).
-        let err = exec_request_with_query_and_memory_limit(
-            agg_req,
-            &index,
-            None,
-            AggregationLimitsGuard::new(Some(96_000), None),
-        )
-        .unwrap_err();
-        assert!(
-            err.to_string().contains("memory limit was exceeded"),
-            "expected a memory-limit error, got: {err}"
-        );
-
-        Ok(())
-    }
-}
--- a/src/aggregation/bucket/term_missing_agg.rs
+++ b/src/aggregation/bucket/term_missing_agg.rs
@@ -5,7 +5,7 @@ use crate::aggregation::agg_data::{
    build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
 };
 use crate::aggregation::bucket::term_agg::TermsAggregation;
-use crate::aggregation::buffered_sub_aggs::{BufferedSubAggs, HighCardBufferedSubAggs};
+use crate::aggregation::cached_sub_aggs::{CachedSubAggs, HighCardCachedSubAggs};
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
    IntermediateKey, IntermediateTermBucketEntry, IntermediateTermBucketResult,
@@ -47,7 +47,7 @@ struct MissingCount {
 #[derive(Default, Debug)]
 pub struct TermMissingAgg {
    accessor_idx: usize,
-    sub_agg: Option<HighCardBufferedSubAggs>,
+    sub_agg: Option<HighCardCachedSubAggs>,
    /// Idx = parent bucket id, Value = missing count for that bucket
    missing_count_per_bucket: Vec<MissingCount>,
    bucket_id_provider: BucketIdProvider,
@@ -66,7 +66,7 @@ impl TermMissingAgg {
            None
        };

-        let sub_agg = sub_agg.map(BufferedSubAggs::new);
+        let sub_agg = sub_agg.map(CachedSubAggs::new);
        let bucket_id_provider = BucketIdProvider::default();

        Ok(Self {
@@ -177,17 +177,6 @@ impl SegmentAggregationCollector for TermMissingAgg {
        }
        Ok(())
    }
-
-    fn compute_metric_value(
-        &self,
-        _bucket_id: BucketId,
-        _sub_agg_name: &str,
-        _sub_agg_property: &str,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        // TODO: forward to `sub_agg` for nested order paths (`missing_agg>metric`).
-        None
-    }
 }

 #[cfg(test)]
--- a/src/aggregation/buffered_sub_aggs.rs
+++ b/src/aggregation/buffered_sub_aggs.rs
@@ -6,7 +6,7 @@ use crate::aggregation::bucket::MAX_NUM_TERMS_FOR_VEC;
 use crate::aggregation::BucketId;
 use crate::DocId;

-/// A buffer for sub-aggregations, storing doc ids per bucket id.
+/// A cache for sub-aggregations, storing doc ids per bucket id.
 /// Depending on the cardinality of the parent aggregation, we use different
 /// storage strategies.
 ///
@@ -24,21 +24,21 @@ use crate::DocId;
 /// aggregations.
 /// What this datastructure does in general is to group docs by bucket id.
 #[derive(Debug)]
-pub(crate) struct BufferedSubAggs<B: SubAggBuffer> {
-    buffer: B,
+pub(crate) struct CachedSubAggs<C: SubAggCache> {
+    cache: C,
    sub_agg_collector: Box<dyn SegmentAggregationCollector>,
    num_docs: usize,
 }

-pub type LowCardBufferedSubAggs = BufferedSubAggs<LowCardSubAggBuffer>;
-pub type HighCardBufferedSubAggs = BufferedSubAggs<HighCardSubAggBuffer>;
+pub type LowCardCachedSubAggs = CachedSubAggs<LowCardSubAggCache>;
+pub type HighCardCachedSubAggs = CachedSubAggs<HighCardSubAggCache>;

 const FLUSH_THRESHOLD: usize = 2048;

-/// A trait for buffering sub-aggregation doc ids per bucket id.
+/// A trait for caching sub-aggregation doc ids per bucket id.
 /// Different implementations can be used depending on the cardinality
 /// of the parent aggregation.
-pub trait SubAggBuffer: Debug {
+pub trait SubAggCache: Debug {
    fn new() -> Self;
    fn push(&mut self, bucket_id: BucketId, doc_id: DocId);
    fn flush_local(
@@ -49,22 +49,22 @@ pub trait SubAggBuffer: Debug {
    ) -> crate::Result<()>;
 }

-impl<Backend: SubAggBuffer + Debug> BufferedSubAggs<Backend> {
+impl<Backend: SubAggCache + Debug> CachedSubAggs<Backend> {
    pub fn new(sub_agg: Box<dyn SegmentAggregationCollector>) -> Self {
        Self {
-            buffer: Backend::new(),
+            cache: Backend::new(),
            sub_agg_collector: sub_agg,
            num_docs: 0,
        }
    }

-    pub fn get_sub_agg_collector(&mut self) -> &mut dyn SegmentAggregationCollector {
-        &mut *self.sub_agg_collector
+    pub fn get_sub_agg_collector(&mut self) -> &mut Box<dyn SegmentAggregationCollector> {
+        &mut self.sub_agg_collector
    }

    #[inline]
    pub fn push(&mut self, bucket_id: BucketId, doc_id: DocId) {
-        self.buffer.push(bucket_id, doc_id);
+        self.cache.push(bucket_id, doc_id);
        self.num_docs += 1;
    }

@@ -75,7 +75,7 @@ impl<Backend: SubAggBuffer + Debug> BufferedSubAggs<Backend> {
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
        if self.num_docs >= FLUSH_THRESHOLD {
-            self.buffer
+            self.cache
                .flush_local(&mut self.sub_agg_collector, agg_data, false)?;
            self.num_docs = 0;
        }
@@ -85,7 +85,7 @@ impl<Backend: SubAggBuffer + Debug> BufferedSubAggs<Backend> {
    /// Note: this _does_ flush the sub aggregations.
    pub fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
        if self.num_docs != 0 {
-            self.buffer
+            self.cache
                .flush_local(&mut self.sub_agg_collector, agg_data, true)?;
            self.num_docs = 0;
        }
@@ -94,11 +94,11 @@ impl<Backend: SubAggBuffer + Debug> BufferedSubAggs<Backend> {
    }
 }

-/// Number of partitions for high cardinality sub-aggregation buffer.
+/// Number of partitions for high cardinality sub-aggregation cache.
 const NUM_PARTITIONS: usize = 16;

 #[derive(Debug)]
-pub(crate) struct HighCardSubAggBuffer {
+pub(crate) struct HighCardSubAggCache {
    /// This weird partitioning is used to do some cheap grouping on the bucket ids.
    /// bucket ids are dense, e.g. when we don't detect the cardinality as low cardinality,
    /// but there are just 16 bucket ids, each bucket id will go to its own partition.
@@ -108,7 +108,7 @@ pub(crate) struct HighCardSubAggBuffer {
    partitions: Box<[PartitionEntry; NUM_PARTITIONS]>,
 }

-impl HighCardSubAggBuffer {
+impl HighCardSubAggCache {
    #[inline]
    fn clear(&mut self) {
        for partition in self.partitions.iter_mut() {
@@ -131,14 +131,13 @@ impl PartitionEntry {
    }
 }

-impl SubAggBuffer for HighCardSubAggBuffer {
+impl SubAggCache for HighCardSubAggCache {
    fn new() -> Self {
        Self {
            partitions: Box::new(core::array::from_fn(|_| PartitionEntry::default())),
        }
    }

-    #[inline]
    fn push(&mut self, bucket_id: BucketId, doc_id: DocId) {
        let idx = bucket_id % NUM_PARTITIONS as u32;
        let slot = &mut self.partitions[idx as usize];
@@ -174,14 +173,14 @@ impl SubAggBuffer for HighCardSubAggBuffer {
 }

 #[derive(Debug)]
-pub(crate) struct LowCardSubAggBuffer {
-    /// Buffer doc ids per bucket for sub-aggregations.
+pub(crate) struct LowCardSubAggCache {
+    /// Cache doc ids per bucket for sub-aggregations.
    ///
    /// The outer Vec is indexed by BucketId.
    per_bucket_docs: Vec<Vec<DocId>>,
 }

-impl LowCardSubAggBuffer {
+impl LowCardSubAggCache {
    #[inline]
    fn clear(&mut self) {
        for v in &mut self.per_bucket_docs {
@@ -190,14 +189,13 @@ impl LowCardSubAggBuffer {
    }
 }

-impl SubAggBuffer for LowCardSubAggBuffer {
+impl SubAggCache for LowCardSubAggCache {
    fn new() -> Self {
        Self {
            per_bucket_docs: Vec::new(),
        }
    }

-    #[inline]
    fn push(&mut self, bucket_id: BucketId, doc_id: DocId) {
        let idx = bucket_id as usize;
        if self.per_bucket_docs.len() <= idx {
--- a/src/aggregation/collector.rs
+++ b/src/aggregation/collector.rs
@@ -1,6 +1,6 @@
 use super::agg_req::Aggregations;
 use super::agg_result::AggregationResults;
-use super::buffered_sub_aggs::LowCardBufferedSubAggs;
+use super::cached_sub_aggs::LowCardCachedSubAggs;
 use super::intermediate_agg_result::IntermediateAggregationResults;
 use super::AggContextParams;
 // group buffering strategy is chosen explicitly by callers; no need to hash-group on the fly.
@@ -136,7 +136,7 @@ fn merge_fruits(
 /// `AggregationSegmentCollector` does the aggregation collection on a segment.
 pub struct AggregationSegmentCollector {
    aggs_with_accessor: AggregationsSegmentCtx,
-    agg_collector: LowCardBufferedSubAggs,
+    agg_collector: LowCardCachedSubAggs,
    error: Option<TantivyError>,
 }

@@ -152,7 +152,7 @@ impl AggregationSegmentCollector {
        let mut agg_data =
            build_aggregations_data_from_req(agg, reader, segment_ordinal, context.clone())?;
        let mut result =
-            LowCardBufferedSubAggs::new(build_segment_agg_collectors_root(&mut agg_data)?);
+            LowCardCachedSubAggs::new(build_segment_agg_collectors_root(&mut agg_data)?);
        result
            .get_sub_agg_collector()
            .prepare_max_bucket(0, &agg_data)?; // prepare for bucket zero
--- a/src/aggregation/intermediate_agg_result.rs
+++ b/src/aggregation/intermediate_agg_result.rs
@@ -377,22 +377,7 @@ impl IntermediateMetricResult {
                MetricResult::ExtendedStats(intermediate_stats.finalize())
            }
            IntermediateMetricResult::Sum(intermediate_sum) => {
-                // By default match Elasticsearch: empty / all-missing sum
-                // buckets serialize as `"value": 0`, not `"value": null`.
-                // The non-ES `none_if_no_match` flag on `SumAggregation`
-                // opts into SQL-style `null` for downstream consumers.
-                let none_if_no_match = req
-                    .agg
-                    .as_sum()
-                    .and_then(|sum| sum.none_if_no_match)
-                    .unwrap_or(false);
-                let value = intermediate_sum.finalize();
-                if none_if_no_match {
-                    MetricResult::Sum(value.into())
-                } else {
-                    let value = Some(value.unwrap_or(0.0));
-                    MetricResult::Sum(value.into())
-                }
+                MetricResult::Sum(intermediate_sum.finalize().into())
            }
            IntermediateMetricResult::Percentiles(percentiles) => MetricResult::Percentiles(
                percentiles
@@ -1019,20 +1004,24 @@ impl IntermediateCompositeBucketResult {
    ) -> crate::Result<BucketResult> {
        let trimmed_entry_vec =
            trim_composite_buckets(self.entries, &self.orders, self.target_size)?;
-        let after_key = trimmed_entry_vec
-            .last()
-            .map(|bucket| {
-                let (intermediate_key, _entry) = bucket;
-                intermediate_key
-                    .iter()
-                    .enumerate()
-                    .map(|(idx, intermediate_key)| {
-                        let source = &req.sources[idx];
-                        (source.name().to_string(), intermediate_key.clone().into())
-                    })
-                    .collect()
-            })
-            .unwrap_or_default();
+        let after_key = if trimmed_entry_vec.len() == req.size as usize {
+            trimmed_entry_vec
+                .last()
+                .map(|bucket| {
+                    let (intermediate_key, _entry) = bucket;
+                    intermediate_key
+                        .iter()
+                        .enumerate()
+                        .map(|(idx, intermediate_key)| {
+                            let source = &req.sources[idx];
+                            (source.name().to_string(), intermediate_key.clone().into())
+                        })
+                        .collect()
+                })
+                .unwrap()
+        } else {
+            FxHashMap::default()
+        };

        let buckets = trimmed_entry_vec
            .into_iter()
--- a/src/aggregation/metric/cardinality.rs
+++ b/src/aggregation/metric/cardinality.rs
--- a/src/aggregation/metric/extended_stats.rs
+++ b/src/aggregation/metric/extended_stats.rs
@@ -399,26 +399,6 @@ impl SegmentAggregationCollector for SegmentExtendedStatsCollector {
        }
        Ok(())
    }
-
-    fn compute_metric_value(
-        &self,
-        bucket_id: BucketId,
-        sub_agg_name: &str,
-        sub_agg_property: &str,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        if self.name != sub_agg_name {
-            return None;
-        }
-        let extended = self.buckets.get(bucket_id as usize)?;
-        // Finalize is a pure read of accumulators — calling it here for the cutoff sort
-        // doesn't disturb the eventual intermediate result.
-        extended
-            .finalize()
-            .get_value(sub_agg_property)
-            .ok()
-            .flatten()
-    }
 }

 #[cfg(test)]
--- a/src/aggregation/metric/mod.rs
+++ b/src/aggregation/metric/mod.rs
@@ -107,9 +107,10 @@ pub enum PercentileValues {
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 /// The entry when requesting percentiles with keyed: false
 pub struct PercentileValuesVecEntry {
-    /// The percentile key (e.g. 1.0, 5.0, 25.0).
+    /// Percentile
    pub key: f64,
-    /// The percentile value. `NaN` when there are no values.
+
+    /// Value at the percentile
    pub value: f64,
 }

--- a/src/aggregation/metric/percentiles.rs
+++ b/src/aggregation/metric/percentiles.rs
@@ -312,26 +312,6 @@ impl SegmentAggregationCollector for SegmentPercentilesCollector {
        }
        Ok(())
    }
-
-    fn compute_metric_value(
-        &self,
-        bucket_id: BucketId,
-        sub_agg_name: &str,
-        sub_agg_property: &str,
-        agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        if agg_data.get_metric_req_data(self.accessor_idx).name != sub_agg_name {
-            return None;
-        }
-        let percentile: f64 = sub_agg_property.parse().ok()?;
-        if !(0.0..=100.0).contains(&percentile) {
-            return None;
-        }
-        let bucket = self.buckets.get(bucket_id as usize)?;
-        // DDSketch.quantile is a pure read; calling it here for the cutoff sort does
-        // not affect the intermediate state used for the final result.
-        bucket.sketch.quantile(percentile / 100.0).ok().flatten()
-    }
 }

 #[cfg(test)]
--- a/src/aggregation/metric/stats.rs
+++ b/src/aggregation/metric/stats.rs
@@ -321,40 +321,6 @@ impl<const COLUMN_TYPE_ID: u8> SegmentAggregationCollector
        }
        Ok(())
    }
-
-    fn compute_metric_value(
-        &self,
-        bucket_id: BucketId,
-        sub_agg_name: &str,
-        sub_agg_property: &str,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        if self.name != sub_agg_name {
-            return None;
-        }
-        let stats = self.buckets.get(bucket_id as usize)?;
-        // The property depends on what we're collecting:
-        //   - StatsType::Stats exposes count/sum/min/max/avg via dotted property.
-        //   - Single-value kinds (Sum/Count/Min/Max/Average) expect an empty property and return
-        //     the value they were configured to collect.
-        let prop = match self.collecting_for {
-            StatsType::Stats if !sub_agg_property.is_empty() => sub_agg_property,
-            StatsType::Sum if sub_agg_property.is_empty() => "sum",
-            StatsType::Count if sub_agg_property.is_empty() => "count",
-            StatsType::Max if sub_agg_property.is_empty() => "max",
-            StatsType::Min if sub_agg_property.is_empty() => "min",
-            StatsType::Average if sub_agg_property.is_empty() => "avg",
-            _ => return None,
-        };
-        match prop {
-            "count" => Some(stats.count as f64),
-            "sum" => Some(stats.sum),
-            "min" if stats.count > 0 => Some(stats.min),
-            "max" if stats.count > 0 => Some(stats.max),
-            "avg" if stats.count > 0 => Some(stats.sum / stats.count as f64),
-            _ => None,
-        }
-    }
 }

 #[inline]
--- a/src/aggregation/metric/sum.rs
+++ b/src/aggregation/metric/sum.rs
@@ -27,16 +27,6 @@ pub struct SumAggregation {
    /// { "field": "my_numbers", "missing": "10.0" }
    #[serde(default, deserialize_with = "deserialize_option_f64")]
    pub missing: Option<f64>,
-    /// Non-Elasticsearch extension. When `Some(true)`, the serialized result
-    /// returns `"value": null` if no values were collected (all documents had
-    /// missing/NULL values for the field), matching the behavior of `min`,
-    /// `max`, and `avg`. When `None` or `Some(false)` (the default) the
-    /// result returns `"value": 0`, matching Elasticsearch.
-    ///
-    /// Intended for SQL-style consumers where `SUM` of zero rows is `NULL`
-    /// and must be distinguishable from a bucket that genuinely sums to `0`.
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub none_if_no_match: Option<bool>,
 }

 impl SumAggregation {
@@ -45,7 +35,6 @@ impl SumAggregation {
        Self {
            field: field_name,
            missing: None,
-            none_if_no_match: None,
        }
    }
    /// Returns the field name the aggregation is computed on.
@@ -70,104 +59,8 @@ impl IntermediateSum {
    pub fn merge_fruits(&mut self, other: IntermediateSum) {
        self.stats.merge_fruits(other.stats);
    }
-    /// Computes the final sum value.
-    ///
-    /// Returns `None` when no values were collected, matching the Rust-side
-    /// behavior of `IntermediateMin`, `IntermediateMax`, and
-    /// `IntermediateAvg`. The Elasticsearch-vs-SQL choice for the
-    /// user-visible result is made at the boundary in
-    /// [`IntermediateMetricResult::into_final_metric_result`]: by default
-    /// `None` is coerced to `Some(0.0)` to match Elasticsearch
-    /// (`"value": 0`), and the [`SumAggregation::none_if_no_match`] flag
-    /// opts out of that coercion for SQL-style consumers.
+    /// Computes the final minimum value.
    pub fn finalize(&self) -> Option<f64> {
-        let stats = self.stats.finalize();
-        if stats.count == 0 {
-            None
-        } else {
-            Some(stats.sum)
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_sum_finalize_returns_none_when_no_values() {
-        // Default IntermediateSum has count=0 — finalize should return None,
-        // matching MIN/MAX/AVG behavior for all-NULL groups.
-        let sum = IntermediateSum::default();
-        assert_eq!(sum.finalize(), None);
-    }
-
-    #[test]
-    fn test_sum_finalize_returns_value_when_has_values() {
-        let mut sum = IntermediateSum::default();
-        // Merge in a result that has actual values
-        let stats = IntermediateStats {
-            count: 3,
-            sum: 42.0,
-            min: 10.0,
-            max: 20.0,
-            ..Default::default()
-        };
-        let other = IntermediateSum::from_stats(stats);
-        sum.merge_fruits(other);
-        assert_eq!(sum.finalize(), Some(42.0));
-    }
-
-    #[test]
-    fn test_sum_merge_two_empty_still_none() {
-        let mut a = IntermediateSum::default();
-        let b = IntermediateSum::default();
-        a.merge_fruits(b);
-        assert_eq!(a.finalize(), None);
-    }
-
-    #[test]
-    fn test_sum_aggregation_empty_index_default_matches_es() -> crate::Result<()> {
-        use serde_json::json;
-
-        use crate::aggregation::agg_req::Aggregations;
-        use crate::aggregation::tests::{exec_request, get_test_index_from_terms};
-
-        // Empty index — sum has no values to collect.
-        let values: Vec<Vec<&str>> = vec![];
-        let index = get_test_index_from_terms(false, &values)?;
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "score_sum": { "sum": { "field": "score" } }
-        }))
-        .unwrap();
-
-        let res = exec_request(agg_req, &index)?;
-        // Default: match Elasticsearch — empty sum serializes as 0, not null.
-        assert_eq!(res["score_sum"]["value"], 0.0);
-        Ok(())
-    }
-
-    #[test]
-    fn test_sum_aggregation_empty_index_none_if_no_match_opt_in() -> crate::Result<()> {
-        use serde_json::json;
-
-        use crate::aggregation::agg_req::Aggregations;
-        use crate::aggregation::tests::{exec_request, get_test_index_from_terms};
-
-        let values: Vec<Vec<&str>> = vec![];
-        let index = get_test_index_from_terms(false, &values)?;
-        let agg_req: Aggregations = serde_json::from_value(json!({
-            "score_sum": { "sum": { "field": "score", "none_if_no_match": true } }
-        }))
-        .unwrap();
-
-        let res = exec_request(agg_req, &index)?;
-        // Opt-in non-ES extension — empty sum serializes as null.
-        assert!(
-            res["score_sum"]["value"].is_null(),
-            "expected null, got {:?}",
-            res["score_sum"]["value"]
-        );
-        Ok(())
+        Some(self.stats.finalize().sum)
    }
 }
--- a/src/aggregation/metric/top_hits.rs
+++ b/src/aggregation/metric/top_hits.rs
@@ -644,17 +644,6 @@ impl SegmentAggregationCollector for TopHitsSegmentCollector {
        );
        Ok(())
    }
-
-    fn compute_metric_value(
-        &self,
-        _bucket_id: BucketId,
-        _sub_agg_name: &str,
-        _sub_agg_property: &str,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        // top_hits is not a numeric metric and cannot be used as an order target.
-        None
-    }
 }

 #[cfg(test)]
--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -133,7 +133,7 @@ mod agg_limits;
 pub mod agg_req;
 pub mod agg_result;
 pub mod bucket;
-pub(crate) mod buffered_sub_aggs;
+pub(crate) mod cached_sub_aggs;
 mod collector;
 mod date;
 mod error;
--- a/src/aggregation/segment_agg_result.rs
+++ b/src/aggregation/segment_agg_result.rs
@@ -76,31 +76,6 @@ pub trait SegmentAggregationCollector: Debug {
    fn flush(&mut self, _agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
        Ok(())
    }
-
-    /// Compute the segment-level metric value of the named direct-child metric for `bucket_id`.
-    ///
-    /// Used by parent term aggs that order by a sub-aggregation: the parent sorts on
-    /// this value and cuts off at segment time, matching the approximation tradeoff
-    /// Elasticsearch makes for any sub-agg ordering.
-    ///
-    /// `sub_agg_property` is the dotted suffix (e.g. `"sum"` in `mystats.sum`); empty when
-    /// the metric is a single-value kind such as cardinality.
-    ///
-    /// Returns `None` only on name mismatch, unknown property, or empty bucket. Implementations
-    /// may finalize their per-bucket state (e.g. compute a percentile from a sketch); calls
-    /// must be idempotent so the final intermediate result is unaffected.
-    ///
-    /// No default impl on purpose: every collector must decide explicitly whether it
-    /// produces a metric value, forwards into children (single-bucket aggs), or rejects
-    /// the lookup. A silent `None` default would let a parent term agg's cutoff sort all
-    /// buckets to the same key and drop arbitrary winners.
-    fn compute_metric_value(
-        &self,
-        bucket_id: BucketId,
-        sub_agg_name: &str,
-        sub_agg_property: &str,
-        agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64>;
 }

 #[derive(Default)]
@@ -162,21 +137,4 @@ impl SegmentAggregationCollector for GenericSegmentAggregationResultsCollector {
        }
        Ok(())
    }
-
-    fn compute_metric_value(
-        &self,
-        bucket_id: BucketId,
-        sub_agg_name: &str,
-        sub_agg_property: &str,
-        agg_data: &AggregationsSegmentCtx,
-    ) -> Option<f64> {
-        for agg in &self.aggs {
-            if let Some(value) =
-                agg.compute_metric_value(bucket_id, sub_agg_name, sub_agg_property, agg_data)
-            {
-                return Some(value);
-            }
-        }
-        None
-    }
 }
--- a/src/collector/count_collector.rs
+++ b/src/collector/count_collector.rs
@@ -1,6 +1,5 @@
 use super::Collector;
 use crate::collector::SegmentCollector;
-use crate::query::Weight;
 use crate::{DocId, Score, SegmentOrdinal, SegmentReader};

 /// `CountCollector` collector only counts how many
@@ -56,15 +55,6 @@ impl Collector for Count {
    fn merge_fruits(&self, segment_counts: Vec<usize>) -> crate::Result<usize> {
        Ok(segment_counts.into_iter().sum())
    }
-
-    fn collect_segment(
-        &self,
-        weight: &dyn Weight,
-        _segment_ord: u32,
-        reader: &SegmentReader,
-    ) -> crate::Result<usize> {
-        Ok(weight.count(reader)? as usize)
-    }
 }

 #[derive(Default)]
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -389,13 +389,6 @@ impl SegmentCollector for FacetSegmentCollector {
            }
            let mut facet = vec![];
            let (facet_ord, facet_depth) = self.unique_facet_ords[collapsed_facet_ord];
-            // u64::MAX is used as a sentinel for unmapped ordinals (e.g. when a
-            // document has the exact registered facet, not a child of it).
-            // Passing it to ord_to_term would resolve to the last dictionary
-            // entry and produce a spurious facet from an unrelated branch.
-            if facet_ord == u64::MAX {
-                continue;
-            }
            // TODO handle errors.
            if facet_dict.ord_to_term(facet_ord, &mut facet).is_ok() {
                if let Some((end_collapsed_facet, _)) = facet
@@ -821,63 +814,6 @@ mod tests {
        assert!(!super::is_child_facet(&b"foo\0bar"[..], &b"foo"[..]));
        assert!(!super::is_child_facet(&b"foo"[..], &b"foobar\0baz"[..]));
    }
-
-    // Regression test for https://github.com/quickwit-oss/tantivy/issues/2494
-    // When a document has the exact registered facet path (not just a child),
-    // harvest() must not turn the unmapped sentinel into a spurious root entry.
-    #[test]
-    fn test_facet_collector_wrong_root() -> crate::Result<()> {
-        let mut schema_builder = Schema::builder();
-        let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
-        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema);
-
-        let mut index_writer: IndexWriter = index.writer_for_tests()?;
-        let facets: Vec<&str> = vec![
-            "/science-fiction/asimov",
-            "/science-fiction/clarke",
-            "/science-fiction/dick",
-            "/science-fiction/herbert",
-            "/science-fiction/orwell",
-            // This exact match on the registered facet is the bug trigger:
-            // its ordinal maps to the sentinel (u64::MAX, 0) in the collapse
-            // mapping, which without the fix resolves to an unrelated term.
-            "/fantasy/epic-fantasy",
-            "/fantasy/epic-fantasy/tolkien",
-            "/fantasy/epic-fantasy/martin",
-        ];
-        for facet_str in &facets {
-            index_writer.add_document(doc!(
-                facet_field => Facet::from(*facet_str)
-            ))?;
-        }
-        index_writer.commit()?;
-
-        let reader = index.reader()?;
-        let searcher = reader.searcher();
-
-        let term = Term::from_facet(facet_field, &Facet::from("/fantasy/epic-fantasy"));
-        let query = TermQuery::new(term, IndexRecordOption::Basic);
-
-        let mut facet_collector = FacetCollector::for_field("facet");
-        facet_collector.add_facet("/fantasy/epic-fantasy");
-        let counts: FacetCounts = searcher.search(&query, &facet_collector)?;
-
-        let result: Vec<(String, u64)> = counts
-            .get("/")
-            .map(|(facet, count)| (facet.to_string(), count))
-            .collect();
-
-        // Only children of /fantasy/epic-fantasy should appear, not /science-fiction
-        assert_eq!(
-            result,
-            vec![
-                ("/fantasy/epic-fantasy/martin".to_string(), 1),
-                ("/fantasy/epic-fantasy/tolkien".to_string(), 1),
-            ]
-        );
-        Ok(())
-    }
 }

 #[cfg(all(test, feature = "unstable"))]
--- a/src/collector/sort_key/sort_by_score.rs
+++ b/src/collector/sort_key/sort_by_score.rs
@@ -1,8 +1,5 @@
-use std::cmp::{Ordering, Reverse};
-use std::collections::BinaryHeap;
-
 use crate::collector::sort_key::NaturalComparator;
-use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
+use crate::collector::{SegmentSortKeyComputer, SortKeyComputer, TopNComputer};
 use crate::{DocAddress, DocId, Score};

 /// Sort by similarity score.
@@ -28,10 +25,6 @@ impl SortKeyComputer for SortBySimilarityScore {
    }

    // Sorting by score is special in that it allows for the Block-Wand optimization.
-    //
-    // We use a BinaryHeap (TopNHeap) instead of TopNComputer here so that the
-    // threshold is always the exact K-th best score. TopNComputer only updates its
-    // threshold every K docs (at truncation), giving Block-WAND a stale bound.
    fn collect_segment_top_k(
        &self,
        k: usize,
@@ -39,10 +32,12 @@ impl SortKeyComputer for SortBySimilarityScore {
        reader: &crate::SegmentReader,
        segment_ord: u32,
    ) -> crate::Result<Vec<(Self::SortKey, DocAddress)>> {
-        let mut top_n = TopNHeap::new(k);
+        let mut top_n: TopNComputer<Score, DocId, Self::Comparator> =
+            TopNComputer::new_with_comparator(k, self.comparator());

        if let Some(alive_bitset) = reader.alive_bitset() {
            let mut threshold = Score::MIN;
+            top_n.threshold = Some(threshold);
            weight.for_each_pruning(Score::MIN, reader, &mut |doc, score| {
                if alive_bitset.is_deleted(doc) {
                    return threshold;
@@ -61,7 +56,7 @@ impl SortKeyComputer for SortBySimilarityScore {
        Ok(top_n
            .into_vec()
            .into_iter()
-            .map(|(score, doc)| (score, DocAddress::new(segment_ord, doc)))
+            .map(|cid| (cid.sort_key, DocAddress::new(segment_ord, cid.doc)))
            .collect())
    }
 }
@@ -80,204 +75,3 @@ impl SegmentSortKeyComputer for SortBySimilarityScore {
        score
    }
 }
-
-/// Min-heap entry: higher score = greater, lower doc wins ties.
-struct ScoreHeapEntry {
-    score: Score,
-    doc: DocId,
-}
-
-impl Eq for ScoreHeapEntry {}
-
-impl PartialEq for ScoreHeapEntry {
-    fn eq(&self, other: &Self) -> bool {
-        self.cmp(other) == Ordering::Equal
-    }
-}
-
-impl PartialOrd for ScoreHeapEntry {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for ScoreHeapEntry {
-    fn cmp(&self, other: &Self) -> Ordering {
-        self.score
-            .partial_cmp(&other.score)
-            .unwrap_or(Ordering::Equal)
-            .then_with(|| other.doc.cmp(&self.doc))
-    }
-}
-
-/// Heap-based top-K for score collection. O(log K) per insert, but the threshold
-/// is always tight, so Block-WAND prunes better than with [`TopNComputer`]'s
-/// buffer/median approach.
-///
-/// Like [`TopNComputer`], items must arrive in ascending doc order, and equal
-/// scores are rejected (strict `>`) so that lower doc IDs win ties.
-///
-/// [`TopNComputer`]: crate::collector::TopNComputer
-struct TopNHeap {
-    heap: BinaryHeap<Reverse<ScoreHeapEntry>>,
-    top_n: usize,
-    threshold: Option<Score>,
-}
-
-impl TopNHeap {
-    fn new(top_n: usize) -> Self {
-        TopNHeap {
-            heap: BinaryHeap::with_capacity(top_n),
-            top_n,
-            threshold: None,
-        }
-    }
-
-    #[inline]
-    fn push(&mut self, score: Score, doc: DocId) {
-        if self.heap.len() < self.top_n {
-            self.heap.push(Reverse(ScoreHeapEntry { score, doc }));
-            if self.heap.len() == self.top_n {
-                self.threshold = self.heap.peek().map(|Reverse(entry)| entry.score);
-            }
-        } else if let Some(threshold) = self.threshold {
-            if score > threshold {
-                // peek_mut + assign is a single sift-down, vs pop + push = two sifts.
-                if let Some(mut min) = self.heap.peek_mut() {
-                    *min = Reverse(ScoreHeapEntry { score, doc });
-                }
-                self.threshold = self.heap.peek().map(|Reverse(entry)| entry.score);
-            }
-        }
-    }
-
-    fn into_vec(self) -> Vec<(Score, DocId)> {
-        self.heap
-            .into_vec()
-            .into_iter()
-            .map(|Reverse(entry)| (entry.score, entry.doc))
-            .collect()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use proptest::prelude::*;
-
-    use super::*;
-    use crate::collector::sort_key::NaturalComparator;
-    use crate::collector::TopNComputer;
-
-    #[test]
-    fn test_top_n_heap_zero_capacity() {
-        let mut heap = TopNHeap::new(0);
-        heap.push(1.0, 0);
-        heap.push(2.0, 1);
-        assert!(heap.into_vec().is_empty());
-    }
-
-    #[test]
-    fn test_top_n_heap_basic() {
-        let mut heap = TopNHeap::new(2);
-        heap.push(1.0, 0);
-        heap.push(3.0, 1);
-        heap.push(2.0, 2);
-
-        let mut results = heap.into_vec();
-        results.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap().then_with(|| a.1.cmp(&b.1)));
-        assert_eq!(results, vec![(3.0, 1), (2.0, 2)]);
-    }
-
-    #[test]
-    fn test_top_n_heap_threshold_always_accurate() {
-        let mut heap = TopNHeap::new(2);
-        assert_eq!(heap.threshold, None);
-
-        heap.push(1.0, 0);
-        assert_eq!(heap.threshold, None);
-
-        heap.push(3.0, 1);
-        assert_eq!(heap.threshold, Some(1.0));
-
-        heap.push(2.0, 2); // evicts 1.0
-        assert_eq!(heap.threshold, Some(2.0));
-
-        heap.push(4.0, 3); // evicts 2.0
-        assert_eq!(heap.threshold, Some(3.0));
-    }
-
-    #[test]
-    fn test_top_n_heap_tiebreaking_lower_doc_wins() {
-        let mut heap = TopNHeap::new(2);
-        heap.push(5.0, 0);
-        heap.push(5.0, 1);
-        heap.push(5.0, 2); // rejected: not strictly > threshold
-
-        let mut results = heap.into_vec();
-        results.sort_by_key(|&(_, doc)| doc);
-        assert_eq!(results, vec![(5.0, 0), (5.0, 1)]);
-    }
-
-    #[test]
-    fn test_top_n_heap_single_element() {
-        let mut heap = TopNHeap::new(1);
-        heap.push(1.0, 0);
-        assert_eq!(heap.threshold, Some(1.0));
-
-        heap.push(0.5, 1); // rejected
-        heap.push(2.0, 2); // accepted
-        assert_eq!(heap.threshold, Some(2.0));
-
-        let results = heap.into_vec();
-        assert_eq!(results, vec![(2.0, 2)]);
-    }
-
-    #[test]
-    fn test_top_n_heap_under_capacity() {
-        let mut heap = TopNHeap::new(5);
-        heap.push(3.0, 0);
-        heap.push(1.0, 1);
-        heap.push(2.0, 2);
-        // Only 3 elements, capacity is 5 — all should be kept
-        assert_eq!(heap.threshold, None);
-
-        let mut results = heap.into_vec();
-        results.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap().then_with(|| a.1.cmp(&b.1)));
-        assert_eq!(results, vec![(3.0, 0), (2.0, 2), (1.0, 1)]);
-    }
-
-    proptest! {
-        #[test]
-        fn test_top_n_heap_matches_top_n_computer(
-            limit in 0..20_usize,
-            mut docs in proptest::collection::vec((0..1000_u32, 0..1000_u32), 0..200_usize),
-        ) {
-            // Both require ascending doc order.
-            docs.sort_by_key(|(_, doc_id)| *doc_id);
-            docs.dedup_by_key(|(_, doc_id)| *doc_id);
-
-            let mut heap = TopNHeap::new(limit);
-            let mut computer: TopNComputer<Score, DocId, NaturalComparator> =
-                TopNComputer::new_with_comparator(limit, NaturalComparator);
-
-            for &(score_u32, doc) in &docs {
-                let score = score_u32 as Score;
-                heap.push(score, doc);
-                computer.push(score, doc);
-            }
-
-            let mut heap_results = heap.into_vec();
-            heap_results.sort_by(|a, b| {
-                b.0.partial_cmp(&a.0).unwrap().then_with(|| a.1.cmp(&b.1))
-            });
-
-            let computer_results: Vec<(Score, DocId)> = computer
-                .into_sorted_vec()
-                .into_iter()
-                .map(|cd| (cd.sort_key, cd.doc))
-                .collect();
-
-            prop_assert_eq!(heap_results, computer_results);
-        }
-    }
-}
--- a/src/collector/sort_key/sort_by_static_fast_value.rs
+++ b/src/collector/sort_key/sort_by_static_fast_value.rs
@@ -52,7 +52,7 @@ impl<T: FastValue> SortKeyComputer for SortByStaticFastValue<T> {
        if schema_type != T::to_type() {
            return Err(crate::TantivyError::SchemaError(format!(
                "Field `{}` is of type {schema_type:?}, not of the type {:?}.",
-                self.field,
+                &self.field,
                T::to_type()
            )));
        }
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -513,9 +513,7 @@ pub struct TopNComputer<Score, D, C> {
    /// The buffer reverses sort order to get top-semantics instead of bottom-semantics
    buffer: Vec<ComparableDoc<Score, D>>,
    top_n: usize,
-    /// The current threshold for pruning. Documents with scores at or below
-    /// this value are skipped by `push()`. Updated when the buffer is truncated.
-    pub threshold: Option<Score>,
+    pub(crate) threshold: Option<Score>,
    comparator: C,
 }

--- a/src/directory/mmap_directory/mod.rs
+++ b/src/directory/mmap_directory/mod.rs
@@ -676,7 +676,7 @@ mod tests {
            let num_segments = reader.searcher().segment_readers().len();
            assert!(num_segments <= 4);
            let num_components_except_deletes_and_tempstore =
-                crate::index::SegmentComponent::iterator().len() - 2;
+                crate::index::SegmentComponent::iterator().len() - 1;
            let max_num_mmapped = num_components_except_deletes_and_tempstore * num_segments;
            assert_eventually(|| {
                let num_mmapped = mmap_directory.get_cache_info().mmapped.len();
--- a/src/docset.rs
+++ b/src/docset.rs
@@ -1,7 +1,5 @@
 use std::borrow::{Borrow, BorrowMut};

-use common::TinySet;
-
 use crate::fastfield::AliveBitSet;
 use crate::DocId;

@@ -16,12 +14,6 @@ pub const TERMINATED: DocId = i32::MAX as u32;
 /// exactly this size as long as we can fill the buffer.
 pub const COLLECT_BLOCK_BUFFER_LEN: usize = 64;

-/// Number of `TinySet` (64-bit) buckets in a block used by [`DocSet::fill_bitset_block`].
-pub const BLOCK_NUM_TINYBITSETS: usize = 16;
-
-/// Number of doc IDs covered by one block: `BLOCK_NUM_TINYBITSETS * 64 = 1024`.
-pub const BLOCK_WINDOW: u32 = BLOCK_NUM_TINYBITSETS as u32 * 64;
-
 /// Represents an iterable set of sorted doc ids.
 pub trait DocSet: Send {
    /// Goes to the next element.
@@ -168,31 +160,6 @@ pub trait DocSet: Send {
        self.size_hint() as u64
    }

-    /// Fills a bitmask representing which documents in `[min_doc, min_doc + BLOCK_WINDOW)` are
-    /// present in this docset.
-    ///
-    /// The window is divided into `BLOCK_NUM_TINYBITSETS` buckets of 64 docs each.
-    /// Returns the next doc `>= min_doc + BLOCK_WINDOW`, or `TERMINATED` if exhausted.
-    fn fill_bitset_block(
-        &mut self,
-        min_doc: DocId,
-        mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS],
-    ) -> DocId {
-        self.seek(min_doc);
-        let horizon = min_doc + BLOCK_WINDOW;
-        loop {
-            let doc = self.doc();
-            if doc >= horizon {
-                return doc;
-            }
-            let delta = doc - min_doc;
-            mask[(delta / 64) as usize].insert_mut(delta % 64);
-            if self.advance() == TERMINATED {
-                return TERMINATED;
-            }
-        }
-    }
-
    /// Returns the number documents matching.
    /// Calling this method consumes the `DocSet`.
    fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
@@ -247,18 +214,6 @@ impl DocSet for &mut dyn DocSet {
        (**self).seek_danger(target)
    }

-    fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
-        (**self).fill_buffer(buffer)
-    }
-
-    fn fill_bitset_block(
-        &mut self,
-        min_doc: DocId,
-        mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS],
-    ) -> DocId {
-        (**self).fill_bitset_block(min_doc, mask)
-    }
-
    fn doc(&self) -> u32 {
        (**self).doc()
    }
@@ -301,15 +256,6 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
        unboxed.fill_buffer(buffer)
    }

-    fn fill_bitset_block(
-        &mut self,
-        min_doc: DocId,
-        mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS],
-    ) -> DocId {
-        let unboxed: &mut TDocSet = self.borrow_mut();
-        unboxed.fill_bitset_block(min_doc, mask)
-    }
-
    fn doc(&self) -> DocId {
        let unboxed: &TDocSet = self.borrow();
        unboxed.doc()
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -127,7 +127,7 @@ mod tests {
            fast_field_writers
                .add_document(&doc!(*FIELD=>2u64))
                .unwrap();
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
@@ -178,7 +178,7 @@ mod tests {
            fast_field_writers
                .add_document(&doc!(*FIELD=>215u64))
                .unwrap();
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
@@ -211,7 +211,7 @@ mod tests {
                    .add_document(&doc!(*FIELD=>100_000u64))
                    .unwrap();
            }
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
@@ -243,7 +243,7 @@ mod tests {
                    .add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + doc_id))
                    .unwrap();
            }
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
@@ -276,7 +276,7 @@ mod tests {
                doc.add_i64(i64_field, i);
                fast_field_writers.add_document(&doc).unwrap();
            }
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
@@ -315,7 +315,7 @@ mod tests {
            let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
            let doc = TantivyDocument::default();
            fast_field_writers.add_document(&doc).unwrap();
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }

@@ -348,7 +348,7 @@ mod tests {
            let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
            let doc = TantivyDocument::default();
            fast_field_writers.add_document(&doc).unwrap();
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }

@@ -385,7 +385,7 @@ mod tests {
            for &x in &permutation {
                fast_field_writers.add_document(&doc!(*FIELD=>x)).unwrap();
            }
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
@@ -770,7 +770,7 @@ mod tests {
            fast_field_writers
                .add_document(&doc!(field=>false))
                .unwrap();
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
@@ -802,7 +802,7 @@ mod tests {
                    .add_document(&doc!(field=>false))
                    .unwrap();
            }
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
@@ -827,7 +827,7 @@ mod tests {
            let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
            let doc = TantivyDocument::default();
            fast_field_writers.add_document(&doc).unwrap();
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
@@ -855,7 +855,7 @@ mod tests {
            for doc in docs {
                fast_field_writers.add_document(doc).unwrap();
            }
-            fast_field_writers.serialize(&mut write, None).unwrap();
+            fast_field_writers.serialize(&mut write).unwrap();
            write.terminate().unwrap();
        }
        Ok(directory)
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -4,7 +4,6 @@ use columnar::{ColumnarWriter, NumericalValue};
 use common::{DateTimePrecision, JsonPathWriter};
 use tokenizer_api::Token;

-use crate::indexer::doc_id_mapping::DocIdMapping;
 use crate::schema::document::{Document, ReferenceValue, ReferenceValueLeaf, Value};
 use crate::schema::{value_type_to_column_type, Field, FieldType, Schema, Type};
 use crate::tokenizer::{TextAnalyzer, TokenizerManager};
@@ -106,16 +105,6 @@ impl FastFieldsWriter {
        self.columnar_writer.mem_usage()
    }

-    pub(crate) fn sort_order(
-        &self,
-        sort_field: &str,
-        num_docs: DocId,
-        reversed: bool,
-    ) -> Vec<DocId> {
-        self.columnar_writer
-            .sort_order(sort_field, num_docs, reversed)
-    }
-
    /// Indexes all of the fastfields of a new document.
    pub fn add_document<D: Document>(&mut self, doc: &D) -> crate::Result<()> {
        let doc_id = self.num_docs;
@@ -233,16 +222,9 @@ impl FastFieldsWriter {

    /// Serializes all of the `FastFieldWriter`s by pushing them in
    /// order to the fast field serializer.
-    pub fn serialize(
-        mut self,
-        wrt: &mut dyn io::Write,
-        doc_id_map_opt: Option<&DocIdMapping>,
-    ) -> io::Result<()> {
+    pub fn serialize(mut self, wrt: &mut dyn io::Write) -> io::Result<()> {
        let num_docs = self.num_docs;
-        let old_to_new_row_ids =
-            doc_id_map_opt.map(|doc_id_mapping| doc_id_mapping.old_to_new_ids());
-        self.columnar_writer
-            .serialize(num_docs, old_to_new_row_ids, wrt)?;
+        self.columnar_writer.serialize(num_docs, wrt)?;
        Ok(())
    }
 }
@@ -392,7 +374,7 @@ mod tests {
        }
        let mut buffer = Vec::new();
        columnar_writer
-            .serialize(json_docs.len() as DocId, None, &mut buffer)
+            .serialize(json_docs.len() as DocId, &mut buffer)
            .unwrap();
        ColumnarReader::open(buffer).unwrap()
    }
--- a/src/fieldnorm/mod.rs
+++ b/src/fieldnorm/mod.rs
@@ -77,7 +77,7 @@ mod tests {
            let mut fieldnorm_writers = FieldNormsWriter::for_schema(&SCHEMA);
            fieldnorm_writers.record(2u32, *TXT_FIELD, 5);
            fieldnorm_writers.record(3u32, *TXT_FIELD, 3);
-            fieldnorm_writers.serialize(serializer, None)?;
+            fieldnorm_writers.serialize(serializer)?;
        }
        let file = directory.open_read(path)?;
        {
--- a/src/fieldnorm/writer.rs
+++ b/src/fieldnorm/writer.rs
@@ -2,7 +2,6 @@ use std::cmp::Ordering;
 use std::{io, iter};

 use super::{fieldnorm_to_id, FieldNormsSerializer};
-use crate::indexer::doc_id_mapping::DocIdMapping;
 use crate::schema::{Field, Schema};
 use crate::DocId;

@@ -92,11 +91,7 @@ impl FieldNormsWriter {
    }

    /// Serialize the seen fieldnorm values to the serializer for all fields.
-    pub fn serialize(
-        &self,
-        mut fieldnorms_serializer: FieldNormsSerializer,
-        doc_id_map: Option<&DocIdMapping>,
-    ) -> io::Result<()> {
+    pub fn serialize(&self, mut fieldnorms_serializer: FieldNormsSerializer) -> io::Result<()> {
        for (field, fieldnorms_buffer) in self.fieldnorms_buffers.iter().enumerate().filter_map(
            |(field_id, fieldnorms_buffer_opt)| {
                fieldnorms_buffer_opt.as_ref().map(|fieldnorms_buffer| {
@@ -104,12 +99,7 @@ impl FieldNormsWriter {
                })
            },
        ) {
-            if let Some(doc_id_map) = doc_id_map {
-                let remapped_fieldnorm_buffer = doc_id_map.remap(fieldnorms_buffer);
-                fieldnorms_serializer.serialize_field(field, &remapped_fieldnorm_buffer)?;
-            } else {
-                fieldnorms_serializer.serialize_field(field, fieldnorms_buffer)?;
-            }
+            fieldnorms_serializer.serialize_field(field, fieldnorms_buffer)?;
        }
        fieldnorms_serializer.close()?;
        Ok(())
--- a/src/functional_test.rs
+++ b/src/functional_test.rs
@@ -4,8 +4,7 @@ use rand::{rng, Rng};

 use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
 use crate::schema::*;
-#[allow(deprecated)]
-use crate::{doc, schema, Index, IndexSettings, IndexSortByField, IndexWriter, Order, Searcher};
+use crate::{doc, schema, Index, IndexWriter, Searcher};

 fn check_index_content(searcher: &Searcher, vals: &[u64]) -> crate::Result<()> {
    assert!(searcher.segment_readers().len() < 20);
@@ -63,71 +62,6 @@ fn get_num_iterations() -> usize {
        .map(|str| str.parse().unwrap())
        .unwrap_or(2000)
 }
-#[test]
-#[ignore]
-fn test_functional_indexing_sorted() -> crate::Result<()> {
-    let mut schema_builder = Schema::builder();
-
-    let id_field = schema_builder.add_u64_field("id", INDEXED | FAST);
-    let multiples_field = schema_builder.add_u64_field("multiples", INDEXED);
-    let text_field_options = TextOptions::default()
-        .set_indexing_options(
-            TextFieldIndexing::default()
-                .set_index_option(schema::IndexRecordOption::WithFreqsAndPositions),
-        )
-        .set_stored();
-    let text_field = schema_builder.add_text_field("text_field", text_field_options);
-    let schema = schema_builder.build();
-
-    let mut index_builder = Index::builder().schema(schema);
-    index_builder = index_builder.settings(IndexSettings {
-        sort_by_field: Some(IndexSortByField {
-            field: "id".to_string(),
-            order: Order::Desc,
-        }),
-        ..Default::default()
-    });
-    let index = index_builder.create_from_tempdir().unwrap();
-
-    let reader = index.reader()?;
-
-    let mut rng = rng();
-
-    let mut index_writer: IndexWriter =
-        index.writer_with_num_threads(3, 3 * MEMORY_BUDGET_NUM_BYTES_MIN)?;
-
-    let mut committed_docs: HashSet<u64> = HashSet::new();
-    let mut uncommitted_docs: HashSet<u64> = HashSet::new();
-
-    for _ in 0..get_num_iterations() {
-        let random_val = rng.random_range(0..20);
-        if random_val == 0 {
-            index_writer.commit()?;
-            committed_docs.extend(&uncommitted_docs);
-            uncommitted_docs.clear();
-            reader.reload()?;
-            let searcher = reader.searcher();
-            // check that everything is correct.
-            check_index_content(
-                &searcher,
-                &committed_docs.iter().cloned().collect::<Vec<u64>>(),
-            )?;
-        } else if committed_docs.remove(&random_val) || uncommitted_docs.remove(&random_val) {
-            let doc_id_term = Term::from_field_u64(id_field, random_val);
-            index_writer.delete_term(doc_id_term);
-        } else {
-            uncommitted_docs.insert(random_val);
-            let mut doc = TantivyDocument::new();
-            doc.add_u64(id_field, random_val);
-            for i in 1u64..10u64 {
-                doc.add_u64(multiples_field, random_val * i);
-            }
-            doc.add_text(text_field, get_text());
-            index_writer.add_document(doc)?;
-        }
-    }
-    Ok(())
-}

 const LOREM: &str = "Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod \
                     tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, \
--- a/src/index/index.rs
+++ b/src/index/index.rs
@@ -22,7 +22,7 @@ use crate::indexer::segment_updater::save_metas;
 use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
 use crate::reader::{IndexReader, IndexReaderBuilder};
 use crate::schema::document::Document;
-use crate::schema::{Field, FieldType, Schema, Type};
+use crate::schema::{Field, FieldType, Schema};
 use crate::tokenizer::{TextAnalyzer, TokenizerManager};
 use crate::SegmentReader;

@@ -232,38 +232,7 @@ impl IndexBuilder {
    }

    fn validate(&self) -> crate::Result<()> {
-        if let Some(schema) = self.schema.as_ref() {
-            if let Some(sort_by_field) = self.index_settings.sort_by_field.as_ref() {
-                let schema_field = schema.get_field(&sort_by_field.field).map_err(|_| {
-                    TantivyError::InvalidArgument(format!(
-                        "Field to sort index {} not found in schema",
-                        sort_by_field.field
-                    ))
-                })?;
-                let entry = schema.get_field_entry(schema_field);
-                if !entry.is_fast() {
-                    return Err(TantivyError::InvalidArgument(format!(
-                        "Field {} is no fast field. Field needs to be a single value fast field \
-                         to be used to sort an index",
-                        sort_by_field.field
-                    )));
-                }
-                let supported_field_types = [
-                    Type::I64,
-                    Type::U64,
-                    Type::F64,
-                    Type::Date,
-                    Type::Str,
-                    Type::Bytes,
-                ];
-                let field_type = entry.field_type().value_type();
-                if !supported_field_types.contains(&field_type) {
-                    return Err(TantivyError::InvalidArgument(format!(
-                        "Unsupported field type in sort_by_field: {field_type:?}. Supported field \
-                         types: {supported_field_types:?} ",
-                    )));
-                }
-            }
+        if let Some(_schema) = self.schema.as_ref() {
            Ok(())
        } else {
            Err(TantivyError::InvalidArgument(
--- a/src/index/index_meta.rs
+++ b/src/index/index_meta.rs
@@ -1,8 +1,6 @@
 use std::collections::HashSet;
 use std::fmt;
 use std::path::PathBuf;
-use std::sync::atomic::AtomicBool;
-use std::sync::Arc;

 use serde::{Deserialize, Serialize};

@@ -37,7 +35,6 @@ impl SegmentMetaInventory {
        let inner = InnerSegmentMeta {
            segment_id,
            max_doc,
-            include_temp_doc_store: Arc::new(AtomicBool::new(true)),
            deletes: None,
        };
        SegmentMeta::from(self.inventory.track(inner))
@@ -85,15 +82,6 @@ impl SegmentMeta {
        self.tracked.segment_id
    }

-    /// Removes the Component::TempStore from the alive list and
-    /// therefore marks the temp docstore file to be deleted by
-    /// the garbage collection.
-    pub fn untrack_temp_docstore(&self) {
-        self.tracked
-            .include_temp_doc_store
-            .store(false, std::sync::atomic::Ordering::Relaxed);
-    }
-
    /// Returns the number of deleted documents.
    pub fn num_deleted_docs(&self) -> u32 {
        self.tracked
@@ -111,20 +99,9 @@ impl SegmentMeta {
    /// is by removing all files that have been created by tantivy
    /// and are not used by any segment anymore.
    pub fn list_files(&self) -> HashSet<PathBuf> {
-        if self
-            .tracked
-            .include_temp_doc_store
-            .load(std::sync::atomic::Ordering::Relaxed)
-        {
-            SegmentComponent::iterator()
-                .map(|component| self.relative_path(*component))
-                .collect::<HashSet<PathBuf>>()
-        } else {
-            SegmentComponent::iterator()
-                .filter(|comp| *comp != &SegmentComponent::TempStore)
-                .map(|component| self.relative_path(*component))
-                .collect::<HashSet<PathBuf>>()
-        }
+        SegmentComponent::iterator()
+            .map(|component| self.relative_path(*component))
+            .collect::<HashSet<PathBuf>>()
    }

    /// Returns the relative path of a component of our segment.
@@ -138,7 +115,6 @@ impl SegmentMeta {
            SegmentComponent::Positions => ".pos".to_string(),
            SegmentComponent::Terms => ".term".to_string(),
            SegmentComponent::Store => ".store".to_string(),
-            SegmentComponent::TempStore => ".store.temp".to_string(),
            SegmentComponent::FastFields => ".fast".to_string(),
            SegmentComponent::FieldNorms => ".fieldnorm".to_string(),
            SegmentComponent::Delete => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
@@ -183,7 +159,6 @@ impl SegmentMeta {
            segment_id: inner_meta.segment_id,
            max_doc,
            deletes: None,
-            include_temp_doc_store: Arc::new(AtomicBool::new(true)),
        });
        SegmentMeta { tracked }
    }
@@ -202,7 +177,6 @@ impl SegmentMeta {
        let tracked = self.tracked.map(move |inner_meta| InnerSegmentMeta {
            segment_id: inner_meta.segment_id,
            max_doc: inner_meta.max_doc,
-            include_temp_doc_store: Arc::new(AtomicBool::new(true)),
            deletes: Some(delete_meta),
        });
        SegmentMeta { tracked }
@@ -214,14 +188,6 @@ struct InnerSegmentMeta {
    segment_id: SegmentId,
    max_doc: u32,
    pub deletes: Option<DeleteMeta>,
-    /// If you want to avoid the SegmentComponent::TempStore file to be covered by
-    /// garbage collection and deleted, set this to true. This is used during merge.
-    #[serde(skip)]
-    #[serde(default = "default_temp_store")]
-    pub(crate) include_temp_doc_store: Arc<AtomicBool>,
-}
-fn default_temp_store() -> Arc<AtomicBool> {
-    Arc::new(AtomicBool::new(false))
 }

 impl InnerSegmentMeta {
@@ -246,10 +212,6 @@ fn is_true(val: &bool) -> bool {
 /// index, like presort documents.
 #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
 pub struct IndexSettings {
-    /// Sorts the documents by information
-    /// provided in `IndexSortByField`
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub sort_by_field: Option<IndexSortByField>,
    /// The `Compressor` used to compress the doc store.
    #[serde(default)]
    pub docstore_compression: Compressor,
@@ -272,7 +234,6 @@ fn default_docstore_blocksize() -> usize {
 impl Default for IndexSettings {
    fn default() -> Self {
        Self {
-            sort_by_field: None,
            docstore_compression: Compressor::default(),
            docstore_blocksize: default_docstore_blocksize(),
            docstore_compress_dedicated_thread: true,
@@ -280,18 +241,6 @@ impl Default for IndexSettings {
    }
 }

-/// Settings to presort the documents in an index
-///
-/// Presorting documents can greatly improve performance
-/// in some scenarios, by applying top n
-/// optimizations.
-#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
-pub struct IndexSortByField {
-    /// The field to sort the documents by
-    pub field: String,
-    /// The order to sort the documents by
-    pub order: Order,
-}
 /// The order to sort by
 #[derive(Clone, Copy, Debug, Serialize, Deserialize, Eq, PartialEq)]
 pub enum Order {
@@ -411,7 +360,7 @@ mod tests {
    use crate::store::Compressor;
    #[cfg(feature = "zstd-compression")]
    use crate::store::ZstdCompressor;
-    use crate::{IndexSettings, IndexSortByField, Order};
+    use crate::IndexSettings;

    #[test]
    fn test_serialize_metas() {
@@ -423,10 +372,6 @@ mod tests {
        let index_metas = IndexMeta {
            index_settings: IndexSettings {
                docstore_compression: Compressor::None,
-                sort_by_field: Some(IndexSortByField {
-                    field: "text".to_string(),
-                    order: Order::Asc,
-                }),
                ..Default::default()
            },
            segments: Vec::new(),
@@ -437,7 +382,7 @@ mod tests {
        let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
        assert_eq!(
            json,
-            r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"none","docstore_blocksize":16384},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
+            r#"{"index_settings":{"docstore_compression":"none","docstore_blocksize":16384},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
        );

        let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap();
@@ -456,10 +401,6 @@ mod tests {
        };
        let index_metas = IndexMeta {
            index_settings: IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "text".to_string(),
-                    order: Order::Asc,
-                }),
                docstore_compression: crate::store::Compressor::Zstd(ZstdCompressor {
                    compression_level: Some(4),
                }),
@@ -474,7 +415,7 @@ mod tests {
        let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
        assert_eq!(
            json,
-            r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"zstd(compression_level=4)","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
+            r#"{"index_settings":{"docstore_compression":"zstd(compression_level=4)","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
        );

        let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap();
@@ -486,35 +427,35 @@ mod tests {
    #[test]
    #[cfg(all(feature = "lz4-compression", feature = "zstd-compression"))]
    fn test_serialize_metas_invalid_comp() {
-        let json = r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"zsstd","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#;
+        let json = r#"{"index_settings":{"docstore_compression":"zsstd","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#;

        let err = serde_json::from_str::<UntrackedIndexMeta>(json).unwrap_err();
        assert_eq!(
            err.to_string(),
            "unknown variant `zsstd`, expected one of `none`, `lz4`, `zstd`, \
-             `zstd(compression_level=5)` at line 1 column 96"
+             `zstd(compression_level=5)` at line 1 column 49"
                .to_string()
        );

-        let json = r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"zstd(bla=10)","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#;
+        let json = r#"{"index_settings":{"docstore_compression":"zstd(bla=10)","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#;

        let err = serde_json::from_str::<UntrackedIndexMeta>(json).unwrap_err();
        assert_eq!(
            err.to_string(),
-            "unknown zstd option \"bla\" at line 1 column 103".to_string()
+            "unknown zstd option \"bla\" at line 1 column 56".to_string()
        );
    }

    #[test]
    #[cfg(not(feature = "zstd-compression"))]
    fn test_serialize_metas_unsupported_comp() {
-        let json = r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"zstd","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#;
+        let json = r#"{"index_settings":{"docstore_compression":"zstd","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#;

        let err = serde_json::from_str::<UntrackedIndexMeta>(json).unwrap_err();
        assert_eq!(
            err.to_string(),
            "unsupported variant `zstd`, please enable Tantivy's `zstd-compression` feature at \
-             line 1 column 95"
+             line 1 column 48"
                .to_string()
        );
    }
@@ -528,7 +469,6 @@ mod tests {
        assert_eq!(
            index_settings,
            IndexSettings {
-                sort_by_field: None,
                docstore_compression: Compressor::default(),
                docstore_compress_dedicated_thread: true,
                docstore_blocksize: 16_384
--- a/src/index/mod.rs
+++ b/src/index/mod.rs
@@ -12,7 +12,7 @@ mod segment_reader;

 pub use self::index::{Index, IndexBuilder};
 pub(crate) use self::index_meta::SegmentMetaInventory;
-pub use self::index_meta::{IndexMeta, IndexSettings, IndexSortByField, Order, SegmentMeta};
+pub use self::index_meta::{IndexMeta, IndexSettings, Order, SegmentMeta};
 pub use self::inverted_index_reader::InvertedIndexReader;
 pub use self::segment::Segment;
 pub use self::segment_component::SegmentComponent;
--- a/src/index/segment_component.rs
+++ b/src/index/segment_component.rs
@@ -23,8 +23,6 @@ pub enum SegmentComponent {
    /// Accessing a document from the store is relatively slow, as it
    /// requires to decompress the entire block it belongs to.
    Store,
-    /// Temporary storage of the documents, before streamed to `Store`.
-    TempStore,
    /// Bitset describing which document of the segment is alive.
    /// (It was representing deleted docs but changed to represent alive docs from v0.17)
    Delete,
@@ -33,14 +31,13 @@ pub enum SegmentComponent {
 impl SegmentComponent {
    /// Iterates through the components.
    pub fn iterator() -> slice::Iter<'static, SegmentComponent> {
-        static SEGMENT_COMPONENTS: [SegmentComponent; 8] = [
+        static SEGMENT_COMPONENTS: [SegmentComponent; 7] = [
            SegmentComponent::Postings,
            SegmentComponent::Positions,
            SegmentComponent::FastFields,
            SegmentComponent::FieldNorms,
            SegmentComponent::Terms,
            SegmentComponent::Store,
-            SegmentComponent::TempStore,
            SegmentComponent::Delete,
        ];
        SEGMENT_COMPONENTS.iter()
--- a/src/index/segment_reader.rs
+++ b/src/index/segment_reader.rs
@@ -6,7 +6,6 @@ use common::{ByteCount, HasLen};
 use fnv::FnvHashMap;
 use itertools::Itertools;

-use crate::directory::error::OpenReadError;
 use crate::directory::{CompositeFile, FileSlice};
 use crate::error::DataCorruption;
 use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders};
@@ -160,10 +159,12 @@ impl SegmentReader {
        let postings_file = segment.open_read(SegmentComponent::Postings)?;
        let postings_composite = CompositeFile::open(&postings_file)?;

-        let positions_composite = match segment.open_read(SegmentComponent::Positions) {
-            Ok(positions_file) => CompositeFile::open(&positions_file)?,
-            Err(OpenReadError::FileDoesNotExist(_)) => CompositeFile::empty(),
-            Err(open_read_error) => return Err(open_read_error.into()),
+        let positions_composite = {
+            if let Ok(positions_file) = segment.open_read(SegmentComponent::Positions) {
+                CompositeFile::open(&positions_file)?
+            } else {
+                CompositeFile::empty()
+            }
        };

        let schema = segment.schema();
@@ -322,7 +323,7 @@ impl SegmentReader {
                            // Without expand dots enabled dots need to be escaped.
                            let escaped_json_path = json_path.replace('.', "\\.");
                            let full_path = format!("{field_name}.{escaped_json_path}");
-                            let full_path_unescaped = format!("{}.{}", field_name, json_path);
+                            let full_path_unescaped = format!("{}.{}", field_name, &json_path);
                            map_to_canonical.insert(full_path_unescaped, full_path.to_string());
                            full_path
                        } else {
--- a/src/indexer/doc_id_mapping.rs
+++ b/src/indexer/doc_id_mapping.rs
@@ -3,28 +3,17 @@

 use common::ReadOnlyBitSet;

-use super::SegmentWriter;
-use crate::schema::{Field, Schema};
-use crate::{DocAddress, DocId, IndexSortByField, TantivyError};
+use crate::DocAddress;

-/// Describes how the document ID mapping was produced during a merge.
 #[derive(Copy, Clone, Eq, PartialEq)]
 pub enum MappingType {
-    /// Segments are concatenated in order with no deletes; doc IDs are contiguous ranges.
    Stacked,
-    /// Segments are concatenated in order but some documents have been deleted and are skipped.
    StackedWithDeletes,
-    /// Documents have been reordered (e.g. sorted by a field or externally shuffled).
-    Shuffled,
 }

 /// Struct to provide mapping from new doc_id to old doc_id and segment.
-///
-/// Callers outside tantivy (e.g. pomsky's merge executor) can construct a
-/// `Shuffled` mapping directly from a precomputed permutation and pass it
-/// into [`IndexMerger::write_with_doc_id_mapping`].
 #[derive(Clone)]
-pub struct SegmentDocIdMapping {
+pub(crate) struct SegmentDocIdMapping {
    pub(crate) new_doc_id_to_old_doc_addr: Vec<DocAddress>,
    pub(crate) alive_bitsets: Vec<Option<ReadOnlyBitSet>>,
    mapping_type: MappingType,
@@ -43,25 +32,6 @@ impl SegmentDocIdMapping {
        }
    }

-    /// Build a `Shuffled` mapping from an explicit permutation of [`DocAddress`]es.
-    ///
-    /// `new_doc_id_to_old_doc_addr[new_id]` gives the source segment and doc id for
-    /// the document that should appear at position `new_id` in the merged segment.
-    /// `alive_bitsets` must contain one entry per source segment (in the same order
-    /// as passed to [`IndexMerger::open_with_custom_alive_set`]), each `None` if that
-    /// segment has no deletes.
-    pub fn new_shuffled(
-        new_doc_id_to_old_doc_addr: Vec<DocAddress>,
-        alive_bitsets: Vec<Option<ReadOnlyBitSet>>,
-    ) -> Self {
-        Self {
-            new_doc_id_to_old_doc_addr,
-            mapping_type: MappingType::Shuffled,
-            alive_bitsets,
-        }
-    }
-
-    /// Returns the [`MappingType`] that describes how this mapping was constructed.
    pub fn mapping_type(&self) -> MappingType {
        self.mapping_type
    }
@@ -73,559 +43,4 @@ impl SegmentDocIdMapping {
    pub(crate) fn iter_old_doc_addrs(&self) -> impl Iterator<Item = DocAddress> + '_ {
        self.new_doc_id_to_old_doc_addr.iter().copied()
    }
-
-    /// This flags means the segments are simply stacked in the order of their ordinal.
-    /// e.g. [(0, 1), .. (n, 1), (0, 2)..., (m, 2)]
-    ///
-    /// The different segment may present some deletes, in which case it is expressed by skipping a
-    /// `DocId`. [(0, 1), (0, 3)] <--- here doc_id=0 and doc_id=1 have been deleted
-    ///
-    /// Being trivial is equivalent to having the `new_doc_id_to_old_doc_addr` array sorted.
-    ///
-    /// This allows for some optimization.
-    pub(crate) fn is_trivial(&self) -> bool {
-        match self.mapping_type {
-            MappingType::Stacked | MappingType::StackedWithDeletes => true,
-            MappingType::Shuffled => false,
-        }
-    }
-}
-
-/// Bidirectional mapping between old and new doc IDs within a single segment.
-pub struct DocIdMapping {
-    new_doc_id_to_old: Vec<DocId>,
-    old_doc_id_to_new: Vec<DocId>,
-}
-
-impl DocIdMapping {
-    /// Constructs a [`DocIdMapping`] from a vector mapping each new doc ID to its old doc ID.
-    pub fn from_new_id_to_old_id(new_doc_id_to_old: Vec<DocId>) -> Self {
-        let max_doc = new_doc_id_to_old.len();
-        let old_max_doc = new_doc_id_to_old
-            .iter()
-            .cloned()
-            .max()
-            .map(|n| n + 1)
-            .unwrap_or(0);
-        let mut old_doc_id_to_new = vec![0; old_max_doc as usize];
-        for i in 0..max_doc {
-            old_doc_id_to_new[new_doc_id_to_old[i] as usize] = i as DocId;
-        }
-        DocIdMapping {
-            new_doc_id_to_old,
-            old_doc_id_to_new,
-        }
-    }
-
-    /// returns the new doc_id for the old doc_id
-    pub fn get_new_doc_id(&self, doc_id: DocId) -> DocId {
-        self.old_doc_id_to_new[doc_id as usize]
-    }
-    /// returns the old doc_id for the new doc_id
-    pub fn get_old_doc_id(&self, doc_id: DocId) -> DocId {
-        self.new_doc_id_to_old[doc_id as usize]
-    }
-    /// iterate over old doc_ids in order of the new doc_ids
-    pub fn iter_old_doc_ids(&self) -> impl Iterator<Item = DocId> + Clone + '_ {
-        self.new_doc_id_to_old.iter().cloned()
-    }
-
-    /// Returns a slice mapping each old doc ID to its corresponding new doc ID.
-    pub fn old_to_new_ids(&self) -> &[DocId] {
-        &self.old_doc_id_to_new[..]
-    }
-
-    /// Remaps a given array to the new doc ids.
-    pub fn remap<T: Copy>(&self, els: &[T]) -> Vec<T> {
-        self.new_doc_id_to_old
-            .iter()
-            .map(|old_doc| els[*old_doc as usize])
-            .collect()
-    }
-    /// Returns the number of new (post-sort) doc IDs in this mapping.
-    pub fn num_new_doc_ids(&self) -> usize {
-        self.new_doc_id_to_old.len()
-    }
-    /// Returns the number of old (pre-sort) doc IDs covered by this mapping.
-    pub fn num_old_doc_ids(&self) -> usize {
-        self.old_doc_id_to_new.len()
-    }
-}
-
-pub(crate) fn expect_field_id_for_sort_field(
-    schema: &Schema,
-    sort_by_field: &IndexSortByField,
-) -> crate::Result<Field> {
-    schema.get_field(&sort_by_field.field).map_err(|_| {
-        TantivyError::InvalidArgument(format!(
-            "field to sort index by not found: {:?}",
-            sort_by_field.field
-        ))
-    })
-}
-
-// Generates a document mapping in the form of [index new doc_id] -> old doc_id
-// TODO detect if field is already sorted and discard mapping
-pub(crate) fn get_doc_id_mapping_from_field(
-    sort_by_field: IndexSortByField,
-    segment_writer: &SegmentWriter,
-) -> crate::Result<DocIdMapping> {
-    let schema = segment_writer.segment_serializer.segment().schema();
-    expect_field_id_for_sort_field(&schema, &sort_by_field)?; // for now expect
-    let new_doc_id_to_old = segment_writer.fast_field_writers.sort_order(
-        sort_by_field.field.as_str(),
-        segment_writer.max_doc(),
-        sort_by_field.order.is_desc(),
-    );
-    // create new doc_id to old doc_id index (used in fast_field_writers)
-    Ok(DocIdMapping::from_new_id_to_old_id(new_doc_id_to_old))
-}
-
-#[cfg(test)]
-mod tests_indexsorting {
-    use common::DateTime;
-
-    use crate::collector::TopDocs;
-    use crate::indexer::doc_id_mapping::DocIdMapping;
-    use crate::indexer::NoMergePolicy;
-    use crate::query::QueryParser;
-    use crate::schema::*;
-    use crate::{DocAddress, Index, IndexBuilder, IndexSettings, IndexSortByField, Order};
-
-    fn create_test_index(
-        index_settings: Option<IndexSettings>,
-        text_field_options: TextOptions,
-    ) -> crate::Result<Index> {
-        let mut schema_builder = Schema::builder();
-
-        let my_text_field = schema_builder.add_text_field("text_field", text_field_options);
-        let my_string_field = schema_builder.add_text_field("string_field", STRING | STORED);
-        let my_number =
-            schema_builder.add_u64_field("my_number", NumericOptions::default().set_fast());
-
-        let multi_numbers =
-            schema_builder.add_u64_field("multi_numbers", NumericOptions::default().set_fast());
-
-        let schema = schema_builder.build();
-        let mut index_builder = Index::builder().schema(schema);
-        if let Some(settings) = index_settings {
-            index_builder = index_builder.settings(settings);
-        }
-        let index = index_builder.create_in_ram()?;
-
-        let mut index_writer = index.writer_for_tests()?;
-        index_writer.add_document(doc!(my_number=>40_u64))?;
-        index_writer.add_document(
-            doc!(my_number=>20_u64, multi_numbers => 5_u64, multi_numbers => 6_u64),
-        )?;
-        index_writer.add_document(doc!(my_number=>100_u64))?;
-        index_writer.add_document(
-            doc!(my_number=>10_u64, my_string_field=> "blublub", my_text_field => "some text"),
-        )?;
-        index_writer.add_document(doc!(my_number=>30_u64, multi_numbers => 3_u64 ))?;
-        index_writer.commit()?;
-        Ok(index)
-    }
-    fn get_text_options() -> TextOptions {
-        TextOptions::default().set_indexing_options(
-            TextFieldIndexing::default().set_index_option(IndexRecordOption::Basic),
-        )
-    }
-    #[test]
-    fn test_sort_index_test_text_field() -> crate::Result<()> {
-        // there are different serializers for different settings in postings/recorder.rs
-        // test remapping for all of them
-        let options = vec![
-            get_text_options(),
-            get_text_options().set_indexing_options(
-                TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
-            ),
-            get_text_options().set_indexing_options(
-                TextFieldIndexing::default()
-                    .set_index_option(IndexRecordOption::WithFreqsAndPositions),
-            ),
-        ];
-
-        for option in options {
-            // let options = get_text_options();
-            // no index_sort
-            let index = create_test_index(None, option.clone())?;
-            let my_text_field = index.schema().get_field("text_field").unwrap();
-            let searcher = index.reader()?.searcher();
-
-            let query = QueryParser::for_index(&index, vec![my_text_field]).parse_query("text")?;
-            let top_docs: Vec<(f32, DocAddress)> =
-                searcher.search(&query, &TopDocs::with_limit(3).order_by_score())?;
-            assert_eq!(
-                top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>(),
-                vec![3]
-            );
-
-            // sort by field asc
-            let index = create_test_index(
-                Some(IndexSettings {
-                    sort_by_field: Some(IndexSortByField {
-                        field: "my_number".to_string(),
-                        order: Order::Asc,
-                    }),
-                    ..Default::default()
-                }),
-                option.clone(),
-            )?;
-            let my_text_field = index.schema().get_field("text_field").unwrap();
-            let reader = index.reader()?;
-            let searcher = reader.searcher();
-
-            let query = QueryParser::for_index(&index, vec![my_text_field]).parse_query("text")?;
-            let top_docs: Vec<(f32, DocAddress)> =
-                searcher.search(&query, &TopDocs::with_limit(3).order_by_score())?;
-            assert_eq!(
-                top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>(),
-                vec![0]
-            );
-
-            // test new field norm mapping
-            {
-                let my_text_field = index.schema().get_field("text_field").unwrap();
-                let fieldnorm_reader = searcher
-                    .segment_reader(0)
-                    .get_fieldnorms_reader(my_text_field)?;
-                assert_eq!(fieldnorm_reader.fieldnorm(0), 2); // some text
-                assert_eq!(fieldnorm_reader.fieldnorm(1), 0);
-            }
-            // sort by field desc
-            let index = create_test_index(
-                Some(IndexSettings {
-                    sort_by_field: Some(IndexSortByField {
-                        field: "my_number".to_string(),
-                        order: Order::Desc,
-                    }),
-                    ..Default::default()
-                }),
-                option.clone(),
-            )?;
-            let my_string_field = index.schema().get_field("text_field").unwrap();
-            let searcher = index.reader()?.searcher();
-
-            let query =
-                QueryParser::for_index(&index, vec![my_string_field]).parse_query("text")?;
-            let top_docs: Vec<(f32, DocAddress)> =
-                searcher.search(&query, &TopDocs::with_limit(3).order_by_score())?;
-            assert_eq!(
-                top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>(),
-                vec![4]
-            );
-            // test new field norm mapping
-            {
-                let my_text_field = index.schema().get_field("text_field").unwrap();
-                let fieldnorm_reader = searcher
-                    .segment_reader(0)
-                    .get_fieldnorms_reader(my_text_field)?;
-                assert_eq!(fieldnorm_reader.fieldnorm(0), 0);
-                assert_eq!(fieldnorm_reader.fieldnorm(1), 0);
-                assert_eq!(fieldnorm_reader.fieldnorm(2), 0);
-                assert_eq!(fieldnorm_reader.fieldnorm(3), 0);
-                assert_eq!(fieldnorm_reader.fieldnorm(4), 2); // some text
-            }
-        }
-        Ok(())
-    }
-    #[test]
-    fn test_sort_index_get_documents() -> crate::Result<()> {
-        // default baseline
-        let index = create_test_index(None, get_text_options())?;
-        let my_string_field = index.schema().get_field("string_field").unwrap();
-        let searcher = index.reader()?.searcher();
-        {
-            assert!(searcher
-                .doc::<TantivyDocument>(DocAddress::new(0, 0))?
-                .get_first(my_string_field)
-                .is_none());
-            assert_eq!(
-                searcher
-                    .doc::<TantivyDocument>(DocAddress::new(0, 3))?
-                    .get_first(my_string_field)
-                    .unwrap()
-                    .as_str(),
-                Some("blublub")
-            );
-        }
-        // sort by field asc
-        let index = create_test_index(
-            Some(IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "my_number".to_string(),
-                    order: Order::Asc,
-                }),
-                ..Default::default()
-            }),
-            get_text_options(),
-        )?;
-        let my_string_field = index.schema().get_field("string_field").unwrap();
-        let searcher = index.reader()?.searcher();
-        {
-            assert_eq!(
-                searcher
-                    .doc::<TantivyDocument>(DocAddress::new(0, 0))?
-                    .get_first(my_string_field)
-                    .unwrap()
-                    .as_str(),
-                Some("blublub")
-            );
-            let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0, 4))?;
-            assert!(doc.get_first(my_string_field).is_none());
-        }
-        // sort by field desc
-        let index = create_test_index(
-            Some(IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "my_number".to_string(),
-                    order: Order::Desc,
-                }),
-                ..Default::default()
-            }),
-            get_text_options(),
-        )?;
-        let my_string_field = index.schema().get_field("string_field").unwrap();
-        let searcher = index.reader()?.searcher();
-        {
-            let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0, 4))?;
-            assert_eq!(
-                doc.get_first(my_string_field).unwrap().as_str(),
-                Some("blublub")
-            );
-        }
-        Ok(())
-    }
-
-    #[test]
-    fn test_sort_index_test_string_field() -> crate::Result<()> {
-        let index = create_test_index(None, get_text_options())?;
-        let my_string_field = index.schema().get_field("string_field").unwrap();
-        let searcher = index.reader()?.searcher();
-
-        let query = QueryParser::for_index(&index, vec![my_string_field]).parse_query("blublub")?;
-        let top_docs: Vec<(f32, DocAddress)> =
-            searcher.search(&query, &TopDocs::with_limit(3).order_by_score())?;
-        assert_eq!(
-            top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>(),
-            vec![3]
-        );
-
-        let index = create_test_index(
-            Some(IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "my_number".to_string(),
-                    order: Order::Asc,
-                }),
-                ..Default::default()
-            }),
-            get_text_options(),
-        )?;
-        let my_string_field = index.schema().get_field("string_field").unwrap();
-        let reader = index.reader()?;
-        let searcher = reader.searcher();
-
-        let query = QueryParser::for_index(&index, vec![my_string_field]).parse_query("blublub")?;
-        let top_docs: Vec<(f32, DocAddress)> =
-            searcher.search(&query, &TopDocs::with_limit(3).order_by_score())?;
-        assert_eq!(
-            top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>(),
-            vec![0]
-        );
-
-        // test new field norm mapping
-        {
-            let my_text_field = index.schema().get_field("text_field").unwrap();
-            let fieldnorm_reader = searcher
-                .segment_reader(0)
-                .get_fieldnorms_reader(my_text_field)?;
-            assert_eq!(fieldnorm_reader.fieldnorm(0), 2); // some text
-            assert_eq!(fieldnorm_reader.fieldnorm(1), 0);
-        }
-        // sort by field desc
-        let index = create_test_index(
-            Some(IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "my_number".to_string(),
-                    order: Order::Desc,
-                }),
-                ..Default::default()
-            }),
-            get_text_options(),
-        )?;
-        let my_string_field = index.schema().get_field("string_field").unwrap();
-        let searcher = index.reader()?.searcher();
-
-        let query = QueryParser::for_index(&index, vec![my_string_field]).parse_query("blublub")?;
-        let top_docs: Vec<(f32, DocAddress)> =
-            searcher.search(&query, &TopDocs::with_limit(3).order_by_score())?;
-        assert_eq!(
-            top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>(),
-            vec![4]
-        );
-        // test new field norm mapping
-        {
-            let my_text_field = index.schema().get_field("text_field").unwrap();
-            let fieldnorm_reader = searcher
-                .segment_reader(0)
-                .get_fieldnorms_reader(my_text_field)?;
-            assert_eq!(fieldnorm_reader.fieldnorm(0), 0);
-            assert_eq!(fieldnorm_reader.fieldnorm(1), 0);
-            assert_eq!(fieldnorm_reader.fieldnorm(2), 0);
-            assert_eq!(fieldnorm_reader.fieldnorm(3), 0);
-            assert_eq!(fieldnorm_reader.fieldnorm(4), 2); // some text
-        }
-        Ok(())
-    }
-
-    #[test]
-    fn test_sort_index_fast_field() -> crate::Result<()> {
-        let index = create_test_index(
-            Some(IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "my_number".to_string(),
-                    order: Order::Asc,
-                }),
-                ..Default::default()
-            }),
-            get_text_options(),
-        )?;
-        assert_eq!(
-            index.settings().sort_by_field.as_ref().unwrap().field,
-            "my_number".to_string()
-        );
-
-        let searcher = index.reader()?.searcher();
-        assert_eq!(searcher.segment_readers().len(), 1);
-        let segment_reader = searcher.segment_reader(0);
-        let fast_fields = segment_reader.fast_fields();
-
-        let fast_field = fast_fields
-            .u64("my_number")
-            .unwrap()
-            .first_or_default_col(999);
-        assert_eq!(fast_field.get_val(0), 10u64);
-        assert_eq!(fast_field.get_val(1), 20u64);
-        assert_eq!(fast_field.get_val(2), 30u64);
-
-        let multifield = fast_fields.u64("multi_numbers").unwrap();
-        let vals: Vec<u64> = multifield.values_for_doc(0u32).collect();
-        assert_eq!(vals, &[] as &[u64]);
-        let vals: Vec<_> = multifield.values_for_doc(1u32).collect();
-        assert_eq!(vals, &[5, 6]);
-
-        let vals: Vec<_> = multifield.values_for_doc(2u32).collect();
-        assert_eq!(vals, &[3]);
-        Ok(())
-    }
-
-    #[test]
-    fn test_with_sort_by_date_field() -> crate::Result<()> {
-        let mut schema_builder = Schema::builder();
-        let date_field = schema_builder.add_date_field("date", INDEXED | STORED | FAST);
-        let schema = schema_builder.build();
-
-        let settings = IndexSettings {
-            sort_by_field: Some(IndexSortByField {
-                field: "date".to_string(),
-                order: Order::Desc,
-            }),
-            ..Default::default()
-        };
-
-        let index = Index::builder()
-            .schema(schema)
-            .settings(settings)
-            .create_in_ram()?;
-        let mut index_writer = index.writer_for_tests()?;
-        index_writer.set_merge_policy(Box::new(NoMergePolicy));
-
-        index_writer.add_document(doc!(
-            date_field => DateTime::from_timestamp_secs(1000),
-        ))?;
-        index_writer.add_document(doc!(
-            date_field => DateTime::from_timestamp_secs(999),
-        ))?;
-        index_writer.add_document(doc!(
-            date_field => DateTime::from_timestamp_secs(1001),
-        ))?;
-        index_writer.commit()?;
-
-        let searcher = index.reader()?.searcher();
-        assert_eq!(searcher.segment_readers().len(), 1);
-        let segment_reader = searcher.segment_reader(0);
-        let fast_fields = segment_reader.fast_fields();
-
-        let fast_field = fast_fields
-            .date("date")
-            .unwrap()
-            .first_or_default_col(DateTime::from_timestamp_secs(0));
-        assert_eq!(fast_field.get_val(0), DateTime::from_timestamp_secs(1001));
-        assert_eq!(fast_field.get_val(1), DateTime::from_timestamp_secs(1000));
-        assert_eq!(fast_field.get_val(2), DateTime::from_timestamp_secs(999));
-        Ok(())
-    }
-
-    #[test]
-    fn test_doc_mapping() {
-        let doc_mapping = DocIdMapping::from_new_id_to_old_id(vec![3, 2, 5]);
-        assert_eq!(doc_mapping.get_old_doc_id(0), 3);
-        assert_eq!(doc_mapping.get_old_doc_id(1), 2);
-        assert_eq!(doc_mapping.get_old_doc_id(2), 5);
-        assert_eq!(doc_mapping.get_new_doc_id(0), 0);
-        assert_eq!(doc_mapping.get_new_doc_id(1), 0);
-        assert_eq!(doc_mapping.get_new_doc_id(2), 1);
-        assert_eq!(doc_mapping.get_new_doc_id(3), 0);
-        assert_eq!(doc_mapping.get_new_doc_id(4), 0);
-        assert_eq!(doc_mapping.get_new_doc_id(5), 2);
-    }
-
-    #[test]
-    fn test_doc_mapping_remap() {
-        let doc_mapping = DocIdMapping::from_new_id_to_old_id(vec![2, 8, 3]);
-        assert_eq!(
-            &doc_mapping.remap(&[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]),
-            &[2000, 8000, 3000]
-        );
-    }
-
-    #[test]
-    fn test_text_sort() -> crate::Result<()> {
-        let mut schema_builder = SchemaBuilder::new();
-        let id_field = schema_builder.add_text_field("id", STRING | FAST | STORED);
-        schema_builder.add_text_field("name", TEXT | STORED);
-
-        let index = IndexBuilder::new()
-            .schema(schema_builder.build())
-            .settings(IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "id".to_string(),
-                    order: Order::Asc,
-                }),
-                ..Default::default()
-            })
-            .create_in_ram()?;
-
-        let mut index_writer = index.writer_for_tests()?;
-        index_writer.add_document(doc!(id_field => "z"))?;
-        index_writer.add_document(doc!(id_field => "a"))?;
-        index_writer.add_document(doc!(id_field => "m"))?;
-        index_writer.commit()?;
-
-        let searcher = index.reader()?.searcher();
-        let segment_reader = searcher.segment_reader(0);
-        let str_col = segment_reader.fast_fields().str("id")?.unwrap();
-        let mut values = Vec::new();
-        for doc in 0..segment_reader.max_doc() {
-            if let Some(ord) = str_col.ords().first(doc) {
-                let mut s = String::new();
-                str_col.ord_to_str(ord, &mut s)?;
-                values.push(s);
-            }
-        }
-        assert_eq!(values, vec!["a", "m", "z"]);
-
-        Ok(())
-    }
 }
--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -218,7 +218,7 @@ fn index_documents<D: Document>(
    let alive_bitset_opt = apply_deletes(&segment_with_max_doc, &mut delete_cursor, &doc_opstamps)?;

    let meta = segment_with_max_doc.meta().clone();
-    meta.untrack_temp_docstore();
+
    // update segment_updater inventory to remove tempstore
    let segment_entry = SegmentEntry::new(meta, delete_cursor, alive_bitset_opt);
    segment_updater.schedule_add_segment(segment_entry).wait()?;
@@ -819,7 +819,7 @@ mod tests {
    use std::collections::{HashMap, HashSet};
    use std::net::Ipv6Addr;

-    use columnar::{Cardinality, Column, MonotonicallyMappableToU128};
+    use columnar::{Column, MonotonicallyMappableToU128};
    use itertools::Itertools;
    use proptest::prop_oneof;

@@ -829,7 +829,7 @@ mod tests {
    use crate::error::*;
    use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
    use crate::indexer::{IndexWriterOptions, NoMergePolicy};
-    use crate::query::{BooleanQuery, Occur, Query, QueryParser, TermQuery};
+    use crate::query::{QueryParser, TermQuery};
    use crate::schema::{
        self, Facet, FacetOptions, IndexRecordOption, IpAddrOptions, JsonObjectOptions,
        NumericOptions, Schema, TextFieldIndexing, TextOptions, Value, FAST, INDEXED, STORED,
@@ -837,8 +837,8 @@ mod tests {
    };
    use crate::store::DOCSTORE_CACHE_CAPACITY;
    use crate::{
-        DateTime, DocAddress, Index, IndexSettings, IndexSortByField, IndexWriter, Order,
-        ReloadPolicy, TantivyDocument, Term,
+        DateTime, DocAddress, Index, IndexSettings, IndexWriter, ReloadPolicy, TantivyDocument,
+        Term,
    };

    const LOREM: &str = "Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do \
@@ -1479,116 +1479,6 @@ mod tests {
        assert!(text_fast_field.term_ords(1).eq([1].into_iter()));
    }

-    #[test]
-    fn test_delete_with_sort_by_field() -> crate::Result<()> {
-        let mut schema_builder = schema::Schema::builder();
-        let id_field = schema_builder.add_u64_field("id", INDEXED | schema::STORED | FAST);
-        let schema = schema_builder.build();
-
-        let settings = IndexSettings {
-            sort_by_field: Some(IndexSortByField {
-                field: "id".to_string(),
-                order: Order::Desc,
-            }),
-            ..Default::default()
-        };
-
-        let index = Index::builder()
-            .schema(schema)
-            .settings(settings)
-            .create_in_ram()?;
-        let index_reader = index.reader()?;
-        let mut index_writer = index.writer_for_tests()?;
-
-        // create and delete docs in same commit
-        for id in 0u64..5u64 {
-            index_writer.add_document(doc!(id_field => id))?;
-        }
-        for id in 2u64..4u64 {
-            index_writer.delete_term(Term::from_field_u64(id_field, id));
-        }
-        for id in 5u64..10u64 {
-            index_writer.add_document(doc!(id_field => id))?;
-        }
-        index_writer.commit()?;
-        index_reader.reload()?;
-
-        let searcher = index_reader.searcher();
-        assert_eq!(searcher.segment_readers().len(), 1);
-
-        let segment_reader = searcher.segment_reader(0);
-        assert_eq!(segment_reader.num_docs(), 8);
-        assert_eq!(segment_reader.max_doc(), 10);
-        let fast_field_reader = segment_reader.fast_fields().u64("id")?;
-
-        let in_order_alive_ids: Vec<u64> = segment_reader
-            .doc_ids_alive()
-            .flat_map(|doc| fast_field_reader.values_for_doc(doc))
-            .collect();
-        assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 1, 0]);
-        Ok(())
-    }
-
-    #[test]
-    fn test_delete_query_with_sort_by_field() -> crate::Result<()> {
-        let mut schema_builder = schema::Schema::builder();
-        let id_field = schema_builder.add_u64_field("id", INDEXED | schema::STORED | FAST);
-        let schema = schema_builder.build();
-
-        let settings = IndexSettings {
-            sort_by_field: Some(IndexSortByField {
-                field: "id".to_string(),
-                order: Order::Desc,
-            }),
-            ..Default::default()
-        };
-
-        let index = Index::builder()
-            .schema(schema)
-            .settings(settings)
-            .create_in_ram()?;
-        let index_reader = index.reader()?;
-        let mut index_writer = index.writer_for_tests()?;
-
-        // create and delete docs in same commit
-        for id in 0u64..5u64 {
-            index_writer.add_document(doc!(id_field => id))?;
-        }
-        for id in 1u64..4u64 {
-            let term = Term::from_field_u64(id_field, id);
-            let not_term = Term::from_field_u64(id_field, 2);
-            let term = Box::new(TermQuery::new(term, Default::default()));
-            let not_term = Box::new(TermQuery::new(not_term, Default::default()));
-
-            let query: BooleanQuery = vec![
-                (Occur::Must, term as Box<dyn Query>),
-                (Occur::MustNot, not_term as Box<dyn Query>),
-            ]
-            .into();
-
-            index_writer.delete_query(Box::new(query))?;
-        }
-        for id in 5u64..10u64 {
-            index_writer.add_document(doc!(id_field => id))?;
-        }
-        index_writer.commit()?;
-        index_reader.reload()?;
-
-        let searcher = index_reader.searcher();
-        assert_eq!(searcher.segment_readers().len(), 1);
-
-        let segment_reader = searcher.segment_reader(0);
-        assert_eq!(segment_reader.num_docs(), 8);
-        assert_eq!(segment_reader.max_doc(), 10);
-        let fast_field_reader = segment_reader.fast_fields().u64("id")?;
-        let in_order_alive_ids: Vec<u64> = segment_reader
-            .doc_ids_alive()
-            .flat_map(|doc| fast_field_reader.values_for_doc(doc))
-            .collect();
-        assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 2, 0]);
-        Ok(())
-    }
-
    #[derive(Debug, Clone)]
    enum IndexingOp {
        AddMultipleDoc {
@@ -1735,11 +1625,7 @@ mod tests {
        id_list
    }

-    fn test_operation_strategy(
-        ops: &[IndexingOp],
-        sort_index: bool,
-        force_end_merge: bool,
-    ) -> crate::Result<Index> {
+    fn test_operation_strategy(ops: &[IndexingOp], force_end_merge: bool) -> crate::Result<Index> {
        let mut schema_builder = schema::Schema::builder();
        let json_field = schema_builder.add_json_field("json", FAST | TEXT | STORED);
        let ip_field = schema_builder.add_ip_addr_field("ip", FAST | INDEXED | STORED);
@@ -1775,15 +1661,7 @@ mod tests {
        );
        let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
        let schema = schema_builder.build();
-        let settings = if sort_index {
-            IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "id_opt".to_string(),
-                    order: Order::Asc,
-                }),
-                ..Default::default()
-            }
-        } else {
+        let settings = {
            IndexSettings {
                ..Default::default()
            }
@@ -2347,33 +2225,13 @@ mod tests {
            }
        }

-        // Test if index property is in sort order
-        if sort_index {
-            // load all id_opt in each segment and check they are in order
-
-            for reader in searcher.segment_readers() {
-                let (ff_reader, _) = reader.fast_fields().u64_lenient("id_opt").unwrap().unwrap();
-                let mut ids_in_segment: Vec<u64> = Vec::new();
-
-                for doc in 0..reader.num_docs() {
-                    ids_in_segment.extend(ff_reader.values_for_doc(doc));
-                }
-
-                assert!(is_sorted(&ids_in_segment));
-
-                fn is_sorted<T>(data: &[T]) -> bool
-                where T: Ord {
-                    data.windows(2).all(|w| w[0] <= w[1])
-                }
-            }
-        }
        Ok(index)
    }

    #[test]
    fn test_fast_field_range() {
        let ops: Vec<_> = (0..1000).map(IndexingOp::add).collect();
-        assert!(test_operation_strategy(&ops, false, true).is_ok());
+        assert!(test_operation_strategy(&ops, true).is_ok());
    }

    #[test]
@@ -2387,7 +2245,6 @@ mod tests {
                IndexingOp::Commit,
                IndexingOp::Merge
            ],
-            true,
            false
        )
        .is_ok());
@@ -2404,7 +2261,6 @@ mod tests {
                IndexingOp::add(1),
                IndexingOp::Commit,
            ],
-            false,
            true
        )
        .is_ok());
@@ -2412,97 +2268,24 @@ mod tests {

    #[test]
    fn test_minimal_sort_force_end_merge() {
-        assert!(test_operation_strategy(
-            &[IndexingOp::add(23), IndexingOp::add(13),],
-            false,
-            false
-        )
-        .is_ok());
+        assert!(
+            test_operation_strategy(&[IndexingOp::add(23), IndexingOp::add(13),], false).is_ok()
+        );
    }

    #[test]
-    fn test_minimal_sort() {
-        let mut schema_builder = Schema::builder();
-        let val = schema_builder.add_u64_field("val", FAST);
-        let id = schema_builder.add_u64_field("id", FAST);
-        let schema = schema_builder.build();
-        let settings = IndexSettings {
-            sort_by_field: Some(IndexSortByField {
-                field: "id".to_string(),
-                order: Order::Asc,
-            }),
-            ..Default::default()
-        };
-        let index = Index::builder()
-            .schema(schema)
-            .settings(settings)
-            .create_in_ram()
-            .unwrap();
-        let mut writer = index.writer_for_tests().unwrap();
-        writer
-            .add_document(doc!(id=> 3u64, val=>4u64, val=>4u64))
-            .unwrap();
-        writer
-            .add_document(doc!(id=> 2u64, val=>2u64, val=>2u64))
-            .unwrap();
-        writer
-            .add_document(doc!(id=> 1u64, val=>1u64, val=>1u64))
-            .unwrap();
-        writer.commit().unwrap();
-        let reader = index.reader().unwrap();
-        let searcher = reader.searcher();
-        let segment_reader = searcher.segment_reader(0);
-        let id_col: Column = segment_reader
-            .fast_fields()
-            .column_opt("id")
-            .unwrap()
-            .unwrap();
-        let val_col: Column = segment_reader
-            .fast_fields()
-            .column_opt("val")
-            .unwrap()
-            .unwrap();
-        assert_eq!(id_col.get_cardinality(), Cardinality::Full);
-        assert_eq!(val_col.get_cardinality(), Cardinality::Multivalued);
-        assert_eq!(id_col.first(0u32), Some(1u64));
-        assert_eq!(id_col.first(1u32), Some(2u64));
-        assert!(val_col.values_for_doc(0u32).eq([1u64, 1u64].into_iter()));
-        assert!(val_col.values_for_doc(1u32).eq([2u64, 2u64].into_iter()));
-    }
-
-    #[test]
-    fn test_minimal_sort_force_end_merge_with_delete() {
+    fn test_minimal_no_force_end_merge() {
        assert!(test_operation_strategy(
            &[
                IndexingOp::add(23),
                IndexingOp::add(13),
                IndexingOp::DeleteDoc { id: 13 }
            ],
-            true,
-            true
-        )
-        .is_ok());
-    }
-
-    #[test]
-    fn test_minimal_no_sort_no_force_end_merge() {
-        assert!(test_operation_strategy(
-            &[
-                IndexingOp::add(23),
-                IndexingOp::add(13),
-                IndexingOp::DeleteDoc { id: 13 }
-            ],
-            false,
            false
        )
        .is_ok());
    }

-    #[test]
-    fn test_minimal_sort_merge() {
-        assert!(test_operation_strategy(&[IndexingOp::add(3),], true, true).is_ok());
-    }
-
    use proptest::prelude::*;

    proptest! {
@@ -2510,77 +2293,23 @@ mod tests {
        #![proptest_config(ProptestConfig::with_cases(20))]
        #[test]
        fn test_delete_proptest_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) {
-            assert!(test_operation_strategy(&ops[..],  true, false).is_ok());
+            assert!(test_operation_strategy(&ops[..],  false).is_ok());
        }

        #[test]
        fn test_delete_proptest_with_merge_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) {
-            assert!(test_operation_strategy(&ops[..],  false, false).is_ok());
+            assert!(test_operation_strategy(&ops[..],  true).is_ok());
        }

        #[test]
        fn test_delete_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) {
-            assert!(test_operation_strategy(&ops[..],  true, true).is_ok());
+            assert!(test_operation_strategy(&ops[..],  false).is_ok());
        }

        #[test]
        fn test_delete_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..100)) {
-            assert!(test_operation_strategy(&ops[..],  false, true).is_ok());
+            assert!(test_operation_strategy(&ops[..],  true).is_ok());
        }
-
-        #[test]
-        #[ignore = "doesn't work with deferred segment loading"]
-        fn test_delete_without_sort_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) {
-            assert!(test_operation_strategy(&ops[..], false, false).is_ok());
-        }
-
-        #[test]
-        #[ignore = "doesn't work with deferred segment loading"]
-        fn test_delete_with_sort_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) {
-            assert!(test_operation_strategy(&ops[..], true, true).is_ok());
-        }
-    }
-
-    #[test]
-    fn test_delete_with_sort_by_field_last_opstamp_is_not_max() -> crate::Result<()> {
-        let mut schema_builder = schema::Schema::builder();
-        let sort_by_field = schema_builder.add_u64_field("sort_by", FAST);
-        let id_field = schema_builder.add_u64_field("id", INDEXED);
-        let schema = schema_builder.build();
-
-        let settings = IndexSettings {
-            sort_by_field: Some(IndexSortByField {
-                field: "sort_by".to_string(),
-                order: Order::Asc,
-            }),
-            ..Default::default()
-        };
-
-        let index = Index::builder()
-            .schema(schema)
-            .settings(settings)
-            .create_in_ram()?;
-        let mut index_writer = index.writer_for_tests()?;
-
-        // We add a doc...
-        index_writer.add_document(doc!(sort_by_field => 2u64, id_field => 0u64))?;
-        // And remove it.
-        index_writer.delete_term(Term::from_field_u64(id_field, 0u64));
-        // We add another doc.
-        index_writer.add_document(doc!(sort_by_field=>1u64, id_field => 0u64))?;
-
-        // The expected result is a segment with
-        // maxdoc = 2
-        // numdoc = 1.
-        index_writer.commit()?;
-
-        let searcher = index.reader()?.searcher();
-        assert_eq!(searcher.segment_readers().len(), 1);
-
-        let segment_reader = searcher.segment_reader(0);
-        assert_eq!(segment_reader.max_doc(), 2);
-        assert_eq!(segment_reader.num_docs(), 1);
-        Ok(())
    }

    #[test]
@@ -2597,7 +2326,7 @@ mod tests {
            IndexingOp::add(4),
            Commit,
        ];
-        test_operation_strategy(&ops[..], false, true).unwrap();
+        test_operation_strategy(&ops[..], true).unwrap();
    }

    #[test]
@@ -2610,7 +2339,7 @@ mod tests {
            Commit,
            Merge,
        ];
-        test_operation_strategy(&ops[..], false, true).unwrap();
+        test_operation_strategy(&ops[..], true).unwrap();
    }

    #[test]
@@ -2622,7 +2351,7 @@ mod tests {
            IndexingOp::add(13),
            Commit,
        ];
-        test_operation_strategy(&ops[..], false, true).unwrap();
+        test_operation_strategy(&ops[..], true).unwrap();
    }

    #[test]
@@ -2633,7 +2362,7 @@ mod tests {
            IndexingOp::add(9),
            IndexingOp::add(10),
        ];
-        test_operation_strategy(&ops[..], false, false).unwrap();
+        test_operation_strategy(&ops[..], false).unwrap();
    }

    #[test]
@@ -2660,7 +2389,6 @@ mod tests {
                IndexingOp::Commit,
                IndexingOp::Commit
            ],
-            false,
            false
        )
        .is_ok());
@@ -2681,7 +2409,6 @@ mod tests {
                IndexingOp::Merge,
            ],
            true,
-            false,
        )
        .unwrap();
    }
--- a/src/indexer/merge_index_test.rs
+++ b/src/indexer/merge_index_test.rs
@@ -0,0 +1,148 @@
+#[cfg(test)]
+mod tests {
+    use crate::collector::TopDocs;
+    use crate::fastfield::AliveBitSet;
+    use crate::index::Index;
+    use crate::postings::Postings;
+    use crate::query::QueryParser;
+    use crate::schema::{
+        self, BytesOptions, Facet, FacetOptions, IndexRecordOption, NumericOptions,
+        TextFieldIndexing, TextOptions,
+    };
+    use crate::{DocAddress, DocSet, IndexSettings, IndexWriter, Term};
+
+    fn create_test_index(index_settings: Option<IndexSettings>) -> crate::Result<Index> {
+        let mut schema_builder = schema::Schema::builder();
+        let int_options = NumericOptions::default()
+            .set_fast()
+            .set_stored()
+            .set_indexed();
+        let int_field = schema_builder.add_u64_field("intval", int_options);
+
+        let bytes_options = BytesOptions::default().set_fast().set_indexed();
+        let bytes_field = schema_builder.add_bytes_field("bytes", bytes_options);
+        let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
+
+        let multi_numbers =
+            schema_builder.add_u64_field("multi_numbers", NumericOptions::default().set_fast());
+        let text_field_options = TextOptions::default()
+            .set_indexing_options(
+                TextFieldIndexing::default()
+                    .set_index_option(schema::IndexRecordOption::WithFreqsAndPositions),
+            )
+            .set_stored();
+        let text_field = schema_builder.add_text_field("text_field", text_field_options);
+        let schema = schema_builder.build();
+
+        let mut index_builder = Index::builder().schema(schema);
+        if let Some(settings) = index_settings {
+            index_builder = index_builder.settings(settings);
+        }
+        let index = index_builder.create_in_ram()?;
+
+        {
+            let mut index_writer = index.writer_for_tests()?;
+
+            // segment 1 - range 1-3
+            index_writer.add_document(doc!(int_field=>1_u64))?;
+            index_writer.add_document(
+                doc!(int_field=>3_u64, multi_numbers => 3_u64, multi_numbers => 4_u64, bytes_field => vec![1, 2, 3], text_field => "some text", facet_field=> Facet::from("/book/crime")),
+            )?;
+            index_writer.add_document(
+                doc!(int_field=>1_u64, text_field=> "deleteme",  text_field => "ok text more text"),
+            )?;
+            index_writer.add_document(
+                doc!(int_field=>2_u64, multi_numbers => 2_u64, multi_numbers => 3_u64, text_field => "ok text more text"),
+            )?;
+
+            index_writer.commit()?;
+            index_writer.add_document(doc!(int_field=>20_u64, multi_numbers => 20_u64))?;
+
+            let in_val = 1u64;
+            index_writer.add_document(doc!(int_field=>in_val, text_field=> "deleteme" , text_field => "ok text more text", facet_field=> Facet::from("/book/crime")))?;
+            index_writer.commit()?;
+            let int_vals = [10u64, 5];
+            index_writer.add_document( // position of this doc after delete in desc sorting = [2], in disjunct case [1]
+                doc!(int_field=>int_vals[0], multi_numbers => 10_u64, multi_numbers => 11_u64, text_field=> "blubber", facet_field=> Facet::from("/book/fantasy")),
+            )?;
+            index_writer.add_document(doc!(int_field=>int_vals[1], text_field=> "deleteme"))?;
+            index_writer.add_document(
+                doc!(int_field=>1_000u64, multi_numbers => 1001_u64, multi_numbers => 1002_u64, bytes_field => vec![5, 5],text_field => "the biggest num")
+            )?;
+
+            index_writer.delete_term(Term::from_field_text(text_field, "deleteme"));
+            index_writer.commit()?;
+        }
+
+        // Merging the segments
+        {
+            let segment_ids = index.searchable_segment_ids()?;
+            let mut index_writer: IndexWriter = index.writer_for_tests()?;
+            index_writer.merge(&segment_ids).wait()?;
+            index_writer.wait_merging_threads()?;
+        }
+        Ok(index)
+    }
+
+    #[test]
+    fn test_merge_index() {
+        let index = create_test_index(Some(IndexSettings {
+            ..Default::default()
+        }))
+        .unwrap();
+
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();
+        assert_eq!(searcher.segment_readers().len(), 1);
+        let segment_reader = searcher.segment_readers().last().unwrap();
+
+        let searcher = index.reader().unwrap().searcher();
+        {
+            let my_text_field = index.schema().get_field("text_field").unwrap();
+
+            let do_search = |term: &str| {
+                let query = QueryParser::for_index(&index, vec![my_text_field])
+                    .parse_query(term)
+                    .unwrap();
+                let top_docs: Vec<(f32, DocAddress)> = searcher
+                    .search(&query, &TopDocs::with_limit(3).order_by_score())
+                    .unwrap();
+
+                top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>()
+            };
+
+            assert_eq!(do_search("some"), vec![1]);
+            assert_eq!(do_search("blubber"), vec![3]);
+            assert_eq!(do_search("biggest"), vec![4]);
+        }
+
+        // postings file
+        {
+            let my_text_field = index.schema().get_field("text_field").unwrap();
+            let term_a = Term::from_field_text(my_text_field, "text");
+            let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
+            let mut postings = inverted_index
+                .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
+                .unwrap()
+                .unwrap();
+            assert_eq!(postings.doc_freq(), 2);
+            let fallback_bitset = AliveBitSet::for_test_from_deleted_docs(&[0], 100);
+            assert_eq!(
+                postings.doc_freq_given_deletes(
+                    segment_reader.alive_bitset().unwrap_or(&fallback_bitset)
+                ),
+                2
+            );
+
+            assert_eq!(postings.term_freq(), 1);
+            let mut output = vec![];
+            postings.positions(&mut output);
+            assert_eq!(output, vec![1]);
+            postings.advance();
+
+            assert_eq!(postings.term_freq(), 2);
+            postings.positions(&mut output);
+            assert_eq!(output, vec![1, 3]);
+        }
+    }
+}
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -1,8 +1,7 @@
 use std::sync::Arc;

 use columnar::{
-    compute_merged_term_ord_mapping, BytesColumn, Column, ColumnType, ColumnarReader,
-    MergeRowOrder, RowAddr, ShuffleMergeOrder, StackMergeOrder,
+    ColumnType, ColumnarReader, MergeRowOrder, RowAddr, ShuffleMergeOrder, StackMergeOrder,
 };
 use common::ReadOnlyBitSet;
 use itertools::Itertools;
@@ -11,52 +10,16 @@ use measure_time::debug_time;
 use crate::directory::WritePtr;
 use crate::docset::{DocSet, TERMINATED};
 use crate::error::DataCorruption;
-use crate::fastfield::{AliveBitSet, FastFieldNotAvailableError};
+use crate::fastfield::AliveBitSet;
 use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
 use crate::index::{Segment, SegmentComponent, SegmentReader};
 use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping};
 use crate::indexer::SegmentSerializer;
 use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
-use crate::schema::{value_type_to_column_type, Field, FieldType, Schema, Type};
+use crate::schema::{value_type_to_column_type, Field, FieldType, Schema};
 use crate::store::StoreWriter;
 use crate::termdict::{TermMerger, TermOrdinal};
-use crate::{
-    DocAddress, DocId, IndexSettings, IndexSortByField, InvertedIndexReader, Order, SegmentOrdinal,
-};
-
-/// Per-segment accessor for Str/Bytes sort fields during index merging.
-///
-/// Each segment stores its own term dictionary with segment-local ordinals. To compare terms
-/// across segments we compute a merged global dictionary and map each segment's local ordinals
-/// to the corresponding merged ordinal via `merged_term_ord_mapping`. This avoids materializing
-/// the actual term bytes during the merge sort — ordinal comparison is sufficient because the
-/// merged dictionary preserves lexicographic order.
-struct StrBytesSortFieldAccessor {
-    ords: Column<u64>,
-    merged_term_ord_mapping: Vec<TermOrdinal>,
-}
-
-impl StrBytesSortFieldAccessor {
-    fn remapped_term_ord(&self, doc_id: DocId) -> Option<TermOrdinal> {
-        self.ords.first(doc_id).map(|old_ord| {
-            let old_ord = old_ord as usize;
-            debug_assert!(old_ord < self.merged_term_ord_mapping.len());
-            self.merged_term_ord_mapping[old_ord]
-        })
-    }
-}
-
-/// Owned per-segment sort-field accessors, kept alive for the duration of the merge.
-///
-/// - `Numeric`: direct column value access — all numeric/datetime types share a single u64 column
-///   interface, so segments can be compared directly by value.
-/// - `StrBytes`: ordinal-based access — each segment's local term ordinals are remapped to merged
-///   global ordinals so that cross-segment lexicographic comparison works without loading term
-///   bytes.
-enum ReaderSortFieldAccessors {
-    Numeric(Vec<(SegmentOrdinal, Column<u64>)>),
-    StrBytes(Vec<(SegmentOrdinal, StrBytesSortFieldAccessor)>),
-}
+use crate::{DocAddress, DocId, InvertedIndexReader};

 /// Segment's max doc must be `< MAX_DOC_LIMIT`.
 ///
@@ -113,9 +76,7 @@ fn estimate_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::
    Ok(total_num_tokens)
 }

-/// Merges multiple index segments into a single new segment.
 pub struct IndexMerger {
-    index_settings: IndexSettings,
    schema: Schema,
    pub(crate) readers: Vec<SegmentReader>,
    max_doc: u32,
@@ -151,7 +112,7 @@ fn convert_to_merge_order(
 ) -> MergeRowOrder {
    match doc_id_mapping.mapping_type() {
        MappingType::Stacked => MergeRowOrder::Stack(StackMergeOrder::stack(columnars)),
-        MappingType::StackedWithDeletes | MappingType::Shuffled => {
+        MappingType::StackedWithDeletes => {
            // RUST/LLVM is amazing. The following conversion is actually a no-op:
            // no allocation, no copy.
            let new_row_id_to_old_row_id: Vec<RowAddr> = doc_id_mapping
@@ -184,62 +145,25 @@ fn extract_fast_field_required_columns(schema: &Schema) -> Vec<(String, ColumnTy
 }

 impl IndexMerger {
-    fn total_num_new_docs(&self) -> usize {
-        self.readers
-            .iter()
-            .map(|reader| reader.num_docs() as usize)
-            .sum()
-    }
-
-    fn collect_alive_bitsets(&self) -> Vec<Option<ReadOnlyBitSet>> {
-        self.readers
-            .iter()
-            .map(|reader| {
-                reader
-                    .alive_bitset()
-                    .map(|alive_bitset| alive_bitset.bitset().clone())
-            })
-            .collect()
-    }
-
-    /// Column cardinality metadata (`Optional`) covers all docs including deleted ones.
-    /// A segment can report `Optional` but have zero live NULLs if every NULL doc was
-    /// deleted. We scan alive docs to distinguish this case, because deleted NULLs
-    /// are excluded from the merge and shouldn't block the disjunct-stack path.
-    fn segment_has_live_nulls(&self, segment_ord: SegmentOrdinal, col: &Column<u64>) -> bool {
-        if col.get_cardinality() != columnar::Cardinality::Optional {
-            return false;
-        }
-        let reader = &self.readers[segment_ord as usize];
-        if !reader.has_deletes() {
-            return true;
-        }
-        reader
-            .doc_ids_alive()
-            .any(|doc_id| col.first(doc_id).is_none())
-    }
-
-    /// Opens an [`IndexMerger`] over the given segments using their existing delete sets.
-    pub fn open(
-        schema: Schema,
-        index_settings: IndexSettings,
-        segments: &[Segment],
-    ) -> crate::Result<IndexMerger> {
+    pub fn open(schema: Schema, segments: &[Segment]) -> crate::Result<IndexMerger> {
        let alive_bitset = segments.iter().map(|_| None).collect_vec();
-        Self::open_with_custom_alive_set(schema, index_settings, segments, alive_bitset)
+        Self::open_with_custom_alive_set(schema, segments, alive_bitset)
    }

-    /// Opens an [`IndexMerger`] with a custom alive (delete) set per segment.
-    ///
-    /// For every segment, an optional [`AliveBitSet`] can be provided which is intersected
-    /// with the segment's existing alive set. Pass `None` for a segment to use its existing
-    /// delete set unchanged.
-    ///
-    /// This allows merging while also applying an additional filter, for example to demux
-    /// documents by a field value into separate output segments.
+    // Create merge with a custom delete set.
+    // For every Segment, a delete bitset can be provided, which
+    // will be merged with the existing bit set. Make sure the index
+    // corresponds to the segment index.
+    //
+    // If `None` is provided for custom alive set, the regular alive set will be used.
+    // If a alive_bitset is provided, the union between the provided and regular
+    // alive set will be used.
+    //
+    // This can be used to merge but also apply an additional filter.
+    // One use case is demux, which is basically taking a list of
+    // segments and partitions them e.g. by a value in a field.
    pub fn open_with_custom_alive_set(
        schema: Schema,
-        index_settings: IndexSettings,
        segments: &[Segment],
        alive_bitset_opt: Vec<Option<AliveBitSet>>,
    ) -> crate::Result<IndexMerger> {
@@ -253,12 +177,6 @@ impl IndexMerger {
        }

        let max_doc = readers.iter().map(|reader| reader.num_docs()).sum();
-        if let Some(sort_by_field) = index_settings.sort_by_field.as_ref() {
-            let schema_field = schema.get_field(&sort_by_field.field)?;
-            let field_entry = schema.get_field_entry(schema_field);
-            let field_type = field_entry.field_type().value_type();
-            readers = Self::sort_readers_by_min_sort_field(readers, sort_by_field, field_type)?;
-        }
        // sort segments by their natural sort setting
        if max_doc >= MAX_DOC_LIMIT {
            let err_msg = format!(
@@ -268,50 +186,12 @@ impl IndexMerger {
            return Err(crate::TantivyError::InvalidArgument(err_msg));
        }
        Ok(IndexMerger {
-            index_settings,
            schema,
            readers,
            max_doc,
        })
    }

-    fn sort_by_field_type(&self, sort_by_field: &IndexSortByField) -> crate::Result<Type> {
-        let schema_field = self.schema.get_field(&sort_by_field.field)?;
-        let field_entry = self.schema.get_field_entry(schema_field);
-        Ok(field_entry.field_type().value_type())
-    }
-
-    fn sort_readers_by_min_sort_field(
-        readers: Vec<SegmentReader>,
-        sort_by_field: &IndexSortByField,
-        field_type: Type,
-    ) -> crate::Result<Vec<SegmentReader>> {
-        if matches!(field_type, Type::Str | Type::Bytes) {
-            // Ordinals are per-segment and not directly comparable, so the "disjunct min/max"
-            // shortcut that works for numeric fields does not apply here.
-            return Ok(readers);
-        }
-
-        // presort the readers by their min_values, so that when they are disjunct, we can use
-        // the regular merge logic (implicitly sorted)
-        let mut readers_with_min_sort_values = readers
-            .into_iter()
-            .map(|reader| {
-                let accessor = Self::get_numeric_accessor(&reader, sort_by_field)?;
-                Ok((reader, accessor.min_value()))
-            })
-            .collect::<crate::Result<Vec<_>>>()?;
-        if sort_by_field.order.is_asc() {
-            readers_with_min_sort_values.sort_by_key(|(_, min_val)| *min_val);
-        } else {
-            readers_with_min_sort_values.sort_by_key(|(_, min_val)| std::cmp::Reverse(*min_val));
-        }
-        Ok(readers_with_min_sort_values
-            .into_iter()
-            .map(|(reader, _)| reader)
-            .collect())
-    }
-
    fn write_fieldnorms(
        &self,
        mut fieldnorms_serializer: FieldNormsSerializer,
@@ -359,261 +239,14 @@ impl IndexMerger {
        Ok(())
    }

-    /// Checks if segments can use the fast disjunct-stack path (byte concatenation)
-    /// instead of a full k-way merge.
-    ///
-    /// Stacking preserves per-segment order but doesn't reposition docs across segments.
-    /// NULLs must sort first (ASC) or last (DESC) globally, but stacking can't move a
-    /// NULL from segment 2 before values in segment 1. So any live NULL forces a full
-    /// k-way merge to place NULLs correctly.
-    fn is_disjunct_and_sorted_on_sort_property(
-        &self,
-        sort_by_field: &IndexSortByField,
-    ) -> crate::Result<bool> {
-        let field_type = self.sort_by_field_type(sort_by_field)?;
-        // Disjunct shortcut is invalid for Str/Bytes because ords are per-segment.
-        if matches!(field_type, Type::Str | Type::Bytes) {
-            return Ok(false);
-        }
-
-        let reader_ordinal_and_field_accessors = self.get_numeric_accessors(sort_by_field)?;
-
-        let asc = sort_by_field.order.is_asc();
-
-        let values_disjunct = reader_ordinal_and_field_accessors
-            .iter()
-            .map(|(_, col)| col)
-            .tuple_windows()
-            .all(|(col1, col2)| {
-                if asc {
-                    col1.max_value() <= col2.min_value()
-                } else {
-                    col1.min_value() >= col2.max_value()
-                }
-            });
-
-        if !values_disjunct {
-            return Ok(false);
-        }
-
-        let has_live_nulls = reader_ordinal_and_field_accessors
-            .iter()
-            .any(|(segment_ord, col)| self.segment_has_live_nulls(*segment_ord, col));
-
-        Ok(!has_live_nulls)
-    }
-
-    fn get_str_bytes_column(
-        reader: &SegmentReader,
-        sort_by_field: &IndexSortByField,
-        field_type: Type,
-    ) -> crate::Result<BytesColumn> {
-        let not_available = || -> crate::TantivyError {
-            FastFieldNotAvailableError {
-                field_name: sort_by_field.field.to_string(),
-            }
-            .into()
-        };
-        match field_type {
-            Type::Str => reader
-                .fast_fields()
-                .str(&sort_by_field.field)?
-                .map(Into::into)
-                .ok_or_else(not_available),
-            Type::Bytes => reader
-                .fast_fields()
-                .bytes(&sort_by_field.field)?
-                .ok_or_else(not_available),
-            _ => unreachable!("get_str_bytes_column called with non-Str/Bytes type"),
-        }
-    }
-
-    /// Builds per-segment [`StrBytesSortFieldAccessor`]s for Str/Bytes sort fields.
-    ///
-    /// 1. Extracts each segment's `BytesColumn` (term dictionary + ordinal column).
-    /// 2. Computes a merged dictionary across all segments via [`compute_merged_term_ord_mapping`],
-    ///    producing a per-segment mapping from local term ordinal → merged global ordinal.
-    /// 3. Wraps each segment's ordinal column and mapping into a `StrBytesSortFieldAccessor`.
-    fn get_str_bytes_accessors(
-        &self,
-        sort_by_field: &IndexSortByField,
-        field_type: Type,
-    ) -> crate::Result<Vec<(SegmentOrdinal, StrBytesSortFieldAccessor)>> {
-        let bytes_columns = self
-            .readers
-            .iter()
-            .map(|reader| Self::get_str_bytes_column(reader, sort_by_field, field_type))
-            .collect::<crate::Result<Vec<_>>>()?;
-        let merged_term_ord_mappings = compute_merged_term_ord_mapping(&bytes_columns)?;
-        debug_assert_eq!(bytes_columns.len(), merged_term_ord_mappings.len());
-        let accessors = bytes_columns
-            .into_iter()
-            .zip(merged_term_ord_mappings)
-            .enumerate()
-            .map(
-                |(reader_ordinal, (bytes_column, merged_term_ord_mapping))| {
-                    (
-                        reader_ordinal as SegmentOrdinal,
-                        StrBytesSortFieldAccessor {
-                            ords: bytes_column.ords().clone(),
-                            merged_term_ord_mapping,
-                        },
-                    )
-                },
-            )
-            .collect::<Vec<_>>();
-        Ok(accessors)
-    }
-
-    /// Returns the full `Column<u64>` so callers can use `Column::first()` which
-    /// returns `Option<u64>` — `None` for NULLs, `Some` for real values. This
-    /// distinction is required for correct NULL ordering during merge sort and
-    /// for detecting live NULLs in the disjunct-stack check.
-    fn get_numeric_accessor(
-        reader: &SegmentReader,
-        sort_by_field: &IndexSortByField,
-    ) -> crate::Result<Column<u64>> {
-        reader.schema().get_field(&sort_by_field.field)?;
-        let (value_accessor, _column_type) = reader
-            .fast_fields()
-            .u64_lenient(&sort_by_field.field)?
-            .ok_or_else(|| FastFieldNotAvailableError {
-                field_name: sort_by_field.field.to_string(),
-            })?;
-        Ok(value_accessor)
-    }
-
-    fn get_numeric_accessors(
-        &self,
-        sort_by_field: &IndexSortByField,
-    ) -> crate::Result<Vec<(SegmentOrdinal, Column<u64>)>> {
-        self.readers
-            .iter()
-            .enumerate()
-            .map(|(reader_ordinal, reader)| {
-                let reader_ordinal = reader_ordinal as SegmentOrdinal;
-                let accessor = Self::get_numeric_accessor(reader, sort_by_field)?;
-                Ok((reader_ordinal, accessor))
-            })
-            .collect::<crate::Result<Vec<_>>>()
-    }
-    /// Builds owned per-segment sort accessors so they stay alive during merge.
-    ///
-    /// Dispatches on the sort field's value type: numeric types use direct column value access,
-    /// while Str/Bytes types go through the ordinal-remapping path (see
-    /// [`StrBytesSortFieldAccessor`]).
-    fn get_reader_with_sort_field_accessor(
-        &self,
-        sort_by_field: &IndexSortByField,
-    ) -> crate::Result<ReaderSortFieldAccessors> {
-        let field_type = self.sort_by_field_type(sort_by_field)?;
-
-        if matches!(field_type, Type::Str | Type::Bytes) {
-            let accessors = self.get_str_bytes_accessors(sort_by_field, field_type)?;
-            return Ok(ReaderSortFieldAccessors::StrBytes(accessors));
-        }
-
-        let accessors = self.get_numeric_accessors(sort_by_field)?;
-        Ok(ReaderSortFieldAccessors::Numeric(accessors))
-    }
-
-    fn extend_sorted_doc_ids<T, F>(
-        &self,
-        reader_ordinal_and_field_accessors: &[(SegmentOrdinal, T)],
-        mut is_less: F,
-        sorted_doc_ids: &mut Vec<DocAddress>,
-    ) where
-        F: FnMut(&(DocId, &SegmentOrdinal, &T), &(DocId, &SegmentOrdinal, &T)) -> bool,
-    {
-        let doc_id_reader_pair =
-            reader_ordinal_and_field_accessors
-                .iter()
-                .map(|(reader_ord, ff_reader)| {
-                    let reader = &self.readers[*reader_ord as usize];
-                    reader
-                        .doc_ids_alive()
-                        .map(move |doc_id| (doc_id, reader_ord, ff_reader))
-                });
-        sorted_doc_ids.extend(
-            doc_id_reader_pair
-                .into_iter()
-                .kmerge_by(|a, b| is_less(a, b))
-                .map(|(doc_id, &segment_ord, _)| DocAddress {
-                    doc_id,
-                    segment_ord,
-                }),
-        );
-    }
-
-    /// Generates the doc_id mapping where position in the vec=new
-    /// doc_id.
-    /// ReaderWithOrdinal will include the ordinal position of the
-    /// reader in self.readers.
-    pub(crate) fn generate_doc_id_mapping_with_sort_by_field(
-        &self,
-        sort_by_field: &IndexSortByField,
-    ) -> crate::Result<SegmentDocIdMapping> {
-        let sort_field_accessors = self.get_reader_with_sort_field_accessor(sort_by_field)?;
-        // Loading the field accessor on demand causes a 15x regression
-
-        let total_num_new_docs = self.total_num_new_docs();
-
-        let mut sorted_doc_ids: Vec<DocAddress> = Vec::with_capacity(total_num_new_docs);
-
-        // K-way merge of alive doc ids across segments, ordered by the sort field.
-        //
-        // Numeric: compare raw u64 column values directly.
-        // Str/Bytes: compare merged global ordinals obtained via `remapped_term_ord`.
-        //   Documents without a value map to `None` — first in ascending, last in descending.
-        let asc = sort_by_field.order == Order::Asc;
-        match sort_field_accessors {
-            ReaderSortFieldAccessors::Numeric(reader_ordinal_and_field_accessors) => {
-                self.extend_sorted_doc_ids(
-                    &reader_ordinal_and_field_accessors,
-                    |a, b| {
-                        // Column::first() returns Option<u64>: None for NULLs, Some for values.
-                        // Option's Ord puts None < Some, giving NULL-first in ASC, NULL-last in
-                        // DESC.
-                        let val1 = a.2.first(a.0);
-                        let val2 = b.2.first(b.0);
-                        if asc {
-                            val1 < val2
-                        } else {
-                            val1 > val2
-                        }
-                    },
-                    &mut sorted_doc_ids,
-                );
-            }
-            ReaderSortFieldAccessors::StrBytes(reader_ordinal_and_field_accessors) => {
-                self.extend_sorted_doc_ids(
-                    &reader_ordinal_and_field_accessors,
-                    |a, b| {
-                        let val1 = a.2.remapped_term_ord(a.0);
-                        let val2 = b.2.remapped_term_ord(b.0);
-                        if asc {
-                            val1 < val2
-                        } else {
-                            val1 > val2
-                        }
-                    },
-                    &mut sorted_doc_ids,
-                );
-            }
-        }
-
-        let alive_bitsets = self.collect_alive_bitsets();
-        Ok(SegmentDocIdMapping::new(
-            sorted_doc_ids,
-            MappingType::Shuffled,
-            alive_bitsets,
-        ))
-    }
-
    /// Creates a mapping if the segments are stacked. this is helpful to merge codelines between
    /// index sorting and the others
    pub(crate) fn get_doc_id_from_concatenated_data(&self) -> crate::Result<SegmentDocIdMapping> {
-        let total_num_new_docs = self.total_num_new_docs();
+        let total_num_new_docs = self
+            .readers
+            .iter()
+            .map(|reader| reader.num_docs() as usize)
+            .sum();

        let mut mapping: Vec<DocAddress> = Vec::with_capacity(total_num_new_docs);

@@ -629,13 +262,20 @@ impl IndexMerger {
                }),
        );

-        let has_deletes = self.readers.iter().any(SegmentReader::has_deletes);
+        let has_deletes: bool = self.readers.iter().any(SegmentReader::has_deletes);
        let mapping_type = if has_deletes {
            MappingType::StackedWithDeletes
        } else {
            MappingType::Stacked
        };
-        let alive_bitsets = self.collect_alive_bitsets();
+        let alive_bitsets: Vec<Option<ReadOnlyBitSet>> = self
+            .readers
+            .iter()
+            .map(|reader| {
+                let alive_bitset = reader.alive_bitset()?;
+                Some(alive_bitset.bitset().clone())
+            })
+            .collect();
        Ok(SegmentDocIdMapping::new(
            mapping,
            mapping_type,
@@ -716,7 +356,6 @@ impl IndexMerger {
        );

        let mut segment_postings_containing_the_term: Vec<(usize, SegmentPostings)> = vec![];
-        let mut doc_id_and_positions = vec![];

        while merged_terms.advance() {
            segment_postings_containing_the_term.clear();
@@ -812,37 +451,13 @@ impl IndexMerger {
                            0u32
                        };

-                        // if doc_id_mapping exists, the doc_ids are reordered, they are
-                        // not just stacked. The field serializer expects monotonically increasing
-                        // doc_ids, so we collect and sort them first, before writing.
-                        //
-                        // I think this is not strictly necessary, it would be possible to
-                        // avoid the loading into a vec via some form of kmerge, but then the merge
-                        // logic would deviate much more from the stacking case (unsorted index)
-                        if !doc_id_mapping.is_trivial() {
-                            doc_id_and_positions.push((
-                                remapped_doc_id,
-                                term_freq,
-                                positions_buffer.to_vec(),
-                            ));
-                        } else {
-                            let delta_positions = delta_computer.compute_delta(&positions_buffer);
-                            field_serializer.write_doc(remapped_doc_id, term_freq, delta_positions);
-                        }
+                        let delta_positions = delta_computer.compute_delta(&positions_buffer);
+                        field_serializer.write_doc(remapped_doc_id, term_freq, delta_positions);
                    }

                    doc = segment_postings.advance();
                }
            }
-            if !doc_id_mapping.is_trivial() {
-                doc_id_and_positions.sort_unstable_by_key(|&(doc_id, _, _)| doc_id);
-
-                for (doc_id, term_freq, positions) in &doc_id_and_positions {
-                    let delta_positions = delta_computer.compute_delta(positions);
-                    field_serializer.write_doc(*doc_id, *term_freq, delta_positions);
-                }
-                doc_id_and_positions.clear();
-            }
            // closing the term.
            field_serializer.close_term()?;
        }
@@ -871,47 +486,13 @@ impl IndexMerger {
        Ok(())
    }

-    fn write_storable_fields(
-        &self,
-        store_writer: &mut StoreWriter,
-        doc_id_mapping: &SegmentDocIdMapping,
-    ) -> crate::Result<()> {
+    fn write_storable_fields(&self, store_writer: &mut StoreWriter) -> crate::Result<()> {
        debug_time!("write-storable-fields");
        debug!("write-storable-field");

-        if !doc_id_mapping.is_trivial() {
-            debug!("non-trivial-doc-id-mapping");
-
-            let store_readers: Vec<_> = self
-                .readers
-                .iter()
-                .map(|reader| reader.get_store_reader(50))
-                .collect::<Result<_, _>>()?;
-
-            let mut document_iterators: Vec<_> = store_readers
-                .iter()
-                .enumerate()
-                .map(|(i, store)| store.iter_raw(self.readers[i].alive_bitset()))
-                .collect();
-
-            for old_doc_addr in doc_id_mapping.iter_old_doc_addrs() {
-                let doc_bytes_it = &mut document_iterators[old_doc_addr.segment_ord as usize];
-                if let Some(doc_bytes_res) = doc_bytes_it.next() {
-                    let doc_bytes = doc_bytes_res?;
-                    store_writer.store_bytes(&doc_bytes)?;
-                } else {
-                    return Err(DataCorruption::comment_only(format!(
-                        "unexpected missing document in docstore on merge, doc address \
-                         {old_doc_addr:?}",
-                    ))
-                    .into());
-                }
-            }
-        } else {
-            debug!("trivial-doc-id-mapping");
-            for reader in &self.readers {
-                let store_reader = reader.get_store_reader(1)?;
-                if reader.has_deletes()
+        for reader in &self.readers {
+            let store_reader = reader.get_store_reader(1)?;
+            if reader.has_deletes()
                    // If there is not enough data in the store, we avoid stacking in order to
                    // avoid creating many small blocks in the doc store. Once we have 5 full blocks,
                    // we start stacking. In the worst case 2/7 of the blocks would be very small.
@@ -927,14 +508,13 @@ impl IndexMerger {
                    // take 7 in order to not walk over all checkpoints.
                    || store_reader.block_checkpoints().take(7).count() < 6
                    || store_reader.decompressor() != store_writer.compressor().into()
-                {
-                    for doc_bytes_res in store_reader.iter_raw(reader.alive_bitset()) {
-                        let doc_bytes = doc_bytes_res?;
-                        store_writer.store_bytes(&doc_bytes)?;
-                    }
-                } else {
-                    store_writer.stack(store_reader)?;
+            {
+                for doc_bytes_res in store_reader.iter_raw(reader.alive_bitset()) {
+                    let doc_bytes = doc_bytes_res?;
+                    store_writer.store_bytes(&doc_bytes)?;
                }
+            } else {
+                store_writer.stack(store_reader)?;
            }
        }
        Ok(())
@@ -945,42 +525,8 @@ impl IndexMerger {
    ///
    /// # Returns
    /// The number of documents in the resulting segment.
-    pub fn write(&self, serializer: SegmentSerializer) -> crate::Result<u32> {
-        let doc_id_mapping = if let Some(sort_by_field) = self.index_settings.sort_by_field.as_ref()
-        {
-            if self.is_disjunct_and_sorted_on_sort_property(sort_by_field)? {
-                self.get_doc_id_from_concatenated_data()?
-            } else {
-                self.generate_doc_id_mapping_with_sort_by_field(sort_by_field)?
-            }
-        } else {
-            self.get_doc_id_from_concatenated_data()?
-        };
-        self.write_with_mapping(serializer, doc_id_mapping)
-    }
-
-    /// Like [`write`], but uses the caller-supplied `doc_id_mapping` instead of
-    /// deriving one from an index sort field.
-    ///
-    /// The mapping must cover *all* live documents across every segment passed to
-    /// [`IndexMerger::open_with_custom_alive_set`].  The simplest way to build one
-    /// is [`SegmentDocIdMapping::new_shuffled`].
-    ///
-    /// # Returns
-    /// The number of documents in the resulting segment.
-    pub fn write_with_doc_id_mapping(
-        &self,
-        serializer: SegmentSerializer,
-        doc_id_mapping: SegmentDocIdMapping,
-    ) -> crate::Result<u32> {
-        self.write_with_mapping(serializer, doc_id_mapping)
-    }
-
-    fn write_with_mapping(
-        &self,
-        mut serializer: SegmentSerializer,
-        doc_id_mapping: SegmentDocIdMapping,
-    ) -> crate::Result<u32> {
+    pub fn write(&self, mut serializer: SegmentSerializer) -> crate::Result<u32> {
+        let doc_id_mapping = self.get_doc_id_from_concatenated_data()?;
        debug!("write-fieldnorms");
        if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
            self.write_fieldnorms(fieldnorms_serializer, &doc_id_mapping)?;
@@ -997,7 +543,7 @@ impl IndexMerger {
        )?;

        debug!("write-storagefields");
-        self.write_storable_fields(serializer.get_store_writer(), &doc_id_mapping)?;
+        self.write_storable_fields(serializer.get_store_writer())?;
        debug!("write-fastfields");
        self.write_fast_fields(serializer.get_fast_field_write(), doc_id_mapping)?;

@@ -1009,6 +555,7 @@ impl IndexMerger {

 #[cfg(test)]
 mod tests {
+
    use columnar::Column;
    use proptest::prop_oneof;
    use proptest::strategy::Strategy;
@@ -1028,7 +575,7 @@ mod tests {
    use crate::time::OffsetDateTime;
    use crate::{
        assert_nearly_equals, schema, DateTime, DocAddress, DocId, DocSet, IndexSettings,
-        IndexSortByField, IndexWriter, Order, Searcher,
+        IndexWriter, Searcher,
    };

    #[test]
@@ -1501,60 +1048,6 @@ mod tests {
        test_merge_facets(None, true)
    }

-    #[test]
-    fn test_merge_facets_sort_asc() {
-        // In the merge case this will go through the doc_id mapping code
-        test_merge_facets(
-            Some(IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "intval".to_string(),
-                    order: Order::Desc,
-                }),
-                ..Default::default()
-            }),
-            true,
-        );
-        // In the merge case this will not go through the doc_id mapping code, because the data
-        // sorted and disjunct
-        test_merge_facets(
-            Some(IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "intval".to_string(),
-                    order: Order::Desc,
-                }),
-                ..Default::default()
-            }),
-            false,
-        );
-    }
-
-    #[test]
-    fn test_merge_facets_sort_desc() {
-        // In the merge case this will go through the doc_id mapping code
-        test_merge_facets(
-            Some(IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "intval".to_string(),
-                    order: Order::Desc,
-                }),
-                ..Default::default()
-            }),
-            true,
-        );
-        // In the merge case this will not go through the doc_id mapping code, because the data
-        // sorted and disjunct
-        test_merge_facets(
-            Some(IndexSettings {
-                sort_by_field: Some(IndexSortByField {
-                    field: "intval".to_string(),
-                    order: Order::Desc,
-                }),
-                ..Default::default()
-            }),
-            false,
-        );
-    }
-
    // force_segment_value_overlap forces the int value for sorting to have overlapping min and max
    // ranges between segments so that merge algorithm can't apply certain optimizations
    fn test_merge_facets(index_settings: Option<IndexSettings>, force_segment_value_overlap: bool) {
--- a/src/indexer/merger_sorted_index_test.rs
+++ b/src/indexer/merger_sorted_index_test.rs
--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -8,18 +8,17 @@
 pub(crate) mod delete_queue;
 pub(crate) mod path_to_unordered_id;

-pub mod doc_id_mapping;
+pub(crate) mod doc_id_mapping;
 mod doc_opstamp_mapping;
 mod flat_map_with_buffer;
 pub(crate) mod index_writer;
 pub(crate) mod index_writer_status;
 pub(crate) mod indexing_term;
 mod log_merge_policy;
+mod merge_index_test;
 mod merge_operation;
 pub(crate) mod merge_policy;
-/// Segment merger: combines multiple segments into one.
-pub mod merger;
-mod merger_sorted_index_test;
+pub(crate) mod merger;
 pub(crate) mod operation;
 pub(crate) mod prepared_commit;
 mod segment_entry;
@@ -34,19 +33,15 @@ mod stamper;
 use crossbeam_channel as channel;
 use smallvec::SmallVec;

-pub use self::doc_id_mapping::SegmentDocIdMapping;
 pub use self::index_writer::{advance_deletes, IndexWriter, IndexWriterOptions};
 pub use self::log_merge_policy::LogMergePolicy;
 pub use self::merge_operation::MergeOperation;
 pub use self::merge_policy::{MergeCandidate, MergePolicy, NoMergePolicy};
-pub use self::merger::IndexMerger;
 pub use self::operation::{AddOperation, DeleteOperation, UserOperation};
 pub use self::prepared_commit::PreparedCommit;
 pub use self::segment_entry::SegmentEntry;
 pub(crate) use self::segment_serializer::SegmentSerializer;
-pub use self::segment_updater::{
-    merge_filtered_segments, merge_indices, merge_segments_with_doc_id_mapping,
-};
+pub use self::segment_updater::{merge_filtered_segments, merge_indices};
 pub use self::segment_writer::SegmentWriter;
 pub use self::single_segment_index_writer::SingleSegmentIndexWriter;

--- a/src/indexer/segment_serializer.rs
+++ b/src/indexer/segment_serializer.rs
@@ -18,27 +18,9 @@ pub struct SegmentSerializer {

 impl SegmentSerializer {
    /// Creates a new `SegmentSerializer`.
-    pub fn for_segment(
-        mut segment: Segment,
-        is_in_merge: bool,
-    ) -> crate::Result<SegmentSerializer> {
-        // If the segment is going to be sorted, we stream the docs first to a temporary file.
-        // In the merge case this is not necessary because we can kmerge the already sorted
-        // segments
-        let remapping_required = segment.index().settings().sort_by_field.is_some() && !is_in_merge;
+    pub fn for_segment(mut segment: Segment) -> crate::Result<SegmentSerializer> {
        let settings = segment.index().settings().clone();
-        let store_writer = if remapping_required {
-            let store_write = segment.open_write(SegmentComponent::TempStore)?;
-            StoreWriter::new(
-                store_write,
-                crate::store::Compressor::None,
-                // We want fast random access on the docs, so we choose a small block size.
-                // If this is zero, the skip index will contain too many checkpoints and
-                // therefore will be relatively slow.
-                16000,
-                settings.docstore_compress_dedicated_thread,
-            )?
-        } else {
+        let store_writer = {
            let store_write = segment.open_write(SegmentComponent::Store)?;
            StoreWriter::new(
                store_write,
@@ -72,10 +54,6 @@ impl SegmentSerializer {
        &self.segment
    }

-    pub fn segment_mut(&mut self) -> &mut Segment {
-        &mut self.segment
-    }
-
    /// Accessor to the `PostingsSerializer`.
    pub fn get_postings_serializer(&mut self) -> &mut InvertedIndexSerializer {
        &mut self.postings_serializer
--- a/src/indexer/segment_updater.rs
+++ b/src/indexer/segment_updater.rs
@@ -15,7 +15,6 @@ use crate::directory::{Directory, DirectoryClone, GarbageCollectionResult};
 use crate::fastfield::AliveBitSet;
 use crate::index::{Index, IndexMeta, IndexSettings, Segment, SegmentId, SegmentMeta};
 use crate::indexer::delete_queue::DeleteCursor;
-use crate::indexer::doc_id_mapping::SegmentDocIdMapping;
 use crate::indexer::index_writer::advance_deletes;
 use crate::indexer::merge_operation::MergeOperationInventory;
 use crate::indexer::merger::IndexMerger;
@@ -115,11 +114,10 @@ fn merge(
        .collect();

    // An IndexMerger is like a "view" of our merged segments.
-    let merger: IndexMerger =
-        IndexMerger::open(index.schema(), index.settings().clone(), &segments[..])?;
+    let merger: IndexMerger = IndexMerger::open(index.schema(), &segments[..])?;

    // ... we just serialize this index merger in our new segment to merge the segments.
-    let segment_serializer = SegmentSerializer::for_segment(merged_segment.clone(), true)?;
+    let segment_serializer = SegmentSerializer::for_segment(merged_segment.clone())?;

    let num_docs = merger.write(segment_serializer)?;

@@ -220,13 +218,9 @@ pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
    )?;
    let merged_segment = merged_index.new_segment();
    let merged_segment_id = merged_segment.id();
-    let merger: IndexMerger = IndexMerger::open_with_custom_alive_set(
-        merged_index.schema(),
-        merged_index.settings().clone(),
-        segments,
-        filter_doc_ids,
-    )?;
-    let segment_serializer = SegmentSerializer::for_segment(merged_segment, true)?;
+    let merger: IndexMerger =
+        IndexMerger::open_with_custom_alive_set(merged_index.schema(), segments, filter_doc_ids)?;
+    let segment_serializer = SegmentSerializer::for_segment(merged_segment)?;
    let num_docs = merger.write(segment_serializer)?;

    let segment_meta = merged_index.new_segment_meta(merged_segment_id, num_docs);
@@ -256,82 +250,6 @@ pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
    Ok(merged_index)
 }

-/// Like [`merge_filtered_segments`], but uses a caller-supplied [`SegmentDocIdMapping`]
-/// to control the final document order.  The mapping should be built from the same
-/// segments (in the same order) passed here.
-///
-/// Use this to apply an external reordering during a merge without relying on a persistent fast field.
-///
-/// # Warning
-/// Same caveats as [`merge_filtered_segments`]: no live `IndexWriter` allowed.
-#[doc(hidden)]
-pub fn merge_segments_with_doc_id_mapping<T: Into<Box<dyn Directory>>>(
-    segments: &[Segment],
-    target_settings: IndexSettings,
-    filter_doc_ids: Vec<Option<AliveBitSet>>,
-    doc_id_mapping: SegmentDocIdMapping,
-    output_directory: T,
-) -> crate::Result<Index> {
-    if segments.is_empty() {
-        return Err(crate::TantivyError::InvalidArgument(
-            "No segments given to merge".to_string(),
-        ));
-    }
-
-    let target_schema = segments[0].schema();
-
-    if segments
-        .iter()
-        .skip(1)
-        .any(|seg| seg.schema() != target_schema)
-    {
-        return Err(crate::TantivyError::InvalidArgument(
-            "Attempt to merge different schema indices".to_string(),
-        ));
-    }
-
-    let mut merged_index = Index::create(
-        output_directory,
-        target_schema.clone(),
-        target_settings.clone(),
-    )?;
-    let merged_segment = merged_index.new_segment();
-    let merged_segment_id = merged_segment.id();
-    let merger: IndexMerger = IndexMerger::open_with_custom_alive_set(
-        merged_index.schema(),
-        merged_index.settings().clone(),
-        segments,
-        filter_doc_ids,
-    )?;
-    let segment_serializer = SegmentSerializer::for_segment(merged_segment, true)?;
-    let num_docs = merger.write_with_doc_id_mapping(segment_serializer, doc_id_mapping)?;
-
-    let segment_meta = merged_index.new_segment_meta(merged_segment_id, num_docs);
-
-    let stats = format!(
-        "Segments Merge (external reordering): [{}]",
-        segments
-            .iter()
-            .fold(String::new(), |sum, current| format!(
-                "{sum}{} ",
-                current.meta().id().uuid_string()
-            ))
-            .trim_end()
-    );
-
-    let index_meta = IndexMeta {
-        index_settings: target_settings,
-        segments: vec![segment_meta],
-        schema: target_schema,
-        opstamp: 0u64,
-        payload: Some(stats),
-    };
-
-    save_metas(&index_meta, merged_index.directory_mut())?;
-
-    Ok(merged_index)
-}
-
 pub(crate) struct InnerSegmentUpdater {
    // we keep a copy of the current active IndexMeta to
    // avoid loading the file every time we need it in the
@@ -1197,7 +1115,6 @@ mod tests {
            )?;
            let merger: IndexMerger = IndexMerger::open_with_custom_alive_set(
                merged_index.schema(),
-                merged_index.settings().clone(),
                &segments[..],
                filter_segments,
            )?;
@@ -1213,7 +1130,6 @@ mod tests {
                Index::create(RamDirectory::default(), target_schema, target_settings)?;
            let merger: IndexMerger = IndexMerger::open_with_custom_alive_set(
                merged_index.schema(),
-                merged_index.settings().clone(),
                &segments[..],
                filter_segments,
            )?;
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -3,7 +3,6 @@ use common::JsonPathWriter;
 use itertools::Itertools;
 use tokenizer_api::BoxTokenStream;

-use super::doc_id_mapping::{get_doc_id_mapping_from_field, DocIdMapping};
 use super::operation::AddOperation;
 use crate::fastfield::FastFieldsWriter;
 use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
@@ -17,7 +16,6 @@ use crate::postings::{
 };
 use crate::schema::document::{Document, Value};
 use crate::schema::{FieldEntry, FieldType, Schema, DATE_TIME_PRECISION_INDEXED};
-use crate::store::{StoreReader, StoreWriter};
 use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer};
 use crate::{DocId, Opstamp, TantivyError};

@@ -42,20 +40,6 @@ fn compute_initial_table_size(per_thread_memory_budget: usize) -> crate::Result<
        })
 }

-fn remap_doc_opstamps(
-    opstamps: Vec<Opstamp>,
-    doc_id_mapping_opt: Option<&DocIdMapping>,
-) -> Vec<Opstamp> {
-    if let Some(doc_id_mapping_opt) = doc_id_mapping_opt {
-        doc_id_mapping_opt
-            .iter_old_doc_ids()
-            .map(|doc| opstamps[doc as usize])
-            .collect()
-    } else {
-        opstamps
-    }
-}
-
 /// A `SegmentWriter` is in charge of creating segment index from a
 /// set of documents.
 ///
@@ -91,7 +75,7 @@ impl SegmentWriter {
        let tokenizer_manager = segment.index().tokenizers().clone();
        let tokenizer_manager_fast_field = segment.index().fast_field_tokenizer().clone();
        let table_size = compute_initial_table_size(memory_budget_in_bytes)?;
-        let segment_serializer = SegmentSerializer::for_segment(segment, false)?;
+        let segment_serializer = SegmentSerializer::for_segment(segment)?;
        let per_field_postings_writers = PerFieldPostingsWriter::for_schema(&schema);
        let per_field_text_analyzers = schema
            .fields()
@@ -140,15 +124,6 @@ impl SegmentWriter {
    /// be used afterwards.
    pub fn finalize(mut self) -> crate::Result<Vec<u64>> {
        self.fieldnorms_writer.fill_up_to_max_doc(self.max_doc);
-        let mapping: Option<DocIdMapping> = self
-            .segment_serializer
-            .segment()
-            .index()
-            .settings()
-            .sort_by_field
-            .clone()
-            .map(|sort_by_field| get_doc_id_mapping_from_field(sort_by_field, &self))
-            .transpose()?;
        remap_and_write(
            self.schema,
            &self.per_field_postings_writers,
@@ -156,10 +131,8 @@ impl SegmentWriter {
            self.fast_field_writers,
            &self.fieldnorms_writer,
            self.segment_serializer,
-            mapping.as_ref(),
        )?;
-        let doc_opstamps = remap_doc_opstamps(self.doc_opstamps, mapping.as_ref());
-        Ok(doc_opstamps)
+        Ok(self.doc_opstamps)
    }

    /// Returns an estimation of the current memory usage of the segment writer.
@@ -420,11 +393,10 @@ fn remap_and_write(
    fast_field_writers: FastFieldsWriter,
    fieldnorms_writer: &FieldNormsWriter,
    mut serializer: SegmentSerializer,
-    doc_id_map: Option<&DocIdMapping>,
 ) -> crate::Result<()> {
    debug!("remap-and-write");
    if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
-        fieldnorms_writer.serialize(fieldnorms_serializer, doc_id_map)?;
+        fieldnorms_writer.serialize(fieldnorms_serializer)?;
    }
    let fieldnorm_data = serializer
        .segment()
@@ -435,39 +407,10 @@ fn remap_and_write(
        schema,
        per_field_postings_writers,
        fieldnorm_readers,
-        doc_id_map,
        serializer.get_postings_serializer(),
    )?;
    debug!("fastfield-serialize");
-    fast_field_writers.serialize(serializer.get_fast_field_write(), doc_id_map)?;
-
-    // finalize temp docstore and create version, which reflects the doc_id_map
-    if let Some(doc_id_map) = doc_id_map {
-        debug!("resort-docstore");
-        let store_write = serializer
-            .segment_mut()
-            .open_write(SegmentComponent::Store)?;
-        let settings = serializer.segment().index().settings();
-        let store_writer = StoreWriter::new(
-            store_write,
-            settings.docstore_compression,
-            settings.docstore_blocksize,
-            settings.docstore_compress_dedicated_thread,
-        )?;
-        let old_store_writer = std::mem::replace(&mut serializer.store_writer, store_writer);
-        old_store_writer.close()?;
-        let store_read = StoreReader::open(
-            serializer
-                .segment()
-                .open_read(SegmentComponent::TempStore)?,
-            1, /* The docstore is configured to have one doc per block, and each doc is
-                * accessed only once: we don't need caching. */
-        )?;
-        for old_doc_id in doc_id_map.iter_old_doc_ids() {
-            let doc_bytes = store_read.get_document_bytes(old_doc_id)?;
-            serializer.get_store_writer().store_bytes(&doc_bytes)?;
-        }
-    }
+    fast_field_writers.serialize(serializer.get_fast_field_write())?;

    debug!("serializer-close");
    serializer.close()?;
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -226,13 +226,10 @@ pub use self::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
 pub use crate::core::{json_utils, Executor, Searcher, SearcherGeneration};
 pub use crate::directory::Directory;
 pub use crate::index::{
-    Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
-    Segment, SegmentMeta, SegmentReader,
-};
-pub use crate::indexer::{
-    IndexMerger, IndexWriter, SegmentDocIdMapping, SingleSegmentIndexWriter,
-    merge_segments_with_doc_id_mapping,
+    Index, IndexBuilder, IndexMeta, IndexSettings, InvertedIndexReader, Order, Segment,
+    SegmentMeta, SegmentReader,
 };
+pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
 pub use crate::schema::{Document, TantivyDocument, Term};

 /// Index format version.
--- a/src/postings/block_segment_postings.rs
+++ b/src/postings/block_segment_postings.rs
@@ -249,12 +249,6 @@ impl BlockSegmentPostings {

    /// Returns the length of the current block.
    ///
-    /// Returns the decoded term-frequency buffer for the current block.
-    #[inline]
-    pub(crate) fn freq_output_array(&self) -> &[u32] {
-        self.freq_decoder.output_array()
-    }
-
    /// All blocks have a length of `NUM_DOCS_PER_BLOCK`,
    /// except the last block that may have a length
    /// of any number between 1 and `NUM_DOCS_PER_BLOCK - 1`
@@ -287,33 +281,6 @@ impl BlockSegmentPostings {
        doc
    }

-    /// Returns the number of documents with a doc id strictly smaller than `target`
-    /// (i.e. the *rank* of `target` in this posting list).
-    ///
-    /// This jumps to the block that may contain `target` through the skip list, so no
-    /// skipped block is decoded; a single block is then decoded to locate `target`
-    /// within it. The cost is therefore `O(number_of_skip_list_entries)` plus one block
-    /// decode, rather than `O(doc_freq)`.
-    ///
-    /// Like [`Self::seek`], the underlying cursor only ever moves forward. This method
-    /// must be called with **non-decreasing** `target` values (galloping); calling it
-    /// with a `target` smaller than a previous one yields an incorrect result. `target`
-    /// must be a valid doc id (i.e. `target <= TERMINATED`), exactly as for `seek`.
-    ///
-    /// Edge cases: returns `0` when `target` is smaller than every doc id, and
-    /// `doc_freq()` when `target` is larger than every doc id.
-    pub fn rank(&mut self, target: DocId) -> u32 {
-        if self.doc_freq == 0 {
-            return 0;
-        }
-        // `within` = number of docs in the landed block with a doc id < target.
-        let within = self.seek(target);
-        // `remaining_docs` counts the landed block and everything after it, so the
-        // difference is the number of docs in all blocks strictly before it.
-        let docs_before_block = self.doc_freq - self.skip_reader.remaining_docs();
-        docs_before_block + within as u32
-    }
-
    pub(crate) fn position_offset(&self) -> u64 {
        self.skip_reader.position_offset()
    }
@@ -331,11 +298,6 @@ impl BlockSegmentPostings {
        }
    }

-    #[inline]
-    pub(crate) fn has_remaining_docs(&self) -> bool {
-        self.skip_reader.has_remaining_docs()
-    }
-
    pub(crate) fn block_is_loaded(&self) -> bool {
        self.block_loaded
    }
@@ -595,38 +557,4 @@ mod tests {
        assert_eq!(block_segments.docs(), &[1, 3, 5]);
        Ok(())
    }
-
-    #[test]
-    fn test_block_segment_postings_rank() -> crate::Result<()> {
-        // ~8 blocks worth of docs so the skip list is actually exercised.
-        let docs: Vec<DocId> = (0..1000u32).map(|i| i * 3).collect();
-        let mut block_postings = build_block_postings(&docs[..])?;
-        let doc_freq = block_postings.doc_freq();
-
-        // rank(target) must equal the number of docs strictly below target.
-        // Targets are queried in non-decreasing order, as the API requires.
-        // `target` values must be a valid doc id (<= TERMINATED) and non-decreasing.
-        let targets = [
-            0u32, 1, 2, 3, 4, 299, 300, 301, 1500, 2996, 2997, 3000, 10_000,
-        ];
-        for &target in &targets {
-            let expected = docs.iter().filter(|&&d| d < target).count() as u32;
-            assert_eq!(
-                block_postings.rank(target),
-                expected,
-                "rank({target}) mismatch"
-            );
-        }
-
-        // Edge cases: below the first doc -> 0, above the last doc -> doc_freq.
-        let mut fresh = build_block_postings(&docs[..])?;
-        assert_eq!(fresh.rank(0), 0);
-        let mut fresh = build_block_postings(&docs[..])?;
-        assert_eq!(fresh.rank(1_000_000), doc_freq);
-
-        // Empty postings: rank is always 0.
-        let mut empty = BlockSegmentPostings::empty();
-        assert_eq!(empty.rank(42), 0);
-        Ok(())
-    }
 }
--- a/src/postings/json_postings_writer.rs
+++ b/src/postings/json_postings_writer.rs
@@ -3,7 +3,6 @@ use std::io;
 use common::json_path_writer::JSON_END_OF_PATH;
 use stacker::Addr;

-use crate::indexer::doc_id_mapping::DocIdMapping;
 use crate::indexer::indexing_term::IndexingTerm;
 use crate::indexer::path_to_unordered_id::OrderedPathId;
 use crate::postings::postings_writer::SpecializedPostingsWriter;
@@ -63,7 +62,6 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
        &self,
        ordered_term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
        ordered_id_to_path: &[&str],
-        doc_id_map: Option<&DocIdMapping>,
        ctx: &IndexingContext,
        serializer: &mut FieldSerializer,
    ) -> io::Result<()> {
@@ -86,7 +84,6 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
                SpecializedPostingsWriter::<Rec>::serialize_one_term(
                    term_buffer.as_bytes(),
                    *addr,
-                    doc_id_map,
                    &mut buffer_lender,
                    ctx,
                    serializer,
@@ -95,7 +92,6 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
                SpecializedPostingsWriter::<DocIdRecorder>::serialize_one_term(
                    term_buffer.as_bytes(),
                    *addr,
-                    doc_id_map,
                    &mut buffer_lender,
                    ctx,
                    serializer,
--- a/src/postings/postings_writer.rs
+++ b/src/postings/postings_writer.rs
@@ -5,7 +5,6 @@ use std::ops::Range;
 use stacker::Addr;

 use crate::fieldnorm::FieldNormReaders;
-use crate::indexer::doc_id_mapping::DocIdMapping;
 use crate::indexer::indexing_term::IndexingTerm;
 use crate::indexer::path_to_unordered_id::OrderedPathId;
 use crate::postings::recorder::{BufferLender, Recorder};
@@ -51,7 +50,6 @@ pub(crate) fn serialize_postings(
    schema: Schema,
    per_field_postings_writers: &PerFieldPostingsWriter,
    fieldnorm_readers: FieldNormReaders,
-    doc_id_map: Option<&DocIdMapping>,
    serializer: &mut InvertedIndexSerializer,
 ) -> crate::Result<()> {
    // Replace unordered ids by ordered ids to be able to sort
@@ -87,7 +85,6 @@ pub(crate) fn serialize_postings(
        postings_writer.serialize(
            &term_offsets[byte_offsets],
            &ordered_id_to_path,
-            doc_id_map,
            &ctx,
            &mut field_serializer,
        )?;
@@ -123,7 +120,6 @@ pub(crate) trait PostingsWriter: Send + Sync {
        &self,
        term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
        ordered_id_to_path: &[&str],
-        doc_id_map: Option<&DocIdMapping>,
        ctx: &IndexingContext,
        serializer: &mut FieldSerializer,
    ) -> io::Result<()>;
@@ -188,7 +184,6 @@ impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {
    pub(crate) fn serialize_one_term(
        term: &[u8],
        addr: Addr,
-        doc_id_map: Option<&DocIdMapping>,
        buffer_lender: &mut BufferLender,
        ctx: &IndexingContext,
        serializer: &mut FieldSerializer,
@@ -196,7 +191,7 @@ impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {
        let recorder: Rec = ctx.term_index.read(addr);
        let term_doc_freq = recorder.term_doc_freq().unwrap_or(0u32);
        serializer.new_term(term, term_doc_freq, recorder.has_term_freq())?;
-        recorder.serialize(&ctx.arena, doc_id_map, serializer, buffer_lender);
+        recorder.serialize(&ctx.arena, serializer, buffer_lender);
        serializer.close_term()?;
        Ok(())
    }
@@ -236,13 +231,12 @@ impl<Rec: Recorder> PostingsWriter for SpecializedPostingsWriter<Rec> {
        &self,
        term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
        _ordered_id_to_path: &[&str],
-        doc_id_map: Option<&DocIdMapping>,
        ctx: &IndexingContext,
        serializer: &mut FieldSerializer,
    ) -> io::Result<()> {
        let mut buffer_lender = BufferLender::default();
        for (_field, _path_id, term, addr) in term_addrs {
-            Self::serialize_one_term(term, *addr, doc_id_map, &mut buffer_lender, ctx, serializer)?;
+            Self::serialize_one_term(term, *addr, &mut buffer_lender, ctx, serializer)?;
        }
        Ok(())
    }
--- a/src/postings/recorder.rs
+++ b/src/postings/recorder.rs
@@ -1,7 +1,6 @@
 use common::read_u32_vint;
 use stacker::{ExpUnrolledLinkedList, MemoryArena};

-use crate::indexer::doc_id_mapping::DocIdMapping;
 use crate::postings::FieldSerializer;
 use crate::DocId;

@@ -71,7 +70,6 @@ pub(crate) trait Recorder: Copy + Default + Send + Sync + 'static {
    fn serialize(
        &self,
        arena: &MemoryArena,
-        doc_id_map: Option<&DocIdMapping>,
        serializer: &mut FieldSerializer<'_>,
        buffer_lender: &mut BufferLender,
    );
@@ -115,26 +113,15 @@ impl Recorder for DocIdRecorder {
    fn serialize(
        &self,
        arena: &MemoryArena,
-        doc_id_map: Option<&DocIdMapping>,
        serializer: &mut FieldSerializer<'_>,
        buffer_lender: &mut BufferLender,
    ) {
-        let (buffer, doc_ids) = buffer_lender.lend_all();
+        let buffer = buffer_lender.lend_u8();
        // TODO avoid reading twice.
        self.stack.read_to_end(arena, buffer);
-        if let Some(doc_id_map) = doc_id_map {
-            let iter = get_sum_reader(VInt32Reader::new(&buffer[..]));
-            doc_ids.extend(iter.map(|old_doc_id| doc_id_map.get_new_doc_id(old_doc_id)));
-            doc_ids.sort_unstable();
-
-            for doc in doc_ids {
-                serializer.write_doc(*doc, 0u32, &[][..]);
-            }
-        } else {
-            let iter = get_sum_reader(VInt32Reader::new(&buffer[..]));
-            for doc_id in iter {
-                serializer.write_doc(doc_id, 0u32, &[][..]);
-            }
+        let iter = get_sum_reader(VInt32Reader::new(&buffer[..]));
+        for doc_id in iter {
+            serializer.write_doc(doc_id, 0u32, &[][..]);
        }
    }

@@ -194,35 +181,18 @@ impl Recorder for TermFrequencyRecorder {
    fn serialize(
        &self,
        arena: &MemoryArena,
-        doc_id_map: Option<&DocIdMapping>,
        serializer: &mut FieldSerializer<'_>,
        buffer_lender: &mut BufferLender,
    ) {
        let buffer = buffer_lender.lend_u8();
        self.stack.read_to_end(arena, buffer);
        let mut u32_it = VInt32Reader::new(&buffer[..]);
-        if let Some(doc_id_map) = doc_id_map {
-            let mut doc_id_and_tf = vec![];
-            let mut prev_doc = 0;
-            while let Some(delta_doc_id) = u32_it.next() {
-                let doc_id = prev_doc + delta_doc_id;
-                prev_doc = doc_id;
-                let term_freq = u32_it.next().unwrap_or(self.current_tf);
-                doc_id_and_tf.push((doc_id_map.get_new_doc_id(doc_id), term_freq));
-            }
-            doc_id_and_tf.sort_unstable_by_key(|&(doc_id, _)| doc_id);
-
-            for (doc_id, tf) in doc_id_and_tf {
-                serializer.write_doc(doc_id, tf, &[][..]);
-            }
-        } else {
-            let mut prev_doc = 0;
-            while let Some(delta_doc_id) = u32_it.next() {
-                let doc_id = prev_doc + delta_doc_id;
-                prev_doc = doc_id;
-                let term_freq = u32_it.next().unwrap_or(self.current_tf);
-                serializer.write_doc(doc_id, term_freq, &[][..]);
-            }
+        let mut prev_doc = 0;
+        while let Some(delta_doc_id) = u32_it.next() {
+            let doc_id = prev_doc + delta_doc_id;
+            prev_doc = doc_id;
+            let term_freq = u32_it.next().unwrap_or(self.current_tf);
+            serializer.write_doc(doc_id, term_freq, &[][..]);
        }
    }

@@ -268,14 +238,12 @@ impl Recorder for TfAndPositionRecorder {
    fn serialize(
        &self,
        arena: &MemoryArena,
-        doc_id_map: Option<&DocIdMapping>,
        serializer: &mut FieldSerializer<'_>,
        buffer_lender: &mut BufferLender,
    ) {
        let (buffer_u8, buffer_positions) = buffer_lender.lend_all();
        self.stack.read_to_end(arena, buffer_u8);
        let mut u32_it = VInt32Reader::new(&buffer_u8[..]);
-        let mut doc_id_and_positions = vec![];
        let mut prev_doc = 0;
        while let Some(delta_doc_id) = u32_it.next() {
            let doc_id = prev_doc + delta_doc_id;
@@ -294,19 +262,7 @@ impl Recorder for TfAndPositionRecorder {
                    }
                }
            }
-            if let Some(doc_id_map) = doc_id_map {
-                // this simple variant to remap may consume to much memory
-                doc_id_and_positions
-                    .push((doc_id_map.get_new_doc_id(doc_id), buffer_positions.to_vec()));
-            } else {
-                serializer.write_doc(doc_id, buffer_positions.len() as u32, buffer_positions);
-            }
-        }
-        if doc_id_map.is_some() {
-            doc_id_and_positions.sort_unstable_by_key(|&(doc_id, _)| doc_id);
-            for (doc_id, positions) in doc_id_and_positions {
-                serializer.write_doc(doc_id, positions.len() as u32, &positions);
-            }
+            serializer.write_doc(doc_id, buffer_positions.len() as u32, buffer_positions);
        }
    }

@@ -319,9 +275,8 @@ impl Recorder for TfAndPositionRecorder {
 mod tests {

    use common::write_u32_vint;
-    use stacker::MemoryArena;

-    use super::{BufferLender, Recorder, TermFrequencyRecorder, VInt32Reader};
+    use super::{BufferLender, VInt32Reader};

    #[test]
    fn test_buffer_lender() {
@@ -359,98 +314,4 @@ mod tests {
        let res: Vec<u32> = VInt32Reader::new(&buffer[..]).collect();
        assert_eq!(&res[..], &vals[..]);
    }
-
-    // ── TermFrequencyRecorder ─────────────────────────────────────────────────
-
-    #[test]
-    fn term_frequency_recorder_has_term_freq() {
-        let rec = TermFrequencyRecorder::default();
-        assert!(
-            rec.has_term_freq(),
-            "TermFrequencyRecorder must advertise term-frequency support"
-        );
-    }
-
-    #[test]
-    fn term_frequency_recorder_term_doc_freq_single_doc() {
-        let mut arena = MemoryArena::default();
-        let mut rec = TermFrequencyRecorder::default();
-
-        // Record one document with two term occurrences.
-        rec.new_doc(0, &mut arena);
-        rec.record_position(0, &mut arena);
-        rec.record_position(1, &mut arena);
-        rec.close_doc(&mut arena);
-
-        assert_eq!(
-            rec.term_doc_freq(),
-            Some(1),
-            "term_doc_freq should be 1 after recording one document"
-        );
-    }
-
-    #[test]
-    fn term_frequency_recorder_term_doc_freq_multiple_docs() {
-        let mut arena = MemoryArena::default();
-        let mut rec = TermFrequencyRecorder::default();
-
-        // Three documents with 1, 3, and 2 occurrences respectively.
-        for (doc, tf) in [(0u32, 1u32), (5, 3), (10, 2)] {
-            rec.new_doc(doc, &mut arena);
-            for pos in 0..tf {
-                rec.record_position(pos, &mut arena);
-            }
-            rec.close_doc(&mut arena);
-        }
-
-        assert_eq!(
-            rec.term_doc_freq(),
-            Some(3),
-            "term_doc_freq should equal the number of documents recorded"
-        );
-    }
-
-    #[test]
-    fn term_frequency_recorder_zero_docs() {
-        let rec = TermFrequencyRecorder::default();
-        assert_eq!(
-            rec.term_doc_freq(),
-            Some(0),
-            "term_doc_freq should be 0 before any document is recorded"
-        );
-    }
-
-    #[test]
-    fn term_frequency_recorder_single_occurrence_per_doc() {
-        let mut arena = MemoryArena::default();
-        let mut rec = TermFrequencyRecorder::default();
-
-        // Each document has exactly one occurrence — the minimum non-trivial case.
-        for doc in [1u32, 2, 100] {
-            rec.new_doc(doc, &mut arena);
-            rec.record_position(0, &mut arena);
-            rec.close_doc(&mut arena);
-        }
-
-        assert_eq!(rec.term_doc_freq(), Some(3));
-    }
-
-    #[test]
-    fn term_frequency_recorder_high_frequency_doc() {
-        let mut arena = MemoryArena::default();
-        let mut rec = TermFrequencyRecorder::default();
-
-        // A document where the term appears many times.
-        rec.new_doc(42, &mut arena);
-        for pos in 0..1000 {
-            rec.record_position(pos, &mut arena);
-        }
-        rec.close_doc(&mut arena);
-
-        assert_eq!(
-            rec.term_doc_freq(),
-            Some(1),
-            "term_doc_freq counts documents, not occurrences"
-        );
-    }
 }
--- a/src/postings/skip.rs
+++ b/src/postings/skip.rs
@@ -146,11 +146,6 @@ impl SkipReader {
        skip_reader
    }

-    #[inline(always)]
-    pub fn has_remaining_docs(&self) -> bool {
-        self.remaining_docs != 0
-    }
-
    pub fn reset(&mut self, data: OwnedBytes, doc_freq: u32) {
        self.last_doc_in_block = if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
            0
@@ -187,12 +182,6 @@ impl SkipReader {
        self.last_doc_in_block
    }

-    /// Number of docs from the start of the current block to the end of the postings
-    /// (i.e. the current block plus every block after it).
-    pub(crate) fn remaining_docs(&self) -> u32 {
-        self.remaining_docs
-    }
-
    pub fn position_offset(&self) -> u64 {
        self.position_offset
    }
--- a/src/query/boolean_query/block_wand_union.rs
+++ b/src/query/boolean_query/block_wand_union.rs
@@ -50,7 +50,7 @@ fn block_max_was_too_low_advance_one_scorer(
    scorers: &mut [TermScorerWithMaxScore],
    pivot_len: usize,
 ) {
-    debug_assert!(scorers.iter().map(|scorer| scorer.doc()).is_sorted());
+    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
    let mut scorer_to_seek = pivot_len - 1;
    let mut global_max_score = scorers[scorer_to_seek].max_score;
    let mut doc_to_seek_after = scorers[scorer_to_seek].last_doc_in_block();
@@ -76,7 +76,7 @@ fn block_max_was_too_low_advance_one_scorer(
    scorers[scorer_to_seek].seek(doc_to_seek_after);

    restore_ordering(scorers, scorer_to_seek);
-    debug_assert!(scorers.iter().map(|scorer| scorer.doc()).is_sorted());
+    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
 }

 // Given a list of term_scorers and a `ord` and assuming that `term_scorers[ord]` is sorted
@@ -90,7 +90,7 @@ fn restore_ordering(term_scorers: &mut [TermScorerWithMaxScore], ord: usize) {
        }
        term_scorers.swap(i, i - 1);
    }
-    debug_assert!(term_scorers.iter().map(|scorer| scorer.doc()).is_sorted());
+    debug_assert!(is_sorted(term_scorers.iter().map(|scorer| scorer.doc())));
 }

 // Attempts to advance all term_scorers between `&term_scorers[0..before_len]` to the pivot.
@@ -150,21 +150,17 @@ pub fn block_wand(
    mut threshold: Score,
    callback: &mut dyn FnMut(u32, Score) -> Score,
 ) {
-    scorers.retain(|scorer| scorer.doc() < TERMINATED);
-    if scorers.len() == 1 {
-        let scorer = scorers.pop().unwrap();
-        return block_wand_single_scorer(scorer, threshold, callback);
-    }
    let mut scorers: Vec<TermScorerWithMaxScore> = scorers
        .iter_mut()
        .map(TermScorerWithMaxScore::from)
        .collect();
-    // At this point we need to ensure that the scorers are sorted!
    scorers.sort_by_key(|scorer| scorer.doc());
+    // At this point we need to ensure that the scorers are sorted!
+    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
    while let Some((before_pivot_len, pivot_len, pivot_doc)) =
        find_pivot_doc(&scorers[..], threshold)
    {
-        debug_assert!(scorers.iter().map(|scorer| scorer.doc()).is_sorted());
+        debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
        debug_assert_ne!(pivot_doc, TERMINATED);
        debug_assert!(before_pivot_len < pivot_len);

@@ -232,7 +228,7 @@ pub fn block_wand_single_scorer(
    loop {
        // We position the scorer on a block that can reach
        // the threshold.
-        while scorer.block_max_score() <= threshold {
+        while scorer.block_max_score() < threshold {
            let last_doc_in_block = scorer.last_doc_in_block();
            if last_doc_in_block == TERMINATED {
                return;
@@ -290,6 +286,18 @@ impl DerefMut for TermScorerWithMaxScore<'_> {
    }
 }

+fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
+    if let Some(first) = it.next() {
+        let mut prev = first;
+        for doc in it {
+            if doc < prev {
+                return false;
+            }
+            prev = doc;
+        }
+    }
+    true
+}
 #[cfg(test)]
 mod tests {
    use std::cmp::Ordering;
--- a/src/query/boolean_query/block_wand_intersection.rs
+++ b/src/query/boolean_query/block_wand_intersection.rs
@@ -1,464 +0,0 @@
-use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
-use crate::query::term_query::TermScorer;
-use crate::query::Scorer;
-use crate::{DocId, DocSet, Score, TERMINATED};
-
-/// Block-max pruning for top-K over intersection of term scorers.
-///
-/// Uses the least-frequent term as "leader" to define 128-doc processing windows.
-/// For each window, the sum of block_max_scores is compared to the current threshold;
-/// if the block can't beat it, the entire block is skipped.
-///
-/// Within non-skipped blocks, individual documents are pruned by checking whether
-/// leader_score + sum(secondary block_max_scores) can exceed the threshold before
-/// performing the expensive intersection membership check (seeking into secondary scorers).
-///
-/// # Preconditions
-/// - `scorers` has at least 2 elements
-/// - All scorers read frequencies (`FreqReadingOption::ReadFreq`)
-pub(crate) fn block_wand_intersection(
-    mut scorers: Vec<TermScorer>,
-    mut threshold: Score,
-    callback: &mut dyn FnMut(DocId, Score) -> Score,
-) {
-    assert!(scorers.len() >= 2);
-
-    // Sort by cost (ascending). scorers[0] becomes the "leader" (rarest term).
-    scorers.sort_by_key(TermScorer::size_hint);
-
-    let (leader, secondaries) = scorers.split_first_mut().unwrap();
-
-    // Precompute global max scores for early termination checks.
-    let leader_max_score: Score = leader.max_score();
-    let secondaries_global_max_sum: Score = secondaries.iter().map(TermScorer::max_score).sum();
-
-    // Early exit: no document can possibly beat the threshold.
-    if leader_max_score + secondaries_global_max_sum <= threshold {
-        return;
-    }
-
-    // Borrow fieldnorm reader and BM25 weight before the main loop.
-    // These are immutable references to disjoint fields from block_cursor,
-    // but Rust's borrow checker can't see through method calls, so we
-    // extract them once upfront.
-    let fieldnorm_reader = leader.fieldnorm_reader().clone();
-    let bm25_weight = leader.bm25_weight().clone();
-
-    let mut doc = leader.doc();
-
-    let mut secondary_block_max_scores: Box<[f32]> =
-        vec![0.0f32; secondaries.len()].into_boxed_slice();
-    let mut secondary_suffix_block_max: Box<[f32]> =
-        vec![0.0f32; secondaries.len()].into_boxed_slice();
-
-    while doc < TERMINATED {
-        // --- Phase 1: Block-level pruning ---
-        //
-        // Position all skip readers on the block containing `doc`.
-        // seek_block is cheap: it only advances the skip reader, no block decompression.
-        leader.seek_block(doc);
-        let leader_block_max: Score = leader.block_max_score();
-
-        // Compute the window end as the minimum last_doc_in_block across all scorers.
-        // This ensures the block_max values are valid for all docs in [doc, window_end].
-        // Different scorers have independently aligned blocks, so we must use the
-        // smallest window where all block_max values hold.
-        let mut window_end: DocId = leader.last_doc_in_block();
-
-        let mut secondary_block_max_sum: Score = 0.0;
-        let num_secondaries = secondaries.len();
-        for (idx, secondary) in secondaries.iter_mut().enumerate() {
-            secondary.block_cursor().seek_block(doc);
-            if !secondary.block_cursor().has_remaining_docs() {
-                return;
-            }
-            window_end = window_end.min(secondary.last_doc_in_block());
-            let bms = secondary.block_max_score();
-            secondary_block_max_scores[idx] = bms;
-            secondary_block_max_sum += bms;
-        }
-
-        if leader_block_max + secondary_block_max_sum <= threshold {
-            // The entire window cannot beat the threshold. Skip past it.
-            doc = window_end + 1;
-            continue;
-        }
-
-        // --- Phase 2: Batch processing within the window ---
-        //
-        // Score-first approach: decode the leader's block, filter by threshold,
-        // then check intersection membership only for survivors. This avoids expensive
-        // secondary seeks for docs that can't beat the threshold.
-        let block_cursor = leader.block_cursor();
-        // seek loads the block and returns the in-block index of the first doc >= `doc`.
-        let start_idx = block_cursor.seek(doc);
-
-        // Use the branchless binary search on the doc decoder to find the first
-        // index past window_end.
-        let end_idx = block_cursor
-            .doc_decoder
-            .seek_within_block(window_end + 1)
-            .min(block_cursor.block_len());
-
-        let block_docs = &block_cursor.doc_decoder.output_array()[start_idx..end_idx];
-        let block_freqs = &block_cursor.freq_output_array()[start_idx..end_idx];
-
-        // Pass 1: Batch-compute leader BM25 scores and branchlessly filter
-        // candidates that can't beat the threshold.
-        //
-        // The trick: always write to the buffer at `num_candidates`, then
-        // conditionally advance the count. The compiler can turn this into
-        // a cmov instead of a branch, avoiding misprediction costs.
-        let score_threshold = threshold - secondary_block_max_sum;
-        let mut candidate_doc_ids = [0u32; COMPRESSION_BLOCK_SIZE];
-        let mut candidate_scores = [0.0f32; COMPRESSION_BLOCK_SIZE];
-        let mut num_candidates = 0usize;
-
-        for (candidate_doc, term_freq) in
-            block_docs.iter().copied().zip(block_freqs.iter().copied())
-        {
-            let fieldnorm_id = fieldnorm_reader.fieldnorm_id(candidate_doc);
-            let leader_score = bm25_weight.score(fieldnorm_id, term_freq);
-            candidate_doc_ids[num_candidates] = candidate_doc;
-            candidate_scores[num_candidates] = leader_score;
-            num_candidates += (leader_score > score_threshold) as usize;
-        }
-
-        // Precompute suffix sums: suffix[i] = sum of block_max for secondaries[i+1..].
-        // Used in Phase 2 to prune candidates that can't beat threshold even with
-        // remaining secondaries contributing their block_max.
-        if num_candidates == 0 {
-            doc = window_end + 1;
-            continue;
-        }
-
-        let mut running = 0.0f32;
-        for idx in (0..num_secondaries).rev() {
-            secondary_suffix_block_max[idx] = running;
-            running += secondary_block_max_scores[idx];
-        }
-
-        // Pass 2: Check intersection membership only for survivors.
-        // score_threshold may be stale (threshold can increase from callbacks),
-        // but that's conservative — we may check a few extra candidates, never miss one.
-        'next_candidate: for candidate_idx in 0..num_candidates {
-            let candidate_doc = candidate_doc_ids[candidate_idx];
-            let mut total_score: Score = candidate_scores[candidate_idx];
-
-            for (secondary_idx, secondary) in secondaries.iter_mut().enumerate() {
-                // If a previous candidate already advanced this secondary past
-                // candidate_doc, the candidate can't be in the intersection.
-                if secondary.doc() > candidate_doc {
-                    continue 'next_candidate;
-                }
-                let seek_result = secondary.seek(candidate_doc);
-                if seek_result != candidate_doc {
-                    continue 'next_candidate;
-                }
-                total_score += secondary.score();
-
-                // Prune: even if all remaining secondaries score at their block max,
-                // can we still beat the threshold?
-                if total_score + secondary_suffix_block_max[secondary_idx] <= threshold {
-                    continue 'next_candidate;
-                }
-            }
-
-            // All secondaries matched.
-            if total_score > threshold {
-                threshold = callback(candidate_doc, total_score);
-
-                if leader_max_score + secondaries_global_max_sum <= threshold {
-                    return;
-                }
-            }
-        }
-
-        doc = window_end + 1;
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::cmp::Ordering;
-    use std::collections::BinaryHeap;
-
-    use proptest::prelude::*;
-
-    use crate::query::term_query::TermScorer;
-    use crate::query::{Bm25Weight, Scorer};
-    use crate::{DocId, DocSet, Score, TERMINATED};
-
-    struct Float(Score);
-
-    impl Eq for Float {}
-
-    impl PartialEq for Float {
-        fn eq(&self, other: &Self) -> bool {
-            self.cmp(other) == Ordering::Equal
-        }
-    }
-
-    impl PartialOrd for Float {
-        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-            Some(self.cmp(other))
-        }
-    }
-
-    impl Ord for Float {
-        fn cmp(&self, other: &Self) -> Ordering {
-            other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal)
-        }
-    }
-
-    fn nearly_equals(left: Score, right: Score) -> bool {
-        (left - right).abs() < 0.0001 * (left + right).abs()
-    }
-
-    /// Run block_wand_intersection and collect (doc, score) pairs above threshold.
-    fn compute_checkpoints_block_wand_intersection(
-        term_scorers: Vec<TermScorer>,
-        top_k: usize,
-    ) -> Vec<(DocId, Score)> {
-        let mut heap: BinaryHeap<Float> = BinaryHeap::with_capacity(top_k);
-        let mut checkpoints: Vec<(DocId, Score)> = Vec::new();
-        let mut limit: Score = 0.0;
-
-        let callback = &mut |doc, score| {
-            heap.push(Float(score));
-            if heap.len() > top_k {
-                heap.pop().unwrap();
-            }
-            if heap.len() == top_k {
-                limit = heap.peek().unwrap().0;
-            }
-            if !nearly_equals(score, limit) {
-                checkpoints.push((doc, score));
-            }
-            limit
-        };
-
-        super::block_wand_intersection(term_scorers, Score::MIN, callback);
-        checkpoints
-    }
-
-    /// Naive baseline: intersect by iterating all docs.
-    fn compute_checkpoints_naive_intersection(
-        mut term_scorers: Vec<TermScorer>,
-        top_k: usize,
-    ) -> Vec<(DocId, Score)> {
-        let mut heap: BinaryHeap<Float> = BinaryHeap::with_capacity(top_k);
-        let mut checkpoints: Vec<(DocId, Score)> = Vec::new();
-        let mut limit = Score::MIN;
-
-        // Sort by cost to use the cheapest as driver.
-        term_scorers.sort_by_key(|s| s.cost());
-
-        let (leader, secondaries) = term_scorers.split_first_mut().unwrap();
-
-        let mut doc = leader.doc();
-        while doc != TERMINATED {
-            let mut all_match = true;
-            for secondary in secondaries.iter_mut() {
-                let secondary_doc = secondary.doc();
-                let seek_result = if secondary_doc <= doc {
-                    secondary.seek(doc)
-                } else {
-                    secondary_doc
-                };
-                if seek_result != doc {
-                    all_match = false;
-                    break;
-                }
-            }
-
-            if all_match {
-                let score: Score =
-                    leader.score() + secondaries.iter_mut().map(|s| s.score()).sum::<Score>();
-
-                if score > limit {
-                    heap.push(Float(score));
-                    if heap.len() > top_k {
-                        heap.pop().unwrap();
-                    }
-                    if heap.len() == top_k {
-                        limit = heap.peek().unwrap().0;
-                    }
-                    if !nearly_equals(score, limit) {
-                        checkpoints.push((doc, score));
-                    }
-                }
-            }
-            doc = leader.advance();
-        }
-        checkpoints
-    }
-
-    const MAX_TERM_FREQ: u32 = 100u32;
-
-    fn posting_list(max_doc: u32) -> BoxedStrategy<Vec<(DocId, u32)>> {
-        (1..max_doc + 1)
-            .prop_flat_map(move |doc_freq| {
-                (
-                    proptest::bits::bitset::sampled(doc_freq as usize, 0..max_doc as usize),
-                    proptest::collection::vec(1u32..MAX_TERM_FREQ, doc_freq as usize),
-                )
-            })
-            .prop_map(|(docset, term_freqs)| {
-                docset
-                    .iter()
-                    .map(|doc| doc as u32)
-                    .zip(term_freqs.iter().cloned())
-                    .collect::<Vec<_>>()
-            })
-            .boxed()
-    }
-
-    #[expect(clippy::type_complexity)]
-    fn gen_term_scorers(num_scorers: usize) -> BoxedStrategy<(Vec<Vec<(DocId, u32)>>, Vec<u32>)> {
-        (1u32..100u32)
-            .prop_flat_map(move |max_doc: u32| {
-                (
-                    proptest::collection::vec(posting_list(max_doc), num_scorers),
-                    proptest::collection::vec(2u32..10u32 * MAX_TERM_FREQ, max_doc as usize),
-                )
-            })
-            .boxed()
-    }
-
-    fn test_block_wand_intersection_aux(posting_lists: &[Vec<(DocId, u32)>], fieldnorms: &[u32]) {
-        // Repeat docs 64 times to create multi-block scenarios, matching block_wand.rs test
-        // strategy.
-        const REPEAT: usize = 64;
-        let fieldnorms_expanded: Vec<u32> = fieldnorms
-            .iter()
-            .cloned()
-            .flat_map(|fieldnorm| std::iter::repeat_n(fieldnorm, REPEAT))
-            .collect();
-
-        let postings_lists_expanded: Vec<Vec<(DocId, u32)>> = posting_lists
-            .iter()
-            .map(|posting_list| {
-                posting_list
-                    .iter()
-                    .cloned()
-                    .flat_map(|(doc, term_freq)| {
-                        (0_u32..REPEAT as u32).map(move |offset| {
-                            (
-                                doc * (REPEAT as u32) + offset,
-                                if offset == 0 { term_freq } else { 1 },
-                            )
-                        })
-                    })
-                    .collect::<Vec<(DocId, u32)>>()
-            })
-            .collect();
-
-        let total_fieldnorms: u64 = fieldnorms_expanded
-            .iter()
-            .cloned()
-            .map(|fieldnorm| fieldnorm as u64)
-            .sum();
-        let average_fieldnorm = (total_fieldnorms as Score) / (fieldnorms_expanded.len() as Score);
-        let max_doc = fieldnorms_expanded.len();
-
-        let make_scorers = || -> Vec<TermScorer> {
-            postings_lists_expanded
-                .iter()
-                .map(|postings| {
-                    let bm25_weight = Bm25Weight::for_one_term(
-                        postings.len() as u64,
-                        max_doc as u64,
-                        average_fieldnorm,
-                    );
-                    TermScorer::create_for_test(postings, &fieldnorms_expanded[..], bm25_weight)
-                })
-                .collect()
-        };
-
-        for top_k in 1..4 {
-            let checkpoints_optimized =
-                compute_checkpoints_block_wand_intersection(make_scorers(), top_k);
-            let checkpoints_naive = compute_checkpoints_naive_intersection(make_scorers(), top_k);
-            assert_eq!(
-                checkpoints_optimized.len(),
-                checkpoints_naive.len(),
-                "Mismatch in checkpoint count for top_k={top_k}"
-            );
-            for (&(left_doc, left_score), &(right_doc, right_score)) in
-                checkpoints_optimized.iter().zip(checkpoints_naive.iter())
-            {
-                assert_eq!(left_doc, right_doc);
-                assert!(
-                    nearly_equals(left_score, right_score),
-                    "Score mismatch for doc {left_doc}: {left_score} vs {right_score}"
-                );
-            }
-        }
-    }
-
-    proptest! {
-        #![proptest_config(ProptestConfig::with_cases(500))]
-        #[test]
-        fn test_block_wand_intersection_two_scorers(
-            (posting_lists, fieldnorms) in gen_term_scorers(2)
-        ) {
-            test_block_wand_intersection_aux(&posting_lists[..], &fieldnorms[..]);
-        }
-    }
-
-    proptest! {
-        #![proptest_config(ProptestConfig::with_cases(500))]
-        #[test]
-        fn test_block_wand_intersection_three_scorers(
-            (posting_lists, fieldnorms) in gen_term_scorers(3)
-        ) {
-            test_block_wand_intersection_aux(&posting_lists[..], &fieldnorms[..]);
-        }
-    }
-
-    #[test]
-    fn test_block_wand_intersection_disjoint() {
-        // Two posting lists with no overlap — intersection is empty.
-        let fieldnorms: Vec<u32> = vec![10; 200];
-        let average_fieldnorm = 10.0;
-        let postings_a: Vec<(DocId, u32)> = (0..100).map(|d| (d, 1)).collect();
-        let postings_b: Vec<(DocId, u32)> = (100..200).map(|d| (d, 1)).collect();
-
-        let scorer_a = TermScorer::create_for_test(
-            &postings_a,
-            &fieldnorms,
-            Bm25Weight::for_one_term(100, 200, average_fieldnorm),
-        );
-        let scorer_b = TermScorer::create_for_test(
-            &postings_b,
-            &fieldnorms,
-            Bm25Weight::for_one_term(100, 200, average_fieldnorm),
-        );
-
-        let checkpoints = compute_checkpoints_block_wand_intersection(vec![scorer_a, scorer_b], 10);
-        assert!(checkpoints.is_empty());
-    }
-
-    #[test]
-    fn test_block_wand_intersection_all_overlap() {
-        // Two posting lists with full overlap.
-        let fieldnorms: Vec<u32> = vec![10; 50];
-        let average_fieldnorm = 10.0;
-        let postings: Vec<(DocId, u32)> = (0..50).map(|d| (d, 3)).collect();
-
-        let make_scorer = || {
-            TermScorer::create_for_test(
-                &postings,
-                &fieldnorms,
-                Bm25Weight::for_one_term(50, 50, average_fieldnorm),
-            )
-        };
-
-        let checkpoints_opt =
-            compute_checkpoints_block_wand_intersection(vec![make_scorer(), make_scorer()], 5);
-        let checkpoints_naive =
-            compute_checkpoints_naive_intersection(vec![make_scorer(), make_scorer()], 5);
-        assert_eq!(checkpoints_opt.len(), checkpoints_naive.len());
-    }
-}
--- a/src/query/boolean_query/boolean_weight.rs
+++ b/src/query/boolean_query/boolean_weight.rs
@@ -16,7 +16,6 @@ use crate::{DocId, Score};

 enum SpecializedScorer {
    TermUnion(Vec<TermScorer>),
-    TermIntersection(Vec<TermScorer>),
    Other(Box<dyn Scorer>),
 }

@@ -50,9 +49,10 @@ where
    TScoreCombiner: ScoreCombiner,
 {
    assert!(!scorers.is_empty());
-    if scorers.len() == 1 && !scorers[0].is::<TermScorer>() {
+    if scorers.len() == 1 {
        return SpecializedScorer::Other(scorers.into_iter().next().unwrap()); //< we checked the size beforehand
    }
+
    {
        let is_all_term_queries = scorers.iter().all(|scorer| scorer.is::<TermScorer>());
        if is_all_term_queries {
@@ -66,9 +66,6 @@ where
            {
                // Block wand is only available if we read frequencies.
                return SpecializedScorer::TermUnion(scorers);
-            } else if scorers.len() == 1 {
-                // Single TermScorer without freq reading — unwrap directly.
-                return SpecializedScorer::Other(Box::new(scorers.into_iter().next().unwrap()));
            } else {
                return SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
                    scorers,
@@ -96,13 +93,6 @@ fn into_box_scorer<TScoreCombiner: ScoreCombiner>(
                BufferedUnionScorer::build(term_scorers, score_combiner_fn, num_docs);
            Box::new(union_scorer)
        }
-        SpecializedScorer::TermIntersection(term_scorers) => {
-            let boxed_scorers: Vec<Box<dyn Scorer>> = term_scorers
-                .into_iter()
-                .map(|s| Box::new(s) as Box<dyn Scorer>)
-                .collect();
-            intersect_scorers(boxed_scorers, num_docs)
-        }
        SpecializedScorer::Other(scorer) => scorer,
    }
 }
@@ -307,43 +297,14 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
                // Result depends entirely on MUST + any removed AllScorers.
                let combined_all_scorer_count = must_special_scorer_counts.num_all_scorers
                    + should_special_scorer_counts.num_all_scorers;
-
-                // Try to detect a pure TermScorer intersection for block-max optimization.
-                // Preconditions: no removed AllScorers, at least 2 scorers, all TermScorer
-                // with frequency reading enabled.
-                if combined_all_scorer_count == 0
-                    && must_scorers.len() >= 2
-                    && must_scorers.iter().all(|s| s.is::<TermScorer>())
-                {
-                    let term_scorers: Vec<TermScorer> = must_scorers
-                        .into_iter()
-                        .map(|s| *(s.downcast::<TermScorer>().map_err(|_| ()).unwrap()))
-                        .collect();
-                    if term_scorers
-                        .iter()
-                        .all(|s| s.freq_reading_option() == FreqReadingOption::ReadFreq)
-                    {
-                        SpecializedScorer::TermIntersection(term_scorers)
-                    } else {
-                        let must_scorers: Vec<Box<dyn Scorer>> = term_scorers
-                            .into_iter()
-                            .map(|s| Box::new(s) as Box<dyn Scorer>)
-                            .collect();
-                        let boxed_scorer: Box<dyn Scorer> =
-                            effective_must_scorer(must_scorers, 0, reader.max_doc(), num_docs)
-                                .unwrap_or_else(|| Box::new(EmptyScorer));
-                        SpecializedScorer::Other(boxed_scorer)
-                    }
-                } else {
-                    let boxed_scorer: Box<dyn Scorer> = effective_must_scorer(
-                        must_scorers,
-                        combined_all_scorer_count,
-                        reader.max_doc(),
-                        num_docs,
-                    )
-                    .unwrap_or_else(|| Box::new(EmptyScorer));
-                    SpecializedScorer::Other(boxed_scorer)
-                }
+                let boxed_scorer: Box<dyn Scorer> = effective_must_scorer(
+                    must_scorers,
+                    combined_all_scorer_count,
+                    reader.max_doc(),
+                    num_docs,
+                )
+                .unwrap_or_else(|| Box::new(EmptyScorer));
+                SpecializedScorer::Other(boxed_scorer)
            }
            (ShouldScorersCombinationMethod::Optional(should_scorer), must_scorers) => {
                // Optional SHOULD: contributes to scoring but not required for matching.
@@ -502,21 +463,15 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
        callback: &mut dyn FnMut(DocId, Score),
    ) -> crate::Result<()> {
        let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
-        let num_docs = reader.num_docs();
        match scorer {
            SpecializedScorer::TermUnion(term_scorers) => {
-                let mut union_scorer =
-                    BufferedUnionScorer::build(term_scorers, &self.score_combiner_fn, num_docs);
+                let mut union_scorer = BufferedUnionScorer::build(
+                    term_scorers,
+                    &self.score_combiner_fn,
+                    reader.num_docs(),
+                );
                for_each_scorer(&mut union_scorer, callback);
            }
-            SpecializedScorer::TermIntersection(term_scorers) => {
-                let boxed_scorers: Vec<Box<dyn Scorer>> = term_scorers
-                    .into_iter()
-                    .map(|term_scorer| Box::new(term_scorer) as Box<dyn Scorer>)
-                    .collect();
-                let mut intersection = intersect_scorers(boxed_scorers, num_docs);
-                for_each_scorer(intersection.as_mut(), callback);
-            }
            SpecializedScorer::Other(mut scorer) => {
                for_each_scorer(scorer.as_mut(), callback);
            }
@@ -530,23 +485,17 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
        callback: &mut dyn FnMut(&[DocId]),
    ) -> crate::Result<()> {
        let scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?;
-        let num_docs = reader.num_docs();
        let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];

        match scorer {
            SpecializedScorer::TermUnion(term_scorers) => {
-                let mut union_scorer =
-                    BufferedUnionScorer::build(term_scorers, &self.score_combiner_fn, num_docs);
+                let mut union_scorer = BufferedUnionScorer::build(
+                    term_scorers,
+                    &self.score_combiner_fn,
+                    reader.num_docs(),
+                );
                for_each_docset_buffered(&mut union_scorer, &mut buffer, callback);
            }
-            SpecializedScorer::TermIntersection(term_scorers) => {
-                let boxed_scorers: Vec<Box<dyn Scorer>> = term_scorers
-                    .into_iter()
-                    .map(|term_scorer| Box::new(term_scorer) as Box<dyn Scorer>)
-                    .collect();
-                let mut intersection = intersect_scorers(boxed_scorers, num_docs);
-                for_each_docset_buffered(intersection.as_mut(), &mut buffer, callback);
-            }
            SpecializedScorer::Other(mut scorer) => {
                for_each_docset_buffered(scorer.as_mut(), &mut buffer, callback);
            }
@@ -575,9 +524,6 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
            SpecializedScorer::TermUnion(term_scorers) => {
                super::block_wand(term_scorers, threshold, callback);
            }
-            SpecializedScorer::TermIntersection(term_scorers) => {
-                super::block_wand_intersection(term_scorers, threshold, callback);
-            }
            SpecializedScorer::Other(mut scorer) => {
                for_each_pruning_scorer(scorer.as_mut(), threshold, callback);
            }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Pascal Seitz	d8f4c0b703	chore: Release 0.26.1	2026-04-21 07:30:14 +02:00
Pascal Seitz	386b0a2a68	perf(agg): only measure active parent bucket in composite collect Same change as `26a589e` for SegmentCompositeCollector: get_memory_consumption summed across all parent_buckets on every block, scaling with outer bucket cardinality. Pass parent_bucket_id and index the single bucket.	2026-04-21 07:29:35 +02:00
Pascal Seitz	56cd88928d	add inline	2026-04-21 07:29:35 +02:00
Pascal Seitz	cb8a2df8b0	agg fix: compute memory consumption only for current bucket	2026-04-21 07:29:35 +02:00
Pascal Seitz	9e63fc5081	chore: Release	2026-03-31 15:10:59 +08:00
Pascal Seitz	d882b34cf8	unbump for release and update Changelog.md	2026-03-31 14:48:43 +08:00