removed support for date rfc parsing in json object

added positions codec
exposing more
2026-06-11 21:10:42 +00:00 · 2026-06-11 15:18:06 +02:00 · 2026-06-10 23:38:46 +02:00 · 2026-06-10 16:26:34 +02:00 · 2026-06-10 11:20:21 +02:00 · 2026-06-08 11:22:50 +02:00
134 changed files with 7927 additions and 2470 deletions
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -6,6 +6,8 @@ updates:
    interval: daily
    time: "20:00"
  open-pull-requests-limit: 10
+  cooldown:
+    default-days: 2

 - package-ecosystem: "github-actions"
  directory: "/"
@@ -13,3 +15,5 @@ updates:
    interval: daily
    time: "20:00"
  open-pull-requests-limit: 10
+  cooldown:
+    default-days: 2
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -4,6 +4,9 @@ on:
  push:
    branches: [main]

+permissions:
+  contents: read
+
 # Ensures that we cancel running jobs for the same PR / same workflow.
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -12,16 +15,20 @@ concurrency:
 jobs:
  coverage:
    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
      - name: Install Rust
        run: rustup toolchain install nightly-2025-12-01 --profile minimal --component llvm-tools-preview
-      - uses: Swatinem/rust-cache@v2
-      - uses: taiki-e/install-action@cargo-llvm-cov
+      - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
+      - uses: taiki-e/install-action@e4b3a0453201addddc06d3a72db90326aad87084 # cargo-llvm-cov
      - name: Generate code coverage
        run: cargo +nightly-2025-12-01 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v3
+        uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
        continue-on-error: true
        with:
          token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
--- a/.github/workflows/long_running.yml
+++ b/.github/workflows/long_running.yml
@@ -8,6 +8,9 @@ env:
  CARGO_TERM_COLOR: always
  NUM_FUNCTIONAL_TEST_ITERATIONS: 20000

+permissions:
+  contents: read
+
 # Ensures that we cancel running jobs for the same PR / same workflow.
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -18,10 +21,13 @@ jobs:

    runs-on: ubuntu-latest

+    permissions:
+      contents: read
+
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
    - name: Install stable
-      uses: actions-rs/toolchain@v1
+      uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
      with:
          toolchain: stable
          profile: minimal
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -0,0 +1,49 @@
+name: OpenSSF Scorecard
+
+on:
+  schedule:
+    - cron: '0 0 * * 0'
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: read
+
+jobs:
+  analysis:
+    name: Scorecards analysis
+    runs-on: ubuntu-latest
+    permissions:
+      # Needed to upload the results to code-scanning dashboard.
+      security-events: write
+      # Needed to publish results
+      id-token: write
+
+    steps:
+      - name: 'Checkout code'
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: 'Run analysis'
+        uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3
+        with:
+          results_file: results.sarif
+          results_format: sarif
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_results: true
+
+      # Upload the results as artifacts.
+      - name: 'Upload artifact'
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: SARIF file
+          path: results.sarif
+          retention-days: 5
+
+      # Upload the results to GitHub's code scanning dashboard.
+      - name: 'Upload to code-scanning'
+        uses: github/codeql-action/upload-sarif@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4.36.1
+        with:
+          sarif_file: results.sarif
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,6 +9,9 @@ on:
 env:
  CARGO_TERM_COLOR: always

+permissions:
+  contents: read
+
 # Ensures that we cancel running jobs for the same PR / same workflow.
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -19,23 +22,27 @@ jobs:

    runs-on: ubuntu-latest

+    permissions:
+      contents: read
+      checks: write
+
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3

    - name: Install nightly
-      uses: actions-rs/toolchain@v1
+      uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
      with:
            toolchain: nightly
            profile: minimal
            components: rustfmt
    - name: Install stable
-      uses: actions-rs/toolchain@v1
+      uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
      with:
            toolchain: stable
            profile: minimal
            components: clippy

-    - uses: Swatinem/rust-cache@v2
+    - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1

    - name: Check Formatting
      run: cargo +nightly fmt --all -- --check
@@ -47,7 +54,7 @@ jobs:
    - name: Check Bench Compilation
      run: cargo +nightly bench --no-run --profile=dev --all-features

-    - uses: actions-rs/clippy-check@v1
+    - uses: actions-rs/clippy-check@b5b5f21f4797c02da247df37026fcd0a5024aa4d # v1.0.7
      with:
        toolchain: stable
        token: ${{ secrets.GITHUB_TOKEN }}
@@ -57,6 +64,9 @@ jobs:

    runs-on: ubuntu-latest

+    permissions:
+      contents: read
+
    strategy:
      matrix:
        features:
@@ -67,17 +77,17 @@ jobs:
    name: test-${{ matrix.features.label}}

    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3

    - name: Install stable
-      uses: actions-rs/toolchain@v1
+      uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
      with:
            toolchain: stable
            profile: minimal
            override: true

-    - uses: taiki-e/install-action@nextest
-    - uses: Swatinem/rust-cache@v2
+    - uses: taiki-e/install-action@56cc9adf3a3e2c23eafb56e8acaf9d0373cb845a # nextest
+    - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1

    - name: Run tests
      run: |
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,9 +1,8 @@
 Tantivy 0.26.1
 ================================

-## Bugfixes
- Fix memory consumption accounting in nested term aggregation to only scan the active parent bucket (@PSeitz)
- Fix memory consumption accounting in composite aggregation to only scan the active parent bucket (@PSeitz)
+## Performance
+- Fix quadratic runtime in nested term and composite aggregations: memory accounting scanned all parent buckets on every collect instead of just the current parent (@PSeitz @fulmicoton)

 Tantivy 0.26 (Unreleased)
 ================================
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.26.1"
+version = "0.26.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -65,7 +65,7 @@ tantivy-bitpacker = { version = "0.10", path = "./bitpacker" }
 common = { version = "0.11", path = "./common/", package = "tantivy-common" }
 tokenizer-api = { version = "0.7", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
 sketches-ddsketch = { version = "0.4", features = ["use_serde"] }
-datasketches = "0.2.0"
+datasketches = { version = "0.3.0", features = ["hll"] }
 futures-util = { version = "0.3.28", optional = true }
 futures-channel = { version = "0.3.28", optional = true }
 fnv = "1.0.7"
@@ -75,7 +75,7 @@ typetag = "0.2.21"
 winapi = "0.3.9"

 [dev-dependencies]
-binggan = "0.15.3"
+binggan = "0.17.0"
 rand = "0.9"
 maplit = "1.0.2"
 matches = "0.1.9"
@@ -92,7 +92,7 @@ postcard = { version = "1.0.4", features = [
 ], default-features = false }

 [target.'cfg(not(windows))'.dev-dependencies]
-criterion = { version = "0.5", default-features = false }
+criterion = { version = "0.8", default-features = false }

 [dev-dependencies.fail]
 version = "0.5.0"
@@ -201,3 +201,11 @@ harness = false
 [[bench]]
 name = "regex_all_terms"
 harness = false
+
+[[bench]]
+name = "query_parser_nested"
+harness = false
+
+[[bench]]
+name = "intersection_bench"
+harness = false
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 [![Docs](https://docs.rs/tantivy/badge.svg)](https://docs.rs/crate/tantivy/)
 [![Build Status](https://github.com/quickwit-oss/tantivy/actions/workflows/test.yml/badge.svg)](https://github.com/quickwit-oss/tantivy/actions/workflows/test.yml)
 [![codecov](https://codecov.io/gh/quickwit-oss/tantivy/branch/main/graph/badge.svg)](https://codecov.io/gh/quickwit-oss/tantivy)
+[![OpenSSF Scorecard](https://api.scorecard.dev/projects/github.com/quickwit-oss/tantivy/badge)](https://scorecard.dev/viewer/?uri=github.com/quickwit-oss/tantivy)
 [![Join the chat at https://discord.gg/MT27AG5EVE](https://shields.io/discord/908281611840282624?label=chat%20on%20discord)](https://discord.gg/MT27AG5EVE)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Crates.io](https://img.shields.io/crates/v/tantivy.svg)](https://crates.io/crates/tantivy)
--- a/benches/agg_bench.rs
+++ b/benches/agg_bench.rs
@@ -63,6 +63,8 @@ fn bench_agg(mut group: InputGroup<Index>) {
    register!(group, terms_all_unique_with_avg_sub_agg);
    register!(group, terms_many_with_avg_sub_agg);
    register!(group, terms_status_with_avg_sub_agg);
+    register!(group, terms_status_with_terms_zipf_1000_sub_agg);
+    register!(group, terms_zipf_1000_with_terms_status_sub_agg);
    register!(group, terms_status_with_histogram);
    register!(group, terms_zipf_1000);
    register!(group, terms_zipf_1000_with_histogram);
@@ -77,7 +79,12 @@ fn bench_agg(mut group: InputGroup<Index>) {
    register!(group, composite_histogram_calendar);

    register!(group, cardinality_agg);
+    register!(group, cardinality_agg_high_card);
+    register!(group, cardinality_agg_low_card);
    register!(group, terms_status_with_cardinality_agg);
+    register!(group, terms_100_buckets_with_cardinality_agg);
+    register!(group, terms_many_with_single_term_order_by_card);
+    register!(group, terms_many_with_single_term_2_order_by_card);

    register!(group, range_agg);
    register!(group, range_agg_with_avg_sub_agg);
@@ -165,10 +172,52 @@ fn cardinality_agg(index: &Index) {
    });
    execute_agg(index, agg_req);
 }
+// Full-scan cardinality on a near-1M-cardinality string field.
+// Hits the dense (PagedBitset) path: every doc has a unique term,
+// so the bucket promotes from FxHashSet shortly into the scan.
+fn cardinality_agg_high_card(index: &Index) {
+    let agg_req = json!({
+        "cardinality": {
+            "cardinality": {
+                "field": "text_all_unique_terms"
+            },
+        }
+    });
+    execute_agg(index, agg_req);
+}
+// Full-scan cardinality on a tiny-cardinality string field (7 distinct
+// values). Stays on the FxHashSet path — the promotion threshold is
+// never crossed. Validates no regression on the sparse path.
+fn cardinality_agg_low_card(index: &Index) {
+    let agg_req = json!({
+        "cardinality": {
+            "cardinality": {
+                "field": "text_few_terms_status"
+            },
+        }
+    });
+    execute_agg(index, agg_req);
+}
 fn terms_status_with_cardinality_agg(index: &Index) {
    let agg_req = json!({
        "my_texts": {
            "terms": { "field": "text_few_terms_status" },
+            "aggs": {
+                "cardinality": {
+                    "cardinality": {
+                        "field": "text_few_terms_status"
+                    },
+                }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+
+fn terms_100_buckets_with_cardinality_agg(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "text_1000_terms_zipf", "size": 100 },
            "aggs": {
                "cardinality": {
                    "cardinality": {
@@ -181,6 +230,58 @@ fn terms_status_with_cardinality_agg(index: &Index) {
    execute_agg(index, agg_req);
 }

+fn terms_many_with_single_term_order_by_card(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "text_many_terms" },
+            "aggs": {
+                "nested_terms": {
+                    "terms": {
+                        "field": "single_term",
+                        "order": { "cardinality": "desc" }
+                    },
+                    "aggs": {
+                        "cardinality": {
+                            "cardinality": { "field": "text_few_terms" }
+                        }
+                    }
+                }
+            }
+        },
+    });
+    execute_agg(index, agg_req);
+}
+
+// Two-level terms ordered by cardinality at each level: a high-card outer terms
+// (text_many_terms) ordered by a cardinality sub-agg, with a nested terms agg on
+// `single_term`, also ordered by a cardinality sub-agg, plus an avg on `score`.
+fn terms_many_with_single_term_2_order_by_card(index: &Index) {
+    let agg_req = json!({
+        "by_ip": {
+            "terms": {
+                "field": "text_many_terms",
+                "order": { "card_few_terms": "desc" }
+            },
+            "aggs": {
+                "card_few_terms": {
+                    "cardinality": { "field": "text_few_terms" }
+                },
+                "nested_terms": {
+                    "terms": {
+                        "field": "single_term",
+                        "order": { "distinct_path2": "desc" }
+                    },
+                    "aggs": {
+                        "avg_botscore": { "avg": { "field": "score" } },
+                        "distinct_path2": { "cardinality": { "field": "text_few_terms" } }
+                    }
+                }
+            }
+        }
+    });
+    execute_agg(index, agg_req);
+}
+
 fn terms_7(index: &Index) {
    let agg_req = json!({
        "my_texts": { "terms": { "field": "text_few_terms_status" } },
@@ -253,6 +354,30 @@ fn terms_all_unique_with_avg_sub_agg(index: &Index) {
    });
    execute_agg(index, agg_req);
 }
+fn terms_status_with_terms_zipf_1000_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "text_few_terms_status" },
+            "aggs": {
+                "nested_terms": { "terms": { "field": "text_1000_terms_zipf" } }
+            }
+        }
+    });
+    execute_agg(index, agg_req);
+}
+
+fn terms_zipf_1000_with_terms_status_sub_agg(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "text_1000_terms_zipf" },
+            "aggs": {
+                "nested_terms": { "terms": { "field": "text_few_terms_status" } }
+            }
+        }
+    });
+    execute_agg(index, agg_req);
+}
+
 fn terms_status_with_histogram(index: &Index) {
    let agg_req = json!({
        "my_texts": {
@@ -566,7 +691,8 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
            TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
        )
        .set_stored();
-    let text_field = schema_builder.add_text_field("text", text_fieldtype);
+    let text_field = schema_builder.add_text_field("text", text_fieldtype.clone());
+    let single_term = schema_builder.add_text_field("single_term", FAST);
    let json_field = schema_builder.add_json_field("json", FAST);
    let text_field_all_unique_terms =
        schema_builder.add_text_field("text_all_unique_terms", STRING | FAST);
@@ -630,6 +756,8 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
            index_writer.add_document(doc!(
                json_field => json!({"mixed_type": 10.0}),
                json_field => json!({"mixed_type": 10.0}),
+                single_term => "single_term",
+                single_term => "single_term",
                text_field => "cool",
                text_field => "cool",
                text_field_all_unique_terms => "cool",
@@ -664,6 +792,7 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
                json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
            };
            index_writer.add_document(doc!(
+                single_term => "single_term",
                text_field => "cool",
                json_field => json,
                text_field_all_unique_terms => format!("unique_term_{}", rng.random::<u64>()),
--- a/benches/intersection_bench.rs
+++ b/benches/intersection_bench.rs
@@ -0,0 +1,149 @@
+// Benchmarks top-K intersection of term scorers (block_wand_intersection).
+//
+// What's measured:
+// - Conjunctive queries (+a +b, +a +b +c) with top-10 by score
+// - Varying doc-frequency balance between terms (balanced, skewed, very skewed)
+// - Realistic term frequencies (geometric distribution, mostly low)
+// - 1M-doc single segment
+//
+// Run with: cargo bench --bench intersection_bench
+
+use binggan::{black_box, BenchRunner};
+use rand::prelude::*;
+use rand::rngs::StdRng;
+use rand::SeedableRng;
+use tantivy::collector::TopDocs;
+use tantivy::query::QueryParser;
+use tantivy::schema::{Schema, TEXT};
+use tantivy::{doc, Index, ReloadPolicy, Searcher};
+
+const NUM_DOCS: usize = 1_000_000;
+
+struct BenchIndex {
+    searcher: Searcher,
+    query_parser: QueryParser,
+}
+
+/// Generate term frequency from a geometric-like distribution.
+/// Most values are 1, a few are 2-3, rarely higher (capped at 10).
+/// p controls the decay: higher p → more weight on tf=1.
+fn random_term_freq(rng: &mut StdRng, p: f64) -> u32 {
+    let mut tf = 1u32;
+    while tf < 10 && rng.random_bool(1.0 - p) {
+        tf += 1;
+    }
+    tf
+}
+
+/// Build an index with three terms (a, b, c) with given doc-frequency probabilities.
+/// Each term occurrence has a realistic term frequency (geometric distribution).
+/// Field length is padded with filler tokens to create varied fieldnorms.
+fn build_index(p_a: f64, p_b: f64, p_c: f64) -> BenchIndex {
+    let mut schema_builder = Schema::builder();
+    let body = schema_builder.add_text_field("body", TEXT);
+    let schema = schema_builder.build();
+    let index = Index::create_in_ram(schema);
+
+    let mut rng = StdRng::from_seed([42u8; 32]);
+
+    {
+        let mut writer = index.writer_with_num_threads(1, 500_000_000).unwrap();
+        for _ in 0..NUM_DOCS {
+            let mut tokens: Vec<String> = Vec::new();
+
+            if rng.random_bool(p_a) {
+                let tf = random_term_freq(&mut rng, 0.7);
+                for _ in 0..tf {
+                    tokens.push("aaa".to_string());
+                }
+            }
+            if rng.random_bool(p_b) {
+                let tf = random_term_freq(&mut rng, 0.7);
+                for _ in 0..tf {
+                    tokens.push("bbb".to_string());
+                }
+            }
+            if rng.random_bool(p_c) {
+                let tf = random_term_freq(&mut rng, 0.7);
+                for _ in 0..tf {
+                    tokens.push("ccc".to_string());
+                }
+            }
+
+            // Pad with 5 to 29 filler tokens (upper bound is exclusive) to vary field lengths.
+            let filler_count = rng.random_range(5u32..30u32);
+            for _ in 0..filler_count {
+                tokens.push("filler".to_string());
+            }
+
+            let text = tokens.join(" ");
+            writer.add_document(doc!(body => text)).unwrap();
+        }
+        writer.commit().unwrap();
+    }
+
+    let reader = index
+        .reader_builder()
+        .reload_policy(ReloadPolicy::Manual)
+        .try_into()
+        .unwrap();
+    let searcher = reader.searcher();
+    let query_parser = QueryParser::for_index(&index, vec![body]);
+
+    BenchIndex {
+        searcher,
+        query_parser,
+    }
+}
+
+fn main() {
+    // Scenarios: (label, p_a, p_b, p_c)
+    //
+    // "balanced":    all terms ~10% → intersection ~1% of docs
+    // "skewed":      one common (50%), one rare (2%) → intersection ~1%
+    // "very_skewed": one very common (80%), one very rare (0.5%) → intersection ~0.4%
+    // "three_balanced": three terms ~20% each → intersection ~0.8%
+    // "three_skewed":   50% / 10% / 2% → intersection ~0.1%
+    let scenarios: Vec<(&str, f64, f64, f64)> = vec![
+        ("balanced_10%_10%", 0.10, 0.10, 0.0),
+        ("skewed_50%_2%", 0.50, 0.02, 0.0),
+        ("very_skewed_80%_0.5%", 0.80, 0.005, 0.0),
+        ("three_balanced_20%_20%_20%", 0.20, 0.20, 0.20),
+        ("three_skewed_50%_10%_2%", 0.50, 0.10, 0.02),
+    ];
+
+    let mut runner = BenchRunner::new();
+
+    for (label, p_a, p_b, p_c) in &scenarios {
+        let bench_index = build_index(*p_a, *p_b, *p_c);
+
+        let mut group = runner.new_group();
+        group.set_name(format!("intersection — {label}"));
+
+        // Two-term intersection
+        if *p_a > 0.0 && *p_b > 0.0 {
+            let query_str = "+aaa +bbb";
+            let query = bench_index.query_parser.parse_query(query_str).unwrap();
+            let searcher = bench_index.searcher.clone();
+            group.register(format!("{query_str} top10"), move |_| {
+                let collector = TopDocs::with_limit(10).order_by_score();
+                black_box(searcher.search(&query, &collector).unwrap());
+                1usize
+            });
+        }
+
+        // Three-term intersection
+        if *p_c > 0.0 {
+            let query_str = "+aaa +bbb +ccc";
+            let query = bench_index.query_parser.parse_query(query_str).unwrap();
+            let searcher = bench_index.searcher.clone();
+            group.register(format!("{query_str} top10"), move |_| {
+                let collector = TopDocs::with_limit(10).order_by_score();
+                black_box(searcher.search(&query, &collector).unwrap());
+                1usize
+            });
+        }
+
+        group.run();
+    }
+}
--- a/benches/query_parser_nested.rs
+++ b/benches/query_parser_nested.rs
@@ -0,0 +1,35 @@
+// Benchmark for the query grammar parsing deeply nested queries.
+//
+// Regression guard for https://github.com/quickwit-oss/tantivy/issues/2498:
+// at depth 20/21 the old parser took 0.87 s / 1.72 s respectively because
+// `ast()` retried `occur_leaf` on backtrack, giving O(2^n) time. With the
+// fix parsing is linear and completes in microseconds.
+//
+// Run with: `cargo bench --bench query_parser_nested`.
+
+use binggan::{black_box, BenchRunner};
+use tantivy::query_grammar::parse_query;
+
+fn nested_query(depth: usize, leading_plus: bool) -> String {
+    let leading = "(".repeat(depth);
+    let trailing = ")".repeat(depth);
+    let prefix = if leading_plus { "+" } else { "" };
+    format!("{prefix}{leading}title:test{trailing}")
+}
+
+fn main() {
+    let mut runner = BenchRunner::new();
+
+    for depth in [20, 21] {
+        for leading_plus in [false, true] {
+            let query = nested_query(depth, leading_plus);
+            let label = format!(
+                "parse_nested_depth_{depth}_{}",
+                if leading_plus { "plus" } else { "plain" },
+            );
+            runner.bench_function(&label, move |_| {
+                black_box(parse_query(black_box(&query)).unwrap());
+            });
+        }
+    }
+}
--- a/bitpacker/Cargo.toml
+++ b/bitpacker/Cargo.toml
@@ -18,5 +18,10 @@ homepage = "https://github.com/quickwit-oss/tantivy"
 bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker1x"] }

 [dev-dependencies]
+binggan = "0.17.0"
 rand = "0.9"
 proptest = "1"
+
+[[bench]]
+name = "bench"
+harness = false
--- a/bitpacker/benches/bench.rs
+++ b/bitpacker/benches/bench.rs
@@ -1,65 +1,110 @@
-#![feature(test)]
+use std::cell::RefCell;

-extern crate test;
+use binggan::{BenchRunner, black_box};
+use rand::rng;
+use rand::seq::IteratorRandom;
+use tantivy_bitpacker::{BitPacker, BitUnpacker, BlockedBitpacker};

-#[cfg(test)]
-mod tests {
-    use rand::rng;
-    use rand::seq::IteratorRandom;
-    use tantivy_bitpacker::{BitPacker, BitUnpacker, BlockedBitpacker};
-    use test::Bencher;
+fn create_bitpacked_data(bit_width: u8, num_els: u32) -> Vec<u8> {
+    let mut bitpacker = BitPacker::new();
+    let mut buffer = Vec::new();
+    for _ in 0..num_els {
+        bitpacker.write(0u64, bit_width, &mut buffer).unwrap();
+        bitpacker.flush(&mut buffer).unwrap();
+    }
+    buffer
+}

-    #[inline(never)]
-    fn create_bitpacked_data(bit_width: u8, num_els: u32) -> Vec<u8> {
-        let mut bitpacker = BitPacker::new();
-        let mut buffer = Vec::new();
-        for _ in 0..num_els {
-            // the values do not matter.
-            bitpacker.write(0u64, bit_width, &mut buffer).unwrap();
-            bitpacker.flush(&mut buffer).unwrap();
+const N: usize = 100_000;
+const MAX_VAL: u64 = 1_000;
+const BIT_WIDTH: u8 = 10; // 2^10 = 1024 > MAX_VAL
+
+fn create_packed_data() -> (BitUnpacker, Vec<u8>) {
+    let mut bitpacker = BitPacker::new();
+    let mut data = Vec::new();
+    for i in 0..N as u64 {
+        let val = i * MAX_VAL / N as u64;
+        bitpacker.write(val, BIT_WIDTH, &mut data).unwrap();
+    }
+    bitpacker.close(&mut data).unwrap();
+    (BitUnpacker::new(BIT_WIDTH), data)
+}
+
+fn bench_bitpacking() {
+    let mut runner = BenchRunner::new();
+    let bit_width = 3;
+    let num_els = 1_000_000u32;
+    let bit_unpacker = BitUnpacker::new(bit_width);
+    let data = create_bitpacked_data(bit_width, num_els);
+    let idxs: Vec<u32> = (0..num_els).choose_multiple(&mut rng(), 100_000);
+    runner.bench_function("bitpacking_read", move |_| {
+        let mut out = 0u64;
+        for &idx in &idxs {
+            out = out.wrapping_add(bit_unpacker.get(idx, &data[..]));
        }
-        buffer
-    }
+        black_box(out);
+    });
+}

-    #[bench]
-    fn bench_bitpacking_read(b: &mut Bencher) {
-        let bit_width = 3;
-        let num_els = 1_000_000u32;
-        let bit_unpacker = BitUnpacker::new(bit_width);
-        let data = create_bitpacked_data(bit_width, num_els);
-        let idxs: Vec<u32> = (0..num_els).choose_multiple(&mut rng(), 100_000);
-        b.iter(|| {
-            let mut out = 0u64;
-            for &idx in &idxs {
-                out = out.wrapping_add(bit_unpacker.get(idx, &data[..]));
-            }
-            out
-        });
+fn bench_blocked_bitpacker() {
+    let mut runner = BenchRunner::new();
+    let mut blocked_bitpacker = BlockedBitpacker::new();
+    for val in 0..=21500 {
+        blocked_bitpacker.add(val * val);
    }
-
-    #[bench]
-    fn bench_blockedbitp_read(b: &mut Bencher) {
+    runner.bench_function("blockedbitp_read", move |_| {
+        let mut out = 0u64;
+        for val in 0..=21500 {
+            out = out.wrapping_add(blocked_bitpacker.get(val));
+        }
+        black_box(out);
+    });
+    runner.bench_function("blockedbitp_create", |_| {
        let mut blocked_bitpacker = BlockedBitpacker::new();
        for val in 0..=21500 {
            blocked_bitpacker.add(val * val);
        }
-        b.iter(|| {
-            let mut out = 0u64;
-            for val in 0..=21500 {
-                out = out.wrapping_add(blocked_bitpacker.get(val));
-            }
-            out
-        });
-    }
-
-    #[bench]
-    fn bench_blockedbitp_create(b: &mut Bencher) {
-        b.iter(|| {
-            let mut blocked_bitpacker = BlockedBitpacker::new();
-            for val in 0..=21500 {
-                blocked_bitpacker.add(val * val);
-            }
-            blocked_bitpacker
-        });
-    }
+        black_box(blocked_bitpacker);
+    });
+}
+
+fn bench_filter_vec() {
+    let mut runner = BenchRunner::new();
+
+    let (unpacker, data) = create_packed_data();
+    let positions = RefCell::new(Vec::with_capacity(N));
+    runner.bench_function("filter_vec_dense", move |_| {
+        unpacker.get_ids_for_value_range(
+            250..=750,
+            0..N as u32,
+            &data,
+            &mut positions.borrow_mut(),
+        );
+        black_box(positions.borrow().len());
+    });
+
+    let (unpacker, data) = create_packed_data();
+    let positions = RefCell::new(Vec::with_capacity(N));
+    runner.bench_function("filter_vec_sparse", move |_| {
+        unpacker.get_ids_for_value_range(0..=50, 0..N as u32, &data, &mut positions.borrow_mut());
+        black_box(positions.borrow().len());
+    });
+
+    let (unpacker, data) = create_packed_data();
+    let positions = RefCell::new(Vec::with_capacity(N));
+    runner.bench_function("filter_vec_full", move |_| {
+        unpacker.get_ids_for_value_range(
+            0..=MAX_VAL,
+            0..N as u32,
+            &data,
+            &mut positions.borrow_mut(),
+        );
+        black_box(positions.borrow().len());
+    });
+}
+
+fn main() {
+    bench_bitpacking();
+    bench_blocked_bitpacker();
+    bench_filter_vec();
 }
--- a/bitpacker/src/filter_vec/mod.rs
+++ b/bitpacker/src/filter_vec/mod.rs
@@ -1,8 +1,17 @@
+#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
+use std::arch::is_aarch64_feature_detected;
 use std::ops::RangeInclusive;

 #[cfg(target_arch = "x86_64")]
 mod avx2;

+#[cfg(target_arch = "aarch64")]
+mod neon;
+
+// SVE intrinsics are not exposed on aarch64-apple-darwin.
+#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
+mod sve;
+
 mod scalar;

 #[derive(Clone, Copy, Eq, PartialEq, Debug)]
@@ -10,6 +19,10 @@ mod scalar;
 enum FilterImplPerInstructionSet {
    #[cfg(target_arch = "x86_64")]
    AVX2 = 0u8,
+    #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
+    SVE = 3u8,
+    #[cfg(target_arch = "aarch64")]
+    Neon = 2u8,
    Scalar = 1u8,
 }

@@ -19,29 +32,57 @@ impl FilterImplPerInstructionSet {
        match *self {
            #[cfg(target_arch = "x86_64")]
            FilterImplPerInstructionSet::AVX2 => is_x86_feature_detected!("avx2"),
+            #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
+            FilterImplPerInstructionSet::SVE => is_aarch64_feature_detected!("sve"),
+            // Neon is a mandatory feature on aarch64, so no runtime detection is needed.
+            #[cfg(target_arch = "aarch64")]
+            FilterImplPerInstructionSet::Neon => true,
            FilterImplPerInstructionSet::Scalar => true,
        }
    }
 }

-// List of available implementation in preferred order.
+// List of available implementations in preferred order.
 #[cfg(target_arch = "x86_64")]
 const IMPLS: [FilterImplPerInstructionSet; 2] = [
    FilterImplPerInstructionSet::AVX2,
    FilterImplPerInstructionSet::Scalar,
 ];

-#[cfg(not(target_arch = "x86_64"))]
+// Non-Apple aarch64: try SVE, NEON, Scalar.
+#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
+const IMPLS: [FilterImplPerInstructionSet; 3] = [
+    FilterImplPerInstructionSet::SVE,
+    FilterImplPerInstructionSet::Neon,
+    FilterImplPerInstructionSet::Scalar,
+];
+
+// Apple aarch64 (M-series): SVE not available; use NEON or Scalar.
+#[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
+const IMPLS: [FilterImplPerInstructionSet; 2] = [
+    FilterImplPerInstructionSet::Neon,
+    FilterImplPerInstructionSet::Scalar,
+];
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
 const IMPLS: [FilterImplPerInstructionSet; 1] = [FilterImplPerInstructionSet::Scalar];

 impl FilterImplPerInstructionSet {
    #[inline]
-    #[allow(unused_variables)] // on non-x86_64, code is unused.
+    #[allow(unused_variables)]
    fn from(code: u8) -> FilterImplPerInstructionSet {
        #[cfg(target_arch = "x86_64")]
        if code == FilterImplPerInstructionSet::AVX2 as u8 {
            return FilterImplPerInstructionSet::AVX2;
        }
+        #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
+        if code == FilterImplPerInstructionSet::SVE as u8 {
+            return FilterImplPerInstructionSet::SVE;
+        }
+        #[cfg(target_arch = "aarch64")]
+        if code == FilterImplPerInstructionSet::Neon as u8 {
+            return FilterImplPerInstructionSet::Neon;
+        }
        FilterImplPerInstructionSet::Scalar
    }

@@ -50,6 +91,13 @@ impl FilterImplPerInstructionSet {
        match self {
            #[cfg(target_arch = "x86_64")]
            FilterImplPerInstructionSet::AVX2 => avx2::filter_vec_in_place(range, offset, output),
+            #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
+            // SAFETY: SVE availability was verified by is_available() before selecting this impl.
+            FilterImplPerInstructionSet::SVE => unsafe {
+                sve::filter_vec_in_place(range, offset, output)
+            },
+            #[cfg(target_arch = "aarch64")]
+            FilterImplPerInstructionSet::Neon => neon::filter_vec_in_place(range, offset, output),
            FilterImplPerInstructionSet::Scalar => {
                scalar::filter_vec_in_place(range, offset, output)
            }
@@ -57,6 +105,12 @@ impl FilterImplPerInstructionSet {
    }
 }

+fn available_impls() -> impl Iterator<Item = FilterImplPerInstructionSet> {
+    IMPLS
+        .into_iter()
+        .filter(FilterImplPerInstructionSet::is_available)
+}
+
 #[inline]
 fn get_best_available_instruction_set() -> FilterImplPerInstructionSet {
    use std::sync::atomic::{AtomicU8, Ordering};
@@ -64,10 +118,7 @@ fn get_best_available_instruction_set() -> FilterImplPerInstructionSet {
    let instruction_set_byte: u8 = INSTRUCTION_SET_BYTE.load(Ordering::Relaxed);
    if instruction_set_byte == u8::MAX {
        // Let's initialize the instruction set and cache it.
-        let instruction_set = IMPLS
-            .into_iter()
-            .find(FilterImplPerInstructionSet::is_available)
-            .unwrap();
+        let instruction_set = available_impls().next().unwrap();
        INSTRUCTION_SET_BYTE.store(instruction_set as u8, Ordering::Relaxed);
        return instruction_set;
    }
@@ -80,12 +131,12 @@ pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut

 #[cfg(test)]
 mod tests {
+    use proptest::strategy::Strategy;
+
    use super::*;

    #[test]
    fn test_get_best_available_instruction_set() {
-        // This does not test much unfortunately.
-        // We just make sure the function returns without crashing and returns the same result.
        let instruction_set = get_best_available_instruction_set();
        assert_eq!(get_best_available_instruction_set(), instruction_set);
    }
@@ -102,6 +153,31 @@ mod tests {
        }
    }

+    #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
+    #[test]
+    fn test_instruction_set_to_code_from_code() {
+        for instruction_set in [
+            FilterImplPerInstructionSet::SVE,
+            FilterImplPerInstructionSet::Neon,
+            FilterImplPerInstructionSet::Scalar,
+        ] {
+            let code = instruction_set as u8;
+            assert_eq!(instruction_set, FilterImplPerInstructionSet::from(code));
+        }
+    }
+
+    #[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
+    #[test]
+    fn test_instruction_set_to_code_from_code() {
+        for instruction_set in [
+            FilterImplPerInstructionSet::Neon,
+            FilterImplPerInstructionSet::Scalar,
+        ] {
+            let code = instruction_set as u8;
+            assert_eq!(instruction_set, FilterImplPerInstructionSet::from(code));
+        }
+    }
+
    fn test_filter_impl_empty_aux(filter_impl: FilterImplPerInstructionSet) {
        let mut output = vec![];
        filter_impl.filter_vec_in_place(0..=u32::MAX, 0, &mut output);
@@ -126,11 +202,20 @@ mod tests {
        assert_eq!(&output, &[1, 3, 4, 5, 6, 7, 8]);
    }

+    fn test_filter_impl_empty_range_aux(filter_impl: FilterImplPerInstructionSet) {
+        // start > end: RangeInclusive::contains always returns false; output must be empty.
+        // The SVE path's wrapping_sub would otherwise produce a huge range_width.
+        let mut output = vec![3, 2, 1, 5, 11, 2, 5, 10, 2];
+        filter_impl.filter_vec_in_place(10..=5, 0, &mut output);
+        assert_eq!(&output, &[]);
+    }
+
    fn test_filter_impl_test_suite(filter_impl: FilterImplPerInstructionSet) {
        test_filter_impl_empty_aux(filter_impl);
        test_filter_impl_simple_aux(filter_impl);
        test_filter_impl_simple_aux_shifted(filter_impl);
        test_filter_impl_simple_outside_i32_range(filter_impl);
+        test_filter_impl_empty_range_aux(filter_impl);
    }

    #[test]
@@ -141,25 +226,60 @@ mod tests {
        }
    }

+    #[test]
+    #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
+    fn test_filter_implementation_sve() {
+        if FilterImplPerInstructionSet::SVE.is_available() {
+            test_filter_impl_test_suite(FilterImplPerInstructionSet::SVE);
+        }
+    }
+
+    #[test]
+    #[cfg(target_arch = "aarch64")]
+    fn test_filter_implementation_neon() {
+        test_filter_impl_test_suite(FilterImplPerInstructionSet::Neon);
+    }
+
    #[test]
    fn test_filter_implementation_scalar() {
        test_filter_impl_test_suite(FilterImplPerInstructionSet::Scalar);
    }

-    #[cfg(target_arch = "x86_64")]
+    fn max_val_strategy() -> impl proptest::strategy::Strategy<Value = u32> {
+        proptest::prop_oneof![
+            0u32..10u32,
+            255u32..258u32,
+            proptest::prelude::Just(1u32 << 25),
+            proptest::prelude::Just(u32::MAX - 1),
+            proptest::prelude::Just(u32::MAX),
+        ]
+    }
+
+    fn vals_strategy() -> impl proptest::strategy::Strategy<Value = Vec<u32>> {
+        proptest::prop_oneof![
+            proptest::collection::vec(proptest::prelude::any::<u32>(), 0..300),
+            max_val_strategy()
+                .prop_flat_map(|max_val| { proptest::collection::vec(0..=max_val, 0..300) })
+        ]
+    }
+
    proptest::proptest! {
        #[test]
-        fn test_filter_compare_scalar_and_avx2_impl_proptest(
-            start in proptest::prelude::any::<u32>(),
-            end in proptest::prelude::any::<u32>(),
+        fn test_filter_compare_scalar_and_impls_impl_proptest(
+            start in 0u32..400u32,
+            end in 0u32..400u32,
            offset in 0u32..2u32,
-            mut vals in proptest::collection::vec(0..u32::MAX, 0..30)) {
-            if FilterImplPerInstructionSet::AVX2.is_available() {
-                let mut vals_clone = vals.clone();
-                FilterImplPerInstructionSet::AVX2.filter_vec_in_place(start..=end, offset, &mut vals);
-                FilterImplPerInstructionSet::Scalar.filter_vec_in_place(start..=end, offset, &mut vals_clone);
-                assert_eq!(&vals, &vals_clone);
-            }
+            vals in vals_strategy()) {
+                for implementation in available_impls() {
+                    if implementation == FilterImplPerInstructionSet::Scalar {
+                        continue;
+                    }
+                    let mut impl_output = vals.clone();
+                    let mut scalar_output = vals.clone();
+                    implementation.filter_vec_in_place(start..=end, offset, &mut impl_output);
+                    FilterImplPerInstructionSet::Scalar.filter_vec_in_place(start..=end, offset, &mut scalar_output);
+                    assert_eq!(&impl_output, &scalar_output);
+                }
       }
    }
 }
--- a/bitpacker/src/filter_vec/neon.rs
+++ b/bitpacker/src/filter_vec/neon.rs
@@ -0,0 +1,118 @@
+use std::arch::aarch64::*;
+use std::ops::RangeInclusive;
+
+const NUM_LANES: usize = 4;
+
+// Compacts matching lanes to the front using a byte-level shuffle.
+// `mask` is a 4-bit value: bit k=1 means lane k should appear in the output.
+#[inline]
+#[target_feature(enable = "neon")]
+unsafe fn compact(data: uint32x4_t, mask: u8) -> uint32x4_t {
+    unsafe {
+        // SAFETY: mask is always in [0, 15] by construction (max sum of [1,2,4,8]).
+        // BYTE_SHUFFLE_TABLE has 16 entries, so this is always in bounds.
+        let shuffle = BYTE_SHUFFLE_TABLE.get_unchecked(mask as usize);
+        let shuffle_vec = vld1q_u8(shuffle.as_ptr());
+        vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(data), shuffle_vec))
+    }
+}
+
+// Safe (not unsafe) because NEON is mandatory on aarch64: no runtime feature check needed.
+#[inline(never)]
+pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
+    let num_words = output.len() / NUM_LANES;
+    let mut output_len = unsafe {
+        filter_vec_neon_aux(
+            output.as_ptr(),
+            range.clone(),
+            output.as_mut_ptr(),
+            offset,
+            num_words,
+        )
+    };
+    let remainder_start = num_words * NUM_LANES;
+    for i in remainder_start..output.len() {
+        let val = output[i];
+        output[output_len] = offset + i as u32;
+        output_len += if range.contains(&val) { 1 } else { 0 };
+    }
+    output.truncate(output_len);
+}
+
+#[target_feature(enable = "neon")]
+unsafe fn filter_vec_neon_aux(
+    input: *const u32,
+    range: RangeInclusive<u32>,
+    output: *mut u32,
+    offset: u32,
+    num_words: usize,
+) -> usize {
+    unsafe {
+        let mut input = input;
+        let mut output_tail = output;
+        let range_start_simd = vdupq_n_u32(*range.start());
+        let range_end_simd = vdupq_n_u32(*range.end());
+        let mut ids = vld1q_u32([offset, offset + 1, offset + 2, offset + 3].as_ptr());
+        let shift = vdupq_n_u32(NUM_LANES as u32);
+        let bit_weights = vld1q_u32([1u32, 2, 4, 8].as_ptr());
+
+        for _ in 0..num_words {
+            let word = vld1q_u32(input);
+
+            // Unsigned compares: CMHS (compare higher or same) tests `word >= start`
+            // and `end >= word`. ANDing both gives the inside-range mask directly,
+            // which is cheaper than computing `outside` and then negating.
+            let ge_start = vcgeq_u32(word, range_start_simd);
+            let le_end = vcleq_u32(word, range_end_simd);
+            // inside[k] = 0xFFFFFFFF if val[k] is in range, 0 otherwise.
+            let inside = vandq_u32(ge_start, le_end);
+
+            // Build the 4-bit mask: AND bit_weights with the inside lane mask, so each
+            // inside lane contributes its bit_weight (1, 2, 4, or 8). Summing yields the
+            // 4-bit mask in one addv.
+            let inside_bits = vandq_u32(bit_weights, inside);
+            let mask = vaddvq_u32(inside_bits) as u8;
+            // mask is mathematically bounded: max value is 1+2+4+8=15 (all lanes match)
+            debug_assert!(mask <= 15, "mask must fit in 4 bits: {}", mask);
+
+            // Count of matching lanes = popcount(mask). The count is derived from the
+            // scalar mask, so no second SIMD reduction over the lane mask is needed.
+            let added_len = mask.count_ones() as usize;
+
+            // SAFETY: mask <= 15 (see above), so `compact`'s table lookup stays in bounds.
+            let filtered_ids = compact(ids, mask);
+            vst1q_u32(output_tail, filtered_ids);
+            output_tail = output_tail.add(added_len);
+            ids = vaddq_u32(ids, shift);
+            input = input.add(NUM_LANES);
+        }
+
+        output_tail.offset_from(output) as usize
+    }
+}
+
+// Byte shuffle patterns to compact matching lanes to the front of the vector.
+// Index is a 4-bit mask: bit k=1 means lane k (bytes 4k..4k+3) is in-range.
+// The j-th set bit determines which input lane goes to output position j.
+const BYTE_SHUFFLE_TABLE: [[u8; 16]; 16] = [
+    [
+        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    ], // 0b0000: none
+    [0, 1, 2, 3, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0001: lane 0
+    [4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0010: lane 1
+    [0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16],     // 0b0011: lanes 0,1
+    [8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0100: lane 2
+    [0, 1, 2, 3, 8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16],   // 0b0101: lanes 0,2
+    [4, 5, 6, 7, 8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16],   // 0b0110: lanes 1,2
+    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 16, 16, 16],       // 0b0111: lanes 0,1,2
+    [
+        12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    ], // 0b1000: lane 3
+    [0, 1, 2, 3, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1001: lanes 0,3
+    [4, 5, 6, 7, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1010: lanes 1,3
+    [0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 16, 16, 16],     // 0b1011: lanes 0,1,3
+    [8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1100: lanes 2,3
+    [0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16],   // 0b1101: lanes 0,2,3
+    [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16],   // 0b1110: lanes 1,2,3
+    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],       // 0b1111: all lanes
+];
--- a/bitpacker/src/filter_vec/sve.rs
+++ b/bitpacker/src/filter_vec/sve.rs
@@ -0,0 +1,260 @@
+use std::ops::RangeInclusive;
+
+// SVE vector length (in u32 lanes) is not a compile-time constant; query at runtime.
+// Safe to call only when SVE is confirmed available via is_aarch64_feature_detected!("sve").
+#[target_feature(enable = "sve")]
+unsafe fn num_lanes() -> usize {
+    let vl: usize;
+    unsafe {
+        core::arch::asm!(
+            "cntw {vl}",
+            vl = out(reg) vl,
+            options(nostack, nomem, preserves_flags),
+        );
+    }
+    vl
+}
+
+// SAFETY: caller must ensure SVE is available (checked via is_aarch64_feature_detected!("sve")).
+// Unlike NEON, SVE is optional on aarch64 and not guaranteed by the target architecture.
+pub unsafe fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
+    if range.start() > range.end() {
+        output.clear();
+        return;
+    }
+    let vl = unsafe { num_lanes() };
+    let num_words = output.len() / vl;
+    let range_start = *range.start();
+    // Unsigned subtraction trick: val ∈ [lo, hi] ↔ (val - lo) ≤ᵤ (hi - lo).
+    // Values below lo wrap around to large u32, so the single unsigned ≤ excludes them.
+    let range_width = range.end().wrapping_sub(range_start);
+    let mut output_len = unsafe {
+        filter_vec_sve_aux(
+            output.as_ptr(),
+            range_start,
+            range_width,
+            output.as_mut_ptr(),
+            offset,
+            num_words,
+            vl,
+        )
+    };
+    let remainder_start = num_words * vl;
+    for i in remainder_start..output.len() {
+        let val = output[i];
+        output[output_len] = offset + i as u32;
+        output_len += if range.contains(&val) { 1 } else { 0 };
+    }
+    output.truncate(output_len);
+}
+
+// Register allocation for the main-loop asm! block (the odd-tail block shares only z0-z2, p0, p1):
+//   z0        ids_a (index vector for first half of each pair, advances by step2 each iter)
+//   z1        range_width broadcast
+//   z2        range_start broadcast
+//   z3        step2 broadcast (2 * vl)
+//   z4        ids_b (index vector for second half, = ids_a + step, advances by step2)
+//   z5        scratch: loaded word_a, then compacted_a
+//   z6        scratch: loaded word_b, then compacted_b
+//   p0        all-true predicate (ptrue p0.s)
+//   p1        in-range mask for word_a
+//   p2        in-range mask for word_b
+#[target_feature(enable = "sve")]
+unsafe fn filter_vec_sve_aux(
+    input: *const u32,
+    range_start: u32,
+    range_width: u32,
+    output: *mut u32,
+    offset: u32,
+    num_words: usize,
+    vl: usize,
+) -> usize {
+    let num_pairs = num_words / 2;
+    let mut input_ptr = input;
+    let mut output_tail = output;
+
+    if num_pairs > 0 {
+        unsafe {
+            // We rely on asm! because the SVE intrinsics are not available in stable Rust.
+            // The code that follows was generated by Rustc nightly based on the intrinsics version
+            // at the bottom of this file.
+            core::arch::asm!(
+                // --- Setup ---
+                // All-true predicate for 32-bit lanes.
+                "ptrue p0.s",
+                // ids_a = [offset, offset+1, offset+2, ...]
+                "index z0.s, {offset:w}, #1",
+                // Broadcast scalars into SVE vectors.
+                "mov z1.s, {range_width:w}",
+                "mov z2.s, {range_start:w}",
+                // vl_gpr = number of 32-bit lanes (cntw).
+                "cntw {vl_gpr}",
+                // step2_bytes first holds 2*vl as a lane count (for the step2 vector), later a byte stride.
+                "lsl {step2_bytes}, {vl_gpr}, #1",
+                // z4 = step = [vl, vl, ...]; will become ids_b after the add below.
+                "mov z4.s, {vl_gpr:w}",
+                // z3 = step2 = [2*vl, 2*vl, ...], used to advance both id vectors each iter.
+                "mov z3.s, {step2_bytes:w}",
+                // Repurpose step2_bytes to hold the byte stride for advancing the input pointer
+                // by two full SVE vectors per iteration.
+                "rdvl {step2_bytes}, #2",
+                // ids_b = ids_a + step = [offset+vl, offset+vl+1, ...]
+                "add z4.s, z0.s, z4.s",
+
+                // --- Main loop: process two SVE vectors (ids_a and ids_b) per iteration ---
+                "0:",
+                // Load two consecutive SVE vectors from input.
+                "ld1w {{z5.s}}, p0/z, [{input}]",
+                "ld1w {{z6.s}}, p0/z, [{input}, #1, mul vl]",
+                // Advance input pointer by 2 * VL bytes.
+                "add {input}, {input}, {step2_bytes}",
+                // Rebase onto range_start so the in-range check becomes a single unsigned ≤ compare.
+                "sub z5.s, z5.s, z2.s",
+                "sub z6.s, z6.s, z2.s",
+                // in_range: shifted value ≤ range_width  (unsigned, so values below lo also fail).
+                "cmphs p1.s, p0/z, z1.s, z5.s",
+                "cmphs p2.s, p0/z, z1.s, z6.s",
+                // Count matching lanes; both cntp calls have independent inputs for OOO parallelism.
+                "cntp {cnt_a}, p0, p1.s",
+                "compact z5.s, p1, z0.s",
+                "compact z6.s, p2, z4.s",
+                "cntp {cnt_b}, p0, p2.s",
+                // Advance id vectors for the next iteration.
+                "add z0.s, z0.s, z3.s",
+                "add z4.s, z4.s, z3.s",
+                // Store compacted ids. Only the first cnt_a / cnt_b slots are valid; the rest
+                // will be overwritten by subsequent iterations before the final truncate.
+                "str z5, [{out}]",
+                "st1w {{z6.s}}, p0, [{out}, {cnt_a}, lsl #2]",
+                "add {out}, {out}, {cnt_a}, lsl #2",
+                "add {out}, {out}, {cnt_b}, lsl #2",
+                "subs {pairs}, {pairs}, #1",
+                "b.ne 0b",
+
+                // --- Operands ---
+                input       = inout(reg) input_ptr,
+                out         = inout(reg) output_tail,
+                pairs       = inout(reg) num_pairs => _,
+                offset      = in(reg) offset,
+                range_start = in(reg) range_start,
+                range_width = in(reg) range_width,
+                vl_gpr      = out(reg) _,
+                step2_bytes = out(reg) _,
+                cnt_a       = out(reg) _,
+                cnt_b       = out(reg) _,
+                out("p0") _, out("p1") _, out("p2") _,
+                out("v0") _, out("v1") _, out("v2") _, out("v3") _,
+                out("v4") _, out("v5") _, out("v6") _,
+                options(nostack),
+            );
+        }
+    }
+
+    // Handle an odd trailing vector.
+    if num_words % 2 == 1 {
+        // ids_a for the odd word starts at offset + num_pairs * 2 * vl.
+        // input_ptr was advanced by the main loop and now points at the odd word.
+        let odd_offset =
+            offset.wrapping_add((num_pairs as u32).wrapping_mul(2).wrapping_mul(vl as u32));
+        unsafe {
+            core::arch::asm!(
+                "ptrue p0.s",
+                "index z0.s, {odd_offset:w}, #1",
+                "mov z1.s, {range_width:w}",
+                "mov z2.s, {range_start:w}",
+                "ld1w {{z3.s}}, p0/z, [{input}]",
+                "sub z3.s, z3.s, z2.s",
+                "cmphs p1.s, p0/z, z1.s, z3.s",
+                "cntp {cnt}, p0, p1.s",
+                "compact z0.s, p1, z0.s",
+                "str z0, [{out}]",
+                "add {out}, {out}, {cnt}, lsl #2",
+                odd_offset  = in(reg) odd_offset,
+                range_width = in(reg) range_width,
+                range_start = in(reg) range_start,
+                input       = in(reg) input_ptr,
+                out         = inout(reg) output_tail,
+                cnt         = out(reg) _,
+                out("p0") _, out("p1") _,
+                out("v0") _, out("v1") _, out("v2") _, out("v3") _,
+                options(nostack),
+            );
+        }
+    }
+
+    unsafe { output_tail.offset_from(output) as usize }
+}
+
+// Reference SVE implementation using intrinsics (the asm above was generated from it).
+//
+// #[target_feature(enable = "sve")]
+// unsafe fn filter_vec_sve_aux(
+//     input: *const u32,
+//     range_start: u32,
+//     range_width: u32,
+//     output: *mut u32,
+//     offset: u32,
+//     num_words: usize,
+//     vl: usize,
+// ) -> usize {
+//     unsafe {
+//         let all_true = svptrue_b32();
+//         let range_start_simd = svdup_n_u32(range_start);
+//         let range_width_simd = svdup_n_u32(range_width);
+//         // ids_a covers [offset .. offset+vl), ids_b covers the next vl ids.
+//         // Keeping them separate breaks the loop-carried dependency through ids so
+//         // both compact/cntp chains are fully independent within each unrolled body.
+//         let mut ids_a = svindex_u32(offset, 1);
+//         let step = svdup_n_u32(vl as u32);
+//         let step2 = svdup_n_u32(2 * vl as u32);
+//         let mut ids_b = svadd_u32_x(all_true, ids_a, step);
+
+//         let mut input = input;
+//         let mut output_tail = output;
+
+//         // Unrolled ×2: both cntp calls have independent inputs and execute in parallel.
+//         // The two output_tail updates are sequential but together cost 4+1+1=6 cy per
+//         // pair vs 5+5=10 cy for two scalar iterations, breaking the cntp latency chain.
+//         let num_pairs = num_words / 2;
+//         for _ in 0..num_pairs {
+//             let word_a = svld1_u32(all_true, input);
+//             let word_b = svld1_u32(all_true, input.add(vl));
+
+//             let shifted_a = svsub_u32_x(all_true, word_a, range_start_simd);
+//             let shifted_b = svsub_u32_x(all_true, word_b, range_start_simd);
+
+//             let in_range_a = svcmple_u32(all_true, shifted_a, range_width_simd);
+//             let in_range_b = svcmple_u32(all_true, shifted_b, range_width_simd);
+
+//             let compacted_a = svcompact_u32(in_range_a, ids_a);
+//             let compacted_b = svcompact_u32(in_range_b, ids_b);
+//             // cntp_a and cntp_b have independent inputs: OOO engine issues them in parallel.
+//             let added_len_a = svcntp_b32(all_true, in_range_a) as usize;
+//             let added_len_b = svcntp_b32(all_true, in_range_b) as usize;
+
+//             // Write the full vector — only the first added_len slots are valid.
+//             // Subsequent iterations overwrite the trailing zeros before truncate.
+//             svst1_u32(all_true, output_tail, compacted_a);
+//             output_tail = output_tail.add(added_len_a);
+//             svst1_u32(all_true, output_tail, compacted_b);
+//             output_tail = output_tail.add(added_len_b);
+
+//             ids_a = svadd_u32_x(all_true, ids_a, step2);
+//             ids_b = svadd_u32_x(all_true, ids_b, step2);
+//             input = input.add(2 * vl);
+//         }
+
+//         // Handle an odd trailing word.
+//         if num_words % 2 == 1 {
+//             let word = svld1_u32(all_true, input);
+//             let shifted = svsub_u32_x(all_true, word, range_start_simd);
+//             let in_range = svcmple_u32(all_true, shifted, range_width_simd);
+//             let added_len = svcntp_b32(all_true, in_range) as usize;
+//             let compacted_ids = svcompact_u32(in_range, ids_a);
+//             svst1_u32(all_true, output_tail, compacted_ids);
+//             output_tail = output_tail.add(added_len);
+//         }
+
+//         output_tail.offset_from(output) as usize
+//     }
+// }
--- a/columnar/Cargo.toml
+++ b/columnar/Cargo.toml
@@ -23,7 +23,7 @@ downcast-rs = "2.0.1"
 proptest = "1"
 more-asserts = "0.3.1"
 rand = "0.9"
-binggan = "0.15.3"
+binggan = "0.17.0"

 [[bench]]
 name = "bench_merge"
--- a/columnar/src/block_accessor.rs
+++ b/columnar/src/block_accessor.rs
@@ -33,14 +33,14 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
        &mut self,
        docs: &[u32],
        accessor: &Column<T>,
-        missing: Option<T>,
+        missing_opt: Option<T>,
    ) {
        self.fetch_block(docs, accessor);
        // no missing values
        if accessor.index.get_cardinality().is_full() {
            return;
        }
-        let Some(missing) = missing else {
+        let Some(missing) = missing_opt else {
            return;
        };

@@ -191,6 +191,7 @@ where F: FnMut(u32) {
 }

 #[cfg(test)]
+#[allow(clippy::field_reassign_with_default)]
 mod tests {
    use super::*;

--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@@ -19,6 +19,6 @@ time = { version = "0.3.47", features = ["serde-well-known"] }
 serde = { version = "1.0.136", features = ["derive"] }

 [dev-dependencies]
-binggan = "0.15.3"
+binggan = "0.17.0"
 proptest = "1.0.0"
 rand = "0.9"
--- a/common/src/bitset.rs
+++ b/common/src/bitset.rs
@@ -47,6 +47,9 @@ impl TinySet {
        TinySet(val)
    }

+    /// An empty `TinySet` constant.
+    pub const EMPTY: TinySet = TinySet(0u64);
+
    /// Returns an empty `TinySet`.
    #[inline]
    pub fn empty() -> TinySet {
@@ -193,13 +196,11 @@ impl TinySet {
 #[derive(Clone)]
 pub struct BitSet {
    tinysets: Box<[TinySet]>,
-    len: u64,
    max_value: u32,
 }
 impl std::fmt::Debug for BitSet {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("BitSet")
-            .field("len", &self.len)
            .field("max_value", &self.max_value)
            .finish()
    }
@@ -227,7 +228,6 @@ impl BitSet {
        let tinybitsets = vec![TinySet::empty(); num_buckets as usize].into_boxed_slice();
        BitSet {
            tinysets: tinybitsets,
-            len: 0,
            max_value,
        }
    }
@@ -245,7 +245,6 @@ impl BitSet {
        }
        BitSet {
            tinysets: tinybitsets,
-            len: max_value as u64,
            max_value,
        }
    }
@@ -264,17 +263,19 @@ impl BitSet {

    /// Intersect with tinysets
    fn intersect_update_with_iter(&mut self, other: impl Iterator<Item = TinySet>) {
-        self.len = 0;
        for (left, right) in self.tinysets.iter_mut().zip(other) {
            *left = left.intersect(right);
-            self.len += left.len() as u64;
        }
    }

    /// Returns the number of elements in the `BitSet`.
    #[inline]
    pub fn len(&self) -> usize {
-        self.len as usize
+        self.tinysets
+            .iter()
+            .copied()
+            .map(|tinyset| tinyset.len())
+            .sum::<u32>() as usize
    }

    /// Inserts an element in the `BitSet`
@@ -283,7 +284,7 @@ impl BitSet {
        // we do not check saturated els.
        let higher = el / 64u32;
        let lower = el % 64u32;
-        self.len += u64::from(self.tinysets[higher as usize].insert_mut(lower));
+        self.tinysets[higher as usize].insert_mut(lower);
    }

    /// Inserts an element in the `BitSet`
@@ -292,7 +293,7 @@ impl BitSet {
        // we do not check saturated els.
        let higher = el / 64u32;
        let lower = el % 64u32;
-        self.len -= u64::from(self.tinysets[higher as usize].remove_mut(lower));
+        self.tinysets[higher as usize].remove_mut(lower);
    }

    /// Returns true iff the elements is in the `BitSet`.
@@ -314,6 +315,9 @@ impl BitSet {
            .map(|delta_bucket| bucket + delta_bucket as u32)
    }

+    /// Returns the maximum number of elements in the bitset.
+    ///
+    /// Warning: The largest element the bitset can contain is `max_value - 1`.
    #[inline]
    pub fn max_value(&self) -> u32 {
        self.max_value
--- a/common/src/file_slice.rs
+++ b/common/src/file_slice.rs
@@ -121,7 +121,7 @@ pub struct FileSlice {

 impl fmt::Debug for FileSlice {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "FileSlice({:?}, {:?})", &self.data, self.range)
+        write!(f, "FileSlice({:?}, {:?})", self.data, self.range)
    }
 }

--- a/examples/iterating_docs_and_positions.rs
+++ b/examples/iterating_docs_and_positions.rs
@@ -91,46 +91,10 @@ fn main() -> tantivy::Result<()> {
        }
    }

-    // A `Term` is a text token associated with a field.
-    // Let's go through all docs containing the term `title:the` and access their position
-    let term_the = Term::from_field_text(title, "the");
-
-    // Some other powerful operations (especially `.skip_to`) may be useful to consume these
+    // Some other powerful operations (especially `.seek`) may be useful to consume these
    // posting lists rapidly.
    // You can check for them in the [`DocSet`](https://docs.rs/tantivy/~0/tantivy/trait.DocSet.html) trait
    // and the [`Postings`](https://docs.rs/tantivy/~0/tantivy/trait.Postings.html) trait

-    // Also, for some VERY specific high performance use case like an OLAP analysis of logs,
-    // you can get better performance by accessing directly the blocks of doc ids.
-    for segment_reader in searcher.segment_readers() {
-        // A segment contains different data structure.
-        // Inverted index stands for the combination of
-        // - the term dictionary
-        // - the inverted lists associated with each terms and their positions
-        let inverted_index = segment_reader.inverted_index(title)?;
-
-        // This segment posting object is like a cursor over the documents matching the term.
-        // The `IndexRecordOption` arguments tells tantivy we will be interested in both term
-        // frequencies and positions.
-        //
-        // If you don't need all this information, you may get better performance by decompressing
-        // less information.
-        if let Some(mut block_segment_postings) =
-            inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic)?
-        {
-            loop {
-                let docs = block_segment_postings.docs();
-                if docs.is_empty() {
-                    break;
-                }
-                // Once again these docs MAY contains deleted documents as well.
-                let docs = block_segment_postings.docs();
-                // Prints `Docs [0, 2].`
-                println!("Docs {docs:?}");
-                block_segment_postings.advance();
-            }
-        }
-    }
-
    Ok(())
 }
--- a/query-grammar/src/query_grammar.rs
+++ b/query-grammar/src/query_grammar.rs
@@ -1045,18 +1045,43 @@ fn operand_leaf(inp: &str) -> IResult<&str, (Option<BinaryOperand>, Option<Occur
 }

 fn ast(inp: &str) -> IResult<&str, UserInputAst> {
-    let boolean_expr = map_res(
-        separated_pair(occur_leaf, multispace1, many1(operand_leaf)),
-        |(left, right)| aggregate_binary_expressions(left, right),
-    );
-    let single_leaf = map(occur_leaf, |(occur, ast)| {
-        if occur == Some(Occur::MustNot) {
-            ast.unary(Occur::MustNot)
-        } else {
-            ast
-        }
-    });
-    delimited(multispace0, alt((boolean_expr, single_leaf)), multispace0)(inp)
+    // Parse `occur_leaf` once, then conditionally extend into a boolean
+    // expression. The previous implementation used `alt((boolean_expr,
+    // single_leaf))` which, when the input was a single leaf with no
+    // following operand, would parse `occur_leaf` once for `boolean_expr`,
+    // fail at `multispace1`, backtrack, then re-parse `occur_leaf` for
+    // `single_leaf`. With recursively-nested groups like `(+(+(+a)))`, that
+    // doubling at every level produced O(2^n) parse time. Parsing once and
+    // peeking ahead for the operand keeps it O(n).
+    delimited(
+        multispace0,
+        |inp| {
+            let (rest, first) = occur_leaf(inp)?;
+            // Only fall back on `Err::Error` (recoverable), mirroring
+            // `alt`'s behaviour. `Err::Failure` and `Err::Incomplete`
+            // must propagate so cut points and streaming needs are not
+            // accidentally swallowed if they are ever introduced in the
+            // operand parsers.
+            match preceded(multispace1, many1(operand_leaf))(rest) {
+                Ok((rest, more)) => {
+                    let combined = aggregate_binary_expressions(first, more)
+                        .map_err(|_| nom::Err::Error(Error::new(inp, ErrorKind::MapRes)))?;
+                    Ok((rest, combined))
+                }
+                Err(nom::Err::Error(_)) => {
+                    let (occur, ast) = first;
+                    let single = if occur == Some(Occur::MustNot) {
+                        ast.unary(Occur::MustNot)
+                    } else {
+                        ast
+                    };
+                    Ok((rest, single))
+                }
+                Err(e) => Err(e),
+            }
+        },
+        multispace0,
+    )(inp)
 }

 fn ast_infallible(inp: &str) -> JResult<&str, UserInputAst> {
@@ -1891,4 +1916,23 @@ mod test {
            r#"(+"field":'happy tax payer' +"other_field":1)"#,
        );
    }
+
+    // Regression test for https://github.com/quickwit-oss/tantivy/issues/2498:
+    // deeply nested parenthesized queries used to take O(2^n) time because the
+    // top-level `ast()` parser tried `boolean_expr` first and re-parsed the
+    // inner `occur_leaf` when it backtracked to `single_leaf`. Depth 60 would
+    // take ~10^18 operations under the regression; with the fix it parses
+    // instantly. We go through `test_parse_query_to_ast_helper`, so a returning
+    // regression shows up as this test never finishing.
+    #[test]
+    fn test_parse_deeply_nested_query() {
+        let depth = 60;
+        let leading: String = "(".repeat(depth);
+        let trailing: String = ")".repeat(depth);
+        let query = format!("{leading}title:test{trailing}");
+        test_parse_query_to_ast_helper(&query, r#""title":test"#);
+
+        let query_with_plus = format!("+{leading}title:test{trailing}");
+        test_parse_query_to_ast_helper(&query_with_plus, r#""title":test"#);
+    }
 }
--- a/src/aggregation/agg_data.rs
+++ b/src/aggregation/agg_data.rs
@@ -20,8 +20,8 @@ use crate::aggregation::metric::{
    build_segment_stats_collector, AverageAggregation, CardinalityAggReqData,
    CardinalityAggregationReq, CountAggregation, ExtendedStatsAggregation, MaxAggregation,
    MetricAggReqData, MinAggregation, SegmentCardinalityCollector, SegmentExtendedStatsCollector,
-    SegmentPercentilesCollector, StatsAggregation, StatsType, SumAggregation, TopHitsAggReqData,
-    TopHitsSegmentCollector,
+    SegmentPercentilesCollector, StatsAggregation, StatsType, SumAggregation, TermOrdSet,
+    TopHitsAggReqData, TopHitsSegmentCollector, BITSET_MAX_TERM_ORD,
 };
 use crate::aggregation::segment_agg_result::{
    GenericSegmentAggregationResultsCollector, SegmentAggregationCollector,
@@ -413,12 +413,38 @@ pub(crate) fn build_segment_agg_collector(
        }
        AggKind::Cardinality => {
            let req_data = &mut req.get_cardinality_req_data_mut(node.idx_in_req_data);
-            Ok(Box::new(SegmentCardinalityCollector::from_req(
-                req_data.column_type,
-                node.idx_in_req_data,
-                req_data.accessor.clone(),
-                req_data.missing_value_for_accessor,
-            )))
+            // For str columns, choose the per-bucket entries representation
+            // based on the segment's column.max_value():
+            //   * small (< BITSET_MAX_TERM_ORD): `BitSet`, pre-allocated, no promotion machinery.
+            //   * large: `TermOrdSet` (sparse FxHashSet that promotes to a paged bitset).
+            // For non-str columns the `entries` field is unused (values go
+            // straight into the HLL sketch); we still pick `TermOrdSet`
+            // because its empty Sparse(FxHashSet) costs nothing.
+            let is_str = req_data.column_type == ColumnType::Str;
+            let max_term_ord_inclusive = if is_str {
+                req_data.accessor.max_value()
+            } else {
+                0
+            };
+            let collector: Box<dyn SegmentAggregationCollector> =
+                if is_str && max_term_ord_inclusive < BITSET_MAX_TERM_ORD {
+                    Box::new(SegmentCardinalityCollector::<BitSet>::from_req(
+                        req_data.column_type,
+                        node.idx_in_req_data,
+                        req_data.accessor.clone(),
+                        req_data.missing_value_for_accessor,
+                        max_term_ord_inclusive,
+                    ))
+                } else {
+                    Box::new(SegmentCardinalityCollector::<TermOrdSet>::from_req(
+                        req_data.column_type,
+                        node.idx_in_req_data,
+                        req_data.accessor.clone(),
+                        req_data.missing_value_for_accessor,
+                        max_term_ord_inclusive,
+                    ))
+                };
+            Ok(collector)
        }
        AggKind::StatsKind(stats_type) => {
            let req_data = &mut req.per_request.stats_metric_req_data[node.idx_in_req_data];
@@ -985,8 +1011,12 @@ fn build_terms_or_cardinality_nodes(
                    let str_col = str_dict_column
                        .as_ref()
                        .expect("str_dict_column must exist for string column");
-                    allowed_term_ids =
-                        build_allowed_term_ids_for_str(str_col, &req.include, &req.exclude)?;
+                    allowed_term_ids = build_allowed_term_ids_for_str(
+                        str_col,
+                        &req.include,
+                        &req.exclude,
+                        missing.is_some(),
+                    )?;
                };
                let idx_in_req_data = data.push_term_req_data(TermsAggReqData {
                    accessor,
@@ -1002,10 +1032,20 @@ fn build_terms_or_cardinality_nodes(
                (idx_in_req_data, AggKind::Terms)
            }
            TermsOrCardinalityRequest::Cardinality(ref req) => {
+                // `str_dict_column` is computed once per field; for JSON paths
+                // with mixed types it's `Some` even on the numeric req_data.
+                // Cardinality only consults it for the str column path, so
+                // gate by column_type to avoid driving non-str collectors
+                // through the coupon-cache path.
+                let str_dict_column_for_req = if column_type == ColumnType::Str {
+                    str_dict_column.clone()
+                } else {
+                    None
+                };
                let idx_in_req_data = data.push_cardinality_req_data(CardinalityAggReqData {
                    accessor,
                    column_type,
-                    str_dict_column: str_dict_column.clone(),
+                    str_dict_column: str_dict_column_for_req,
                    missing_value_for_accessor,
                    name: agg_name.to_string(),
                    req: req.clone(),
@@ -1025,16 +1065,21 @@ fn build_terms_or_cardinality_nodes(

 /// Builds a single BitSet of allowed term ordinals for a string dictionary column according to
 /// include/exclude parameters.
+///
+/// When `reserve_missing_sentinel` is true, the bitset gets one additional slot for the missing
+/// term ordinal.
 fn build_allowed_term_ids_for_str(
    str_col: &StrColumn,
    include: &Option<IncludeExcludeParam>,
    exclude: &Option<IncludeExcludeParam>,
+    reserve_missing_sentinel: bool,
 ) -> crate::Result<Option<BitSet>> {
    let mut allowed: Option<BitSet> = None;
-    let num_terms = str_col.dictionary().num_terms() as u32;
+    let missing_sentinel_adjustment = if reserve_missing_sentinel { 1 } else { 0 };
+    let allowed_capacity = str_col.dictionary().num_terms() as u32 + missing_sentinel_adjustment;
    if let Some(include) = include {
        // add matches
-        allowed = Some(BitSet::with_max_value(num_terms));
+        allowed = Some(BitSet::with_max_value(allowed_capacity));
        let allowed = allowed.as_mut().unwrap();
        for_each_matching_term_ord(str_col, include, |ord| allowed.insert(ord))?;
    };
@@ -1042,7 +1087,7 @@ fn build_allowed_term_ids_for_str(
    if let Some(exclude) = exclude {
        if allowed.is_none() {
            // Start with all terms allowed
-            allowed = Some(BitSet::with_max_value_and_full(num_terms));
+            allowed = Some(BitSet::with_max_value_and_full(allowed_capacity));
        }
        let allowed = allowed.as_mut().unwrap();
        for_each_matching_term_ord(str_col, exclude, |ord| allowed.remove(ord))?;
--- a/src/aggregation/agg_req.rs
+++ b/src/aggregation/agg_req.rs
@@ -115,6 +115,71 @@ pub fn get_fast_field_names(aggs: &Aggregations) -> HashSet<String> {
    fast_field_names
 }

+/// Validates that all fields referenced in the aggregation request exist in the schema
+/// and are configured as fast fields.
+///
+/// This is a convenience function for upfront validation before executing aggregations.
+/// Returns `FieldNotFound` for an unknown field and `SchemaError` for a field that is not fast.
+///
+/// Validation is intentionally opt-in rather than baked into aggregation execution: the
+/// default lenient behavior (returning empty results for missing fields) supports
+/// schema evolution and federated queries where the same request runs against segments
+/// or indices with different schemas.
+///
+/// # Example
+/// ```
+/// use tantivy::aggregation::agg_req::{Aggregations, validate_aggregation_fields_exist};
+/// use tantivy::schema::{Schema, FAST};
+/// use tantivy::Index;
+///
+/// # fn main() -> tantivy::Result<()> {
+/// // Create a simple index
+/// let mut schema_builder = Schema::builder();
+/// schema_builder.add_f64_field("price", FAST);
+/// let schema = schema_builder.build();
+/// let index = Index::create_in_ram(schema);
+///
+/// // Parse aggregation request
+/// let agg_req: Aggregations = serde_json::from_str(r#"{
+///     "avg_price": { "avg": { "field": "price" } }
+/// }"#)?;
+///
+/// let reader = index.reader()?;
+/// let searcher = reader.searcher();
+///
+/// // Validate fields before executing
+/// for segment_reader in searcher.segment_readers() {
+///     validate_aggregation_fields_exist(&agg_req, segment_reader)?;
+/// }
+/// # Ok(())
+/// # }
+/// ```
+pub fn validate_aggregation_fields_exist(
+    aggs: &Aggregations,
+    reader: &crate::SegmentReader,
+) -> crate::Result<()> {
+    let field_names = get_fast_field_names(aggs);
+    let schema = reader.schema();
+
+    for field_name in field_names {
+        // Check if the field is either directly in the schema or could be part of a json field
+        // present in the schema, and verify it's a fast field.
+        if let Some((field, _path)) = schema.find_field(&field_name) {
+            let field_type = schema.get_field_entry(field).field_type();
+            if !field_type.is_fast() {
+                return Err(crate::TantivyError::SchemaError(format!(
+                    "Field '{}' is not a fast field. Aggregations require fast fields.",
+                    field_name
+                )));
+            }
+        } else {
+            return Err(crate::TantivyError::FieldNotFound(field_name));
+        }
+    }
+
+    Ok(())
+}
+
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 /// All aggregation types.
 pub enum AggregationVariants {
--- a/src/aggregation/agg_result.rs
+++ b/src/aggregation/agg_result.rs
@@ -208,7 +208,8 @@ pub enum BucketEntries<T> {
 }

 impl<T> BucketEntries<T> {
-    fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = &'a T> + 'a> {
+    /// Iterate over all bucket entries.
+    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = &'a T> + 'a> {
        match self {
            BucketEntries::Vec(vec) => Box::new(vec.iter()),
            BucketEntries::HashMap(map) => Box::new(map.values()),
--- a/src/aggregation/agg_tests.rs
+++ b/src/aggregation/agg_tests.rs
@@ -1436,3 +1436,46 @@ fn test_aggregation_on_json_object_mixed_numerical_segments() {
        )
    );
 }
+
+#[test]
+fn test_aggregation_field_validation_helper() {
+    // Exercise the standalone helper that validates aggregation fields against a segment.
+    let index = get_test_index_2_segments(false).unwrap();
+    let reader = index.reader().unwrap();
+    let searcher = reader.searcher();
+    let segment_reader = searcher.segment_reader(0);
+
+    // Test with invalid field
+    let agg_req: Aggregations = serde_json::from_str(
+        r#"{
+        "avg_test": {
+            "avg": { "field": "nonexistent_field" }
+        }
+    }"#,
+    )
+    .unwrap();
+
+    let result =
+        crate::aggregation::agg_req::validate_aggregation_fields_exist(&agg_req, segment_reader);
+    assert!(result.is_err());
+    match result {
+        Err(crate::TantivyError::FieldNotFound(field_name)) => {
+            assert_eq!(field_name, "nonexistent_field");
+        }
+        _ => panic!("Expected FieldNotFound error, got: {:?}", result),
+    }
+
+    // Test with valid field
+    let agg_req: Aggregations = serde_json::from_str(
+        r#"{
+        "avg_test": {
+            "avg": { "field": "score" }
+        }
+    }"#,
+    )
+    .unwrap();
+
+    let result =
+        crate::aggregation::agg_req::validate_aggregation_fields_exist(&agg_req, segment_reader);
+    assert!(result.is_ok());
+}
--- a/src/aggregation/bucket/composite/collector.rs
+++ b/src/aggregation/bucket/composite/collector.rs
@@ -21,7 +21,7 @@ use crate::aggregation::bucket::composite::map::{DynArrayHeapMap, MAX_DYN_ARRAY_
 use crate::aggregation::bucket::{
    CalendarInterval, CompositeAggregationSource, MissingOrder, Order,
 };
-use crate::aggregation::cached_sub_aggs::{CachedSubAggs, HighCardSubAggCache};
+use crate::aggregation::buffered_sub_aggs::{BufferedSubAggs, HighCardSubAggBuffer};
 use crate::aggregation::intermediate_agg_result::{
    CompositeIntermediateKey, IntermediateAggregationResult, IntermediateAggregationResults,
    IntermediateBucketResult, IntermediateCompositeBucketEntry, IntermediateCompositeBucketResult,
@@ -119,7 +119,7 @@ pub struct SegmentCompositeCollector {
    /// One DynArrayHeapMap per parent bucket.
    parent_buckets: Vec<DynArrayHeapMap<InternalValueRepr, CompositeBucketCollector>>,
    accessor_idx: usize,
-    sub_agg: Option<CachedSubAggs<HighCardSubAggCache>>,
+    sub_agg: Option<BufferedSubAggs<HighCardSubAggBuffer>>,
    bucket_id_provider: BucketIdProvider,
    /// Number of sources, needed when creating new DynArrayHeapMaps.
    num_sources: usize,
@@ -199,6 +199,17 @@ impl SegmentAggregationCollector for SegmentCompositeCollector {
        }
        Ok(())
    }
+
+    fn compute_metric_value(
+        &self,
+        _bucket_id: BucketId,
+        _sub_agg_name: &str,
+        _sub_agg_property: &str,
+        _agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64> {
+        // Composite is a multi-bucket agg with no single value to extract.
+        None
+    }
 }

 impl SegmentCompositeCollector {
@@ -215,7 +226,7 @@ impl SegmentCompositeCollector {
        let has_sub_aggregations = !node.children.is_empty();
        let sub_agg = if has_sub_aggregations {
            let sub_agg_collector = build_segment_agg_collectors(req_data, &node.children)?;
-            Some(CachedSubAggs::new(sub_agg_collector))
+            Some(BufferedSubAggs::new(sub_agg_collector))
        } else {
            None
        };
@@ -329,7 +340,7 @@ fn collect_bucket_with_limit(
    limit_num_buckets: usize,
    buckets: &mut DynArrayHeapMap<InternalValueRepr, CompositeBucketCollector>,
    key: &[InternalValueRepr],
-    sub_agg: &mut Option<CachedSubAggs<HighCardSubAggCache>>,
+    sub_agg: &mut Option<BufferedSubAggs<HighCardSubAggBuffer>>,
    bucket_id_provider: &mut BucketIdProvider,
 ) {
    let mut record_in_bucket = |bucket: &mut CompositeBucketCollector| {
@@ -485,7 +496,7 @@ struct CompositeKeyVisitor<'a> {
    doc_id: crate::DocId,
    composite_agg_data: &'a CompositeAggReqData,
    buckets: &'a mut DynArrayHeapMap<InternalValueRepr, CompositeBucketCollector>,
-    sub_agg: &'a mut Option<CachedSubAggs<HighCardSubAggCache>>,
+    sub_agg: &'a mut Option<BufferedSubAggs<HighCardSubAggBuffer>>,
    bucket_id_provider: &'a mut BucketIdProvider,
    sub_level_values: SmallVec<[InternalValueRepr; MAX_DYN_ARRAY_SIZE]>,
 }
--- a/src/aggregation/bucket/composite/mod.rs
+++ b/src/aggregation/bucket/composite/mod.rs
@@ -511,14 +511,14 @@ mod tests {

    fn datetime_from_iso_str(date_str: &str) -> common::DateTime {
        let dt = OffsetDateTime::parse(date_str, &Rfc3339)
-            .expect(&format!("Failed to parse date: {}", date_str));
+            .unwrap_or_else(|_| panic!("Failed to parse date: {}", date_str));
        let timestamp_secs = dt.unix_timestamp_nanos();
        common::DateTime::from_timestamp_nanos(timestamp_secs as i64)
    }

    fn ms_timestamp_from_iso_str(date_str: &str) -> i64 {
        let dt = OffsetDateTime::parse(date_str, &Rfc3339)
-            .expect(&format!("Failed to parse date: {}", date_str));
+            .unwrap_or_else(|_| panic!("Failed to parse date: {}", date_str));
        (dt.unix_timestamp_nanos() / 1_000_000) as i64
    }

@@ -548,7 +548,7 @@ mod tests {
                    agg_req_json["my_composite"]["composite"]["after"] = after_key.take().unwrap();
                }
                let agg_req: Aggregations = serde_json::from_value(agg_req_json).unwrap();
-                let res = exec_request(agg_req.clone(), &index).unwrap();
+                let res = exec_request(agg_req.clone(), index).unwrap();
                let expected_page_buckets = &expected_buckets_vec[page_idx * page_size
                    ..std::cmp::min((page_idx + 1) * page_size, expected_buckets_vec.len())];
                assert_eq!(
@@ -559,34 +559,30 @@ mod tests {
                    page_size,
                    agg_req,
                );
-                if page_idx + 1 < page_count {
-                    assert!(
-                        res["my_composite"].get("after_key").is_some(),
-                        "expected after_key on all but last page"
-                    );
-                    after_key = Some(res["my_composite"]["after_key"].clone());
-                } else if res["my_composite"].get("after_key").is_some() {
-                    // currently we sometime have an after_key on the last page,
-                    // check that the next "page" is empty
-                    let agg_req_json = json!({
-                        "my_composite": {
-                            "composite": {
-                                "sources": composite_agg_sources,
-                                "size": page_size,
-                                "after": res["my_composite"]["after_key"].clone(),
-                            }
-                        }
-                    });
-                    let agg_req: Aggregations = serde_json::from_value(agg_req_json).unwrap();
-                    let res = exec_request(agg_req.clone(), &index).unwrap();
-                    assert_eq!(
-                        res["my_composite"]["buckets"],
-                        json!([]),
-                        "expected no buckets when using after_key from last page, query: {:?}",
-                        agg_req
-                    );
-                }
+                assert!(
+                    res["my_composite"].get("after_key").is_some(),
+                    "expected after_key on every non-empty page"
+                );
+                after_key = Some(res["my_composite"]["after_key"].clone());
            }
+            // Using the after_key from the last page must yield an empty page.
+            let agg_req_json = json!({
+                "my_composite": {
+                    "composite": {
+                        "sources": composite_agg_sources,
+                        "size": page_size,
+                        "after": after_key,
+                    }
+                }
+            });
+            let agg_req: Aggregations = serde_json::from_value(agg_req_json).unwrap();
+            let res = exec_request(agg_req.clone(), index).unwrap();
+            assert_eq!(
+                res["my_composite"]["buckets"],
+                json!([]),
+                "expected no buckets when using after_key from last page, query: {:?}",
+                agg_req
+            );
        }
    }

@@ -711,8 +707,28 @@ mod tests {
                {"key": {"myterm": "terme"}, "doc_count": 1}
            ])
        );
-        assert!(res["my_composite"].get("after_key").is_none());

+        // Paginating past the last page should return no buckets.
+        let agg_req_json = json!({
+            "my_composite": {
+                "composite": {
+                    "sources": [
+                        {"myterm": {"terms": {"field": "string_id"}}}
+                    ],
+                    "size": 3,
+                    "after":  &res["my_composite"]["after_key"]
+                }
+            }
+        });
+        let agg_req: Aggregations = serde_json::from_value(agg_req_json).unwrap();
+        let res = exec_request(agg_req.clone(), &index).unwrap();
+        assert!(res["my_composite"].get("after_key").is_none());
+        assert_eq!(
+            res["my_composite"]["buckets"],
+            json!([]),
+            "expected no buckets when using after_key from last page, query: {:?}",
+            agg_req
+        );
        Ok(())
    }

@@ -820,7 +836,10 @@ mod tests {
                {"key": {"myterm": "apple"}, "doc_count": 1}
            ])
        );
-        assert!(res["fruity_aggreg"].get("after_key").is_none());
+        assert_eq!(
+            res["fruity_aggreg"]["after_key"],
+            json!({"myterm": "str:apple"})
+        );

        Ok(())
    }
@@ -1792,7 +1811,14 @@ mod tests {
                {"key": {"month": ms_timestamp_from_iso_str("2021-02-01T00:00:00Z"), "category": "books"}, "doc_count": 1},
            ]),
        );
-        assert!(res["my_composite"].get("after_key").is_none());
+        let feb_2021_ns = ms_timestamp_from_iso_str("2021-02-01T00:00:00Z") * 1_000_000;
+        assert_eq!(
+            res["my_composite"]["after_key"],
+            json!({
+                "month": format!("dt:{}", feb_2021_ns),
+                "category": "str:books"
+            })
+        );

        Ok(())
    }
--- a/src/aggregation/bucket/filter.rs
+++ b/src/aggregation/bucket/filter.rs
@@ -6,8 +6,8 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use crate::aggregation::agg_data::{
    build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
 };
-use crate::aggregation::cached_sub_aggs::{
-    CachedSubAggs, HighCardSubAggCache, LowCardSubAggCache, SubAggCache,
+use crate::aggregation::buffered_sub_aggs::{
+    BufferedSubAggs, HighCardSubAggBuffer, LowCardSubAggBuffer, SubAggBuffer,
 };
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
@@ -503,17 +503,17 @@ struct DocCount {
 }

 /// Segment collector for filter aggregation
-pub struct SegmentFilterCollector<C: SubAggCache> {
+pub struct SegmentFilterCollector<B: SubAggBuffer> {
    /// Document counts per parent bucket
    parent_buckets: Vec<DocCount>,
    /// Sub-aggregation collectors
-    sub_aggregations: Option<CachedSubAggs<C>>,
+    sub_aggregations: Option<BufferedSubAggs<B>>,
    bucket_id_provider: BucketIdProvider,
    /// Accessor index for this filter aggregation (to access FilterAggReqData)
    accessor_idx: usize,
 }

-impl<C: SubAggCache> SegmentFilterCollector<C> {
+impl<B: SubAggBuffer> SegmentFilterCollector<B> {
    /// Create a new filter segment collector following the new agg_data pattern
    pub(crate) fn from_req_and_validate(
        req: &mut AggregationsSegmentCtx,
@@ -525,7 +525,7 @@ impl<C: SubAggCache> SegmentFilterCollector<C> {
        } else {
            None
        };
-        let sub_agg_collector = sub_agg_collector.map(CachedSubAggs::new);
+        let sub_agg_collector = sub_agg_collector.map(BufferedSubAggs::new);

        Ok(SegmentFilterCollector {
            parent_buckets: Vec::new(),
@@ -547,16 +547,16 @@ pub(crate) fn build_segment_filter_collector(

    if is_top_level {
        Ok(Box::new(
-            SegmentFilterCollector::<LowCardSubAggCache>::from_req_and_validate(req, node)?,
+            SegmentFilterCollector::<LowCardSubAggBuffer>::from_req_and_validate(req, node)?,
        ))
    } else {
        Ok(Box::new(
-            SegmentFilterCollector::<HighCardSubAggCache>::from_req_and_validate(req, node)?,
+            SegmentFilterCollector::<HighCardSubAggBuffer>::from_req_and_validate(req, node)?,
        ))
    }
 }

-impl<C: SubAggCache> Debug for SegmentFilterCollector<C> {
+impl<B: SubAggBuffer> Debug for SegmentFilterCollector<B> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SegmentFilterCollector")
            .field("buckets", &self.parent_buckets)
@@ -566,7 +566,7 @@ impl<C: SubAggCache> Debug for SegmentFilterCollector<C> {
    }
 }

-impl<C: SubAggCache> SegmentAggregationCollector for SegmentFilterCollector<C> {
+impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentFilterCollector<B> {
    fn add_intermediate_aggregation_result(
        &mut self,
        agg_data: &AggregationsSegmentCtx,
@@ -674,6 +674,17 @@ impl<C: SubAggCache> SegmentAggregationCollector for SegmentFilterCollector<C> {
        }
        Ok(())
    }
+
+    fn compute_metric_value(
+        &self,
+        _bucket_id: BucketId,
+        _sub_agg_name: &str,
+        _sub_agg_property: &str,
+        _agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64> {
+        // TODO: forward to `sub_aggregations` for nested order paths (`filter.metric`).
+        None
+    }
 }

 /// Intermediate result for filter aggregation
--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -10,7 +10,7 @@ use crate::aggregation::agg_data::{
 };
 use crate::aggregation::agg_req::Aggregations;
 use crate::aggregation::agg_result::BucketEntry;
-use crate::aggregation::cached_sub_aggs::{CachedSubAggs, HighCardCachedSubAggs};
+use crate::aggregation::buffered_sub_aggs::{BufferedSubAggs, HighCardBufferedSubAggs};
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
    IntermediateHistogramBucketEntry,
@@ -258,7 +258,7 @@ pub(crate) struct SegmentHistogramBucketEntry {
 impl SegmentHistogramBucketEntry {
    pub(crate) fn into_intermediate_bucket_entry(
        self,
-        sub_aggregation: &mut Option<HighCardCachedSubAggs>,
+        sub_aggregation: &mut Option<HighCardBufferedSubAggs>,
        agg_data: &AggregationsSegmentCtx,
    ) -> crate::Result<IntermediateHistogramBucketEntry> {
        let mut sub_aggregation_res = IntermediateAggregationResults::default();
@@ -283,6 +283,11 @@ impl SegmentHistogramBucketEntry {
 struct HistogramBuckets {
    pub buckets: FxHashMap<i64, SegmentHistogramBucketEntry>,
 }
+impl HistogramBuckets {
+    fn memory_consumption(&self) -> u64 {
+        self.buckets.capacity() as u64 * std::mem::size_of::<SegmentHistogramBucketEntry>() as u64
+    }
+}

 /// The collector puts values from the fast field into the correct buckets and does a conversion to
 /// the correct datatype.
@@ -291,7 +296,7 @@ pub struct SegmentHistogramCollector {
    /// The buckets containing the aggregation data.
    /// One Histogram bucket per parent bucket id.
    parent_buckets: Vec<HistogramBuckets>,
-    sub_agg: Option<HighCardCachedSubAggs>,
+    sub_agg: Option<HighCardBufferedSubAggs>,
    accessor_idx: usize,
    bucket_id_provider: BucketIdProvider,
 }
@@ -324,7 +329,7 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
        let req = agg_data.take_histogram_req_data(self.accessor_idx);
-        let mem_pre = self.get_memory_consumption();
+        let mem_pre = self.get_memory_consumption(parent_bucket_id);
        let buckets = &mut self.parent_buckets[parent_bucket_id as usize].buckets;

        let bounds = req.bounds;
@@ -358,12 +363,9 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
        }
        agg_data.put_back_histogram_req_data(self.accessor_idx, req);

-        let mem_delta = self.get_memory_consumption() - mem_pre;
+        let mem_delta = self.get_memory_consumption(parent_bucket_id) - mem_pre;
        if mem_delta > 0 {
-            agg_data
-                .context
-                .limits
-                .add_memory_consumed(mem_delta as u64)?;
+            agg_data.context.limits.add_memory_consumed(mem_delta)?;
        }

        if let Some(sub_agg) = &mut self.sub_agg {
@@ -392,14 +394,24 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
        }
        Ok(())
    }
+
+    fn compute_metric_value(
+        &self,
+        _bucket_id: BucketId,
+        _sub_agg_name: &str,
+        _sub_agg_property: &str,
+        _agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64> {
+        // Histogram is a multi-bucket agg with no single value to extract.
+        None
+    }
 }

 impl SegmentHistogramCollector {
-    fn get_memory_consumption(&self) -> usize {
-        let self_mem = std::mem::size_of::<Self>();
-        let buckets_mem = self.parent_buckets.len() * std::mem::size_of::<HistogramBuckets>();
-        self_mem + buckets_mem
+    fn get_memory_consumption(&self, parent_bucket_id: BucketId) -> u64 {
+        self.parent_buckets[parent_bucket_id as usize].memory_consumption()
    }
+
    /// Converts the collector result into a intermediate bucket result.
    fn add_intermediate_bucket_result(
        &mut self,
@@ -444,7 +456,7 @@ impl SegmentHistogramCollector {
            max: f64::MAX,
        });
        req_data.offset = req_data.req.offset.unwrap_or(0.0);
-        let sub_agg = sub_agg.map(CachedSubAggs::new);
+        let sub_agg = sub_agg.map(BufferedSubAggs::new);

        Ok(Self {
            parent_buckets: Default::default(),
--- a/src/aggregation/bucket/range.rs
+++ b/src/aggregation/bucket/range.rs
@@ -9,8 +9,9 @@ use crate::aggregation::agg_data::{
    build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
 };
 use crate::aggregation::agg_limits::AggregationLimitsGuard;
-use crate::aggregation::cached_sub_aggs::{
-    CachedSubAggs, HighCardSubAggCache, LowCardCachedSubAggs, LowCardSubAggCache, SubAggCache,
+use crate::aggregation::buffered_sub_aggs::{
+    BufferedSubAggs, HighCardSubAggBuffer, LowCardBufferedSubAggs, LowCardSubAggBuffer,
+    SubAggBuffer,
 };
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
@@ -155,13 +156,13 @@ pub(crate) struct SegmentRangeAndBucketEntry {

 /// The collector puts values from the fast field into the correct buckets and does a conversion to
 /// the correct datatype.
-pub struct SegmentRangeCollector<C: SubAggCache> {
+pub struct SegmentRangeCollector<B: SubAggBuffer> {
    /// The buckets containing the aggregation data.
    /// One for each ParentBucketId
    parent_buckets: Vec<Vec<SegmentRangeAndBucketEntry>>,
    column_type: ColumnType,
    pub(crate) accessor_idx: usize,
-    sub_agg: Option<CachedSubAggs<C>>,
+    sub_agg: Option<BufferedSubAggs<B>>,
    /// Here things get a bit weird. We need to assign unique bucket ids across all
    /// parent buckets. So we keep track of the next available bucket id here.
    /// This allows a kind of flattening of the bucket ids across all parent buckets.
@@ -178,7 +179,7 @@ pub struct SegmentRangeCollector<C: SubAggCache> {
    limits: AggregationLimitsGuard,
 }

-impl<C: SubAggCache> Debug for SegmentRangeCollector<C> {
+impl<B: SubAggBuffer> Debug for SegmentRangeCollector<B> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SegmentRangeCollector")
            .field("parent_buckets_len", &self.parent_buckets.len())
@@ -229,7 +230,7 @@ impl SegmentRangeBucketEntry {
    }
 }

-impl<C: SubAggCache> SegmentAggregationCollector for SegmentRangeCollector<C> {
+impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentRangeCollector<B> {
    fn add_intermediate_aggregation_result(
        &mut self,
        agg_data: &AggregationsSegmentCtx,
@@ -327,6 +328,17 @@ impl<C: SubAggCache> SegmentAggregationCollector for SegmentRangeCollector<C> {

        Ok(())
    }
+
+    fn compute_metric_value(
+        &self,
+        _bucket_id: BucketId,
+        _sub_agg_name: &str,
+        _sub_agg_property: &str,
+        _agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64> {
+        // Range is a multi-bucket agg with no single value to extract.
+        None
+    }
 }
 /// Build a concrete `SegmentRangeCollector` with either a Vec- or HashMap-backed
 /// bucket storage, depending on the column type and aggregation level.
@@ -350,8 +362,8 @@ pub(crate) fn build_segment_range_collector(
    };

    if is_low_card {
-        Ok(Box::new(SegmentRangeCollector::<LowCardSubAggCache> {
-            sub_agg: sub_agg.map(LowCardCachedSubAggs::new),
+        Ok(Box::new(SegmentRangeCollector::<LowCardSubAggBuffer> {
+            sub_agg: sub_agg.map(LowCardBufferedSubAggs::new),
            column_type: field_type,
            accessor_idx,
            parent_buckets: Vec::new(),
@@ -359,8 +371,8 @@ pub(crate) fn build_segment_range_collector(
            limits: agg_data.context.limits.clone(),
        }))
    } else {
-        Ok(Box::new(SegmentRangeCollector::<HighCardSubAggCache> {
-            sub_agg: sub_agg.map(CachedSubAggs::new),
+        Ok(Box::new(SegmentRangeCollector::<HighCardSubAggBuffer> {
+            sub_agg: sub_agg.map(BufferedSubAggs::new),
            column_type: field_type,
            accessor_idx,
            parent_buckets: Vec::new(),
@@ -370,7 +382,7 @@ pub(crate) fn build_segment_range_collector(
    }
 }

-impl<C: SubAggCache> SegmentRangeCollector<C> {
+impl<B: SubAggBuffer> SegmentRangeCollector<B> {
    pub(crate) fn create_new_buckets(
        &mut self,
        agg_data: &AggregationsSegmentCtx,
@@ -554,7 +566,7 @@ mod tests {
    pub fn get_collector_from_ranges(
        ranges: Vec<RangeAggregationRange>,
        field_type: ColumnType,
-    ) -> SegmentRangeCollector<HighCardSubAggCache> {
+    ) -> SegmentRangeCollector<HighCardSubAggBuffer> {
        let req = RangeAggregation {
            field: "dummy".to_string(),
            ranges,
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -1,5 +1,4 @@
 use std::fmt::Debug;
-use std::io;
 use std::net::Ipv6Addr;

 use columnar::column_values::CompactSpaceU64Accessor;
@@ -17,8 +16,9 @@ use crate::aggregation::agg_data::{
 };
 use crate::aggregation::agg_limits::MemoryConsumption;
 use crate::aggregation::agg_req::Aggregations;
-use crate::aggregation::cached_sub_aggs::{
-    CachedSubAggs, HighCardSubAggCache, LowCardCachedSubAggs, LowCardSubAggCache, SubAggCache,
+use crate::aggregation::buffered_sub_aggs::{
+    BufferedSubAggs, HighCardSubAggBuffer, LowCardBufferedSubAggs, LowCardSubAggBuffer,
+    SubAggBuffer,
 };
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
@@ -352,19 +352,15 @@ pub(crate) fn build_segment_term_collector(
        )));
    }

-    // Validate sub aggregation exists when ordering by sub-aggregation.
-    {
-        if let OrderTarget::SubAggregation(sub_agg_name) = &terms_req_data.req.order.target {
-            let (agg_name, _agg_property) = get_agg_name_and_property(sub_agg_name);
-
-            node.get_sub_agg(agg_name, &req_data.per_request)
-                .ok_or_else(|| {
-                    TantivyError::InvalidArgument(format!(
-                        "could not find aggregation with name {agg_name} in metric \
-                         sub_aggregations"
-                    ))
-                })?;
-        }
+    // Validate that the referenced sub-aggregation exists when ordering by one.
+    if let OrderTarget::SubAggregation(sub_agg_name) = &terms_req_data.req.order.target {
+        let (agg_name, _agg_property) = get_agg_name_and_property(sub_agg_name);
+        node.get_sub_agg(agg_name, &req_data.per_request)
+            .ok_or_else(|| {
+                TantivyError::InvalidArgument(format!(
+                    "could not find aggregation with name {agg_name} in metric sub_aggregations"
+                ))
+            })?;
    }

    // Build sub-aggregation blueprint if there are children.
@@ -391,7 +387,7 @@ pub(crate) fn build_segment_term_collector(
    // Decide which bucket storage is best suited for this aggregation.
    if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC && !has_sub_aggregations {
        let term_buckets = VecTermBucketsNoAgg::new(max_term_id + 1, &mut bucket_id_provider);
-        let collector: SegmentTermCollector<_, HighCardSubAggCache> = SegmentTermCollector {
+        let collector: SegmentTermCollector<_, HighCardSubAggBuffer> = SegmentTermCollector {
            parent_buckets: vec![term_buckets],
            sub_agg: None,
            bucket_id_provider,
@@ -401,8 +397,8 @@ pub(crate) fn build_segment_term_collector(
        Ok(Box::new(collector))
    } else if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC {
        let term_buckets = VecTermBuckets::new(max_term_id + 1, &mut bucket_id_provider);
-        let sub_agg = sub_agg_collector.map(LowCardCachedSubAggs::new);
-        let collector: SegmentTermCollector<_, LowCardSubAggCache> = SegmentTermCollector {
+        let sub_agg = sub_agg_collector.map(LowCardBufferedSubAggs::new);
+        let collector: SegmentTermCollector<_, LowCardSubAggBuffer> = SegmentTermCollector {
            parent_buckets: vec![term_buckets],
            sub_agg,
            bucket_id_provider,
@@ -414,8 +410,8 @@ pub(crate) fn build_segment_term_collector(
        let term_buckets: PagedTermMap =
            PagedTermMap::new(max_term_id + 1, &mut bucket_id_provider);
        // Build sub-aggregation blueprint (flat pairs)
-        let sub_agg = sub_agg_collector.map(CachedSubAggs::new);
-        let collector: SegmentTermCollector<PagedTermMap, HighCardSubAggCache> =
+        let sub_agg = sub_agg_collector.map(BufferedSubAggs::new);
+        let collector: SegmentTermCollector<PagedTermMap, HighCardSubAggBuffer> =
            SegmentTermCollector {
                parent_buckets: vec![term_buckets],
                sub_agg,
@@ -427,8 +423,8 @@ pub(crate) fn build_segment_term_collector(
    } else {
        let term_buckets: HashMapTermBuckets = HashMapTermBuckets::default();
        // Build sub-aggregation blueprint (flat pairs)
-        let sub_agg = sub_agg_collector.map(CachedSubAggs::new);
-        let collector: SegmentTermCollector<HashMapTermBuckets, HighCardSubAggCache> =
+        let sub_agg = sub_agg_collector.map(BufferedSubAggs::new);
+        let collector: SegmentTermCollector<HashMapTermBuckets, HighCardSubAggBuffer> =
            SegmentTermCollector {
                parent_buckets: vec![term_buckets],
                sub_agg,
@@ -758,10 +754,10 @@ impl TermAggregationMap for VecTermBuckets {
 /// The collector puts values from the fast field into the correct buckets and does a conversion to
 /// the correct datatype.
 #[derive(Debug)]
-struct SegmentTermCollector<TermMap: TermAggregationMap, C: SubAggCache> {
+struct SegmentTermCollector<TermMap: TermAggregationMap, B: SubAggBuffer> {
    /// The buckets containing the aggregation data.
    parent_buckets: Vec<TermMap>,
-    sub_agg: Option<CachedSubAggs<C>>,
+    sub_agg: Option<BufferedSubAggs<B>>,
    bucket_id_provider: BucketIdProvider,
    max_term_id: u64,
    terms_req_data: TermsAggReqData,
@@ -772,8 +768,8 @@ pub(crate) fn get_agg_name_and_property(name: &str) -> (&str, &str) {
    (agg_name, agg_property)
 }

-impl<TermMap: TermAggregationMap, C: SubAggCache> SegmentAggregationCollector
-    for SegmentTermCollector<TermMap, C>
+impl<TermMap: TermAggregationMap, B: SubAggBuffer> SegmentAggregationCollector
+    for SegmentTermCollector<TermMap, B>
 {
    fn add_intermediate_aggregation_result(
        &mut self,
@@ -790,8 +786,14 @@ impl<TermMap: TermAggregationMap, C: SubAggCache> SegmentAggregationCollector
        let term_req = &self.terms_req_data;
        let name = term_req.name.clone();

-        let bucket =
-            Self::into_intermediate_bucket_result(term_req, &mut self.sub_agg, bucket, agg_data)?;
+        let bucket = Self::into_intermediate_bucket_result(
+            term_req,
+            self.sub_agg
+                .as_mut()
+                .map(BufferedSubAggs::get_sub_agg_collector),
+            bucket,
+            agg_data,
+        )?;
        results.push(name, IntermediateAggregationResult::Bucket(bucket))?;
        Ok(())
    }
@@ -881,6 +883,17 @@ impl<TermMap: TermAggregationMap, C: SubAggCache> SegmentAggregationCollector
        }
        Ok(())
    }
+
+    fn compute_metric_value(
+        &self,
+        _bucket_id: BucketId,
+        _sub_agg_name: &str,
+        _sub_agg_property: &str,
+        _agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64> {
+        // Terms is a multi-bucket agg with no single value to extract.
+        None
+    }
 }

 /// Missing value are represented as a sentinel value in the column.
@@ -907,10 +920,38 @@ fn extract_missing_value<T>(
    Some((key, bucket))
 }

-impl<TermMap, C> SegmentTermCollector<TermMap, C>
+/// Reborrows the collector held in `opt` for `'a`, so `opt` can be lent out again later.
+fn reborrow_opt_collector<'a>(
+    opt: &'a mut Option<&mut dyn SegmentAggregationCollector>,
+) -> Option<&'a mut dyn SegmentAggregationCollector> {
+    // `Some(*inner)` reborrows through the outer `&mut` instead of moving the inner one.
+    let inner = opt.as_mut()?;
+    Some(*inner)
+}
+
+/// Converts a collected term `Bucket` into an `IntermediateTermBucketEntry`, pulling the
+/// bucket's sub-aggregation results from `sub_agg_collector` when one is given.
+fn into_intermediate_bucket_entry(
+    bucket: Bucket,
+    sub_agg_collector: Option<&mut dyn SegmentAggregationCollector>,
+    agg_data: &AggregationsSegmentCtx,
+) -> crate::Result<IntermediateTermBucketEntry> {
+    let mut sub_aggregation = IntermediateAggregationResults::default();
+    if let Some(collector) = sub_agg_collector {
+        let bucket_id = bucket.bucket_id;
+        collector.add_intermediate_aggregation_result(agg_data, &mut sub_aggregation, bucket_id)?;
+    }
+    let doc_count = bucket.count;
+    Ok(IntermediateTermBucketEntry {
+        doc_count,
+        sub_aggregation,
+    })
+}
+
+impl<TermMap, B> SegmentTermCollector<TermMap, B>
 where
    TermMap: TermAggregationMap,
-    C: SubAggCache,
+    B: SubAggBuffer,
 {
    #[inline]
    fn get_memory_consumption(&self, parent_bucket_id: BucketId) -> usize {
@@ -920,15 +961,12 @@ where
    #[inline]
    pub(crate) fn into_intermediate_bucket_result(
        term_req: &TermsAggReqData,
-        sub_agg: &mut Option<CachedSubAggs<C>>,
+        mut sub_agg_collector: Option<&mut dyn SegmentAggregationCollector>,
        term_buckets: TermMap,
        agg_data: &AggregationsSegmentCtx,
    ) -> crate::Result<IntermediateBucketResult> {
        let mut entries: Vec<(u64, Bucket)> = term_buckets.into_vec();

-        let order_by_sub_aggregation =
-            matches!(term_req.req.order.target, OrderTarget::SubAggregation(_));
-
        match &term_req.req.order.target {
            OrderTarget::Key => {
                // We rely on the fact, that term ordinals match the order of the strings
@@ -940,10 +978,37 @@ where
                    entries.sort_unstable_by_key(|bucket| bucket.0);
                }
            }
-            OrderTarget::SubAggregation(_name) => {
-                // don't sort and cut off since it's hard to make assumptions on the quality of the
-                // results when cutting off du to unknown nature of the sub_aggregation (possible
-                // to check).
+            OrderTarget::SubAggregation(sub_agg_path) => {
+                // Peek segment-level metric values, sort, then fall through to
+                // `cut_off_buckets`. Like Elasticsearch, we always cut off when ordering
+                // by a sub-agg: top-K results are approximate and may differ from the
+                // global ordering, especially for non-monotonic metrics like avg/min.
+                let coll = sub_agg_collector.as_deref().ok_or_else(|| {
+                    TantivyError::InvalidArgument(format!(
+                        "Could not find sub-aggregation collector for path {sub_agg_path}"
+                    ))
+                })?;
+                let (agg_name, agg_prop) = get_agg_name_and_property(sub_agg_path);
+                // Fetch values up-front; otherwise sort would re-compute per comparison
+                let mut keyed: Vec<(f64, (u64, Bucket))> = entries
+                    .into_iter()
+                    .map(|bucket| {
+                        let metric_value = coll
+                            .compute_metric_value(bucket.1.bucket_id, agg_name, agg_prop, agg_data)
+                            .unwrap_or(0.0);
+                        (metric_value, bucket)
+                    })
+                    .collect();
+                // Order with `f64::total_cmp`: unlike `partial_cmp` with an `Equal`
+                // fallback, it is a total order even when a metric is NaN, which
+                // `sort_unstable_by` requires (it may panic or yield an unspecified
+                // order otherwise). Positive NaNs sort above every other value.
+                if term_req.req.order.order == Order::Desc {
+                    keyed.sort_unstable_by(|a, b| b.0.total_cmp(&a.0));
+                } else {
+                    keyed.sort_unstable_by(|a, b| a.0.total_cmp(&b.0));
+                }
+                entries = keyed.into_iter().map(|(_, e)| e).collect();
+            }
            OrderTarget::Count => {
                if term_req.req.order.order == Order::Desc {
@@ -954,40 +1019,12 @@ where
            }
        }

-        let (term_doc_count_before_cutoff, sum_other_doc_count) = if order_by_sub_aggregation {
-            (0, 0)
-        } else {
-            cut_off_buckets(&mut entries, term_req.req.segment_size as usize)
-        };
+        let (term_doc_count_before_cutoff, sum_other_doc_count) =
+            cut_off_buckets(&mut entries, term_req.req.segment_size as usize);

        let mut dict: FxHashMap<IntermediateKey, IntermediateTermBucketEntry> = Default::default();
        dict.reserve(entries.len());

-        let into_intermediate_bucket_entry =
-            |bucket: Bucket,
-             sub_agg: &mut Option<CachedSubAggs<C>>|
-             -> crate::Result<IntermediateTermBucketEntry> {
-                if let Some(sub_agg) = sub_agg {
-                    let mut sub_aggregation_res = IntermediateAggregationResults::default();
-                    sub_agg
-                        .get_sub_agg_collector()
-                        .add_intermediate_aggregation_result(
-                            agg_data,
-                            &mut sub_aggregation_res,
-                            bucket.bucket_id,
-                        )?;
-                    Ok(IntermediateTermBucketEntry {
-                        doc_count: bucket.count,
-                        sub_aggregation: sub_aggregation_res,
-                    })
-                } else {
-                    Ok(IntermediateTermBucketEntry {
-                        doc_count: bucket.count,
-                        sub_aggregation: Default::default(),
-                    })
-                }
-            };
-
        if term_req.column_type == ColumnType::Str {
            let fallback_dict = Dictionary::empty();
            let term_dict = term_req
@@ -998,7 +1035,11 @@ where

            if let Some((intermediate_key, bucket)) = extract_missing_value(&mut entries, term_req)
            {
-                let intermediate_entry = into_intermediate_bucket_entry(bucket, sub_agg)?;
+                let intermediate_entry = into_intermediate_bucket_entry(
+                    bucket,
+                    reborrow_opt_collector(&mut sub_agg_collector),
+                    agg_data,
+                )?;
                dict.insert(intermediate_key, intermediate_entry);
            }

@@ -1006,19 +1047,28 @@ where
            entries.sort_unstable_by_key(|bucket| bucket.0);

            let (term_ids, buckets): (Vec<u64>, Vec<Bucket>) = entries.into_iter().unzip();
-            let mut buckets_it = buckets.into_iter();

-            term_dict.sorted_ords_to_term_cb(term_ids.into_iter(), |term| {
-                let bucket = buckets_it.next().unwrap();
-                let intermediate_entry =
-                    into_intermediate_bucket_entry(bucket, sub_agg).map_err(io::Error::other)?;
+            let intermediate_entries: Vec<IntermediateTermBucketEntry> = buckets
+                .into_iter()
+                .map(|bucket| {
+                    into_intermediate_bucket_entry(
+                        bucket,
+                        reborrow_opt_collector(&mut sub_agg_collector),
+                        agg_data,
+                    )
+                })
+                .collect::<crate::Result<_>>()?;
+
+            let mut intermediate_entry_it = intermediate_entries.into_iter();
+
+            term_dict.sorted_ords_to_term_cb(&term_ids[..], |term| {
+                let intermediate_entry = intermediate_entry_it.next().unwrap();
                dict.insert(
                    IntermediateKey::Str(
                        String::from_utf8(term.to_vec()).expect("could not convert to String"),
                    ),
                    intermediate_entry,
                );
-                Ok(())
            })?;

            if term_req.req.min_doc_count == 0 {
@@ -1053,14 +1103,22 @@ where
            }
        } else if term_req.column_type == ColumnType::DateTime {
            for (val, doc_count) in entries {
-                let intermediate_entry = into_intermediate_bucket_entry(doc_count, sub_agg)?;
+                let intermediate_entry = into_intermediate_bucket_entry(
+                    doc_count,
+                    reborrow_opt_collector(&mut sub_agg_collector),
+                    agg_data,
+                )?;
                let val = i64::from_u64(val);
                let date = format_date(val)?;
                dict.insert(IntermediateKey::Str(date), intermediate_entry);
            }
        } else if term_req.column_type == ColumnType::Bool {
            for (val, doc_count) in entries {
-                let intermediate_entry = into_intermediate_bucket_entry(doc_count, sub_agg)?;
+                let intermediate_entry = into_intermediate_bucket_entry(
+                    doc_count,
+                    reborrow_opt_collector(&mut sub_agg_collector),
+                    agg_data,
+                )?;
                let val = bool::from_u64(val);
                dict.insert(IntermediateKey::Bool(val), intermediate_entry);
            }
@@ -1080,14 +1138,22 @@ where
                })?;

            for (val, doc_count) in entries {
-                let intermediate_entry = into_intermediate_bucket_entry(doc_count, sub_agg)?;
+                let intermediate_entry = into_intermediate_bucket_entry(
+                    doc_count,
+                    reborrow_opt_collector(&mut sub_agg_collector),
+                    agg_data,
+                )?;
                let val: u128 = compact_space_accessor.compact_to_u128(val as u32);
                let val = Ipv6Addr::from_u128(val);
                dict.insert(IntermediateKey::IpAddr(val), intermediate_entry);
            }
        } else {
            for (val, doc_count) in entries {
-                let intermediate_entry = into_intermediate_bucket_entry(doc_count, sub_agg)?;
+                let intermediate_entry = into_intermediate_bucket_entry(
+                    doc_count,
+                    reborrow_opt_collector(&mut sub_agg_collector),
+                    agg_data,
+                )?;
                if term_req.column_type == ColumnType::U64 {
                    dict.insert(IntermediateKey::U64(val), intermediate_entry);
                } else if term_req.column_type == ColumnType::I64 {
@@ -1121,13 +1187,13 @@ where
    }
 }

-impl<TermMap: TermAggregationMap, C: SubAggCache> SegmentTermCollector<TermMap, C> {
+impl<TermMap: TermAggregationMap, B: SubAggBuffer> SegmentTermCollector<TermMap, B> {
    #[inline]
    fn collect_terms_with_docs(
        iter: impl Iterator<Item = (crate::DocId, u64)>,
        term_buckets: &mut TermMap,
        bucket_id_provider: &mut BucketIdProvider,
-        sub_agg: &mut CachedSubAggs<C>,
+        sub_agg: &mut BufferedSubAggs<B>,
    ) {
        for (doc, term_id) in iter {
            let bucket_id = term_buckets.term_entry(term_id, bucket_id_provider);
@@ -1200,7 +1266,7 @@ mod tests {
    use crate::aggregation::{AggregationLimitsGuard, DistributedAggregationCollector};
    use crate::indexer::NoMergePolicy;
    use crate::query::AllQuery;
-    use crate::schema::{IntoIpv6Addr, Schema, FAST, STRING};
+    use crate::schema::{IntoIpv6Addr, Schema, FAST, INDEXED, STRING, TEXT};
    use crate::{Index, IndexWriter};

    #[test]
@@ -1729,6 +1795,263 @@ mod tests {
        Ok(())
    }

+    #[test]
+    fn terms_aggregation_order_by_cardinality_desc_single_segment() -> crate::Result<()> {
+        terms_aggregation_order_by_cardinality_desc(true)
+    }
+    #[test]
+    fn terms_aggregation_order_by_cardinality_desc_multi_segment() -> crate::Result<()> {
+        terms_aggregation_order_by_cardinality_desc(false)
+    }
+    fn terms_aggregation_order_by_cardinality_desc(merge_segments: bool) -> crate::Result<()> {
+        // Distinct score values per bucket key: A→5, B→1, C→3.
+        // Order by cardinality desc must yield A, C, B.
+        let segment_and_terms = vec![vec![
+            (1.0, "A".to_string()),
+            (2.0, "A".to_string()),
+            (3.0, "A".to_string()),
+            (4.0, "A".to_string()),
+            (5.0, "A".to_string()),
+            (1.0, "B".to_string()),
+            (1.0, "B".to_string()),
+            (1.0, "B".to_string()),
+            (1.0, "C".to_string()),
+            (2.0, "C".to_string()),
+            (3.0, "C".to_string()),
+        ]];
+        let index = get_test_index_from_values_and_terms(merge_segments, &segment_and_terms)?;
+
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_texts": {
+                "terms": {
+                    "field": "string_id",
+                    "order": { "card": "desc" }
+                },
+                "aggs": {
+                    "card": { "cardinality": { "field": "score" } }
+                }
+            }
+        }))
+        .unwrap();
+
+        let res = exec_request(agg_req, &index)?;
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
+        assert_eq!(res["my_texts"]["buckets"][0]["card"]["value"], 5.0);
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
+        assert_eq!(res["my_texts"]["buckets"][1]["card"]["value"], 3.0);
+        assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
+        assert_eq!(res["my_texts"]["buckets"][2]["card"]["value"], 1.0);
+
+        // Asc engages the segment-cutoff path too (monotonic-safe: discarded buckets had
+        // local card >= cutoff, so merged card >= cutoff and they cannot be globally smallest).
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_texts": {
+                "terms": {
+                    "field": "string_id",
+                    "order": { "card": "asc" }
+                },
+                "aggs": {
+                    "card": { "cardinality": { "field": "score" } }
+                }
+            }
+        }))
+        .unwrap();
+        let res = exec_request(agg_req, &index)?;
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "B");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
+        assert_eq!(res["my_texts"]["buckets"][2]["key"], "A");
+
+        // size=2 with desc engages the segment cutoff: must keep top-2 by cardinality (A, C),
+        // and `sum_other_doc_count` reflects the dropped B (3 docs).
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_texts": {
+                "terms": {
+                    "field": "string_id",
+                    "size": 2,
+                    "order": { "card": "desc" }
+                },
+                "aggs": {
+                    "card": { "cardinality": { "field": "score" } }
+                }
+            }
+        }))
+        .unwrap();
+        let res = exec_request(agg_req, &index)?;
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
+        assert_eq!(res["my_texts"]["buckets"].as_array().unwrap().len(), 2);
+
+        // size=2 with asc engages the segment cutoff: must keep bottom-2 by cardinality (B, C).
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_texts": {
+                "terms": {
+                    "field": "string_id",
+                    "size": 2,
+                    "order": { "card": "asc" }
+                },
+                "aggs": {
+                    "card": { "cardinality": { "field": "score" } }
+                }
+            }
+        }))
+        .unwrap();
+        let res = exec_request(agg_req, &index)?;
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "B");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
+        assert_eq!(res["my_texts"]["buckets"].as_array().unwrap().len(), 2);
+
+        Ok(())
+    }
+
+    #[test]
+    fn terms_aggregation_order_by_sum_single_segment() -> crate::Result<()> {
+        terms_aggregation_order_by_sum(true)
+    }
+    #[test]
+    fn terms_aggregation_order_by_sum_multi_segment() -> crate::Result<()> {
+        terms_aggregation_order_by_sum(false)
+    }
+    fn terms_aggregation_order_by_sum(merge_segments: bool) -> crate::Result<()> {
+        // Per-bucket sums on the U64 `score` column (non-negative => sum is monotonic):
+        //   A → 1+2+3+4+5 = 15, B → 1+1+1 = 3, C → 1+2+3 = 6.
+        let segment_and_terms = vec![
+            vec![
+                (1.0, "A".to_string()),
+                (2.0, "A".to_string()),
+                (3.0, "A".to_string()),
+                (1.0, "B".to_string()),
+                (1.0, "C".to_string()),
+            ],
+            vec![
+                (4.0, "A".to_string()),
+                (5.0, "A".to_string()),
+                (1.0, "B".to_string()),
+                (1.0, "B".to_string()),
+                (2.0, "C".to_string()),
+                (3.0, "C".to_string()),
+            ],
+        ];
+        let index = get_test_index_from_values_and_terms(merge_segments, &segment_and_terms)?;
+
+        // Desc on a Sum metric (U64 column): sorted by segment-level sum, then cut off.
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_texts": {
+                "terms": {
+                    "field": "string_id",
+                    "order": { "total": "desc" }
+                },
+                "aggs": {
+                    "total": { "sum": { "field": "score" } }
+                }
+            }
+        }))
+        .unwrap();
+        let res = exec_request(agg_req, &index)?;
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
+        assert_eq!(res["my_texts"]["buckets"][0]["total"]["value"], 15.0);
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
+        assert_eq!(res["my_texts"]["buckets"][1]["total"]["value"], 6.0);
+        assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
+        assert_eq!(res["my_texts"]["buckets"][2]["total"]["value"], 3.0);
+
+        // Asc takes the same segment-cutoff path — discarded buckets had local sum >= cutoff,
+        // and merged sum >= local (non-negative addends), so they cannot be globally smallest.
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_texts": {
+                "terms": {
+                    "field": "string_id",
+                    "order": { "total": "asc" }
+                },
+                "aggs": {
+                    "total": { "sum": { "field": "score" } }
+                }
+            }
+        }))
+        .unwrap();
+        let res = exec_request(agg_req, &index)?;
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "B");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
+        assert_eq!(res["my_texts"]["buckets"][2]["key"], "A");
+
+        // size=2 desc with cutoff: top-2 by sum (A, C).
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_texts": {
+                "terms": {
+                    "field": "string_id",
+                    "size": 2,
+                    "order": { "total": "desc" }
+                },
+                "aggs": {
+                    "total": { "sum": { "field": "score" } }
+                }
+            }
+        }))
+        .unwrap();
+        let res = exec_request(agg_req, &index)?;
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
+        assert_eq!(res["my_texts"]["buckets"].as_array().unwrap().len(), 2);
+
+        // Stats sub-property: ordering by `mystats.sum` on a U64 column also engages.
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_texts": {
+                "terms": {
+                    "field": "string_id",
+                    "order": { "mystats.sum": "desc" }
+                },
+                "aggs": {
+                    "mystats": { "stats": { "field": "score" } }
+                }
+            }
+        }))
+        .unwrap();
+        let res = exec_request(agg_req, &index)?;
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
+        assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
+
+        // Sum on a signed column (I64) takes the same cutoff path. Results may be
+        // approximate near the boundary on adversarial data, but for this dataset the
+        // top-K is unambiguous.
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_texts": {
+                "terms": {
+                    "field": "string_id",
+                    "order": { "total": "desc" }
+                },
+                "aggs": {
+                    "total": { "sum": { "field": "score_i64" } }
+                }
+            }
+        }))
+        .unwrap();
+        let res = exec_request(agg_req, &index)?;
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
+        assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
+
+        // Order by extended_stats sub-property exercises compute_metric_value on the
+        // ExtendedStats collector. A→max=5, B→max=1, C→max=3, so desc by max → A, C, B.
+        let agg_req: Aggregations = serde_json::from_value(json!({
+            "my_texts": {
+                "terms": {
+                    "field": "string_id",
+                    "order": { "ext.max": "desc" }
+                },
+                "aggs": {
+                    "ext": { "extended_stats": { "field": "score" } }
+                }
+            }
+        }))
+        .unwrap();
+        let res = exec_request(agg_req, &index)?;
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
+        assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
+
+        Ok(())
+    }
+
    #[test]
    fn terms_aggregation_test_order_key_single_segment() -> crate::Result<()> {
        terms_aggregation_test_order_key_merge_segment(true)
@@ -2894,4 +3217,101 @@ mod tests {

        Ok(())
    }
+
+    fn prep_index_with_n_unique_terms_plus_one_null(n: u64) -> crate::Result<Index> {
+        let mut schema_builder = Schema::builder();
+        let id_field = schema_builder.add_u64_field("id", INDEXED);
+        let title_field = schema_builder.add_text_field("title", TEXT | FAST);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema.clone());
+        // Use a single writer thread to guarantee all docs end up in the same segment.
+        let mut writer = index.writer_with_num_threads(1, 50_000_000)?;
+        // Doc 0 only sets `id`: it is the one document without a `title` (the "null" doc).
+        writer.add_document(doc!(
+            id_field => 0u64,
+        ))?;
+        for i in 1u64..=n {
+            let title = format!("foo{i}");
+            writer.add_document(doc!(
+                id_field => i,
+                title_field => title,
+            ))?;
+        }
+
+        writer.commit()?;
+
+        Ok(index)
+    }
+
+    #[test]
+    fn null_bitset_bounds_check_regression() -> crate::Result<()> {
+        // Unique term counts are multiples of 64 (0, 64, ..., 256), plus one doc without a title.
+        for i in 0..=4 {
+            let index = prep_index_with_n_unique_terms_plus_one_null(i * 64)?;
+            let normal_req: Aggregations = serde_json::from_value(json!({
+                "my_bool": {
+                    "terms": {
+                        "field": "title",
+                        "missing": "__NULL__",
+                        "size": 1000,
+                    }
+                }
+            }))?;
+            let include_req: Aggregations = serde_json::from_value(json!({
+                "my_bool": {
+                    "terms": {
+                        "field": "title",
+                        "include": "foo(.*)",
+                        "missing": "__NULL__",
+                        "size": 1000,
+                    }
+                }
+            }))?;
+            let exclude_req: Aggregations = serde_json::from_value(json!({
+                "my_bool": {
+                    "terms": {
+                        "field": "title",
+                        "exclude": "foo(.*)",
+                        "missing": "__NULL__",
+                        "size": 1000,
+                    }
+                }
+            }))?;
+
+            let normal_res = exec_request(normal_req, &index)?;
+            let normal_buckets = normal_res["my_bool"]["buckets"].as_array().unwrap();
+            assert_eq!(
+                normal_buckets.len(),
+                (i * 64) as usize + 1,
+                "The normal request should return all 'foo' buckets, plus the missing term bucket",
+            );
+
+            let include_res = exec_request(include_req, &index)?;
+            eprintln!("include_res: {include_res:?}");
+            let include_buckets = include_res["my_bool"]["buckets"].as_array().unwrap();
+            assert_eq!(
+                include_buckets.len(),
+                (i * 64) as usize,
+                "The include request should return all 'foo' buckets, and not the missing term \
+                 bucket",
+            );
+            assert!(include_buckets
+                .iter()
+                .all(|b| b["key"].as_str().unwrap().starts_with("foo")));
+
+            let exclude_res = exec_request(exclude_req, &index)?;
+            let exclude_buckets = exclude_res["my_bool"]["buckets"].as_array().unwrap();
+            if i != 0 {
+                // TODO: Remove this if after fixing exclude + missing bug
+                assert_eq!(
+                    exclude_buckets.len(),
+                    1,
+                    "The exclude request should exclude all 'foo' buckets, and only the missing \
+                     term bucket",
+                );
+                assert_eq!(exclude_buckets[0]["key"], "__NULL__");
+            }
+        }
+        Ok(())
+    }
 }
--- a/src/aggregation/bucket/term_missing_agg.rs
+++ b/src/aggregation/bucket/term_missing_agg.rs
@@ -5,7 +5,7 @@ use crate::aggregation::agg_data::{
    build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
 };
 use crate::aggregation::bucket::term_agg::TermsAggregation;
-use crate::aggregation::cached_sub_aggs::{CachedSubAggs, HighCardCachedSubAggs};
+use crate::aggregation::buffered_sub_aggs::{BufferedSubAggs, HighCardBufferedSubAggs};
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
    IntermediateKey, IntermediateTermBucketEntry, IntermediateTermBucketResult,
@@ -47,7 +47,7 @@ struct MissingCount {
 #[derive(Default, Debug)]
 pub struct TermMissingAgg {
    accessor_idx: usize,
-    sub_agg: Option<HighCardCachedSubAggs>,
+    sub_agg: Option<HighCardBufferedSubAggs>,
    /// Idx = parent bucket id, Value = missing count for that bucket
    missing_count_per_bucket: Vec<MissingCount>,
    bucket_id_provider: BucketIdProvider,
@@ -66,7 +66,7 @@ impl TermMissingAgg {
            None
        };

-        let sub_agg = sub_agg.map(CachedSubAggs::new);
+        let sub_agg = sub_agg.map(BufferedSubAggs::new);
        let bucket_id_provider = BucketIdProvider::default();

        Ok(Self {
@@ -177,6 +177,17 @@ impl SegmentAggregationCollector for TermMissingAgg {
        }
        Ok(())
    }
+
+    fn compute_metric_value(
+        &self,
+        _bucket_id: BucketId,
+        _sub_agg_name: &str,
+        _sub_agg_property: &str,
+        _agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64> {
+        // TODO: forward to `sub_agg` for nested order paths (`missing_agg>metric`).
+        None
+    }
 }

 #[cfg(test)]
--- a/src/aggregation/buffered_sub_aggs.rs
+++ b/src/aggregation/buffered_sub_aggs.rs
@@ -6,7 +6,7 @@ use crate::aggregation::bucket::MAX_NUM_TERMS_FOR_VEC;
 use crate::aggregation::BucketId;
 use crate::DocId;

-/// A cache for sub-aggregations, storing doc ids per bucket id.
+/// A buffer for sub-aggregations, storing doc ids per bucket id.
 /// Depending on the cardinality of the parent aggregation, we use different
 /// storage strategies.
 ///
@@ -24,21 +24,21 @@ use crate::DocId;
 /// aggregations.
 /// What this datastructure does in general is to group docs by bucket id.
 #[derive(Debug)]
-pub(crate) struct CachedSubAggs<C: SubAggCache> {
-    cache: C,
+pub(crate) struct BufferedSubAggs<B: SubAggBuffer> {
+    buffer: B,
    sub_agg_collector: Box<dyn SegmentAggregationCollector>,
    num_docs: usize,
 }

-pub type LowCardCachedSubAggs = CachedSubAggs<LowCardSubAggCache>;
-pub type HighCardCachedSubAggs = CachedSubAggs<HighCardSubAggCache>;
+pub type LowCardBufferedSubAggs = BufferedSubAggs<LowCardSubAggBuffer>;
+pub type HighCardBufferedSubAggs = BufferedSubAggs<HighCardSubAggBuffer>;

 const FLUSH_THRESHOLD: usize = 2048;

-/// A trait for caching sub-aggregation doc ids per bucket id.
+/// A trait for buffering sub-aggregation doc ids per bucket id.
 /// Different implementations can be used depending on the cardinality
 /// of the parent aggregation.
-pub trait SubAggCache: Debug {
+pub trait SubAggBuffer: Debug {
    fn new() -> Self;
    fn push(&mut self, bucket_id: BucketId, doc_id: DocId);
    fn flush_local(
@@ -49,22 +49,22 @@ pub trait SubAggCache: Debug {
    ) -> crate::Result<()>;
 }

-impl<Backend: SubAggCache + Debug> CachedSubAggs<Backend> {
+impl<Backend: SubAggBuffer + Debug> BufferedSubAggs<Backend> {
    pub fn new(sub_agg: Box<dyn SegmentAggregationCollector>) -> Self {
        Self {
-            cache: Backend::new(),
+            buffer: Backend::new(),
            sub_agg_collector: sub_agg,
            num_docs: 0,
        }
    }

-    pub fn get_sub_agg_collector(&mut self) -> &mut Box<dyn SegmentAggregationCollector> {
-        &mut self.sub_agg_collector
+    pub fn get_sub_agg_collector(&mut self) -> &mut dyn SegmentAggregationCollector {
+        &mut *self.sub_agg_collector
    }

    #[inline]
    pub fn push(&mut self, bucket_id: BucketId, doc_id: DocId) {
-        self.cache.push(bucket_id, doc_id);
+        self.buffer.push(bucket_id, doc_id);
        self.num_docs += 1;
    }

@@ -75,7 +75,7 @@ impl<Backend: SubAggCache + Debug> CachedSubAggs<Backend> {
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
        if self.num_docs >= FLUSH_THRESHOLD {
-            self.cache
+            self.buffer
                .flush_local(&mut self.sub_agg_collector, agg_data, false)?;
            self.num_docs = 0;
        }
@@ -85,7 +85,7 @@ impl<Backend: SubAggCache + Debug> CachedSubAggs<Backend> {
    /// Note: this _does_ flush the sub aggregations.
    pub fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
        if self.num_docs != 0 {
-            self.cache
+            self.buffer
                .flush_local(&mut self.sub_agg_collector, agg_data, true)?;
            self.num_docs = 0;
        }
@@ -94,11 +94,11 @@ impl<Backend: SubAggCache + Debug> CachedSubAggs<Backend> {
    }
 }

-/// Number of partitions for high cardinality sub-aggregation cache.
+/// Number of partitions for high cardinality sub-aggregation buffer.
 const NUM_PARTITIONS: usize = 16;

 #[derive(Debug)]
-pub(crate) struct HighCardSubAggCache {
+pub(crate) struct HighCardSubAggBuffer {
    /// This weird partitioning is used to do some cheap grouping on the bucket ids.
    /// bucket ids are dense, e.g. when we don't detect the cardinality as low cardinality,
    /// but there are just 16 bucket ids, each bucket id will go to its own partition.
@@ -108,7 +108,7 @@ pub(crate) struct HighCardSubAggCache {
    partitions: Box<[PartitionEntry; NUM_PARTITIONS]>,
 }

-impl HighCardSubAggCache {
+impl HighCardSubAggBuffer {
    #[inline]
    fn clear(&mut self) {
        for partition in self.partitions.iter_mut() {
@@ -131,7 +131,7 @@ impl PartitionEntry {
    }
 }

-impl SubAggCache for HighCardSubAggCache {
+impl SubAggBuffer for HighCardSubAggBuffer {
    fn new() -> Self {
        Self {
            partitions: Box::new(core::array::from_fn(|_| PartitionEntry::default())),
@@ -173,14 +173,14 @@ impl SubAggCache for HighCardSubAggCache {
 }

 #[derive(Debug)]
-pub(crate) struct LowCardSubAggCache {
-    /// Cache doc ids per bucket for sub-aggregations.
+pub(crate) struct LowCardSubAggBuffer {
+    /// Buffer doc ids per bucket for sub-aggregations.
    ///
    /// The outer Vec is indexed by BucketId.
    per_bucket_docs: Vec<Vec<DocId>>,
 }

-impl LowCardSubAggCache {
+impl LowCardSubAggBuffer {
    #[inline]
    fn clear(&mut self) {
        for v in &mut self.per_bucket_docs {
@@ -189,7 +189,7 @@ impl LowCardSubAggCache {
    }
 }

-impl SubAggCache for LowCardSubAggCache {
+impl SubAggBuffer for LowCardSubAggBuffer {
    fn new() -> Self {
        Self {
            per_bucket_docs: Vec::new(),
--- a/src/aggregation/collector.rs
+++ b/src/aggregation/collector.rs
@@ -1,6 +1,6 @@
 use super::agg_req::Aggregations;
 use super::agg_result::AggregationResults;
-use super::cached_sub_aggs::LowCardCachedSubAggs;
+use super::buffered_sub_aggs::LowCardBufferedSubAggs;
 use super::intermediate_agg_result::IntermediateAggregationResults;
 use super::AggContextParams;
 // group buffering strategy is chosen explicitly by callers; no need to hash-group on the fly.
@@ -136,7 +136,7 @@ fn merge_fruits(
 /// `AggregationSegmentCollector` does the aggregation collection on a segment.
 pub struct AggregationSegmentCollector {
    aggs_with_accessor: AggregationsSegmentCtx,
-    agg_collector: LowCardCachedSubAggs,
+    agg_collector: LowCardBufferedSubAggs,
    error: Option<TantivyError>,
 }

@@ -152,7 +152,7 @@ impl AggregationSegmentCollector {
        let mut agg_data =
            build_aggregations_data_from_req(agg, reader, segment_ordinal, context.clone())?;
        let mut result =
-            LowCardCachedSubAggs::new(build_segment_agg_collectors_root(&mut agg_data)?);
+            LowCardBufferedSubAggs::new(build_segment_agg_collectors_root(&mut agg_data)?);
        result
            .get_sub_agg_collector()
            .prepare_max_bucket(0, &agg_data)?; // prepare for bucket zero
--- a/src/aggregation/intermediate_agg_result.rs
+++ b/src/aggregation/intermediate_agg_result.rs
@@ -1004,24 +1004,20 @@ impl IntermediateCompositeBucketResult {
    ) -> crate::Result<BucketResult> {
        let trimmed_entry_vec =
            trim_composite_buckets(self.entries, &self.orders, self.target_size)?;
-        let after_key = if trimmed_entry_vec.len() == req.size as usize {
-            trimmed_entry_vec
-                .last()
-                .map(|bucket| {
-                    let (intermediate_key, _entry) = bucket;
-                    intermediate_key
-                        .iter()
-                        .enumerate()
-                        .map(|(idx, intermediate_key)| {
-                            let source = &req.sources[idx];
-                            (source.name().to_string(), intermediate_key.clone().into())
-                        })
-                        .collect()
-                })
-                .unwrap()
-        } else {
-            FxHashMap::default()
-        };
+        let after_key = trimmed_entry_vec
+            .last()
+            .map(|bucket| {
+                let (intermediate_key, _entry) = bucket;
+                intermediate_key
+                    .iter()
+                    .enumerate()
+                    .map(|(idx, intermediate_key)| {
+                        let source = &req.sources[idx];
+                        (source.name().to_string(), intermediate_key.clone().into())
+                    })
+                    .collect()
+            })
+            .unwrap_or_default();

        let buckets = trimmed_entry_vec
            .into_iter()
--- a/src/aggregation/metric/cardinality.rs
+++ b/src/aggregation/metric/cardinality.rs
--- a/src/aggregation/metric/extended_stats.rs
+++ b/src/aggregation/metric/extended_stats.rs
@@ -399,6 +399,26 @@ impl SegmentAggregationCollector for SegmentExtendedStatsCollector {
        }
        Ok(())
    }
+
+    fn compute_metric_value(
+        &self,
+        bucket_id: BucketId,
+        sub_agg_name: &str,
+        sub_agg_property: &str,
+        _agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64> {
+        if self.name != sub_agg_name {
+            return None;
+        }
+        let extended = self.buckets.get(bucket_id as usize)?;
+        // Finalize is a pure read of accumulators — calling it here for the cutoff sort
+        // doesn't disturb the eventual intermediate result.
+        extended
+            .finalize()
+            .get_value(sub_agg_property)
+            .ok()
+            .flatten()
+    }
 }

 #[cfg(test)]
--- a/src/aggregation/metric/mod.rs
+++ b/src/aggregation/metric/mod.rs
@@ -107,10 +107,9 @@ pub enum PercentileValues {
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 /// The entry when requesting percentiles with keyed: false
 pub struct PercentileValuesVecEntry {
-    /// Percentile
+    /// The percentile key (e.g. 1.0, 5.0, 25.0).
    pub key: f64,
-
-    /// Value at the percentile
+    /// The percentile value. `NaN` when there are no values.
    pub value: f64,
 }

--- a/src/aggregation/metric/percentiles.rs
+++ b/src/aggregation/metric/percentiles.rs
@@ -312,6 +312,26 @@ impl SegmentAggregationCollector for SegmentPercentilesCollector {
        }
        Ok(())
    }
+
+    fn compute_metric_value(
+        &self,
+        bucket_id: BucketId,
+        sub_agg_name: &str,
+        sub_agg_property: &str,
+        agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64> {
+        if agg_data.get_metric_req_data(self.accessor_idx).name != sub_agg_name {
+            return None;
+        }
+        let percentile: f64 = sub_agg_property.parse().ok()?;
+        if !(0.0..=100.0).contains(&percentile) {
+            return None;
+        }
+        let bucket = self.buckets.get(bucket_id as usize)?;
+        // DDSketch.quantile is a pure read; calling it here for the cutoff sort does
+        // not affect the intermediate state used for the final result.
+        bucket.sketch.quantile(percentile / 100.0).ok().flatten()
+    }
 }

 #[cfg(test)]
--- a/src/aggregation/metric/stats.rs
+++ b/src/aggregation/metric/stats.rs
@@ -321,6 +321,40 @@ impl<const COLUMN_TYPE_ID: u8> SegmentAggregationCollector
        }
        Ok(())
    }
+
+    fn compute_metric_value(
+        &self,
+        bucket_id: BucketId,
+        sub_agg_name: &str,
+        sub_agg_property: &str,
+        _agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64> {
+        if self.name != sub_agg_name {
+            return None;
+        }
+        let stats = self.buckets.get(bucket_id as usize)?;
+        // The property depends on what we're collecting:
+        //   - StatsType::Stats exposes count/sum/min/max/avg via dotted property.
+        //   - Single-value kinds (Sum/Count/Min/Max/Average) expect an empty property and return
+        //     the value they were configured to collect.
+        let prop = match self.collecting_for {
+            StatsType::Stats if !sub_agg_property.is_empty() => sub_agg_property,
+            StatsType::Sum if sub_agg_property.is_empty() => "sum",
+            StatsType::Count if sub_agg_property.is_empty() => "count",
+            StatsType::Max if sub_agg_property.is_empty() => "max",
+            StatsType::Min if sub_agg_property.is_empty() => "min",
+            StatsType::Average if sub_agg_property.is_empty() => "avg",
+            _ => return None,
+        };
+        match prop {
+            "count" => Some(stats.count as f64),
+            "sum" => Some(stats.sum),
+            "min" if stats.count > 0 => Some(stats.min),
+            "max" if stats.count > 0 => Some(stats.max),
+            "avg" if stats.count > 0 => Some(stats.sum / stats.count as f64),
+            _ => None,
+        }
+    }
 }

 #[inline]
--- a/src/aggregation/metric/top_hits.rs
+++ b/src/aggregation/metric/top_hits.rs
@@ -644,6 +644,17 @@ impl SegmentAggregationCollector for TopHitsSegmentCollector {
        );
        Ok(())
    }
+
+    fn compute_metric_value(
+        &self,
+        _bucket_id: BucketId,
+        _sub_agg_name: &str,
+        _sub_agg_property: &str,
+        _agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64> {
+        // top_hits is not a numeric metric and cannot be used as an order target.
+        None
+    }
 }

 #[cfg(test)]
--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -133,7 +133,7 @@ mod agg_limits;
 pub mod agg_req;
 pub mod agg_result;
 pub mod bucket;
-pub(crate) mod cached_sub_aggs;
+pub(crate) mod buffered_sub_aggs;
 mod collector;
 mod date;
 mod error;
--- a/src/aggregation/segment_agg_result.rs
+++ b/src/aggregation/segment_agg_result.rs
@@ -76,6 +76,31 @@ pub trait SegmentAggregationCollector: Debug {
    fn flush(&mut self, _agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
        Ok(())
    }
+
+    /// Compute the segment-level metric value of the named direct-child metric for `bucket_id`.
+    ///
+    /// Used by parent term aggs that order by a sub-aggregation: the parent sorts on
+    /// this value and cuts off at segment time, matching the approximation tradeoff
+    /// Elasticsearch makes for any sub-agg ordering.
+    ///
+    /// `sub_agg_property` is the dotted suffix (e.g. `"sum"` in `mystats.sum`); empty when
+    /// the metric is a single-value kind such as cardinality.
+    ///
+    /// Returns `None` on name mismatch, unknown property, empty bucket, or when the collector
+    /// has no numeric value to offer (e.g. `top_hits`). Implementations may finalize per-bucket
+    /// state (e.g. a percentile from a sketch); calls must be idempotent and leave it intact.
+    ///
+    /// No default impl on purpose: every collector must decide explicitly whether it
+    /// produces a metric value, forwards into children (single-bucket aggs), or rejects
+    /// the lookup. A silent `None` default would let a parent term agg's cutoff sort all
+    /// buckets to the same key and drop arbitrary winners.
+    fn compute_metric_value(
+        &self,
+        bucket_id: BucketId,
+        sub_agg_name: &str,
+        sub_agg_property: &str,
+        agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64>;
 }

 #[derive(Default)]
@@ -137,4 +162,21 @@ impl SegmentAggregationCollector for GenericSegmentAggregationResultsCollector {
        }
        Ok(())
    }
+
+    /// Resolves a metric value by delegating to the child collectors.
+    ///
+    /// The first child that answers with `Some` determines the result.
+    fn compute_metric_value(
+        &self,
+        bucket_id: BucketId,
+        sub_agg_name: &str,
+        sub_agg_property: &str,
+        agg_data: &AggregationsSegmentCtx,
+    ) -> Option<f64> {
+        // Probe the children in order and stop at the first value produced;
+        // `None` is returned when none of them yields one.
+        self.aggs.iter().find_map(|child| {
+            child.compute_metric_value(bucket_id, sub_agg_name, sub_agg_property, agg_data)
+        })
+    }
 }
--- a/src/codec/mod.rs
+++ b/src/codec/mod.rs
@@ -0,0 +1,237 @@
+/// Codec specific to postings data.
+pub mod postings;
+
+/// Codec specific to positions data.
+pub mod positions;
+
+/// Standard tantivy codec. This is the codec you use by default.
+pub mod standard;
+
+use std::io;
+
+pub use standard::StandardCodec;
+
+use crate::codec::positions::PositionsCodec;
+use crate::codec::postings::PostingsCodec;
+use crate::fieldnorm::FieldNormReader;
+use crate::postings::{Postings, TermInfo};
+use crate::query::score_combiner::DoNothingCombiner;
+use crate::query::term_query::TermScorer;
+use crate::query::{box_scorer, Bm25Weight, BufferedUnionScorer, Scorer, SumCombiner};
+use crate::schema::IndexRecordOption;
+use crate::{DocId, InvertedIndexReader, Score};
+
+/// A codec describes how data is laid out on disk.
+pub trait Codec: Clone + std::fmt::Debug + Send + Sync + 'static {
+    /// The specific postings codec used by this codec.
+    type PostingsCodec: PostingsCodec;
+
+    /// The specific positions codec used by this codec.
+    type PositionsCodec: PositionsCodec;
+
+    /// ID of the codec. It should be unique to your codec.
+    /// Make it human-readable, descriptive, short and unique.
+    const ID: &'static str;
+
+    /// Load codec based on the codec configuration.
+    fn from_json_props(json_value: &serde_json::Value) -> crate::Result<Self>;
+
+    /// Get codec configuration.
+    fn to_json_props(&self) -> serde_json::Value;
+
+    /// Returns the postings codec.
+    fn postings_codec(&self) -> &Self::PostingsCodec;
+
+    /// Returns the positions codec.
+    fn positions_codec(&self) -> &Self::PositionsCodec;
+}
+
+/// Object-safe codec is a Codec that can be used in a trait object.
+///
+/// The point of it is to offer a way to use a codec without a proliferation of generics.
+pub trait ObjectSafeCodec: 'static + Send + Sync {
+    /// Loads a type-erased Postings object for the given term.
+    ///
+    /// If the schema used to build the index did not provide enough
+    /// information to match the requested `option`, a Postings is still
+    /// returned in a best-effort manner.
+    fn load_postings_type_erased(
+        &self,
+        term_info: &TermInfo,
+        option: IndexRecordOption,
+        inverted_index_reader: &InvertedIndexReader,
+    ) -> io::Result<Box<dyn Postings>>;
+
+    /// Loads a type-erased TermScorer object for the given term.
+    ///
+    /// If the schema used to build the index did not provide enough
+    /// information to match the requested `option`, a TermScorer is still
+    /// returned in a best-effort manner.
+    ///
+    /// The point of this contraption is that the returned TermScorer is backed,
+    /// not by `Box<dyn Postings>` but by the codec's concrete Postings type.
+    fn load_term_scorer_type_erased(
+        &self,
+        term_info: &TermInfo,
+        option: IndexRecordOption,
+        inverted_index_reader: &InvertedIndexReader,
+        fieldnorm_reader: FieldNormReader,
+        similarity_weight: Bm25Weight,
+    ) -> io::Result<Box<dyn Scorer>>;
+
+    /// Loads a type-erased PhraseScorer object for the given terms.
+    ///
+    /// There is no `option` argument here. Presumably, if the schema used to build
+    /// the index did not record positions, a PhraseScorer is still returned in a
+    /// best-effort manner — TODO confirm.
+    ///
+    /// The point of this contraption is that the returned PhraseScorer is backed,
+    /// not by `Box<dyn Postings>` but by the codec's concrete Postings type.
+    fn new_phrase_scorer_type_erased(
+        &self,
+        term_infos: &[(usize, TermInfo)],
+        similarity_weight: Option<Bm25Weight>,
+        fieldnorm_reader: FieldNormReader,
+        slop: u32,
+        inverted_index_reader: &InvertedIndexReader,
+    ) -> io::Result<Box<dyn Scorer>>;
+
+    /// Performs a for_each_pruning operation on the given scorer.
+    ///
+    /// The function will go through matching documents and call the callback
+    /// function for all docs with a score exceeding the threshold.
+    ///
+    /// The callback returns a larger threshold value,
+    /// meant to update the threshold value.
+    ///
+    /// If the codec and the scorer allow it, this function can rely on
+    /// optimizations like the block-max wand.
+    fn for_each_pruning(
+        &self,
+        threshold: Score,
+        scorer: Box<dyn Scorer>,
+        callback: &mut dyn FnMut(DocId, Score) -> Score,
+    );
+
+    /// Builds a union scorer, possibly specialized if all scorers are
+    /// `TermScorer`s backed by this codec's concrete Postings type.
+    fn build_union_scorer_with_sum_combiner(
+        &self,
+        scorers: Vec<Box<dyn Scorer>>,
+        num_docs: DocId,
+        score_combiner_type: SumOrDoNothingCombiner,
+    ) -> Box<dyn Scorer>;
+}
+
+impl<TCodec: Codec> ObjectSafeCodec for TCodec {
+    fn load_postings_type_erased(
+        &self,
+        term_info: &TermInfo,
+        option: IndexRecordOption,
+        inverted_index_reader: &InvertedIndexReader,
+    ) -> io::Result<Box<dyn Postings>> {
+        let postings = inverted_index_reader
+            .read_postings_from_terminfo_specialized(term_info, option, self)?;
+        Ok(Box::new(postings))
+    }
+
+    fn load_term_scorer_type_erased(
+        &self,
+        term_info: &TermInfo,
+        option: IndexRecordOption,
+        inverted_index_reader: &InvertedIndexReader,
+        fieldnorm_reader: FieldNormReader,
+        similarity_weight: Bm25Weight,
+    ) -> io::Result<Box<dyn Scorer>> {
+        let scorer = inverted_index_reader.new_term_scorer_specialized(
+            term_info,
+            option,
+            fieldnorm_reader,
+            similarity_weight,
+            self,
+        )?;
+        Ok(box_scorer(scorer))
+    }
+
+    fn new_phrase_scorer_type_erased(
+        &self,
+        term_infos: &[(usize, TermInfo)],
+        similarity_weight: Option<Bm25Weight>,
+        fieldnorm_reader: FieldNormReader,
+        slop: u32,
+        inverted_index_reader: &InvertedIndexReader,
+    ) -> io::Result<Box<dyn Scorer>> {
+        let scorer = inverted_index_reader.new_phrase_scorer_type_specialized(
+            term_infos,
+            similarity_weight,
+            fieldnorm_reader,
+            slop,
+            self,
+        )?;
+        Ok(box_scorer(scorer))
+    }
+
+    fn build_union_scorer_with_sum_combiner(
+        &self,
+        scorers: Vec<Box<dyn Scorer>>,
+        num_docs: DocId,
+        sum_or_do_nothing_combiner: SumOrDoNothingCombiner,
+    ) -> Box<dyn Scorer> {
+        if !scorers.iter().all(|scorer| {
+            scorer.is::<TermScorer<<<Self as Codec>::PostingsCodec as PostingsCodec>::Postings>>()
+        }) {
+            return box_scorer(BufferedUnionScorer::build(
+                scorers,
+                SumCombiner::default,
+                num_docs,
+            ));
+        }
+        let specialized_scorers: Vec<
+            TermScorer<<<Self as Codec>::PostingsCodec as PostingsCodec>::Postings>,
+        > = scorers
+            .into_iter()
+            .map(|scorer| {
+                *scorer.downcast::<TermScorer<_>>().ok().expect(
+                    "Downcast failed despite the fact we already checked the type was correct",
+                )
+            })
+            .collect();
+        match sum_or_do_nothing_combiner {
+            SumOrDoNothingCombiner::Sum => box_scorer(BufferedUnionScorer::build(
+                specialized_scorers,
+                SumCombiner::default,
+                num_docs,
+            )),
+            SumOrDoNothingCombiner::DoNothing => box_scorer(BufferedUnionScorer::build(
+                specialized_scorers,
+                DoNothingCombiner::default,
+                num_docs,
+            )),
+        }
+    }
+
+    fn for_each_pruning(
+        &self,
+        threshold: Score,
+        scorer: Box<dyn Scorer>,
+        callback: &mut dyn FnMut(DocId, Score) -> Score,
+    ) {
+        // Give the postings codec a chance to run its accelerated pruning loop first.
+        let accelerated = <TCodec as Codec>::PostingsCodec::try_accelerated_for_each_pruning(
+            threshold, scorer, callback,
+        );
+        // On `Err` the codec hands the scorer back: run the scorer's own pruning loop.
+        if let Err(mut fallback_scorer) = accelerated {
+            fallback_scorer.for_each_pruning(threshold, callback);
+        }
+    }
+}
+
+/// Selects the score combiner of a union scorer: `SumCombiner` or `DoNothingCombiner`.
+#[derive(Copy, Clone)]
+pub enum SumOrDoNothingCombiner {
+    /// Sum scores together
+    Sum,
+    /// Do not track any score.
+    DoNothing,
+}
--- a/src/codec/positions/mod.rs
+++ b/src/codec/positions/mod.rs
@@ -0,0 +1,49 @@
+use std::io;
+
+use common::OwnedBytes;
+
+/// Codec for the positions file.
+pub trait PositionsCodec: Send + Sync + 'static {
+    /// The serializer type created by this codec.
+    type Serializer<W: io::Write>: PositionsSerializer<W>;
+    /// The reader type created by this codec.
+    type Reader: PositionsReader;
+
+    /// Creates a new positions serializer writing into `writer`.
+    fn new_serializer<W: io::Write>(&self, writer: W) -> Self::Serializer<W>;
+
+    /// Opens a positions reader from the given raw byte slice.
+    fn open_reader(&self, data: OwnedBytes) -> io::Result<Self::Reader>;
+}
+
+/// Serializes delta-encoded positions for all terms in a field.
+///
+/// A single serializer is reused across all terms. Clients must call
+/// `close_term` after each term, then `close` once when the field is done.
+pub trait PositionsSerializer<W: io::Write> {
+    /// Returns the total number of bytes written since this serializer was created.
+    fn written_bytes(&self) -> u64;
+
+    /// Appends delta-encoded positions for the current document.
+    fn write_positions_delta(&mut self, positions_delta: &[u32]);
+
+    /// Finalizes and flushes positions data for the current term.
+    fn close_term(&mut self) -> io::Result<()>;
+
+    /// Flushes the underlying writer. Must be called once after all terms are done.
+    fn close(self) -> io::Result<()>;
+}
+
+/// Reads delta-encoded positions from a byte slice.
+pub trait PositionsReader: Send + 'static {
+    /// Fills `output` with delta-encoded positions starting at `offset`.
+    ///
+    /// Hidden contract: offset values should be non-decreasing for best performance;
+    /// passing a lower offset resets internal state and incurs extra work.
+    fn read(&mut self, offset: u64, output: &mut [u32]);
+
+    /// Returns a heap-allocated clone of this reader.
+    ///
+    /// Needed to clone `SegmentPostings`, which owns a boxed reader.
+    fn clone_box(&self) -> Box<dyn PositionsReader>;
+}
--- a/src/query/boolean_query/block_wand.rs
+++ b/src/query/boolean_query/block_wand.rs
@@ -1,5 +1,6 @@
 use std::ops::{Deref, DerefMut};

+use crate::codec::postings::PostingsWithBlockMax;
 use crate::query::term_query::TermScorer;
 use crate::query::Scorer;
 use crate::{DocId, DocSet, Score, TERMINATED};
@@ -13,8 +14,8 @@ use crate::{DocId, DocSet, Score, TERMINATED};
 /// We always have `before_pivot_len` < `pivot_len`.
 ///
 /// `None` is returned if we establish that no document can exceed the threshold.
-fn find_pivot_doc(
-    term_scorers: &[TermScorerWithMaxScore],
+fn find_pivot_doc<TPostings: PostingsWithBlockMax>(
+    term_scorers: &[TermScorerWithMaxScore<TPostings>],
    threshold: Score,
 ) -> Option<(usize, usize, DocId)> {
    let mut max_score = 0.0;
@@ -46,11 +47,11 @@ fn find_pivot_doc(
 /// the next doc candidate defined by the min of `last_doc_in_block + 1` for
 /// scorer in scorers[..pivot_len] and `scorer.doc()` for scorer in scorers[pivot_len..].
 /// Note: before and after calling this method, scorers need to be sorted by their `.doc()`.
-fn block_max_was_too_low_advance_one_scorer(
-    scorers: &mut [TermScorerWithMaxScore],
+fn block_max_was_too_low_advance_one_scorer<TPostings: PostingsWithBlockMax>(
+    scorers: &mut [TermScorerWithMaxScore<TPostings>],
    pivot_len: usize,
 ) {
-    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
+    debug_assert!(scorers.iter().map(|scorer| scorer.doc()).is_sorted());
    let mut scorer_to_seek = pivot_len - 1;
    let mut global_max_score = scorers[scorer_to_seek].max_score;
    let mut doc_to_seek_after = scorers[scorer_to_seek].last_doc_in_block();
@@ -76,13 +77,16 @@ fn block_max_was_too_low_advance_one_scorer(
    scorers[scorer_to_seek].seek(doc_to_seek_after);

    restore_ordering(scorers, scorer_to_seek);
-    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
+    debug_assert!(scorers.iter().map(|scorer| scorer.doc()).is_sorted());
 }

 // Given a list of term_scorers and a `ord` and assuming that `term_scorers[ord]` is sorted
 // except term_scorers[ord] that might be in advance compared to its ranks,
 // bubble up term_scorers[ord] in order to restore the ordering.
-fn restore_ordering(term_scorers: &mut [TermScorerWithMaxScore], ord: usize) {
+fn restore_ordering<TPostings: PostingsWithBlockMax>(
+    term_scorers: &mut [TermScorerWithMaxScore<TPostings>],
+    ord: usize,
+) {
    let doc = term_scorers[ord].doc();
    for i in ord + 1..term_scorers.len() {
        if term_scorers[i].doc() >= doc {
@@ -90,16 +94,17 @@ fn restore_ordering(term_scorers: &mut [TermScorerWithMaxScore], ord: usize) {
        }
        term_scorers.swap(i, i - 1);
    }
-    debug_assert!(is_sorted(term_scorers.iter().map(|scorer| scorer.doc())));
+    debug_assert!(term_scorers.iter().map(|scorer| scorer.doc()).is_sorted());
 }

 // Attempts to advance all term_scorers between `&term_scorers[0..before_len]` to the pivot.
 // If this works, return true.
 // If this fails (ie: one of the term_scorer does not contain `pivot_doc` and seek goes past the
 // pivot), reorder the term_scorers to ensure the list is still sorted and returns `false`.
-// If a term_scorer reach TERMINATED in the process return false remove the term_scorer and return.
-fn align_scorers(
-    term_scorers: &mut Vec<TermScorerWithMaxScore>,
+// If a term_scorer reaches TERMINATED in the process, remove that term_scorer and
+// return false.
+fn align_scorers<TPostings: PostingsWithBlockMax>(
+    term_scorers: &mut Vec<TermScorerWithMaxScore<TPostings>>,
    pivot_doc: DocId,
    before_pivot_len: usize,
 ) -> bool {
@@ -126,7 +131,10 @@ fn align_scorers(
 // Assumes terms_scorers[..pivot_len] are positioned on the same doc (pivot_doc).
 // Advance term_scorers[..pivot_len] and out of these removes the terminated scores.
 // Restores the ordering of term_scorers.
-fn advance_all_scorers_on_pivot(term_scorers: &mut Vec<TermScorerWithMaxScore>, pivot_len: usize) {
+fn advance_all_scorers_on_pivot<TPostings: PostingsWithBlockMax>(
+    term_scorers: &mut Vec<TermScorerWithMaxScore<TPostings>>,
+    pivot_len: usize,
+) {
    for term_scorer in &mut term_scorers[..pivot_len] {
        term_scorer.advance();
    }
@@ -145,31 +153,32 @@ fn advance_all_scorers_on_pivot(term_scorers: &mut Vec<TermScorerWithMaxScore>,
 /// Implements the WAND (Weak AND) algorithm for dynamic pruning
 /// described in the paper "Faster Top-k Document Retrieval Using Block-Max Indexes".
 /// Link: <http://engineering.nyu.edu/~suel/papers/bmw.pdf>
-pub fn block_wand(
-    mut scorers: Vec<TermScorer>,
+pub fn block_wand<TPostings: PostingsWithBlockMax>(
+    mut scorers: Vec<TermScorer<TPostings>>,
    mut threshold: Score,
    callback: &mut dyn FnMut(u32, Score) -> Score,
 ) {
-    let mut scorers: Vec<TermScorerWithMaxScore> = scorers
+    scorers.retain(|scorer| scorer.doc() < TERMINATED);
+    if scorers.len() == 1 {
+        let scorer = scorers.pop().unwrap();
+        return block_wand_single_scorer(scorer, threshold, callback);
+    }
+    let mut scorers: Vec<TermScorerWithMaxScore<TPostings>> = scorers
        .iter_mut()
        .map(TermScorerWithMaxScore::from)
        .collect();
-    scorers.sort_by_key(|scorer| scorer.doc());
    // At this point we need to ensure that the scorers are sorted!
-    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
+    scorers.sort_by_key(|scorer| scorer.doc());
    while let Some((before_pivot_len, pivot_len, pivot_doc)) =
        find_pivot_doc(&scorers[..], threshold)
    {
-        debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
+        debug_assert!(scorers.iter().map(|scorer| scorer.doc()).is_sorted());
        debug_assert_ne!(pivot_doc, TERMINATED);
        debug_assert!(before_pivot_len < pivot_len);

        let block_max_score_upperbound: Score = scorers[..pivot_len]
            .iter_mut()
-            .map(|scorer| {
-                scorer.seek_block(pivot_doc);
-                scorer.block_max_score()
-            })
+            .map(|scorer| scorer.seek_block_max(pivot_doc))
            .sum();

        // Beware after shallow advance, skip readers can be in advance compared to
@@ -220,21 +229,22 @@ pub fn block_wand(
 ///   - On a block, advance until the end and execute `callback` when the doc score is greater or
 ///     equal to the `threshold`.
 pub fn block_wand_single_scorer(
-    mut scorer: TermScorer,
+    mut scorer: TermScorer<impl PostingsWithBlockMax>,
    mut threshold: Score,
    callback: &mut dyn FnMut(u32, Score) -> Score,
 ) {
    let mut doc = scorer.doc();
+    let mut block_max_score = scorer.seek_block_max(doc);
    loop {
        // We position the scorer on a block that can reach
        // the threshold.
-        while scorer.block_max_score() < threshold {
+        while block_max_score < threshold {
            let last_doc_in_block = scorer.last_doc_in_block();
            if last_doc_in_block == TERMINATED {
                return;
            }
            doc = last_doc_in_block + 1;
-            scorer.seek_block(doc);
+            block_max_score = scorer.seek_block_max(doc);
        }
        // Seek will effectively load that block.
        doc = scorer.seek(doc);
@@ -256,48 +266,38 @@ pub fn block_wand_single_scorer(
            }
        }
        doc += 1;
-        scorer.seek_block(doc);
+        block_max_score = scorer.seek_block_max(doc);
    }
 }

-struct TermScorerWithMaxScore<'a> {
-    scorer: &'a mut TermScorer,
+struct TermScorerWithMaxScore<'a, TPostings: PostingsWithBlockMax> {
+    scorer: &'a mut TermScorer<TPostings>,
    max_score: Score,
 }

-impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
-    fn from(scorer: &'a mut TermScorer) -> Self {
+impl<'a, TPostings: PostingsWithBlockMax> From<&'a mut TermScorer<TPostings>>
+    for TermScorerWithMaxScore<'a, TPostings>
+{
+    fn from(scorer: &'a mut TermScorer<TPostings>) -> Self {
        let max_score = scorer.max_score();
        TermScorerWithMaxScore { scorer, max_score }
    }
 }

-impl Deref for TermScorerWithMaxScore<'_> {
-    type Target = TermScorer;
+impl<TPostings: PostingsWithBlockMax> Deref for TermScorerWithMaxScore<'_, TPostings> {
+    type Target = TermScorer<TPostings>;

    fn deref(&self) -> &Self::Target {
        self.scorer
    }
 }

-impl DerefMut for TermScorerWithMaxScore<'_> {
+impl<TPostings: PostingsWithBlockMax> DerefMut for TermScorerWithMaxScore<'_, TPostings> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.scorer
    }
 }

-fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
-    if let Some(first) = it.next() {
-        let mut prev = first;
-        for doc in it {
-            if doc < prev {
-                return false;
-            }
-            prev = doc;
-        }
-    }
-    true
-}
 #[cfg(test)]
 mod tests {
    use std::cmp::Ordering;
--- a/src/codec/postings/mod.rs
+++ b/src/codec/postings/mod.rs
@@ -0,0 +1,139 @@
+use std::io;
+
+/// Block-max WAND algorithm.
+pub mod block_wand;
+use common::OwnedBytes;
+
+use crate::codec::positions::PositionsReader;
+use crate::fieldnorm::FieldNormReader;
+use crate::postings::Postings;
+use crate::query::{Bm25Weight, Scorer};
+use crate::schema::IndexRecordOption;
+use crate::{DocId, Score};
+
+/// Postings codec.
+pub trait PostingsCodec: Send + Sync + 'static {
+    /// Serializer type for the postings codec.
+    type PostingsSerializer: PostingsSerializer;
+    /// Postings type for the postings codec.
+    type Postings: Postings + Clone;
+    /// Creates a new postings serializer.
+    fn new_serializer(
+        &self,
+        avg_fieldnorm: Score,
+        mode: IndexRecordOption,
+        fieldnorm_reader: Option<FieldNormReader>,
+    ) -> Self::PostingsSerializer;
+
+    /// Loads postings.
+    ///
+    /// `record_option` is the option that was passed at indexing time.
+    /// `requested_option` is the option that is requested by the caller.
+    ///
+    /// For instance, we may have term_freq in the posting list
+    /// but we can skip decompressing it as we read the posting list.
+    ///
+    /// If `record_option` does not support the requested option,
+    /// this method does NOT return an error and will in fact restrict
+    /// `requested_option` to what is available.
+    ///
+    /// `position_reader` is `Some` iff `requested_option` includes positions.
+    /// It is already opened by the caller via the codec's `PositionsCodec`.
+    fn load_postings(
+        &self,
+        doc_freq: u32,
+        postings_data: OwnedBytes,
+        record_option: IndexRecordOption,
+        requested_option: IndexRecordOption,
+        position_reader: Option<Box<dyn PositionsReader>>,
+    ) -> io::Result<Self::Postings>;
+
+    /// If your codec supports ways to accelerate `for_each_pruning`, this is
+    /// where you should implement them.
+    ///
+    /// Returning `Err(scorer)` without mutating the scorer nor calling the callback function
+    /// is never "wrong". It just leaves the responsibility to the caller to call a fallback
+    /// implementation on the scorer. This is what the default implementation does.
+    ///
+    /// If your codec supports BlockMax-Wand, you just need to have your
+    /// postings implement `PostingsWithBlockMax` and copy what is done in the StandardPostings
+    /// codec to enable it.
+    fn try_accelerated_for_each_pruning(
+        _threshold: Score,
+        scorer: Box<dyn Scorer>,
+        _callback: &mut dyn FnMut(DocId, Score) -> Score,
+    ) -> Result<(), Box<dyn Scorer>> {
+        Err(scorer)
+    }
+}
+
+/// A postings serializer is a listener that is in charge of serializing postings.
+///
+/// IO is done only once per postings, once all of the data has been received.
+/// A serializer will therefore contain internal buffers.
+///
+/// A serializer is created once and recycled for all postings.
+///
+/// Clients should use PostingsSerializer as follows.
+/// ```text
+/// // First postings list
+/// serializer.new_term(2, true);
+/// serializer.write_doc(2, 1);
+/// serializer.write_doc(6, 2);
+/// serializer.close_term(2, &mut wrt)?;
+/// // Second postings list
+/// serializer.new_term(1, true);
+/// serializer.write_doc(3, 1);
+/// serializer.close_term(1, &mut wrt)?;
+/// ```
+pub trait PostingsSerializer {
+    /// The `term_doc_freq` here is the number of documents
+    /// in the postings list.
+    ///
+    /// It can be used to compute the idf that will be used for the
+    /// blockmax parameters.
+    ///
+    /// If not available (e.g. term frequencies are not collected, which
+    /// disables blockwand), the `term_doc_freq` passed will be set to 0.
+    fn new_term(&mut self, term_doc_freq: u32, record_term_freq: bool);
+
+    /// Codec-specific per-term payload.
+    ///
+    /// It is supplied right after `new_term` and before any `write_doc`, so the
+    /// codec can let it influence how the postings list is encoded.
+    ///
+    /// Hidden contract: `new_term` MUST reset any per-term payload state to its
+    /// default. This method is only called for terms that actually have a
+    /// payload registered, so a codec cannot rely on it being called for every
+    /// term.
+    ///
+    /// The default implementation ignores the payload.
+    fn set_term_payload(&mut self, _payload: &dyn std::any::Any) {}
+
+    /// Records a new document id for the current term.
+    /// The serializer may ignore it.
+    fn write_doc(&mut self, doc_id: DocId, term_freq: u32);
+
+    /// Closes the current term and writes the associated postings list to `wrt`.
+    fn close_term(&mut self, doc_freq: u32, wrt: &mut impl io::Write) -> io::Result<()>;
+}
+
+/// A light complement interface to `Postings` to allow block-max wand acceleration.
+pub trait PostingsWithBlockMax: Postings {
+    /// Moves the postings to the block containing `target_doc` and returns
+    /// an upper bound of the score for documents in the block.
+    ///
+    /// Warning: calling this method may leave the postings in an invalid state.
+    /// Callers are required to call `seek` before calling any other
+    /// `Postings` method (like `doc` / `advance` etc.).
+    fn seek_block_max(
+        &mut self,
+        target_doc: crate::DocId,
+        fieldnorm_reader: &FieldNormReader,
+        similarity_weight: &Bm25Weight,
+    ) -> Score;
+
+    /// Returns the last document in the current block (or `TERMINATED` if this
+    /// is the last block).
+    fn last_doc_in_block(&self) -> crate::DocId;
+}
--- a/src/codec/standard/mod.rs
+++ b/src/codec/standard/mod.rs
@@ -0,0 +1,44 @@
+use serde::{Deserialize, Serialize};
+
+use crate::codec::standard::positions::StandardPositionsCodec;
+use crate::codec::standard::postings::StandardPostingsCodec;
+use crate::codec::Codec;
+
+/// Tantivy's default postings codec.
+pub mod postings;
+
+/// Tantivy's default positions codec.
+pub mod positions;
+
+/// Tantivy's default codec.
+#[derive(Debug, Default, Clone, Serialize, Deserialize)]
+pub struct StandardCodec;
+
+impl Codec for StandardCodec {
+    type PostingsCodec = StandardPostingsCodec;
+    type PositionsCodec = StandardPositionsCodec;
+
+    const ID: &'static str = "tantivy-default";
+
+    fn from_json_props(json_value: &serde_json::Value) -> crate::Result<Self> {
+        if !json_value.is_null() {
+            // `Value: Display` prints the rejected JSON as-is, whatever its type.
+            return Err(crate::TantivyError::InvalidArgument(format!(
+                "Codec property for the StandardCodec are unexpected. expected null, got {json_value}"
+            )));
+        }
+        Ok(StandardCodec)
+    }
+
+    fn to_json_props(&self) -> serde_json::Value {
+        serde_json::Value::Null
+    }
+
+    fn postings_codec(&self) -> &Self::PostingsCodec {
+        &StandardPostingsCodec
+    }
+
+    fn positions_codec(&self) -> &Self::PositionsCodec {
+        &StandardPositionsCodec
+    }
+}
--- a/src/codec/standard/positions/mod.rs
+++ b/src/codec/standard/positions/mod.rs
@@ -0,0 +1,50 @@
+use std::io;
+
+use common::OwnedBytes;
+
+use crate::codec::positions::{PositionsCodec, PositionsReader, PositionsSerializer};
+use crate::positions::{PositionReader, PositionSerializer};
+
+/// The default positions codec for tantivy.
+pub struct StandardPositionsCodec;
+
+impl PositionsCodec for StandardPositionsCodec {
+    type Serializer<W: io::Write> = PositionSerializer<W>;
+    type Reader = PositionReader;
+
+    fn new_serializer<W: io::Write>(&self, writer: W) -> Self::Serializer<W> {
+        PositionSerializer::new(writer)
+    }
+
+    fn open_reader(&self, data: OwnedBytes) -> io::Result<Self::Reader> {
+        PositionReader::open(data)
+    }
+}
+
+impl<W: io::Write> PositionsSerializer<W> for PositionSerializer<W> {
+    fn written_bytes(&self) -> u64 {
+        PositionSerializer::written_bytes(self)
+    }
+
+    fn write_positions_delta(&mut self, positions_delta: &[u32]) {
+        PositionSerializer::write_positions_delta(self, positions_delta);
+    }
+
+    fn close_term(&mut self) -> io::Result<()> {
+        PositionSerializer::close_term(self)
+    }
+
+    fn close(self) -> io::Result<()> {
+        PositionSerializer::close(self)
+    }
+}
+
+impl PositionsReader for PositionReader {
+    fn read(&mut self, offset: u64, output: &mut [u32]) {
+        PositionReader::read(self, offset, output);
+    }
+
+    fn clone_box(&self) -> Box<dyn PositionsReader> {
+        Box::new(self.clone())
+    }
+}
--- a/src/codec/standard/postings/block.rs
+++ b/src/codec/standard/postings/block.rs
@@ -0,0 +1,50 @@
+use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
+use crate::DocId;
+
+pub struct Block {
+    doc_ids: [DocId; COMPRESSION_BLOCK_SIZE],
+    term_freqs: [u32; COMPRESSION_BLOCK_SIZE],
+    len: usize,
+}
+
+impl Block {
+    pub fn new() -> Self {
+        Block {
+            doc_ids: [0u32; COMPRESSION_BLOCK_SIZE],
+            term_freqs: [0u32; COMPRESSION_BLOCK_SIZE],
+            len: 0,
+        }
+    }
+
+    pub fn doc_ids(&self) -> &[DocId] {
+        &self.doc_ids[..self.len]
+    }
+
+    pub fn term_freqs(&self) -> &[u32] {
+        &self.term_freqs[..self.len]
+    }
+
+    pub fn clear(&mut self) {
+        self.len = 0;
+    }
+
+    pub fn append_doc(&mut self, doc: DocId, term_freq: u32) {
+        let len = self.len;
+        self.doc_ids[len] = doc;
+        self.term_freqs[len] = term_freq;
+        self.len = len + 1;
+    }
+
+    pub fn is_full(&self) -> bool {
+        self.len == COMPRESSION_BLOCK_SIZE
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
+    pub fn last_doc(&self) -> DocId {
+        assert_eq!(self.len, COMPRESSION_BLOCK_SIZE);
+        self.doc_ids[COMPRESSION_BLOCK_SIZE - 1]
+    }
+}
--- a/src/codec/standard/postings/block_segment_postings.rs
+++ b/src/codec/standard/postings/block_segment_postings.rs
@@ -1,28 +1,19 @@
 use std::io;

-use common::VInt;
+use common::{OwnedBytes, VInt};

-use crate::directory::{FileSlice, OwnedBytes};
+use crate::codec::standard::postings::skip::{BlockInfo, SkipReader};
+use crate::codec::standard::postings::FreqReadingOption;
 use crate::fieldnorm::FieldNormReader;
-use crate::postings::compression::{BlockDecoder, VIntDecoder, COMPRESSION_BLOCK_SIZE};
-use crate::postings::{BlockInfo, FreqReadingOption, SkipReader};
+use crate::postings::compression::{BlockDecoder, VIntDecoder as _, COMPRESSION_BLOCK_SIZE};
 use crate::query::Bm25Weight;
 use crate::schema::IndexRecordOption;
 use crate::{DocId, Score, TERMINATED};

-fn max_score<I: Iterator<Item = Score>>(mut it: I) -> Option<Score> {
-    it.next().map(|first| it.fold(first, Score::max))
-}
-
 /// `BlockSegmentPostings` is a cursor iterating over blocks
 /// of documents.
-///
-/// # Warning
-///
-/// While it is useful for some very specific high-performance
-/// use cases, you should prefer using `SegmentPostings` for most usage.
 #[derive(Clone)]
-pub struct BlockSegmentPostings {
+pub(crate) struct BlockSegmentPostings {
    pub(crate) doc_decoder: BlockDecoder,
    block_loaded: bool,
    freq_decoder: BlockDecoder,
@@ -88,7 +79,7 @@ fn split_into_skips_and_postings(
 }

 impl BlockSegmentPostings {
-    /// Opens a `BlockSegmentPostings`.
+    /// Opens a `BlockSegmentPostings` from the raw postings bytes.
    /// `doc_freq` is the number of documents in the posting list.
    /// `record_option` represents the amount of data available according to the schema.
    /// `requested_option` is the amount of data requested by the user.
@@ -96,11 +87,10 @@ impl BlockSegmentPostings {
    /// term frequency blocks.
    pub(crate) fn open(
        doc_freq: u32,
-        data: FileSlice,
+        bytes: OwnedBytes,
        mut record_option: IndexRecordOption,
        requested_option: IndexRecordOption,
    ) -> io::Result<BlockSegmentPostings> {
-        let bytes = data.read_bytes()?;
        let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, bytes)?;
        let skip_reader = match skip_data_opt {
            Some(skip_data) => {
@@ -138,6 +128,86 @@ impl BlockSegmentPostings {
        block_segment_postings.load_block();
        Ok(block_segment_postings)
    }
+}
+
+fn max_score<I: Iterator<Item = Score>>(mut it: I) -> Option<Score> {
+    it.next().map(|first| it.fold(first, Score::max))
+}
+
+impl BlockSegmentPostings {
+    /// Returns the overall number of documents in the block postings.
+    /// It does not take into account whether documents are deleted or not.
+    ///
+    /// This `doc_freq` is simply the sum of the lengths of all of the
+    /// blocks, and it does not take into account deleted documents.
+    pub fn doc_freq(&self) -> u32 {
+        self.doc_freq
+    }
+
+    /// Returns the array of docs in the current block.
+    ///
+    /// Before the first call to `.advance()`, the block
+    /// returned by `.docs()` is empty.
+    #[inline]
+    pub fn docs(&self) -> &[DocId] {
+        debug_assert!(self.block_loaded);
+        self.doc_decoder.output_array()
+    }
+
+    /// Return the document at index `idx` of the block.
+    #[inline]
+    pub fn doc(&self, idx: usize) -> u32 {
+        self.doc_decoder.output(idx)
+    }
+
+    /// Return the array of `term freq` in the block.
+    #[inline]
+    pub fn freqs(&self) -> &[u32] {
+        debug_assert!(self.block_loaded);
+        self.freq_decoder.output_array()
+    }
+
+    /// Return the frequency at index `idx` of the block.
+    #[inline]
+    pub fn freq(&self, idx: usize) -> u32 {
+        debug_assert!(self.block_loaded);
+        self.freq_decoder.output(idx)
+    }
+
+    /// Positions on a block that may contain `target_doc`.
+    ///
+    /// If all docs are smaller than target, the block loaded may be empty,
+    /// or be the last, incomplete VInt block.
+    pub fn seek(&mut self, target_doc: DocId) -> usize {
+        // Move to the block that might contain our document.
+        self.seek_block_without_loading(target_doc);
+        self.load_block();
+
+        // At this point we are on the block that might contain our document.
+        let doc = self.doc_decoder.seek_within_block(target_doc);
+
+        // The last block is not full and padded with TERMINATED,
+        // so we are guaranteed to have at least one value (real or padding)
+        // that is >= target_doc.
+        debug_assert!(doc < COMPRESSION_BLOCK_SIZE);
+
+        // `doc` is now the position, within the block, of the first element >= `target_doc`.
+        // If all docs are smaller than target, the current block is incomplete and padded
+        // with TERMINATED. After the search, the cursor points to the first TERMINATED.
+        doc
+    }
+
+    pub fn position_offset(&self) -> u64 {
+        self.skip_reader.position_offset()
+    }
+
+    /// Advance to the next block.
+    pub fn advance(&mut self) {
+        self.skip_reader.advance();
+        self.block_loaded = false;
+        self.block_max_score_cache = None;
+        self.load_block();
+    }

    /// Returns the block_max_score for the current block.
    /// It does not require the block to be loaded. For instance, it is ok to call this method
@@ -160,7 +230,7 @@ impl BlockSegmentPostings {
        }
        // this is the last block of the segment posting list.
        // If it is actually loaded, we can compute block max manually.
-        if self.block_is_loaded() {
+        if self.block_loaded {
            let docs = self.doc_decoder.output_array().iter().cloned();
            let freqs = self.freq_decoder.output_array().iter().cloned();
            let bm25_scores = docs.zip(freqs).map(|(doc, term_freq)| {
@@ -177,112 +247,25 @@ impl BlockSegmentPostings {
        // We do not cache it however, so that it gets computed when once block is loaded.
        bm25_weight.max_score()
    }
+}

-    pub(crate) fn freq_reading_option(&self) -> FreqReadingOption {
-        self.freq_reading_option
-    }
-
-    // Resets the block segment postings on another position
-    // in the postings file.
-    //
-    // This is useful for enumerating through a list of terms,
-    // and consuming the associated posting lists while avoiding
-    // reallocating a `BlockSegmentPostings`.
-    //
-    // # Warning
-    //
-    // This does not reset the positions list.
-    pub(crate) fn reset(&mut self, doc_freq: u32, postings_data: OwnedBytes) -> io::Result<()> {
-        let (skip_data_opt, postings_data) =
-            split_into_skips_and_postings(doc_freq, postings_data)?;
-        self.data = postings_data;
-        self.block_max_score_cache = None;
-        self.block_loaded = false;
-        if let Some(skip_data) = skip_data_opt {
-            self.skip_reader.reset(skip_data, doc_freq);
-        } else {
-            self.skip_reader.reset(OwnedBytes::empty(), doc_freq);
+impl BlockSegmentPostings {
+    /// Returns an empty segment postings object
+    pub fn empty() -> BlockSegmentPostings {
+        BlockSegmentPostings {
+            doc_decoder: BlockDecoder::with_val(TERMINATED),
+            block_loaded: true,
+            freq_decoder: BlockDecoder::with_val(1),
+            freq_reading_option: FreqReadingOption::NoFreq,
+            block_max_score_cache: None,
+            doc_freq: 0,
+            data: OwnedBytes::empty(),
+            skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
        }
-        self.doc_freq = doc_freq;
-        self.load_block();
-        Ok(())
    }

-    /// Returns the overall number of documents in the block postings.
-    /// It does not take in account whether documents are deleted or not.
-    ///
-    /// This `doc_freq` is simply the sum of the length of all of the blocks
-    /// length, and it does not take in account deleted documents.
-    pub fn doc_freq(&self) -> u32 {
-        self.doc_freq
-    }
-
-    /// Returns the array of docs in the current block.
-    ///
-    /// Before the first call to `.advance()`, the block
-    /// returned by `.docs()` is empty.
-    #[inline]
-    pub fn docs(&self) -> &[DocId] {
-        debug_assert!(self.block_is_loaded());
-        self.doc_decoder.output_array()
-    }
-
-    /// Return the document at index `idx` of the block.
-    #[inline]
-    pub fn doc(&self, idx: usize) -> u32 {
-        self.doc_decoder.output(idx)
-    }
-
-    /// Return the array of `term freq` in the block.
-    #[inline]
-    pub fn freqs(&self) -> &[u32] {
-        debug_assert!(self.block_is_loaded());
-        self.freq_decoder.output_array()
-    }
-
-    /// Return the frequency at index `idx` of the block.
-    #[inline]
-    pub fn freq(&self, idx: usize) -> u32 {
-        debug_assert!(self.block_is_loaded());
-        self.freq_decoder.output(idx)
-    }
-
-    /// Returns the length of the current block.
-    ///
-    /// All blocks have a length of `NUM_DOCS_PER_BLOCK`,
-    /// except the last block that may have a length
-    /// of any number between 1 and `NUM_DOCS_PER_BLOCK - 1`
-    #[inline]
-    pub fn block_len(&self) -> usize {
-        debug_assert!(self.block_is_loaded());
-        self.doc_decoder.output_len
-    }
-
-    /// Position on a block that may contains `target_doc`.
-    ///
-    /// If all docs are smaller than target, the block loaded may be empty,
-    /// or be the last an incomplete VInt block.
-    pub fn seek(&mut self, target_doc: DocId) -> usize {
-        // Move to the block that might contain our document.
-        self.seek_block(target_doc);
-        self.load_block();
-
-        // At this point we are on the block that might contain our document.
-        let doc = self.doc_decoder.seek_within_block(target_doc);
-
-        // The last block is not full and padded with TERMINATED,
-        // so we are guaranteed to have at least one value (real or padding)
-        // that is >= target_doc.
-        debug_assert!(doc < COMPRESSION_BLOCK_SIZE);
-
-        // `doc` is now the first element >= `target_doc`.
-        // If all docs are smaller than target, the current block is incomplete and padded
-        // with TERMINATED. After the search, the cursor points to the first TERMINATED.
-        doc
-    }
-
-    pub(crate) fn position_offset(&self) -> u64 {
-        self.skip_reader.position_offset()
+    pub(crate) fn skip_reader(&self) -> &SkipReader {
+        &self.skip_reader
    }

    /// Dangerous API! This calls seeks the next block on the skip list,
@@ -291,19 +274,15 @@ impl BlockSegmentPostings {
    /// `.load_block()` needs to be called manually afterwards.
    /// If all docs are smaller than target, the block loaded may be empty,
    /// or be the last an incomplete VInt block.
-    pub(crate) fn seek_block(&mut self, target_doc: DocId) {
+    pub(crate) fn seek_block_without_loading(&mut self, target_doc: DocId) {
        if self.skip_reader.seek(target_doc) {
            self.block_max_score_cache = None;
            self.block_loaded = false;
        }
    }

-    pub(crate) fn block_is_loaded(&self) -> bool {
-        self.block_loaded
-    }
-
    pub(crate) fn load_block(&mut self) {
-        if self.block_is_loaded() {
+        if self.block_loaded {
            return;
        }
        let offset = self.skip_reader.byte_offset();
@@ -351,68 +330,40 @@ impl BlockSegmentPostings {
        }
        self.block_loaded = true;
    }
-
-    /// Advance to the next block.
-    pub fn advance(&mut self) {
-        self.skip_reader.advance();
-        self.block_loaded = false;
-        self.block_max_score_cache = None;
-        self.load_block();
-    }
-
-    /// Returns an empty segment postings object
-    pub fn empty() -> BlockSegmentPostings {
-        BlockSegmentPostings {
-            doc_decoder: BlockDecoder::with_val(TERMINATED),
-            block_loaded: true,
-            freq_decoder: BlockDecoder::with_val(1),
-            freq_reading_option: FreqReadingOption::NoFreq,
-            block_max_score_cache: None,
-            doc_freq: 0,
-            data: OwnedBytes::empty(),
-            skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
-        }
-    }
-
-    pub(crate) fn skip_reader(&self) -> &SkipReader {
-        &self.skip_reader
-    }
 }

 #[cfg(test)]
 mod tests {
-    use common::HasLen;
+    use common::OwnedBytes;

    use super::BlockSegmentPostings;
+    use crate::codec::postings::PostingsSerializer;
+    use crate::codec::standard::postings::segment_postings::SegmentPostings;
+    use crate::codec::standard::postings::StandardPostingsSerializer;
    use crate::docset::{DocSet, TERMINATED};
-    use crate::index::Index;
    use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
-    use crate::postings::postings::Postings;
-    use crate::postings::SegmentPostings;
-    use crate::schema::{IndexRecordOption, Schema, Term, INDEXED};
-    use crate::DocId;
+    use crate::schema::IndexRecordOption;

-    #[test]
-    fn test_empty_segment_postings() {
-        let mut postings = SegmentPostings::empty();
-        assert_eq!(postings.doc(), TERMINATED);
-        assert_eq!(postings.advance(), TERMINATED);
-        assert_eq!(postings.advance(), TERMINATED);
-        assert_eq!(postings.doc_freq(), 0);
-        assert_eq!(postings.len(), 0);
-    }
-
-    #[test]
-    fn test_empty_postings_doc_returns_terminated() {
-        let mut postings = SegmentPostings::empty();
-        assert_eq!(postings.doc(), TERMINATED);
-        assert_eq!(postings.advance(), TERMINATED);
-    }
-
-    #[test]
-    fn test_empty_postings_doc_term_freq_returns_0() {
-        let postings = SegmentPostings::empty();
-        assert_eq!(postings.term_freq(), 1);
+    #[cfg(test)]
+    fn build_block_postings(docs: &[u32]) -> BlockSegmentPostings {
+        let doc_freq = docs.len() as u32;
+        let mut postings_serializer =
+            StandardPostingsSerializer::new(1.0f32, IndexRecordOption::Basic, None);
+        postings_serializer.new_term(docs.len() as u32, false);
+        for doc in docs {
+            postings_serializer.write_doc(*doc, 1u32);
+        }
+        let mut buffer: Vec<u8> = Vec::new();
+        postings_serializer
+            .close_term(doc_freq, &mut buffer)
+            .unwrap();
+        BlockSegmentPostings::open(
+            doc_freq,
+            OwnedBytes::new(buffer),
+            IndexRecordOption::Basic,
+            IndexRecordOption::Basic,
+        )
+        .unwrap()
    }

    #[test]
@@ -427,7 +378,7 @@ mod tests {

    #[test]
    fn test_block_segment_postings() -> crate::Result<()> {
-        let mut block_segments = build_block_postings(&(0..100_000).collect::<Vec<u32>>())?;
+        let mut block_segments = build_block_postings(&(0..100_000).collect::<Vec<u32>>());
        let mut offset: u32 = 0u32;
        // checking that the `doc_freq` is correct
        assert_eq!(block_segments.doc_freq(), 100_000);
@@ -452,7 +403,7 @@ mod tests {
        doc_ids.push(129);
        doc_ids.push(130);
        {
-            let block_segments = build_block_postings(&doc_ids)?;
+            let block_segments = build_block_postings(&doc_ids);
            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
            assert_eq!(docset.seek(128), 129);
            assert_eq!(docset.doc(), 129);
@@ -461,7 +412,7 @@ mod tests {
            assert_eq!(docset.advance(), TERMINATED);
        }
        {
-            let block_segments = build_block_postings(&doc_ids).unwrap();
+            let block_segments = build_block_postings(&doc_ids);
            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
            assert_eq!(docset.seek(129), 129);
            assert_eq!(docset.doc(), 129);
@@ -470,7 +421,7 @@ mod tests {
            assert_eq!(docset.advance(), TERMINATED);
        }
        {
-            let block_segments = build_block_postings(&doc_ids)?;
+            let block_segments = build_block_postings(&doc_ids);
            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
            assert_eq!(docset.doc(), 0);
            assert_eq!(docset.seek(131), TERMINATED);
@@ -479,38 +430,13 @@ mod tests {
        Ok(())
    }

-    fn build_block_postings(docs: &[DocId]) -> crate::Result<BlockSegmentPostings> {
-        let mut schema_builder = Schema::builder();
-        let int_field = schema_builder.add_u64_field("id", INDEXED);
-        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema);
-        let mut index_writer = index.writer_for_tests()?;
-        let mut last_doc = 0u32;
-        for &doc in docs {
-            for _ in last_doc..doc {
-                index_writer.add_document(doc!(int_field=>1u64))?;
-            }
-            index_writer.add_document(doc!(int_field=>0u64))?;
-            last_doc = doc + 1;
-        }
-        index_writer.commit()?;
-        let searcher = index.reader()?.searcher();
-        let segment_reader = searcher.segment_reader(0);
-        let inverted_index = segment_reader.inverted_index(int_field).unwrap();
-        let term = Term::from_field_u64(int_field, 0u64);
-        let term_info = inverted_index.get_term_info(&term)?.unwrap();
-        let block_postings = inverted_index
-            .read_block_postings_from_terminfo(&term_info, IndexRecordOption::Basic)?;
-        Ok(block_postings)
-    }
-
    #[test]
    fn test_block_segment_postings_seek() -> crate::Result<()> {
-        let mut docs = vec![0];
+        let mut docs = Vec::new();
        for i in 0..1300 {
            docs.push((i * i / 100) + i);
        }
-        let mut block_postings = build_block_postings(&docs[..])?;
+        let mut block_postings = build_block_postings(&docs[..]);
        for i in &[0, 424, 10000] {
            block_postings.seek(*i);
            let docs = block_postings.docs();
@@ -521,40 +447,4 @@ mod tests {
        assert_eq!(block_postings.doc(COMPRESSION_BLOCK_SIZE - 1), TERMINATED);
        Ok(())
    }
-
-    #[test]
-    fn test_reset_block_segment_postings() -> crate::Result<()> {
-        let mut schema_builder = Schema::builder();
-        let int_field = schema_builder.add_u64_field("id", INDEXED);
-        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema);
-        let mut index_writer = index.writer_for_tests()?;
-        // create two postings list, one containing even number,
-        // the other containing odd numbers.
-        for i in 0..6 {
-            let doc = doc!(int_field=> (i % 2) as u64);
-            index_writer.add_document(doc)?;
-        }
-        index_writer.commit()?;
-        let searcher = index.reader()?.searcher();
-        let segment_reader = searcher.segment_reader(0);
-
-        let mut block_segments;
-        {
-            let term = Term::from_field_u64(int_field, 0u64);
-            let inverted_index = segment_reader.inverted_index(int_field)?;
-            let term_info = inverted_index.get_term_info(&term)?.unwrap();
-            block_segments = inverted_index
-                .read_block_postings_from_terminfo(&term_info, IndexRecordOption::Basic)?;
-        }
-        assert_eq!(block_segments.docs(), &[0, 2, 4]);
-        {
-            let term = Term::from_field_u64(int_field, 1u64);
-            let inverted_index = segment_reader.inverted_index(int_field)?;
-            let term_info = inverted_index.get_term_info(&term)?.unwrap();
-            inverted_index.reset_block_postings_from_terminfo(&term_info, &mut block_segments)?;
-        }
-        assert_eq!(block_segments.docs(), &[1, 3, 5]);
-        Ok(())
-    }
 }
--- a/src/codec/standard/postings/mod.rs
+++ b/src/codec/standard/postings/mod.rs
@@ -0,0 +1,163 @@
+use std::io;
+
+use crate::codec::positions::PositionsReader;
+use crate::codec::postings::block_wand::{block_wand, block_wand_single_scorer};
+use crate::codec::postings::PostingsCodec;
+use crate::codec::standard::postings::block_segment_postings::BlockSegmentPostings;
+pub use crate::codec::standard::postings::segment_postings::SegmentPostings;
+use crate::fieldnorm::FieldNormReader;
+use crate::query::term_query::TermScorer;
+use crate::query::{BufferedUnionScorer, Scorer, SumCombiner};
+use crate::schema::IndexRecordOption;
+use crate::{DocSet as _, Score, TERMINATED};
+
+mod block;
+mod block_segment_postings;
+mod segment_postings;
+mod skip;
+mod standard_postings_serializer;
+
+pub use segment_postings::SegmentPostings as StandardPostings;
+pub use standard_postings_serializer::StandardPostingsSerializer;
+
+/// The default postings codec for tantivy.
+pub struct StandardPostingsCodec;
+
+#[expect(clippy::enum_variant_names)]
+#[derive(Debug, PartialEq, Clone, Copy, Eq)]
+pub(crate) enum FreqReadingOption {
+    NoFreq,
+    SkipFreq,
+    ReadFreq,
+}
+
+impl PostingsCodec for StandardPostingsCodec {
+    type PostingsSerializer = StandardPostingsSerializer;
+    type Postings = SegmentPostings;
+
+    fn new_serializer(
+        &self,
+        avg_fieldnorm: Score,
+        mode: IndexRecordOption,
+        fieldnorm_reader: Option<FieldNormReader>,
+    ) -> Self::PostingsSerializer {
+        StandardPostingsSerializer::new(avg_fieldnorm, mode, fieldnorm_reader)
+    }
+
+    fn load_postings(
+        &self,
+        doc_freq: u32,
+        postings_data: common::OwnedBytes,
+        record_option: IndexRecordOption,
+        requested_option: IndexRecordOption,
+        position_reader: Option<Box<dyn PositionsReader>>,
+    ) -> io::Result<Self::Postings> {
+        // Rationalize record_option/requested_option.
+        let requested_option = requested_option.downgrade(record_option);
+        let block_segment_postings =
+            BlockSegmentPostings::open(doc_freq, postings_data, record_option, requested_option)?;
+        Ok(SegmentPostings::from_block_postings(
+            block_segment_postings,
+            position_reader,
+        ))
+    }
+
+    fn try_accelerated_for_each_pruning(
+        mut threshold: Score,
+        mut scorer: Box<dyn Scorer>,
+        callback: &mut dyn FnMut(crate::DocId, Score) -> Score,
+    ) -> Result<(), Box<dyn Scorer>> {
+        scorer = match scorer.downcast::<TermScorer<Self::Postings>>() {
+            Ok(term_scorer) => {
+                block_wand_single_scorer(*term_scorer, threshold, callback);
+                return Ok(());
+            }
+            Err(scorer) => scorer,
+        };
+        let mut union_scorer =
+            scorer.downcast::<BufferedUnionScorer<Box<dyn Scorer>, SumCombiner>>()?;
+        if !union_scorer
+            .scorers()
+            .iter()
+            .all(|scorer| scorer.is::<TermScorer<Self::Postings>>())
+        {
+            return Err(union_scorer);
+        }
+        let doc = union_scorer.doc();
+        if doc == TERMINATED {
+            return Ok(());
+        }
+        let score = union_scorer.score();
+        if score > threshold {
+            threshold = callback(doc, score);
+        }
+        let boxed_scorers: Vec<Box<dyn Scorer>> = union_scorer.into_scorers();
+        let scorers: Vec<TermScorer<Self::Postings>> = boxed_scorers
+            .into_iter()
+            .map(|scorer| {
+                *scorer.downcast::<TermScorer<Self::Postings>>().ok().expect(
+                    "Downcast failed despite the fact we already checked the type was correct",
+                )
+            })
+            .collect();
+        block_wand(scorers, threshold, callback);
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use common::OwnedBytes;
+
+    use super::*;
+    use crate::codec::postings::PostingsSerializer as _;
+    use crate::postings::Postings as _;
+
+    fn test_segment_postings_tf_aux(num_docs: u32, include_term_freq: bool) -> SegmentPostings {
+        let mut postings_serializer =
+            StandardPostingsCodec.new_serializer(1.0f32, IndexRecordOption::WithFreqs, None);
+        let mut buffer = Vec::new();
+        postings_serializer.new_term(num_docs, include_term_freq);
+        for i in 0..num_docs {
+            postings_serializer.write_doc(i, 2);
+        }
+        postings_serializer
+            .close_term(num_docs, &mut buffer)
+            .unwrap();
+        StandardPostingsCodec
+            .load_postings(
+                num_docs,
+                OwnedBytes::new(buffer),
+                IndexRecordOption::WithFreqs,
+                IndexRecordOption::WithFreqs,
+                None,
+            )
+            .unwrap()
+    }
+
+    #[test]
+    fn test_segment_postings_small_block_with_and_without_freq() {
+        let small_block_without_term_freq = test_segment_postings_tf_aux(1, false);
+        assert!(!small_block_without_term_freq.has_freq());
+        assert_eq!(small_block_without_term_freq.doc(), 0);
+        assert_eq!(small_block_without_term_freq.term_freq(), 1);
+
+        let small_block_with_term_freq = test_segment_postings_tf_aux(1, true);
+        assert!(small_block_with_term_freq.has_freq());
+        assert_eq!(small_block_with_term_freq.doc(), 0);
+        assert_eq!(small_block_with_term_freq.term_freq(), 2);
+    }
+
+    #[test]
+    fn test_segment_postings_large_block_with_and_without_freq() {
+        let large_block_without_term_freq = test_segment_postings_tf_aux(128, false);
+        assert!(!large_block_without_term_freq.has_freq());
+        assert_eq!(large_block_without_term_freq.doc(), 0);
+        assert_eq!(large_block_without_term_freq.term_freq(), 1);
+
+        let large_block_with_term_freq = test_segment_postings_tf_aux(128, true);
+        assert!(large_block_with_term_freq.has_freq());
+        assert_eq!(large_block_with_term_freq.doc(), 0);
+        assert_eq!(large_block_with_term_freq.term_freq(), 2);
+    }
+}
--- a/src/codec/standard/postings/segment_postings.rs
+++ b/src/codec/standard/postings/segment_postings.rs
@@ -1,22 +1,34 @@
-use common::HasLen;
+use common::BitSet;

+use super::BlockSegmentPostings;
+use crate::codec::positions::PositionsReader;
+use crate::codec::postings::PostingsWithBlockMax;
 use crate::docset::DocSet;
-use crate::fastfield::AliveBitSet;
-use crate::positions::PositionReader;
+use crate::fieldnorm::FieldNormReader;
 use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
-use crate::postings::{BlockSegmentPostings, Postings};
-use crate::{DocId, TERMINATED};
+use crate::postings::{DocFreq, Postings};
+use crate::query::Bm25Weight;
+use crate::{DocId, Score};

 /// `SegmentPostings` represents the inverted list or postings associated with
 /// a term in a `Segment`.
 ///
 /// As we iterate through the `SegmentPostings`, the frequencies are optionally decoded.
 /// Positions on the other hand, are optionally entirely decoded upfront.
-#[derive(Clone)]
 pub struct SegmentPostings {
    pub(crate) block_cursor: BlockSegmentPostings,
    cur: usize,
-    position_reader: Option<PositionReader>,
+    position_reader: Option<Box<dyn PositionsReader>>,
+}
+
+impl Clone for SegmentPostings {
+    fn clone(&self) -> Self {
+        SegmentPostings {
+            block_cursor: self.block_cursor.clone(),
+            cur: self.cur,
+            position_reader: self.position_reader.as_ref().map(|r| r.clone_box()),
+        }
+    }
 }

 impl SegmentPostings {
@@ -29,31 +41,6 @@ impl SegmentPostings {
        }
    }

-    /// Compute the number of non-deleted documents.
-    ///
-    /// This method will clone and scan through the posting lists.
-    /// (this is a rather expensive operation).
-    pub fn doc_freq_given_deletes(&self, alive_bitset: &AliveBitSet) -> u32 {
-        let mut docset = self.clone();
-        let mut doc_freq = 0;
-        loop {
-            let doc = docset.doc();
-            if doc == TERMINATED {
-                return doc_freq;
-            }
-            if alive_bitset.is_alive(doc) {
-                doc_freq += 1u32;
-            }
-            docset.advance();
-        }
-    }
-
-    /// Returns the overall number of documents in the block postings.
-    /// It does not take in account whether documents are deleted or not.
-    pub fn doc_freq(&self) -> u32 {
-        self.block_cursor.doc_freq()
-    }
-
    /// Creates a segment postings object with the given documents
    /// and no frequency encoded.
    ///
@@ -64,13 +51,19 @@ impl SegmentPostings {
    /// buffer with the serialized data.
    #[cfg(test)]
    pub fn create_from_docs(docs: &[u32]) -> SegmentPostings {
-        use crate::directory::FileSlice;
-        use crate::postings::serializer::PostingsSerializer;
+        use common::OwnedBytes;
+
        use crate::schema::IndexRecordOption;
        let mut buffer = Vec::new();
        {
+            use crate::codec::postings::PostingsSerializer;
+
            let mut postings_serializer =
-                PostingsSerializer::new(0.0, IndexRecordOption::Basic, None);
+                crate::codec::standard::postings::StandardPostingsSerializer::new(
+                    0.0,
+                    IndexRecordOption::Basic,
+                    None,
+                );
            postings_serializer.new_term(docs.len() as u32, false);
            for &doc in docs {
                postings_serializer.write_doc(doc, 1u32);
@@ -81,7 +74,7 @@ impl SegmentPostings {
        }
        let block_segment_postings = BlockSegmentPostings::open(
            docs.len() as u32,
-            FileSlice::from(buffer),
+            OwnedBytes::new(buffer),
            IndexRecordOption::Basic,
            IndexRecordOption::Basic,
        )
@@ -95,9 +88,11 @@ impl SegmentPostings {
        doc_and_tfs: &[(u32, u32)],
        fieldnorms: Option<&[u32]>,
    ) -> SegmentPostings {
-        use crate::directory::FileSlice;
+        use common::OwnedBytes;
+
+        use crate::codec::postings::PostingsSerializer as _;
+        use crate::codec::standard::postings::StandardPostingsSerializer;
        use crate::fieldnorm::FieldNormReader;
-        use crate::postings::serializer::PostingsSerializer;
        use crate::schema::IndexRecordOption;
        use crate::Score;
        let mut buffer: Vec<u8> = Vec::new();
@@ -114,7 +109,7 @@ impl SegmentPostings {
                total_num_tokens as Score / fieldnorms.len() as Score
            })
            .unwrap_or(0.0);
-        let mut postings_serializer = PostingsSerializer::new(
+        let mut postings_serializer = StandardPostingsSerializer::new(
            average_field_norm,
            IndexRecordOption::WithFreqs,
            fieldnorm_reader,
@@ -128,7 +123,7 @@ impl SegmentPostings {
            .unwrap();
        let block_segment_postings = BlockSegmentPostings::open(
            doc_and_tfs.len() as u32,
-            FileSlice::from(buffer),
+            OwnedBytes::new(buffer),
            IndexRecordOption::WithFreqs,
            IndexRecordOption::WithFreqs,
        )
@@ -143,7 +138,7 @@ impl SegmentPostings {
    /// * `freq_handler` - the freq handler is in charge of decoding frequencies and/or positions
    pub(crate) fn from_block_postings(
        segment_block_postings: BlockSegmentPostings,
-        position_reader: Option<PositionReader>,
+        position_reader: Option<Box<dyn PositionsReader>>,
    ) -> SegmentPostings {
        SegmentPostings {
            block_cursor: segment_block_postings,
@@ -158,7 +153,6 @@ impl DocSet for SegmentPostings {
    // next needs to be called a first time to point to the correct element.
    #[inline]
    fn advance(&mut self) -> DocId {
-        debug_assert!(self.block_cursor.block_is_loaded());
        if self.cur == COMPRESSION_BLOCK_SIZE - 1 {
            self.cur = 0;
            self.block_cursor.advance();
@@ -197,13 +191,31 @@ impl DocSet for SegmentPostings {
    }

    fn size_hint(&self) -> u32 {
-        self.len() as u32
+        self.doc_freq().into()
    }
-}

-impl HasLen for SegmentPostings {
-    fn len(&self) -> usize {
-        self.block_cursor.doc_freq() as usize
+    fn fill_bitset(&mut self, bitset: &mut BitSet) {
+        let bitset_max_value: DocId = bitset.max_value();
+        loop {
+            let docs = self.block_cursor.docs();
+            let Some(&last_doc) = docs.last() else {
+                break;
+            };
+            if last_doc < bitset_max_value {
+                // All docs are within the range of the bitset
+                for &doc in docs {
+                    bitset.insert(doc);
+                }
+            } else {
+                for &doc in docs {
+                    if doc < bitset_max_value {
+                        bitset.insert(doc);
+                    }
+                }
+                break;
+            }
+            self.block_cursor.advance();
+        }
    }
 }

@@ -229,6 +241,13 @@ impl Postings for SegmentPostings {
        self.block_cursor.freq(self.cur)
    }

+    /// Returns the overall number of documents in the block postings.
+    /// It does not take into account whether documents are deleted or not.
+    #[inline(always)]
+    fn doc_freq(&self) -> DocFreq {
+        DocFreq::Exact(self.block_cursor.doc_freq())
+    }
+
    fn append_positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) {
        let term_freq = self.term_freq();
        let prev_len = output.len();
@@ -252,24 +271,42 @@ impl Postings for SegmentPostings {
            }
        }
    }
+
+    fn has_freq(&self) -> bool {
+        !self.block_cursor.freqs().is_empty()
+    }
+}
+
+impl PostingsWithBlockMax for SegmentPostings {
+    fn seek_block_max(
+        &mut self,
+        target_doc: crate::DocId,
+        fieldnorm_reader: &FieldNormReader,
+        similarity_weight: &Bm25Weight,
+    ) -> Score {
+        self.block_cursor.seek_block_without_loading(target_doc);
+        self.block_cursor
+            .block_max_score(fieldnorm_reader, similarity_weight)
+    }
+
+    fn last_doc_in_block(&self) -> crate::DocId {
+        self.block_cursor.skip_reader().last_doc_in_block()
+    }
 }

 #[cfg(test)]
 mod tests {
-
-    use common::HasLen;
-
    use super::SegmentPostings;
    use crate::docset::{DocSet, TERMINATED};
-    use crate::fastfield::AliveBitSet;
-    use crate::postings::postings::Postings;
+    use crate::postings::Postings;

    #[test]
    fn test_empty_segment_postings() {
        let mut postings = SegmentPostings::empty();
+        assert_eq!(postings.doc(), TERMINATED);
        assert_eq!(postings.advance(), TERMINATED);
        assert_eq!(postings.advance(), TERMINATED);
-        assert_eq!(postings.len(), 0);
+        assert_eq!(postings.doc_freq(), crate::postings::DocFreq::Exact(0));
    }

    #[test]
@@ -284,15 +321,4 @@ mod tests {
        let postings = SegmentPostings::empty();
        assert_eq!(postings.term_freq(), 1);
    }
-
-    #[test]
-    fn test_doc_freq() {
-        let docs = SegmentPostings::create_from_docs(&[0, 2, 10]);
-        assert_eq!(docs.doc_freq(), 3);
-        let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[2], 12);
-        assert_eq!(docs.doc_freq_given_deletes(&alive_bitset), 2);
-        let all_deleted =
-            AliveBitSet::for_test_from_deleted_docs(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12);
-        assert_eq!(docs.doc_freq_given_deletes(&all_deleted), 0);
-    }
 }
--- a/src/codec/standard/postings/skip.rs
+++ b/src/codec/standard/postings/skip.rs
@@ -146,23 +146,6 @@ impl SkipReader {
        skip_reader
    }

-    pub fn reset(&mut self, data: OwnedBytes, doc_freq: u32) {
-        self.last_doc_in_block = if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
-            0
-        } else {
-            TERMINATED
-        };
-        self.last_doc_in_previous_block = 0u32;
-        self.owned_read = data;
-        self.block_info = BlockInfo::VInt { num_docs: doc_freq };
-        self.byte_offset = 0;
-        self.remaining_docs = doc_freq;
-        self.position_offset = 0u64;
-        if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
-            self.read_block_info();
-        }
-    }
-
    // Returns the block max score for this block if available.
    //
    // The block max score is available for all full bitpacked block,
--- a/src/codec/standard/postings/standard_postings_serializer.rs
+++ b/src/codec/standard/postings/standard_postings_serializer.rs
@@ -0,0 +1,184 @@
+use std::cmp::Ordering;
+use std::io::{self, Write as _};
+
+use common::{BinarySerializable as _, VInt};
+
+use crate::codec::postings::PostingsSerializer;
+use crate::codec::standard::postings::block::Block;
+use crate::codec::standard::postings::skip::SkipSerializer;
+use crate::fieldnorm::FieldNormReader;
+use crate::postings::compression::{BlockEncoder, VIntEncoder as _, COMPRESSION_BLOCK_SIZE};
+use crate::query::Bm25Weight;
+use crate::schema::IndexRecordOption;
+use crate::{DocId, Score};
+
+/// Serializer object for tantivy's default postings format.
+pub struct StandardPostingsSerializer {
+    last_doc_id_encoded: u32,
+
+    block_encoder: BlockEncoder,
+    block: Box<Block>,
+
+    postings_write: Vec<u8>,
+    skip_write: SkipSerializer,
+
+    mode: IndexRecordOption,
+    fieldnorm_reader: Option<FieldNormReader>,
+
+    bm25_weight: Option<Bm25Weight>,
+    avg_fieldnorm: Score, /* Average number of terms in the field for that segment.
+                           * This value is used to compute the block wand information. */
+    term_has_freq: bool,
+}
+
+impl StandardPostingsSerializer {
+    pub(crate) fn new(
+        avg_fieldnorm: Score,
+        mode: IndexRecordOption,
+        fieldnorm_reader: Option<FieldNormReader>,
+    ) -> StandardPostingsSerializer {
+        Self {
+            last_doc_id_encoded: 0,
+            block_encoder: BlockEncoder::new(),
+            block: Box::new(Block::new()),
+            postings_write: Vec::new(),
+            skip_write: SkipSerializer::new(),
+            mode,
+            fieldnorm_reader,
+            bm25_weight: None,
+            avg_fieldnorm,
+            term_has_freq: false,
+        }
+    }
+}
+
+impl PostingsSerializer for StandardPostingsSerializer {
+    fn new_term(&mut self, term_doc_freq: u32, record_term_freq: bool) {
+        self.clear();
+
+        self.term_has_freq = self.mode.has_freq() && record_term_freq;
+        if !self.term_has_freq {
+            return;
+        }
+
+        let num_docs_in_segment: u64 =
+            if let Some(fieldnorm_reader) = self.fieldnorm_reader.as_ref() {
+                fieldnorm_reader.num_docs() as u64
+            } else {
+                return;
+            };
+
+        if num_docs_in_segment == 0 {
+            return;
+        }
+
+        self.bm25_weight = Some(Bm25Weight::for_one_term_without_explain(
+            term_doc_freq as u64,
+            num_docs_in_segment,
+            self.avg_fieldnorm,
+        ));
+    }
+
+    fn write_doc(&mut self, doc_id: DocId, term_freq: u32) {
+        self.block.append_doc(doc_id, term_freq);
+        if self.block.is_full() {
+            self.write_block();
+        }
+    }
+
+    fn close_term(&mut self, doc_freq: u32, output_write: &mut impl io::Write) -> io::Result<()> {
+        if !self.block.is_empty() {
+            // we have doc ids waiting to be written
+            // this happens when the number of doc ids is
+            // not a perfect multiple of our block size.
+            //
+            // In that case, the remaining part is encoded
+            // using variable int encoding.
+            {
+                let block_encoded = self
+                    .block_encoder
+                    .compress_vint_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
+                self.postings_write.write_all(block_encoded)?;
+            }
+            // ... Likewise for the term frequencies.
+            if self.term_has_freq {
+                let block_encoded = self
+                    .block_encoder
+                    .compress_vint_unsorted(self.block.term_freqs());
+                self.postings_write.write_all(block_encoded)?;
+            }
+            self.block.clear();
+        }
+        if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
+            let skip_data = self.skip_write.data();
+            VInt(skip_data.len() as u64).serialize(output_write)?;
+            output_write.write_all(skip_data)?;
+        }
+        output_write.write_all(&self.postings_write[..])?;
+        self.skip_write.clear();
+        self.postings_write.clear();
+        self.bm25_weight = None;
+        Ok(())
+    }
+}
+
+impl StandardPostingsSerializer {
+    fn clear(&mut self) {
+        self.bm25_weight = None;
+        self.block.clear();
+        self.last_doc_id_encoded = 0;
+    }
+
+    fn write_block(&mut self) {
+        {
+            // encode the doc ids
+            let (num_bits, block_encoded): (u8, &[u8]) = self
+                .block_encoder
+                .compress_block_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
+            self.last_doc_id_encoded = self.block.last_doc();
+            self.skip_write
+                .write_doc(self.last_doc_id_encoded, num_bits);
+            // last el block 0, offset block 1,
+            self.postings_write.extend(block_encoded);
+        }
+        if self.term_has_freq {
+            let (num_bits, block_encoded): (u8, &[u8]) = self
+                .block_encoder
+                .compress_block_unsorted(self.block.term_freqs(), true);
+            self.postings_write.extend(block_encoded);
+            self.skip_write.write_term_freq(num_bits);
+            if self.mode.has_positions() {
+                // We serialize the sum of term freqs within the skip information
+                // in order to navigate through positions.
+                let sum_freq = self.block.term_freqs().iter().cloned().sum();
+                self.skip_write.write_total_term_freq(sum_freq);
+            }
+            let mut blockwand_params = (0u8, 0u32);
+            if let Some(bm25_weight) = self.bm25_weight.as_ref() {
+                if let Some(fieldnorm_reader) = self.fieldnorm_reader.as_ref() {
+                    let docs = self.block.doc_ids().iter().cloned();
+                    let term_freqs = self.block.term_freqs().iter().cloned();
+                    let fieldnorms = docs.map(|doc| fieldnorm_reader.fieldnorm_id(doc));
+                    blockwand_params = fieldnorms
+                        .zip(term_freqs)
+                        .max_by(
+                            |(left_fieldnorm_id, left_term_freq),
+                             (right_fieldnorm_id, right_term_freq)| {
+                                let left_score =
+                                    bm25_weight.tf_factor(*left_fieldnorm_id, *left_term_freq);
+                                let right_score =
+                                    bm25_weight.tf_factor(*right_fieldnorm_id, *right_term_freq);
+                                left_score
+                                    .partial_cmp(&right_score)
+                                    .unwrap_or(Ordering::Equal)
+                            },
+                        )
+                        .unwrap();
+                }
+            }
+            let (fieldnorm_id, term_freq) = blockwand_params;
+            self.skip_write.write_blockwand_max(fieldnorm_id, term_freq);
+        }
+        self.block.clear();
+    }
+}
--- a/src/collector/count_collector.rs
+++ b/src/collector/count_collector.rs
@@ -1,5 +1,6 @@
 use super::Collector;
 use crate::collector::SegmentCollector;
+use crate::query::Weight;
 use crate::{DocId, Score, SegmentOrdinal, SegmentReader};

 /// `CountCollector` collector only counts how many
@@ -55,6 +56,15 @@ impl Collector for Count {
    fn merge_fruits(&self, segment_counts: Vec<usize>) -> crate::Result<usize> {
        Ok(segment_counts.into_iter().sum())
    }
+
+    fn collect_segment(
+        &self,
+        weight: &dyn Weight,
+        _segment_ord: u32,
+        reader: &SegmentReader,
+    ) -> crate::Result<usize> {
+        Ok(weight.count(reader)? as usize)
+    }
 }

 #[derive(Default)]
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -389,6 +389,13 @@ impl SegmentCollector for FacetSegmentCollector {
            }
            let mut facet = vec![];
            let (facet_ord, facet_depth) = self.unique_facet_ords[collapsed_facet_ord];
+            // u64::MAX is used as a sentinel for unmapped ordinals (e.g. when a
+            // document has the exact registered facet, not a child of it).
+            // Passing it to ord_to_term would resolve to the last dictionary
+            // entry and produce a spurious facet from an unrelated branch.
+            if facet_ord == u64::MAX {
+                continue;
+            }
            // TODO handle errors.
            if facet_dict.ord_to_term(facet_ord, &mut facet).is_ok() {
                if let Some((end_collapsed_facet, _)) = facet
@@ -814,6 +821,63 @@ mod tests {
        assert!(!super::is_child_facet(&b"foo\0bar"[..], &b"foo"[..]));
        assert!(!super::is_child_facet(&b"foo"[..], &b"foobar\0baz"[..]));
    }
+
+    // Regression test for https://github.com/quickwit-oss/tantivy/issues/2494
+    // When a document has the exact registered facet path (not just a child),
+    // harvest() must not turn the unmapped sentinel into a spurious root entry.
+    #[test]
+    fn test_facet_collector_wrong_root() -> crate::Result<()> {
+        let mut schema_builder = Schema::builder();
+        let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+
+        let mut index_writer: IndexWriter = index.writer_for_tests()?;
+        let facets: Vec<&str> = vec![
+            "/science-fiction/asimov",
+            "/science-fiction/clarke",
+            "/science-fiction/dick",
+            "/science-fiction/herbert",
+            "/science-fiction/orwell",
+            // This exact match on the registered facet is the bug trigger:
+            // its ordinal maps to the sentinel (u64::MAX, 0) in the collapse
+            // mapping, which without the fix resolves to an unrelated term.
+            "/fantasy/epic-fantasy",
+            "/fantasy/epic-fantasy/tolkien",
+            "/fantasy/epic-fantasy/martin",
+        ];
+        for facet_str in &facets {
+            index_writer.add_document(doc!(
+                facet_field => Facet::from(*facet_str)
+            ))?;
+        }
+        index_writer.commit()?;
+
+        let reader = index.reader()?;
+        let searcher = reader.searcher();
+
+        let term = Term::from_facet(facet_field, &Facet::from("/fantasy/epic-fantasy"));
+        let query = TermQuery::new(term, IndexRecordOption::Basic);
+
+        let mut facet_collector = FacetCollector::for_field("facet");
+        facet_collector.add_facet("/fantasy/epic-fantasy");
+        let counts: FacetCounts = searcher.search(&query, &facet_collector)?;
+
+        let result: Vec<(String, u64)> = counts
+            .get("/")
+            .map(|(facet, count)| (facet.to_string(), count))
+            .collect();
+
+        // Only children of /fantasy/epic-fantasy should appear, not /science-fiction
+        assert_eq!(
+            result,
+            vec![
+                ("/fantasy/epic-fantasy/martin".to_string(), 1),
+                ("/fantasy/epic-fantasy/tolkien".to_string(), 1),
+            ]
+        );
+        Ok(())
+    }
 }

 #[cfg(all(test, feature = "unstable"))]
--- a/src/collector/sort_key/sort_by_score.rs
+++ b/src/collector/sort_key/sort_by_score.rs
@@ -1,5 +1,8 @@
+use std::cmp::{Ordering, Reverse};
+use std::collections::BinaryHeap;
+
 use crate::collector::sort_key::NaturalComparator;
-use crate::collector::{SegmentSortKeyComputer, SortKeyComputer, TopNComputer};
+use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
 use crate::{DocAddress, DocId, Score};

 /// Sort by similarity score.
@@ -25,6 +28,10 @@ impl SortKeyComputer for SortBySimilarityScore {
    }

    // Sorting by score is special in that it allows for the Block-Wand optimization.
+    //
+    // We use a BinaryHeap (TopNHeap) instead of TopNComputer here so that the
+    // threshold is always the exact K-th best score. TopNComputer only updates its
+    // threshold every K docs (at truncation), giving Block-WAND a stale bound.
    fn collect_segment_top_k(
        &self,
        k: usize,
@@ -32,12 +39,10 @@ impl SortKeyComputer for SortBySimilarityScore {
        reader: &crate::SegmentReader,
        segment_ord: u32,
    ) -> crate::Result<Vec<(Self::SortKey, DocAddress)>> {
-        let mut top_n: TopNComputer<Score, DocId, Self::Comparator> =
-            TopNComputer::new_with_comparator(k, self.comparator());
+        let mut top_n = TopNHeap::new(k);

        if let Some(alive_bitset) = reader.alive_bitset() {
            let mut threshold = Score::MIN;
-            top_n.threshold = Some(threshold);
            weight.for_each_pruning(Score::MIN, reader, &mut |doc, score| {
                if alive_bitset.is_deleted(doc) {
                    return threshold;
@@ -56,7 +61,7 @@ impl SortKeyComputer for SortBySimilarityScore {
        Ok(top_n
            .into_vec()
            .into_iter()
-            .map(|cid| (cid.sort_key, DocAddress::new(segment_ord, cid.doc)))
+            .map(|(score, doc)| (score, DocAddress::new(segment_ord, doc)))
            .collect())
    }
 }
@@ -75,3 +80,204 @@ impl SegmentSortKeyComputer for SortBySimilarityScore {
        score
    }
 }
+
+/// Heap entry: a higher score is greater; on equal scores the lower doc id is greater.
+struct ScoreHeapEntry {
+    score: Score,
+    doc: DocId,
+}
+
+impl Eq for ScoreHeapEntry {}
+
+impl PartialEq for ScoreHeapEntry {
+    fn eq(&self, other: &Self) -> bool {
+        self.cmp(other) == Ordering::Equal
+    }
+}
+
+impl PartialOrd for ScoreHeapEntry {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for ScoreHeapEntry {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.score
+            .partial_cmp(&other.score)
+            .unwrap_or(Ordering::Equal) // incomparable scores (e.g. NaN) count as equal
+            .then_with(|| other.doc.cmp(&self.doc)) // swapped operands: lower doc is greater
+    }
+}
+
+/// Heap-based top-K for score collection. O(log K) per insert, but the threshold
+/// is always tight, so Block-WAND prunes better than with [`TopNComputer`]'s
+/// buffer/median approach.
+///
+/// Like [`TopNComputer`], items must arrive in ascending doc order. Once full, only scores
+/// strictly above the threshold are accepted (strict `>`), so that lower doc IDs win ties.
+///
+/// [`TopNComputer`]: crate::collector::TopNComputer
+struct TopNHeap {
+    heap: BinaryHeap<Reverse<ScoreHeapEntry>>, // `Reverse`: the worst kept entry is on top
+    top_n: usize,
+    threshold: Option<Score>, // score of the worst kept entry; `None` until `top_n` are held
+}
+
+impl TopNHeap {
+    fn new(top_n: usize) -> Self {
+        TopNHeap {
+            heap: BinaryHeap::with_capacity(top_n),
+            top_n,
+            threshold: None,
+        }
+    }
+
+    #[inline]
+    fn push(&mut self, score: Score, doc: DocId) {
+        if self.heap.len() < self.top_n {
+            self.heap.push(Reverse(ScoreHeapEntry { score, doc }));
+            if self.heap.len() == self.top_n {
+                self.threshold = self.heap.peek().map(|Reverse(entry)| entry.score);
+            }
+        } else if let Some(threshold) = self.threshold {
+            if score > threshold {
+                // peek_mut + assign is a single sift-down, vs pop + push = two sifts.
+                if let Some(mut min) = self.heap.peek_mut() {
+                    *min = Reverse(ScoreHeapEntry { score, doc });
+                }
+                self.threshold = self.heap.peek().map(|Reverse(entry)| entry.score);
+            }
+        }
+    }
+
+    fn into_vec(self) -> Vec<(Score, DocId)> {
+        self.heap
+            .into_vec() // arbitrary (heap) order, not sorted by score
+            .into_iter()
+            .map(|Reverse(entry)| (entry.score, entry.doc))
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use proptest::prelude::*;
+
+    use super::*;
+    use crate::collector::sort_key::NaturalComparator;
+    use crate::collector::TopNComputer;
+
+    #[test]
+    fn test_top_n_heap_zero_capacity() {
+        let mut heap = TopNHeap::new(0);
+        heap.push(1.0, 0);
+        heap.push(2.0, 1);
+        assert!(heap.into_vec().is_empty());
+    }
+
+    #[test]
+    fn test_top_n_heap_basic() {
+        let mut heap = TopNHeap::new(2);
+        heap.push(1.0, 0);
+        heap.push(3.0, 1);
+        heap.push(2.0, 2);
+
+        let mut results = heap.into_vec();
+        results.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap().then_with(|| a.1.cmp(&b.1)));
+        assert_eq!(results, vec![(3.0, 1), (2.0, 2)]);
+    }
+
+    #[test]
+    fn test_top_n_heap_threshold_always_accurate() {
+        let mut heap = TopNHeap::new(2);
+        assert_eq!(heap.threshold, None);
+
+        heap.push(1.0, 0);
+        assert_eq!(heap.threshold, None);
+
+        heap.push(3.0, 1);
+        assert_eq!(heap.threshold, Some(1.0));
+
+        heap.push(2.0, 2); // evicts 1.0
+        assert_eq!(heap.threshold, Some(2.0));
+
+        heap.push(4.0, 3); // evicts 2.0
+        assert_eq!(heap.threshold, Some(3.0));
+    }
+
+    #[test]
+    fn test_top_n_heap_tiebreaking_lower_doc_wins() {
+        let mut heap = TopNHeap::new(2);
+        heap.push(5.0, 0);
+        heap.push(5.0, 1);
+        heap.push(5.0, 2); // rejected: not strictly > threshold
+
+        let mut results = heap.into_vec();
+        results.sort_by_key(|&(_, doc)| doc);
+        assert_eq!(results, vec![(5.0, 0), (5.0, 1)]);
+    }
+
+    #[test]
+    fn test_top_n_heap_single_element() {
+        let mut heap = TopNHeap::new(1);
+        heap.push(1.0, 0);
+        assert_eq!(heap.threshold, Some(1.0));
+
+        heap.push(0.5, 1); // rejected
+        heap.push(2.0, 2); // accepted
+        assert_eq!(heap.threshold, Some(2.0));
+
+        let results = heap.into_vec();
+        assert_eq!(results, vec![(2.0, 2)]);
+    }
+
+    #[test]
+    fn test_top_n_heap_under_capacity() {
+        let mut heap = TopNHeap::new(5);
+        heap.push(3.0, 0);
+        heap.push(1.0, 1);
+        heap.push(2.0, 2);
+        // Only 3 elements, capacity is 5 — all should be kept
+        assert_eq!(heap.threshold, None);
+
+        let mut results = heap.into_vec();
+        results.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap().then_with(|| a.1.cmp(&b.1)));
+        assert_eq!(results, vec![(3.0, 0), (2.0, 2), (1.0, 1)]);
+    }
+
+    proptest! {
+        #[test]
+        fn test_top_n_heap_matches_top_n_computer(
+            limit in 0..20_usize,
+            mut docs in proptest::collection::vec((0..1000_u32, 0..1000_u32), 0..200_usize),
+        ) {
+            // Both require ascending doc order.
+            docs.sort_by_key(|(_, doc_id)| *doc_id);
+            docs.dedup_by_key(|(_, doc_id)| *doc_id);
+
+            let mut heap = TopNHeap::new(limit);
+            let mut computer: TopNComputer<Score, DocId, NaturalComparator> =
+                TopNComputer::new_with_comparator(limit, NaturalComparator);
+
+            for &(score_u32, doc) in &docs {
+                let score = score_u32 as Score;
+                heap.push(score, doc);
+                computer.push(score, doc);
+            }
+
+            let mut heap_results = heap.into_vec();
+            heap_results.sort_by(|a, b| {
+                b.0.partial_cmp(&a.0).unwrap().then_with(|| a.1.cmp(&b.1))
+            });
+
+            let computer_results: Vec<(Score, DocId)> = computer
+                .into_sorted_vec()
+                .into_iter()
+                .map(|cd| (cd.sort_key, cd.doc))
+                .collect();
+
+            prop_assert_eq!(heap_results, computer_results);
+        }
+    }
+}
--- a/src/collector/sort_key/sort_by_static_fast_value.rs
+++ b/src/collector/sort_key/sort_by_static_fast_value.rs
@@ -52,7 +52,7 @@ impl<T: FastValue> SortKeyComputer for SortByStaticFastValue<T> {
        if schema_type != T::to_type() {
            return Err(crate::TantivyError::SchemaError(format!(
                "Field `{}` is of type {schema_type:?}, not of the type {:?}.",
-                &self.field,
+                self.field,
                T::to_type()
            )));
        }
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -513,7 +513,9 @@ pub struct TopNComputer<Score, D, C> {
    /// The buffer reverses sort order to get top-semantics instead of bottom-semantics
    buffer: Vec<ComparableDoc<Score, D>>,
    top_n: usize,
-    pub(crate) threshold: Option<Score>,
+    /// The current threshold for pruning. Documents with scores at or below
+    /// this value are skipped by `push()`. Updated when the buffer is truncated.
+    pub threshold: Option<Score>,
    comparator: C,
 }

--- a/src/core/json_utils.rs
+++ b/src/core/json_utils.rs
@@ -4,7 +4,7 @@ use common::{replace_in_place, JsonPathWriter};
 use rustc_hash::FxHashMap;

 use crate::indexer::indexing_term::IndexingTerm;
-use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
+use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter as _, PostingsWriterEnum};
 use crate::schema::document::{ReferenceValue, ReferenceValueLeaf, Value};
 use crate::schema::{Type, DATE_TIME_PRECISION_INDEXED};
 use crate::time::format_description::well_known::Rfc3339;
@@ -52,7 +52,8 @@ use crate::{DateTime, DocId, Term};
 /// We can therefore afford working with a map that is not imperfect. It is fine if several
 /// path map to the same index position as long as the probability is relatively low.
 #[derive(Default)]
-pub(crate) struct IndexingPositionsPerPath {
+#[doc(hidden)]
+pub struct IndexingPositionsPerPath {
    positions_per_path: FxHashMap<u32, IndexingPosition>,
 }

@@ -80,7 +81,7 @@ fn index_json_object<'a, V: Value<'a>>(
    text_analyzer: &mut TextAnalyzer,
    term_buffer: &mut IndexingTerm,
    json_path_writer: &mut JsonPathWriter,
-    postings_writer: &mut dyn PostingsWriter,
+    postings_writer: &mut PostingsWriterEnum,
    ctx: &mut IndexingContext,
    positions_per_path: &mut IndexingPositionsPerPath,
 ) {
@@ -104,13 +105,14 @@ fn index_json_object<'a, V: Value<'a>>(
 }

 #[expect(clippy::too_many_arguments)]
-pub(crate) fn index_json_value<'a, V: Value<'a>>(
+#[doc(hidden)]
+pub fn index_json_value<'a, V: Value<'a>>(
    doc: DocId,
    json_value: V,
    text_analyzer: &mut TextAnalyzer,
    term_buffer: &mut IndexingTerm,
    json_path_writer: &mut JsonPathWriter,
-    postings_writer: &mut dyn PostingsWriter,
+    postings_writer: &mut PostingsWriterEnum,
    ctx: &mut IndexingContext,
    positions_per_path: &mut IndexingPositionsPerPath,
 ) {
--- a/src/docset.rs
+++ b/src/docset.rs
@@ -1,4 +1,6 @@
-use std::borrow::{Borrow, BorrowMut};
+use std::ops::{Deref as _, DerefMut as _};
+
+use common::{BitSet, TinySet};

 use crate::fastfield::AliveBitSet;
 use crate::DocId;
@@ -14,6 +16,12 @@ pub const TERMINATED: DocId = i32::MAX as u32;
 /// exactly this size as long as we can fill the buffer.
 pub const COLLECT_BLOCK_BUFFER_LEN: usize = 64;

+/// Number of `TinySet` (64-bit) buckets in a block used by [`DocSet::fill_bitset_block`].
+pub const BLOCK_NUM_TINYBITSETS: usize = 16;
+
+/// Number of doc IDs covered by one block: `BLOCK_NUM_TINYBITSETS * 64 = 1024`.
+pub const BLOCK_WINDOW: u32 = BLOCK_NUM_TINYBITSETS as u32 * 64;
+
 /// Represents an iterable set of sorted doc ids.
 pub trait DocSet: Send {
    /// Goes to the next element.
@@ -130,6 +138,19 @@ pub trait DocSet: Send {
        buffer.len()
    }

+    /// Fills the given bitset with the documents in the docset, starting from its current doc.
+    ///
+    /// Iteration stops at the first doc that is `>= bitset.max_value()`: if the docset holds such
+    /// docs, this function does not consume the docset entirely.
+    fn fill_bitset(&mut self, bitset: &mut BitSet) {
+        let bitset_max_value: u32 = bitset.max_value();
+        let mut doc = self.doc();
+        while doc < bitset_max_value {
+            bitset.insert(doc);
+            doc = self.advance();
+        }
+    }
+
    /// Returns the current document
    /// Right after creating a new `DocSet`, the docset points to the first document.
    ///
@@ -160,6 +181,31 @@ pub trait DocSet: Send {
        self.size_hint() as u64
    }

+    /// Fills a bitmask representing which documents in `[min_doc, min_doc + BLOCK_WINDOW)` are
+    /// present in this docset. Bits are only ever set: bits already in `mask` are preserved.
+    ///
+    /// The window is divided into `BLOCK_NUM_TINYBITSETS` buckets of 64 docs each.
+    /// Returns the next doc `>= min_doc + BLOCK_WINDOW`, or `TERMINATED` if exhausted.
+    fn fill_bitset_block(
+        &mut self,
+        min_doc: DocId,
+        mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS],
+    ) -> DocId {
+        self.seek(min_doc);
+        let horizon = min_doc + BLOCK_WINDOW; // NOTE(review): assumes this cannot overflow u32
+        loop {
+            let doc = self.doc();
+            if doc >= horizon {
+                return doc;
+            }
+            let delta = doc - min_doc;
+            mask[(delta / 64) as usize].insert_mut(delta % 64);
+            if self.advance() == TERMINATED {
+                return TERMINATED;
+            }
+        }
+    }
+
    /// Returns the number documents matching.
    /// Calling this method consumes the `DocSet`.
    fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
@@ -214,6 +260,18 @@ impl DocSet for &mut dyn DocSet {
        (**self).seek_danger(target)
    }

+    fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
+        (**self).fill_buffer(buffer)
+    }
+
+    fn fill_bitset_block(
+        &mut self,
+        min_doc: DocId,
+        mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS],
+    ) -> DocId {
+        (**self).fill_bitset_block(min_doc, mask)
+    }
+
    fn doc(&self) -> u32 {
        (**self).doc()
    }
@@ -233,51 +291,66 @@ impl DocSet for &mut dyn DocSet {
    fn count_including_deleted(&mut self) -> u32 {
        (**self).count_including_deleted()
    }
+
+    fn fill_bitset(&mut self, bitset: &mut BitSet) {
+        (**self).fill_bitset(bitset);
+    }
 }

 impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
+    #[inline]
    fn advance(&mut self) -> DocId {
-        let unboxed: &mut TDocSet = self.borrow_mut();
-        unboxed.advance()
+        self.deref_mut().advance()
    }

+    #[inline]
    fn seek(&mut self, target: DocId) -> DocId {
-        let unboxed: &mut TDocSet = self.borrow_mut();
-        unboxed.seek(target)
+        self.deref_mut().seek(target)
    }

    fn seek_danger(&mut self, target: DocId) -> SeekDangerResult {
-        let unboxed: &mut TDocSet = self.borrow_mut();
-        unboxed.seek_danger(target)
+        self.deref_mut().seek_danger(target)
    }

+    #[inline]
    fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
-        let unboxed: &mut TDocSet = self.borrow_mut();
-        unboxed.fill_buffer(buffer)
+        self.deref_mut().fill_buffer(buffer)
    }

+    fn fill_bitset_block(
+        &mut self,
+        min_doc: DocId,
+        mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS],
+    ) -> DocId {
+        // Forward to the boxed docset instead of using the trait's default implementation.
+        self.deref_mut().fill_bitset_block(min_doc, mask)
+    }
+
+    #[inline]
    fn doc(&self) -> DocId {
-        let unboxed: &TDocSet = self.borrow();
-        unboxed.doc()
+        self.deref().doc()
    }

+    #[inline]
    fn size_hint(&self) -> u32 {
-        let unboxed: &TDocSet = self.borrow();
-        unboxed.size_hint()
+        self.deref().size_hint()
    }

+    #[inline]
    fn cost(&self) -> u64 {
-        let unboxed: &TDocSet = self.borrow();
-        unboxed.cost()
+        self.deref().cost()
    }

+    #[inline]
    fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
-        let unboxed: &mut TDocSet = self.borrow_mut();
-        unboxed.count(alive_bitset)
+        self.deref_mut().count(alive_bitset)
    }

    fn count_including_deleted(&mut self) -> u32 {
-        let unboxed: &mut TDocSet = self.borrow_mut();
-        unboxed.count_including_deleted()
+        self.deref_mut().count_including_deleted()
+    }
+
+    fn fill_bitset(&mut self, bitset: &mut BitSet) {
+        self.deref_mut().fill_bitset(bitset);
    }
 }
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -117,6 +117,24 @@ impl FastFieldsWriter {
        Ok(())
    }

+    /// Indexes the fast fields of a new document from its `(field, value)` pairs directly.
+    ///
+    /// This is like [`add_document`](Self::add_document), but for documents that cannot
+    /// satisfy the `Document` trait's `'static` bound (e.g. a value borrowing from a batch
+    /// being indexed). The caller supplies the document's field/value pairs; like
+    /// `add_document` it advances `num_docs` by exactly one, once every pair has been added.
+    pub fn add_document_from_values<'a, V: Value<'a>>(
+        &mut self,
+        fields_and_values: impl Iterator<Item = (Field, V)>,
+    ) -> crate::Result<()> {
+        let doc_id = self.num_docs;
+        for (field, value) in fields_and_values {
+            self.add_doc_value(doc_id, field, value)?; // on error, `num_docs` is not advanced
+        }
+        self.num_docs += 1;
+        Ok(())
+    }
+
    fn add_doc_value<'a, V: Value<'a>>(
        &mut self,
        doc_id: DocId,
--- a/src/index/codec_configuration.rs
+++ b/src/index/codec_configuration.rs
@@ -0,0 +1,49 @@
+use std::borrow::Cow;
+
+use serde::{Deserialize, Serialize};
+
+use crate::codec::{Codec, StandardCodec};
+
+/// Serializable description of a codec: its id, plus codec-specific JSON props (may be null).
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct CodecConfiguration {
+    codec_id: Cow<'static, str>,
+    #[serde(default, skip_serializing_if = "serde_json::Value::is_null")]
+    props: serde_json::Value, // left out of the serialized form when null
+}
+
+impl CodecConfiguration {
+    /// Returns true if the codec id is `StandardCodec::ID` and no props are set (props is null).
+    pub fn is_standard(&self) -> bool {
+        self.codec_id == StandardCodec::ID && self.props.is_null()
+    }
+
+    /// Creates a codec instance of type `C` from the configuration's props.
+    ///
+    /// Returns an `InvalidArgument` error if the stored codec id is not `C::ID`.
+    pub fn to_codec<C: Codec>(&self) -> crate::Result<C> {
+        if self.codec_id != C::ID {
+            return Err(crate::TantivyError::InvalidArgument(format!(
+                "Codec id mismatch: expected {}, got {}",
+                C::ID,
+                self.codec_id
+            )));
+        }
+        C::from_json_props(&self.props)
+    }
+}
+
+impl<'a, C: Codec> From<&'a C> for CodecConfiguration {
+    fn from(codec: &'a C) -> Self {
+        CodecConfiguration {
+            codec_id: Cow::Borrowed(C::ID),
+            props: codec.to_json_props(),
+        }
+    }
+}
+
+impl Default for CodecConfiguration {
+    fn default() -> Self {
+        CodecConfiguration::from(&StandardCodec)
+    }
+}
--- a/src/index/index.rs
+++ b/src/index/index.rs
@@ -8,12 +8,14 @@ use std::thread::available_parallelism;
 use super::segment::Segment;
 use super::segment_reader::merge_field_meta_data;
 use super::{FieldMetadata, IndexSettings};
+use crate::codec::StandardCodec;
 use crate::core::{Executor, META_FILEPATH};
 use crate::directory::error::OpenReadError;
 #[cfg(feature = "mmap")]
 use crate::directory::MmapDirectory;
 use crate::directory::{Directory, ManagedDirectory, RamDirectory, INDEX_WRITER_LOCK};
 use crate::error::{DataCorruption, TantivyError};
+use crate::index::codec_configuration::CodecConfiguration;
 use crate::index::{IndexMeta, SegmentId, SegmentMeta, SegmentMetaInventory};
 use crate::indexer::index_writer::{
    IndexWriterOptions, MAX_NUM_THREAD, MEMORY_BUDGET_NUM_BYTES_MIN,
@@ -59,6 +61,7 @@ fn save_new_metas(
    schema: Schema,
    index_settings: IndexSettings,
    directory: &dyn Directory,
+    codec: CodecConfiguration,
 ) -> crate::Result<()> {
    save_metas(
        &IndexMeta {
@@ -67,6 +70,7 @@ fn save_new_metas(
            schema,
            opstamp: 0u64,
            payload: None,
+            codec,
        },
        directory,
    )?;
@@ -101,18 +105,21 @@ fn save_new_metas(
 /// };
 /// let index = Index::builder().schema(schema).settings(settings).create_in_ram();
 /// ```
-pub struct IndexBuilder {
+pub struct IndexBuilder<Codec: crate::codec::Codec = StandardCodec> {
    schema: Option<Schema>,
    index_settings: IndexSettings,
    tokenizer_manager: TokenizerManager,
    fast_field_tokenizer_manager: TokenizerManager,
+    codec: Codec,
 }
-impl Default for IndexBuilder {
+
+impl Default for IndexBuilder<StandardCodec> {
    fn default() -> Self {
        IndexBuilder::new()
    }
 }
-impl IndexBuilder {
+
+impl IndexBuilder<StandardCodec> {
    /// Creates a new `IndexBuilder`
    pub fn new() -> Self {
        Self {
@@ -120,6 +127,21 @@ impl IndexBuilder {
            index_settings: IndexSettings::default(),
            tokenizer_manager: TokenizerManager::default(),
            fast_field_tokenizer_manager: TokenizerManager::default(),
+            codec: StandardCodec,
+        }
+    }
+}
+
+impl<Codec: crate::codec::Codec> IndexBuilder<Codec> {
+    /// Sets the codec, turning this builder into an `IndexBuilder<NewCodec>` (other fields kept).
+    #[must_use]
+    pub fn codec<NewCodec: crate::codec::Codec>(self, codec: NewCodec) -> IndexBuilder<NewCodec> {
+        IndexBuilder {
+            schema: self.schema,
+            index_settings: self.index_settings,
+            tokenizer_manager: self.tokenizer_manager,
+            fast_field_tokenizer_manager: self.fast_field_tokenizer_manager,
+            codec,
         }
     }

@@ -154,7 +176,7 @@ impl IndexBuilder {
    /// The index will be allocated in anonymous memory.
    /// This is useful for indexing small set of documents
    /// for instances like unit test or temporary in memory index.
-    pub fn create_in_ram(self) -> Result<Index, TantivyError> {
+    pub fn create_in_ram(self) -> Result<Index<Codec>, TantivyError> {
        let ram_directory = RamDirectory::create();
        self.create(ram_directory)
    }
@@ -165,7 +187,7 @@ impl IndexBuilder {
    /// If a previous index was in this directory, it returns an
    /// [`TantivyError::IndexAlreadyExists`] error.
    #[cfg(feature = "mmap")]
-    pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index> {
+    pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index<Codec>> {
        let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::open(directory_path)?);
        if Index::exists(&*mmap_directory)? {
            return Err(TantivyError::IndexAlreadyExists);
@@ -186,7 +208,7 @@ impl IndexBuilder {
        self,
        dir: impl Into<Box<dyn Directory>>,
        mem_budget: usize,
-    ) -> crate::Result<SingleSegmentIndexWriter<D>> {
+    ) -> crate::Result<SingleSegmentIndexWriter<Codec, D>> {
        let index = self.create(dir)?;
        let index_simple_writer = SingleSegmentIndexWriter::new(index, mem_budget)?;
        Ok(index_simple_writer)
@@ -202,7 +224,7 @@ impl IndexBuilder {
    /// For other unit tests, prefer the [`RamDirectory`], see:
    /// [`IndexBuilder::create_in_ram()`].
    #[cfg(feature = "mmap")]
-    pub fn create_from_tempdir(self) -> crate::Result<Index> {
+    pub fn create_from_tempdir(self) -> crate::Result<Index<Codec>> {
        let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::create_from_tempdir()?);
        self.create(mmap_directory)
    }
@@ -215,12 +237,15 @@ impl IndexBuilder {
    }

    /// Opens or creates a new index in the provided directory
-    pub fn open_or_create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index> {
+    pub fn open_or_create<T: Into<Box<dyn Directory>>>(
+        self,
+        dir: T,
+    ) -> crate::Result<Index<Codec>> {
        let dir: Box<dyn Directory> = dir.into();
        if !Index::exists(&*dir)? {
            return self.create(dir);
        }
-        let mut index = Index::open(dir)?;
+        let mut index: Index<Codec> = Index::<Codec>::open_with_codec(dir)?;
        index.set_tokenizers(self.tokenizer_manager.clone());
        if index.schema() == self.get_expect_schema()? {
            Ok(index)
@@ -244,18 +269,25 @@ impl IndexBuilder {
    /// Creates a new index given an implementation of the trait `Directory`.
    ///
    /// If a directory previously existed, it will be erased.
-    fn create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index> {
+    pub fn create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index<Codec>> {
+        self.create_avoid_monomorphization(dir.into())
+    }
+
+    fn create_avoid_monomorphization(self, dir: Box<dyn Directory>) -> crate::Result<Index<Codec>> {
        self.validate()?;
-        let dir = dir.into();
        let directory = ManagedDirectory::wrap(dir)?;
+        let codec: CodecConfiguration = CodecConfiguration::from(&self.codec);
        save_new_metas(
            self.get_expect_schema()?,
            self.index_settings.clone(),
            &directory,
+            codec,
        )?;
-        let mut metas = IndexMeta::with_schema(self.get_expect_schema()?);
+        let schema = self.get_expect_schema()?;
+        let mut metas = IndexMeta::with_schema_and_codec(schema, &self.codec);
        metas.index_settings = self.index_settings;
-        let mut index = Index::open_from_metas(directory, &metas, SegmentMetaInventory::default());
+        let mut index: Index<Codec> =
+            Index::<Codec>::open_from_metas(directory, &metas, SegmentMetaInventory::default())?;
        index.set_tokenizers(self.tokenizer_manager);
        index.set_fast_field_tokenizers(self.fast_field_tokenizer_manager);
        Ok(index)
@@ -264,7 +296,7 @@ impl IndexBuilder {

 /// Search Index
 #[derive(Clone)]
-pub struct Index {
+pub struct Index<Codec: crate::codec::Codec = crate::codec::StandardCodec> {
    directory: ManagedDirectory,
    schema: Schema,
    settings: IndexSettings,
@@ -272,6 +304,7 @@ pub struct Index {
    tokenizers: TokenizerManager,
    fast_field_tokenizers: TokenizerManager,
    inventory: SegmentMetaInventory,
+    codec: Codec,
 }

 impl Index {
@@ -279,41 +312,6 @@ impl Index {
    pub fn builder() -> IndexBuilder {
        IndexBuilder::new()
    }
-    /// Examines the directory to see if it contains an index.
-    ///
-    /// Effectively, it only checks for the presence of the `meta.json` file.
-    pub fn exists(dir: &dyn Directory) -> Result<bool, OpenReadError> {
-        dir.exists(&META_FILEPATH)
-    }
-
-    /// Accessor to the search executor.
-    ///
-    /// This pool is used by default when calling `searcher.search(...)`
-    /// to perform search on the individual segments.
-    ///
-    /// By default the executor is single thread, and simply runs in the calling thread.
-    pub fn search_executor(&self) -> &Executor {
-        &self.executor
-    }
-
-    /// Replace the default single thread search executor pool
-    /// by a thread pool with a given number of threads.
-    pub fn set_multithread_executor(&mut self, num_threads: usize) -> crate::Result<()> {
-        self.executor = Executor::multi_thread(num_threads, "tantivy-search-")?;
-        Ok(())
-    }
-
-    /// Custom thread pool by a outer thread pool.
-    pub fn set_executor(&mut self, executor: Executor) {
-        self.executor = executor;
-    }
-
-    /// Replace the default single thread search executor pool
-    /// by a thread pool with as many threads as there are CPUs on the system.
-    pub fn set_default_multithread_executor(&mut self) -> crate::Result<()> {
-        let default_num_threads = available_parallelism()?.get();
-        self.set_multithread_executor(default_num_threads)
-    }

    /// Creates a new index using the [`RamDirectory`].
    ///
@@ -324,6 +322,13 @@ impl Index {
        IndexBuilder::new().schema(schema).create_in_ram().unwrap()
    }

+    /// Examines the directory to see if it contains an index.
+    ///
+    /// Effectively, it only checks for the presence of the `meta.json` file.
+    pub fn exists(directory: &dyn Directory) -> Result<bool, OpenReadError> {
+        directory.exists(&META_FILEPATH)
+    }
+
    /// Creates a new index in a given filepath.
    /// The index will use the [`MmapDirectory`].
    ///
@@ -370,20 +375,108 @@ impl Index {
        schema: Schema,
        settings: IndexSettings,
    ) -> crate::Result<Index> {
-        let dir: Box<dyn Directory> = dir.into();
+        Self::create_to_avoid_monomorphization(dir.into(), schema, settings)
+    }
+
+    /// Non-generic body of the index creation entry point.
+    ///
+    /// The generic caller converts its directory argument with `dir.into()`
+    /// and delegates here with a `Box<dyn Directory>`. The builder is given
+    /// the schema and the settings, and then creates the index in `dir`.
+    ///
+    /// `#[inline(never)]` asks the compiler not to inline this body back into
+    /// each instantiation of the generic caller, consistently with
+    /// `open_in_dir_to_avoid_monomorphization`.
+    ///
+    /// # Errors
+    ///
+    /// Returns whatever error `IndexBuilder::create` reports.
+    #[inline(never)]
+    fn create_to_avoid_monomorphization(
+        dir: Box<dyn Directory>,
+        schema: Schema,
+        settings: IndexSettings,
+    ) -> crate::Result<Index> {
        let mut builder = IndexBuilder::new().schema(schema);
        builder = builder.settings(settings);
        builder.create(dir)
    }

+    /// Opens the index stored in the directory at the given path.
+    #[cfg(feature = "mmap")]
+    pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> crate::Result<Index> {
+        Self::open_in_dir_to_avoid_monomorphization(directory_path.as_ref())
+    }
+
+    /// Non-generic body of [`Index::open_in_dir`].
+    ///
+    /// Takes a plain `&Path` instead of a generic `P: AsRef<Path>` and is
+    /// marked `#[inline(never)]`, so that, as its name indicates, the work is
+    /// not monomorphized for every path type used by callers.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the `MmapDirectory` cannot be opened at `directory_path`, or
+    /// if [`Index::open`] fails on that directory.
+    #[cfg(feature = "mmap")]
+    #[inline(never)]
+    fn open_in_dir_to_avoid_monomorphization(directory_path: &Path) -> crate::Result<Index> {
+        let mmap_directory = MmapDirectory::open(directory_path)?;
+        Index::open(mmap_directory)
+    }
+
+    /// Open the index using the provided directory
+    pub fn open<T: Into<Box<dyn Directory>>>(directory: T) -> crate::Result<Index> {
+        Index::<StandardCodec>::open_with_codec(directory.into())
+    }
+}
+
+impl<Codec: crate::codec::Codec> Index<Codec> {
+    /// Returns a version of this index with the standard codec.
+    /// This is useful when you need to pass the index to APIs that
+    /// don't care about the codec (e.g., for reading).
+    pub(crate) fn with_standard_codec(&self) -> Index<StandardCodec> {
+        Index {
+            directory: self.directory.clone(),
+            schema: self.schema.clone(),
+            settings: self.settings.clone(),
+            executor: self.executor.clone(),
+            tokenizers: self.tokenizers.clone(),
+            fast_field_tokenizers: self.fast_field_tokenizers.clone(),
+            inventory: self.inventory.clone(),
+            codec: StandardCodec,
+        }
+    }
+
+    /// Open the index using the provided directory
+    #[inline(never)]
+    pub fn open_with_codec(directory: Box<dyn Directory>) -> crate::Result<Index<Codec>> {
+        let directory = ManagedDirectory::wrap(directory)?;
+        let inventory = SegmentMetaInventory::default();
+        let metas = load_metas(&directory, &inventory)?;
+        let index: Index<Codec> = Index::<Codec>::open_from_metas(directory, &metas, inventory)?;
+        Ok(index)
+    }
+
+    /// Accessor to the codec.
+    pub fn codec(&self) -> &Codec {
+        &self.codec
+    }
+
+    /// Accessor to the search executor.
+    ///
+    /// This pool is used by default when calling `searcher.search(...)`
+    /// to perform search on the individual segments.
+    ///
+    /// By default the executor is single-threaded, and simply runs in the calling thread.
+    pub fn search_executor(&self) -> &Executor {
+        &self.executor
+    }
+
+    /// Replace the default single thread search executor pool
+    /// by a thread pool with a given number of threads.
+    pub fn set_multithread_executor(&mut self, num_threads: usize) -> crate::Result<()> {
+        self.executor = Executor::multi_thread(num_threads, "tantivy-search-")?;
+        Ok(())
+    }
+
+    /// Replaces the search executor with a custom one, e.g. one backed by an outer thread pool.
+    pub fn set_executor(&mut self, executor: Executor) {
+        self.executor = executor;
+    }
+
+    /// Replace the default single thread search executor pool
+    /// by a thread pool with as many threads as there are CPUs on the system.
+    pub fn set_default_multithread_executor(&mut self) -> crate::Result<()> {
+        let default_num_threads = available_parallelism()?.get();
+        self.set_multithread_executor(default_num_threads)
+    }
+
    /// Creates a new index given a directory and an [`IndexMeta`].
-    fn open_from_metas(
+    fn open_from_metas<C: crate::codec::Codec>(
        directory: ManagedDirectory,
        metas: &IndexMeta,
        inventory: SegmentMetaInventory,
-    ) -> Index {
+    ) -> crate::Result<Index<C>> {
        let schema = metas.schema.clone();
-        Index {
+        let codec = metas.codec.to_codec::<C>()?;
+        Ok(Index {
            settings: metas.index_settings.clone(),
            directory,
            schema,
@@ -391,7 +484,8 @@ impl Index {
            fast_field_tokenizers: TokenizerManager::default(),
            executor: Executor::single_thread(),
            inventory,
-        }
+            codec,
+        })
    }

    /// Setter for the tokenizer manager.
@@ -447,7 +541,7 @@ impl Index {
    /// Create a default [`IndexReader`] for the given index.
    ///
    /// See [`Index.reader_builder()`].
-    pub fn reader(&self) -> crate::Result<IndexReader> {
+    pub fn reader(&self) -> crate::Result<IndexReader<Codec>> {
        self.reader_builder().try_into()
    }

@@ -455,17 +549,10 @@ impl Index {
    ///
    /// Most project should create at most one reader for a given index.
    /// This method is typically called only once per `Index` instance.
-    pub fn reader_builder(&self) -> IndexReaderBuilder {
+    pub fn reader_builder(&self) -> IndexReaderBuilder<Codec> {
        IndexReaderBuilder::new(self.clone())
    }

-    /// Opens a new directory from an index path.
-    #[cfg(feature = "mmap")]
-    pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> crate::Result<Index> {
-        let mmap_directory = MmapDirectory::open(directory_path)?;
-        Index::open(mmap_directory)
-    }
-
    /// Returns the list of the segment metas tracked by the index.
    ///
    /// Such segments can of course be part of the index,
@@ -506,16 +593,6 @@ impl Index {
        self.inventory.new_segment_meta(segment_id, max_doc)
    }

-    /// Open the index using the provided directory
-    pub fn open<T: Into<Box<dyn Directory>>>(directory: T) -> crate::Result<Index> {
-        let directory = directory.into();
-        let directory = ManagedDirectory::wrap(directory)?;
-        let inventory = SegmentMetaInventory::default();
-        let metas = load_metas(&directory, &inventory)?;
-        let index = Index::open_from_metas(directory, &metas, inventory);
-        Ok(index)
-    }
-
    /// Reads the index meta file from the directory.
    pub fn load_metas(&self) -> crate::Result<IndexMeta> {
        load_metas(self.directory(), &self.inventory)
@@ -539,7 +616,7 @@ impl Index {
    pub fn writer_with_options<D: Document>(
        &self,
        options: IndexWriterOptions,
-    ) -> crate::Result<IndexWriter<D>> {
+    ) -> crate::Result<IndexWriter<Codec, D>> {
        let directory_lock = self
            .directory
            .acquire_lock(&INDEX_WRITER_LOCK)
@@ -581,7 +658,7 @@ impl Index {
        &self,
        num_threads: usize,
        overall_memory_budget_in_bytes: usize,
-    ) -> crate::Result<IndexWriter<D>> {
+    ) -> crate::Result<IndexWriter<Codec, D>> {
        let memory_arena_in_bytes_per_thread = overall_memory_budget_in_bytes / num_threads;
        let options = IndexWriterOptions::builder()
            .num_worker_threads(num_threads)
@@ -595,7 +672,7 @@ impl Index {
    /// That index writer only simply has a single thread and a memory budget of 15 MB.
    /// Using a single thread gives us a deterministic allocation of DocId.
    #[cfg(test)]
-    pub fn writer_for_tests<D: Document>(&self) -> crate::Result<IndexWriter<D>> {
+    pub fn writer_for_tests<D: Document>(&self) -> crate::Result<IndexWriter<Codec, D>> {
        self.writer_with_num_threads(1, MEMORY_BUDGET_NUM_BYTES_MIN)
    }

@@ -613,7 +690,7 @@ impl Index {
    pub fn writer<D: Document>(
        &self,
        memory_budget_in_bytes: usize,
-    ) -> crate::Result<IndexWriter<D>> {
+    ) -> crate::Result<IndexWriter<Codec, D>> {
        let mut num_threads = std::cmp::min(available_parallelism()?.get(), MAX_NUM_THREAD);
        let memory_budget_num_bytes_per_thread = memory_budget_in_bytes / num_threads;
        if memory_budget_num_bytes_per_thread < MEMORY_BUDGET_NUM_BYTES_MIN {
@@ -640,7 +717,7 @@ impl Index {
    }

    /// Returns the list of segments that are searchable
-    pub fn searchable_segments(&self) -> crate::Result<Vec<Segment>> {
+    pub fn searchable_segments(&self) -> crate::Result<Vec<Segment<Codec>>> {
        Ok(self
            .searchable_segment_metas()?
            .into_iter()
@@ -649,12 +726,12 @@ impl Index {
    }

    #[doc(hidden)]
-    pub fn segment(&self, segment_meta: SegmentMeta) -> Segment {
+    pub fn segment(&self, segment_meta: SegmentMeta) -> Segment<Codec> {
        Segment::for_index(self.clone(), segment_meta)
    }

    /// Creates a new segment.
-    pub fn new_segment(&self) -> Segment {
+    pub fn new_segment(&self) -> Segment<Codec> {
        let segment_meta = self
            .inventory
            .new_segment_meta(SegmentId::generate_random(), 0);
@@ -708,7 +785,7 @@ impl Index {
 }

 impl fmt::Debug for Index {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Index({:?})", self.directory)
    }
 }
--- a/src/index/index_meta.rs
+++ b/src/index/index_meta.rs
@@ -5,7 +5,8 @@ use std::path::PathBuf;
 use serde::{Deserialize, Serialize};

 use super::SegmentComponent;
-use crate::index::SegmentId;
+use crate::codec::Codec;
+use crate::index::{CodecConfiguration, SegmentId};
 use crate::schema::Schema;
 use crate::store::Compressor;
 use crate::{Inventory, Opstamp, TrackedObject};
@@ -286,8 +287,10 @@ pub struct IndexMeta {
    /// This payload is entirely unused by tantivy.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub payload: Option<String>,
+    /// Codec configuration for the index.
+    #[serde(skip_serializing_if = "CodecConfiguration::is_standard")]
+    pub codec: CodecConfiguration,
 }
-
 #[derive(Deserialize, Debug)]
 struct UntrackedIndexMeta {
    pub segments: Vec<InnerSegmentMeta>,
@@ -297,6 +300,8 @@ struct UntrackedIndexMeta {
    pub opstamp: Opstamp,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub payload: Option<String>,
+    #[serde(default)]
+    pub codec: CodecConfiguration,
 }

 impl UntrackedIndexMeta {
@@ -311,6 +316,7 @@ impl UntrackedIndexMeta {
            schema: self.schema,
            opstamp: self.opstamp,
            payload: self.payload,
+            codec: self.codec,
        }
    }
 }
@@ -321,13 +327,14 @@ impl IndexMeta {
    ///
    /// This new index does not contains any segments.
    /// Opstamp will the value `0u64`.
-    pub fn with_schema(schema: Schema) -> IndexMeta {
+    pub fn with_schema_and_codec<C: Codec>(schema: Schema, codec: &C) -> IndexMeta {
        IndexMeta {
            index_settings: IndexSettings::default(),
            segments: vec![],
            schema,
            opstamp: 0u64,
            payload: None,
+            codec: CodecConfiguration::from(codec),
        }
    }

@@ -378,14 +385,38 @@ mod tests {
            schema,
            opstamp: 0u64,
            payload: None,
+            codec: Default::default(),
        };
-        let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
+        let json_value: serde_json::Value =
+            serde_json::to_value(&index_metas).expect("serialization failed");
        assert_eq!(
-            json,
-            r#"{"index_settings":{"docstore_compression":"none","docstore_blocksize":16384},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
+            &json_value,
+            &serde_json::json!(
+            {
+              "index_settings": {
+                "docstore_compression": "none",
+                "docstore_blocksize": 16384
+              },
+              "segments": [],
+              "schema": [
+                {
+                  "name": "text",
+                  "type": "text",
+                  "options": {
+                    "indexing": {
+                      "record": "position",
+                      "fieldnorms": true,
+                      "tokenizer": "default"
+                    },
+                    "stored": false,
+                    "fast": false
+                  }
+                }
+              ],
+              "opstamp": 0
+            })
        );
-
-        let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap();
+        let deser_meta: UntrackedIndexMeta = serde_json::from_value(json_value).unwrap();
        assert_eq!(index_metas.index_settings, deser_meta.index_settings);
        assert_eq!(index_metas.schema, deser_meta.schema);
        assert_eq!(index_metas.opstamp, deser_meta.opstamp);
@@ -411,14 +442,39 @@ mod tests {
            schema,
            opstamp: 0u64,
            payload: None,
+            codec: Default::default(),
        };
-        let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
+        let json_value = serde_json::to_value(&index_metas).expect("serialization failed");
        assert_eq!(
-            json,
-            r#"{"index_settings":{"docstore_compression":"zstd(compression_level=4)","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
+            &json_value,
+            &serde_json::json!(
+                {
+                  "index_settings": {
+                    "docstore_compression": "zstd(compression_level=4)",
+                    "docstore_blocksize": 1000000
+                  },
+                  "segments": [],
+                  "schema": [
+                    {
+                      "name": "text",
+                      "type": "text",
+                      "options": {
+                        "indexing": {
+                          "record": "position",
+                          "fieldnorms": true,
+                          "tokenizer": "default"
+                        },
+                        "stored": false,
+                        "fast": false
+                      }
+                    }
+                  ],
+                  "opstamp": 0
+                }
+            )
        );

-        let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap();
+        let deser_meta: UntrackedIndexMeta = serde_json::from_value(json_value).unwrap();
        assert_eq!(index_metas.index_settings, deser_meta.index_settings);
        assert_eq!(index_metas.schema, deser_meta.schema);
        assert_eq!(index_metas.opstamp, deser_meta.opstamp);
--- a/src/index/inverted_index_reader.rs
+++ b/src/index/inverted_index_reader.rs
@@ -1,4 +1,5 @@
 use std::io;
+use std::sync::Arc;

 use common::json_path_writer::JSON_END_OF_PATH;
 use common::{BinarySerializable, ByteCount};
@@ -9,9 +10,14 @@ use itertools::Itertools;
 #[cfg(feature = "quickwit")]
 use tantivy_fst::automaton::{AlwaysMatch, Automaton};

+use crate::codec::positions::PositionsCodec;
+use crate::codec::postings::PostingsCodec;
+use crate::codec::{Codec, ObjectSafeCodec, StandardCodec};
 use crate::directory::FileSlice;
-use crate::positions::PositionReader;
-use crate::postings::{BlockSegmentPostings, SegmentPostings, TermInfo};
+use crate::fieldnorm::FieldNormReader;
+use crate::postings::{Postings, TermInfo};
+use crate::query::term_query::TermScorer;
+use crate::query::{Bm25Weight, PhraseScorer, Scorer};
 use crate::schema::{IndexRecordOption, Term, Type};
 use crate::termdict::TermDictionary;

@@ -33,6 +39,7 @@ pub struct InvertedIndexReader {
    positions_file_slice: FileSlice,
    record_option: IndexRecordOption,
    total_num_tokens: u64,
+    codec: Arc<dyn ObjectSafeCodec>,
 }

 /// Object that records the amount of space used by a field in an inverted index.
@@ -68,6 +75,7 @@ impl InvertedIndexReader {
        postings_file_slice: FileSlice,
        positions_file_slice: FileSlice,
        record_option: IndexRecordOption,
+        codec: Arc<dyn ObjectSafeCodec>,
    ) -> io::Result<InvertedIndexReader> {
        let (total_num_tokens_slice, postings_body) = postings_file_slice.split(8);
        let total_num_tokens = u64::deserialize(&mut total_num_tokens_slice.read_bytes()?)?;
@@ -77,6 +85,7 @@ impl InvertedIndexReader {
            positions_file_slice,
            record_option,
            total_num_tokens,
+            codec,
        })
    }

@@ -89,6 +98,7 @@ impl InvertedIndexReader {
            positions_file_slice: FileSlice::empty(),
            record_option,
            total_num_tokens: 0u64,
+            codec: Arc::new(StandardCodec),
        }
    }

@@ -160,61 +170,99 @@ impl InvertedIndexReader {
        Ok(fields)
    }

-    /// Resets the block segment to another position of the postings
-    /// file.
-    ///
-    /// This is useful for enumerating through a list of terms,
-    /// and consuming the associated posting lists while avoiding
-    /// reallocating a [`BlockSegmentPostings`].
-    ///
-    /// # Warning
-    ///
-    /// This does not reset the positions list.
-    pub fn reset_block_postings_from_terminfo(
+    pub(crate) fn new_term_scorer_specialized<C: Codec>(
        &self,
        term_info: &TermInfo,
-        block_postings: &mut BlockSegmentPostings,
-    ) -> io::Result<()> {
-        let postings_slice = self
-            .postings_file_slice
-            .slice(term_info.postings_range.clone());
-        let postings_bytes = postings_slice.read_bytes()?;
-        block_postings.reset(term_info.doc_freq, postings_bytes)?;
-        Ok(())
-    }
-
-    /// Returns a block postings given a `Term`.
-    /// This method is for an advanced usage only.
-    ///
-    /// Most users should prefer using [`Self::read_postings()`] instead.
-    pub fn read_block_postings(
-        &self,
-        term: &Term,
        option: IndexRecordOption,
-    ) -> io::Result<Option<BlockSegmentPostings>> {
-        self.get_term_info(term)?
-            .map(move |term_info| self.read_block_postings_from_terminfo(&term_info, option))
-            .transpose()
+        fieldnorm_reader: FieldNormReader,
+        similarity_weight: Bm25Weight,
+        codec: &C,
+    ) -> io::Result<TermScorer<<<C as Codec>::PostingsCodec as PostingsCodec>::Postings>> {
+        let postings = self.read_postings_from_terminfo_specialized(term_info, option, codec)?;
+        let term_scorer = TermScorer::new(postings, fieldnorm_reader, similarity_weight);
+        Ok(term_scorer)
    }

-    /// Returns a block postings given a `term_info`.
-    /// This method is for an advanced usage only.
-    ///
-    /// Most users should prefer using [`Self::read_postings()`] instead.
-    pub fn read_block_postings_from_terminfo(
+    /// Builds a [`PhraseScorer`] over the concrete postings type of codec `C`.
+    ///
+    /// For each `(offset, term_info)` pair in `term_infos`, the postings are
+    /// loaded through `read_postings_from_terminfo_specialized`, requesting
+    /// `IndexRecordOption::WithFreqsAndPositions`, and paired with `offset`.
+    /// The resulting list is handed to `PhraseScorer::new` together with
+    /// `similarity_weight_opt`, `fieldnorm_reader` and `slop`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first `io::Error` raised while reading the postings of a term.
+    pub(crate) fn new_phrase_scorer_type_specialized<C: Codec>(
+        &self,
+        term_infos: &[(usize, TermInfo)],
+        similarity_weight_opt: Option<Bm25Weight>,
+        fieldnorm_reader: FieldNormReader,
+        slop: u32,
+        codec: &C,
+    ) -> io::Result<PhraseScorer<<<C as Codec>::PostingsCodec as PostingsCodec>::Postings>> {
+        // One entry per term; the capacity is known up front.
+        let mut offset_and_term_postings: Vec<(
+            usize,
+            <<C as Codec>::PostingsCodec as PostingsCodec>::Postings,
+        )> = Vec::with_capacity(term_infos.len());
+        for (offset, term_info) in term_infos {
+            // Positions are always requested here, whatever the caller's scoring needs.
+            let postings = self.read_postings_from_terminfo_specialized(
+                term_info,
+                IndexRecordOption::WithFreqsAndPositions,
+                codec,
+            )?;
+            offset_and_term_postings.push((*offset, postings));
+        }
+        let phrase_scorer = PhraseScorer::new(
+            offset_and_term_postings,
+            similarity_weight_opt,
+            fieldnorm_reader,
+            slop,
+        );
+        Ok(phrase_scorer)
+    }
+
+    /// Build a new term scorer.
+    pub fn new_term_scorer(
        &self,
        term_info: &TermInfo,
-        requested_option: IndexRecordOption,
-    ) -> io::Result<BlockSegmentPostings> {
+        option: IndexRecordOption,
+        fieldnorm_reader: FieldNormReader,
+        similarity_weight: Bm25Weight,
+    ) -> io::Result<Box<dyn Scorer>> {
+        let term_scorer = self.codec.load_term_scorer_type_erased(
+            term_info,
+            option,
+            self,
+            fieldnorm_reader,
+            similarity_weight,
+        )?;
+        Ok(term_scorer)
+    }
+
+    /// Returns a postings object with a concrete, codec-specific type.
+    ///
+    /// This requires you to provide the actual codec.
+    pub fn read_postings_from_terminfo_specialized<C: Codec>(
+        &self,
+        term_info: &TermInfo,
+        option: IndexRecordOption,
+        codec: &C,
+    ) -> io::Result<<<C as Codec>::PostingsCodec as PostingsCodec>::Postings> {
+        let option = option.downgrade(self.record_option);
        let postings_data = self
            .postings_file_slice
-            .slice(term_info.postings_range.clone());
-        BlockSegmentPostings::open(
-            term_info.doc_freq,
-            postings_data,
-            self.record_option,
-            requested_option,
-        )
+            .slice(term_info.postings_range.clone())
+            .read_bytes()?;
+        let position_reader = if option.has_positions() {
+            let positions_data = self
+                .positions_file_slice
+                .slice(term_info.positions_range.clone())
+                .read_bytes()?;
+            let reader = codec.positions_codec().open_reader(positions_data)?;
+            Some(Box::new(reader) as Box<dyn crate::codec::positions::PositionsReader>)
+        } else {
+            None
+        };
+        let postings: <<C as Codec>::PostingsCodec as PostingsCodec>::Postings =
+            codec.postings_codec().load_postings(
+                term_info.doc_freq,
+                postings_data,
+                self.record_option,
+                option,
+                position_reader,
+            )?;
+        Ok(postings)
    }

    /// Returns a posting object given a `term_info`.
@@ -225,25 +273,9 @@ impl InvertedIndexReader {
        &self,
        term_info: &TermInfo,
        option: IndexRecordOption,
-    ) -> io::Result<SegmentPostings> {
-        let option = option.downgrade(self.record_option);
-
-        let block_postings = self.read_block_postings_from_terminfo(term_info, option)?;
-        let position_reader = {
-            if option.has_positions() {
-                let positions_data = self
-                    .positions_file_slice
-                    .read_bytes_slice(term_info.positions_range.clone())?;
-                let position_reader = PositionReader::open(positions_data)?;
-                Some(position_reader)
-            } else {
-                None
-            }
-        };
-        Ok(SegmentPostings::from_block_postings(
-            block_postings,
-            position_reader,
-        ))
+    ) -> io::Result<Box<dyn Postings>> {
+        self.codec
+            .load_postings_type_erased(term_info, option, self)
    }

    /// Returns the total number of tokens recorded for all documents
@@ -266,7 +298,7 @@ impl InvertedIndexReader {
        &self,
        term: &Term,
        option: IndexRecordOption,
-    ) -> io::Result<Option<SegmentPostings>> {
+    ) -> io::Result<Option<Box<dyn Postings>>> {
        self.get_term_info(term)?
            .map(move |term_info| self.read_postings_from_terminfo(&term_info, option))
            .transpose()
--- a/src/index/mod.rs
+++ b/src/index/mod.rs
@@ -2,6 +2,7 @@
 //!
 //! It contains `Index` and `Segment`, where a `Index` consists of one or more `Segment`s.

+mod codec_configuration;
 mod index;
 mod index_meta;
 mod inverted_index_reader;
@@ -10,6 +11,7 @@ mod segment_component;
 mod segment_id;
 mod segment_reader;

+pub use self::codec_configuration::CodecConfiguration;
 pub use self::index::{Index, IndexBuilder};
 pub(crate) use self::index_meta::SegmentMetaInventory;
 pub use self::index_meta::{IndexMeta, IndexSettings, Order, SegmentMeta};
--- a/src/index/segment.rs
+++ b/src/index/segment.rs
@@ -2,6 +2,7 @@ use std::fmt;
 use std::path::PathBuf;

 use super::SegmentComponent;
+use crate::codec::StandardCodec;
 use crate::directory::error::{OpenReadError, OpenWriteError};
 use crate::directory::{Directory, FileSlice, WritePtr};
 use crate::index::{Index, SegmentId, SegmentMeta};
@@ -10,25 +11,25 @@ use crate::Opstamp;

 /// A segment is a piece of the index.
 #[derive(Clone)]
-pub struct Segment {
-    index: Index,
+pub struct Segment<C: crate::codec::Codec = StandardCodec> {
+    index: Index<C>,
    meta: SegmentMeta,
 }

-impl fmt::Debug for Segment {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+impl<C: crate::codec::Codec> fmt::Debug for Segment<C> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Segment({:?})", self.id().uuid_string())
    }
 }

-impl Segment {
+impl<C: crate::codec::Codec> Segment<C> {
    /// Creates a new segment given an `Index` and a `SegmentId`
-    pub(crate) fn for_index(index: Index, meta: SegmentMeta) -> Segment {
+    pub(crate) fn for_index(index: Index<C>, meta: SegmentMeta) -> Segment<C> {
        Segment { index, meta }
    }

    /// Returns the index the segment belongs to.
-    pub fn index(&self) -> &Index {
+    pub fn index(&self) -> &Index<C> {
        &self.index
    }

@@ -46,7 +47,7 @@ impl Segment {
    ///
    /// This method is only used when updating `max_doc` from 0
    /// as we finalize a fresh new segment.
-    pub fn with_max_doc(self, max_doc: u32) -> Segment {
+    pub fn with_max_doc(self, max_doc: u32) -> Segment<C> {
        Segment {
            index: self.index,
            meta: self.meta.with_max_doc(max_doc),
@@ -55,7 +56,7 @@ impl Segment {

    #[doc(hidden)]
    #[must_use]
-    pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: Opstamp) -> Segment {
+    pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: Opstamp) -> Segment<C> {
        Segment {
            index: self.index,
            meta: self.meta.with_delete_meta(num_deleted_docs, opstamp),
--- a/src/index/segment_reader.rs
+++ b/src/index/segment_reader.rs
@@ -6,6 +6,8 @@ use common::{ByteCount, HasLen};
 use fnv::FnvHashMap;
 use itertools::Itertools;

+use crate::codec::ObjectSafeCodec;
+use crate::directory::error::OpenReadError;
 use crate::directory::{CompositeFile, FileSlice};
 use crate::error::DataCorruption;
 use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders};
@@ -47,6 +49,7 @@ pub struct SegmentReader {
    store_file: FileSlice,
    alive_bitset_opt: Option<AliveBitSet>,
    schema: Schema,
+    codec: Arc<dyn ObjectSafeCodec>,
 }

 impl SegmentReader {
@@ -67,6 +70,11 @@ impl SegmentReader {
        &self.schema
    }

+    /// Returns the index codec.
+    pub fn codec(&self) -> &dyn ObjectSafeCodec {
+        &*self.codec
+    }
+
    /// Return the number of documents that have been
    /// deleted in the segment.
    pub fn num_deleted_docs(&self) -> DocId {
@@ -140,15 +148,16 @@ impl SegmentReader {
    }

    /// Open a new segment for reading.
-    pub fn open(segment: &Segment) -> crate::Result<SegmentReader> {
+    pub fn open<C: crate::codec::Codec>(segment: &Segment<C>) -> crate::Result<SegmentReader> {
        Self::open_with_custom_alive_set(segment, None)
    }

    /// Open a new segment for reading.
-    pub fn open_with_custom_alive_set(
-        segment: &Segment,
+    pub fn open_with_custom_alive_set<C: crate::codec::Codec>(
+        segment: &Segment<C>,
        custom_bitset: Option<AliveBitSet>,
    ) -> crate::Result<SegmentReader> {
+        let codec: Arc<dyn ObjectSafeCodec> = Arc::new(segment.index().codec().clone());
        let termdict_file = segment.open_read(SegmentComponent::Terms)?;
        let termdict_composite = CompositeFile::open(&termdict_file)?;

@@ -159,12 +168,10 @@ impl SegmentReader {
        let postings_file = segment.open_read(SegmentComponent::Postings)?;
        let postings_composite = CompositeFile::open(&postings_file)?;

-        let positions_composite = {
-            if let Ok(positions_file) = segment.open_read(SegmentComponent::Positions) {
-                CompositeFile::open(&positions_file)?
-            } else {
-                CompositeFile::empty()
-            }
+        let positions_composite = match segment.open_read(SegmentComponent::Positions) {
+            Ok(positions_file) => CompositeFile::open(&positions_file)?,
+            Err(OpenReadError::FileDoesNotExist(_)) => CompositeFile::empty(),
+            Err(open_read_error) => return Err(open_read_error.into()),
        };

        let schema = segment.schema();
@@ -204,6 +211,7 @@ impl SegmentReader {
            alive_bitset_opt,
            positions_composite,
            schema,
+            codec,
        })
    }

@@ -273,6 +281,7 @@ impl SegmentReader {
            postings_file,
            positions_file,
            record_option,
+            self.codec.clone(),
        )?);

        // by releasing the lock in between, we may end up opening the inverting index
@@ -323,7 +332,7 @@ impl SegmentReader {
                            // Without expand dots enabled dots need to be escaped.
                            let escaped_json_path = json_path.replace('.', "\\.");
                            let full_path = format!("{field_name}.{escaped_json_path}");
-                            let full_path_unescaped = format!("{}.{}", field_name, &json_path);
+                            let full_path_unescaped = format!("{}.{}", field_name, json_path);
                            map_to_canonical.insert(full_path_unescaped, full_path.to_string());
                            full_path
                        } else {
--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -9,6 +9,7 @@ use smallvec::smallvec;
 use super::operation::{AddOperation, UserOperation};
 use super::segment_updater::SegmentUpdater;
 use super::{AddBatch, AddBatchReceiver, AddBatchSender, PreparedCommit};
+use crate::codec::{Codec, StandardCodec};
 use crate::directory::{DirectoryLock, GarbageCollectionResult, TerminatingWrite};
 use crate::error::TantivyError;
 use crate::fastfield::write_alive_bitset;
@@ -68,12 +69,12 @@ pub struct IndexWriterOptions {
 /// indexing queue.
 /// Each indexing thread builds its own independent [`Segment`], via
 /// a `SegmentWriter` object.
-pub struct IndexWriter<D: Document = TantivyDocument> {
+pub struct IndexWriter<C: Codec = StandardCodec, D: Document = TantivyDocument> {
    // the lock is just used to bind the
    // lifetime of the lock with that of the IndexWriter.
    _directory_lock: Option<DirectoryLock>,

-    index: Index,
+    index: Index<C>,

    options: IndexWriterOptions,

@@ -82,7 +83,7 @@ pub struct IndexWriter<D: Document = TantivyDocument> {
    index_writer_status: IndexWriterStatus<D>,
    operation_sender: AddBatchSender<D>,

-    segment_updater: SegmentUpdater,
+    segment_updater: SegmentUpdater<C>,

    worker_id: usize,

@@ -128,8 +129,8 @@ fn compute_deleted_bitset(
 /// is `==` target_opstamp.
 /// For instance, there was no delete operation between the state of the `segment_entry` and
 /// the `target_opstamp`, `segment_entry` is not updated.
-pub fn advance_deletes(
-    mut segment: Segment,
+pub fn advance_deletes<C: Codec>(
+    mut segment: Segment<C>,
    segment_entry: &mut SegmentEntry,
    target_opstamp: Opstamp,
 ) -> crate::Result<()> {
@@ -179,11 +180,11 @@ pub fn advance_deletes(
    Ok(())
 }

-fn index_documents<D: Document>(
+fn index_documents<C: crate::codec::Codec, D: Document>(
    memory_budget: usize,
-    segment: Segment,
+    segment: Segment<C>,
    grouped_document_iterator: &mut dyn Iterator<Item = AddBatch<D>>,
-    segment_updater: &SegmentUpdater,
+    segment_updater: &SegmentUpdater<C>,
    mut delete_cursor: DeleteCursor,
 ) -> crate::Result<()> {
    let mut segment_writer = SegmentWriter::for_segment(memory_budget, segment.clone())?;
@@ -226,8 +227,8 @@ fn index_documents<D: Document>(
 }

 /// `doc_opstamps` is required to be non-empty.
-fn apply_deletes(
-    segment: &Segment,
+fn apply_deletes<C: crate::codec::Codec>(
+    segment: &Segment<C>,
    delete_cursor: &mut DeleteCursor,
    doc_opstamps: &[Opstamp],
 ) -> crate::Result<Option<BitSet>> {
@@ -262,7 +263,7 @@ fn apply_deletes(
    })
 }

-impl<D: Document> IndexWriter<D> {
+impl<C: Codec, D: Document> IndexWriter<C, D> {
    /// Create a new index writer. Attempts to acquire a lockfile.
    ///
    /// The lockfile should be deleted on drop, but it is possible
@@ -278,7 +279,7 @@ impl<D: Document> IndexWriter<D> {
    /// If the memory arena per thread is too small or too big, returns
    /// `TantivyError::InvalidArgument`
    pub(crate) fn new(
-        index: &Index,
+        index: &Index<C>,
        options: IndexWriterOptions,
        directory_lock: DirectoryLock,
    ) -> crate::Result<Self> {
@@ -345,7 +346,7 @@ impl<D: Document> IndexWriter<D> {
    }

    /// Accessor to the index.
-    pub fn index(&self) -> &Index {
+    pub fn index(&self) -> &Index<C> {
        &self.index
    }

@@ -393,7 +394,7 @@ impl<D: Document> IndexWriter<D> {
    /// It is safe to start writing file associated with the new `Segment`.
    /// These will not be garbage collected as long as an instance object of
    /// `SegmentMeta` object associated with the new `Segment` is "alive".
-    pub fn new_segment(&self) -> Segment {
+    pub fn new_segment(&self) -> Segment<C> {
        self.index.new_segment()
    }

@@ -615,7 +616,7 @@ impl<D: Document> IndexWriter<D> {
    /// It is also possible to add a payload to the `commit`
    /// using this API.
    /// See [`PreparedCommit::set_payload()`].
-    pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit<'_, D>> {
+    pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit<'_, C, D>> {
        // Here, because we join all of the worker threads,
        // all of the segment update for this commit have been
        // sent.
@@ -665,7 +666,7 @@ impl<D: Document> IndexWriter<D> {
        self.prepare_commit()?.commit()
    }

-    pub(crate) fn segment_updater(&self) -> &SegmentUpdater {
+    pub(crate) fn segment_updater(&self) -> &SegmentUpdater<C> {
        &self.segment_updater
    }

@@ -804,7 +805,7 @@ impl<D: Document> IndexWriter<D> {
    }
 }

-impl<D: Document> Drop for IndexWriter<D> {
+impl<C: Codec, D: Document> Drop for IndexWriter<C, D> {
    fn drop(&mut self) {
        self.segment_updater.kill();
        self.drop_sender();
--- a/src/indexer/indexing_term.rs
+++ b/src/indexer/indexing_term.rs
@@ -13,7 +13,8 @@ use crate::schema::Field;
 /// We serialize the field, because we index everything in a single
 /// global term dictionary during indexing.
 #[derive(Clone)]
-pub(crate) struct IndexingTerm<B = Vec<u8>>(B)
+#[doc(hidden)]
+pub struct IndexingTerm<B = Vec<u8>>(B)
 where B: AsRef<[u8]>;

 /// The number of bytes used as metadata by `Term`.
@@ -42,7 +43,7 @@ impl IndexingTerm {
    }

    /// Removes the value_bytes and set the field
-    pub(crate) fn clear_with_field(&mut self, field: Field) {
+    pub fn clear_with_field(&mut self, field: Field) {
        self.truncate_value_bytes(0);
        self.set_field(field);
    }
--- a/src/indexer/merge_index_test.rs
+++ b/src/indexer/merge_index_test.rs
@@ -1,9 +1,10 @@
 #[cfg(test)]
 mod tests {
+    use crate::codec::StandardCodec;
    use crate::collector::TopDocs;
    use crate::fastfield::AliveBitSet;
    use crate::index::Index;
-    use crate::postings::Postings;
+    use crate::postings::{DocFreq, Postings};
    use crate::query::QueryParser;
    use crate::schema::{
        self, BytesOptions, Facet, FacetOptions, IndexRecordOption, NumericOptions,
@@ -121,21 +122,26 @@ mod tests {
            let my_text_field = index.schema().get_field("text_field").unwrap();
            let term_a = Term::from_field_text(my_text_field, "text");
            let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
+            let term_info = inverted_index.get_term_info(&term_a).unwrap().unwrap();
            let mut postings = inverted_index
-                .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
-                .unwrap()
+                .read_postings_from_terminfo_specialized(
+                    &term_info,
+                    IndexRecordOption::WithFreqsAndPositions,
+                    &StandardCodec,
+                )
                .unwrap();
-            assert_eq!(postings.doc_freq(), 2);
+            assert_eq!(postings.doc_freq(), DocFreq::Exact(2));
            let fallback_bitset = AliveBitSet::for_test_from_deleted_docs(&[0], 100);
            assert_eq!(
-                postings.doc_freq_given_deletes(
+                crate::indexer::merger::doc_freq_given_deletes(
+                    &postings,
                    segment_reader.alive_bitset().unwrap_or(&fallback_bitset)
                ),
                2
            );

            assert_eq!(postings.term_freq(), 1);
-            let mut output = vec![];
+            let mut output = Vec::new();
            postings.positions(&mut output);
            assert_eq!(output, vec![1]);
            postings.advance();
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -7,6 +7,8 @@ use common::ReadOnlyBitSet;
 use itertools::Itertools;
 use measure_time::debug_time;

+use crate::codec::postings::PostingsCodec;
+use crate::codec::{Codec, StandardCodec};
 use crate::directory::WritePtr;
 use crate::docset::{DocSet, TERMINATED};
 use crate::error::DataCorruption;
@@ -15,7 +17,7 @@ use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer,
 use crate::index::{Segment, SegmentComponent, SegmentReader};
 use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping};
 use crate::indexer::SegmentSerializer;
-use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
+use crate::postings::{InvertedIndexSerializer, Postings};
 use crate::schema::{value_type_to_column_type, Field, FieldType, Schema};
 use crate::store::StoreWriter;
 use crate::termdict::{TermMerger, TermOrdinal};
@@ -76,10 +78,11 @@ fn estimate_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::
    Ok(total_num_tokens)
 }

-pub struct IndexMerger {
+pub struct IndexMerger<C: Codec = StandardCodec> {
    schema: Schema,
    pub(crate) readers: Vec<SegmentReader>,
    max_doc: u32,
+    codec: C,
 }

 struct DeltaComputer {
@@ -144,8 +147,8 @@ fn extract_fast_field_required_columns(schema: &Schema) -> Vec<(String, ColumnTy
        .collect()
 }

-impl IndexMerger {
-    pub fn open(schema: Schema, segments: &[Segment]) -> crate::Result<IndexMerger> {
+impl<C: Codec> IndexMerger<C> {
+    pub fn open(schema: Schema, segments: &[Segment<C>]) -> crate::Result<IndexMerger<C>> {
        let alive_bitset = segments.iter().map(|_| None).collect_vec();
        Self::open_with_custom_alive_set(schema, segments, alive_bitset)
    }
@@ -162,11 +165,15 @@ impl IndexMerger {
    // This can be used to merge but also apply an additional filter.
    // One use case is demux, which is basically taking a list of
    // segments and partitions them e.g. by a value in a field.
+    //
+    // Panics if `segments` is empty.
    pub fn open_with_custom_alive_set(
        schema: Schema,
-        segments: &[Segment],
+        segments: &[Segment<C>],
        alive_bitset_opt: Vec<Option<AliveBitSet>>,
-    ) -> crate::Result<IndexMerger> {
+    ) -> crate::Result<IndexMerger<C>> {
+        assert!(!segments.is_empty());
+        let codec = segments[0].index().codec().clone();
        let mut readers = vec![];
        for (segment, new_alive_bitset_opt) in segments.iter().zip(alive_bitset_opt) {
            if segment.meta().num_docs() > 0 {
@@ -189,6 +196,7 @@ impl IndexMerger {
            schema,
            readers,
            max_doc,
+            codec,
        })
    }

@@ -287,7 +295,7 @@ impl IndexMerger {
        &self,
        indexed_field: Field,
        _field_type: &FieldType,
-        serializer: &mut InvertedIndexSerializer,
+        serializer: &mut InvertedIndexSerializer<C>,
        fieldnorm_reader: Option<FieldNormReader>,
        doc_id_mapping: &SegmentDocIdMapping,
    ) -> crate::Result<()> {
@@ -355,7 +363,10 @@ impl IndexMerger {
                         indexed. Have you modified the schema?",
        );

-        let mut segment_postings_containing_the_term: Vec<(usize, SegmentPostings)> = vec![];
+        let mut segment_postings_containing_the_term: Vec<(
+            usize,
+            <C::PostingsCodec as PostingsCodec>::Postings,
+        )> = Vec::with_capacity(self.readers.len());

        while merged_terms.advance() {
            segment_postings_containing_the_term.clear();
@@ -367,17 +378,24 @@ impl IndexMerger {
            for (segment_ord, term_info) in merged_terms.current_segment_ords_and_term_infos() {
                let segment_reader = &self.readers[segment_ord];
                let inverted_index: &InvertedIndexReader = &field_readers[segment_ord];
-                let segment_postings = inverted_index
-                    .read_postings_from_terminfo(&term_info, segment_postings_option)?;
+                let postings = inverted_index.read_postings_from_terminfo_specialized(
+                    &term_info,
+                    segment_postings_option,
+                    &self.codec,
+                )?;
                let alive_bitset_opt = segment_reader.alive_bitset();
                let doc_freq = if let Some(alive_bitset) = alive_bitset_opt {
-                    segment_postings.doc_freq_given_deletes(alive_bitset)
+                    doc_freq_given_deletes(&postings, alive_bitset)
                } else {
-                    segment_postings.doc_freq()
+                    // We need an exact document frequency here.
+                    match postings.doc_freq() {
+                        crate::postings::DocFreq::Approximate(_) => exact_doc_freq(&postings),
+                        crate::postings::DocFreq::Exact(doc_freq) => doc_freq,
+                    }
                };
                if doc_freq > 0u32 {
                    total_doc_freq += doc_freq;
-                    segment_postings_containing_the_term.push((segment_ord, segment_postings));
+                    segment_postings_containing_the_term.push((segment_ord, postings));
                }
            }

@@ -395,11 +413,7 @@ impl IndexMerger {
            assert!(!segment_postings_containing_the_term.is_empty());

            let has_term_freq = {
-                let has_term_freq = !segment_postings_containing_the_term[0]
-                    .1
-                    .block_cursor
-                    .freqs()
-                    .is_empty();
+                let has_term_freq = segment_postings_containing_the_term[0].1.has_freq();
                for (_, postings) in &segment_postings_containing_the_term[1..] {
                    // This may look at a strange way to test whether we have term freq or not.
                    // With JSON object, the schema is not sufficient to know whether a term
@@ -415,7 +429,7 @@ impl IndexMerger {
                    //
                    // Overall the reliable way to know if we have actual frequencies loaded or not
                    // is to check whether the actual decoded array is empty or not.
-                    if has_term_freq == postings.block_cursor.freqs().is_empty() {
+                    if postings.has_freq() != has_term_freq {
                        return Err(DataCorruption::comment_only(
                            "Term freqs are inconsistent across segments",
                        )
@@ -467,7 +481,7 @@ impl IndexMerger {

    fn write_postings(
        &self,
-        serializer: &mut InvertedIndexSerializer,
+        serializer: &mut InvertedIndexSerializer<C>,
        fieldnorm_readers: FieldNormReaders,
        doc_id_mapping: &SegmentDocIdMapping,
    ) -> crate::Result<()> {
@@ -525,7 +539,7 @@ impl IndexMerger {
    ///
    /// # Returns
    /// The number of documents in the resulting segment.
-    pub fn write(&self, mut serializer: SegmentSerializer) -> crate::Result<u32> {
+    pub fn write(&self, mut serializer: SegmentSerializer<C>) -> crate::Result<u32> {
        let doc_id_mapping = self.get_doc_id_from_concatenated_data()?;
        debug!("write-fieldnorms");
        if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
@@ -553,6 +567,43 @@ impl IndexMerger {
    }
 }

+/// Counts the documents of `postings` that are alive according to `alive_bitset`.
+///
+/// This function clones `postings` and scans the clone until `TERMINATED`,
+/// leaving the original untouched (this is a rather expensive operation).
+pub(crate) fn doc_freq_given_deletes<P: Postings + Clone>(
+    postings: &P,
+    alive_bitset: &AliveBitSet,
+) -> u32 {
+    let mut docset = postings.clone();
+    let mut doc_freq = 0;
+    loop {
+        let doc = docset.doc();
+        if doc == TERMINATED {
+            return doc_freq;
+        }
+        if alive_bitset.is_alive(doc) {
+            doc_freq += 1u32;
+        }
+        docset.advance();
+    }
+}
+
+/// Fallback for postings that cannot report an exact document frequency:
+/// clones `postings` and counts its documents by scanning until `TERMINATED`.
+pub(crate) fn exact_doc_freq<P: Postings + Clone>(postings: &P) -> u32 {
+    let mut docset = postings.clone();
+    let mut doc_freq = 0;
+    loop {
+        let doc = docset.doc();
+        if doc == TERMINATED {
+            return doc_freq;
+        }
+        doc_freq += 1u32;
+        docset.advance();
+    }
+}
+
 #[cfg(test)]
 mod tests {

@@ -561,12 +612,16 @@ mod tests {
    use proptest::strategy::Strategy;
    use schema::FAST;

+    use crate::codec::postings::PostingsCodec;
+    use crate::codec::standard::postings::StandardPostingsCodec;
    use crate::collector::tests::{
        BytesFastFieldTestCollector, FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE,
    };
    use crate::collector::{Count, FacetCollector};
+    use crate::fastfield::AliveBitSet;
    use crate::index::{Index, SegmentId};
    use crate::indexer::NoMergePolicy;
+    use crate::postings::{DocFreq, Postings as _};
    use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
    use crate::schema::{
        Facet, FacetOptions, IndexRecordOption, NumericOptions, TantivyDocument, Term,
@@ -1518,10 +1573,10 @@ mod tests {
        let searcher = reader.searcher();
        let mut term_scorer = term_query
            .specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
-            .term_scorer_for_test(searcher.segment_reader(0u32), 1.0)?
+            .term_scorer_for_test(searcher.segment_reader(0u32), 1.0)
            .unwrap();
        assert_eq!(term_scorer.doc(), 0);
-        assert_nearly_equals!(term_scorer.block_max_score(), 0.0079681855);
+        assert_nearly_equals!(term_scorer.seek_block_max(0), 0.0079681855);
        assert_nearly_equals!(term_scorer.score(), 0.0079681855);
        for _ in 0..81 {
            writer.add_document(doc!(text=>"hello happy tax payer"))?;
@@ -1534,13 +1589,13 @@ mod tests {
        for segment_reader in searcher.segment_readers() {
            let mut term_scorer = term_query
                .specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
-                .term_scorer_for_test(segment_reader, 1.0)?
+                .term_scorer_for_test(segment_reader, 1.0)
                .unwrap();
            // the difference compared to before is intrinsic to the bm25 formula. no worries
            // there.
            for doc in segment_reader.doc_ids_alive() {
                assert_eq!(term_scorer.doc(), doc);
-                assert_nearly_equals!(term_scorer.block_max_score(), 0.003478312);
+                assert_nearly_equals!(term_scorer.seek_block_max(doc), 0.003478312);
                assert_nearly_equals!(term_scorer.score(), 0.003478312);
                term_scorer.advance();
            }
@@ -1560,12 +1615,12 @@ mod tests {
        let segment_reader = searcher.segment_reader(0u32);
        let mut term_scorer = term_query
            .specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
-            .term_scorer_for_test(segment_reader, 1.0)?
+            .term_scorer_for_test(segment_reader, 1.0)
            .unwrap();
        // the difference compared to before is intrinsic to the bm25 formula. no worries there.
        for doc in segment_reader.doc_ids_alive() {
            assert_eq!(term_scorer.doc(), doc);
-            assert_nearly_equals!(term_scorer.block_max_score(), 0.003478312);
+            assert_nearly_equals!(term_scorer.seek_block_max(doc), 0.003478312);
            assert_nearly_equals!(term_scorer.score(), 0.003478312);
            term_scorer.advance();
        }
@@ -1579,4 +1634,16 @@ mod tests {
        assert!(((super::MAX_DOC_LIMIT - 1) as i32) >= 0);
        assert!((super::MAX_DOC_LIMIT as i32) < 0);
    }
+
+    #[test]
+    fn test_doc_freq_given_delete() {
+        let docs =
+            <StandardPostingsCodec as PostingsCodec>::Postings::create_from_docs(&[0, 2, 10]);
+        assert_eq!(docs.doc_freq(), DocFreq::Exact(3));
+        let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[2], 12);
+        assert_eq!(super::doc_freq_given_deletes(&docs, &alive_bitset), 2);
+        let all_deleted =
+            AliveBitSet::for_test_from_deleted_docs(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12);
+        assert_eq!(super::doc_freq_given_deletes(&docs, &all_deleted), 0);
+    }
 }
--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -34,6 +34,8 @@ use crossbeam_channel as channel;
 use smallvec::SmallVec;

 pub use self::index_writer::{advance_deletes, IndexWriter, IndexWriterOptions};
+#[doc(hidden)]
+pub use self::indexing_term::IndexingTerm;
 pub use self::log_merge_policy::LogMergePolicy;
 pub use self::merge_operation::MergeOperation;
 pub use self::merge_policy::{MergeCandidate, MergePolicy, NoMergePolicy};
--- a/src/indexer/prepared_commit.rs
+++ b/src/indexer/prepared_commit.rs
@@ -1,16 +1,17 @@
 use super::IndexWriter;
+use crate::codec::Codec;
 use crate::schema::document::Document;
 use crate::{FutureResult, Opstamp, TantivyDocument};

 /// A prepared commit
-pub struct PreparedCommit<'a, D: Document = TantivyDocument> {
-    index_writer: &'a mut IndexWriter<D>,
+pub struct PreparedCommit<'a, C: Codec, D: Document = TantivyDocument> {
+    index_writer: &'a mut IndexWriter<C, D>,
    payload: Option<String>,
    opstamp: Opstamp,
 }

-impl<'a, D: Document> PreparedCommit<'a, D> {
-    pub(crate) fn new(index_writer: &'a mut IndexWriter<D>, opstamp: Opstamp) -> Self {
+impl<'a, C: Codec, D: Document> PreparedCommit<'a, C, D> {
+    pub(crate) fn new(index_writer: &'a mut IndexWriter<C, D>, opstamp: Opstamp) -> Self {
        Self {
            index_writer,
            payload: None,
--- a/src/indexer/segment_serializer.rs
+++ b/src/indexer/segment_serializer.rs
@@ -8,17 +8,17 @@ use crate::store::StoreWriter;

 /// Segment serializer is in charge of laying out on disk
 /// the data accumulated and sorted by the `SegmentWriter`.
-pub struct SegmentSerializer {
-    segment: Segment,
+pub struct SegmentSerializer<C: crate::codec::Codec> {
+    segment: Segment<C>,
    pub(crate) store_writer: StoreWriter,
    fast_field_write: WritePtr,
    fieldnorms_serializer: Option<FieldNormsSerializer>,
-    postings_serializer: InvertedIndexSerializer,
+    postings_serializer: InvertedIndexSerializer<C>,
 }

-impl SegmentSerializer {
+impl<C: crate::codec::Codec> SegmentSerializer<C> {
    /// Creates a new `SegmentSerializer`.
-    pub fn for_segment(mut segment: Segment) -> crate::Result<SegmentSerializer> {
+    pub fn for_segment(mut segment: Segment<C>) -> crate::Result<SegmentSerializer<C>> {
        let settings = segment.index().settings().clone();
        let store_writer = {
            let store_write = segment.open_write(SegmentComponent::Store)?;
@@ -50,12 +50,12 @@ impl SegmentSerializer {
        self.store_writer.mem_usage()
    }

-    pub fn segment(&self) -> &Segment {
+    pub fn segment(&self) -> &Segment<C> {
        &self.segment
    }

    /// Accessor to the `PostingsSerializer`.
-    pub fn get_postings_serializer(&mut self) -> &mut InvertedIndexSerializer {
+    pub fn get_postings_serializer(&mut self) -> &mut InvertedIndexSerializer<C> {
        &mut self.postings_serializer
    }

--- a/src/indexer/segment_updater.rs
+++ b/src/indexer/segment_updater.rs
@@ -10,10 +10,13 @@ use std::sync::{Arc, RwLock};
 use rayon::{ThreadPool, ThreadPoolBuilder};

 use super::segment_manager::SegmentManager;
+use crate::codec::Codec;
 use crate::core::META_FILEPATH;
 use crate::directory::{Directory, DirectoryClone, GarbageCollectionResult};
 use crate::fastfield::AliveBitSet;
-use crate::index::{Index, IndexMeta, IndexSettings, Segment, SegmentId, SegmentMeta};
+use crate::index::{
+    CodecConfiguration, Index, IndexMeta, IndexSettings, Segment, SegmentId, SegmentMeta,
+};
 use crate::indexer::delete_queue::DeleteCursor;
 use crate::indexer::index_writer::advance_deletes;
 use crate::indexer::merge_operation::MergeOperationInventory;
@@ -61,10 +64,10 @@ pub(crate) fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate:
 // We voluntarily pass a merge_operation ref to guarantee that
 // the merge_operation is alive during the process
 #[derive(Clone)]
-pub(crate) struct SegmentUpdater(Arc<InnerSegmentUpdater>);
+pub(crate) struct SegmentUpdater<C: Codec>(Arc<InnerSegmentUpdater<C>>);

-impl Deref for SegmentUpdater {
-    type Target = InnerSegmentUpdater;
+impl<C: Codec> Deref for SegmentUpdater<C> {
+    type Target = InnerSegmentUpdater<C>;

    #[inline]
    fn deref(&self) -> &Self::Target {
@@ -72,8 +75,8 @@ impl Deref for SegmentUpdater {
    }
 }

-fn garbage_collect_files(
-    segment_updater: SegmentUpdater,
+fn garbage_collect_files<C: Codec>(
+    segment_updater: SegmentUpdater<C>,
 ) -> crate::Result<GarbageCollectionResult> {
    info!("Running garbage collection");
    let mut index = segment_updater.index.clone();
@@ -84,8 +87,8 @@ fn garbage_collect_files(

 /// Merges a list of segments the list of segment givens in the `segment_entries`.
 /// This function happens in the calling thread and is computationally expensive.
-fn merge(
-    index: &Index,
+fn merge<Codec: crate::codec::Codec>(
+    index: &Index<Codec>,
    mut segment_entries: Vec<SegmentEntry>,
    target_opstamp: Opstamp,
 ) -> crate::Result<Option<SegmentEntry>> {
@@ -108,13 +111,13 @@ fn merge(

    let delete_cursor = segment_entries[0].delete_cursor().clone();

-    let segments: Vec<Segment> = segment_entries
+    let segments: Vec<Segment<Codec>> = segment_entries
        .iter()
        .map(|segment_entry| index.segment(segment_entry.meta().clone()))
        .collect();

    // An IndexMerger is like a "view" of our merged segments.
-    let merger: IndexMerger = IndexMerger::open(index.schema(), &segments[..])?;
+    let merger: IndexMerger<Codec> = IndexMerger::open(index.schema(), &segments[..])?;

    // ... we just serialize this index merger in our new segment to merge the segments.
    let segment_serializer = SegmentSerializer::for_segment(merged_segment.clone())?;
@@ -139,10 +142,10 @@ fn merge(
 /// meant to work if you have an `IndexWriter` running for the origin indices, or
 /// the destination `Index`.
 #[doc(hidden)]
-pub fn merge_indices<T: Into<Box<dyn Directory>>>(
-    indices: &[Index],
-    output_directory: T,
-) -> crate::Result<Index> {
+pub fn merge_indices<Codec: crate::codec::Codec>(
+    indices: &[Index<Codec>],
+    output_directory: Box<dyn Directory>,
+) -> crate::Result<Index<Codec>> {
    if indices.is_empty() {
        // If there are no indices to merge, there is no need to do anything.
        return Err(crate::TantivyError::InvalidArgument(
@@ -163,7 +166,7 @@ pub fn merge_indices<T: Into<Box<dyn Directory>>>(
        ));
    }

-    let mut segments: Vec<Segment> = Vec::new();
+    let mut segments: Vec<Segment<Codec>> = Vec::new();
    for index in indices {
        segments.extend(index.searchable_segments()?);
    }
@@ -185,12 +188,12 @@ pub fn merge_indices<T: Into<Box<dyn Directory>>>(
 /// meant to work if you have an `IndexWriter` running for the origin indices, or
 /// the destination `Index`.
 #[doc(hidden)]
-pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
-    segments: &[Segment],
+pub fn merge_filtered_segments<C: crate::codec::Codec, T: Into<Box<dyn Directory>>>(
+    segments: &[Segment<C>],
    target_settings: IndexSettings,
    filter_doc_ids: Vec<Option<AliveBitSet>>,
    output_directory: T,
-) -> crate::Result<Index> {
+) -> crate::Result<Index<C>> {
    if segments.is_empty() {
        // If there are no indices to merge, there is no need to do anything.
        return Err(crate::TantivyError::InvalidArgument(
@@ -211,14 +214,15 @@ pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
        ));
    }

-    let mut merged_index = Index::create(
-        output_directory,
-        target_schema.clone(),
-        target_settings.clone(),
-    )?;
+    let mut merged_index: Index<C> = Index::builder()
+        .schema(target_schema.clone())
+        .codec(segments[0].index().codec().clone())
+        .settings(target_settings.clone())
+        .create(output_directory.into())?;
+
    let merged_segment = merged_index.new_segment();
    let merged_segment_id = merged_segment.id();
-    let merger: IndexMerger =
+    let merger: IndexMerger<C> =
        IndexMerger::open_with_custom_alive_set(merged_index.schema(), segments, filter_doc_ids)?;
    let segment_serializer = SegmentSerializer::for_segment(merged_segment)?;
    let num_docs = merger.write(segment_serializer)?;
@@ -235,6 +239,7 @@ pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
            ))
            .trim_end()
    );
+    let codec_configuration = CodecConfiguration::from(segments[0].index().codec());

    let index_meta = IndexMeta {
        index_settings: target_settings, // index_settings of all segments should be the same
@@ -242,6 +247,7 @@ pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
        schema: target_schema,
        opstamp: 0u64,
        payload: Some(stats),
+        codec: codec_configuration,
    };

    // save the meta.json
@@ -250,7 +256,7 @@ pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
    Ok(merged_index)
 }

-pub(crate) struct InnerSegmentUpdater {
+pub(crate) struct InnerSegmentUpdater<C: Codec> {
    // we keep a copy of the current active IndexMeta to
    // avoid loading the file every time we need it in the
    // `SegmentUpdater`.
@@ -261,7 +267,7 @@ pub(crate) struct InnerSegmentUpdater {
    pool: ThreadPool,
    merge_thread_pool: ThreadPool,

-    index: Index,
+    index: Index<C>,
    segment_manager: SegmentManager,
    merge_policy: RwLock<Arc<dyn MergePolicy>>,
    killed: AtomicBool,
@@ -269,13 +275,13 @@ pub(crate) struct InnerSegmentUpdater {
    merge_operations: MergeOperationInventory,
 }

-impl SegmentUpdater {
+impl<Codec: crate::codec::Codec> SegmentUpdater<Codec> {
    pub fn create(
-        index: Index,
+        index: Index<Codec>,
        stamper: Stamper,
        delete_cursor: &DeleteCursor,
        num_merge_threads: usize,
-    ) -> crate::Result<SegmentUpdater> {
+    ) -> crate::Result<Self> {
        let segments = index.searchable_segment_metas()?;
        let segment_manager = SegmentManager::from_segments(segments, delete_cursor);
        let pool = ThreadPoolBuilder::new()
@@ -405,12 +411,14 @@ impl SegmentUpdater {
            // Segment 1 from disk 1, Segment 1 from disk 2, etc.
            committed_segment_metas
                .sort_by_key(|segment_meta| std::cmp::Reverse(segment_meta.max_doc()));
+            let codec = CodecConfiguration::from(index.codec());
            let index_meta = IndexMeta {
                index_settings: index.settings().clone(),
                segments: committed_segment_metas,
                schema: index.schema(),
                opstamp,
                payload: commit_message,
+                codec,
            };
            // TODO add context to the error.
            save_metas(&index_meta, directory.box_clone().borrow_mut())?;
@@ -444,7 +452,7 @@ impl SegmentUpdater {
        opstamp: Opstamp,
        payload: Option<String>,
    ) -> FutureResult<Opstamp> {
-        let segment_updater: SegmentUpdater = self.clone();
+        let segment_updater: SegmentUpdater<Codec> = self.clone();
        self.schedule_task(move || {
            let segment_entries = segment_updater.purge_deletes(opstamp)?;
            segment_updater.segment_manager.commit(segment_entries);
@@ -700,6 +708,7 @@ impl SegmentUpdater {
 #[cfg(test)]
 mod tests {
    use super::merge_indices;
+    use crate::codec::StandardCodec;
    use crate::collector::TopDocs;
    use crate::directory::RamDirectory;
    use crate::fastfield::AliveBitSet;
@@ -930,7 +939,7 @@ mod tests {

    #[test]
    fn test_merge_empty_indices_array() {
-        let merge_result = merge_indices(&[], RamDirectory::default());
+        let merge_result = merge_indices::<StandardCodec>(&[], Box::new(RamDirectory::default()));
        assert!(merge_result.is_err());
    }

@@ -957,7 +966,10 @@ mod tests {
        };

        // mismatched schema index list
-        let result = merge_indices(&[first_index, second_index], RamDirectory::default());
+        let result = merge_indices(
+            &[first_index, second_index],
+            Box::new(RamDirectory::default()),
+        );
        assert!(result.is_err());

        Ok(())
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -1,9 +1,12 @@
+use std::any::Any;
+
 use columnar::MonotonicallyMappableToU64;
 use common::JsonPathWriter;
 use itertools::Itertools;
 use tokenizer_api::BoxTokenStream;

 use super::operation::AddOperation;
+use crate::codec::Codec;
 use crate::fastfield::FastFieldsWriter;
 use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
 use crate::index::{Segment, SegmentComponent};
@@ -12,10 +15,10 @@ use crate::indexer::segment_serializer::SegmentSerializer;
 use crate::json_utils::{index_json_value, IndexingPositionsPerPath};
 use crate::postings::{
    compute_table_memory_size, serialize_postings, IndexingContext, IndexingPosition,
-    PerFieldPostingsWriter, PostingsWriter,
+    PerFieldPostingsWriter, PostingsWriter, PostingsWriterEnum,
 };
 use crate::schema::document::{Document, Value};
-use crate::schema::{FieldEntry, FieldType, Schema, DATE_TIME_PRECISION_INDEXED};
+use crate::schema::{Field, FieldEntry, FieldType, Schema, DATE_TIME_PRECISION_INDEXED};
 use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer};
 use crate::{DocId, Opstamp, TantivyError};

@@ -45,22 +48,22 @@ fn compute_initial_table_size(per_thread_memory_budget: usize) -> crate::Result<
 ///
 /// They creates the postings list in anonymous memory.
 /// The segment is laid on disk when the segment gets `finalized`.
-pub struct SegmentWriter {
+pub struct SegmentWriter<Codec: crate::codec::Codec> {
    pub(crate) max_doc: DocId,
    pub(crate) ctx: IndexingContext,
-    pub(crate) per_field_postings_writers: PerFieldPostingsWriter,
-    pub(crate) segment_serializer: SegmentSerializer,
+    pub per_field_postings_writers: PerFieldPostingsWriter,
+    pub(crate) segment_serializer: SegmentSerializer<Codec>,
    pub(crate) fast_field_writers: FastFieldsWriter,
    pub(crate) fieldnorms_writer: FieldNormsWriter,
    pub(crate) json_path_writer: JsonPathWriter,
    pub(crate) json_positions_per_path: IndexingPositionsPerPath,
    pub(crate) doc_opstamps: Vec<Opstamp>,
+    schema: Schema,
    per_field_text_analyzers: Vec<TextAnalyzer>,
    term_buffer: IndexingTerm,
-    schema: Schema,
 }

-impl SegmentWriter {
+impl<Codec: crate::codec::Codec> SegmentWriter<Codec> {
    /// Creates a new `SegmentWriter`
    ///
    /// The arguments are defined as follows
@@ -70,7 +73,10 @@ impl SegmentWriter {
    ///   behavior as a memory limit.
    /// - segment: The segment being written
    /// - schema
-    pub fn for_segment(memory_budget_in_bytes: usize, segment: Segment) -> crate::Result<Self> {
+    pub fn for_segment(
+        memory_budget_in_bytes: usize,
+        segment: Segment<Codec>,
+    ) -> crate::Result<Self> {
        let schema = segment.schema();
        let tokenizer_manager = segment.index().tokenizers().clone();
        let tokenizer_manager_fast_field = segment.index().fast_field_tokenizer().clone();
@@ -144,6 +150,111 @@ impl SegmentWriter {
            + self.segment_serializer.mem_usage()
    }

+    /// Attaches or updates a codec-specific payload on a term of a regular
+    /// (non-JSON) field.
+    ///
+    /// `value_bytes` is the serialized term value, i.e. exactly what would be
+    /// appended after the field id (the raw text bytes for a str field, or the
+    /// big-endian bytes for a numeric field).
+    ///
+    /// If the term does not exist yet, it is inserted with an empty recorder so
+    /// that it still gets serialized even though it belongs to no document.
+    /// `updater` receives the previously registered payload (`None` if absent)
+    /// and returns the payload to store. The payload is handed to the codec's
+    /// postings serializer (via `set_term_payload`) at the beginning of the
+    /// term during serialization.
+    pub(crate) fn update_term_payload(
+        &mut self,
+        field: Field,
+        value_bytes: &[u8],
+        updater: impl FnOnce(Option<Box<dyn Any + Send>>) -> Box<dyn Any + Send>,
+    ) {
+        let mut term = IndexingTerm::with_capacity(value_bytes.len());
+        term.set_field(field);
+        term.append_bytes(value_bytes);
+        self.update_term_payload_for_serialized_term(field, term.serialized_term(), updater);
+    }
+
+    /// Same as [`Self::update_term_payload`] for a JSON field.
+    ///
+    /// `value_bytes` must be the type-tagged value (`[type code][value]`), the
+    /// representation that follows the path within a JSON term.
+    pub(crate) fn update_json_term_payload(
+        &mut self,
+        field: Field,
+        json_path: &str,
+        value_bytes: &[u8],
+        updater: impl FnOnce(Option<Box<dyn Any + Send>>) -> Box<dyn Any + Send>,
+    ) {
+        let unordered_id = self
+            .ctx
+            .path_to_unordered_id
+            .get_or_allocate_unordered_id(json_path);
+        // JSON term key layout: `[field:4][unordered_path_id:4][type code][value]`.
+        let mut serialized_term = Vec::with_capacity(8 + value_bytes.len());
+        serialized_term.extend_from_slice(&field.field_id().to_be_bytes());
+        serialized_term.extend_from_slice(&unordered_id.to_be_bytes());
+        serialized_term.extend_from_slice(value_bytes);
+        self.update_term_payload_for_serialized_term(field, &serialized_term, updater);
+    }
+
+    fn update_term_payload_for_serialized_term(
+        &mut self,
+        field: Field,
+        serialized_term: &[u8],
+        updater: impl FnOnce(Option<Box<dyn Any + Send>>) -> Box<dyn Any + Send>,
+    ) {
+        let postings_writer = self.per_field_postings_writers.get_for_field(field);
+        let addr = postings_writer.ensure_term(serialized_term, &mut self.ctx);
+        let previous_payload = self.ctx.codec_term_payloads.remove(&addr);
+        let new_payload = updater(previous_payload);
+        self.ctx.codec_term_payloads.insert(addr, new_payload);
+    }
+
+    /// Returns disjoint mutable borrows of the pieces needed to index field
+    /// values outside of `index_document` (e.g. moshiki's placeholder
+    /// routines): the per-field postings writers, the indexing context
+    /// (memory arena + term hashmap), the shared term buffer, and the
+    /// per-field text analyzers (indexed by `Field::field_id`).
+    ///
+    /// The text analyzers are exactly the ones `index_document` uses, so
+    /// indexing a value through them yields identical postings.
+    #[doc(hidden)]
+    pub fn indexing_parts(
+        &mut self,
+    ) -> (
+        &mut PerFieldPostingsWriter,
+        &mut IndexingContext,
+        &mut IndexingTerm,
+        &mut [TextAnalyzer],
+    ) {
+        (
+            &mut self.per_field_postings_writers,
+            &mut self.ctx,
+            &mut self.term_buffer,
+            &mut self.per_field_text_analyzers,
+        )
+    }
+
+    /// Indexes the fast fields of one document from its `(field, value)` pairs, and
+    /// advances `max_doc` by one.
+    ///
+    /// This is for callers (e.g. moshiki) that drive the postings/positions through
+    /// [`indexing_parts`](Self::indexing_parts) with explicit doc ids and need a matching
+    /// fast-field + doc-count pass. It is the document-creating step: it keeps the
+    /// fast-field writer's `num_docs` and `max_doc` in lockstep, so it must be called
+    /// exactly once per document, in doc order.
+    #[doc(hidden)]
+    pub fn add_fast_field_document<'a, V: Value<'a>>(
+        &mut self,
+        fields_and_values: impl Iterator<Item = (Field, V)>,
+    ) -> crate::Result<()> {
+        self.fast_field_writers
+            .add_document_from_values(fields_and_values)?;
+        self.max_doc += 1;
+        Ok(())
+    }
+
    fn index_document<D: Document>(&mut self, doc: &D) -> crate::Result<()> {
        let doc_id = self.max_doc;

@@ -169,7 +280,7 @@ impl SegmentWriter {
            }

            let (term_buffer, ctx) = (&mut self.term_buffer, &mut self.ctx);
-            let postings_writer: &mut dyn PostingsWriter =
+            let postings_writer: &mut PostingsWriterEnum =
                self.per_field_postings_writers.get_for_field_mut(field);
            term_buffer.clear_with_field(field);

@@ -386,13 +497,13 @@ impl SegmentWriter {
 /// to the `SegmentSerializer`.
 ///
 /// `doc_id_map` is used to map to the new doc_id order.
-fn remap_and_write(
+fn remap_and_write<C: Codec>(
    schema: Schema,
    per_field_postings_writers: &PerFieldPostingsWriter,
    ctx: IndexingContext,
    fast_field_writers: FastFieldsWriter,
    fieldnorms_writer: &FieldNormsWriter,
-    mut serializer: SegmentSerializer,
+    mut serializer: SegmentSerializer<C>,
 ) -> crate::Result<()> {
    debug!("remap-and-write");
    if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
--- a/src/indexer/single_segment_index_writer.rs
+++ b/src/indexer/single_segment_index_writer.rs
@@ -1,28 +1,35 @@
+use std::any::Any;
 use std::marker::PhantomData;

+use crate::codec::StandardCodec;
+use crate::index::CodecConfiguration;
 use crate::indexer::operation::AddOperation;
 use crate::indexer::segment_updater::save_metas;
 use crate::indexer::SegmentWriter;
 use crate::schema::document::Document;
+use crate::schema::{Field, Schema};
 use crate::{Directory, Index, IndexMeta, Opstamp, Segment, TantivyDocument};

 #[doc(hidden)]
-pub struct SingleSegmentIndexWriter<D: Document = TantivyDocument> {
-    segment_writer: SegmentWriter,
-    segment: Segment,
+pub struct SingleSegmentIndexWriter<
+    Codec: crate::codec::Codec = StandardCodec,
+    D: Document = TantivyDocument,
+> {
+    pub segment_writer: SegmentWriter<Codec>,
+    segment: Segment<Codec>,
    opstamp: Opstamp,
-    _phantom: PhantomData<D>,
+    _doc: PhantomData<D>,
 }

-impl<D: Document> SingleSegmentIndexWriter<D> {
-    pub fn new(index: Index, mem_budget: usize) -> crate::Result<Self> {
+impl<Codec: crate::codec::Codec, D: Document> SingleSegmentIndexWriter<Codec, D> {
+    pub fn new(index: Index<Codec>, mem_budget: usize) -> crate::Result<Self> {
        let segment = index.new_segment();
        let segment_writer = SegmentWriter::for_segment(mem_budget, segment.clone())?;
        Ok(Self {
            segment_writer,
            segment,
            opstamp: 0,
-            _phantom: PhantomData,
+            _doc: PhantomData,
        })
    }

@@ -37,10 +44,51 @@ impl<D: Document> SingleSegmentIndexWriter<D> {
            .add_document(AddOperation { opstamp, document })
    }

-    pub fn finalize(self) -> crate::Result<Index> {
+    pub fn schema(&self) -> Schema {
+        self.segment.schema()
+    }
+
+    /// Attaches or updates a codec-specific payload on a term of a regular
+    /// (non-JSON) field.
+    ///
+    /// `value_bytes` is the serialized term value, i.e. exactly what would be
+    /// appended after the field id (the raw text bytes for a str field, or the
+    /// big-endian bytes for a numeric field).
+    ///
+    /// The term does not need to belong to any document: if it does not exist
+    /// yet, it is created with an empty recorder so it still gets serialized.
+    /// `updater` receives the previously registered payload (`None` if absent)
+    /// and returns the payload to store. The payload is handed to the codec at
+    /// the beginning of the term during serialization.
+    pub fn update_term_payload(
+        &mut self,
+        field: Field,
+        value_bytes: &[u8],
+        updater: impl FnOnce(Option<Box<dyn Any + Send>>) -> Box<dyn Any + Send>,
+    ) {
+        self.segment_writer
+            .update_term_payload(field, value_bytes, updater);
+    }
+
+    /// Same as [`Self::update_term_payload`] for a JSON field.
+    ///
+    /// `value_bytes` must be the type-tagged value (`[type code][value]`), the
+    /// representation that follows the path within a JSON term.
+    pub fn update_json_term_payload(
+        &mut self,
+        field: Field,
+        json_path: &str,
+        value_bytes: &[u8],
+        updater: impl FnOnce(Option<Box<dyn Any + Send>>) -> Box<dyn Any + Send>,
+    ) {
+        self.segment_writer
+            .update_json_term_payload(field, json_path, value_bytes, updater);
+    }
+
+    pub fn finalize(self) -> crate::Result<Index<Codec>> {
        let max_doc = self.segment_writer.max_doc();
        self.segment_writer.finalize()?;
-        let segment: Segment = self.segment.with_max_doc(max_doc);
+        let segment: Segment<Codec> = self.segment.with_max_doc(max_doc);
        let index = segment.index();
        let index_meta = IndexMeta {
            index_settings: index.settings().clone(),
@@ -48,9 +96,245 @@ impl<D: Document> SingleSegmentIndexWriter<D> {
            schema: index.schema(),
            opstamp: 0,
            payload: None,
+            codec: CodecConfiguration::from(index.codec()),
        };
        save_metas(&index_meta, index.directory())?;
        index.directory().sync_directory()?;
        Ok(segment.index().clone())
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::any::Any;
+    use std::cell::RefCell;
+    use std::io;
+
+    use super::SingleSegmentIndexWriter;
+    use crate::codec::positions::PositionsReader;
+    use crate::codec::postings::{PostingsCodec, PostingsSerializer};
+    use crate::codec::standard::positions::StandardPositionsCodec;
+    use crate::codec::standard::postings::{
+        SegmentPostings, StandardPostingsCodec, StandardPostingsSerializer,
+    };
+    use crate::codec::Codec;
+    use crate::fieldnorm::FieldNormReader;
+    use crate::schema::{IndexRecordOption, Schema, Type, STRING};
+    use crate::{DocId, Score, Term};
+
+    // The codec is round-tripped through `from_json_props` when the index is
+    // opened, so it cannot carry the capture sink itself. We use a thread-local
+    // sink instead: the `SingleSegmentIndexWriter` is single-threaded, so
+    // serialization runs on the test thread, and each test owns its own
+    // thread-local (clear it at the start of the test).
+    thread_local! {
+        static CAPTURED_PAYLOADS: RefCell<Vec<u64>> = const { RefCell::new(Vec::new()) };
+    }
+
+    fn reset_captured() {
+        CAPTURED_PAYLOADS.with(|captured| captured.borrow_mut().clear());
+    }
+
+    fn captured_payloads() -> Vec<u64> {
+        CAPTURED_PAYLOADS.with(|captured| captured.borrow().clone())
+    }
+
+    /// A postings serializer that delegates to the standard one, but records
+    /// the `u64` payload value of every term that carries a codec payload.
+    struct CapturingPostingsSerializer {
+        inner: StandardPostingsSerializer,
+    }
+
+    impl PostingsSerializer for CapturingPostingsSerializer {
+        fn new_term(&mut self, term_doc_freq: u32, record_term_freq: bool) {
+            self.inner.new_term(term_doc_freq, record_term_freq);
+        }
+
+        fn set_term_payload(&mut self, payload: &dyn Any) {
+            let value = *payload
+                .downcast_ref::<u64>()
+                .expect("payload should be a u64");
+            CAPTURED_PAYLOADS.with(|captured| captured.borrow_mut().push(value));
+        }
+
+        fn write_doc(&mut self, doc_id: DocId, term_freq: u32) {
+            self.inner.write_doc(doc_id, term_freq);
+        }
+
+        fn close_term(&mut self, doc_freq: u32, wrt: &mut impl io::Write) -> io::Result<()> {
+            self.inner.close_term(doc_freq, wrt)
+        }
+    }
+
+    #[derive(Clone, Debug)]
+    struct CapturingPostingsCodec;
+
+    impl PostingsCodec for CapturingPostingsCodec {
+        type PostingsSerializer = CapturingPostingsSerializer;
+        type Postings = SegmentPostings;
+
+        fn new_serializer(
+            &self,
+            avg_fieldnorm: Score,
+            mode: IndexRecordOption,
+            fieldnorm_reader: Option<FieldNormReader>,
+        ) -> Self::PostingsSerializer {
+            CapturingPostingsSerializer {
+                inner: StandardPostingsCodec.new_serializer(avg_fieldnorm, mode, fieldnorm_reader),
+            }
+        }
+
+        fn load_postings(
+            &self,
+            doc_freq: u32,
+            postings_data: common::OwnedBytes,
+            record_option: IndexRecordOption,
+            requested_option: IndexRecordOption,
+            position_reader: Option<Box<dyn PositionsReader>>,
+        ) -> io::Result<Self::Postings> {
+            StandardPostingsCodec.load_postings(
+                doc_freq,
+                postings_data,
+                record_option,
+                requested_option,
+                position_reader,
+            )
+        }
+    }
+
+    #[derive(Clone, Debug, Default)]
+    struct CapturingCodec;
+
+    impl Codec for CapturingCodec {
+        type PostingsCodec = CapturingPostingsCodec;
+        type PositionsCodec = StandardPositionsCodec;
+
+        const ID: &'static str = "test-capturing-codec";
+
+        fn from_json_props(_json_value: &serde_json::Value) -> crate::Result<Self> {
+            Ok(CapturingCodec)
+        }
+
+        fn to_json_props(&self) -> serde_json::Value {
+            serde_json::Value::Null
+        }
+
+        fn postings_codec(&self) -> &Self::PostingsCodec {
+            &CapturingPostingsCodec
+        }
+
+        fn positions_codec(&self) -> &Self::PositionsCodec {
+            &StandardPositionsCodec
+        }
+    }
+
+    fn build_writer(schema: Schema) -> SingleSegmentIndexWriter<CapturingCodec> {
+        let index = crate::IndexBuilder::default()
+            .codec(CapturingCodec)
+            .schema(schema)
+            .create_in_ram()
+            .unwrap();
+        SingleSegmentIndexWriter::new(index, 15_000_000).unwrap()
+    }
+
+    #[test]
+    fn test_update_term_payload_regular_field() {
+        reset_captured();
+        let mut schema_builder = Schema::builder();
+        let text = schema_builder.add_text_field("text", STRING);
+        let schema = schema_builder.build();
+        let mut writer = build_writer(schema);
+
+        writer.add_document(crate::doc!(text => "alpha")).unwrap();
+        writer.add_document(crate::doc!(text => "beta")).unwrap();
+        writer.add_document(crate::doc!(text => "gamma")).unwrap();
+
+        // Existing term that belongs to a document.
+        writer.update_term_payload(text, b"beta", |previous_payload| {
+            assert!(previous_payload.is_none());
+            Box::new(100u64)
+        });
+        // Updating the same term: the previous payload is handed back.
+        writer.update_term_payload(text, b"beta", |previous_payload| {
+            let previous = previous_payload.expect("expected the previous payload");
+            assert_eq!(*previous.downcast::<u64>().unwrap(), 100u64);
+            Box::new(101u64)
+        });
+        // Brand-new term that belongs to no document: an empty recorder is
+        // created so it still lands in the term dictionary.
+        writer.update_term_payload(text, b"zeta", |previous_payload| {
+            assert!(previous_payload.is_none());
+            Box::new(200u64)
+        });
+
+        let index = writer.finalize().unwrap();
+
+        // Terms are serialized in sorted order: alpha, beta, gamma, zeta.
+        // Only beta and zeta carry a payload.
+        assert_eq!(captured_payloads(), vec![101u64, 200u64]);
+
+        let searcher = index.reader().unwrap().searcher();
+        let segment_reader = searcher.segment_reader(0);
+        let inverted_index = segment_reader.inverted_index(text).unwrap();
+
+        let beta_info = inverted_index
+            .get_term_info(&Term::from_field_text(text, "beta"))
+            .unwrap()
+            .expect("beta should be in the dictionary");
+        assert_eq!(beta_info.doc_freq, 1);
+
+        let zeta_info = inverted_index
+            .get_term_info(&Term::from_field_text(text, "zeta"))
+            .unwrap()
+            .expect("zeta (no document) should still be in the dictionary");
+        assert_eq!(zeta_info.doc_freq, 0);
+    }
+
+    #[test]
+    fn test_update_json_term_payload() {
+        reset_captured();
+        let mut schema_builder = Schema::builder();
+        let json_field = schema_builder.add_json_field("json", STRING);
+        let schema = schema_builder.build();
+        let mut writer = build_writer(schema);
+
+        writer
+            .add_document(crate::doc!(json_field => serde_json::json!({"name": "hello"})))
+            .unwrap();
+
+        let str_value = |value: &str| {
+            let mut bytes = vec![Type::Str.to_code()];
+            bytes.extend_from_slice(value.as_bytes());
+            bytes
+        };
+
+        // Existing str JSON term (path "name", value "hello").
+        writer.update_json_term_payload(json_field, "name", &str_value("hello"), |previous| {
+            assert!(previous.is_none());
+            Box::new(1u64)
+        });
+        // Brand-new str JSON term with no document.
+        writer.update_json_term_payload(json_field, "name", &str_value("world"), |previous| {
+            assert!(previous.is_none());
+            Box::new(2u64)
+        });
+        // Brand-new non-str (numeric) JSON term with no document: exercises the
+        // DocIdRecorder branch of `ensure_term`.
+        let numeric_value = {
+            let mut bytes = vec![Type::I64.to_code()];
+            bytes.extend_from_slice(&[0u8; 8]);
+            bytes
+        };
+        writer.update_json_term_payload(json_field, "count", &numeric_value, |previous| {
+            assert!(previous.is_none());
+            Box::new(3u64)
+        });
+
+        // Should not panic and should serialize cleanly.
+        let _index = writer.finalize().unwrap();
+
+        let mut got = captured_payloads();
+        got.sort_unstable();
+        assert_eq!(got, vec![1u64, 2u64, 3u64]);
+    }
+}
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -166,6 +166,9 @@ mod functional_test;

 #[macro_use]
 mod macros;
+
+/// Tantivy codecs describe how data is laid out on disk.
+pub mod codec;
 mod future_result;

 // Re-exports
@@ -218,7 +221,7 @@ pub mod snippet;
 use std::fmt;

 pub use census::{Inventory, TrackedObject};
-pub use common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, HasLen};
+pub use common::{self, f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, HasLen};
 use once_cell::sync::Lazy;
 use serde::{Deserialize, Serialize};

--- a/src/postings/indexing_context.rs
+++ b/src/postings/indexing_context.rs
@@ -1,16 +1,29 @@
-use stacker::{ArenaHashMap, MemoryArena};
+use std::any::Any;
+
+use fnv::FnvHashMap;
+use stacker::{Addr, ArenaHashMap, MemoryArena};

 use crate::indexer::path_to_unordered_id::PathToUnorderedId;

 /// IndexingContext contains all of the transient memory arenas
 /// required for building the inverted index.
-pub(crate) struct IndexingContext {
+#[doc(hidden)]
+pub struct IndexingContext {
    /// The term index is an adhoc hashmap,
    /// itself backed by a dedicated memory arena.
-    pub term_index: ArenaHashMap,
+    pub(crate) term_index: ArenaHashMap,
    /// Arena is a memory arena that stores posting lists / term frequencies / positions.
-    pub arena: MemoryArena,
-    pub path_to_unordered_id: PathToUnorderedId,
+    pub(crate) arena: MemoryArena,
+    pub(crate) path_to_unordered_id: PathToUnorderedId,
+    /// Optional codec-specific payload attached to a term, keyed by the value
+    /// `Addr` of the term's recorder in `term_index`.
+    ///
+    /// Hidden contract: keying on `Addr` is sound because a term's recorder
+    /// address never changes once allocated (the arena only appends, and
+    /// `subscribe` updates the recorder in place). The payload is therefore
+    /// looked up by `Addr` at serialization time and fed to the codec's
+    /// postings serializer at the beginning of the term.
+    pub(crate) codec_term_payloads: FnvHashMap<Addr, Box<dyn Any + Send>>,
 }

 impl IndexingContext {
@@ -21,6 +34,7 @@ impl IndexingContext {
            arena: MemoryArena::default(),
            term_index,
            path_to_unordered_id: PathToUnorderedId::default(),
+            codec_term_payloads: FnvHashMap::default(),
        }
    }

--- a/src/postings/json_postings_writer.rs
+++ b/src/postings/json_postings_writer.rs
@@ -3,6 +3,7 @@ use std::io;
 use common::json_path_writer::JSON_END_OF_PATH;
 use stacker::Addr;

+use crate::codec::Codec;
 use crate::indexer::indexing_term::IndexingTerm;
 use crate::indexer::path_to_unordered_id::OrderedPathId;
 use crate::postings::postings_writer::SpecializedPostingsWriter;
@@ -17,17 +18,11 @@ use crate::DocId;
 /// `subscribe` is called directly to index non-text tokens, while
 /// `index_text` is used to index text.
 #[derive(Default)]
-pub(crate) struct JsonPostingsWriter<Rec: Recorder> {
+pub struct JsonPostingsWriter<Rec: Recorder> {
    str_posting_writer: SpecializedPostingsWriter<Rec>,
    non_str_posting_writer: SpecializedPostingsWriter<DocIdRecorder>,
 }

-impl<Rec: Recorder> From<JsonPostingsWriter<Rec>> for Box<dyn PostingsWriter> {
-    fn from(json_postings_writer: JsonPostingsWriter<Rec>) -> Box<dyn PostingsWriter> {
-        Box::new(json_postings_writer)
-    }
-}
-
 impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
    #[inline]
    fn subscribe(
@@ -58,12 +53,12 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
    }

    /// The actual serialization format is handled by the `PostingsSerializer`.
-    fn serialize(
+    fn serialize<C: Codec>(
        &self,
        ordered_term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
        ordered_id_to_path: &[&str],
        ctx: &IndexingContext,
-        serializer: &mut FieldSerializer,
+        serializer: &mut FieldSerializer<C>,
    ) -> io::Result<()> {
        let mut term_buffer = JsonTermSerializer(Vec::with_capacity(48));
        let mut buffer_lender = BufferLender::default();
@@ -101,6 +96,20 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
        Ok(())
    }

+    fn ensure_term(&self, serialized_term: &[u8], ctx: &mut IndexingContext) -> Addr {
+        // JSON term key layout: `[field:4][unordered_path_id:4][type code][value]`.
+        // Str values use `Rec`, all other types `DocIdRecorder` (as in `serialize`).
+        let type_code = serialized_term.get(8).copied();
+        let typ = type_code.and_then(Type::from_code).expect("Invalid type code in JSON term");
+        if typ == Type::Str {
+            ctx.term_index
+                .get_or_create_value_addr::<Rec>(serialized_term, Rec::default)
+        } else {
+            ctx.term_index
+                .get_or_create_value_addr::<DocIdRecorder>(serialized_term, DocIdRecorder::default)
+        }
+    }
+
    fn total_num_tokens(&self) -> u64 {
        self.str_posting_writer.total_num_tokens() + self.non_str_posting_writer.total_num_tokens()
    }
--- a/src/postings/loaded_postings.rs
+++ b/src/postings/loaded_postings.rs
@@ -1,5 +1,5 @@
 use crate::docset::{DocSet, TERMINATED};
-use crate::postings::{Postings, SegmentPostings};
+use crate::postings::{DocFreq, Postings};
 use crate::DocId;

 /// `LoadedPostings` is a `DocSet` and `Postings` implementation.
@@ -25,16 +25,16 @@ impl LoadedPostings {
    /// Creates a new `LoadedPostings` from a `SegmentPostings`.
    ///
    /// It will also preload positions, if positions are available in the SegmentPostings.
-    pub fn load(segment_postings: &mut SegmentPostings) -> LoadedPostings {
-        let num_docs = segment_postings.doc_freq() as usize;
+    pub fn load(postings: &mut Box<dyn Postings>) -> LoadedPostings {
+        let num_docs: usize = u32::from(postings.doc_freq()) as usize;
        let mut doc_ids = Vec::with_capacity(num_docs);
        let mut positions = Vec::with_capacity(num_docs);
        let mut position_offsets = Vec::with_capacity(num_docs);
-        while segment_postings.doc() != TERMINATED {
+        while postings.doc() != TERMINATED {
            position_offsets.push(positions.len() as u32);
-            doc_ids.push(segment_postings.doc());
-            segment_postings.append_positions_with_offset(0, &mut positions);
-            segment_postings.advance();
+            doc_ids.push(postings.doc());
+            postings.append_positions_with_offset(0, &mut positions);
+            postings.advance();
        }
        position_offsets.push(positions.len() as u32);
        LoadedPostings {
@@ -101,6 +101,14 @@ impl Postings for LoadedPostings {
            output.push(*pos + offset);
        }
    }
+
+    fn has_freq(&self) -> bool {
+        true
+    }
+
+    fn doc_freq(&self) -> DocFreq {
+        DocFreq::Exact(self.doc_ids.len() as u32)
+    }
 }

 #[cfg(test)]
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -4,7 +4,6 @@ mod block_search;

 pub(crate) use self::block_search::branchless_binary_search;

-mod block_segment_postings;
 pub(crate) mod compression;
 mod indexing_context;
 mod json_postings_writer;
@@ -13,33 +12,29 @@ mod per_field_postings_writer;
 mod postings;
 mod postings_writer;
 mod recorder;
-mod segment_postings;
 /// Serializer module for the inverted index
 pub mod serializer;
-mod skip;
 mod term_info;

 pub(crate) use loaded_postings::LoadedPostings;
+pub use postings::DocFreq;
 pub(crate) use stacker::compute_table_memory_size;

-pub use self::block_segment_postings::BlockSegmentPostings;
-pub(crate) use self::indexing_context::IndexingContext;
-pub(crate) use self::per_field_postings_writer::PerFieldPostingsWriter;
+#[doc(hidden)]
+pub use self::indexing_context::IndexingContext;
+#[doc(hidden)]
+pub use self::per_field_postings_writer::PerFieldPostingsWriter;
 pub use self::postings::Postings;
-pub(crate) use self::postings_writer::{serialize_postings, IndexingPosition, PostingsWriter};
-pub use self::segment_postings::SegmentPostings;
+#[doc(hidden)]
+pub use self::postings_writer::IndexingPosition;
+pub use self::postings_writer::PostingsWriterEnum;
+pub(crate) use self::postings_writer::{serialize_postings, PostingsWriter};
+pub use self::recorder::{
+    BufferLender, DocIdRecorder, Recorder, TermFrequencyRecorder, TfAndPositionRecorder,
+};
 pub use self::serializer::{FieldSerializer, InvertedIndexSerializer};
-pub(crate) use self::skip::{BlockInfo, SkipReader};
 pub use self::term_info::TermInfo;

-#[expect(clippy::enum_variant_names)]
-#[derive(Debug, PartialEq, Clone, Copy, Eq)]
-pub(crate) enum FreqReadingOption {
-    NoFreq,
-    SkipFreq,
-    ReadFreq,
-}
-
 #[cfg(test)]
 pub(crate) mod tests {
    use std::mem;
@@ -50,6 +45,7 @@ pub(crate) mod tests {
    use crate::index::{Index, SegmentComponent, SegmentReader};
    use crate::indexer::operation::AddOperation;
    use crate::indexer::SegmentWriter;
+    use crate::postings::DocFreq;
    use crate::query::Scorer;
    use crate::schema::{
        Field, IndexRecordOption, Schema, Term, TextFieldIndexing, TextOptions, INDEXED, TEXT,
@@ -280,11 +276,11 @@ pub(crate) mod tests {
            }
            {
                let term_a = Term::from_field_text(text_field, "a");
-                let mut postings_a = segment_reader
+                let mut postings_a: Box<dyn Postings> = segment_reader
                    .inverted_index(term_a.field())?
                    .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)?
                    .unwrap();
-                assert_eq!(postings_a.len(), 1000);
+                assert_eq!(postings_a.doc_freq(), DocFreq::Exact(1000));
                assert_eq!(postings_a.doc(), 0);
                assert_eq!(postings_a.term_freq(), 6);
                postings_a.positions(&mut positions);
@@ -307,7 +303,7 @@ pub(crate) mod tests {
                    .inverted_index(term_e.field())?
                    .read_postings(&term_e, IndexRecordOption::WithFreqsAndPositions)?
                    .unwrap();
-                assert_eq!(postings_e.len(), 1000 - 2);
+                assert_eq!(postings_e.doc_freq(), DocFreq::Exact(1000 - 2));
                for i in 2u32..1000u32 {
                    assert_eq!(postings_e.term_freq(), i);
                    postings_e.positions(&mut positions);
--- a/src/postings/per_field_postings_writer.rs
+++ b/src/postings/per_field_postings_writer.rs
@@ -1,16 +1,15 @@
 use crate::postings::json_postings_writer::JsonPostingsWriter;
-use crate::postings::postings_writer::SpecializedPostingsWriter;
+use crate::postings::postings_writer::{PostingsWriterEnum, SpecializedPostingsWriter};
 use crate::postings::recorder::{DocIdRecorder, TermFrequencyRecorder, TfAndPositionRecorder};
-use crate::postings::PostingsWriter;
 use crate::schema::{Field, FieldEntry, FieldType, IndexRecordOption, Schema};

-pub(crate) struct PerFieldPostingsWriter {
-    per_field_postings_writers: Vec<Box<dyn PostingsWriter>>,
+pub struct PerFieldPostingsWriter {
+    per_field_postings_writers: Vec<PostingsWriterEnum>,
 }

 impl PerFieldPostingsWriter {
    pub fn for_schema(schema: &Schema) -> Self {
-        let per_field_postings_writers = schema
+        let per_field_postings_writers: Vec<PostingsWriterEnum> = schema
            .fields()
            .map(|(_, field_entry)| posting_writer_from_field_entry(field_entry))
            .collect();
@@ -19,16 +18,16 @@ impl PerFieldPostingsWriter {
        }
    }

-    pub(crate) fn get_for_field(&self, field: Field) -> &dyn PostingsWriter {
-        self.per_field_postings_writers[field.field_id() as usize].as_ref()
+    pub(crate) fn get_for_field(&self, field: Field) -> &PostingsWriterEnum {
+        &self.per_field_postings_writers[field.field_id() as usize]
    }

-    pub(crate) fn get_for_field_mut(&mut self, field: Field) -> &mut dyn PostingsWriter {
-        self.per_field_postings_writers[field.field_id() as usize].as_mut()
+    pub fn get_for_field_mut(&mut self, field: Field) -> &mut PostingsWriterEnum {
+        &mut self.per_field_postings_writers[field.field_id() as usize]
    }
 }

-fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> Box<dyn PostingsWriter> {
+fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> PostingsWriterEnum {
    match *field_entry.field_type() {
        FieldType::Str(ref text_options) => text_options
            .get_indexing_options()
@@ -51,7 +50,7 @@ fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> Box<dyn Postings
        | FieldType::Date(_)
        | FieldType::Bytes(_)
        | FieldType::IpAddr(_)
-        | FieldType::Facet(_) => Box::<SpecializedPostingsWriter<DocIdRecorder>>::default(),
+        | FieldType::Facet(_) => <SpecializedPostingsWriter<DocIdRecorder>>::default().into(),
        FieldType::JsonObject(ref json_object_options) => {
            if let Some(text_indexing_option) = json_object_options.get_text_indexing_options() {
                match text_indexing_option.index_option() {
--- a/src/postings/postings.rs
+++ b/src/postings/postings.rs
@@ -1,5 +1,25 @@
 use crate::docset::DocSet;

+/// Result of the doc_freq method.
+///
+/// Postings can inform us that the document frequency is approximate.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum DocFreq {
+    /// The document frequency is approximate.
+    Approximate(u32),
+    /// The document frequency is exact.
+    Exact(u32),
+}
+
+impl From<DocFreq> for u32 {
+    fn from(doc_freq: DocFreq) -> Self {
+        match doc_freq {
+            DocFreq::Approximate(approximate_doc_freq) => approximate_doc_freq,
+            DocFreq::Exact(doc_freq) => doc_freq,
+        }
+    }
+}
+
 /// Postings (also called inverted list)
 ///
 /// For a given term, it is the list of doc ids of the doc
@@ -14,6 +34,9 @@ pub trait Postings: DocSet + 'static {
    /// The number of times the term appears in the document.
    fn term_freq(&self) -> u32;

+    /// Returns the number of documents containing the term in the segment.
+    fn doc_freq(&self) -> DocFreq;
+
    /// Returns the positions offsetted with a given value.
    /// It is not necessary to clear the `output` before calling this method.
    /// The output vector will be resized to the `term_freq`.
@@ -31,6 +54,16 @@ pub trait Postings: DocSet + 'static {
    fn positions(&mut self, output: &mut Vec<u32>) {
        self.positions_with_offset(0u32, output);
    }
+
+    /// Returns true if the term_frequency is available.
+    ///
+    /// This is a tricky question, because on JSON fields, it is possible
+    /// for a text term to have term freq, whereas a number term in the field has none.
+    ///
+    /// This function returns whether the actual term has term frequencies or not.
+    /// In the JSON field example above, `has_freq` should return true for the
+    /// former and false for the latter.
+    fn has_freq(&self) -> bool;
 }

 impl Postings for Box<dyn Postings> {
@@ -41,4 +74,12 @@ impl Postings for Box<dyn Postings> {
    fn append_positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) {
        (**self).append_positions_with_offset(offset, output);
    }
+
+    fn has_freq(&self) -> bool {
+        (**self).has_freq()
+    }
+
+    fn doc_freq(&self) -> DocFreq {
+        (**self).doc_freq()
+    }
 }
--- a/src/postings/postings_writer.rs
+++ b/src/postings/postings_writer.rs
@@ -4,10 +4,15 @@ use std::ops::Range;

 use stacker::Addr;

+use crate::codec::Codec;
 use crate::fieldnorm::FieldNormReaders;
 use crate::indexer::indexing_term::IndexingTerm;
 use crate::indexer::path_to_unordered_id::OrderedPathId;
-use crate::postings::recorder::{BufferLender, Recorder};
+use crate::postings::json_postings_writer::JsonPostingsWriter;
+use crate::postings::recorder::{
+    BufferLender, DocIdRecorder, Recorder, TermFrequencyRecorder, TfAndPositionRecorder,
+    UNINITIALIZED_DOC,
+};
 use crate::postings::{
    FieldSerializer, IndexingContext, InvertedIndexSerializer, PerFieldPostingsWriter,
 };
@@ -45,12 +50,12 @@ fn make_field_partition(
 /// Serialize the inverted index.
 /// It pushes all term, one field at a time, towards the
 /// postings serializer.
-pub(crate) fn serialize_postings(
+pub(crate) fn serialize_postings<C: Codec>(
    ctx: IndexingContext,
    schema: Schema,
    per_field_postings_writers: &PerFieldPostingsWriter,
    fieldnorm_readers: FieldNormReaders,
-    serializer: &mut InvertedIndexSerializer,
+    serializer: &mut InvertedIndexSerializer<C>,
 ) -> crate::Result<()> {
    // Replace unordered ids by ordered ids to be able to sort
    let unordered_id_to_ordered_id: Vec<OrderedPathId> =
@@ -95,11 +100,190 @@ pub(crate) fn serialize_postings(
 }

 #[derive(Default, Debug)]
-pub(crate) struct IndexingPosition {
+#[doc(hidden)]
+pub struct IndexingPosition {
    pub num_tokens: u32,
    pub end_position: u32,
 }

+pub enum PostingsWriterEnum {
+    DocId(SpecializedPostingsWriter<DocIdRecorder>),
+    DocIdTf(SpecializedPostingsWriter<TermFrequencyRecorder>),
+    DocTfAndPosition(SpecializedPostingsWriter<TfAndPositionRecorder>),
+    JsonDocId(JsonPostingsWriter<DocIdRecorder>),
+    JsonDocIdTf(JsonPostingsWriter<TermFrequencyRecorder>),
+    JsonDocTfAndPosition(JsonPostingsWriter<TfAndPositionRecorder>),
+}
+
+impl From<SpecializedPostingsWriter<DocIdRecorder>> for PostingsWriterEnum {
+    fn from(doc_id_recorder_writer: SpecializedPostingsWriter<DocIdRecorder>) -> Self {
+        PostingsWriterEnum::DocId(doc_id_recorder_writer)
+    }
+}
+
+impl From<SpecializedPostingsWriter<TermFrequencyRecorder>> for PostingsWriterEnum {
+    fn from(doc_id_tf_recorder_writer: SpecializedPostingsWriter<TermFrequencyRecorder>) -> Self {
+        PostingsWriterEnum::DocIdTf(doc_id_tf_recorder_writer)
+    }
+}
+
+impl From<SpecializedPostingsWriter<TfAndPositionRecorder>> for PostingsWriterEnum {
+    fn from(
+        doc_id_tf_and_positions_recorder_writer: SpecializedPostingsWriter<TfAndPositionRecorder>,
+    ) -> Self {
+        PostingsWriterEnum::DocTfAndPosition(doc_id_tf_and_positions_recorder_writer)
+    }
+}
+
+impl From<JsonPostingsWriter<DocIdRecorder>> for PostingsWriterEnum {
+    fn from(doc_id_recorder_writer: JsonPostingsWriter<DocIdRecorder>) -> Self {
+        PostingsWriterEnum::JsonDocId(doc_id_recorder_writer)
+    }
+}
+
+impl From<JsonPostingsWriter<TermFrequencyRecorder>> for PostingsWriterEnum {
+    fn from(doc_id_tf_recorder_writer: JsonPostingsWriter<TermFrequencyRecorder>) -> Self {
+        PostingsWriterEnum::JsonDocIdTf(doc_id_tf_recorder_writer)
+    }
+}
+
+impl From<JsonPostingsWriter<TfAndPositionRecorder>> for PostingsWriterEnum {
+    fn from(
+        doc_id_tf_and_positions_recorder_writer: JsonPostingsWriter<TfAndPositionRecorder>,
+    ) -> Self {
+        PostingsWriterEnum::JsonDocTfAndPosition(doc_id_tf_and_positions_recorder_writer)
+    }
+}
+
+impl PostingsWriterEnum {
+    /// Public, codec-agnostic entry point that tokenizes `token_stream` and
+    /// records every token for `doc_id`, mirroring what `SegmentWriter` does
+    /// for a text field.
+    ///
+    /// `indexing_position.end_position` offsets the token positions (set it to
+    /// shift the tokens, e.g. to place a placeholder's tokens after the static
+    /// tokens that precede it) and is advanced as tokens are consumed.
+    #[doc(hidden)]
+    pub fn index_text(
+        &mut self,
+        doc_id: DocId,
+        token_stream: &mut dyn TokenStream,
+        term_buffer: &mut IndexingTerm,
+        ctx: &mut IndexingContext,
+        indexing_position: &mut IndexingPosition,
+    ) {
+        <Self as PostingsWriter>::index_text(
+            self,
+            doc_id,
+            token_stream,
+            term_buffer,
+            ctx,
+            indexing_position,
+        )
+    }
+}
+
+impl PostingsWriter for PostingsWriterEnum {
+    fn subscribe(&mut self, doc: DocId, pos: u32, term: &IndexingTerm, ctx: &mut IndexingContext) {
+        match self {
+            PostingsWriterEnum::DocId(writer) => writer.subscribe(doc, pos, term, ctx),
+            PostingsWriterEnum::DocIdTf(writer) => writer.subscribe(doc, pos, term, ctx),
+            PostingsWriterEnum::DocTfAndPosition(writer) => writer.subscribe(doc, pos, term, ctx),
+            PostingsWriterEnum::JsonDocId(writer) => writer.subscribe(doc, pos, term, ctx),
+            PostingsWriterEnum::JsonDocIdTf(writer) => writer.subscribe(doc, pos, term, ctx),
+            PostingsWriterEnum::JsonDocTfAndPosition(writer) => {
+                writer.subscribe(doc, pos, term, ctx)
+            }
+        }
+    }
+
+    fn serialize<C: Codec>(
+        &self,
+        term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
+        ordered_id_to_path: &[&str],
+        ctx: &IndexingContext,
+        serializer: &mut FieldSerializer<C>,
+    ) -> io::Result<()> {
+        match self {
+            PostingsWriterEnum::DocId(writer) => {
+                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
+            }
+            PostingsWriterEnum::DocIdTf(writer) => {
+                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
+            }
+            PostingsWriterEnum::DocTfAndPosition(writer) => {
+                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
+            }
+            PostingsWriterEnum::JsonDocId(writer) => {
+                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
+            }
+            PostingsWriterEnum::JsonDocIdTf(writer) => {
+                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
+            }
+            PostingsWriterEnum::JsonDocTfAndPosition(writer) => {
+                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
+            }
+        }
+    }
+
+    fn ensure_term(&self, serialized_term: &[u8], ctx: &mut IndexingContext) -> Addr {
+        match self {
+            PostingsWriterEnum::DocId(writer) => writer.ensure_term(serialized_term, ctx),
+            PostingsWriterEnum::DocIdTf(writer) => writer.ensure_term(serialized_term, ctx),
+            PostingsWriterEnum::DocTfAndPosition(writer) => {
+                writer.ensure_term(serialized_term, ctx)
+            }
+            PostingsWriterEnum::JsonDocId(writer) => writer.ensure_term(serialized_term, ctx),
+            PostingsWriterEnum::JsonDocIdTf(writer) => writer.ensure_term(serialized_term, ctx),
+            PostingsWriterEnum::JsonDocTfAndPosition(writer) => {
+                writer.ensure_term(serialized_term, ctx)
+            }
+        }
+    }
+
+    /// Tokenizes a text and subscribes all of its tokens.
+    fn index_text(
+        &mut self,
+        doc_id: DocId,
+        token_stream: &mut dyn TokenStream,
+        term_buffer: &mut IndexingTerm,
+        ctx: &mut IndexingContext,
+        indexing_position: &mut IndexingPosition,
+    ) {
+        match self {
+            PostingsWriterEnum::DocId(writer) => {
+                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
+            }
+            PostingsWriterEnum::DocIdTf(writer) => {
+                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
+            }
+            PostingsWriterEnum::DocTfAndPosition(writer) => {
+                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
+            }
+            PostingsWriterEnum::JsonDocId(writer) => {
+                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
+            }
+            PostingsWriterEnum::JsonDocIdTf(writer) => {
+                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
+            }
+            PostingsWriterEnum::JsonDocTfAndPosition(writer) => {
+                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
+            }
+        }
+    }
+
+    fn total_num_tokens(&self) -> u64 {
+        match self {
+            PostingsWriterEnum::DocId(writer) => writer.total_num_tokens(),
+            PostingsWriterEnum::DocIdTf(writer) => writer.total_num_tokens(),
+            PostingsWriterEnum::DocTfAndPosition(writer) => writer.total_num_tokens(),
+            PostingsWriterEnum::JsonDocId(writer) => writer.total_num_tokens(),
+            PostingsWriterEnum::JsonDocIdTf(writer) => writer.total_num_tokens(),
+            PostingsWriterEnum::JsonDocTfAndPosition(writer) => writer.total_num_tokens(),
+        }
+    }
+}
+
 /// The `PostingsWriter` is in charge of receiving documenting
 /// and building a `Segment` in anonymous memory.
 ///
@@ -116,14 +300,23 @@ pub(crate) trait PostingsWriter: Send + Sync {

    /// Serializes the postings on disk.
    /// The actual serialization format is handled by the `PostingsSerializer`.
-    fn serialize(
+    fn serialize<C: Codec>(
        &self,
        term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
        ordered_id_to_path: &[&str],
        ctx: &IndexingContext,
-        serializer: &mut FieldSerializer,
+        serializer: &mut FieldSerializer<C>,
    ) -> io::Result<()>;

+    /// Ensures `serialized_term` has an entry in the term index, creating an
+    /// empty recorder (matching this writer's indexing option) if the term is
+    /// not present yet, and returns the value `Addr` of its recorder.
+    ///
+    /// An existing recorder is never overwritten, so the term keeps any
+    /// posting data already recorded for it. This is used to attach a
+    /// codec-specific payload to a term that may belong to no document.
+    fn ensure_term(&self, serialized_term: &[u8], ctx: &mut IndexingContext) -> Addr;
+
    /// Tokenize a text and subscribe all of its token.
    fn index_text(
        &mut self,
@@ -166,31 +359,27 @@ pub(crate) trait PostingsWriter: Send + Sync {
 /// The `SpecializedPostingsWriter` is just here to remove dynamic
 /// dispatch to the recorder information.
 #[derive(Default)]
-pub(crate) struct SpecializedPostingsWriter<Rec: Recorder> {
+pub struct SpecializedPostingsWriter<Rec: Recorder> {
    total_num_tokens: u64,
    _recorder_type: PhantomData<Rec>,
 }

-impl<Rec: Recorder> From<SpecializedPostingsWriter<Rec>> for Box<dyn PostingsWriter> {
-    fn from(
-        specialized_postings_writer: SpecializedPostingsWriter<Rec>,
-    ) -> Box<dyn PostingsWriter> {
-        Box::new(specialized_postings_writer)
-    }
-}
-
 impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {
    #[inline]
-    pub(crate) fn serialize_one_term(
+    pub(crate) fn serialize_one_term<C: Codec>(
        term: &[u8],
        addr: Addr,
        buffer_lender: &mut BufferLender,
        ctx: &IndexingContext,
-        serializer: &mut FieldSerializer,
+        serializer: &mut FieldSerializer<C>,
    ) -> io::Result<()> {
        let recorder: Rec = ctx.term_index.read(addr);
        let term_doc_freq = recorder.term_doc_freq().unwrap_or(0u32);
        serializer.new_term(term, term_doc_freq, recorder.has_term_freq())?;
+        if let Some(payload) = ctx.codec_term_payloads.get(&addr) {
+            // `&(dyn Any + Send)` upcasts to `&dyn Any`.
+            serializer.set_term_payload(payload.as_ref());
+        }
        recorder.serialize(&ctx.arena, serializer, buffer_lender);
        serializer.close_term()?;
        Ok(())
@@ -210,29 +399,31 @@ impl<Rec: Recorder> PostingsWriter for SpecializedPostingsWriter<Rec> {
        self.total_num_tokens += 1;
        let (term_index, arena) = (&mut ctx.term_index, &mut ctx.arena);
        term_index.mutate_or_create(term.serialized_term(), |opt_recorder: Option<Rec>| {
-            if let Some(mut recorder) = opt_recorder {
-                let current_doc = recorder.current_doc();
-                if current_doc != doc {
+            // A recorder may already exist without having started any document: the codec
+            // payload mechanism (`ensure_term`) pre-creates one to attach a payload to a
+            // term (e.g. a static template token in the moshiki codec). Such a recorder has
+            // `current_doc == UNINITIALIZED_DOC`. We must NOT `close_doc` it on the first
+            // real occurrence — that would emit a spurious doc terminator and desync the
+            // posting/position stream. `new_doc` writes the first doc id as an absolute delta.
+            let mut recorder = opt_recorder.unwrap_or_default();
+            let current_doc = recorder.current_doc();
+            if current_doc != doc {
+                if current_doc != UNINITIALIZED_DOC {
                    recorder.close_doc(arena);
-                    recorder.new_doc(doc, arena);
                }
-                recorder.record_position(position, arena);
-                recorder
-            } else {
-                let mut recorder = Rec::default();
                recorder.new_doc(doc, arena);
-                recorder.record_position(position, arena);
-                recorder
            }
+            recorder.record_position(position, arena);
+            recorder
        });
    }

-    fn serialize(
+    fn serialize<C: Codec>(
        &self,
        term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
        _ordered_id_to_path: &[&str],
        ctx: &IndexingContext,
-        serializer: &mut FieldSerializer,
+        serializer: &mut FieldSerializer<C>,
    ) -> io::Result<()> {
        let mut buffer_lender = BufferLender::default();
        for (_field, _path_id, term, addr) in term_addrs {
@@ -241,6 +432,11 @@ impl<Rec: Recorder> PostingsWriter for SpecializedPostingsWriter<Rec> {
        Ok(())
    }

+    fn ensure_term(&self, serialized_term: &[u8], ctx: &mut IndexingContext) -> Addr {
+        ctx.term_index
+            .get_or_create_value_addr::<Rec>(serialized_term, Rec::default)
+    }
+
    fn total_num_tokens(&self) -> u64 {
        self.total_num_tokens
    }
--- a/src/postings/recorder.rs
+++ b/src/postings/recorder.rs
@@ -1,13 +1,37 @@
 use common::read_u32_vint;
 use stacker::{ExpUnrolledLinkedList, MemoryArena};

+use crate::codec::Codec;
 use crate::postings::FieldSerializer;
 use crate::DocId;

 const POSITION_END: u32 = 0;

+/// Sentinel `current_doc` for a recorder that has not yet started any document.
+///
+/// A recorder can exist before its first `new_doc` because the codec payload
+/// mechanism (`ensure_term`) pre-creates a recorder to attach a payload to a term
+/// — e.g. a static template token in the moshiki codec. `DocId::MAX` is never a
+/// real document id (valid ids are below `TERMINATED`), so it unambiguously marks "no document
+/// started yet": `subscribe` uses it to skip the spurious `close_doc` that would
+/// otherwise desync the position stream, and `new_doc` uses it to write the first
+/// doc id as an absolute delta.
+pub(crate) const UNINITIALIZED_DOC: DocId = DocId::MAX;
+
+/// Doc-id delta to vint-encode in `new_doc`. The first document of a term is stored
+/// as an absolute id (the recorder's `current_doc` is still `UNINITIALIZED_DOC`),
+/// matching the decoder which seeds `prev_doc = 0`.
+#[inline]
+fn doc_delta(current_doc: DocId, doc: DocId) -> u32 {
+    if current_doc == UNINITIALIZED_DOC {
+        doc
+    } else {
+        doc - current_doc
+    }
+}
+
 #[derive(Default)]
-pub(crate) struct BufferLender {
+pub struct BufferLender {
    buffer_u8: Vec<u8>,
    buffer_u32: Vec<u32>,
 }
@@ -55,7 +79,7 @@ impl Iterator for VInt32Reader<'_> {
 ///   * the document id
 ///   * the term frequency
 ///   * the term positions
-pub(crate) trait Recorder: Copy + Default + Send + Sync + 'static {
+pub trait Recorder: Copy + Default + Send + Sync + 'static {
    /// Returns the current document
    fn current_doc(&self) -> u32;
    /// Starts recording information about a new document
@@ -67,10 +91,10 @@ pub(crate) trait Recorder: Copy + Default + Send + Sync + 'static {
    /// Close the document. It will help record the term frequency.
    fn close_doc(&mut self, arena: &mut MemoryArena);
    /// Pushes the postings information to the serializer.
-    fn serialize(
+    fn serialize<C: Codec>(
        &self,
        arena: &MemoryArena,
-        serializer: &mut FieldSerializer<'_>,
+        serializer: &mut FieldSerializer<C>,
        buffer_lender: &mut BufferLender,
    );
    /// Returns the number of document containing this term.
@@ -85,12 +109,21 @@ pub(crate) trait Recorder: Copy + Default + Send + Sync + 'static {
 }

 /// Only records the doc ids
-#[derive(Clone, Copy, Default)]
+#[derive(Clone, Copy)]
 pub struct DocIdRecorder {
    stack: ExpUnrolledLinkedList,
    current_doc: DocId,
 }

+impl Default for DocIdRecorder {
+    fn default() -> Self {
+        DocIdRecorder {
+            stack: ExpUnrolledLinkedList::default(),
+            current_doc: UNINITIALIZED_DOC,
+        }
+    }
+}
+
 impl Recorder for DocIdRecorder {
    #[inline]
    fn current_doc(&self) -> DocId {
@@ -99,7 +132,7 @@ impl Recorder for DocIdRecorder {

    #[inline]
    fn new_doc(&mut self, doc: DocId, arena: &mut MemoryArena) {
-        let delta = doc - self.current_doc;
+        let delta = doc_delta(self.current_doc, doc);
        self.current_doc = doc;
        self.stack.writer(arena).write_u32_vint(delta);
    }
@@ -110,10 +143,10 @@ impl Recorder for DocIdRecorder {
    #[inline]
    fn close_doc(&mut self, _arena: &mut MemoryArena) {}

-    fn serialize(
+    fn serialize<C: Codec>(
        &self,
        arena: &MemoryArena,
-        serializer: &mut FieldSerializer<'_>,
+        serializer: &mut FieldSerializer<C>,
        buffer_lender: &mut BufferLender,
    ) {
        let buffer = buffer_lender.lend_u8();
@@ -144,7 +177,7 @@ fn get_sum_reader(iter: impl Iterator<Item = u32>) -> impl Iterator<Item = u32>
 }

 /// Recorder encoding document ids, and term frequencies
-#[derive(Clone, Copy, Default)]
+#[derive(Clone, Copy)]
 pub struct TermFrequencyRecorder {
    stack: ExpUnrolledLinkedList,
    current_doc: DocId,
@@ -152,6 +185,17 @@ pub struct TermFrequencyRecorder {
    term_doc_freq: u32,
 }

+impl Default for TermFrequencyRecorder {
+    fn default() -> Self {
+        TermFrequencyRecorder {
+            stack: ExpUnrolledLinkedList::default(),
+            current_doc: UNINITIALIZED_DOC,
+            current_tf: 0,
+            term_doc_freq: 0,
+        }
+    }
+}
+
 impl Recorder for TermFrequencyRecorder {
    #[inline]
    fn current_doc(&self) -> DocId {
@@ -160,7 +204,7 @@ impl Recorder for TermFrequencyRecorder {

    #[inline]
    fn new_doc(&mut self, doc: DocId, arena: &mut MemoryArena) {
-        let delta = doc - self.current_doc;
+        let delta = doc_delta(self.current_doc, doc);
        self.term_doc_freq += 1;
        self.current_doc = doc;
        self.stack.writer(arena).write_u32_vint(delta);
@@ -178,10 +222,10 @@ impl Recorder for TermFrequencyRecorder {
        self.current_tf = 0;
    }

-    fn serialize(
+    fn serialize<C: Codec>(
        &self,
        arena: &MemoryArena,
-        serializer: &mut FieldSerializer<'_>,
+        serializer: &mut FieldSerializer<C>,
        buffer_lender: &mut BufferLender,
    ) {
        let buffer = buffer_lender.lend_u8();
@@ -202,13 +246,23 @@ impl Recorder for TermFrequencyRecorder {
 }

 /// Recorder encoding term frequencies as well as positions.
-#[derive(Clone, Copy, Default)]
+#[derive(Clone, Copy)]
 pub struct TfAndPositionRecorder {
    stack: ExpUnrolledLinkedList,
    current_doc: DocId,
    term_doc_freq: u32,
 }

+impl Default for TfAndPositionRecorder {
+    fn default() -> Self {
+        TfAndPositionRecorder {
+            stack: ExpUnrolledLinkedList::default(),
+            current_doc: UNINITIALIZED_DOC,
+            term_doc_freq: 0,
+        }
+    }
+}
+
 impl Recorder for TfAndPositionRecorder {
    #[inline]
    fn current_doc(&self) -> DocId {
@@ -217,7 +271,7 @@ impl Recorder for TfAndPositionRecorder {

    #[inline]
    fn new_doc(&mut self, doc: DocId, arena: &mut MemoryArena) {
-        let delta = doc - self.current_doc;
+        let delta = doc_delta(self.current_doc, doc);
        self.current_doc = doc;
        self.term_doc_freq += 1u32;
        self.stack.writer(arena).write_u32_vint(delta);
@@ -235,10 +289,10 @@ impl Recorder for TfAndPositionRecorder {
        self.stack.writer(arena).write_u32_vint(POSITION_END);
    }

-    fn serialize(
+    fn serialize<C: Codec>(
        &self,
        arena: &MemoryArena,
-        serializer: &mut FieldSerializer<'_>,
+        serializer: &mut FieldSerializer<C>,
        buffer_lender: &mut BufferLender,
    ) {
        let (buffer_u8, buffer_positions) = buffer_lender.lend_all();
@@ -256,6 +310,11 @@ impl Recorder for TfAndPositionRecorder {
                        break;
                    }
                    Some(position_plus_one) => {
+                        debug_assert!(
+                            position_plus_one >= prev_position_plus_one,
+                            "positions for a (term, doc) must be recorded non-decreasing (got \
+                             {position_plus_one} after {prev_position_plus_one})",
+                        );
                        let delta_position = position_plus_one - prev_position_plus_one;
                        buffer_positions.push(delta_position);
                        prev_position_plus_one = position_plus_one;
@@ -275,8 +334,9 @@ impl Recorder for TfAndPositionRecorder {
 mod tests {

    use common::write_u32_vint;
+    use stacker::MemoryArena;

-    use super::{BufferLender, VInt32Reader};
+    use super::{BufferLender, Recorder, TermFrequencyRecorder, VInt32Reader};

    #[test]
    fn test_buffer_lender() {
@@ -314,4 +374,98 @@ mod tests {
        let res: Vec<u32> = VInt32Reader::new(&buffer[..]).collect();
        assert_eq!(&res[..], &vals[..]);
    }
+
+    // ── TermFrequencyRecorder ─────────────────────────────────────────────────
+
+    #[test]
+    fn term_frequency_recorder_has_term_freq() {
+        let rec = TermFrequencyRecorder::default();
+        assert!(
+            rec.has_term_freq(),
+            "TermFrequencyRecorder must advertise term-frequency support"
+        );
+    }
+
+    #[test]
+    fn term_frequency_recorder_term_doc_freq_single_doc() {
+        let mut arena = MemoryArena::default();
+        let mut rec = TermFrequencyRecorder::default();
+
+        // Record one document with two term occurrences.
+        rec.new_doc(0, &mut arena);
+        rec.record_position(0, &mut arena);
+        rec.record_position(1, &mut arena);
+        rec.close_doc(&mut arena);
+
+        assert_eq!(
+            rec.term_doc_freq(),
+            Some(1),
+            "term_doc_freq should be 1 after recording one document"
+        );
+    }
+
+    #[test]
+    fn term_frequency_recorder_term_doc_freq_multiple_docs() {
+        let mut arena = MemoryArena::default();
+        let mut rec = TermFrequencyRecorder::default();
+
+        // Three documents with 1, 3, and 2 occurrences respectively.
+        for (doc, tf) in [(0u32, 1u32), (5, 3), (10, 2)] {
+            rec.new_doc(doc, &mut arena);
+            for pos in 0..tf {
+                rec.record_position(pos, &mut arena);
+            }
+            rec.close_doc(&mut arena);
+        }
+
+        assert_eq!(
+            rec.term_doc_freq(),
+            Some(3),
+            "term_doc_freq should equal the number of documents recorded"
+        );
+    }
+
+    #[test]
+    fn term_frequency_recorder_zero_docs() {
+        let rec = TermFrequencyRecorder::default();
+        assert_eq!(
+            rec.term_doc_freq(),
+            Some(0),
+            "term_doc_freq should be 0 before any document is recorded"
+        );
+    }
+
+    #[test]
+    fn term_frequency_recorder_single_occurrence_per_doc() {
+        let mut arena = MemoryArena::default();
+        let mut rec = TermFrequencyRecorder::default();
+
+        // Each document has exactly one occurrence — the minimum non-trivial case.
+        for doc in [1u32, 2, 100] {
+            rec.new_doc(doc, &mut arena);
+            rec.record_position(0, &mut arena);
+            rec.close_doc(&mut arena);
+        }
+
+        assert_eq!(rec.term_doc_freq(), Some(3));
+    }
+
+    #[test]
+    fn term_frequency_recorder_high_frequency_doc() {
+        let mut arena = MemoryArena::default();
+        let mut rec = TermFrequencyRecorder::default();
+
+        // A document where the term appears many times.
+        rec.new_doc(42, &mut arena);
+        for pos in 0..1000 {
+            rec.record_position(pos, &mut arena);
+        }
+        rec.close_doc(&mut arena);
+
+        assert_eq!(
+            rec.term_doc_freq(),
+            Some(1),
+            "term_doc_freq counts documents, not occurrences"
+        );
+    }
 }
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -1,17 +1,15 @@
-use std::cmp::Ordering;
 use std::io::{self, Write};

-use common::{BinarySerializable, CountingWriter, VInt};
+use common::{BinarySerializable, CountingWriter};

 use super::TermInfo;
+use crate::codec::positions::{PositionsCodec, PositionsSerializer};
+use crate::codec::postings::PostingsSerializer;
+use crate::codec::Codec;
 use crate::directory::{CompositeWrite, WritePtr};
 use crate::fieldnorm::FieldNormReader;
 use crate::index::Segment;
-use crate::positions::PositionSerializer;
-use crate::postings::compression::{BlockEncoder, VIntEncoder, COMPRESSION_BLOCK_SIZE};
-use crate::postings::skip::SkipSerializer;
-use crate::query::Bm25Weight;
-use crate::schema::{Field, FieldEntry, IndexRecordOption, Schema};
+use crate::schema::{Field, FieldEntry, FieldType, IndexRecordOption, Schema};
 use crate::termdict::TermDictionaryBuilder;
 use crate::{DocId, Score};

@@ -46,22 +44,27 @@ use crate::{DocId, Score};
 ///
 /// A description of the serialization format is
 /// [available here](https://fulmicoton.gitbooks.io/tantivy-doc/content/inverted-index.html).
-pub struct InvertedIndexSerializer {
+pub struct InvertedIndexSerializer<C: Codec> {
    terms_write: CompositeWrite<WritePtr>,
    postings_write: CompositeWrite<WritePtr>,
    positions_write: CompositeWrite<WritePtr>,
    schema: Schema,
+    codec: C,
 }

-impl InvertedIndexSerializer {
+use crate::codec::postings::PostingsCodec;
+
+impl<C: Codec> InvertedIndexSerializer<C> {
    /// Open a new `InvertedIndexSerializer` for the given segment
-    pub fn open(segment: &mut Segment) -> crate::Result<InvertedIndexSerializer> {
+    pub fn open(segment: &mut Segment<C>) -> crate::Result<InvertedIndexSerializer<C>> {
        use crate::index::SegmentComponent::{Positions, Postings, Terms};
+        let codec = segment.index().codec().clone();
        let inv_index_serializer = InvertedIndexSerializer {
            terms_write: CompositeWrite::wrap(segment.open_write(Terms)?),
            postings_write: CompositeWrite::wrap(segment.open_write(Postings)?),
            positions_write: CompositeWrite::wrap(segment.open_write(Positions)?),
            schema: segment.schema(),
+            codec,
        };
        Ok(inv_index_serializer)
    }
@@ -75,22 +78,19 @@ impl InvertedIndexSerializer {
        field: Field,
        total_num_tokens: u64,
        fieldnorm_reader: Option<FieldNormReader>,
-    ) -> io::Result<FieldSerializer<'_>> {
+    ) -> io::Result<FieldSerializer<'_, C>> {
        let field_entry: &FieldEntry = self.schema.get_field_entry(field);
        let term_dictionary_write = self.terms_write.for_field(field);
        let postings_write = self.postings_write.for_field(field);
        let positions_write = self.positions_write.for_field(field);
-        let index_record_option = field_entry
-            .field_type()
-            .index_record_option()
-            .unwrap_or(IndexRecordOption::Basic);
        FieldSerializer::create(
-            index_record_option,
+            field_entry.field_type(),
            total_num_tokens,
            term_dictionary_write,
            postings_write,
            positions_write,
            fieldnorm_reader,
+            &self.codec,
        )
    }

@@ -105,36 +105,43 @@ impl InvertedIndexSerializer {

 /// The field serializer is in charge of
 /// the serialization of a specific field.
-pub struct FieldSerializer<'a, W: Write = WritePtr> {
-    term_dictionary_builder: TermDictionaryBuilder<&'a mut CountingWriter<W>>,
-    postings_serializer: PostingsSerializer,
-    positions_serializer_opt: Option<PositionSerializer<&'a mut CountingWriter<W>>>,
+pub struct FieldSerializer<'a, C: Codec> {
+    term_dictionary_builder: TermDictionaryBuilder<&'a mut CountingWriter<WritePtr>>,
+    postings_serializer: <C::PostingsCodec as PostingsCodec>::PostingsSerializer,
+    positions_serializer_opt:
+        Option<<C::PositionsCodec as PositionsCodec>::Serializer<&'a mut CountingWriter<WritePtr>>>,
    current_term_info: TermInfo,
    term_open: bool,
-    postings_write: &'a mut CountingWriter<W>,
+    postings_write: &'a mut CountingWriter<WritePtr>,
    postings_start_offset: u64,
 }

-impl<'a, W: Write> FieldSerializer<'a, W> {
-    /// Creates a new `FieldSerializer` for the given field type.
-    pub fn create(
-        index_record_option: IndexRecordOption,
+impl<'a, C: Codec> FieldSerializer<'a, C> {
+    fn create(
+        field_type: &FieldType,
        total_num_tokens: u64,
-        term_dictionary_write: &'a mut CountingWriter<W>,
-        postings_write: &'a mut CountingWriter<W>,
-        positions_write: &'a mut CountingWriter<W>,
+        term_dictionary_write: &'a mut CountingWriter<WritePtr>,
+        postings_write: &'a mut CountingWriter<WritePtr>,
+        positions_write: &'a mut CountingWriter<WritePtr>,
        fieldnorm_reader: Option<FieldNormReader>,
-    ) -> io::Result<FieldSerializer<'a, W>> {
+        codec: &C,
+    ) -> io::Result<FieldSerializer<'a, C>> {
+        let index_record_option = field_type
+            .index_record_option()
+            .unwrap_or(IndexRecordOption::Basic);
        total_num_tokens.serialize(postings_write)?;
        let term_dictionary_builder = TermDictionaryBuilder::create(term_dictionary_write)?;
        let average_fieldnorm = fieldnorm_reader
            .as_ref()
            .map(|ff_reader| total_num_tokens as Score / ff_reader.num_docs() as Score)
            .unwrap_or(0.0);
-        let postings_serializer =
-            PostingsSerializer::new(average_fieldnorm, index_record_option, fieldnorm_reader);
+        let postings_serializer = codec.postings_codec().new_serializer(
+            average_fieldnorm,
+            index_record_option,
+            fieldnorm_reader,
+        );
        let positions_serializer_opt = if index_record_option.has_positions() {
-            Some(PositionSerializer::new(positions_write))
+            Some(codec.positions_codec().new_serializer(positions_write))
        } else {
            None
        };
@@ -185,7 +192,6 @@ impl<'a, W: Write> FieldSerializer<'a, W> {
            "Called new_term, while the previous term was not closed."
        );
        self.term_open = true;
-        self.postings_serializer.clear();
        self.current_term_info = self.current_term_info();
        self.term_dictionary_builder.insert_key(term)?;
        self.postings_serializer
@@ -198,6 +204,13 @@ impl<'a, W: Write> FieldSerializer<'a, W> {
        self.new_term(term, 0, false)
    }

+    /// Forwards a codec-specific per-term payload to the postings serializer.
+    ///
+    /// Must be called after `new_term` and before any `write_doc`.
+    pub fn set_term_payload(&mut self, payload: &dyn std::any::Any) {
+        self.postings_serializer.set_term_payload(payload);
+    }
+
    /// Serialize the information that a document contains for the current term:
    /// its term frequency, and the position deltas.
    ///
@@ -254,234 +267,3 @@ impl<'a, W: Write> FieldSerializer<'a, W> {
        Ok(())
    }
 }
-
-struct Block {
-    doc_ids: [DocId; COMPRESSION_BLOCK_SIZE],
-    term_freqs: [u32; COMPRESSION_BLOCK_SIZE],
-    len: usize,
-}
-
-impl Block {
-    fn new() -> Self {
-        Block {
-            doc_ids: [0u32; COMPRESSION_BLOCK_SIZE],
-            term_freqs: [0u32; COMPRESSION_BLOCK_SIZE],
-            len: 0,
-        }
-    }
-
-    fn doc_ids(&self) -> &[DocId] {
-        &self.doc_ids[..self.len]
-    }
-
-    fn term_freqs(&self) -> &[u32] {
-        &self.term_freqs[..self.len]
-    }
-
-    fn clear(&mut self) {
-        self.len = 0;
-    }
-
-    fn append_doc(&mut self, doc: DocId, term_freq: u32) {
-        let len = self.len;
-        self.doc_ids[len] = doc;
-        self.term_freqs[len] = term_freq;
-        self.len = len + 1;
-    }
-
-    fn is_full(&self) -> bool {
-        self.len == COMPRESSION_BLOCK_SIZE
-    }
-
-    fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-
-    fn last_doc(&self) -> DocId {
-        assert_eq!(self.len, COMPRESSION_BLOCK_SIZE);
-        self.doc_ids[COMPRESSION_BLOCK_SIZE - 1]
-    }
-}
-
-/// Serializer for postings lists.
-pub struct PostingsSerializer {
-    last_doc_id_encoded: u32,
-
-    block_encoder: BlockEncoder,
-    block: Box<Block>,
-
-    postings_write: Vec<u8>,
-    skip_write: SkipSerializer,
-
-    mode: IndexRecordOption,
-    fieldnorm_reader: Option<FieldNormReader>,
-
-    bm25_weight: Option<Bm25Weight>,
-    avg_fieldnorm: Score, /* Average number of term in the field for that segment.
-                           * this value is used to compute the block wand information. */
-    term_has_freq: bool,
-}
-
-impl PostingsSerializer {
-    /// Creates a new `PostingsSerializer`.
-    /// * avg_fieldnorm - average field norm for the field being serialized.
-    /// * mode - indexing options for the field being serialized.
-    pub fn new(
-        avg_fieldnorm: Score,
-        mode: IndexRecordOption,
-        fieldnorm_reader: Option<FieldNormReader>,
-    ) -> PostingsSerializer {
-        PostingsSerializer {
-            block_encoder: BlockEncoder::new(),
-            block: Box::new(Block::new()),
-
-            postings_write: Vec::new(),
-            skip_write: SkipSerializer::new(),
-
-            last_doc_id_encoded: 0u32,
-            mode,
-
-            fieldnorm_reader,
-            bm25_weight: None,
-            avg_fieldnorm,
-            term_has_freq: false,
-        }
-    }
-
-    /// Starts the serialization for a new term.
-    /// * term_doc_freq - the number of documents containing the term.
-    pub fn new_term(&mut self, term_doc_freq: u32, record_term_freq: bool) {
-        self.bm25_weight = None;
-
-        self.term_has_freq = self.mode.has_freq() && record_term_freq;
-        if !self.term_has_freq {
-            return;
-        }
-
-        let num_docs_in_segment: u64 =
-            if let Some(fieldnorm_reader) = self.fieldnorm_reader.as_ref() {
-                fieldnorm_reader.num_docs() as u64
-            } else {
-                return;
-            };
-
-        if num_docs_in_segment == 0 {
-            return;
-        }
-
-        self.bm25_weight = Some(Bm25Weight::for_one_term_without_explain(
-            term_doc_freq as u64,
-            num_docs_in_segment,
-            self.avg_fieldnorm,
-        ));
-    }
-
-    fn write_block(&mut self) {
-        {
-            // encode the doc ids
-            let (num_bits, block_encoded): (u8, &[u8]) = self
-                .block_encoder
-                .compress_block_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
-            self.last_doc_id_encoded = self.block.last_doc();
-            self.skip_write
-                .write_doc(self.last_doc_id_encoded, num_bits);
-            // last el block 0, offset block 1,
-            self.postings_write.extend(block_encoded);
-        }
-        if self.term_has_freq {
-            // encode the term frequencies
-            let (num_bits, block_encoded): (u8, &[u8]) = self
-                .block_encoder
-                .compress_block_unsorted(self.block.term_freqs(), true);
-            self.postings_write.extend(block_encoded);
-            self.skip_write.write_term_freq(num_bits);
-            if self.mode.has_positions() {
-                // We serialize the sum of term freqs within the skip information
-                // in order to navigate through positions.
-                let sum_freq = self.block.term_freqs().iter().cloned().sum();
-                self.skip_write.write_total_term_freq(sum_freq);
-            }
-            let mut blockwand_params = (0u8, 0u32);
-            if let Some(bm25_weight) = self.bm25_weight.as_ref() {
-                if let Some(fieldnorm_reader) = self.fieldnorm_reader.as_ref() {
-                    let docs = self.block.doc_ids().iter().cloned();
-                    let term_freqs = self.block.term_freqs().iter().cloned();
-                    let fieldnorms = docs.map(|doc| fieldnorm_reader.fieldnorm_id(doc));
-                    blockwand_params = fieldnorms
-                        .zip(term_freqs)
-                        .max_by(
-                            |(left_fieldnorm_id, left_term_freq),
-                             (right_fieldnorm_id, right_term_freq)| {
-                                let left_score =
-                                    bm25_weight.tf_factor(*left_fieldnorm_id, *left_term_freq);
-                                let right_score =
-                                    bm25_weight.tf_factor(*right_fieldnorm_id, *right_term_freq);
-                                left_score
-                                    .partial_cmp(&right_score)
-                                    .unwrap_or(Ordering::Equal)
-                            },
-                        )
-                        .unwrap();
-                }
-            }
-            let (fieldnorm_id, term_freq) = blockwand_params;
-            self.skip_write.write_blockwand_max(fieldnorm_id, term_freq);
-        }
-        self.block.clear();
-    }
-
-    /// Register that the given document contains the current term.
-    /// * doc_id - the document id.
-    /// * term_freq - the term frequency within the document.
-    pub fn write_doc(&mut self, doc_id: DocId, term_freq: u32) {
-        self.block.append_doc(doc_id, term_freq);
-        if self.block.is_full() {
-            self.write_block();
-        }
-    }
-
-    /// Finish the serialization for this term.
-    pub fn close_term(
-        &mut self,
-        doc_freq: u32,
-        output_write: &mut impl std::io::Write,
-    ) -> io::Result<()> {
-        if !self.block.is_empty() {
-            // we have doc ids waiting to be written
-            // this happens when the number of doc ids is
-            // not a perfect multiple of our block size.
-            //
-            // In that case, the remaining part is encoded
-            // using variable int encoding.
-            {
-                let block_encoded = self
-                    .block_encoder
-                    .compress_vint_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
-                self.postings_write.write_all(block_encoded)?;
-            }
-            // ... Idem for term frequencies
-            if self.term_has_freq {
-                let block_encoded = self
-                    .block_encoder
-                    .compress_vint_unsorted(self.block.term_freqs());
-                self.postings_write.write_all(block_encoded)?;
-            }
-            self.block.clear();
-        }
-        if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
-            let skip_data = self.skip_write.data();
-            VInt(skip_data.len() as u64).serialize(output_write)?;
-            output_write.write_all(skip_data)?;
-        }
-        output_write.write_all(&self.postings_write[..])?;
-        self.skip_write.clear();
-        self.postings_write.clear();
-        self.bm25_weight = None;
-        Ok(())
-    }
-
-    fn clear(&mut self) {
-        self.block.clear();
-        self.last_doc_id_encoded = 0;
-    }
-}
--- a/src/query/all_query.rs
+++ b/src/query/all_query.rs
@@ -2,7 +2,7 @@ use crate::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
 use crate::index::SegmentReader;
 use crate::query::boost_query::BoostScorer;
 use crate::query::explanation::does_not_match;
-use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
+use crate::query::{box_scorer, EnableScoring, Explanation, Query, Scorer, Weight};
 use crate::{DocId, Score};

 /// Query that matches all of the documents.
@@ -24,9 +24,9 @@ impl Weight for AllWeight {
    fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
        let all_scorer = AllScorer::new(reader.max_doc());
        if boost != 1.0 {
-            Ok(Box::new(BoostScorer::new(all_scorer, boost)))
+            Ok(box_scorer(BoostScorer::new(all_scorer, boost)))
        } else {
-            Ok(Box::new(all_scorer))
+            Ok(box_scorer(all_scorer))
        }
    }

--- a/src/query/automaton_weight.rs
+++ b/src/query/automaton_weight.rs
@@ -10,7 +10,7 @@ use crate::postings::TermInfo;
 use crate::query::{BitSetDocSet, ConstScorer, Explanation, Scorer, Weight};
 use crate::schema::{Field, IndexRecordOption};
 use crate::termdict::{TermDictionary, TermStreamer};
-use crate::{DocId, Score, TantivyError};
+use crate::{DocId, DocSet, Score, TantivyError};

 /// A weight struct for Fuzzy Term and Regex Queries
 pub struct AutomatonWeight<A> {
@@ -92,18 +92,9 @@ where
        let mut term_stream = self.automaton_stream(term_dict)?;
        while term_stream.advance() {
            let term_info = term_stream.value();
-            let mut block_segment_postings = inverted_index
-                .read_block_postings_from_terminfo(term_info, IndexRecordOption::Basic)?;
-            loop {
-                let docs = block_segment_postings.docs();
-                if docs.is_empty() {
-                    break;
-                }
-                for &doc in docs {
-                    doc_bitset.insert(doc);
-                }
-                block_segment_postings.advance();
-            }
+            let mut block_segment_postings =
+                inverted_index.read_postings_from_terminfo(term_info, IndexRecordOption::Basic)?;
+            block_segment_postings.fill_bitset(&mut doc_bitset);
        }
        let doc_bitset = BitSetDocSet::from(doc_bitset);
        let const_scorer = ConstScorer::new(doc_bitset, boost);
--- a/src/query/bitset/mod.rs
+++ b/src/query/bitset/mod.rs
@@ -24,6 +24,13 @@ impl BitSetDocSet {
        self.cursor_bucket = bucket_addr;
        self.cursor_tinybitset = self.docs.tinyset(bucket_addr);
    }
+
+    /// Returns the number of documents in the bitset.
+    ///
+    /// This call is not free: it will bitcount the number of bits in the bitset.
+    pub fn doc_freq(&self) -> u32 {
+        self.docs.len() as u32
+    }
 }

 impl From<BitSet> for BitSetDocSet {
--- a/src/query/boolean_query/block_wand_intersection.rs
+++ b/src/query/boolean_query/block_wand_intersection.rs
@@ -0,0 +1,464 @@
+use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
+use crate::query::term_query::TermScorer;
+use crate::query::Scorer;
+use crate::{DocId, DocSet, Score, TERMINATED};
+
+/// Block-max pruning for top-K over intersection of term scorers.
+///
+/// The rarest term (smallest size hint) is the "leader"; it drives windows of at most one block.
+/// If the sum of block_max_scores in a window can't beat the threshold, the window is skipped.
+///
+/// Within a kept window, a leader doc is only checked against the secondaries (expensive seeks)
+/// when leader_score + sum(secondary block_max_scores) can exceed the threshold.
+///
+/// `callback` receives each match scoring above the threshold and returns the new threshold.
+///
+/// # Preconditions
+/// - `scorers` has at least 2 elements (asserted; panics otherwise)
+/// - All scorers read frequencies (`FreqReadingOption::ReadFreq`)
+pub(crate) fn block_wand_intersection(
+    mut scorers: Vec<TermScorer>,
+    mut threshold: Score,
+    callback: &mut dyn FnMut(DocId, Score) -> Score,
+) {
+    assert!(scorers.len() >= 2);
+
+    // Sort by size hint (ascending). scorers[0] becomes the "leader" (rarest term).
+    scorers.sort_by_key(TermScorer::size_hint);
+
+    let (leader, secondaries) = scorers.split_first_mut().unwrap();
+
+    // Precompute global max scores for early termination checks.
+    let leader_max_score: Score = leader.max_score();
+    let secondaries_global_max_sum: Score = secondaries.iter().map(TermScorer::max_score).sum();
+
+    // Early exit: no document can possibly beat the threshold.
+    if leader_max_score + secondaries_global_max_sum <= threshold {
+        return;
+    }
+
+    // Clone the fieldnorm reader and BM25 weight before the main loop.
+    // Borrowing them through `leader` inside the loop would conflict with the
+    // other method calls made on `leader` there (the borrow checker can't see
+    // through method calls), so we take owned copies once upfront.
+    let fieldnorm_reader = leader.fieldnorm_reader().clone();
+    let bm25_weight = leader.bm25_weight().clone();
+
+    let mut doc = leader.doc();
+
+    let mut secondary_block_max_scores: Box<[f32]> =
+        vec![0.0f32; secondaries.len()].into_boxed_slice();
+    let mut secondary_suffix_block_max: Box<[f32]> =
+        vec![0.0f32; secondaries.len()].into_boxed_slice();
+
+    while doc < TERMINATED {
+        // --- Phase 1: Block-level pruning ---
+        //
+        // Position all skip readers on the block containing `doc`.
+        // seek_block is cheap: it only advances the skip reader, no block decompression.
+        leader.seek_block(doc);
+        let leader_block_max: Score = leader.block_max_score();
+
+        // Compute the window end as the minimum last_doc_in_block across all scorers.
+        // This ensures the block_max values are valid for all docs in [doc, window_end].
+        // Different scorers have independently aligned blocks, so we must use the
+        // smallest window where all block_max values hold.
+        let mut window_end: DocId = leader.last_doc_in_block();
+
+        let mut secondary_block_max_sum: Score = 0.0;
+        let num_secondaries = secondaries.len();
+        for (idx, secondary) in secondaries.iter_mut().enumerate() {
+            secondary.block_cursor().seek_block(doc);
+            if !secondary.block_cursor().has_remaining_docs() {
+                return;
+            }
+            window_end = window_end.min(secondary.last_doc_in_block());
+            let bms = secondary.block_max_score();
+            secondary_block_max_scores[idx] = bms;
+            secondary_block_max_sum += bms;
+        }
+
+        if leader_block_max + secondary_block_max_sum <= threshold {
+            // The entire window cannot beat the threshold. Skip past it.
+            doc = window_end + 1;
+            continue;
+        }
+
+        // --- Phase 2: Batch processing within the window ---
+        //
+        // Score-first approach: decode the leader's block, filter by threshold,
+        // then check intersection membership only for survivors. This avoids expensive
+        // secondary seeks for docs that can't beat the threshold.
+        let block_cursor = leader.block_cursor();
+        // seek loads the block and returns the in-block index of the first doc >= `doc`.
+        let start_idx = block_cursor.seek(doc);
+
+        // Use the branchless binary search on the doc decoder to find the first
+        // index past window_end.
+        let end_idx = block_cursor
+            .doc_decoder
+            .seek_within_block(window_end + 1)
+            .min(block_cursor.block_len());
+
+        let block_docs = &block_cursor.doc_decoder.output_array()[start_idx..end_idx];
+        let block_freqs = &block_cursor.freq_output_array()[start_idx..end_idx];
+
+        // Pass 1: Batch-compute leader BM25 scores and branchlessly filter
+        // candidates that can't beat the threshold.
+        //
+        // The trick: always write to the buffer at `num_candidates`, then
+        // conditionally advance the count. The compiler can turn this into
+        // a cmov instead of a branch, avoiding misprediction costs.
+        let score_threshold = threshold - secondary_block_max_sum;
+        let mut candidate_doc_ids = [0u32; COMPRESSION_BLOCK_SIZE];
+        let mut candidate_scores = [0.0f32; COMPRESSION_BLOCK_SIZE];
+        let mut num_candidates = 0usize;
+
+        for (candidate_doc, term_freq) in
+            block_docs.iter().copied().zip(block_freqs.iter().copied())
+        {
+            let fieldnorm_id = fieldnorm_reader.fieldnorm_id(candidate_doc);
+            let leader_score = bm25_weight.score(fieldnorm_id, term_freq);
+            candidate_doc_ids[num_candidates] = candidate_doc;
+            candidate_scores[num_candidates] = leader_score;
+            num_candidates += (leader_score > score_threshold) as usize;
+        }
+
+        // If no leader doc in this window survived the Pass 1 filter, skip the window.
+        // Otherwise precompute suffix sums, used in Pass 2 to prune candidates early:
+        // suffix[i] = sum of block_max for secondaries[i+1..].
+        if num_candidates == 0 {
+            doc = window_end + 1;
+            continue;
+        }
+
+        let mut running = 0.0f32;
+        for idx in (0..num_secondaries).rev() {
+            secondary_suffix_block_max[idx] = running;
+            running += secondary_block_max_scores[idx];
+        }
+
+        // Pass 2: Check intersection membership only for survivors.
+        // score_threshold may be stale (threshold can increase from callbacks),
+        // but that's conservative — we may check a few extra candidates, never miss one.
+        'next_candidate: for candidate_idx in 0..num_candidates {
+            let candidate_doc = candidate_doc_ids[candidate_idx];
+            let mut total_score: Score = candidate_scores[candidate_idx];
+
+            for (secondary_idx, secondary) in secondaries.iter_mut().enumerate() {
+                // If a previous candidate already advanced this secondary past
+                // candidate_doc, the candidate can't be in the intersection.
+                if secondary.doc() > candidate_doc {
+                    continue 'next_candidate;
+                }
+                let seek_result = secondary.seek(candidate_doc);
+                if seek_result != candidate_doc {
+                    continue 'next_candidate;
+                }
+                total_score += secondary.score();
+
+                // Prune: even if all remaining secondaries score at their block max,
+                // can we still beat the threshold?
+                if total_score + secondary_suffix_block_max[secondary_idx] <= threshold {
+                    continue 'next_candidate;
+                }
+            }
+
+            // All secondaries matched.
+            if total_score > threshold {
+                threshold = callback(candidate_doc, total_score);
+
+                if leader_max_score + secondaries_global_max_sum <= threshold {
+                    return;
+                }
+            }
+        }
+
+        doc = window_end + 1;
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::cmp::Ordering;
+    use std::collections::BinaryHeap;
+
+    use proptest::prelude::*;
+
+    use crate::query::term_query::TermScorer;
+    use crate::query::{Bm25Weight, Scorer};
+    use crate::{DocId, DocSet, Score, TERMINATED};
+
+    struct Float(Score);
+
+    impl Eq for Float {}
+
+    impl PartialEq for Float {
+        fn eq(&self, other: &Self) -> bool {
+            self.cmp(other) == Ordering::Equal
+        }
+    }
+
+    impl PartialOrd for Float {
+        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+            Some(self.cmp(other))
+        }
+    }
+
+    impl Ord for Float {
+        fn cmp(&self, other: &Self) -> Ordering {
+            other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal)
+        }
+    }
+
+    fn nearly_equals(left: Score, right: Score) -> bool {
+        (left - right).abs() < 0.0001 * (left + right).abs()
+    }
+
+    /// Run block_wand_intersection; record each callback hit whose score is not near the limit.
+    fn compute_checkpoints_block_wand_intersection(
+        term_scorers: Vec<TermScorer>,
+        top_k: usize,
+    ) -> Vec<(DocId, Score)> {
+        let mut heap: BinaryHeap<Float> = BinaryHeap::with_capacity(top_k);
+        let mut checkpoints: Vec<(DocId, Score)> = Vec::new();
+        let mut limit: Score = 0.0;
+
+        let callback = &mut |doc, score| {
+            heap.push(Float(score));
+            if heap.len() > top_k {
+                heap.pop().unwrap();
+            }
+            if heap.len() == top_k {
+                limit = heap.peek().unwrap().0;
+            }
+            if !nearly_equals(score, limit) {
+                checkpoints.push((doc, score));
+            }
+            limit
+        };
+
+        super::block_wand_intersection(term_scorers, Score::MIN, callback);
+        checkpoints
+    }
+
+    /// Naive baseline: visit every doc of the cheapest scorer, seeking the other scorers to it.
+    fn compute_checkpoints_naive_intersection(
+        mut term_scorers: Vec<TermScorer>,
+        top_k: usize,
+    ) -> Vec<(DocId, Score)> {
+        let mut heap: BinaryHeap<Float> = BinaryHeap::with_capacity(top_k);
+        let mut checkpoints: Vec<(DocId, Score)> = Vec::new();
+        let mut limit = Score::MIN;
+
+        // Sort by cost so that the cheapest scorer drives the iteration.
+        term_scorers.sort_by_key(|s| s.cost());
+
+        let (leader, secondaries) = term_scorers.split_first_mut().unwrap();
+
+        let mut doc = leader.doc();
+        while doc != TERMINATED {
+            let mut all_match = true;
+            for secondary in secondaries.iter_mut() {
+                let secondary_doc = secondary.doc();
+                let seek_result = if secondary_doc <= doc {
+                    secondary.seek(doc)
+                } else {
+                    secondary_doc
+                };
+                if seek_result != doc {
+                    all_match = false;
+                    break;
+                }
+            }
+
+            if all_match {
+                let score: Score =
+                    leader.score() + secondaries.iter_mut().map(|s| s.score()).sum::<Score>();
+
+                if score > limit {
+                    heap.push(Float(score));
+                    if heap.len() > top_k {
+                        heap.pop().unwrap();
+                    }
+                    if heap.len() == top_k {
+                        limit = heap.peek().unwrap().0;
+                    }
+                    if !nearly_equals(score, limit) {
+                        checkpoints.push((doc, score));
+                    }
+                }
+            }
+            doc = leader.advance();
+        }
+        checkpoints
+    }
+
+    const MAX_TERM_FREQ: u32 = 100u32;
+
+    fn posting_list(max_doc: u32) -> BoxedStrategy<Vec<(DocId, u32)>> {
+        (1..max_doc + 1)
+            .prop_flat_map(move |doc_freq| {
+                (
+                    proptest::bits::bitset::sampled(doc_freq as usize, 0..max_doc as usize),
+                    proptest::collection::vec(1u32..MAX_TERM_FREQ, doc_freq as usize),
+                )
+            })
+            .prop_map(|(docset, term_freqs)| {
+                docset
+                    .iter()
+                    .map(|doc| doc as u32)
+                    .zip(term_freqs.iter().cloned())
+                    .collect::<Vec<_>>()
+            })
+            .boxed()
+    }
+
+    #[expect(clippy::type_complexity)]
+    fn gen_term_scorers(num_scorers: usize) -> BoxedStrategy<(Vec<Vec<(DocId, u32)>>, Vec<u32>)> {
+        (1u32..100u32)
+            .prop_flat_map(move |max_doc: u32| {
+                (
+                    proptest::collection::vec(posting_list(max_doc), num_scorers),
+                    proptest::collection::vec(2u32..10u32 * MAX_TERM_FREQ, max_doc as usize),
+                )
+            })
+            .boxed()
+    }
+
+    fn test_block_wand_intersection_aux(posting_lists: &[Vec<(DocId, u32)>], fieldnorms: &[u32]) {
+        // Repeat docs 64 times to create multi-block scenarios, matching block_wand.rs test
+        // strategy.
+        const REPEAT: usize = 64;
+        let fieldnorms_expanded: Vec<u32> = fieldnorms
+            .iter()
+            .cloned()
+            .flat_map(|fieldnorm| std::iter::repeat_n(fieldnorm, REPEAT))
+            .collect();
+
+        let postings_lists_expanded: Vec<Vec<(DocId, u32)>> = posting_lists
+            .iter()
+            .map(|posting_list| {
+                posting_list
+                    .iter()
+                    .cloned()
+                    .flat_map(|(doc, term_freq)| {
+                        (0_u32..REPEAT as u32).map(move |offset| {
+                            (
+                                doc * (REPEAT as u32) + offset,
+                                if offset == 0 { term_freq } else { 1 },
+                            )
+                        })
+                    })
+                    .collect::<Vec<(DocId, u32)>>()
+            })
+            .collect();
+
+        let total_fieldnorms: u64 = fieldnorms_expanded
+            .iter()
+            .cloned()
+            .map(|fieldnorm| fieldnorm as u64)
+            .sum();
+        let average_fieldnorm = (total_fieldnorms as Score) / (fieldnorms_expanded.len() as Score);
+        let max_doc = fieldnorms_expanded.len();
+
+        let make_scorers = || -> Vec<TermScorer> {
+            postings_lists_expanded
+                .iter()
+                .map(|postings| {
+                    let bm25_weight = Bm25Weight::for_one_term(
+                        postings.len() as u64,
+                        max_doc as u64,
+                        average_fieldnorm,
+                    );
+                    TermScorer::create_for_test(postings, &fieldnorms_expanded[..], bm25_weight)
+                })
+                .collect()
+        };
+
+        for top_k in 1..4 {
+            let checkpoints_optimized =
+                compute_checkpoints_block_wand_intersection(make_scorers(), top_k);
+            let checkpoints_naive = compute_checkpoints_naive_intersection(make_scorers(), top_k);
+            assert_eq!(
+                checkpoints_optimized.len(),
+                checkpoints_naive.len(),
+                "Mismatch in checkpoint count for top_k={top_k}"
+            );
+            for (&(left_doc, left_score), &(right_doc, right_score)) in
+                checkpoints_optimized.iter().zip(checkpoints_naive.iter())
+            {
+                assert_eq!(left_doc, right_doc);
+                assert!(
+                    nearly_equals(left_score, right_score),
+                    "Score mismatch for doc {left_doc}: {left_score} vs {right_score}"
+                );
+            }
+        }
+    }
+
+    proptest! {
+        #![proptest_config(ProptestConfig::with_cases(500))]
+        #[test]
+        fn test_block_wand_intersection_two_scorers(
+            (posting_lists, fieldnorms) in gen_term_scorers(2)
+        ) {
+            test_block_wand_intersection_aux(&posting_lists[..], &fieldnorms[..]);
+        }
+    }
+
+    proptest! {
+        #![proptest_config(ProptestConfig::with_cases(500))]
+        #[test]
+        fn test_block_wand_intersection_three_scorers(
+            (posting_lists, fieldnorms) in gen_term_scorers(3)
+        ) {
+            test_block_wand_intersection_aux(&posting_lists[..], &fieldnorms[..]);
+        }
+    }
+
+    #[test]
+    fn test_block_wand_intersection_disjoint() {
+        // Two posting lists with no overlap — intersection is empty.
+        let fieldnorms: Vec<u32> = vec![10; 200];
+        let average_fieldnorm = 10.0;
+        let postings_a: Vec<(DocId, u32)> = (0..100).map(|d| (d, 1)).collect();
+        let postings_b: Vec<(DocId, u32)> = (100..200).map(|d| (d, 1)).collect();
+
+        let scorer_a = TermScorer::create_for_test(
+            &postings_a,
+            &fieldnorms,
+            Bm25Weight::for_one_term(100, 200, average_fieldnorm),
+        );
+        let scorer_b = TermScorer::create_for_test(
+            &postings_b,
+            &fieldnorms,
+            Bm25Weight::for_one_term(100, 200, average_fieldnorm),
+        );
+
+        let checkpoints = compute_checkpoints_block_wand_intersection(vec![scorer_a, scorer_b], 10);
+        assert!(checkpoints.is_empty());
+    }
+
+    #[test]
+    fn test_block_wand_intersection_all_overlap() {
+        // Two posting lists with full overlap.
+        let fieldnorms: Vec<u32> = vec![10; 50];
+        let average_fieldnorm = 10.0;
+        let postings: Vec<(DocId, u32)> = (0..50).map(|d| (d, 3)).collect();
+
+        let make_scorer = || {
+            TermScorer::create_for_test(
+                &postings,
+                &fieldnorms,
+                Bm25Weight::for_one_term(50, 50, average_fieldnorm),
+            )
+        };
+
+        let checkpoints_opt =
+            compute_checkpoints_block_wand_intersection(vec![make_scorer(), make_scorer()], 5);
+        let checkpoints_naive =
+            compute_checkpoints_naive_intersection(vec![make_scorer(), make_scorer()], 5);
+        assert_eq!(checkpoints_opt.len(), checkpoints_naive.len());
+    }
+}
--- a/src/query/boolean_query/boolean_weight.rs
+++ b/src/query/boolean_query/boolean_weight.rs
@@ -1,24 +1,18 @@
 use std::collections::HashMap;

+use crate::codec::{ObjectSafeCodec, SumOrDoNothingCombiner};
 use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
 use crate::index::SegmentReader;
-use crate::postings::FreqReadingOption;
 use crate::query::disjunction::Disjunction;
 use crate::query::explanation::does_not_match;
 use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner};
-use crate::query::term_query::TermScorer;
-use crate::query::weight::{for_each_docset_buffered, for_each_pruning_scorer, for_each_scorer};
+use crate::query::weight::for_each_docset_buffered;
 use crate::query::{
-    intersect_scorers, AllScorer, BufferedUnionScorer, EmptyScorer, Exclude, Explanation, Occur,
-    RequiredOptionalScorer, Scorer, Weight,
+    box_scorer, intersect_scorers, AllScorer, BufferedUnionScorer, EmptyScorer, Exclude,
+    Explanation, Occur, RequiredOptionalScorer, Scorer, SumCombiner, Weight,
 };
 use crate::{DocId, Score};

-enum SpecializedScorer {
-    TermUnion(Vec<TermScorer>),
-    Other(Box<dyn Scorer>),
-}
-
 fn scorer_disjunction<TScoreCombiner>(
    scorers: Vec<Box<dyn Scorer>>,
    score_combiner: TScoreCombiner,
@@ -32,7 +26,7 @@ where
    if scorers.len() == 1 {
        return scorers.into_iter().next().unwrap(); // Safe unwrap.
    }
-    Box::new(Disjunction::new(
+    box_scorer(Disjunction::new(
        scorers,
        score_combiner,
        minimum_match_required,
@@ -44,57 +38,41 @@ fn scorer_union<TScoreCombiner>(
     scorers: Vec<Box<dyn Scorer>>,
     score_combiner_fn: impl Fn() -> TScoreCombiner,
     num_docs: u32,
-) -> SpecializedScorer
+    codec: &dyn ObjectSafeCodec,
+) -> Box<dyn Scorer>
 where
     TScoreCombiner: ScoreCombiner,
 {
-    assert!(!scorers.is_empty());
-    if scorers.len() == 1 {
-        return SpecializedScorer::Other(scorers.into_iter().next().unwrap()); //< we checked the size beforehand
-    }
-
-    {
-        let is_all_term_queries = scorers.iter().all(|scorer| scorer.is::<TermScorer>());
-        if is_all_term_queries {
-            let scorers: Vec<TermScorer> = scorers
-                .into_iter()
-                .map(|scorer| *(scorer.downcast::<TermScorer>().map_err(|_| ()).unwrap()))
-                .collect();
-            if scorers
-                .iter()
-                .all(|scorer| scorer.freq_reading_option() == FreqReadingOption::ReadFreq)
+    match scorers.len() {
+        0 => box_scorer(EmptyScorer),
+        1 => scorers.into_iter().next().unwrap(),
+        _ => {
+            let combiner_opt: Option<SumOrDoNothingCombiner> = if std::any::TypeId::of::<
+                TScoreCombiner,
+            >() == std::any::TypeId::of::<
+                SumCombiner,
+            >() {
+                Some(SumOrDoNothingCombiner::Sum)
+            } else if std::any::TypeId::of::<TScoreCombiner>()
+                == std::any::TypeId::of::<DoNothingCombiner>()
             {
-                // Block wand is only available if we read frequencies.
-                return SpecializedScorer::TermUnion(scorers);
+                Some(SumOrDoNothingCombiner::DoNothing)
             } else {
-                return SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
+                None
+            };
+            if let Some(combiner) = combiner_opt {
+                // `Sum` / `DoNothing` unions are handed to the codec's union builder;
+                // any other combiner falls back to `BufferedUnionScorer` below.
+                codec.build_union_scorer_with_sum_combiner(scorers, num_docs, combiner)
+            } else {
+                box_scorer(BufferedUnionScorer::build(
                     scorers,
                     score_combiner_fn,
                     num_docs,
-                )));
+                ))
             }
         }
     }
-    SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
-        scorers,
-        score_combiner_fn,
-        num_docs,
-    )))
-}
-
-fn into_box_scorer<TScoreCombiner: ScoreCombiner>(
-    scorer: SpecializedScorer,
-    score_combiner_fn: impl Fn() -> TScoreCombiner,
-    num_docs: u32,
-) -> Box<dyn Scorer> {
-    match scorer {
-        SpecializedScorer::TermUnion(term_scorers) => {
-            let union_scorer =
-                BufferedUnionScorer::build(term_scorers, score_combiner_fn, num_docs);
-            Box::new(union_scorer)
-        }
-        SpecializedScorer::Other(scorer) => scorer,
-    }
 }

 /// Returns the effective MUST scorer, accounting for removed AllScorers.
@@ -110,7 +88,7 @@ fn effective_must_scorer(
    if must_scorers.is_empty() {
        if removed_all_scorer_count > 0 {
            // Had AllScorer(s) only - all docs match
-            Some(Box::new(AllScorer::new(max_doc)))
+            Some(box_scorer(AllScorer::new(max_doc)))
        } else {
            // No MUST constraint at all
            None
@@ -128,28 +106,26 @@ fn effective_must_scorer(
 /// When `scoring_enabled` is false, we can just return AllScorer alone since
 /// we don't need score contributions from the should_scorer.
 fn effective_should_scorer_for_union<TScoreCombiner: ScoreCombiner>(
-    should_scorer: SpecializedScorer,
+    should_scorer: Box<dyn Scorer>,
    removed_all_scorer_count: usize,
    max_doc: DocId,
    num_docs: u32,
    score_combiner_fn: impl Fn() -> TScoreCombiner,
    scoring_enabled: bool,
-) -> SpecializedScorer {
+) -> Box<dyn Scorer> {
    if removed_all_scorer_count > 0 {
        if scoring_enabled {
            // Need to union to get score contributions from both
-            let all_scorers: Vec<Box<dyn Scorer>> = vec![
-                into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
-                Box::new(AllScorer::new(max_doc)),
-            ];
-            SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
+            let all_scorers: Vec<Box<dyn Scorer>> =
+                vec![should_scorer, box_scorer(AllScorer::new(max_doc))];
+            box_scorer(BufferedUnionScorer::build(
                all_scorers,
                score_combiner_fn,
                num_docs,
-            )))
+            ))
        } else {
            // Scoring disabled - AllScorer alone is sufficient
-            SpecializedScorer::Other(Box::new(AllScorer::new(max_doc)))
+            box_scorer(AllScorer::new(max_doc))
        }
    } else {
        should_scorer
@@ -160,9 +136,9 @@ enum ShouldScorersCombinationMethod {
    // Should scorers are irrelevant.
    Ignored,
    // Only contributes to final score.
-    Optional(SpecializedScorer),
+    Optional(Box<dyn Scorer>),
    // Regardless of score, the should scorers may impact whether a document is matching or not.
-    Required(SpecializedScorer),
+    Required(Box<dyn Scorer>),
 }

 /// Weight associated to the `BoolQuery`.
@@ -224,7 +200,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
        reader: &SegmentReader,
        boost: Score,
        score_combiner_fn: impl Fn() -> TComplexScoreCombiner,
-    ) -> crate::Result<SpecializedScorer> {
+    ) -> crate::Result<Box<dyn Scorer>> {
        let num_docs = reader.num_docs();
        let mut per_occur_scorers = self.per_occur_scorers(reader, boost)?;

@@ -234,7 +210,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
        let must_special_scorer_counts = remove_and_count_all_and_empty_scorers(&mut must_scorers);

        if must_special_scorer_counts.num_empty_scorers > 0 {
-            return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
+            return Ok(box_scorer(EmptyScorer));
        }

        let mut should_scorers = per_occur_scorers.remove(&Occur::Should).unwrap_or_default();
@@ -249,7 +225,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {

        if exclude_special_scorer_counts.num_all_scorers > 0 {
            // We exclude all documents at one point.
-            return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
+            return Ok(box_scorer(EmptyScorer));
        }

        let effective_minimum_number_should_match = self
@@ -261,7 +237,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
            if effective_minimum_number_should_match > num_of_should_scorers {
                // We don't have enough scorers to satisfy the minimum number of should matches.
                // The request will match no documents.
-                return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
+                return Ok(box_scorer(EmptyScorer));
            }
            match effective_minimum_number_should_match {
                0 if num_of_should_scorers == 0 => ShouldScorersCombinationMethod::Ignored,
@@ -269,11 +245,13 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
                    should_scorers,
                    &score_combiner_fn,
                    num_docs,
+                    reader.codec(),
                )),
                1 => ShouldScorersCombinationMethod::Required(scorer_union(
                    should_scorers,
                    &score_combiner_fn,
                    num_docs,
+                    reader.codec(),
                )),
                n if num_of_should_scorers == n => {
                    // When num_of_should_scorers equals the number of should clauses,
@@ -281,16 +259,26 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
                    must_scorers.append(&mut should_scorers);
                    ShouldScorersCombinationMethod::Ignored
                }
-                _ => ShouldScorersCombinationMethod::Required(SpecializedScorer::Other(
-                    scorer_disjunction(
-                        should_scorers,
-                        score_combiner_fn(),
-                        effective_minimum_number_should_match,
-                    ),
+                _ => ShouldScorersCombinationMethod::Required(scorer_disjunction(
+                    should_scorers,
+                    score_combiner_fn(),
+                    effective_minimum_number_should_match,
                )),
            }
        };

+        let exclude_scorer_opt: Option<Box<dyn Scorer>> = if exclude_scorers.is_empty() {
+            None
+        } else {
+            let exclude_scorers_union: Box<dyn Scorer> = scorer_union(
+                exclude_scorers,
+                DoNothingCombiner::default,
+                num_docs,
+                reader.codec(),
+            );
+            Some(exclude_scorers_union)
+        };
+
        let include_scorer = match (should_scorers, must_scorers) {
            (ShouldScorersCombinationMethod::Ignored, must_scorers) => {
                // No SHOULD clauses (or they were absorbed into MUST).
@@ -303,8 +291,8 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
                    reader.max_doc(),
                    num_docs,
                )
-                .unwrap_or_else(|| Box::new(EmptyScorer));
-                SpecializedScorer::Other(boxed_scorer)
+                .unwrap_or_else(|| box_scorer(EmptyScorer));
+                boxed_scorer
            }
            (ShouldScorersCombinationMethod::Optional(should_scorer), must_scorers) => {
                // Optional SHOULD: contributes to scoring but not required for matching.
@@ -329,16 +317,12 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
                    Some(must_scorer) => {
                        // Has MUST constraint: SHOULD only affects scoring.
                        if self.scoring_enabled {
-                            SpecializedScorer::Other(Box::new(RequiredOptionalScorer::<
-                                _,
-                                _,
-                                TScoreCombiner,
-                            >::new(
+                            box_scorer(RequiredOptionalScorer::<_, _, TScoreCombiner>::new(
                                must_scorer,
-                                into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
-                            )))
+                                should_scorer,
+                            ))
                        } else {
-                            SpecializedScorer::Other(must_scorer)
+                            must_scorer
                        }
                    }
                }
@@ -358,33 +342,16 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
                    }
                    Some(must_scorer) => {
                        // Has MUST constraint: intersect MUST with SHOULD.
-                        let should_boxed =
-                            into_box_scorer(should_scorer, &score_combiner_fn, num_docs);
-                        SpecializedScorer::Other(intersect_scorers(
-                            vec![must_scorer, should_boxed],
-                            num_docs,
-                        ))
+                        intersect_scorers(vec![must_scorer, should_scorer], num_docs)
                    }
                }
            }
        };
-        if exclude_scorers.is_empty() {
-            return Ok(include_scorer);
-        }
-
-        let include_scorer_boxed = into_box_scorer(include_scorer, &score_combiner_fn, num_docs);
-        let scorer: Box<dyn Scorer> = if exclude_scorers.len() == 1 {
-            let exclude_scorer = exclude_scorers.pop().unwrap();
-            match exclude_scorer.downcast::<TermScorer>() {
-                // Cast to TermScorer succeeded
-                Ok(exclude_scorer) => Box::new(Exclude::new(include_scorer_boxed, *exclude_scorer)),
-                // We get back the original Box<dyn Scorer>
-                Err(exclude_scorer) => Box::new(Exclude::new(include_scorer_boxed, exclude_scorer)),
-            }
+        if let Some(exclude_scorer) = exclude_scorer_opt {
+            Ok(box_scorer(Exclude::new(include_scorer, exclude_scorer)))
        } else {
-            Box::new(Exclude::new(include_scorer_boxed, exclude_scorers))
-        };
-        Ok(SpecializedScorer::Other(scorer))
+            Ok(include_scorer)
+        }
    }
 }

@@ -414,7 +381,6 @@ fn remove_and_count_all_and_empty_scorers(

 impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombiner> {
    fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
-        let num_docs = reader.num_docs();
        if self.weights.is_empty() {
            Ok(Box::new(EmptyScorer))
        } else if self.weights.len() == 1 {
@@ -426,14 +392,8 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
            }
        } else if self.scoring_enabled {
            self.complex_scorer(reader, boost, &self.score_combiner_fn)
-                .map(|specialized_scorer| {
-                    into_box_scorer(specialized_scorer, &self.score_combiner_fn, num_docs)
-                })
        } else {
            self.complex_scorer(reader, boost, DoNothingCombiner::default)
-                .map(|specialized_scorer| {
-                    into_box_scorer(specialized_scorer, DoNothingCombiner::default, num_docs)
-                })
        }
    }

@@ -462,20 +422,8 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
        reader: &SegmentReader,
        callback: &mut dyn FnMut(DocId, Score),
    ) -> crate::Result<()> {
-        let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
-        match scorer {
-            SpecializedScorer::TermUnion(term_scorers) => {
-                let mut union_scorer = BufferedUnionScorer::build(
-                    term_scorers,
-                    &self.score_combiner_fn,
-                    reader.num_docs(),
-                );
-                for_each_scorer(&mut union_scorer, callback);
-            }
-            SpecializedScorer::Other(mut scorer) => {
-                for_each_scorer(scorer.as_mut(), callback);
-            }
-        }
+        let mut scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
+        scorer.for_each(callback);
        Ok(())
    }

@@ -484,22 +432,9 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
        reader: &SegmentReader,
        callback: &mut dyn FnMut(&[DocId]),
    ) -> crate::Result<()> {
-        let scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?;
+        let mut scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?;
        let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
-
-        match scorer {
-            SpecializedScorer::TermUnion(term_scorers) => {
-                let mut union_scorer = BufferedUnionScorer::build(
-                    term_scorers,
-                    &self.score_combiner_fn,
-                    reader.num_docs(),
-                );
-                for_each_docset_buffered(&mut union_scorer, &mut buffer, callback);
-            }
-            SpecializedScorer::Other(mut scorer) => {
-                for_each_docset_buffered(scorer.as_mut(), &mut buffer, callback);
-            }
-        }
+        for_each_docset_buffered(scorer.as_mut(), &mut buffer, callback);
        Ok(())
    }

@@ -520,14 +455,7 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
        callback: &mut dyn FnMut(DocId, Score) -> Score,
    ) -> crate::Result<()> {
        let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
-        match scorer {
-            SpecializedScorer::TermUnion(term_scorers) => {
-                super::block_wand(term_scorers, threshold, callback);
-            }
-            SpecializedScorer::Other(mut scorer) => {
-                for_each_pruning_scorer(scorer.as_mut(), threshold, callback);
-            }
-        }
+        reader.codec().for_each_pruning(threshold, scorer, callback);
        Ok(())
    }
 }
--- a/src/query/boolean_query/mod.rs
+++ b/src/query/boolean_query/mod.rs
@@ -1,8 +1,6 @@
-mod block_wand;
 mod boolean_query;
 mod boolean_weight;

-pub(crate) use self::block_wand::{block_wand, block_wand_single_scorer};
 pub use self::boolean_query::BooleanQuery;
 pub use self::boolean_weight::BooleanWeight;

--- a/src/query/const_score_query.rs
+++ b/src/query/const_score_query.rs
@@ -1,7 +1,7 @@
 use std::fmt;

 use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
-use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
+use crate::query::{box_scorer, EnableScoring, Explanation, Query, Scorer, Weight};
 use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, Term};

 /// `ConstScoreQuery` is a wrapper over a query to provide a constant score.
@@ -65,7 +65,10 @@ impl ConstWeight {
 impl Weight for ConstWeight {
    fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
        let inner_scorer = self.weight.scorer(reader, boost)?;
-        Ok(Box::new(ConstScorer::new(inner_scorer, boost * self.score)))
+        Ok(box_scorer(ConstScorer::new(
+            inner_scorer,
+            boost * self.score,
+        )))
    }

    fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result<Explanation> {
--- a/src/query/empty_query.rs
+++ b/src/query/empty_query.rs
@@ -2,7 +2,7 @@ use super::Scorer;
 use crate::docset::TERMINATED;
 use crate::index::SegmentReader;
 use crate::query::explanation::does_not_match;
-use crate::query::{EnableScoring, Explanation, Query, Weight};
+use crate::query::{box_scorer, EnableScoring, Explanation, Query, Weight};
 use crate::{DocId, DocSet, Score, Searcher};

 /// `EmptyQuery` is a dummy `Query` in which no document matches.
@@ -27,7 +27,7 @@ impl Query for EmptyQuery {
 pub struct EmptyWeight;
 impl Weight for EmptyWeight {
    fn scorer(&self, _reader: &SegmentReader, _boost: Score) -> crate::Result<Box<dyn Scorer>> {
-        Ok(Box::new(EmptyScorer))
+        Ok(box_scorer(EmptyScorer))
    }

    fn explain(&self, _reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
--- a/Show More
+++ b/Show More