Removed BlockNotLoaded

Explicit doc for the meaning of intersection_priority
Added unit tests
2026-05-31 23:50:41 +00:00 · 2026-01-06 14:37:10 +01:00 · 2026-01-06 14:09:52 +01:00 · 2026-01-02 13:07:00 +01:00 · 2025-12-31 18:20:28 +01:00
125 changed files with 3607 additions and 5417 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -39,11 +39,11 @@ jobs:

    - name: Check Formatting
      run: cargo +nightly fmt --all -- --check
-
+    
    - name: Check Stable Compilation
      run: cargo build --all-features

-
+    
    - name: Check Bench Compilation
      run: cargo +nightly bench --no-run --profile=dev --all-features

@@ -59,10 +59,10 @@ jobs:

    strategy:
      matrix:
-        features:
-          - { label: "all", flags: "mmap,stopwords,lz4-compression,zstd-compression,failpoints,stemmer" }
-          - { label: "quickwit", flags: "mmap,quickwit,failpoints" }
-          - { label: "none", flags: "" }
+        features: [
+            { label: "all", flags: "mmap,stopwords,lz4-compression,zstd-compression,failpoints" },
+            { label: "quickwit", flags: "mmap,quickwit,failpoints" }
+        ]

    name: test-${{ matrix.features.label}}

@@ -80,21 +80,7 @@ jobs:
    - uses: Swatinem/rust-cache@v2

    - name: Run tests
-      run: |
-        # if matrix.feature.flags is empty then run on --lib to avoid compiling examples
-        # (as most of them rely on mmap) otherwise run all
-        if [ -z "${{ matrix.features.flags }}" ]; then
-          cargo +stable nextest run --lib --no-default-features --verbose --workspace
-        else
-          cargo +stable nextest run --features ${{ matrix.features.flags }} --no-default-features --verbose --workspace
-        fi
+      run: cargo +stable nextest run --features ${{ matrix.features.flags }} --verbose --workspace

    - name: Run doctests
-      run: |
-        # if matrix.feature.flags is empty then run on --lib to avoid compiling examples
-        # (as most of them rely on mmap) otherwise run all
-        if [ -z "${{ matrix.features.flags }}" ]; then
-          echo "no doctest for no feature flag"
-        else
-          cargo +stable test --doc --features ${{ matrix.features.flags }} --verbose --workspace
-        fi
+      run: cargo +stable test --doc --features ${{ matrix.features.flags }} --verbose --workspace
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,9 +37,9 @@ fs4 = { version = "0.13.1", optional = true }
 levenshtein_automata = "0.2.1"
 uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
-rust-stemmers = { version = "1.2.0", optional = true }
+rust-stemmers = "1.2.0"
 downcast-rs = "2.0.1"
-bitpacking = { version = "0.9.3", default-features = false, features = [
+bitpacking = { version = "0.9.2", default-features = false, features = [
    "bitpacker4x",
 ] }
 census = "0.4.2"
@@ -113,8 +113,7 @@ debug-assertions = true
 overflow-checks = true

 [features]
-default = ["mmap", "stopwords", "lz4-compression", "columnar-zstd-compression", "stemmer"]
-stemmer = ["rust-stemmers"]
+default = ["mmap", "stopwords", "lz4-compression", "columnar-zstd-compression"]
 mmap = ["fs4", "tempfile", "memmap2"]
 stopwords = []

--- a/benches/agg_bench.rs
+++ b/benches/agg_bench.rs
@@ -54,33 +54,33 @@ fn bench_agg(mut group: InputGroup<Index>) {
    register!(group, stats_f64);
    register!(group, extendedstats_f64);
    register!(group, percentiles_f64);
-    register!(group, terms_7);
+    register!(group, terms_few);
    register!(group, terms_all_unique);
-    register!(group, terms_150_000);
+    register!(group, terms_many);
    register!(group, terms_many_top_1000);
    register!(group, terms_many_order_by_term);
    register!(group, terms_many_with_top_hits);
    register!(group, terms_all_unique_with_avg_sub_agg);
    register!(group, terms_many_with_avg_sub_agg);
+    register!(group, terms_few_with_avg_sub_agg);
    register!(group, terms_status_with_avg_sub_agg);
+    register!(group, terms_status);
+    register!(group, terms_few_with_histogram);
    register!(group, terms_status_with_histogram);
-    register!(group, terms_zipf_1000);
-    register!(group, terms_zipf_1000_with_histogram);
-    register!(group, terms_zipf_1000_with_avg_sub_agg);

    register!(group, terms_many_json_mixed_type_with_avg_sub_agg);

    register!(group, cardinality_agg);
-    register!(group, terms_status_with_cardinality_agg);
+    register!(group, terms_few_with_cardinality_agg);

    register!(group, range_agg);
    register!(group, range_agg_with_avg_sub_agg);
-    register!(group, range_agg_with_term_agg_status);
+    register!(group, range_agg_with_term_agg_few);
    register!(group, range_agg_with_term_agg_many);
    register!(group, histogram);
    register!(group, histogram_hard_bounds);
    register!(group, histogram_with_avg_sub_agg);
-    register!(group, histogram_with_term_agg_status);
+    register!(group, histogram_with_term_agg_few);
    register!(group, avg_and_range_with_avg_sub_agg);

    // Filter aggregation benchmarks
@@ -159,10 +159,10 @@ fn cardinality_agg(index: &Index) {
    });
    execute_agg(index, agg_req);
 }
-fn terms_status_with_cardinality_agg(index: &Index) {
+fn terms_few_with_cardinality_agg(index: &Index) {
    let agg_req = json!({
        "my_texts": {
-            "terms": { "field": "text_few_terms_status" },
+            "terms": { "field": "text_few_terms" },
            "aggs": {
                "cardinality": {
                    "cardinality": {
@@ -175,7 +175,13 @@ fn terms_status_with_cardinality_agg(index: &Index) {
    execute_agg(index, agg_req);
 }

-fn terms_7(index: &Index) {
+fn terms_few(index: &Index) {
+    let agg_req = json!({
+        "my_texts": { "terms": { "field": "text_few_terms" } },
+    });
+    execute_agg(index, agg_req);
+}
+fn terms_status(index: &Index) {
    let agg_req = json!({
        "my_texts": { "terms": { "field": "text_few_terms_status" } },
    });
@@ -188,7 +194,7 @@ fn terms_all_unique(index: &Index) {
    execute_agg(index, agg_req);
 }

-fn terms_150_000(index: &Index) {
+fn terms_many(index: &Index) {
    let agg_req = json!({
        "my_texts": { "terms": { "field": "text_many_terms" } },
    });
@@ -247,6 +253,17 @@ fn terms_all_unique_with_avg_sub_agg(index: &Index) {
    });
    execute_agg(index, agg_req);
 }
+fn terms_few_with_histogram(index: &Index) {
+    let agg_req = json!({
+        "my_texts": {
+            "terms": { "field": "text_few_terms" },
+            "aggs": {
+                "histo": {"histogram": { "field": "score_f64", "interval": 10 }}
+            }
+        }
+    });
+    execute_agg(index, agg_req);
+}
 fn terms_status_with_histogram(index: &Index) {
    let agg_req = json!({
        "my_texts": {
@@ -259,18 +276,17 @@ fn terms_status_with_histogram(index: &Index) {
    execute_agg(index, agg_req);
 }

-fn terms_zipf_1000_with_histogram(index: &Index) {
+fn terms_few_with_avg_sub_agg(index: &Index) {
    let agg_req = json!({
        "my_texts": {
-            "terms": { "field": "text_1000_terms_zipf" },
+            "terms": { "field": "text_few_terms" },
            "aggs": {
-                "histo": {"histogram": { "field": "score_f64", "interval": 10 }}
+                "average_f64": { "avg": { "field": "score_f64" } }
            }
-        }
+        },
    });
    execute_agg(index, agg_req);
 }
-
 fn terms_status_with_avg_sub_agg(index: &Index) {
    let agg_req = json!({
        "my_texts": {
@@ -283,25 +299,6 @@ fn terms_status_with_avg_sub_agg(index: &Index) {
    execute_agg(index, agg_req);
 }

-fn terms_zipf_1000_with_avg_sub_agg(index: &Index) {
-    let agg_req = json!({
-        "my_texts": {
-            "terms": { "field": "text_1000_terms_zipf" },
-            "aggs": {
-                "average_f64": { "avg": { "field": "score_f64" } }
-            }
-        },
-    });
-    execute_agg(index, agg_req);
-}
-
-fn terms_zipf_1000(index: &Index) {
-    let agg_req = json!({
-        "my_texts": { "terms": { "field": "text_1000_terms_zipf" } },
-    });
-    execute_agg(index, agg_req);
-}
-
 fn terms_many_json_mixed_type_with_avg_sub_agg(index: &Index) {
    let agg_req = json!({
        "my_texts": {
@@ -357,7 +354,7 @@ fn range_agg_with_avg_sub_agg(index: &Index) {
    execute_agg(index, agg_req);
 }

-fn range_agg_with_term_agg_status(index: &Index) {
+fn range_agg_with_term_agg_few(index: &Index) {
    let agg_req = json!({
        "rangef64": {
            "range": {
@@ -372,7 +369,7 @@ fn range_agg_with_term_agg_status(index: &Index) {
                ]
            },
            "aggs": {
-                "my_texts": { "terms": { "field": "text_few_terms_status" } },
+                "my_texts": { "terms": { "field": "text_few_terms" } },
            }
        },
    });
@@ -428,12 +425,12 @@ fn histogram_with_avg_sub_agg(index: &Index) {
    });
    execute_agg(index, agg_req);
 }
-fn histogram_with_term_agg_status(index: &Index) {
+fn histogram_with_term_agg_few(index: &Index) {
    let agg_req = json!({
        "rangef64": {
            "histogram": { "field": "score_f64", "interval": 10 },
            "aggs": {
-                "my_texts": { "terms": { "field": "text_few_terms_status" } }
+                "my_texts": { "terms": { "field": "text_few_terms" } }
            }
        }
    });
@@ -478,13 +475,6 @@ fn get_collector(agg_req: Aggregations) -> AggregationCollector {
 }

 fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
-    // Flag to use existing index
-    let reuse_index = std::env::var("REUSE_AGG_BENCH_INDEX").is_ok();
-    if reuse_index && std::path::Path::new("agg_bench").exists() {
-        return Index::open_in_dir("agg_bench");
-    }
-    // crreate dir
-    std::fs::create_dir_all("agg_bench")?;
    let mut schema_builder = Schema::builder();
    let text_fieldtype = tantivy::schema::TextOptions::default()
        .set_indexing_options(
@@ -496,44 +486,24 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
    let text_field_all_unique_terms =
        schema_builder.add_text_field("text_all_unique_terms", STRING | FAST);
    let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
+    let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
+    let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
    let text_field_few_terms_status =
        schema_builder.add_text_field("text_few_terms_status", STRING | FAST);
-    let text_field_1000_terms_zipf =
-        schema_builder.add_text_field("text_1000_terms_zipf", STRING | FAST);
    let score_fieldtype = tantivy::schema::NumericOptions::default().set_fast();
    let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
    let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
    let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
-    // use tmp dir
-    let index = if reuse_index {
-        Index::create_in_dir("agg_bench", schema_builder.build())?
-    } else {
-        Index::create_from_tempdir(schema_builder.build())?
-    };
-    // Approximate log proportions
-    let status_field_data = [
-        ("INFO", 8000),
-        ("ERROR", 300),
-        ("WARN", 1200),
-        ("DEBUG", 500),
-        ("OK", 500),
-        ("CRITICAL", 20),
-        ("EMERGENCY", 1),
-    ];
-    let log_level_distribution =
-        WeightedIndex::new(status_field_data.iter().map(|item| item.1)).unwrap();
+    let index = Index::create_from_tempdir(schema_builder.build())?;
+    let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
+    // Approximate production log proportions: INFO dominant, WARN and DEBUG occasional, ERROR rare.
+    let log_level_distribution = WeightedIndex::new([80u32, 3, 12, 5]).unwrap();

    let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();

    let many_terms_data = (0..150_000)
        .map(|num| format!("author{num}"))
        .collect::<Vec<_>>();
-
-    // Prepare 1000 unique terms sampled using a Zipf distribution.
-    // Exponent ~1.1 approximates top-20 terms covering around ~20%.
-    let terms_1000: Vec<String> = (1..=1000).map(|i| format!("term_{i}")).collect();
-    let zipf_1000 = rand_distr::Zipf::new(1000, 1.1f64).unwrap();
-
    {
        let mut rng = StdRng::from_seed([1u8; 32]);
        let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
@@ -543,12 +513,8 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
            index_writer.add_document(doc!())?;
        }
        if cardinality == Cardinality::Multivalued {
-            let log_level_sample_a = status_field_data[log_level_distribution.sample(&mut rng)].0;
-            let log_level_sample_b = status_field_data[log_level_distribution.sample(&mut rng)].0;
-            let idx_a = zipf_1000.sample(&mut rng) as usize - 1;
-            let idx_b = zipf_1000.sample(&mut rng) as usize - 1;
-            let term_1000_a = &terms_1000[idx_a];
-            let term_1000_b = &terms_1000[idx_b];
+            let log_level_sample_a = few_terms_data[log_level_distribution.sample(&mut rng)];
+            let log_level_sample_b = few_terms_data[log_level_distribution.sample(&mut rng)];
            index_writer.add_document(doc!(
                json_field => json!({"mixed_type": 10.0}),
                json_field => json!({"mixed_type": 10.0}),
@@ -558,10 +524,10 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
                text_field_all_unique_terms => "coolo",
                text_field_many_terms => "cool",
                text_field_many_terms => "cool",
+                text_field_few_terms => "cool",
+                text_field_few_terms => "cool",
                text_field_few_terms_status => log_level_sample_a,
                text_field_few_terms_status => log_level_sample_b,
-                text_field_1000_terms_zipf => term_1000_a.as_str(),
-                text_field_1000_terms_zipf => term_1000_b.as_str(),
                score_field => 1u64,
                score_field => 1u64,
                score_field_f64 => lg_norm.sample(&mut rng),
@@ -588,8 +554,8 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
                json_field => json,
                text_field_all_unique_terms => format!("unique_term_{}", rng.gen::<u64>()),
                text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
-                text_field_few_terms_status => status_field_data[log_level_distribution.sample(&mut rng)].0,
-                text_field_1000_terms_zipf => terms_1000[zipf_1000.sample(&mut rng) as usize - 1].as_str(),
+                text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
+                text_field_few_terms_status => few_terms_data[log_level_distribution.sample(&mut rng)],
                score_field => val as u64,
                score_field_f64 => lg_norm.sample(&mut rng),
                score_field_i64 => val as i64,
@@ -641,7 +607,7 @@ fn filter_agg_all_query_with_sub_aggs(index: &Index) {
                "avg_score": { "avg": { "field": "score" } },
                "stats_score": { "stats": { "field": "score_f64" } },
                "terms_text": {
-                    "terms": { "field": "text_few_terms_status" }
+                    "terms": { "field": "text_few_terms" }
                }
            }
        }
@@ -657,7 +623,7 @@ fn filter_agg_term_query_with_sub_aggs(index: &Index) {
                "avg_score": { "avg": { "field": "score" } },
                "stats_score": { "stats": { "field": "score_f64" } },
                "terms_text": {
-                    "terms": { "field": "text_few_terms_status" }
+                    "terms": { "field": "text_few_terms" }
                }
            }
        }
--- a/columnar/src/block_accessor.rs
+++ b/columnar/src/block_accessor.rs
@@ -29,20 +29,12 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
        }
    }
    #[inline]
-    pub fn fetch_block_with_missing(
-        &mut self,
-        docs: &[u32],
-        accessor: &Column<T>,
-        missing: Option<T>,
-    ) {
+    pub fn fetch_block_with_missing(&mut self, docs: &[u32], accessor: &Column<T>, missing: T) {
        self.fetch_block(docs, accessor);
        // no missing values
        if accessor.index.get_cardinality().is_full() {
            return;
        }
-        let Some(missing) = missing else {
-            return;
-        };

        // We can compare docid_cache length with docs to find missing docs
        // For multi value columns we can't rely on the length and always need to scan
--- a/columnar/src/column/mod.rs
+++ b/columnar/src/column/mod.rs
@@ -85,8 +85,8 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
    }

    #[inline]
-    pub fn first(&self, doc_id: DocId) -> Option<T> {
-        self.values_for_doc(doc_id).next()
+    pub fn first(&self, row_id: RowId) -> Option<T> {
+        self.values_for_doc(row_id).next()
    }

    /// Load the first value for each docid in the provided slice.
--- a/columnar/src/tests.rs
+++ b/columnar/src/tests.rs
@@ -60,7 +60,7 @@ fn test_dataframe_writer_bool() {
    let DynamicColumn::Bool(bool_col) = dyn_bool_col else {
        panic!();
    };
-    let vals: Vec<Option<bool>> = (0..5).map(|doc_id| bool_col.first(doc_id)).collect();
+    let vals: Vec<Option<bool>> = (0..5).map(|row_id| bool_col.first(row_id)).collect();
    assert_eq!(&vals, &[None, Some(false), None, Some(true), None,]);
 }

@@ -108,7 +108,7 @@ fn test_dataframe_writer_ip_addr() {
    let DynamicColumn::IpAddr(ip_col) = dyn_bool_col else {
        panic!();
    };
-    let vals: Vec<Option<Ipv6Addr>> = (0..5).map(|doc_id| ip_col.first(doc_id)).collect();
+    let vals: Vec<Option<Ipv6Addr>> = (0..5).map(|row_id| ip_col.first(row_id)).collect();
    assert_eq!(
        &vals,
        &[
@@ -169,7 +169,7 @@ fn test_dictionary_encoded_str() {
    let DynamicColumn::Str(str_col) = col_handles[0].open().unwrap() else {
        panic!();
    };
-    let index: Vec<Option<u64>> = (0..5).map(|doc_id| str_col.ords().first(doc_id)).collect();
+    let index: Vec<Option<u64>> = (0..5).map(|row_id| str_col.ords().first(row_id)).collect();
    assert_eq!(index, &[None, Some(0), None, Some(2), Some(1)]);
    assert_eq!(str_col.num_rows(), 5);
    let mut term_buffer = String::new();
@@ -204,7 +204,7 @@ fn test_dictionary_encoded_bytes() {
        panic!();
    };
    let index: Vec<Option<u64>> = (0..5)
-        .map(|doc_id| bytes_col.ords().first(doc_id))
+        .map(|row_id| bytes_col.ords().first(row_id))
        .collect();
    assert_eq!(index, &[None, Some(0), None, Some(2), Some(1)]);
    assert_eq!(bytes_col.num_rows(), 5);
--- a/common/src/bitset.rs
+++ b/common/src/bitset.rs
@@ -178,15 +178,9 @@ impl TinySet {
 #[derive(Clone)]
 pub struct BitSet {
    tinysets: Box<[TinySet]>,
+    len: u64,
    max_value: u32,
 }
-impl std::fmt::Debug for BitSet {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("BitSet")
-            .field("max_value", &self.max_value)
-            .finish()
-    }
-}

 fn num_buckets(max_val: u32) -> u32 {
    max_val.div_ceil(64u32)
@@ -210,6 +204,7 @@ impl BitSet {
        let tinybitsets = vec![TinySet::empty(); num_buckets as usize].into_boxed_slice();
        BitSet {
            tinysets: tinybitsets,
+            len: 0,
            max_value,
        }
    }
@@ -227,6 +222,7 @@ impl BitSet {
        }
        BitSet {
            tinysets: tinybitsets,
+            len: max_value as u64,
            max_value,
        }
    }
@@ -245,19 +241,17 @@ impl BitSet {

    /// Intersect with tinysets
    fn intersect_update_with_iter(&mut self, other: impl Iterator<Item = TinySet>) {
+        self.len = 0;
        for (left, right) in self.tinysets.iter_mut().zip(other) {
            *left = left.intersect(right);
+            self.len += left.len() as u64;
        }
    }

    /// Returns the number of elements in the `BitSet`.
    #[inline]
    pub fn len(&self) -> usize {
-        self.tinysets
-            .iter()
-            .copied()
-            .map(|tinyset| tinyset.len())
-            .sum::<u32>() as usize
+        self.len as usize
    }

    /// Inserts an element in the `BitSet`
@@ -266,7 +260,7 @@ impl BitSet {
        // we do not check saturated els.
        let higher = el / 64u32;
        let lower = el % 64u32;
-        self.tinysets[higher as usize].insert_mut(lower);
+        self.len += u64::from(self.tinysets[higher as usize].insert_mut(lower));
    }

    /// Inserts an element in the `BitSet`
@@ -275,7 +269,7 @@ impl BitSet {
        // we do not check saturated els.
        let higher = el / 64u32;
        let lower = el % 64u32;
-        self.tinysets[higher as usize].remove_mut(lower);
+        self.len -= u64::from(self.tinysets[higher as usize].remove_mut(lower));
    }

    /// Returns true iff the elements is in the `BitSet`.
--- a/examples/iterating_docs_and_positions.rs
+++ b/examples/iterating_docs_and_positions.rs
@@ -91,10 +91,46 @@ fn main() -> tantivy::Result<()> {
        }
    }

-    // Some other powerful operations (especially `.seek`) may be useful to consume these
+    // A `Term` is a text token associated with a field.
+    // Let's go through all docs containing the term `title:the` and access their position
+    let term_the = Term::from_field_text(title, "the");
+
+    // Some other powerful operations (especially `.skip_to`) may be useful to consume these
    // posting lists rapidly.
    // You can check for them in the [`DocSet`](https://docs.rs/tantivy/~0/tantivy/trait.DocSet.html) trait
    // and the [`Postings`](https://docs.rs/tantivy/~0/tantivy/trait.Postings.html) trait

+    // Also, for some VERY specific high-performance use cases, like an OLAP analysis of logs,
+    // you can get better performance by directly accessing the blocks of doc ids.
+    for segment_reader in searcher.segment_readers() {
+        // A segment contains several data structures.
+        // The inverted index is the combination of
+        // - the term dictionary
+        // - the inverted lists associated with each term, and their positions
+        let inverted_index = segment_reader.inverted_index(title)?;
+
+        // This segment posting object is like a cursor over the documents matching the term.
+        // The `IndexRecordOption` argument tells tantivy which information to decode.
+        // Here `Basic` requests doc ids only, without term frequencies or positions.
+        //
+        // Decoding only the information you need gives better performance, because less
+        // data has to be decompressed.
+        if let Some(mut block_segment_postings) =
+            inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic)?
+        {
+            loop {
+                let docs = block_segment_postings.docs();
+                if docs.is_empty() {
+                    break;
+                }
+                // Once again, these docs MAY contain deleted documents as well.
+                let docs = block_segment_postings.docs();
+                // Prints `Docs [0, 2].`
+                println!("Docs {docs:?}");
+                block_segment_postings.advance();
+            }
+        }
+    }
+
    Ok(())
 }
--- a/src/aggregation/agg_data.rs
+++ b/src/aggregation/agg_data.rs
@@ -1,4 +1,4 @@
-use columnar::{Column, ColumnBlockAccessor, ColumnType, StrColumn};
+use columnar::{Column, ColumnType, StrColumn};
 use common::BitSet;
 use rustc_hash::FxHashSet;
 use serde::Serialize;
@@ -10,16 +10,16 @@ use crate::aggregation::accessor_helpers::{
 };
 use crate::aggregation::agg_req::{Aggregation, AggregationVariants, Aggregations};
 use crate::aggregation::bucket::{
-    build_segment_filter_collector, build_segment_range_collector, FilterAggReqData,
-    HistogramAggReqData, HistogramBounds, IncludeExcludeParam, MissingTermAggReqData,
-    RangeAggReqData, SegmentHistogramCollector, TermMissingAgg, TermsAggReqData, TermsAggregation,
+    FilterAggReqData, HistogramAggReqData, HistogramBounds, IncludeExcludeParam,
+    MissingTermAggReqData, RangeAggReqData, SegmentFilterCollector, SegmentHistogramCollector,
+    SegmentRangeCollector, TermMissingAgg, TermsAggReqData, TermsAggregation,
    TermsAggregationInternal,
 };
 use crate::aggregation::metric::{
-    build_segment_stats_collector, AverageAggregation, CardinalityAggReqData,
-    CardinalityAggregationReq, CountAggregation, ExtendedStatsAggregation, MaxAggregation,
-    MetricAggReqData, MinAggregation, SegmentCardinalityCollector, SegmentExtendedStatsCollector,
-    SegmentPercentilesCollector, StatsAggregation, StatsType, SumAggregation, TopHitsAggReqData,
+    AverageAggregation, CardinalityAggReqData, CardinalityAggregationReq, CountAggregation,
+    ExtendedStatsAggregation, MaxAggregation, MetricAggReqData, MinAggregation,
+    SegmentCardinalityCollector, SegmentExtendedStatsCollector, SegmentPercentilesCollector,
+    SegmentStatsCollector, StatsAggregation, StatsType, SumAggregation, TopHitsAggReqData,
    TopHitsSegmentCollector,
 };
 use crate::aggregation::segment_agg_result::{
@@ -35,7 +35,6 @@ pub struct AggregationsSegmentCtx {
    /// Request data for each aggregation type.
    pub per_request: PerRequestAggSegCtx,
    pub context: AggContextParams,
-    pub column_block_accessor: ColumnBlockAccessor<u64>,
 }

 impl AggregationsSegmentCtx {
@@ -108,14 +107,21 @@ impl AggregationsSegmentCtx {
            .as_deref()
            .expect("range_req_data slot is empty (taken)")
    }
+    #[inline]
+    pub(crate) fn get_filter_req_data(&self, idx: usize) -> &FilterAggReqData {
+        self.per_request.filter_req_data[idx]
+            .as_deref()
+            .expect("filter_req_data slot is empty (taken)")
+    }

    // ---------- mutable getters ----------

    #[inline]
-    pub(crate) fn get_metric_req_data_mut(&mut self, idx: usize) -> &mut MetricAggReqData {
-        &mut self.per_request.stats_metric_req_data[idx]
+    pub(crate) fn get_term_req_data_mut(&mut self, idx: usize) -> &mut TermsAggReqData {
+        self.per_request.term_req_data[idx]
+            .as_deref_mut()
+            .expect("term_req_data slot is empty (taken)")
    }
-
    #[inline]
    pub(crate) fn get_cardinality_req_data_mut(
        &mut self,
@@ -123,7 +129,10 @@ impl AggregationsSegmentCtx {
    ) -> &mut CardinalityAggReqData {
        &mut self.per_request.cardinality_req_data[idx]
    }
-
+    #[inline]
+    pub(crate) fn get_metric_req_data_mut(&mut self, idx: usize) -> &mut MetricAggReqData {
+        &mut self.per_request.stats_metric_req_data[idx]
+    }
    #[inline]
    pub(crate) fn get_histogram_req_data_mut(&mut self, idx: usize) -> &mut HistogramAggReqData {
        self.per_request.histogram_req_data[idx]
@@ -133,6 +142,21 @@ impl AggregationsSegmentCtx {

    // ---------- take / put (terms, histogram, range) ----------

+    /// Move out the boxed Terms request at `idx`, leaving `None`.
+    #[inline]
+    pub(crate) fn take_term_req_data(&mut self, idx: usize) -> Box<TermsAggReqData> {
+        self.per_request.term_req_data[idx]
+            .take()
+            .expect("term_req_data slot is empty (taken)")
+    }
+
+    /// Put back a Terms request into an empty slot at `idx`.
+    #[inline]
+    pub(crate) fn put_back_term_req_data(&mut self, idx: usize, value: Box<TermsAggReqData>) {
+        debug_assert!(self.per_request.term_req_data[idx].is_none());
+        self.per_request.term_req_data[idx] = Some(value);
+    }
+
    /// Move out the boxed Histogram request at `idx`, leaving `None`.
    #[inline]
    pub(crate) fn take_histogram_req_data(&mut self, idx: usize) -> Box<HistogramAggReqData> {
@@ -296,7 +320,6 @@ impl PerRequestAggSegCtx {

    /// Convert the aggregation tree into a serializable struct representation.
    /// Each node contains: { name, kind, children }.
-    #[allow(dead_code)]
    pub fn get_view_tree(&self) -> Vec<AggTreeViewNode> {
        fn node_to_view(node: &AggRefNode, pr: &PerRequestAggSegCtx) -> AggTreeViewNode {
            let mut children: Vec<AggTreeViewNode> =
@@ -322,19 +345,12 @@ impl PerRequestAggSegCtx {
 pub(crate) fn build_segment_agg_collectors_root(
    req: &mut AggregationsSegmentCtx,
 ) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
-    build_segment_agg_collectors_generic(req, &req.per_request.agg_tree.clone())
+    build_segment_agg_collectors(req, &req.per_request.agg_tree.clone())
 }

 pub(crate) fn build_segment_agg_collectors(
    req: &mut AggregationsSegmentCtx,
    nodes: &[AggRefNode],
-) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
-    build_segment_agg_collectors_generic(req, nodes)
-}
-
-fn build_segment_agg_collectors_generic(
-    req: &mut AggregationsSegmentCtx,
-    nodes: &[AggRefNode],
 ) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
    let mut collectors = Vec::new();
    for node in nodes.iter() {
@@ -372,8 +388,6 @@ pub(crate) fn build_segment_agg_collector(
            Ok(Box::new(SegmentCardinalityCollector::from_req(
                req_data.column_type,
                node.idx_in_req_data,
-                req_data.accessor.clone(),
-                req_data.missing_value_for_accessor,
            )))
        }
        AggKind::StatsKind(stats_type) => {
@@ -384,21 +398,20 @@ pub(crate) fn build_segment_agg_collector(
                | StatsType::Count
                | StatsType::Max
                | StatsType::Min
-                | StatsType::Stats => build_segment_stats_collector(req_data),
-                StatsType::ExtendedStats(sigma) => Ok(Box::new(
-                    SegmentExtendedStatsCollector::from_req(req_data, sigma),
-                )),
-                StatsType::Percentiles => {
-                    let req_data = req.get_metric_req_data_mut(node.idx_in_req_data);
-                    Ok(Box::new(
-                        SegmentPercentilesCollector::from_req_and_validate(
-                            req_data.field_type,
-                            req_data.missing_u64,
-                            req_data.accessor.clone(),
-                            node.idx_in_req_data,
-                        ),
-                    ))
+                | StatsType::Stats => Ok(Box::new(SegmentStatsCollector::from_req(
+                    node.idx_in_req_data,
+                ))),
+                StatsType::ExtendedStats(sigma) => {
+                    Ok(Box::new(SegmentExtendedStatsCollector::from_req(
+                        req_data.field_type,
+                        sigma,
+                        node.idx_in_req_data,
+                        req_data.missing,
+                    )))
                }
+                StatsType::Percentiles => Ok(Box::new(
+                    SegmentPercentilesCollector::from_req_and_validate(node.idx_in_req_data)?,
+                )),
            }
        }
        AggKind::TopHits => {
@@ -415,8 +428,12 @@ pub(crate) fn build_segment_agg_collector(
        AggKind::DateHistogram => Ok(Box::new(SegmentHistogramCollector::from_req_and_validate(
            req, node,
        )?)),
-        AggKind::Range => Ok(build_segment_range_collector(req, node)?),
-        AggKind::Filter => build_segment_filter_collector(req, node),
+        AggKind::Range => Ok(Box::new(SegmentRangeCollector::from_req_and_validate(
+            req, node,
+        )?)),
+        AggKind::Filter => Ok(Box::new(SegmentFilterCollector::from_req_and_validate(
+            req, node,
+        )?)),
    }
 }

@@ -476,7 +493,6 @@ pub(crate) fn build_aggregations_data_from_req(
    let mut data = AggregationsSegmentCtx {
        per_request: Default::default(),
        context,
-        column_block_accessor: ColumnBlockAccessor::default(),
    };

    for (name, agg) in aggs.iter() {
@@ -505,9 +521,9 @@ fn build_nodes(
            let idx_in_req_data = data.push_range_req_data(RangeAggReqData {
                accessor,
                field_type,
+                column_block_accessor: Default::default(),
                name: agg_name.to_string(),
                req: range_req.clone(),
-                is_top_level,
            });
            let children = build_children(&req.sub_aggregation, reader, segment_ordinal, data)?;
            Ok(vec![AggRefNode {
@@ -525,7 +541,9 @@ fn build_nodes(
            let idx_in_req_data = data.push_histogram_req_data(HistogramAggReqData {
                accessor,
                field_type,
+                column_block_accessor: Default::default(),
                name: agg_name.to_string(),
+                sub_aggregation_blueprint: None,
                req: histo_req.clone(),
                is_date_histogram: false,
                bounds: HistogramBounds {
@@ -550,7 +568,9 @@ fn build_nodes(
            let idx_in_req_data = data.push_histogram_req_data(HistogramAggReqData {
                accessor,
                field_type,
+                column_block_accessor: Default::default(),
                name: agg_name.to_string(),
+                sub_aggregation_blueprint: None,
                req: histo_req,
                is_date_histogram: true,
                bounds: HistogramBounds {
@@ -630,6 +650,7 @@ fn build_nodes(
            let idx_in_req_data = data.push_metric_req_data(MetricAggReqData {
                accessor,
                field_type,
+                column_block_accessor: Default::default(),
                name: agg_name.to_string(),
                collecting_for,
                missing: *missing,
@@ -657,6 +678,7 @@ fn build_nodes(
            let idx_in_req_data = data.push_metric_req_data(MetricAggReqData {
                accessor,
                field_type,
+                column_block_accessor: Default::default(),
                name: agg_name.to_string(),
                collecting_for: StatsType::Percentiles,
                missing: percentiles_req.missing,
@@ -731,7 +753,6 @@ fn build_nodes(
                segment_reader: reader.clone(),
                evaluator,
                matching_docs_buffer,
-                is_top_level,
            });
            let children = build_children(&req.sub_aggregation, reader, segment_ordinal, data)?;
            Ok(vec![AggRefNode {
@@ -874,7 +895,7 @@ fn build_terms_or_cardinality_nodes(
        });
    }

-    // Add one node per accessor
+    // Add one node per accessor to mirror previous behavior and allow per-type missing handling.
    for (accessor, column_type) in column_and_types {
        let missing_value_for_accessor = if use_special_missing_agg {
            None
@@ -905,8 +926,11 @@ fn build_terms_or_cardinality_nodes(
                    column_type,
                    str_dict_column: str_dict_column.clone(),
                    missing_value_for_accessor,
+                    column_block_accessor: Default::default(),
                    name: agg_name.to_string(),
                    req: TermsAggregationInternal::from_req(req),
+                    // Will be filled later when building collectors
+                    sub_aggregation_blueprint: None,
                    sug_aggregations: sub_aggs.clone(),
                    allowed_term_ids,
                    is_top_level,
@@ -919,6 +943,7 @@ fn build_terms_or_cardinality_nodes(
                    column_type,
                    str_dict_column: str_dict_column.clone(),
                    missing_value_for_accessor,
+                    column_block_accessor: Default::default(),
                    name: agg_name.to_string(),
                    req: req.clone(),
                });
--- a/src/aggregation/agg_tests.rs
+++ b/src/aggregation/agg_tests.rs
@@ -2,441 +2,15 @@ use serde_json::Value;

 use crate::aggregation::agg_req::{Aggregation, Aggregations};
 use crate::aggregation::agg_result::AggregationResults;
+use crate::aggregation::buf_collector::DOC_BLOCK_SIZE;
 use crate::aggregation::collector::AggregationCollector;
 use crate::aggregation::intermediate_agg_result::IntermediateAggregationResults;
 use crate::aggregation::tests::{get_test_index_2_segments, get_test_index_from_values_and_terms};
 use crate::aggregation::DistributedAggregationCollector;
-use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
 use crate::query::{AllQuery, TermQuery};
 use crate::schema::{IndexRecordOption, Schema, FAST};
 use crate::{Index, IndexWriter, Term};

-// The following tests ensure that each bucket aggregation type correctly functions as a
-// sub-aggregation of another bucket aggregation in two scenarios:
-// 1) The parent has more buckets than the child sub-aggregation
-// 2) The child sub-aggregation has more buckets than the parent
-//
-// These scenarios exercise the bucket id mapping and sub-aggregation routing logic.
-
-#[test]
-fn test_terms_as_subagg_parent_more_vs_child_more() -> crate::Result<()> {
-    let index = get_test_index_2_segments(false)?;
-
-    // Case A: parent has more buckets than child
-    // Parent: range with 4 buckets
-    // Child: terms on text -> 2 buckets
-    let agg_parent_more: Aggregations = serde_json::from_value(json!({
-        "parent_range": {
-            "range": {
-                "field": "score",
-                "ranges": [
-                    {"to": 3.0},
-                    {"from": 3.0, "to": 7.0},
-                    {"from": 7.0, "to": 20.0},
-                    {"from": 20.0}
-                ]
-            },
-            "aggs": {
-                "child_terms": {"terms": {"field": "text", "order": {"_key": "asc"}}}
-            }
-        }
-    }))
-    .unwrap();
-
-    let res = crate::aggregation::tests::exec_request(agg_parent_more, &index)?;
-    // Exact expected structure and counts
-    assert_eq!(
-        res["parent_range"]["buckets"],
-        json!([
-            {
-                "key": "*-3",
-                "doc_count": 1,
-                "to": 3.0,
-                "child_terms": {
-                    "buckets": [
-                        {"doc_count": 1, "key": "cool"}
-                    ],
-                    "sum_other_doc_count": 0
-                }
-            },
-            {
-                "key": "3-7",
-                "doc_count": 3,
-                "from": 3.0,
-                "to": 7.0,
-                "child_terms": {
-                    "buckets": [
-                        {"doc_count": 2, "key": "cool"},
-                        {"doc_count": 1, "key": "nohit"}
-                    ],
-                    "sum_other_doc_count": 0
-                }
-            },
-            {
-                "key": "7-20",
-                "doc_count": 3,
-                "from": 7.0,
-                "to": 20.0,
-                "child_terms": {
-                    "buckets": [
-                        {"doc_count": 3, "key": "cool"}
-                    ],
-                    "sum_other_doc_count": 0
-                }
-            },
-            {
-                "key": "20-*",
-                "doc_count": 2,
-                "from": 20.0,
-                "child_terms": {
-                    "buckets": [
-                        {"doc_count": 1, "key": "cool"},
-                        {"doc_count": 1, "key": "nohit"}
-                    ],
-                    "sum_other_doc_count": 0
-                }
-            }
-        ])
-    );
-
-    // Case B: child has more buckets than parent
-    // Parent: histogram on score with large interval -> 1 bucket
-    // Child: terms on text -> 2 buckets (cool/nohit)
-    let agg_child_more: Aggregations = serde_json::from_value(json!({
-        "parent_hist": {
-            "histogram": {"field": "score", "interval": 100.0},
-            "aggs": {
-                "child_terms": {"terms": {"field": "text", "order": {"_key": "asc"}}}
-            }
-        }
-    }))
-    .unwrap();
-
-    let res = crate::aggregation::tests::exec_request(agg_child_more, &index)?;
-    assert_eq!(
-        res["parent_hist"],
-        json!({
-            "buckets": [
-                {
-                    "key": 0.0,
-                    "doc_count": 9,
-                    "child_terms": {
-                        "buckets": [
-                            {"doc_count": 7, "key": "cool"},
-                            {"doc_count": 2, "key": "nohit"}
-                        ],
-                        "sum_other_doc_count": 0
-                    }
-                }
-            ]
-        })
-    );
-
-    Ok(())
-}
-
-#[test]
-fn test_range_as_subagg_parent_more_vs_child_more() -> crate::Result<()> {
-    let index = get_test_index_2_segments(false)?;
-
-    // Case A: parent has more buckets than child
-    // Parent: range with 5 buckets
-    // Child: coarse range with 3 buckets
-    let agg_parent_more: Aggregations = serde_json::from_value(json!({
-        "parent_range": {
-            "range": {
-                "field": "score",
-                "ranges": [
-                    {"to": 3.0},
-                    {"from": 3.0, "to": 7.0},
-                    {"from": 7.0, "to": 11.0},
-                    {"from": 11.0, "to": 20.0},
-                    {"from": 20.0}
-                ]
-            },
-            "aggs": {
-                "child_range": {
-                    "range": {
-                        "field": "score",
-                        "ranges": [
-                            {"to": 3.0},
-                            {"from": 3.0, "to": 20.0}
-                        ]
-                    }
-                }
-            }
-        }
-    }))
-    .unwrap();
-    let res = crate::aggregation::tests::exec_request(agg_parent_more, &index)?;
-    assert_eq!(
-        res["parent_range"]["buckets"],
-        json!([
-            {"key": "*-3", "doc_count": 1, "to": 3.0,
-                "child_range": {"buckets": [
-                    {"key": "*-3", "doc_count": 1, "to": 3.0},
-                    {"key": "3-20", "doc_count": 0, "from": 3.0, "to": 20.0},
-                    {"key": "20-*", "doc_count": 0, "from": 20.0}
-                ]}
-            },
-            {"key": "3-7", "doc_count": 3, "from": 3.0, "to": 7.0,
-                "child_range": {"buckets": [
-                    {"key": "*-3", "doc_count": 0, "to": 3.0},
-                    {"key": "3-20", "doc_count": 3, "from": 3.0, "to": 20.0},
-                    {"key": "20-*", "doc_count": 0, "from": 20.0}
-                ]}
-            },
-            {"key": "7-11", "doc_count": 1, "from": 7.0, "to": 11.0,
-                "child_range": {"buckets": [
-                    {"key": "*-3", "doc_count": 0, "to": 3.0},
-                    {"key": "3-20", "doc_count": 1, "from": 3.0, "to": 20.0},
-                    {"key": "20-*", "doc_count": 0, "from": 20.0}
-                ]}
-            },
-            {"key": "11-20", "doc_count": 2, "from": 11.0, "to": 20.0,
-                "child_range": {"buckets": [
-                    {"key": "*-3", "doc_count": 0, "to": 3.0},
-                    {"key": "3-20", "doc_count": 2, "from": 3.0, "to": 20.0},
-                    {"key": "20-*", "doc_count": 0, "from": 20.0}
-                ]}
-            },
-            {"key": "20-*", "doc_count": 2, "from": 20.0,
-                "child_range": {"buckets": [
-                    {"key": "*-3", "doc_count": 0, "to": 3.0},
-                    {"key": "3-20", "doc_count": 0, "from": 3.0, "to": 20.0},
-                    {"key": "20-*", "doc_count": 2, "from": 20.0}
-                ]}
-            }
-        ])
-    );
-
-    // Case B: child has more buckets than parent
-    // Parent: terms on text (2 buckets)
-    // Child: range with 4 buckets
-    let agg_child_more: Aggregations = serde_json::from_value(json!({
-        "parent_terms": {
-            "terms": {"field": "text"},
-            "aggs": {
-                "child_range": {
-                    "range": {
-                        "field": "score",
-                        "ranges": [
-                            {"to": 3.0},
-                            {"from": 3.0, "to": 7.0},
-                            {"from": 7.0, "to": 20.0}
-                        ]
-                    }
-                }
-            }
-        }
-    }))
-    .unwrap();
-    let res = crate::aggregation::tests::exec_request(agg_child_more, &index)?;
-
-    assert_eq!(
-        res["parent_terms"],
-        json!({
-            "buckets": [
-                {
-                    "key": "cool",
-                    "doc_count": 7,
-                    "child_range": {
-                        "buckets": [
-                            {"key": "*-3", "doc_count": 1, "to": 3.0},
-                            {"key": "3-7", "doc_count": 2, "from": 3.0, "to": 7.0},
-                            {"key": "7-20", "doc_count": 3, "from": 7.0, "to": 20.0},
-                            {"key": "20-*", "doc_count": 1, "from": 20.0}
-                        ]
-                    }
-                },
-                {
-                    "key": "nohit",
-                    "doc_count": 2,
-                    "child_range": {
-                        "buckets": [
-                            {"key": "*-3", "doc_count": 0, "to": 3.0},
-                            {"key": "3-7", "doc_count": 1, "from": 3.0, "to": 7.0},
-                            {"key": "7-20", "doc_count": 0, "from": 7.0, "to": 20.0},
-                            {"key": "20-*", "doc_count": 1, "from": 20.0}
-                        ]
-                    }
-                }
-            ],
-            "doc_count_error_upper_bound": 0,
-            "sum_other_doc_count": 0
-        })
-    );
-
-    Ok(())
-}
-
-#[test]
-fn test_histogram_as_subagg_parent_more_vs_child_more() -> crate::Result<()> {
-    let index = get_test_index_2_segments(false)?;
-
-    // Case A: parent has more buckets than child
-    // Parent: range with several ranges
-    // Child: histogram with large interval (single bucket per parent)
-    let agg_parent_more: Aggregations = serde_json::from_value(json!({
-        "parent_range": {
-            "range": {
-                "field": "score",
-                "ranges": [
-                    {"to": 3.0},
-                    {"from": 3.0, "to": 7.0},
-                    {"from": 7.0, "to": 11.0},
-                    {"from": 11.0, "to": 20.0},
-                    {"from": 20.0}
-                ]
-            },
-            "aggs": {
-                "child_hist": {"histogram": {"field": "score", "interval": 100.0}}
-            }
-        }
-    }))
-    .unwrap();
-    let res = crate::aggregation::tests::exec_request(agg_parent_more, &index)?;
-    assert_eq!(
-        res["parent_range"]["buckets"],
-        json!([
-            {"key": "*-3", "doc_count": 1, "to": 3.0,
-                "child_hist": {"buckets": [ {"key": 0.0, "doc_count": 1} ]}
-            },
-            {"key": "3-7", "doc_count": 3, "from": 3.0, "to": 7.0,
-                "child_hist": {"buckets": [ {"key": 0.0, "doc_count": 3} ]}
-            },
-            {"key": "7-11", "doc_count": 1, "from": 7.0, "to": 11.0,
-                "child_hist": {"buckets": [ {"key": 0.0, "doc_count": 1} ]}
-            },
-            {"key": "11-20", "doc_count": 2, "from": 11.0, "to": 20.0,
-                "child_hist": {"buckets": [ {"key": 0.0, "doc_count": 2} ]}
-            },
-            {"key": "20-*", "doc_count": 2, "from": 20.0,
-                "child_hist": {"buckets": [ {"key": 0.0, "doc_count": 2} ]}
-            }
-        ])
-    );
-
-    // Case B: child has more buckets than parent
-    // Parent: terms on text -> 2 buckets
-    // Child: histogram with small interval -> multiple buckets including empties
-    let agg_child_more: Aggregations = serde_json::from_value(json!({
-        "parent_terms": {
-            "terms": {"field": "text"},
-            "aggs": {
-                "child_hist": {"histogram": {"field": "score", "interval": 10.0}}
-            }
-        }
-    }))
-    .unwrap();
-    let res = crate::aggregation::tests::exec_request(agg_child_more, &index)?;
-    assert_eq!(
-        res["parent_terms"],
-        json!({
-            "buckets": [
-                {
-                    "key": "cool",
-                    "doc_count": 7,
-                    "child_hist": {
-                        "buckets": [
-                            {"key": 0.0, "doc_count": 4},
-                            {"key": 10.0, "doc_count": 2},
-                            {"key": 20.0, "doc_count": 0},
-                            {"key": 30.0, "doc_count": 0},
-                            {"key": 40.0, "doc_count": 1}
-                        ]
-                    }
-                },
-                {
-                    "key": "nohit",
-                    "doc_count": 2,
-                    "child_hist": {
-                        "buckets": [
-                            {"key": 0.0, "doc_count": 1},
-                            {"key": 10.0, "doc_count": 0},
-                            {"key": 20.0, "doc_count": 0},
-                            {"key": 30.0, "doc_count": 0},
-                            {"key": 40.0, "doc_count": 1}
-                        ]
-                    }
-                }
-            ],
-            "doc_count_error_upper_bound": 0,
-            "sum_other_doc_count": 0
-        })
-    );
-
-    Ok(())
-}
-
-#[test]
-fn test_date_histogram_as_subagg_parent_more_vs_child_more() -> crate::Result<()> {
-    let index = get_test_index_2_segments(false)?;
-
-    // Case A: parent has more buckets than child
-    // Parent: range with several buckets
-    // Child: date_histogram with 30d -> single bucket per parent
-    let agg_parent_more: Aggregations = serde_json::from_value(json!({
-        "parent_range": {
-            "range": {
-                "field": "score",
-                "ranges": [
-                    {"to": 3.0},
-                    {"from": 3.0, "to": 7.0},
-                    {"from": 7.0, "to": 11.0},
-                    {"from": 11.0, "to": 20.0},
-                    {"from": 20.0}
-                ]
-            },
-            "aggs": {
-                "child_date_hist": {"date_histogram": {"field": "date", "fixed_interval": "30d"}}
-            }
-        }
-    }))
-    .unwrap();
-    let res = crate::aggregation::tests::exec_request(agg_parent_more, &index)?;
-    let buckets = res["parent_range"]["buckets"].as_array().unwrap();
-    // Verify each parent bucket has exactly one child date bucket with matching doc_count
-    for bucket in buckets {
-        let parent_count = bucket["doc_count"].as_u64().unwrap();
-        let child_buckets = bucket["child_date_hist"]["buckets"].as_array().unwrap();
-        assert_eq!(child_buckets.len(), 1);
-        assert_eq!(child_buckets[0]["doc_count"], parent_count);
-    }
-
-    // Case B: child has more buckets than parent
-    // Parent: terms on text (2 buckets)
-    // Child: date_histogram with 1d -> multiple buckets
-    let agg_child_more: Aggregations = serde_json::from_value(json!({
-        "parent_terms": {
-            "terms": {"field": "text"},
-            "aggs": {
-                "child_date_hist": {"date_histogram": {"field": "date", "fixed_interval": "1d"}}
-            }
-        }
-    }))
-    .unwrap();
-    let res = crate::aggregation::tests::exec_request(agg_child_more, &index)?;
-    let buckets = res["parent_terms"]["buckets"].as_array().unwrap();
-
-    // cool bucket
-    assert_eq!(buckets[0]["key"], "cool");
-    let cool_buckets = buckets[0]["child_date_hist"]["buckets"].as_array().unwrap();
-    assert_eq!(cool_buckets.len(), 3);
-    assert_eq!(cool_buckets[0]["doc_count"], 1); // day 0
-    assert_eq!(cool_buckets[1]["doc_count"], 4); // day 1
-    assert_eq!(cool_buckets[2]["doc_count"], 2); // day 2
-
-    // nohit bucket
-    assert_eq!(buckets[1]["key"], "nohit");
-    let nohit_buckets = buckets[1]["child_date_hist"]["buckets"].as_array().unwrap();
-    assert_eq!(nohit_buckets.len(), 2);
-    assert_eq!(nohit_buckets[0]["doc_count"], 1); // day 1
-    assert_eq!(nohit_buckets[1]["doc_count"], 1); // day 2
-
-    Ok(())
-}
-
 fn get_avg_req(field_name: &str) -> Aggregation {
    serde_json::from_value(json!({
        "avg": {
@@ -451,10 +25,6 @@ fn get_collector(agg_req: Aggregations) -> AggregationCollector {
 }

 // *** EVERY BUCKET-TYPE SHOULD BE TESTED HERE ***
-// Note: The flushng part of these  tests are outdated, since the buffering change after converting
-// the collection into one collector per request instead of per bucket.
-//
-// However they are useful as they test a complex aggregation requests.
 fn test_aggregation_flushing(
    merge_segments: bool,
    use_distributed_collector: bool,
@@ -467,9 +37,8 @@ fn test_aggregation_flushing(

    let reader = index.reader()?;

-    assert_eq!(COLLECT_BLOCK_BUFFER_LEN, 64);
-    // In the tree we cache documents of COLLECT_BLOCK_BUFFER_LEN before passing them down as one
-    // block.
+    assert_eq!(DOC_BLOCK_SIZE, 64);
+    // In the tree we cache DOC_BLOCK_SIZE documents before passing them down as one block.
    //
    // Build a request so that on the first level we have one full cache, which is then flushed.
    // The same cache should have some residue docs at the end, which are flushed (Range 0-70)
--- a/src/aggregation/bucket/filter.rs
+++ b/src/aggregation/bucket/filter.rs
@@ -6,14 +6,10 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use crate::aggregation::agg_data::{
    build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
 };
-use crate::aggregation::cached_sub_aggs::{
-    CachedSubAggs, HighCardSubAggCache, LowCardSubAggCache, SubAggCache,
-};
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
 };
-use crate::aggregation::segment_agg_result::{BucketIdProvider, SegmentAggregationCollector};
-use crate::aggregation::BucketId;
+use crate::aggregation::segment_agg_result::{CollectorClone, SegmentAggregationCollector};
 use crate::docset::DocSet;
 use crate::query::{AllQuery, EnableScoring, Query, QueryParser};
 use crate::schema::Schema;
@@ -408,18 +404,15 @@ pub struct FilterAggReqData {
    pub evaluator: DocumentQueryEvaluator,
    /// Reusable buffer for matching documents to minimize allocations during collection
    pub matching_docs_buffer: Vec<DocId>,
-    /// True if this filter aggregation is at the top level of the aggregation tree (not nested).
-    pub is_top_level: bool,
 }

 impl FilterAggReqData {
    pub(crate) fn get_memory_consumption(&self) -> usize {
        // Estimate: name + segment reader reference + bitset + buffer capacity
        self.name.len()
-        + std::mem::size_of::<SegmentReader>()
-        + self.evaluator.bitset.len() / 8 // BitSet memory (bits to bytes)
-        + self.matching_docs_buffer.capacity() * std::mem::size_of::<DocId>()
-        + std::mem::size_of::<bool>()
+            + std::mem::size_of::<SegmentReader>()
+            + self.evaluator.bitset.len() / 8 // BitSet memory (bits to bytes)
+            + self.matching_docs_buffer.capacity() * std::mem::size_of::<DocId>()
    }
 }

@@ -453,7 +446,7 @@ impl DocumentQueryEvaluator {
        let weight = query.weight(EnableScoring::disabled_from_schema(&schema))?;

        // Get a scorer that iterates over matching documents
-        let mut scorer = weight.scorer(segment_reader, 1.0)?;
+        let mut scorer = weight.scorer(segment_reader, 1.0, 0)?;

        // Create a BitSet to hold all matching documents
        let mut bitset = BitSet::with_max_value(max_doc);
@@ -496,24 +489,17 @@ impl Debug for DocumentQueryEvaluator {
    }
 }

-#[derive(Debug, Clone, PartialEq, Copy)]
-struct DocCount {
-    doc_count: u64,
-    bucket_id: BucketId,
-}
-
 /// Segment collector for filter aggregation
-pub struct SegmentFilterCollector<C: SubAggCache> {
-    /// Document counts per parent bucket
-    parent_buckets: Vec<DocCount>,
+pub struct SegmentFilterCollector {
+    /// Document count in this bucket
+    doc_count: u64,
    /// Sub-aggregation collectors
-    sub_aggregations: Option<CachedSubAggs<C>>,
-    bucket_id_provider: BucketIdProvider,
+    sub_aggregations: Option<Box<dyn SegmentAggregationCollector>>,
    /// Accessor index for this filter aggregation (to access FilterAggReqData)
    accessor_idx: usize,
 }

-impl<C: SubAggCache> SegmentFilterCollector<C> {
+impl SegmentFilterCollector {
    /// Create a new filter segment collector following the new agg_data pattern
    pub(crate) fn from_req_and_validate(
        req: &mut AggregationsSegmentCtx,
@@ -525,75 +511,47 @@ impl<C: SubAggCache> SegmentFilterCollector<C> {
        } else {
            None
        };
-        let sub_agg_collector = sub_agg_collector.map(CachedSubAggs::new);

        Ok(SegmentFilterCollector {
-            parent_buckets: Vec::new(),
+            doc_count: 0,
            sub_aggregations: sub_agg_collector,
            accessor_idx: node.idx_in_req_data,
-            bucket_id_provider: BucketIdProvider::default(),
        })
    }
 }

-pub(crate) fn build_segment_filter_collector(
-    req: &mut AggregationsSegmentCtx,
-    node: &AggRefNode,
-) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
-    let is_top_level = req.per_request.filter_req_data[node.idx_in_req_data]
-        .as_ref()
-        .expect("filter_req_data slot is empty")
-        .is_top_level;
-
-    if is_top_level {
-        Ok(Box::new(
-            SegmentFilterCollector::<LowCardSubAggCache>::from_req_and_validate(req, node)?,
-        ))
-    } else {
-        Ok(Box::new(
-            SegmentFilterCollector::<HighCardSubAggCache>::from_req_and_validate(req, node)?,
-        ))
-    }
-}
-
-impl<C: SubAggCache> Debug for SegmentFilterCollector<C> {
+impl Debug for SegmentFilterCollector {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SegmentFilterCollector")
-            .field("buckets", &self.parent_buckets)
+            .field("doc_count", &self.doc_count)
            .field("has_sub_aggs", &self.sub_aggregations.is_some())
            .field("accessor_idx", &self.accessor_idx)
            .finish()
    }
 }

-impl<C: SubAggCache> SegmentAggregationCollector for SegmentFilterCollector<C> {
+impl CollectorClone for SegmentFilterCollector {
+    fn clone_box(&self) -> Box<dyn SegmentAggregationCollector> {
+        // TODO: cloning is not implemented yet (it requires weight recreation), so panic for now.
+        panic!("SegmentFilterCollector cloning not yet implemented - requires weight recreation")
+    }
+}
+
+impl SegmentAggregationCollector for SegmentFilterCollector {
    fn add_intermediate_aggregation_result(
-        &mut self,
+        self: Box<Self>,
        agg_data: &AggregationsSegmentCtx,
        results: &mut IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
        let mut sub_results = IntermediateAggregationResults::default();
-        let bucket_opt = self.parent_buckets.get(parent_bucket_id as usize);

-        if let Some(sub_aggs) = &mut self.sub_aggregations {
-            sub_aggs
-                .get_sub_agg_collector()
-                .add_intermediate_aggregation_result(
-                    agg_data,
-                    &mut sub_results,
-                    // Here we create a new bucket ID for sub-aggregations if the bucket doesn't
-                    // exist, so that sub-aggregations can still produce results (e.g., zero doc
-                    // count)
-                    bucket_opt
-                        .map(|bucket| bucket.bucket_id)
-                        .unwrap_or(self.bucket_id_provider.next_bucket_id()),
-                )?;
+        if let Some(sub_aggs) = self.sub_aggregations {
+            sub_aggs.add_intermediate_aggregation_result(agg_data, &mut sub_results)?;
        }

        // Create the filter bucket result
        let filter_bucket_result = IntermediateBucketResult::Filter {
-            doc_count: bucket_opt.map(|b| b.doc_count).unwrap_or(0),
+            doc_count: self.doc_count,
            sub_aggregations: sub_results,
        };

@@ -612,17 +570,32 @@ impl<C: SubAggCache> SegmentAggregationCollector for SegmentFilterCollector<C> {
        Ok(())
    }

-    fn collect(
+    fn collect(&mut self, doc: DocId, agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
+        // Access the evaluator from FilterAggReqData
+        let req_data = agg_data.get_filter_req_data(self.accessor_idx);
+
+        // O(1) BitSet lookup to check if document matches filter
+        if req_data.evaluator.matches_document(doc) {
+            self.doc_count += 1;
+
+            // If we have sub-aggregations, collect on them for this filtered document
+            if let Some(sub_aggs) = &mut self.sub_aggregations {
+                sub_aggs.collect(doc, agg_data)?;
+            }
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn collect_block(
        &mut self,
-        parent_bucket_id: BucketId,
-        docs: &[crate::DocId],
+        docs: &[DocId],
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
        if docs.is_empty() {
            return Ok(());
        }

-        let mut bucket = self.parent_buckets[parent_bucket_id as usize];
        // Take the request data to avoid borrow checker issues with sub-aggregations
        let mut req = agg_data.take_filter_req_data(self.accessor_idx);

@@ -631,24 +604,18 @@ impl<C: SubAggCache> SegmentAggregationCollector for SegmentFilterCollector<C> {
        req.evaluator
            .filter_batch(docs, &mut req.matching_docs_buffer);

-        bucket.doc_count += req.matching_docs_buffer.len() as u64;
+        self.doc_count += req.matching_docs_buffer.len() as u64;

        // Batch process sub-aggregations if we have matches
        if !req.matching_docs_buffer.is_empty() {
            if let Some(sub_aggs) = &mut self.sub_aggregations {
-                for &doc_id in &req.matching_docs_buffer {
-                    sub_aggs.push(bucket.bucket_id, doc_id);
-                }
+                // Use collect_block for better sub-aggregation performance
+                sub_aggs.collect_block(&req.matching_docs_buffer, agg_data)?;
            }
        }

        // Put the request data back
        agg_data.put_back_filter_req_data(self.accessor_idx, req);
-        if let Some(sub_aggs) = &mut self.sub_aggregations {
-            sub_aggs.check_flush_local(agg_data)?;
-        }
-        // put back bucket
-        self.parent_buckets[parent_bucket_id as usize] = bucket;

        Ok(())
    }
@@ -659,21 +626,6 @@ impl<C: SubAggCache> SegmentAggregationCollector for SegmentFilterCollector<C> {
        }
        Ok(())
    }
-
-    fn prepare_max_bucket(
-        &mut self,
-        max_bucket: BucketId,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> crate::Result<()> {
-        while self.parent_buckets.len() <= max_bucket as usize {
-            let bucket_id = self.bucket_id_provider.next_bucket_id();
-            self.parent_buckets.push(DocCount {
-                doc_count: 0,
-                bucket_id,
-            });
-        }
-        Ok(())
-    }
 }

 /// Intermediate result for filter aggregation
@@ -1567,9 +1519,9 @@ mod tests {
        let searcher = reader.searcher();

        let agg = json!({
-                "test": {
-                    "filter": deserialized,
-                    "aggs": { "count": { "value_count": { "field": "brand" } } }
+            "test": {
+                "filter": deserialized,
+                "aggs": { "count": { "value_count": { "field": "brand" } } }
            }
        });

--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -1,6 +1,6 @@
 use std::cmp::Ordering;

-use columnar::{Column, ColumnType};
+use columnar::{Column, ColumnBlockAccessor, ColumnType};
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};
 use tantivy_bitpacker::minmax;
@@ -8,14 +8,14 @@ use tantivy_bitpacker::minmax;
 use crate::aggregation::agg_data::{
    build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
 };
+use crate::aggregation::agg_limits::MemoryConsumption;
 use crate::aggregation::agg_req::Aggregations;
 use crate::aggregation::agg_result::BucketEntry;
-use crate::aggregation::cached_sub_aggs::{CachedSubAggs, HighCardCachedSubAggs};
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
    IntermediateHistogramBucketEntry,
 };
-use crate::aggregation::segment_agg_result::{BucketIdProvider, SegmentAggregationCollector};
+use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
 use crate::aggregation::*;
 use crate::TantivyError;

@@ -26,8 +26,13 @@ pub struct HistogramAggReqData {
    pub accessor: Column<u64>,
    /// The field type of the fast field.
    pub field_type: ColumnType,
+    /// The column block accessor to access the fast field values.
+    pub column_block_accessor: ColumnBlockAccessor<u64>,
    /// The name of the aggregation.
    pub name: String,
+    /// The sub aggregation blueprint, used to create sub aggregations for each bucket.
+    /// Will be filled during initialization of the collector.
+    pub sub_aggregation_blueprint: Option<Box<dyn SegmentAggregationCollector>>,
    /// The histogram aggregation request.
    pub req: HistogramAggregation,
    /// True if this is a date_histogram aggregation.
@@ -252,24 +257,18 @@ impl HistogramBounds {
 pub(crate) struct SegmentHistogramBucketEntry {
    pub key: f64,
    pub doc_count: u64,
-    pub bucket_id: BucketId,
 }

 impl SegmentHistogramBucketEntry {
    pub(crate) fn into_intermediate_bucket_entry(
        self,
-        sub_aggregation: &mut Option<HighCardCachedSubAggs>,
+        sub_aggregation: Option<Box<dyn SegmentAggregationCollector>>,
        agg_data: &AggregationsSegmentCtx,
    ) -> crate::Result<IntermediateHistogramBucketEntry> {
        let mut sub_aggregation_res = IntermediateAggregationResults::default();
        if let Some(sub_aggregation) = sub_aggregation {
            sub_aggregation
-                .get_sub_agg_collector()
-                .add_intermediate_aggregation_result(
-                    agg_data,
-                    &mut sub_aggregation_res,
-                    self.bucket_id,
-                )?;
+                .add_intermediate_aggregation_result(agg_data, &mut sub_aggregation_res)?;
        }
        Ok(IntermediateHistogramBucketEntry {
            key: self.key,
@@ -279,38 +278,27 @@ impl SegmentHistogramBucketEntry {
    }
 }

-#[derive(Clone, Debug, Default)]
-struct HistogramBuckets {
-    pub buckets: FxHashMap<i64, SegmentHistogramBucketEntry>,
-}
-
 /// The collector puts values from the fast field into the correct buckets and does a conversion to
 /// the correct datatype.
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub struct SegmentHistogramCollector {
    /// The buckets containing the aggregation data.
-    /// One Histogram bucket per parent bucket id.
-    parent_buckets: Vec<HistogramBuckets>,
-    sub_agg: Option<HighCardCachedSubAggs>,
+    buckets: FxHashMap<i64, SegmentHistogramBucketEntry>,
+    sub_aggregations: FxHashMap<i64, Box<dyn SegmentAggregationCollector>>,
    accessor_idx: usize,
-    bucket_id_provider: BucketIdProvider,
 }

 impl SegmentAggregationCollector for SegmentHistogramCollector {
    fn add_intermediate_aggregation_result(
-        &mut self,
+        self: Box<Self>,
        agg_data: &AggregationsSegmentCtx,
        results: &mut IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
        let name = agg_data
            .get_histogram_req_data(self.accessor_idx)
            .name
            .clone();
-        // TODO: avoid prepare_max_bucket here and handle empty buckets.
-        self.prepare_max_bucket(parent_bucket_id, agg_data)?;
-        let histogram = std::mem::take(&mut self.parent_buckets[parent_bucket_id as usize]);
-        let bucket = self.add_intermediate_bucket_result(agg_data, histogram)?;
+        let bucket = self.into_intermediate_bucket_result(agg_data)?;
        results.push(name, IntermediateAggregationResult::Bucket(bucket))?;

        Ok(())
@@ -319,40 +307,44 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
    #[inline]
    fn collect(
        &mut self,
-        parent_bucket_id: BucketId,
+        doc: crate::DocId,
+        agg_data: &mut AggregationsSegmentCtx,
+    ) -> crate::Result<()> {
+        self.collect_block(&[doc], agg_data)
+    }
+
+    #[inline]
+    fn collect_block(
+        &mut self,
        docs: &[crate::DocId],
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        let req = agg_data.take_histogram_req_data(self.accessor_idx);
+        let mut req = agg_data.take_histogram_req_data(self.accessor_idx);
        let mem_pre = self.get_memory_consumption();
-        let buckets = &mut self.parent_buckets[parent_bucket_id as usize].buckets;

        let bounds = req.bounds;
        let interval = req.req.interval;
        let offset = req.offset;
        let get_bucket_pos = |val| get_bucket_pos_f64(val, interval, offset) as i64;

-        agg_data
-            .column_block_accessor
-            .fetch_block(docs, &req.accessor);
-        for (doc, val) in agg_data
+        req.column_block_accessor.fetch_block(docs, &req.accessor);
+        for (doc, val) in req
            .column_block_accessor
            .iter_docid_vals(docs, &req.accessor)
        {
-            let val = f64_from_fastfield_u64(val, req.field_type);
+            let val = f64_from_fastfield_u64(val, &req.field_type);
            let bucket_pos = get_bucket_pos(val);
            if bounds.contains(val) {
-                let bucket = buckets.entry(bucket_pos).or_insert_with(|| {
+                let bucket = self.buckets.entry(bucket_pos).or_insert_with(|| {
                    let key = get_bucket_key_from_pos(bucket_pos as f64, interval, offset);
-                    SegmentHistogramBucketEntry {
-                        key,
-                        doc_count: 0,
-                        bucket_id: self.bucket_id_provider.next_bucket_id(),
-                    }
+                    SegmentHistogramBucketEntry { key, doc_count: 0 }
                });
                bucket.doc_count += 1;
-                if let Some(sub_agg) = &mut self.sub_agg {
-                    sub_agg.push(bucket.bucket_id, doc);
+                if let Some(sub_aggregation_blueprint) = req.sub_aggregation_blueprint.as_ref() {
+                    self.sub_aggregations
+                        .entry(bucket_pos)
+                        .or_insert_with(|| sub_aggregation_blueprint.clone())
+                        .collect(doc, agg_data)?;
                }
            }
        }
@@ -366,30 +358,14 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
                .add_memory_consumed(mem_delta as u64)?;
        }

-        if let Some(sub_agg) = &mut self.sub_agg {
-            sub_agg.check_flush_local(agg_data)?;
-        }
-
        Ok(())
    }

    fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
-        if let Some(sub_aggregation) = &mut self.sub_agg {
+        for sub_aggregation in self.sub_aggregations.values_mut() {
            sub_aggregation.flush(agg_data)?;
        }
-        Ok(())
-    }

-    fn prepare_max_bucket(
-        &mut self,
-        max_bucket: BucketId,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> crate::Result<()> {
-        while self.parent_buckets.len() <= max_bucket as usize {
-            self.parent_buckets.push(HistogramBuckets {
-                buckets: FxHashMap::default(),
-            });
-        }
        Ok(())
    }
 }
@@ -397,19 +373,22 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
 impl SegmentHistogramCollector {
    fn get_memory_consumption(&self) -> usize {
        let self_mem = std::mem::size_of::<Self>();
-        let buckets_mem = self.parent_buckets.len() * std::mem::size_of::<HistogramBuckets>();
-        self_mem + buckets_mem
+        let sub_aggs_mem = self.sub_aggregations.memory_consumption();
+        let buckets_mem = self.buckets.memory_consumption();
+        self_mem + sub_aggs_mem + buckets_mem
    }
    /// Converts the collector result into a intermediate bucket result.
-    fn add_intermediate_bucket_result(
-        &mut self,
+    pub fn into_intermediate_bucket_result(
+        self,
        agg_data: &AggregationsSegmentCtx,
-        histogram: HistogramBuckets,
    ) -> crate::Result<IntermediateBucketResult> {
-        let mut buckets = Vec::with_capacity(histogram.buckets.len());
+        let mut buckets = Vec::with_capacity(self.buckets.len());

-        for bucket in histogram.buckets.into_values() {
-            let bucket_res = bucket.into_intermediate_bucket_entry(&mut self.sub_agg, agg_data);
+        for (bucket_pos, bucket) in self.buckets {
+            let bucket_res = bucket.into_intermediate_bucket_entry(
+                self.sub_aggregations.get(&bucket_pos).cloned(),
+                agg_data,
+            );

            buckets.push(bucket_res?);
        }
@@ -429,7 +408,7 @@ impl SegmentHistogramCollector {
        agg_data: &mut AggregationsSegmentCtx,
        node: &AggRefNode,
    ) -> crate::Result<Self> {
-        let sub_agg = if !node.children.is_empty() {
+        let blueprint = if !node.children.is_empty() {
            Some(build_segment_agg_collectors(agg_data, &node.children)?)
        } else {
            None
@@ -444,13 +423,13 @@ impl SegmentHistogramCollector {
            max: f64::MAX,
        });
        req_data.offset = req_data.req.offset.unwrap_or(0.0);
-        let sub_agg = sub_agg.map(CachedSubAggs::new);
+
+        req_data.sub_aggregation_blueprint = blueprint;

        Ok(Self {
-            parent_buckets: Default::default(),
-            sub_agg,
+            buckets: Default::default(),
+            sub_aggregations: Default::default(),
            accessor_idx: node.idx_in_req_data,
-            bucket_id_provider: BucketIdProvider::default(),
        })
    }
 }
--- a/src/aggregation/bucket/range.rs
+++ b/src/aggregation/bucket/range.rs
@@ -1,22 +1,18 @@
 use std::fmt::Debug;
 use std::ops::Range;

-use columnar::{Column, ColumnType};
+use columnar::{Column, ColumnBlockAccessor, ColumnType};
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};

 use crate::aggregation::agg_data::{
    build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
 };
-use crate::aggregation::agg_limits::AggregationLimitsGuard;
-use crate::aggregation::cached_sub_aggs::{
-    CachedSubAggs, HighCardSubAggCache, LowCardCachedSubAggs, LowCardSubAggCache, SubAggCache,
-};
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
    IntermediateRangeBucketEntry, IntermediateRangeBucketResult,
 };
-use crate::aggregation::segment_agg_result::{BucketIdProvider, SegmentAggregationCollector};
+use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
 use crate::aggregation::*;
 use crate::TantivyError;

@@ -27,12 +23,12 @@ pub struct RangeAggReqData {
    pub accessor: Column<u64>,
    /// The type of the fast field.
    pub field_type: ColumnType,
+    /// The column block accessor to access the fast field values.
+    pub column_block_accessor: ColumnBlockAccessor<u64>,
    /// The range aggregation request.
    pub req: RangeAggregation,
    /// The name of the aggregation.
    pub name: String,
-    /// Whether this is a top-level aggregation.
-    pub is_top_level: bool,
 }

 impl RangeAggReqData {
@@ -155,47 +151,19 @@ pub(crate) struct SegmentRangeAndBucketEntry {

 /// The collector puts values from the fast field into the correct buckets and does a conversion to
 /// the correct datatype.
-pub struct SegmentRangeCollector<C: SubAggCache> {
+#[derive(Clone, Debug)]
+pub struct SegmentRangeCollector {
    /// The buckets containing the aggregation data.
-    /// One for each ParentBucketId
-    parent_buckets: Vec<Vec<SegmentRangeAndBucketEntry>>,
+    buckets: Vec<SegmentRangeAndBucketEntry>,
    column_type: ColumnType,
    pub(crate) accessor_idx: usize,
-    sub_agg: Option<CachedSubAggs<C>>,
-    /// Here things get a bit weird. We need to assign unique bucket ids across all
-    /// parent buckets. So we keep track of the next available bucket id here.
-    /// This allows a kind of flattening of the bucket ids across all parent buckets.
-    /// E.g. in nested aggregations:
-    /// Term Agg -> Range aggregation -> Stats aggregation
-    /// E.g. the Term Agg creates 3 buckets ["INFO", "ERROR", "WARN"], each of these has a Range
-    /// aggregation with 4 buckets. The Range aggregation will create buckets with ids:
-    /// - INFO: 0,1,2,3
-    /// - ERROR: 4,5,6,7
-    /// - WARN: 8,9,10,11
-    ///
-    /// This allows the Stats aggregation to have unique bucket ids to refer to.
-    bucket_id_provider: BucketIdProvider,
-    limits: AggregationLimitsGuard,
 }

-impl<C: SubAggCache> Debug for SegmentRangeCollector<C> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("SegmentRangeCollector")
-            .field("parent_buckets_len", &self.parent_buckets.len())
-            .field("column_type", &self.column_type)
-            .field("accessor_idx", &self.accessor_idx)
-            .field("has_sub_agg", &self.sub_agg.is_some())
-            .finish()
-    }
-}
-
-/// TODO: Bad naming, there's also SegmentRangeAndBucketEntry
 #[derive(Clone)]
 pub(crate) struct SegmentRangeBucketEntry {
    pub key: Key,
    pub doc_count: u64,
-    // pub sub_aggregation: Option<Box<dyn SegmentAggregationCollector>>,
-    pub bucket_id: BucketId,
+    pub sub_aggregation: Option<Box<dyn SegmentAggregationCollector>>,
    /// The from range of the bucket. Equals `f64::MIN` when `None`.
    pub from: Option<f64>,
    /// The to range of the bucket. Equals `f64::MAX` when `None`. Open interval, `to` is not
@@ -216,50 +184,48 @@ impl Debug for SegmentRangeBucketEntry {
 impl SegmentRangeBucketEntry {
    pub(crate) fn into_intermediate_bucket_entry(
        self,
+        agg_data: &AggregationsSegmentCtx,
    ) -> crate::Result<IntermediateRangeBucketEntry> {
-        let sub_aggregation = IntermediateAggregationResults::default();
+        let mut sub_aggregation_res = IntermediateAggregationResults::default();
+        // Fold this bucket's sub-aggregation collector (if any) into the results;
+        // without one, `sub_aggregation_res` simply stays empty.
+        if let Some(sub_aggregation) = self.sub_aggregation {
+            sub_aggregation
+                .add_intermediate_aggregation_result(agg_data, &mut sub_aggregation_res)?;
+        }

        Ok(IntermediateRangeBucketEntry {
            key: self.key.into(),
            doc_count: self.doc_count,
-            sub_aggregation_res: sub_aggregation,
+            sub_aggregation: sub_aggregation_res,
            from: self.from,
            to: self.to,
        })
    }
 }

-impl<C: SubAggCache> SegmentAggregationCollector for SegmentRangeCollector<C> {
+impl SegmentAggregationCollector for SegmentRangeCollector {
    fn add_intermediate_aggregation_result(
-        &mut self,
+        self: Box<Self>,
        agg_data: &AggregationsSegmentCtx,
        results: &mut IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
-        self.prepare_max_bucket(parent_bucket_id, agg_data)?;
        let field_type = self.column_type;
        let name = agg_data
            .get_range_req_data(self.accessor_idx)
            .name
            .to_string();

-        let buckets = std::mem::take(&mut self.parent_buckets[parent_bucket_id as usize]);
-
-        let buckets: FxHashMap<SerializedKey, IntermediateRangeBucketEntry> = buckets
+        let buckets: FxHashMap<SerializedKey, IntermediateRangeBucketEntry> = self
+            .buckets
            .into_iter()
-            .map(|range_bucket| {
-                let bucket_id = range_bucket.bucket.bucket_id;
-                let mut agg = range_bucket.bucket.into_intermediate_bucket_entry()?;
-                if let Some(sub_aggregation) = &mut self.sub_agg {
-                    sub_aggregation
-                        .get_sub_agg_collector()
-                        .add_intermediate_aggregation_result(
-                            agg_data,
-                            &mut agg.sub_aggregation_res,
-                            bucket_id,
-                        )?;
-                }
-                Ok((range_to_string(&range_bucket.range, &field_type)?, agg))
+            .map(move |range_bucket| {
+                Ok((
+                    range_to_string(&range_bucket.range, &field_type)?,
+                    range_bucket
+                        .bucket
+                        .into_intermediate_bucket_entry(agg_data)?,
+                ))
            })
            .collect::<crate::Result<_>>()?;

@@ -276,114 +242,73 @@ impl<C: SubAggCache> SegmentAggregationCollector for SegmentRangeCollector<C> {
    #[inline]
    fn collect(
        &mut self,
-        parent_bucket_id: BucketId,
+        doc: crate::DocId,
+        agg_data: &mut AggregationsSegmentCtx,
+    ) -> crate::Result<()> {
+        self.collect_block(&[doc], agg_data)
+    }
+
+    #[inline]
+    fn collect_block(
+        &mut self,
        docs: &[crate::DocId],
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        let req = agg_data.take_range_req_data(self.accessor_idx);
+        // Take request data to avoid borrow conflicts during sub-aggregation
+        let mut req = agg_data.take_range_req_data(self.accessor_idx);

-        agg_data
-            .column_block_accessor
-            .fetch_block(docs, &req.accessor);
+        req.column_block_accessor.fetch_block(docs, &req.accessor);

-        let buckets = &mut self.parent_buckets[parent_bucket_id as usize];
-
-        for (doc, val) in agg_data
+        for (doc, val) in req
            .column_block_accessor
            .iter_docid_vals(docs, &req.accessor)
        {
-            let bucket_pos = get_bucket_pos(val, buckets);
-            let bucket = &mut buckets[bucket_pos];
+            let bucket_pos = self.get_bucket_pos(val);
+            let bucket = &mut self.buckets[bucket_pos];
            bucket.bucket.doc_count += 1;
-            if let Some(sub_agg) = self.sub_agg.as_mut() {
-                sub_agg.push(bucket.bucket.bucket_id, doc);
+            if let Some(sub_agg) = bucket.bucket.sub_aggregation.as_mut() {
+                sub_agg.collect(doc, agg_data)?;
            }
        }

        agg_data.put_back_range_req_data(self.accessor_idx, req);
-        if let Some(sub_agg) = self.sub_agg.as_mut() {
-            sub_agg.check_flush_local(agg_data)?;
-        }

        Ok(())
    }

    fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
-        if let Some(sub_agg) = self.sub_agg.as_mut() {
-            sub_agg.flush(agg_data)?;
+        for bucket in self.buckets.iter_mut() {
+            if let Some(sub_agg) = bucket.bucket.sub_aggregation.as_mut() {
+                sub_agg.flush(agg_data)?;
+            }
        }
        Ok(())
    }
-
-    fn prepare_max_bucket(
-        &mut self,
-        max_bucket: BucketId,
-        agg_data: &AggregationsSegmentCtx,
-    ) -> crate::Result<()> {
-        while self.parent_buckets.len() <= max_bucket as usize {
-            let new_buckets = self.create_new_buckets(agg_data)?;
-            self.parent_buckets.push(new_buckets);
-        }
-
-        Ok(())
-    }
-}
-/// Build a concrete `SegmentRangeCollector` with either a Vec- or HashMap-backed
-/// bucket storage, depending on the column type and aggregation level.
-pub(crate) fn build_segment_range_collector(
-    agg_data: &mut AggregationsSegmentCtx,
-    node: &AggRefNode,
-) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
-    let accessor_idx = node.idx_in_req_data;
-    let req_data = agg_data.get_range_req_data(node.idx_in_req_data);
-    let field_type = req_data.field_type;
-
-    // TODO: A better metric instead of is_top_level would be the number of buckets expected.
-    // E.g. If range agg is not top level, but the parent is a bucket agg with less than 10 buckets,
-    // we can are still in low cardinality territory.
-    let is_low_card = req_data.is_top_level && req_data.req.ranges.len() <= 64;
-
-    let sub_agg = if !node.children.is_empty() {
-        Some(build_segment_agg_collectors(agg_data, &node.children)?)
-    } else {
-        None
-    };
-
-    if is_low_card {
-        Ok(Box::new(SegmentRangeCollector::<LowCardSubAggCache> {
-            sub_agg: sub_agg.map(LowCardCachedSubAggs::new),
-            column_type: field_type,
-            accessor_idx,
-            parent_buckets: Vec::new(),
-            bucket_id_provider: BucketIdProvider::default(),
-            limits: agg_data.context.limits.clone(),
-        }))
-    } else {
-        Ok(Box::new(SegmentRangeCollector::<HighCardSubAggCache> {
-            sub_agg: sub_agg.map(CachedSubAggs::new),
-            column_type: field_type,
-            accessor_idx,
-            parent_buckets: Vec::new(),
-            bucket_id_provider: BucketIdProvider::default(),
-            limits: agg_data.context.limits.clone(),
-        }))
-    }
 }

-impl<C: SubAggCache> SegmentRangeCollector<C> {
-    pub(crate) fn create_new_buckets(
-        &mut self,
-        agg_data: &AggregationsSegmentCtx,
-    ) -> crate::Result<Vec<SegmentRangeAndBucketEntry>> {
-        let field_type = self.column_type;
-        let req_data = agg_data.get_range_req_data(self.accessor_idx);
+impl SegmentRangeCollector {
+    pub(crate) fn from_req_and_validate(
+        req_data: &mut AggregationsSegmentCtx,
+        node: &AggRefNode,
+    ) -> crate::Result<Self> {
+        let accessor_idx = node.idx_in_req_data;
+        let (field_type, ranges) = {
+            let req_view = req_data.get_range_req_data(node.idx_in_req_data);
+            (req_view.field_type, req_view.req.ranges.clone())
+        };
+
        // The range input on the request is f64.
        // We need to convert to u64 ranges, because we read the values as u64.
        // The mapping from the conversion is monotonic so ordering is preserved.
-        let buckets: Vec<_> = extend_validate_ranges(&req_data.req.ranges, &field_type)?
+        let sub_agg_prototype = if !node.children.is_empty() {
+            Some(build_segment_agg_collectors(req_data, &node.children)?)
+        } else {
+            None
+        };
+
+        let buckets: Vec<_> = extend_validate_ranges(&ranges, &field_type)?
            .iter()
            .map(|range| {
-                let bucket_id = self.bucket_id_provider.next_bucket_id();
                let key = range
                    .key
                    .clone()
@@ -392,20 +317,20 @@ impl<C: SubAggCache> SegmentRangeCollector<C> {
                let to = if range.range.end == u64::MAX {
                    None
                } else {
-                    Some(f64_from_fastfield_u64(range.range.end, field_type))
+                    Some(f64_from_fastfield_u64(range.range.end, &field_type))
                };
                let from = if range.range.start == u64::MIN {
                    None
                } else {
-                    Some(f64_from_fastfield_u64(range.range.start, field_type))
+                    Some(f64_from_fastfield_u64(range.range.start, &field_type))
                };
-                // let sub_aggregation = sub_agg_prototype.clone();
+                let sub_aggregation = sub_agg_prototype.clone();

                Ok(SegmentRangeAndBucketEntry {
                    range: range.range.clone(),
                    bucket: SegmentRangeBucketEntry {
                        doc_count: 0,
-                        bucket_id,
+                        sub_aggregation,
                        key,
                        from,
                        to,
@@ -414,19 +339,26 @@ impl<C: SubAggCache> SegmentRangeCollector<C> {
            })
            .collect::<crate::Result<_>>()?;

-        self.limits.add_memory_consumed(
+        req_data.context.limits.add_memory_consumed(
            buckets.len() as u64 * std::mem::size_of::<SegmentRangeAndBucketEntry>() as u64,
        )?;
-        Ok(buckets)
+
+        Ok(SegmentRangeCollector {
+            buckets,
+            column_type: field_type,
+            accessor_idx,
+        })
+    }
+
+    #[inline]
+    fn get_bucket_pos(&self, val: u64) -> usize {
+        let pos = self
+            .buckets
+            .binary_search_by_key(&val, |probe| probe.range.start)
+            .unwrap_or_else(|pos| pos - 1);
+        debug_assert!(self.buckets[pos].range.contains(&val));
+        pos
    }
-}
-#[inline]
-fn get_bucket_pos(val: u64, buckets: &[SegmentRangeAndBucketEntry]) -> usize {
-    let pos = buckets
-        .binary_search_by_key(&val, |probe| probe.range.start)
-        .unwrap_or_else(|pos| pos - 1);
-    debug_assert!(buckets[pos].range.contains(&val));
-    pos
 }

 /// Converts the user provided f64 range value to fast field value space.
@@ -524,7 +456,7 @@ pub(crate) fn range_to_string(
            let val = i64::from_u64(val);
            format_date(val)
        } else {
-            Ok(f64_from_fastfield_u64(val, *field_type).to_string())
+            Ok(f64_from_fastfield_u64(val, field_type).to_string())
        }
    };

@@ -554,7 +486,7 @@ mod tests {
    pub fn get_collector_from_ranges(
        ranges: Vec<RangeAggregationRange>,
        field_type: ColumnType,
-    ) -> SegmentRangeCollector<HighCardSubAggCache> {
+    ) -> SegmentRangeCollector {
        let req = RangeAggregation {
            field: "dummy".to_string(),
            ranges,
@@ -574,33 +506,30 @@ mod tests {
                let to = if range.range.end == u64::MAX {
                    None
                } else {
-                    Some(f64_from_fastfield_u64(range.range.end, field_type))
+                    Some(f64_from_fastfield_u64(range.range.end, &field_type))
                };
                let from = if range.range.start == u64::MIN {
                    None
                } else {
-                    Some(f64_from_fastfield_u64(range.range.start, field_type))
+                    Some(f64_from_fastfield_u64(range.range.start, &field_type))
                };
                SegmentRangeAndBucketEntry {
                    range: range.range.clone(),
                    bucket: SegmentRangeBucketEntry {
                        doc_count: 0,
+                        sub_aggregation: None,
                        key,
                        from,
                        to,
-                        bucket_id: 0,
                    },
                }
            })
            .collect();

        SegmentRangeCollector {
-            parent_buckets: vec![buckets],
+            buckets,
            column_type: field_type,
            accessor_idx: 0,
-            sub_agg: None,
-            bucket_id_provider: Default::default(),
-            limits: AggregationLimitsGuard::default(),
        }
    }

@@ -847,7 +776,7 @@ mod tests {
        let buckets = vec![(10f64..20f64).into(), (30f64..40f64).into()];
        let collector = get_collector_from_ranges(buckets, ColumnType::F64);

-        let buckets = collector.parent_buckets[0].clone();
+        let buckets = collector.buckets;
        assert_eq!(buckets[0].range.start, u64::MIN);
        assert_eq!(buckets[0].range.end, 10f64.to_u64());
        assert_eq!(buckets[1].range.start, 10f64.to_u64());
@@ -870,7 +799,7 @@ mod tests {
        ];
        let collector = get_collector_from_ranges(buckets, ColumnType::F64);

-        let buckets = collector.parent_buckets[0].clone();
+        let buckets = collector.buckets;
        assert_eq!(buckets[0].range.start, u64::MIN);
        assert_eq!(buckets[0].range.end, 10f64.to_u64());
        assert_eq!(buckets[1].range.start, 10f64.to_u64());
@@ -885,7 +814,7 @@ mod tests {
        let buckets = vec![(-10f64..-1f64).into()];
        let collector = get_collector_from_ranges(buckets, ColumnType::F64);

-        let buckets = collector.parent_buckets[0].clone();
+        let buckets = collector.buckets;
        assert_eq!(&buckets[0].bucket.key.to_string(), "*--10");
        assert_eq!(&buckets[buckets.len() - 1].bucket.key.to_string(), "-1-*");
    }
@@ -894,7 +823,7 @@ mod tests {
        let buckets = vec![(0f64..10f64).into()];
        let collector = get_collector_from_ranges(buckets, ColumnType::F64);

-        let buckets = collector.parent_buckets[0].clone();
+        let buckets = collector.buckets;
        assert_eq!(&buckets[0].bucket.key.to_string(), "*-0");
        assert_eq!(&buckets[buckets.len() - 1].bucket.key.to_string(), "10-*");
    }
@@ -903,7 +832,7 @@ mod tests {
    fn range_binary_search_test_u64() {
        let check_ranges = |ranges: Vec<RangeAggregationRange>| {
            let collector = get_collector_from_ranges(ranges, ColumnType::U64);
-            let search = |val: u64| get_bucket_pos(val, &collector.parent_buckets[0]);
+            let search = |val: u64| collector.get_bucket_pos(val);

            assert_eq!(search(u64::MIN), 0);
            assert_eq!(search(9), 0);
@@ -949,7 +878,7 @@ mod tests {
        let ranges = vec![(10.0..100.0).into()];

        let collector = get_collector_from_ranges(ranges, ColumnType::F64);
-        let search = |val: u64| get_bucket_pos(val, &collector.parent_buckets[0]);
+        let search = |val: u64| collector.get_bucket_pos(val);

        assert_eq!(search(u64::MIN), 0);
        assert_eq!(search(9f64.to_u64()), 0);
@@ -961,3 +890,63 @@ mod tests {
                                             // the max value
    }
 }
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+
+    use itertools::Itertools;
+    use rand::seq::SliceRandom;
+    use rand::thread_rng;
+
+    use super::*;
+    use crate::aggregation::bucket::range::tests::get_collector_from_ranges;
+
+    const TOTAL_DOCS: u64 = 1_000_000u64;
+    const NUM_DOCS: u64 = 50_000u64;
+
+    fn get_collector_with_buckets(num_buckets: u64, num_docs: u64) -> SegmentRangeCollector {
+        let bucket_size = num_docs / num_buckets;
+        let mut buckets: Vec<RangeAggregationRange> = vec![];
+        for i in 0..num_buckets {
+            let bucket_start = (i * bucket_size) as f64;
+            buckets.push((bucket_start..bucket_start + bucket_size as f64).into())
+        }
+
+        get_collector_from_ranges(buckets, ColumnType::U64)
+    }
+
+    fn get_rand_docs(total_docs: u64, num_docs_returned: u64) -> Vec<u64> {
+        let mut rng = thread_rng();
+
+        let all_docs = (0..total_docs - 1).collect_vec();
+        let mut vals = all_docs
+            .as_slice()
+            .choose_multiple(&mut rng, num_docs_returned as usize)
+            .cloned()
+            .collect_vec();
+        vals.sort();
+        vals
+    }
+
+    fn bench_range_binary_search(b: &mut test::Bencher, num_buckets: u64) {
+        let collector = get_collector_with_buckets(num_buckets, TOTAL_DOCS);
+        let vals = get_rand_docs(TOTAL_DOCS, NUM_DOCS);
+        b.iter(|| {
+            let mut bucket_pos = 0;
+            for val in &vals {
+                bucket_pos = collector.get_bucket_pos(*val);
+            }
+            bucket_pos
+        })
+    }
+
+    #[bench]
+    fn bench_range_100_buckets(b: &mut test::Bencher) {
+        bench_range_binary_search(b, 100)
+    }
+
+    #[bench]
+    fn bench_range_10_buckets(b: &mut test::Bencher) {
+        bench_range_binary_search(b, 10)
+    }
+}
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
--- a/src/aggregation/bucket/term_missing_agg.rs
+++ b/src/aggregation/bucket/term_missing_agg.rs
@@ -5,13 +5,11 @@ use crate::aggregation::agg_data::{
    build_segment_agg_collectors, AggRefNode, AggregationsSegmentCtx,
 };
 use crate::aggregation::bucket::term_agg::TermsAggregation;
-use crate::aggregation::cached_sub_aggs::{CachedSubAggs, HighCardCachedSubAggs};
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateBucketResult,
    IntermediateKey, IntermediateTermBucketEntry, IntermediateTermBucketResult,
 };
-use crate::aggregation::segment_agg_result::{BucketIdProvider, SegmentAggregationCollector};
-use crate::aggregation::BucketId;
+use crate::aggregation::segment_agg_result::SegmentAggregationCollector;

 /// Special aggregation to handle missing values for term aggregations.
 /// This missing aggregation will check multiple columns for existence.
@@ -37,55 +35,41 @@ impl MissingTermAggReqData {
    }
 }

-#[derive(Default, Debug, Clone)]
-struct MissingCount {
-    missing_count: u32,
-    bucket_id: BucketId,
-}
-
 /// The specialized missing term aggregation.
-#[derive(Default, Debug)]
+#[derive(Default, Debug, Clone)]
 pub struct TermMissingAgg {
+    missing_count: u32,
    accessor_idx: usize,
-    sub_agg: Option<HighCardCachedSubAggs>,
-    /// Idx = parent bucket id, Value = missing count for that bucket
-    missing_count_per_bucket: Vec<MissingCount>,
-    bucket_id_provider: BucketIdProvider,
+    sub_agg: Option<Box<dyn SegmentAggregationCollector>>,
 }
 impl TermMissingAgg {
    pub(crate) fn new(
-        agg_data: &mut AggregationsSegmentCtx,
+        req_data: &mut AggregationsSegmentCtx,
        node: &AggRefNode,
    ) -> crate::Result<Self> {
        let has_sub_aggregations = !node.children.is_empty();
        let accessor_idx = node.idx_in_req_data;
        let sub_agg = if has_sub_aggregations {
-            let sub_aggregation = build_segment_agg_collectors(agg_data, &node.children)?;
+            let sub_aggregation = build_segment_agg_collectors(req_data, &node.children)?;
            Some(sub_aggregation)
        } else {
            None
        };

-        let sub_agg = sub_agg.map(CachedSubAggs::new);
-        let bucket_id_provider = BucketIdProvider::default();
-
        Ok(Self {
            accessor_idx,
            sub_agg,
-            missing_count_per_bucket: Vec::new(),
-            bucket_id_provider,
+            ..Default::default()
        })
    }
 }

 impl SegmentAggregationCollector for TermMissingAgg {
    fn add_intermediate_aggregation_result(
-        &mut self,
+        self: Box<Self>,
        agg_data: &AggregationsSegmentCtx,
        results: &mut IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
-        self.prepare_max_bucket(parent_bucket_id, agg_data)?;
        let req_data = agg_data.get_missing_term_req_data(self.accessor_idx);
        let term_agg = &req_data.req;
        let missing = term_agg
@@ -96,16 +80,13 @@ impl SegmentAggregationCollector for TermMissingAgg {
        let mut entries: FxHashMap<IntermediateKey, IntermediateTermBucketEntry> =
            Default::default();

-        let missing_count = &self.missing_count_per_bucket[parent_bucket_id as usize];
        let mut missing_entry = IntermediateTermBucketEntry {
-            doc_count: missing_count.missing_count,
+            doc_count: self.missing_count,
            sub_aggregation: Default::default(),
        };
-        if let Some(sub_agg) = &mut self.sub_agg {
+        if let Some(sub_agg) = self.sub_agg {
            let mut res = IntermediateAggregationResults::default();
-            sub_agg
-                .get_sub_agg_collector()
-                .add_intermediate_aggregation_result(agg_data, &mut res, missing_count.bucket_id)?;
+            sub_agg.add_intermediate_aggregation_result(agg_data, &mut res)?;
            missing_entry.sub_aggregation = res;
        }
        entries.insert(missing.into(), missing_entry);
@@ -128,52 +109,30 @@ impl SegmentAggregationCollector for TermMissingAgg {

    fn collect(
        &mut self,
-        parent_bucket_id: BucketId,
+        doc: crate::DocId,
+        agg_data: &mut AggregationsSegmentCtx,
+    ) -> crate::Result<()> {
+        let req_data = agg_data.get_missing_term_req_data(self.accessor_idx);
+        let has_value = req_data
+            .accessors
+            .iter()
+            .any(|(acc, _)| acc.index.has_value(doc));
+        if !has_value {
+            self.missing_count += 1;
+            if let Some(sub_agg) = self.sub_agg.as_mut() {
+                sub_agg.collect(doc, agg_data)?;
+            }
+        }
+        Ok(())
+    }
+
+    fn collect_block(
+        &mut self,
        docs: &[crate::DocId],
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        let bucket = &mut self.missing_count_per_bucket[parent_bucket_id as usize];
-        let req_data = agg_data.get_missing_term_req_data(self.accessor_idx);
-
        for doc in docs {
-            let doc = *doc;
-            let has_value = req_data
-                .accessors
-                .iter()
-                .any(|(acc, _)| acc.index.has_value(doc));
-            if !has_value {
-                bucket.missing_count += 1;
-
-                if let Some(sub_agg) = self.sub_agg.as_mut() {
-                    sub_agg.push(bucket.bucket_id, doc);
-                }
-            }
-        }
-
-        if let Some(sub_agg) = self.sub_agg.as_mut() {
-            sub_agg.check_flush_local(agg_data)?;
-        }
-        Ok(())
-    }
-
-    fn prepare_max_bucket(
-        &mut self,
-        max_bucket: BucketId,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> crate::Result<()> {
-        while self.missing_count_per_bucket.len() <= max_bucket as usize {
-            let bucket_id = self.bucket_id_provider.next_bucket_id();
-            self.missing_count_per_bucket.push(MissingCount {
-                missing_count: 0,
-                bucket_id,
-            });
-        }
-        Ok(())
-    }
-
-    fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
-        if let Some(sub_agg) = self.sub_agg.as_mut() {
-            sub_agg.flush(agg_data)?;
+            self.collect(*doc, agg_data)?;
        }
        Ok(())
    }
--- a/src/aggregation/buf_collector.rs
+++ b/src/aggregation/buf_collector.rs
@@ -0,0 +1,87 @@
+use super::intermediate_agg_result::IntermediateAggregationResults;
+use super::segment_agg_result::SegmentAggregationCollector;
+use crate::aggregation::agg_data::AggregationsSegmentCtx;
+use crate::DocId;
+
+/// Number of documents staged before they are forwarded as one block (test builds).
+///
+/// NOTE(review): presumably smaller than the non-test value so that tests reach the
+/// buffer-full path with fewer documents — confirm intent.
+#[cfg(test)]
+pub(crate) const DOC_BLOCK_SIZE: usize = 64;
+
+/// Number of documents staged before they are forwarded as one block (non-test builds).
+#[cfg(not(test))]
+pub(crate) const DOC_BLOCK_SIZE: usize = 256;
+
+/// Fixed-size array of `DOC_BLOCK_SIZE` document ids, used as the staging buffer.
+pub(crate) type DocBlock = [DocId; DOC_BLOCK_SIZE];
+
+/// BufAggregationCollector buffers documents before calling collect_block().
+///
+/// Documents passed one at a time via `collect()` are staged in a fixed-size block and handed
+/// to the wrapped collector's `collect_block()` once the block is full, or on `flush()`.
+#[derive(Clone)]
+pub(crate) struct BufAggregationCollector {
+    /// The wrapped collector that receives the staged blocks.
+    pub(crate) collector: Box<dyn SegmentAggregationCollector>,
+    /// Staging buffer; only the first `num_staged_docs` entries are meaningful.
+    staged_docs: DocBlock,
+    /// Number of valid entries at the front of `staged_docs`.
+    num_staged_docs: usize,
+}
+
+// Manual `Debug` impl: prints only the currently staged documents and their count; the
+// wrapped collector is not included in the output.
+impl std::fmt::Debug for BufAggregationCollector {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Use this type's own name so debug dumps identify the right collector.
+        f.debug_struct("BufAggregationCollector")
+            // Only the filled prefix of the buffer is shown; the rest is stale or zeroed.
+            .field("staged_docs", &&self.staged_docs[..self.num_staged_docs])
+            .field("num_staged_docs", &self.num_staged_docs)
+            .finish()
+    }
+}
+
+impl BufAggregationCollector {
+    /// Wraps `collector` behind an initially empty, zero-filled staging buffer.
+    pub fn new(collector: Box<dyn SegmentAggregationCollector>) -> Self {
+        let staged_docs: DocBlock = [0; DOC_BLOCK_SIZE];
+        Self {
+            collector,
+            staged_docs,
+            num_staged_docs: 0,
+        }
+    }
+}
+
+impl SegmentAggregationCollector for BufAggregationCollector {
+    /// Consumes the buffer and lets the wrapped collector write its results into `results`.
+    ///
+    /// Staged documents are not forwarded here; `flush()` is the method that hands them to
+    /// the wrapped collector.
+    #[inline]
+    fn add_intermediate_aggregation_result(
+        self: Box<Self>,
+        agg_data: &AggregationsSegmentCtx,
+        results: &mut IntermediateAggregationResults,
+    ) -> crate::Result<()> {
+        // `collector` is already a `Box<dyn SegmentAggregationCollector>`, so it can be
+        // consumed directly; wrapping it in another `Box` only adds a heap allocation.
+        self.collector
+            .add_intermediate_aggregation_result(agg_data, results)
+    }
+
+    /// Stages `doc`; once the staging buffer is full, forwards the whole buffer to the
+    /// wrapped collector as one block and resets the buffer.
+    #[inline]
+    fn collect(
+        &mut self,
+        doc: crate::DocId,
+        agg_data: &mut AggregationsSegmentCtx,
+    ) -> crate::Result<()> {
+        self.staged_docs[self.num_staged_docs] = doc;
+        self.num_staged_docs += 1;
+        if self.num_staged_docs == self.staged_docs.len() {
+            self.collector
+                .collect_block(&self.staged_docs[..self.num_staged_docs], agg_data)?;
+            self.num_staged_docs = 0;
+        }
+        Ok(())
+    }
+
+    /// Forwards `docs` to the wrapped collector without staging them.
+    ///
+    /// Documents already staged through `collect()` stay in the buffer, so they reach the
+    /// wrapped collector after this block.
+    #[inline]
+    fn collect_block(
+        &mut self,
+        docs: &[crate::DocId],
+        agg_data: &mut AggregationsSegmentCtx,
+    ) -> crate::Result<()> {
+        self.collector.collect_block(docs, agg_data)
+    }
+
+    /// Forwards the remaining staged documents, then flushes the wrapped collector.
+    #[inline]
+    fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
+        self.collector
+            .collect_block(&self.staged_docs[..self.num_staged_docs], agg_data)?;
+        self.num_staged_docs = 0;
+
+        self.collector.flush(agg_data)
+    }
+}
--- a/src/aggregation/cached_sub_aggs.rs
+++ b/src/aggregation/cached_sub_aggs.rs
@@ -1,245 +0,0 @@
-use std::fmt::Debug;
-
-use super::segment_agg_result::SegmentAggregationCollector;
-use crate::aggregation::agg_data::AggregationsSegmentCtx;
-use crate::aggregation::bucket::MAX_NUM_TERMS_FOR_VEC;
-use crate::aggregation::BucketId;
-use crate::DocId;
-
-/// A cache for sub-aggregations, storing doc ids per bucket id.
-/// Depending on the cardinality of the parent aggregation, we use different
-/// storage strategies.
-///
-/// ## Low Cardinality
-/// Cardinality here refers to the number of unique flattened buckets that can be created
-/// by the parent aggregation.
-/// Flattened buckets are the result of combining all buckets per collector
-/// into a single list of buckets, where each bucket is identified by its BucketId.
-///
-/// ## Usage
-/// Since this is caching for sub-aggregations, it is only used by bucket
-/// aggregations.
-///
-/// TODO: consider using a more advanced data structure for high cardinality
-/// aggregations.
-/// What this datastructure does in general is to group docs by bucket id.
-#[derive(Debug)]
-pub(crate) struct CachedSubAggs<C: SubAggCache> {
-    cache: C,
-    sub_agg_collector: Box<dyn SegmentAggregationCollector>,
-    num_docs: usize,
-}
-
-pub type LowCardCachedSubAggs = CachedSubAggs<LowCardSubAggCache>;
-pub type HighCardCachedSubAggs = CachedSubAggs<HighCardSubAggCache>;
-
-const FLUSH_THRESHOLD: usize = 2048;
-
-/// A trait for caching sub-aggregation doc ids per bucket id.
-/// Different implementations can be used depending on the cardinality
-/// of the parent aggregation.
-pub trait SubAggCache: Debug {
-    fn new() -> Self;
-    fn push(&mut self, bucket_id: BucketId, doc_id: DocId);
-    fn flush_local(
-        &mut self,
-        sub_agg: &mut Box<dyn SegmentAggregationCollector>,
-        agg_data: &mut AggregationsSegmentCtx,
-        force: bool,
-    ) -> crate::Result<()>;
-}
-
-impl<Backend: SubAggCache + Debug> CachedSubAggs<Backend> {
-    pub fn new(sub_agg: Box<dyn SegmentAggregationCollector>) -> Self {
-        Self {
-            cache: Backend::new(),
-            sub_agg_collector: sub_agg,
-            num_docs: 0,
-        }
-    }
-
-    pub fn get_sub_agg_collector(&mut self) -> &mut Box<dyn SegmentAggregationCollector> {
-        &mut self.sub_agg_collector
-    }
-
-    #[inline]
-    pub fn push(&mut self, bucket_id: BucketId, doc_id: DocId) {
-        self.cache.push(bucket_id, doc_id);
-        self.num_docs += 1;
-    }
-
-    /// Check if we need to flush based on the number of documents cached.
-    /// If so, flushes the cache to the provided aggregation collector.
-    pub fn check_flush_local(
-        &mut self,
-        agg_data: &mut AggregationsSegmentCtx,
-    ) -> crate::Result<()> {
-        if self.num_docs >= FLUSH_THRESHOLD {
-            self.cache
-                .flush_local(&mut self.sub_agg_collector, agg_data, false)?;
-            self.num_docs = 0;
-        }
-        Ok(())
-    }
-
-    /// Note: this _does_ flush the sub aggregations.
-    pub fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
-        if self.num_docs != 0 {
-            self.cache
-                .flush_local(&mut self.sub_agg_collector, agg_data, true)?;
-            self.num_docs = 0;
-        }
-        self.sub_agg_collector.flush(agg_data)?;
-        Ok(())
-    }
-}
-
-/// Number of partitions for high cardinality sub-aggregation cache.
-const NUM_PARTITIONS: usize = 16;
-
-#[derive(Debug)]
-pub(crate) struct HighCardSubAggCache {
-    /// This weird partitioning is used to do some cheap grouping on the bucket ids.
-    /// bucket ids are dense, e.g. when we don't detect the cardinality as low cardinality,
-    /// but there are just 16 bucket ids, each bucket id will go to its own partition.
-    ///
-    /// We want to keep this cheap, because high cardinality aggregations can have a lot of
-    /// buckets, and there may be nothing to group.
-    partitions: Box<[PartitionEntry; NUM_PARTITIONS]>,
-}
-
-impl HighCardSubAggCache {
-    #[inline]
-    fn clear(&mut self) {
-        for partition in self.partitions.iter_mut() {
-            partition.clear();
-        }
-    }
-}
-
-#[derive(Debug, Clone, Default)]
-struct PartitionEntry {
-    bucket_ids: Vec<BucketId>,
-    docs: Vec<DocId>,
-}
-
-impl PartitionEntry {
-    #[inline]
-    fn clear(&mut self) {
-        self.bucket_ids.clear();
-        self.docs.clear();
-    }
-}
-
-impl SubAggCache for HighCardSubAggCache {
-    fn new() -> Self {
-        Self {
-            partitions: Box::new(core::array::from_fn(|_| PartitionEntry::default())),
-        }
-    }
-
-    fn push(&mut self, bucket_id: BucketId, doc_id: DocId) {
-        let idx = bucket_id % NUM_PARTITIONS as u32;
-        let slot = &mut self.partitions[idx as usize];
-        slot.bucket_ids.push(bucket_id);
-        slot.docs.push(doc_id);
-    }
-
-    fn flush_local(
-        &mut self,
-        sub_agg: &mut Box<dyn SegmentAggregationCollector>,
-        agg_data: &mut AggregationsSegmentCtx,
-        _force: bool,
-    ) -> crate::Result<()> {
-        let mut max_bucket = 0u32;
-        for partition in self.partitions.iter() {
-            if let Some(&local_max) = partition.bucket_ids.iter().max() {
-                max_bucket = max_bucket.max(local_max);
-            }
-        }
-
-        sub_agg.prepare_max_bucket(max_bucket, agg_data)?;
-
-        for slot in self.partitions.iter() {
-            if !slot.bucket_ids.is_empty() {
-                // Reduce dynamic dispatch overhead by collecting a full partition in one call.
-                sub_agg.collect_multiple(&slot.bucket_ids, &slot.docs, agg_data)?;
-            }
-        }
-
-        self.clear();
-        Ok(())
-    }
-}
-
-#[derive(Debug)]
-pub(crate) struct LowCardSubAggCache {
-    /// Cache doc ids per bucket for sub-aggregations.
-    ///
-    /// The outer Vec is indexed by BucketId.
-    per_bucket_docs: Vec<Vec<DocId>>,
-}
-
-impl LowCardSubAggCache {
-    #[inline]
-    fn clear(&mut self) {
-        for v in &mut self.per_bucket_docs {
-            v.clear();
-        }
-    }
-}
-
-impl SubAggCache for LowCardSubAggCache {
-    fn new() -> Self {
-        Self {
-            per_bucket_docs: Vec::new(),
-        }
-    }
-
-    fn push(&mut self, bucket_id: BucketId, doc_id: DocId) {
-        let idx = bucket_id as usize;
-        if self.per_bucket_docs.len() <= idx {
-            self.per_bucket_docs.resize_with(idx + 1, Vec::new);
-        }
-        self.per_bucket_docs[idx].push(doc_id);
-    }
-
-    fn flush_local(
-        &mut self,
-        sub_agg: &mut Box<dyn SegmentAggregationCollector>,
-        agg_data: &mut AggregationsSegmentCtx,
-        force: bool,
-    ) -> crate::Result<()> {
-        // Pre-aggregated: call collect per bucket.
-        let max_bucket = (self.per_bucket_docs.len() as BucketId).saturating_sub(1);
-        sub_agg.prepare_max_bucket(max_bucket, agg_data)?;
-        // The threshold above which we flush buckets individually.
-        // Note: We need to make sure that we don't lock ourselves into a situation where we hit
-        // the FLUSH_THRESHOLD, but never flush any buckets. (except the final flush)
-        let mut bucket_treshold = FLUSH_THRESHOLD / (self.per_bucket_docs.len().max(1) * 2);
-        const _: () = {
-            // MAX_NUM_TERMS_FOR_VEC threshold is used for term aggregations
-            // Note: There may be other flexible values, for other aggregations, but we can use the
-            // const value here as a upper bound. (better than nothing)
-            let bucket_treshold_limit = FLUSH_THRESHOLD / (MAX_NUM_TERMS_FOR_VEC as usize * 2);
-            assert!(
-                bucket_treshold_limit > 0,
-                "Bucket threshold must be greater than 0"
-            );
-        };
-        if force {
-            bucket_treshold = 0;
-        }
-        for (bucket_id, docs) in self
-            .per_bucket_docs
-            .iter()
-            .enumerate()
-            .filter(|(_, docs)| docs.len() > bucket_treshold)
-        {
-            sub_agg.collect(bucket_id as BucketId, docs, agg_data)?;
-        }
-
-        self.clear();
-        Ok(())
-    }
-}
--- a/src/aggregation/collector.rs
+++ b/src/aggregation/collector.rs
@@ -1,9 +1,9 @@
 use super::agg_req::Aggregations;
 use super::agg_result::AggregationResults;
-use super::cached_sub_aggs::LowCardCachedSubAggs;
+use super::buf_collector::BufAggregationCollector;
 use super::intermediate_agg_result::IntermediateAggregationResults;
+use super::segment_agg_result::SegmentAggregationCollector;
 use super::AggContextParams;
-// group buffering strategy is chosen explicitly by callers; no need to hash-group on the fly.
 use crate::aggregation::agg_data::{
    build_aggregations_data_from_req, build_segment_agg_collectors_root, AggregationsSegmentCtx,
 };
@@ -136,7 +136,7 @@ fn merge_fruits(
 /// `AggregationSegmentCollector` does the aggregation collection on a segment.
 pub struct AggregationSegmentCollector {
    aggs_with_accessor: AggregationsSegmentCtx,
-    agg_collector: LowCardCachedSubAggs,
+    agg_collector: BufAggregationCollector,
    error: Option<TantivyError>,
 }

@@ -151,11 +151,8 @@ impl AggregationSegmentCollector {
    ) -> crate::Result<Self> {
        let mut agg_data =
            build_aggregations_data_from_req(agg, reader, segment_ordinal, context.clone())?;
-        let mut result =
-            LowCardCachedSubAggs::new(build_segment_agg_collectors_root(&mut agg_data)?);
-        result
-            .get_sub_agg_collector()
-            .prepare_max_bucket(0, &agg_data)?; // prepare for bucket zero
+        let result =
+            BufAggregationCollector::new(build_segment_agg_collectors_root(&mut agg_data)?);

        Ok(AggregationSegmentCollector {
            aggs_with_accessor: agg_data,
@@ -173,31 +170,26 @@ impl SegmentCollector for AggregationSegmentCollector {
        if self.error.is_some() {
            return;
        }
-        self.agg_collector.push(0, doc);
-        match self
+        if let Err(err) = self
            .agg_collector
-            .check_flush_local(&mut self.aggs_with_accessor)
+            .collect(doc, &mut self.aggs_with_accessor)
        {
-            Ok(_) => {}
-            Err(e) => {
-                self.error = Some(e);
-            }
+            self.error = Some(err);
        }
    }
+
+    /// The query pushes the documents to the collector via this method.
+    ///
+    /// Only valid for Collectors that ignore docs
    fn collect_block(&mut self, docs: &[DocId]) {
        if self.error.is_some() {
            return;
        }
-
-        match self.agg_collector.get_sub_agg_collector().collect(
-            0,
-            docs,
-            &mut self.aggs_with_accessor,
-        ) {
-            Ok(_) => {}
-            Err(e) => {
-                self.error = Some(e);
-            }
+        if let Err(err) = self
+            .agg_collector
+            .collect_block(docs, &mut self.aggs_with_accessor)
+        {
+            self.error = Some(err);
        }
    }

@@ -208,13 +200,10 @@ impl SegmentCollector for AggregationSegmentCollector {
        self.agg_collector.flush(&mut self.aggs_with_accessor)?;

        let mut sub_aggregation_res = IntermediateAggregationResults::default();
-        self.agg_collector
-            .get_sub_agg_collector()
-            .add_intermediate_aggregation_result(
-                &self.aggs_with_accessor,
-                &mut sub_aggregation_res,
-                0,
-            )?;
+        Box::new(self.agg_collector).add_intermediate_aggregation_result(
+            &self.aggs_with_accessor,
+            &mut sub_aggregation_res,
+        )?;

        Ok(sub_aggregation_res)
    }
--- a/src/aggregation/intermediate_agg_result.rs
+++ b/src/aggregation/intermediate_agg_result.rs
@@ -792,7 +792,7 @@ pub struct IntermediateRangeBucketEntry {
    /// The number of documents in the bucket.
    pub doc_count: u64,
    /// The sub_aggregation in this bucket.
-    pub sub_aggregation_res: IntermediateAggregationResults,
+    pub sub_aggregation: IntermediateAggregationResults,
    /// The from range of the bucket. Equals `f64::MIN` when `None`.
    pub from: Option<f64>,
    /// The to range of the bucket. Equals `f64::MAX` when `None`.
@@ -811,7 +811,7 @@ impl IntermediateRangeBucketEntry {
            key: self.key.into(),
            doc_count: self.doc_count,
            sub_aggregation: self
-                .sub_aggregation_res
+                .sub_aggregation
                .into_final_result_internal(req, limits)?,
            to: self.to,
            from: self.from,
@@ -857,8 +857,7 @@ impl MergeFruits for IntermediateTermBucketEntry {
 impl MergeFruits for IntermediateRangeBucketEntry {
    fn merge_fruits(&mut self, other: IntermediateRangeBucketEntry) -> crate::Result<()> {
        self.doc_count += other.doc_count;
-        self.sub_aggregation_res
-            .merge_fruits(other.sub_aggregation_res)?;
+        self.sub_aggregation.merge_fruits(other.sub_aggregation)?;
        Ok(())
    }
 }
@@ -888,7 +887,7 @@ mod tests {
                IntermediateRangeBucketEntry {
                    key: IntermediateKey::Str(key.to_string()),
                    doc_count: *doc_count,
-                    sub_aggregation_res: Default::default(),
+                    sub_aggregation: Default::default(),
                    from: None,
                    to: None,
                },
@@ -921,7 +920,7 @@ mod tests {
                    doc_count: *doc_count,
                    from: None,
                    to: None,
-                    sub_aggregation_res: get_sub_test_tree(&[(
+                    sub_aggregation: get_sub_test_tree(&[(
                        sub_aggregation_key.to_string(),
                        *sub_aggregation_count,
                    )]),
--- a/src/aggregation/metric/average.rs
+++ b/src/aggregation/metric/average.rs
@@ -52,8 +52,10 @@ pub struct IntermediateAverage {

 impl IntermediateAverage {
    /// Creates a new [`IntermediateAverage`] instance from a [`SegmentStatsCollector`].
-    pub(crate) fn from_stats(stats: IntermediateStats) -> Self {
-        Self { stats }
+    pub(crate) fn from_collector(collector: SegmentStatsCollector) -> Self {
+        Self {
+            stats: collector.stats,
+        }
    }
    /// Merges the other intermediate result into self.
    pub fn merge_fruits(&mut self, other: IntermediateAverage) {
--- a/src/aggregation/metric/cardinality.rs
+++ b/src/aggregation/metric/cardinality.rs
@@ -2,7 +2,7 @@ use std::collections::hash_map::DefaultHasher;
 use std::hash::{BuildHasher, Hasher};

 use columnar::column_values::CompactSpaceU64Accessor;
-use columnar::{Column, ColumnType, Dictionary, StrColumn};
+use columnar::{Column, ColumnBlockAccessor, ColumnType, Dictionary, StrColumn};
 use common::f64_to_u64;
 use hyperloglogplus::{HyperLogLog, HyperLogLogPlus};
 use rustc_hash::FxHashSet;
@@ -106,6 +106,8 @@ pub struct CardinalityAggReqData {
    pub str_dict_column: Option<StrColumn>,
    /// The missing value normalized to the internal u64 representation of the field type.
    pub missing_value_for_accessor: Option<u64>,
+    /// The column block accessor to access the fast field values.
+    pub(crate) column_block_accessor: ColumnBlockAccessor<u64>,
    /// The name of the aggregation.
    pub name: String,
    /// The aggregation request.
@@ -133,34 +135,45 @@ impl CardinalityAggregationReq {
    }
 }

-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq)]
 pub(crate) struct SegmentCardinalityCollector {
-    buckets: Vec<SegmentCardinalityCollectorBucket>,
-    accessor_idx: usize,
-    /// The column accessor to access the fast field values.
-    accessor: Column<u64>,
-    /// The column_type of the field.
-    column_type: ColumnType,
-    /// The missing value normalized to the internal u64 representation of the field type.
-    missing_value_for_accessor: Option<u64>,
-}
-
-#[derive(Clone, Debug, PartialEq, Default)]
-pub(crate) struct SegmentCardinalityCollectorBucket {
    cardinality: CardinalityCollector,
    entries: FxHashSet<u64>,
+    accessor_idx: usize,
 }
-impl SegmentCardinalityCollectorBucket {
-    pub fn new(column_type: ColumnType) -> Self {
+
+impl SegmentCardinalityCollector {
+    pub fn from_req(column_type: ColumnType, accessor_idx: usize) -> Self {
        Self {
            cardinality: CardinalityCollector::new(column_type as u8),
-            entries: FxHashSet::default(),
+            entries: Default::default(),
+            accessor_idx,
        }
    }
+
+    /// Loads the column values for `docs` into the request's block accessor.
+    ///
+    /// If the request carries a normalized missing value it is passed to
+    /// `fetch_block_with_missing`; otherwise plain `fetch_block` is used.
+    fn fetch_block_with_field(
+        &mut self,
+        docs: &[crate::DocId],
+        agg_data: &mut CardinalityAggReqData,
+    ) {
+        // Split the borrow: the column is only read, the block accessor is filled.
+        let column = &agg_data.accessor;
+        let block_accessor = &mut agg_data.column_block_accessor;
+        match agg_data.missing_value_for_accessor {
+            Some(missing) => {
+                block_accessor.fetch_block_with_missing(docs, column, missing);
+            }
+            None => {
+                block_accessor.fetch_block(docs, column);
+            }
+        }
+    }
+
    fn into_intermediate_metric_result(
        mut self,
-        req_data: &CardinalityAggReqData,
+        agg_data: &AggregationsSegmentCtx,
    ) -> crate::Result<IntermediateMetricResult> {
+        let req_data = &agg_data.get_cardinality_req_data(self.accessor_idx);
        if req_data.column_type == ColumnType::Str {
            let fallback_dict = Dictionary::empty();
            let dict = req_data
@@ -181,7 +194,6 @@ impl SegmentCardinalityCollectorBucket {
                    term_ids.push(term_ord as u32);
                }
            }
-
            term_ids.sort_unstable();
            dict.sorted_ords_to_term_cb(term_ids.iter().map(|term| *term as u64), |term| {
                self.cardinality.sketch.insert_any(&term);
@@ -215,49 +227,16 @@ impl SegmentCardinalityCollectorBucket {
    }
 }

-impl SegmentCardinalityCollector {
-    pub fn from_req(
-        column_type: ColumnType,
-        accessor_idx: usize,
-        accessor: Column<u64>,
-        missing_value_for_accessor: Option<u64>,
-    ) -> Self {
-        Self {
-            buckets: vec![SegmentCardinalityCollectorBucket::new(column_type); 1],
-            column_type,
-            accessor_idx,
-            accessor,
-            missing_value_for_accessor,
-        }
-    }
-
-    fn fetch_block_with_field(
-        &mut self,
-        docs: &[crate::DocId],
-        agg_data: &mut AggregationsSegmentCtx,
-    ) {
-        agg_data.column_block_accessor.fetch_block_with_missing(
-            docs,
-            &self.accessor,
-            self.missing_value_for_accessor,
-        );
-    }
-}
-
 impl SegmentAggregationCollector for SegmentCardinalityCollector {
    fn add_intermediate_aggregation_result(
-        &mut self,
+        self: Box<Self>,
        agg_data: &AggregationsSegmentCtx,
        results: &mut IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
-        self.prepare_max_bucket(parent_bucket_id, agg_data)?;
        let req_data = &agg_data.get_cardinality_req_data(self.accessor_idx);
        let name = req_data.name.to_string();
-        // take the bucket in buckets and replace it with a new empty one
-        let bucket = std::mem::take(&mut self.buckets[parent_bucket_id as usize]);

-        let intermediate_result = bucket.into_intermediate_metric_result(req_data)?;
+        let intermediate_result = self.into_intermediate_metric_result(agg_data)?;
        results.push(
            name,
            IntermediateAggregationResult::Metric(intermediate_result),
@@ -268,20 +247,27 @@ impl SegmentAggregationCollector for SegmentCardinalityCollector {

    fn collect(
        &mut self,
-        parent_bucket_id: BucketId,
+        doc: crate::DocId,
+        agg_data: &mut AggregationsSegmentCtx,
+    ) -> crate::Result<()> {
+        self.collect_block(&[doc], agg_data)
+    }
+
+    fn collect_block(
+        &mut self,
        docs: &[crate::DocId],
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        self.fetch_block_with_field(docs, agg_data);
-        let bucket = &mut self.buckets[parent_bucket_id as usize];
+        let req_data = agg_data.get_cardinality_req_data_mut(self.accessor_idx);
+        self.fetch_block_with_field(docs, req_data);

-        let col_block_accessor = &agg_data.column_block_accessor;
-        if self.column_type == ColumnType::Str {
+        let col_block_accessor = &req_data.column_block_accessor;
+        if req_data.column_type == ColumnType::Str {
            for term_ord in col_block_accessor.iter_vals() {
-                bucket.entries.insert(term_ord);
+                self.entries.insert(term_ord);
            }
-        } else if self.column_type == ColumnType::IpAddr {
-            let compact_space_accessor = self
+        } else if req_data.column_type == ColumnType::IpAddr {
+            let compact_space_accessor = req_data
                .accessor
                .values
                .clone()
@@ -296,29 +282,16 @@ impl SegmentAggregationCollector for SegmentCardinalityCollector {
                })?;
            for val in col_block_accessor.iter_vals() {
                let val: u128 = compact_space_accessor.compact_to_u128(val as u32);
-                bucket.cardinality.sketch.insert_any(&val);
+                self.cardinality.sketch.insert_any(&val);
            }
        } else {
            for val in col_block_accessor.iter_vals() {
-                bucket.cardinality.sketch.insert_any(&val);
+                self.cardinality.sketch.insert_any(&val);
            }
        }

        Ok(())
    }
-
-    fn prepare_max_bucket(
-        &mut self,
-        max_bucket: BucketId,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> crate::Result<()> {
-        if max_bucket as usize >= self.buckets.len() {
-            self.buckets.resize_with(max_bucket as usize + 1, || {
-                SegmentCardinalityCollectorBucket::new(self.column_type)
-            });
-        }
-        Ok(())
-    }
 }

 #[derive(Clone, Debug, Serialize, Deserialize)]
--- a/src/aggregation/metric/count.rs
+++ b/src/aggregation/metric/count.rs
@@ -52,8 +52,10 @@ pub struct IntermediateCount {

 impl IntermediateCount {
    /// Creates a new [`IntermediateCount`] instance from a [`SegmentStatsCollector`].
-    pub(crate) fn from_stats(stats: IntermediateStats) -> Self {
-        Self { stats }
+    pub(crate) fn from_collector(collector: SegmentStatsCollector) -> Self {
+        Self {
+            stats: collector.stats,
+        }
    }
    /// Merges the other intermediate result into self.
    pub fn merge_fruits(&mut self, other: IntermediateCount) {
--- a/src/aggregation/metric/extended_stats.rs
+++ b/src/aggregation/metric/extended_stats.rs
@@ -8,9 +8,10 @@ use crate::aggregation::agg_data::AggregationsSegmentCtx;
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
 };
+use crate::aggregation::metric::MetricAggReqData;
 use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
 use crate::aggregation::*;
-use crate::TantivyError;
+use crate::{DocId, TantivyError};

 /// A multi-value metric aggregation that computes a collection of extended statistics
 /// on numeric values that are extracted
@@ -317,28 +318,51 @@ impl IntermediateExtendedStats {
    }
 }

-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq)]
 pub(crate) struct SegmentExtendedStatsCollector {
-    name: String,
    missing: Option<u64>,
    field_type: ColumnType,
-    accessor: columnar::Column<u64>,
-    buckets: Vec<IntermediateExtendedStats>,
-    sigma: Option<f64>,
+    pub(crate) extended_stats: IntermediateExtendedStats,
+    pub(crate) accessor_idx: usize,
+    val_cache: Vec<u64>,
 }

 impl SegmentExtendedStatsCollector {
-    pub fn from_req(req: &MetricAggReqData, sigma: Option<f64>) -> Self {
-        let missing = req
-            .missing
-            .and_then(|val| f64_to_fastfield_u64(val, &req.field_type));
+    pub fn from_req(
+        field_type: ColumnType,
+        sigma: Option<f64>,
+        accessor_idx: usize,
+        missing: Option<f64>,
+    ) -> Self {
+        let missing = missing.and_then(|val| f64_to_fastfield_u64(val, &field_type));
        Self {
-            name: req.name.clone(),
-            field_type: req.field_type,
-            accessor: req.accessor.clone(),
+            field_type,
+            extended_stats: IntermediateExtendedStats::with_sigma(sigma),
+            accessor_idx,
            missing,
-            buckets: vec![IntermediateExtendedStats::with_sigma(sigma); 16],
-            sigma,
+            val_cache: Default::default(),
+        }
+    }
+    #[inline]
+    pub(crate) fn collect_block_with_field(
+        &mut self,
+        docs: &[DocId],
+        req_data: &mut MetricAggReqData,
+    ) {
+        if let Some(missing) = self.missing.as_ref() {
+            req_data.column_block_accessor.fetch_block_with_missing(
+                docs,
+                &req_data.accessor,
+                *missing,
+            );
+        } else {
+            req_data
+                .column_block_accessor
+                .fetch_block(docs, &req_data.accessor);
+        }
+        for val in req_data.column_block_accessor.iter_vals() {
+            let val1 = f64_from_fastfield_u64(val, &self.field_type);
+            self.extended_stats.collect(val1);
        }
    }
 }
@@ -346,18 +370,15 @@ impl SegmentExtendedStatsCollector {
 impl SegmentAggregationCollector for SegmentExtendedStatsCollector {
    #[inline]
    fn add_intermediate_aggregation_result(
-        &mut self,
+        self: Box<Self>,
        agg_data: &AggregationsSegmentCtx,
        results: &mut IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
-        let name = self.name.clone();
-        self.prepare_max_bucket(parent_bucket_id, agg_data)?;
-        let extended_stats = std::mem::take(&mut self.buckets[parent_bucket_id as usize]);
+        let name = agg_data.get_metric_req_data(self.accessor_idx).name.clone();
        results.push(
            name,
            IntermediateAggregationResult::Metric(IntermediateMetricResult::ExtendedStats(
-                extended_stats,
+                self.extended_stats,
            )),
        )?;

@@ -367,36 +388,39 @@ impl SegmentAggregationCollector for SegmentExtendedStatsCollector {
    #[inline]
    fn collect(
        &mut self,
-        parent_bucket_id: BucketId,
-        docs: &[crate::DocId],
+        doc: crate::DocId,
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        let mut extended_stats = self.buckets[parent_bucket_id as usize].clone();
-
-        agg_data
-            .column_block_accessor
-            .fetch_block_with_missing(docs, &self.accessor, self.missing);
-        for val in agg_data.column_block_accessor.iter_vals() {
-            let val1 = f64_from_fastfield_u64(val, self.field_type);
-            extended_stats.collect(val1);
+        let req_data = agg_data.get_metric_req_data(self.accessor_idx);
+        if let Some(missing) = self.missing {
+            let mut has_val = false;
+            for val in req_data.accessor.values_for_doc(doc) {
+                let val1 = f64_from_fastfield_u64(val, &self.field_type);
+                self.extended_stats.collect(val1);
+                has_val = true;
+            }
+            if !has_val {
+                self.extended_stats
+                    .collect(f64_from_fastfield_u64(missing, &self.field_type));
+            }
+        } else {
+            for val in req_data.accessor.values_for_doc(doc) {
+                let val1 = f64_from_fastfield_u64(val, &self.field_type);
+                self.extended_stats.collect(val1);
+            }
        }

-        // store back
-        self.buckets[parent_bucket_id as usize] = extended_stats;
-
        Ok(())
    }

-    fn prepare_max_bucket(
+    #[inline]
+    fn collect_block(
        &mut self,
-        max_bucket: BucketId,
-        _agg_data: &AggregationsSegmentCtx,
+        docs: &[crate::DocId],
+        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        if self.buckets.len() <= max_bucket as usize {
-            self.buckets.resize_with(max_bucket as usize + 1, || {
-                IntermediateExtendedStats::with_sigma(self.sigma)
-            });
-        }
+        let req_data = agg_data.get_metric_req_data_mut(self.accessor_idx);
+        self.collect_block_with_field(docs, req_data);
        Ok(())
    }
 }
--- a/src/aggregation/metric/max.rs
+++ b/src/aggregation/metric/max.rs
@@ -52,8 +52,10 @@ pub struct IntermediateMax {

 impl IntermediateMax {
    /// Creates a new [`IntermediateMax`] instance from a [`SegmentStatsCollector`].
-    pub(crate) fn from_stats(stats: IntermediateStats) -> Self {
-        Self { stats }
+    pub(crate) fn from_collector(collector: SegmentStatsCollector) -> Self {
+        Self {
+            stats: collector.stats,
+        }
    }
    /// Merges the other intermediate result into self.
    pub fn merge_fruits(&mut self, other: IntermediateMax) {
--- a/src/aggregation/metric/min.rs
+++ b/src/aggregation/metric/min.rs
@@ -52,8 +52,10 @@ pub struct IntermediateMin {

 impl IntermediateMin {
    /// Creates a new [`IntermediateMin`] instance from a [`SegmentStatsCollector`].
-    pub(crate) fn from_stats(stats: IntermediateStats) -> Self {
-        Self { stats }
+    pub(crate) fn from_collector(collector: SegmentStatsCollector) -> Self {
+        Self {
+            stats: collector.stats,
+        }
    }
    /// Merges the other intermediate result into self.
    pub fn merge_fruits(&mut self, other: IntermediateMin) {
--- a/src/aggregation/metric/mod.rs
+++ b/src/aggregation/metric/mod.rs
@@ -31,7 +31,7 @@ use std::collections::HashMap;

 pub use average::*;
 pub use cardinality::*;
-use columnar::{Column, ColumnType};
+use columnar::{Column, ColumnBlockAccessor, ColumnType};
 pub use count::*;
 pub use extended_stats::*;
 pub use max::*;
@@ -55,6 +55,8 @@ pub struct MetricAggReqData {
    pub field_type: ColumnType,
    /// The missing value normalized to the internal u64 representation of the field type.
    pub missing_u64: Option<u64>,
+    /// The column block accessor to access the fast field values.
+    pub column_block_accessor: ColumnBlockAccessor<u64>,
    /// The column accessor to access the fast field values.
    pub accessor: Column<u64>,
    /// Used when converting to intermediate result
--- a/src/aggregation/metric/percentiles.rs
+++ b/src/aggregation/metric/percentiles.rs
@@ -7,9 +7,10 @@ use crate::aggregation::agg_data::AggregationsSegmentCtx;
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
 };
+use crate::aggregation::metric::MetricAggReqData;
 use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
 use crate::aggregation::*;
-use crate::TantivyError;
+use crate::{DocId, TantivyError};

 /// # Percentiles
 ///
@@ -130,16 +131,10 @@ impl PercentilesAggregationReq {
    }
 }

-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq)]
 pub(crate) struct SegmentPercentilesCollector {
-    pub(crate) buckets: Vec<PercentilesCollector>,
+    pub(crate) percentiles: PercentilesCollector,
    pub(crate) accessor_idx: usize,
-    /// The type of the field.
-    pub field_type: ColumnType,
-    /// The missing value normalized to the internal u64 representation of the field type.
-    pub missing_u64: Option<u64>,
-    /// The column accessor to access the fast field values.
-    pub accessor: Column<u64>,
 }

 #[derive(Clone, Serialize, Deserialize)]
@@ -234,18 +229,33 @@ impl PercentilesCollector {
 }

 impl SegmentPercentilesCollector {
-    pub fn from_req_and_validate(
-        field_type: ColumnType,
-        missing_u64: Option<u64>,
-        accessor: Column<u64>,
-        accessor_idx: usize,
-    ) -> Self {
-        Self {
-            buckets: Vec::with_capacity(64),
-            field_type,
-            missing_u64,
-            accessor,
+    pub fn from_req_and_validate(accessor_idx: usize) -> crate::Result<Self> {
+        Ok(Self {
+            percentiles: PercentilesCollector::new(),
            accessor_idx,
+        })
+    }
+    #[inline]
+    pub(crate) fn collect_block_with_field(
+        &mut self,
+        docs: &[DocId],
+        req_data: &mut MetricAggReqData,
+    ) {
+        if let Some(missing) = req_data.missing_u64.as_ref() {
+            req_data.column_block_accessor.fetch_block_with_missing(
+                docs,
+                &req_data.accessor,
+                *missing,
+            );
+        } else {
+            req_data
+                .column_block_accessor
+                .fetch_block(docs, &req_data.accessor);
+        }
+
+        for val in req_data.column_block_accessor.iter_vals() {
+            let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
+            self.percentiles.collect(val1);
        }
    }
 }
@@ -253,18 +263,12 @@ impl SegmentPercentilesCollector {
 impl SegmentAggregationCollector for SegmentPercentilesCollector {
    #[inline]
    fn add_intermediate_aggregation_result(
-        &mut self,
+        self: Box<Self>,
        agg_data: &AggregationsSegmentCtx,
        results: &mut IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
        let name = agg_data.get_metric_req_data(self.accessor_idx).name.clone();
-        self.prepare_max_bucket(parent_bucket_id, agg_data)?;
-        // Swap collector with an empty one to avoid cloning
-        let percentiles_collector = std::mem::take(&mut self.buckets[parent_bucket_id as usize]);
-
-        let intermediate_metric_result =
-            IntermediateMetricResult::Percentiles(percentiles_collector);
+        let intermediate_metric_result = IntermediateMetricResult::Percentiles(self.percentiles);

        results.push(
            name,
@@ -277,33 +281,40 @@ impl SegmentAggregationCollector for SegmentPercentilesCollector {
    #[inline]
    fn collect(
        &mut self,
-        parent_bucket_id: BucketId,
-        docs: &[crate::DocId],
+        doc: crate::DocId,
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        let percentiles = &mut self.buckets[parent_bucket_id as usize];
-        agg_data.column_block_accessor.fetch_block_with_missing(
-            docs,
-            &self.accessor,
-            self.missing_u64,
-        );
+        let req_data = agg_data.get_metric_req_data(self.accessor_idx);

-        for val in agg_data.column_block_accessor.iter_vals() {
-            let val1 = f64_from_fastfield_u64(val, self.field_type);
-            percentiles.collect(val1);
+        if let Some(missing) = req_data.missing_u64 {
+            let mut has_val = false;
+            for val in req_data.accessor.values_for_doc(doc) {
+                let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
+                self.percentiles.collect(val1);
+                has_val = true;
+            }
+            if !has_val {
+                self.percentiles
+                    .collect(f64_from_fastfield_u64(missing, &req_data.field_type));
+            }
+        } else {
+            for val in req_data.accessor.values_for_doc(doc) {
+                let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
+                self.percentiles.collect(val1);
+            }
        }

        Ok(())
    }

-    fn prepare_max_bucket(
+    #[inline]
+    fn collect_block(
        &mut self,
-        max_bucket: BucketId,
-        _agg_data: &AggregationsSegmentCtx,
+        docs: &[crate::DocId],
+        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        while self.buckets.len() <= max_bucket as usize {
-            self.buckets.push(PercentilesCollector::new());
-        }
+        let req_data = agg_data.get_metric_req_data_mut(self.accessor_idx);
+        self.collect_block_with_field(docs, req_data);
        Ok(())
    }
 }
--- a/src/aggregation/metric/stats.rs
+++ b/src/aggregation/metric/stats.rs
@@ -1,6 +1,5 @@
 use std::fmt::Debug;

-use columnar::{Column, ColumnType};
 use serde::{Deserialize, Serialize};

 use super::*;
@@ -8,9 +7,10 @@ use crate::aggregation::agg_data::AggregationsSegmentCtx;
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
 };
+use crate::aggregation::metric::MetricAggReqData;
 use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
 use crate::aggregation::*;
-use crate::TantivyError;
+use crate::{DocId, TantivyError};

 /// A multi-value metric aggregation that computes a collection of statistics on numeric values that
 /// are extracted from the aggregated documents.
@@ -83,7 +83,7 @@ impl Stats {

 /// Intermediate result of the stats aggregation that can be combined with other intermediate
 /// results.
-#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct IntermediateStats {
    /// The number of extracted values.
    pub(crate) count: u64,
@@ -187,75 +187,75 @@ pub enum StatsType {
    Percentiles,
 }

-fn create_collector<const TYPE_ID: u8>(
-    req: &MetricAggReqData,
-) -> Box<dyn SegmentAggregationCollector> {
-    Box::new(SegmentStatsCollector::<TYPE_ID> {
-        name: req.name.clone(),
-        collecting_for: req.collecting_for,
-        is_number_or_date_type: req.is_number_or_date_type,
-        missing_u64: req.missing_u64,
-        accessor: req.accessor.clone(),
-        buckets: vec![IntermediateStats::default()],
-    })
+#[derive(Clone, Debug)]
+pub(crate) struct SegmentStatsCollector {
+    pub(crate) stats: IntermediateStats,
+    pub(crate) accessor_idx: usize,
 }

-/// Build a concrete `SegmentStatsCollector` depending on the column type.
-pub(crate) fn build_segment_stats_collector(
-    req: &MetricAggReqData,
-) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
-    match req.field_type {
-        ColumnType::I64 => Ok(create_collector::<{ ColumnType::I64 as u8 }>(req)),
-        ColumnType::U64 => Ok(create_collector::<{ ColumnType::U64 as u8 }>(req)),
-        ColumnType::F64 => Ok(create_collector::<{ ColumnType::F64 as u8 }>(req)),
-        ColumnType::Bool => Ok(create_collector::<{ ColumnType::Bool as u8 }>(req)),
-        ColumnType::DateTime => Ok(create_collector::<{ ColumnType::DateTime as u8 }>(req)),
-        ColumnType::Bytes => Ok(create_collector::<{ ColumnType::Bytes as u8 }>(req)),
-        ColumnType::Str => Ok(create_collector::<{ ColumnType::Str as u8 }>(req)),
-        ColumnType::IpAddr => Ok(create_collector::<{ ColumnType::IpAddr as u8 }>(req)),
+impl SegmentStatsCollector {
+    pub fn from_req(accessor_idx: usize) -> Self {
+        Self {
+            stats: IntermediateStats::default(),
+            accessor_idx,
+        }
+    }
+    #[inline]
+    pub(crate) fn collect_block_with_field(
+        &mut self,
+        docs: &[DocId],
+        req_data: &mut MetricAggReqData,
+    ) {
+        if let Some(missing) = req_data.missing_u64.as_ref() {
+            req_data.column_block_accessor.fetch_block_with_missing(
+                docs,
+                &req_data.accessor,
+                *missing,
+            );
+        } else {
+            req_data
+                .column_block_accessor
+                .fetch_block(docs, &req_data.accessor);
+        }
+        if req_data.is_number_or_date_type {
+            for val in req_data.column_block_accessor.iter_vals() {
+                let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
+                self.stats.collect(val1);
+            }
+        } else {
+            for _val in req_data.column_block_accessor.iter_vals() {
+                // we ignore the value and simply record that we got something
+                self.stats.collect(0.0);
+            }
+        }
    }
 }

-#[repr(C)]
-#[derive(Clone, Debug)]
-pub(crate) struct SegmentStatsCollector<const COLUMN_TYPE_ID: u8> {
-    pub(crate) missing_u64: Option<u64>,
-    pub(crate) accessor: Column<u64>,
-    pub(crate) is_number_or_date_type: bool,
-    pub(crate) buckets: Vec<IntermediateStats>,
-    pub(crate) name: String,
-    pub(crate) collecting_for: StatsType,
-}
-
-impl<const COLUMN_TYPE_ID: u8> SegmentAggregationCollector
-    for SegmentStatsCollector<COLUMN_TYPE_ID>
-{
+impl SegmentAggregationCollector for SegmentStatsCollector {
    #[inline]
    fn add_intermediate_aggregation_result(
-        &mut self,
+        self: Box<Self>,
        agg_data: &AggregationsSegmentCtx,
        results: &mut IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
-        let name = self.name.clone();
+        let req = agg_data.get_metric_req_data(self.accessor_idx);
+        let name = req.name.clone();

-        self.prepare_max_bucket(parent_bucket_id, agg_data)?;
-        let stats = self.buckets[parent_bucket_id as usize];
-        let intermediate_metric_result = match self.collecting_for {
+        let intermediate_metric_result = match req.collecting_for {
            StatsType::Average => {
-                IntermediateMetricResult::Average(IntermediateAverage::from_stats(stats))
+                IntermediateMetricResult::Average(IntermediateAverage::from_collector(*self))
            }
            StatsType::Count => {
-                IntermediateMetricResult::Count(IntermediateCount::from_stats(stats))
+                IntermediateMetricResult::Count(IntermediateCount::from_collector(*self))
            }
-            StatsType::Max => IntermediateMetricResult::Max(IntermediateMax::from_stats(stats)),
-            StatsType::Min => IntermediateMetricResult::Min(IntermediateMin::from_stats(stats)),
-            StatsType::Stats => IntermediateMetricResult::Stats(stats),
-            StatsType::Sum => IntermediateMetricResult::Sum(IntermediateSum::from_stats(stats)),
+            StatsType::Max => IntermediateMetricResult::Max(IntermediateMax::from_collector(*self)),
+            StatsType::Min => IntermediateMetricResult::Min(IntermediateMin::from_collector(*self)),
+            StatsType::Stats => IntermediateMetricResult::Stats(self.stats),
+            StatsType::Sum => IntermediateMetricResult::Sum(IntermediateSum::from_collector(*self)),
            _ => {
                return Err(TantivyError::InvalidArgument(format!(
                    "Unsupported stats type for stats aggregation: {:?}",
-                    self.collecting_for
+                    req.collecting_for
                )))
            }
        };
@@ -271,67 +271,41 @@ impl<const COLUMN_TYPE_ID: u8> SegmentAggregationCollector
    #[inline]
    fn collect(
        &mut self,
-        parent_bucket_id: BucketId,
+        doc: crate::DocId,
+        agg_data: &mut AggregationsSegmentCtx,
+    ) -> crate::Result<()> {
+        let req_data = agg_data.get_metric_req_data(self.accessor_idx);
+        if let Some(missing) = req_data.missing_u64 {
+            let mut has_val = false;
+            for val in req_data.accessor.values_for_doc(doc) {
+                let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
+                self.stats.collect(val1);
+                has_val = true;
+            }
+            if !has_val {
+                self.stats
+                    .collect(f64_from_fastfield_u64(missing, &req_data.field_type));
+            }
+        } else {
+            for val in req_data.accessor.values_for_doc(doc) {
+                let val1 = f64_from_fastfield_u64(val, &req_data.field_type);
+                self.stats.collect(val1);
+            }
+        }
+
+        Ok(())
+    }
+
+    #[inline]
+    fn collect_block(
+        &mut self,
        docs: &[crate::DocId],
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        // TODO: remove once we fetch all values for all bucket ids in one go
-        if docs.len() == 1 && self.missing_u64.is_none() {
-            collect_stats::<COLUMN_TYPE_ID>(
-                &mut self.buckets[parent_bucket_id as usize],
-                self.accessor.values_for_doc(docs[0]),
-                self.is_number_or_date_type,
-            )?;
-
-            return Ok(());
-        }
-        agg_data.column_block_accessor.fetch_block_with_missing(
-            docs,
-            &self.accessor,
-            self.missing_u64,
-        );
-        collect_stats::<COLUMN_TYPE_ID>(
-            &mut self.buckets[parent_bucket_id as usize],
-            agg_data.column_block_accessor.iter_vals(),
-            self.is_number_or_date_type,
-        )?;
-
+        let req_data = agg_data.get_metric_req_data_mut(self.accessor_idx);
+        self.collect_block_with_field(docs, req_data);
        Ok(())
    }
-
-    fn prepare_max_bucket(
-        &mut self,
-        max_bucket: BucketId,
-        _agg_data: &AggregationsSegmentCtx,
-    ) -> crate::Result<()> {
-        let required_buckets = (max_bucket as usize) + 1;
-        if self.buckets.len() < required_buckets {
-            self.buckets
-                .resize_with(required_buckets, IntermediateStats::default);
-        }
-        Ok(())
-    }
-}
-
-#[inline]
-fn collect_stats<const COLUMN_TYPE_ID: u8>(
-    stats: &mut IntermediateStats,
-    vals: impl Iterator<Item = u64>,
-    is_number_or_date_type: bool,
-) -> crate::Result<()> {
-    if is_number_or_date_type {
-        for val in vals {
-            let val1 = convert_to_f64::<COLUMN_TYPE_ID>(val);
-            stats.collect(val1);
-        }
-    } else {
-        for _val in vals {
-            // we ignore the value and simply record that we got something
-            stats.collect(0.0);
-        }
-    }
-
-    Ok(())
 }

 #[cfg(test)]
--- a/src/aggregation/metric/sum.rs
+++ b/src/aggregation/metric/sum.rs
@@ -52,8 +52,10 @@ pub struct IntermediateSum {

 impl IntermediateSum {
    /// Creates a new [`IntermediateSum`] instance from a [`SegmentStatsCollector`].
-    pub(crate) fn from_stats(stats: IntermediateStats) -> Self {
-        Self { stats }
+    pub(crate) fn from_collector(collector: SegmentStatsCollector) -> Self {
+        Self {
+            stats: collector.stats,
+        }
    }
    /// Merges the other intermediate result into self.
    pub fn merge_fruits(&mut self, other: IntermediateSum) {
--- a/src/aggregation/metric/top_hits.rs
+++ b/src/aggregation/metric/top_hits.rs
@@ -15,11 +15,12 @@ use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResult, IntermediateMetricResult,
 };
 use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
-use crate::aggregation::{AggregationError, BucketId};
+use crate::aggregation::AggregationError;
 use crate::collector::sort_key::ReverseComparator;
 use crate::collector::TopNComputer;
 use crate::schema::OwnedValue;
 use crate::{DocAddress, DocId, SegmentOrdinal};
+// duplicate import removed; already imported above

 /// Contains all information required by the TopHitsSegmentCollector to perform the
 /// top_hits aggregation on a segment.
@@ -471,10 +472,7 @@ impl TopHitsTopNComputer {
    /// Create a new TopHitsCollector
    pub fn new(req: &TopHitsAggregationReq) -> Self {
        Self {
-            top_n: TopNComputer::new_with_comparator(
-                req.size + req.from.unwrap_or(0),
-                ReverseComparator,
-            ),
+            top_n: TopNComputer::new(req.size + req.from.unwrap_or(0)),
            req: req.clone(),
        }
    }
@@ -520,8 +518,7 @@ impl TopHitsTopNComputer {
 pub(crate) struct TopHitsSegmentCollector {
    segment_ordinal: SegmentOrdinal,
    accessor_idx: usize,
-    buckets: Vec<TopNComputer<Vec<DocValueAndOrder>, DocAddress, ReverseComparator>>,
-    num_hits: usize,
+    top_n: TopNComputer<Vec<DocValueAndOrder>, DocAddress, ReverseComparator>,
 }

 impl TopHitsSegmentCollector {
@@ -530,29 +527,19 @@ impl TopHitsSegmentCollector {
        accessor_idx: usize,
        segment_ordinal: SegmentOrdinal,
    ) -> Self {
-        let num_hits = req.size + req.from.unwrap_or(0);
        Self {
-            num_hits,
+            top_n: TopNComputer::new(req.size + req.from.unwrap_or(0)),
            segment_ordinal,
            accessor_idx,
-            buckets: vec![TopNComputer::new_with_comparator(num_hits, ReverseComparator); 1],
        }
    }
-    fn get_top_hits_computer(
-        &mut self,
-        parent_bucket_id: BucketId,
+    fn into_top_hits_collector(
+        self,
        value_accessors: &HashMap<String, Vec<DynamicColumn>>,
        req: &TopHitsAggregationReq,
    ) -> TopHitsTopNComputer {
-        if parent_bucket_id as usize >= self.buckets.len() {
-            return TopHitsTopNComputer::new(req);
-        }
-        let top_n = std::mem::replace(
-            &mut self.buckets[parent_bucket_id as usize],
-            TopNComputer::new(0),
-        );
        let mut top_hits_computer = TopHitsTopNComputer::new(req);
-        let top_results = top_n.into_vec();
+        let top_results = self.top_n.into_vec();

        for res in top_results {
            let doc_value_fields = req.get_document_field_data(value_accessors, res.doc.doc_id);
@@ -567,24 +554,54 @@ impl TopHitsSegmentCollector {

        top_hits_computer
    }
+
+    /// TODO add a specialized variant for a single sort field
+    fn collect_with(
+        &mut self,
+        doc_id: crate::DocId,
+        req: &TopHitsAggregationReq,
+        accessors: &[(Column<u64>, ColumnType)],
+    ) -> crate::Result<()> {
+        let sorts: Vec<DocValueAndOrder> = req
+            .sort
+            .iter()
+            .enumerate()
+            .map(|(idx, KeyOrder { order, .. })| {
+                let order = *order;
+                let value = accessors
+                    .get(idx)
+                    .expect("could not find field in accessors")
+                    .0
+                    .values_for_doc(doc_id)
+                    .next();
+                DocValueAndOrder { value, order }
+            })
+            .collect();
+
+        self.top_n.push(
+            sorts,
+            DocAddress {
+                segment_ord: self.segment_ordinal,
+                doc_id,
+            },
+        );
+        Ok(())
+    }
 }

 impl SegmentAggregationCollector for TopHitsSegmentCollector {
    fn add_intermediate_aggregation_result(
-        &mut self,
+        self: Box<Self>,
        agg_data: &AggregationsSegmentCtx,
        results: &mut crate::aggregation::intermediate_agg_result::IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
        let req_data = agg_data.get_top_hits_req_data(self.accessor_idx);

        let value_accessors = &req_data.value_accessors;

-        let intermediate_result = IntermediateMetricResult::TopHits(self.get_top_hits_computer(
-            parent_bucket_id,
-            value_accessors,
-            &req_data.req,
-        ));
+        let intermediate_result = IntermediateMetricResult::TopHits(
+            self.into_top_hits_collector(value_accessors, &req_data.req),
+        );
        results.push(
            req_data.name.to_string(),
            IntermediateAggregationResult::Metric(intermediate_result),
@@ -594,54 +611,24 @@ impl SegmentAggregationCollector for TopHitsSegmentCollector {
    /// TODO: Consider a caching layer to reduce the call overhead
    fn collect(
        &mut self,
-        parent_bucket_id: BucketId,
-        docs: &[crate::DocId],
+        doc_id: crate::DocId,
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        let top_n = &mut self.buckets[parent_bucket_id as usize];
        let req_data = agg_data.get_top_hits_req_data(self.accessor_idx);
-        let req = &req_data.req;
-        let accessors = &req_data.accessors;
-        for &doc_id in docs {
-            // TODO: this is terrible, a new vec is allocated for every doc
-            // We can fetch blocks instead
-            // We don't need to store the order for every value
-            let sorts: Vec<DocValueAndOrder> = req
-                .sort
-                .iter()
-                .enumerate()
-                .map(|(idx, KeyOrder { order, .. })| {
-                    let order = *order;
-                    let value = accessors
-                        .get(idx)
-                        .expect("could not find field in accessors")
-                        .0
-                        .values_for_doc(doc_id)
-                        .next();
-                    DocValueAndOrder { value, order }
-                })
-                .collect();
-
-            top_n.push(
-                sorts,
-                DocAddress {
-                    segment_ord: self.segment_ordinal,
-                    doc_id,
-                },
-            );
-        }
+        self.collect_with(doc_id, &req_data.req, &req_data.accessors)?;
        Ok(())
    }

-    fn prepare_max_bucket(
+    fn collect_block(
        &mut self,
-        max_bucket: BucketId,
-        _agg_data: &AggregationsSegmentCtx,
+        docs: &[crate::DocId],
+        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
-        self.buckets.resize(
-            (max_bucket as usize) + 1,
-            TopNComputer::new_with_comparator(self.num_hits, ReverseComparator),
-        );
+        let req_data = agg_data.get_top_hits_req_data(self.accessor_idx);
+        // TODO: Consider getting fields with the column block accessor.
+        for doc in docs {
+            self.collect_with(*doc, &req_data.req, &req_data.accessors)?;
+        }
        Ok(())
    }
 }
@@ -759,7 +746,7 @@ mod tests {
                    ],
                    "from": 0,
                }
-            }
+        }
        }))
        .unwrap();

@@ -888,7 +875,7 @@ mod tests {
                        "mixed.*",
                    ],
                }
-            }
+        }
        }))?;

        let collector = AggregationCollector::from_aggs(d, Default::default());
--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -133,7 +133,7 @@ mod agg_limits;
 pub mod agg_req;
 pub mod agg_result;
 pub mod bucket;
-pub(crate) mod cached_sub_aggs;
+mod buf_collector;
 mod collector;
 mod date;
 mod error;
@@ -162,19 +162,6 @@ use serde::{Deserialize, Deserializer, Serialize};

 use crate::tokenizer::TokenizerManager;

-/// A bucket id is a dense identifier for a bucket within an aggregation.
-/// It is used to index into a Vec that hold per-bucket data.
-///
-/// For example, in a terms aggregation, each unique term will be assigned a incremental BucketId.
-/// This BucketId will be forwarded to sub-aggregations to identify the parent bucket.
-///
-/// This allows to have a single AggregationCollector instance per aggregation,
-/// that can handle multiple buckets efficiently.
-///
-/// The API to call sub-aggregations is therefore a &[(BucketId, &[DocId])].
-/// For that we'll need a buffer. One Vec per bucket aggregation is needed.
-pub type BucketId = u32;
-
 /// Context parameters for aggregation execution
 ///
 /// This struct holds shared resources needed during aggregation execution:
@@ -348,37 +335,19 @@ impl Display for Key {
    }
 }

-pub(crate) fn convert_to_f64<const COLUMN_TYPE_ID: u8>(val: u64) -> f64 {
-    if COLUMN_TYPE_ID == ColumnType::U64 as u8 {
-        val as f64
-    } else if COLUMN_TYPE_ID == ColumnType::I64 as u8
-        || COLUMN_TYPE_ID == ColumnType::DateTime as u8
-    {
-        i64::from_u64(val) as f64
-    } else if COLUMN_TYPE_ID == ColumnType::F64 as u8 {
-        f64::from_u64(val)
-    } else if COLUMN_TYPE_ID == ColumnType::Bool as u8 {
-        val as f64
-    } else {
-        panic!(
-            "ColumnType ID {} cannot be converted to f64 metric",
-            COLUMN_TYPE_ID
-        )
-    }
-}
-
 /// Inverse of `to_fastfield_u64`. Used to convert to `f64` for metrics.
 ///
 /// # Panics
 /// Only `u64`, `f64`, `date`, and `i64` are supported.
-pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: ColumnType) -> f64 {
+pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &ColumnType) -> f64 {
    match field_type {
-        ColumnType::U64 => convert_to_f64::<{ ColumnType::U64 as u8 }>(val),
-        ColumnType::I64 => convert_to_f64::<{ ColumnType::I64 as u8 }>(val),
-        ColumnType::F64 => convert_to_f64::<{ ColumnType::F64 as u8 }>(val),
-        ColumnType::Bool => convert_to_f64::<{ ColumnType::Bool as u8 }>(val),
-        ColumnType::DateTime => convert_to_f64::<{ ColumnType::DateTime as u8 }>(val),
-        _ => panic!("unexpected type {field_type:?}. This should not happen"),
+        ColumnType::U64 => val as f64,
+        ColumnType::I64 | ColumnType::DateTime => i64::from_u64(val) as f64,
+        ColumnType::F64 => f64::from_u64(val),
+        ColumnType::Bool => val as f64,
+        _ => {
+            panic!("unexpected type {field_type:?}. This should not happen")
+        }
    }
 }

--- a/src/aggregation/segment_agg_result.rs
+++ b/src/aggregation/segment_agg_result.rs
@@ -8,67 +8,25 @@ use std::fmt::Debug;
 pub(crate) use super::agg_limits::AggregationLimitsGuard;
 use super::intermediate_agg_result::IntermediateAggregationResults;
 use crate::aggregation::agg_data::AggregationsSegmentCtx;
-use crate::aggregation::BucketId;
-
-/// Monotonically increasing provider of BucketIds.
-#[derive(Debug, Clone, Default)]
-pub struct BucketIdProvider(u32);
-impl BucketIdProvider {
-    /// Get the next BucketId.
-    pub fn next_bucket_id(&mut self) -> BucketId {
-        let bucket_id = self.0;
-        self.0 += 1;
-        bucket_id
-    }
-}

 /// A SegmentAggregationCollector is used to collect aggregation results.
-pub trait SegmentAggregationCollector: Debug {
+pub trait SegmentAggregationCollector: CollectorClone + Debug {
    fn add_intermediate_aggregation_result(
-        &mut self,
+        self: Box<Self>,
        agg_data: &AggregationsSegmentCtx,
        results: &mut IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
    ) -> crate::Result<()>;

-    /// Note: The caller needs to call `prepare_max_bucket` before calling `collect`.
    fn collect(
        &mut self,
-        parent_bucket_id: BucketId,
-        docs: &[crate::DocId],
+        doc: crate::DocId,
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()>;

-    /// Collect docs for multiple buckets in one call.
-    /// Minimizes dynamic dispatch overhead when collecting many buckets.
-    ///
-    /// Note: The caller needs to call `prepare_max_bucket` before calling `collect`.
-    fn collect_multiple(
+    fn collect_block(
        &mut self,
-        bucket_ids: &[BucketId],
        docs: &[crate::DocId],
        agg_data: &mut AggregationsSegmentCtx,
-    ) -> crate::Result<()> {
-        debug_assert_eq!(bucket_ids.len(), docs.len());
-        let mut start = 0;
-        while start < bucket_ids.len() {
-            let bucket_id = bucket_ids[start];
-            let mut end = start + 1;
-            while end < bucket_ids.len() && bucket_ids[end] == bucket_id {
-                end += 1;
-            }
-            self.collect(bucket_id, &docs[start..end], agg_data)?;
-            start = end;
-        }
-        Ok(())
-    }
-
-    /// Prepare the collector for collecting up to BucketId `max_bucket`.
-    /// This is useful so we can split allocation ahead of time of collecting.
-    fn prepare_max_bucket(
-        &mut self,
-        max_bucket: BucketId,
-        agg_data: &AggregationsSegmentCtx,
    ) -> crate::Result<()>;

    /// Finalize method. Some Aggregator collect blocks of docs before calling `collect_block`.
@@ -78,7 +36,26 @@ pub trait SegmentAggregationCollector: Debug {
    }
 }

-#[derive(Default)]
+/// A helper trait to enable cloning of `Box<dyn SegmentAggregationCollector>`.
+pub trait CollectorClone {
+    fn clone_box(&self) -> Box<dyn SegmentAggregationCollector>;
+}
+
+impl<T> CollectorClone for T
+where T: 'static + SegmentAggregationCollector + Clone
+{
+    fn clone_box(&self) -> Box<dyn SegmentAggregationCollector> {
+        Box::new(self.clone())
+    }
+}
+
+impl Clone for Box<dyn SegmentAggregationCollector> {
+    fn clone(&self) -> Box<dyn SegmentAggregationCollector> {
+        self.clone_box()
+    }
+}
+
+#[derive(Clone, Default)]
 /// The GenericSegmentAggregationResultsCollector is the generic version of the collector, which
 /// can handle arbitrary complexity of  sub-aggregations. Ideally we never have to pick this one
 /// and can provide specialized versions instead, that remove some of its overhead.
@@ -96,13 +73,12 @@ impl Debug for GenericSegmentAggregationResultsCollector {

 impl SegmentAggregationCollector for GenericSegmentAggregationResultsCollector {
    fn add_intermediate_aggregation_result(
-        &mut self,
+        self: Box<Self>,
        agg_data: &AggregationsSegmentCtx,
        results: &mut IntermediateAggregationResults,
-        parent_bucket_id: BucketId,
    ) -> crate::Result<()> {
-        for agg in &mut self.aggs {
-            agg.add_intermediate_aggregation_result(agg_data, results, parent_bucket_id)?;
+        for agg in self.aggs {
+            agg.add_intermediate_aggregation_result(agg_data, results)?;
        }

        Ok(())
@@ -110,13 +86,23 @@ impl SegmentAggregationCollector for GenericSegmentAggregationResultsCollector {

    fn collect(
        &mut self,
-        parent_bucket_id: BucketId,
+        doc: crate::DocId,
+        agg_data: &mut AggregationsSegmentCtx,
+    ) -> crate::Result<()> {
+        self.collect_block(&[doc], agg_data)?;
+
+        Ok(())
+    }
+
+    fn collect_block(
+        &mut self,
        docs: &[crate::DocId],
        agg_data: &mut AggregationsSegmentCtx,
    ) -> crate::Result<()> {
        for collector in &mut self.aggs {
-            collector.collect(parent_bucket_id, docs, agg_data)?;
+            collector.collect_block(docs, agg_data)?;
        }
+
        Ok(())
    }

@@ -126,15 +112,4 @@ impl SegmentAggregationCollector for GenericSegmentAggregationResultsCollector {
        }
        Ok(())
    }
-
-    fn prepare_max_bucket(
-        &mut self,
-        max_bucket: BucketId,
-        agg_data: &AggregationsSegmentCtx,
-    ) -> crate::Result<()> {
-        for collector in &mut self.aggs {
-            collector.prepare_max_bucket(max_bucket, agg_data)?;
-        }
-        Ok(())
-    }
 }
--- a/src/codec/mod.rs
+++ b/src/codec/mod.rs
@@ -1,169 +0,0 @@
-/// Codec specific to postings data.
-pub mod postings;
-
-/// Standard tantivy codec. This is the codec you use by default.
-pub mod standard;
-
-use std::io;
-
-pub use standard::StandardCodec;
-
-use crate::codec::postings::PostingsCodec;
-use crate::fieldnorm::FieldNormReader;
-use crate::postings::{Postings, TermInfo};
-use crate::query::{box_scorer, Bm25Weight, Scorer};
-use crate::schema::IndexRecordOption;
-use crate::{DocId, InvertedIndexReader, Score};
-
-/// Codecs describes how data is layed out on disk.
-///
-/// For the moment, only postings codec can be custom.
-pub trait Codec: Clone + std::fmt::Debug + Send + Sync + 'static {
-    /// The specific postings type used by this codec.
-    type PostingsCodec: PostingsCodec;
-
-    /// Name of the codec. It should be unique to your codec.
-    const NAME: &'static str;
-
-    /// Load codec based on the codec configuration.
-    fn from_json_props(json_value: &serde_json::Value) -> crate::Result<Self>;
-
-    /// Get codec configuration.
-    fn to_json_props(&self) -> serde_json::Value;
-
-    /// Returns the postings codec.
-    fn postings_codec(&self) -> &Self::PostingsCodec;
-}
-
-/// Object-safe codec is a Codec that can be used in a trait object.
-///
-/// The point of it is to offer a way to use a codec without a proliferation of generics.
-pub trait ObjectSafeCodec: 'static + Send + Sync {
-    /// Loads a type-erased Postings object for the given term.
-    ///
-    /// If the schema used to build the index did not provide enough
-    /// information to match the requested `option`, a Postings is still
-    /// returned in a best-effort manner.
-    fn load_postings_type_erased(
-        &self,
-        term_info: &TermInfo,
-        option: IndexRecordOption,
-        inverted_index_reader: &InvertedIndexReader,
-    ) -> io::Result<Box<dyn Postings>>;
-
-    /// Loads a type-erased TermScorer object for the given term.
-    ///
-    /// If the schema used to build the index did not provide enough
-    /// information to match the requested `option`, a TermScorer is still
-    /// returned in a best-effort manner.
-    ///
-    /// The point of this contraption is that the return TermScorer is backed,
-    /// not by Box<dyn Postings> but by the codec's concrete Postings type.
-    fn load_term_scorer_type_erased(
-        &self,
-        term_info: &TermInfo,
-        option: IndexRecordOption,
-        inverted_index_reader: &InvertedIndexReader,
-        fieldnorm_reader: FieldNormReader,
-        similarity_weight: Bm25Weight,
-    ) -> io::Result<Box<dyn Scorer>>;
-
-    /// Loads a type-erased PhraseScorer object for the given term.
-    ///
-    /// If the schema used to build the index did not provide enough
-    /// information to match the requested `option`, a TermScorer is still
-    /// returned in a best-effort manner.
-    ///
-    /// The point of this contraption is that the return PhraseScorer is backed,
-    /// not by Box<dyn Postings> but by the codec's concrete Postings type.
-    fn new_phrase_scorer_type_erased(
-        &self,
-        term_infos: &[(usize, TermInfo)],
-        similarity_weight: Option<Bm25Weight>,
-        fieldnorm_reader: FieldNormReader,
-        slop: u32,
-        inverted_index_reader: &InvertedIndexReader,
-    ) -> io::Result<Box<dyn Scorer>>;
-
-    /// Performs a for_each_pruning operation on the given scorer.
-    ///
-    /// The function will go through matching documents and call the callback
-    /// function for all docs with a score exceeding the threshold.
-    ///
-    /// The function itself will return a larger threshold value,
-    /// meant to update the threshold value.
-    ///
-    /// If the codec and the scorer allow it, this function can rely on
-    /// optimizations like the block-max wand.
-    fn for_each_pruning(
-        &self,
-        threshold: Score,
-        scorer: Box<dyn Scorer>,
-        callback: &mut dyn FnMut(DocId, Score) -> Score,
-    );
-}
-
-impl<TCodec: Codec> ObjectSafeCodec for TCodec {
-    fn load_postings_type_erased(
-        &self,
-        term_info: &TermInfo,
-        option: IndexRecordOption,
-        inverted_index_reader: &InvertedIndexReader,
-    ) -> io::Result<Box<dyn Postings>> {
-        let postings = inverted_index_reader
-            .read_postings_from_terminfo_specialized(term_info, option, self)?;
-        Ok(Box::new(postings))
-    }
-
-    fn load_term_scorer_type_erased(
-        &self,
-        term_info: &TermInfo,
-        option: IndexRecordOption,
-        inverted_index_reader: &InvertedIndexReader,
-        fieldnorm_reader: FieldNormReader,
-        similarity_weight: Bm25Weight,
-    ) -> io::Result<Box<dyn Scorer>> {
-        let scorer = inverted_index_reader.new_term_scorer_specialized(
-            term_info,
-            option,
-            fieldnorm_reader,
-            similarity_weight,
-            self,
-        )?;
-        Ok(box_scorer(scorer))
-    }
-
-    fn new_phrase_scorer_type_erased(
-        &self,
-        term_infos: &[(usize, TermInfo)],
-        similarity_weight: Option<Bm25Weight>,
-        fieldnorm_reader: FieldNormReader,
-        slop: u32,
-        inverted_index_reader: &InvertedIndexReader,
-    ) -> io::Result<Box<dyn Scorer>> {
-        let scorer = inverted_index_reader.new_phrase_scorer_type_specialized(
-            term_infos,
-            similarity_weight,
-            fieldnorm_reader,
-            slop,
-            self,
-        )?;
-        Ok(box_scorer(scorer))
-    }
-
-    fn for_each_pruning(
-        &self,
-        threshold: Score,
-        scorer: Box<dyn Scorer>,
-        callback: &mut dyn FnMut(DocId, Score) -> Score,
-    ) {
-        let accerelerated_foreach_pruning_res =
-            <TCodec as Codec>::PostingsCodec::try_accelerated_for_each_pruning(
-                threshold, scorer, callback,
-            );
-        if let Err(mut scorer) = accerelerated_foreach_pruning_res {
-            // No acceleration available. We need to do things manually.
-            scorer.for_each_pruning(threshold, callback);
-        }
-    }
-}
--- a/src/codec/postings/mod.rs
+++ b/src/codec/postings/mod.rs
@@ -1,119 +0,0 @@
-use std::io;
-
-/// Block-max WAND algorithm.
-pub mod block_wand;
-use common::OwnedBytes;
-
-use crate::fieldnorm::FieldNormReader;
-use crate::postings::Postings;
-use crate::query::{Bm25Weight, Scorer};
-use crate::schema::IndexRecordOption;
-use crate::{DocId, Score};
-
-/// Postings codec.
-pub trait PostingsCodec: Send + Sync + 'static {
-    /// Serializer type for the postings codec.
-    type PostingsSerializer: PostingsSerializer;
-    /// Postings type for the postings codec.
-    type Postings: Postings + Clone;
-    /// Creates a new postings serializer.
-    fn new_serializer(
-        &self,
-        avg_fieldnorm: Score,
-        mode: IndexRecordOption,
-        fieldnorm_reader: Option<FieldNormReader>,
-    ) -> Self::PostingsSerializer;
-
-    /// Loads postings
-    ///
-    /// Record option is the option that was passed at indexing time.
-    /// Requested option is the option that is requested.
-    ///
-    /// For instance, we may have term_freq in the posting list
-    /// but we can skip decompressing as we read the posting list.
-    ///
-    /// If record option does not support the requested option,
-    /// this method does NOT return an error and will in fact restrict
-    /// requested_option to what is available.
-    fn load_postings(
-        &self,
-        doc_freq: u32,
-        postings_data: OwnedBytes,
-        record_option: IndexRecordOption,
-        requested_option: IndexRecordOption,
-        positions_data: Option<OwnedBytes>,
-    ) -> io::Result<Self::Postings>;
-
-    /// If your codec supports different ways to accelerate `for_each_pruning` that's
-    /// where you should implement it.
-    ///
-    /// Returning `Err(scorer)` without mutating the scorer nor calling the callback function,
-    /// is never "wrong". It just leaves the responsability to the caller to call a fallback
-    /// implementation on the scorer.
-    ///
-    /// If your codec supports BlockMax-Wand, you just need to have your
-    /// postings implement `PostingsWithBlockMax` and copy what is done in the StandardPostings
-    /// codec to enable it.
-    fn try_accelerated_for_each_pruning(
-        _threshold: Score,
-        scorer: Box<dyn Scorer>,
-        _callback: &mut dyn FnMut(DocId, Score) -> Score,
-    ) -> Result<(), Box<dyn Scorer>> {
-        Err(scorer)
-    }
-}
-
-/// A postings serializer is a listener that is in charge of serializing postings
-///
-/// IO is done only once per postings, once all of the data has been received.
-/// A serializer will therefore contain internal buffers.
-///
-/// A serializer is created once and recycled for all postings.
-///
-/// Clients should use PostingsSerializer as follows.
-/// ```
-/// // First postings list
-/// serializer.new_term(2, true);
-/// serializer.write_doc(2, 1);
-/// serializer.write_doc(6, 2);
-/// serializer.close_term(3);
-/// serializer.clear();
-/// // Second postings list
-/// serializer.new_term(1, true);
-/// serializer.write_doc(3, 1);
-/// serializer.close_term(3);
-/// ```
-pub trait PostingsSerializer {
-    /// The term_doc_freq here is the number of documents
-    /// in the postings lists.
-    ///
-    /// It can be used to compute the idf that will be used for the
-    /// blockmax parameters.
-    ///
-    /// If not available (e.g. if we do not collect `term_frequencies`
-    /// blockwand is disabled), the term_doc_freq passed will be set 0.
-    fn new_term(&mut self, term_doc_freq: u32, record_term_freq: bool);
-
-    /// Records a new document id for the current term.
-    /// The serializer may ignore it.
-    fn write_doc(&mut self, doc_id: DocId, term_freq: u32);
-
-    /// Closes the current term and writes the postings list associated.
-    fn close_term(&mut self, doc_freq: u32, wrt: &mut impl io::Write) -> io::Result<()>;
-}
-
-/// A light complement interface to Postings to allow block-max wand acceleration.
-pub trait PostingsWithBlockMax: Postings {
-    /// Moves the postings to the block containign `target_doc` and returns
-    /// an upperbound of the score for documents in the block.
-    ///
-    /// `Warning`: Calling this method may leave the postings in an invalid state.
-    /// callers are required to call seek before calling any other of the
-    /// `Postings` method (like doc / advance etc.).
-    fn seek_block_max(&mut self, target_doc: crate::DocId, similarity_weight: &Bm25Weight)
-        -> Score;
-
-    /// Returns the last document in the current block (or Terminated if this
-    /// is the last block).
-    fn last_doc_in_block(&self) -> crate::DocId;
-}
--- a/src/codec/standard/mod.rs
+++ b/src/codec/standard/mod.rs
@@ -1,35 +0,0 @@
-use serde::{Deserialize, Serialize};
-
-use crate::codec::standard::postings::StandardPostingsCodec;
-use crate::codec::Codec;
-
-/// Tantivy's default postings codec.
-pub mod postings;
-
-/// Tantivy's default codec.
-#[derive(Debug, Default, Clone, Serialize, Deserialize)]
-pub struct StandardCodec;
-
-impl Codec for StandardCodec {
-    type PostingsCodec = StandardPostingsCodec;
-
-    const NAME: &'static str = "standard";
-
-    fn from_json_props(json_value: &serde_json::Value) -> crate::Result<Self> {
-        if !json_value.is_null() {
-            return Err(crate::TantivyError::InvalidArgument(format!(
-                "Codec property for the StandardCodec are unexpected. expected null, got {}",
-                json_value.as_str().unwrap_or("null")
-            )));
-        }
-        Ok(StandardCodec)
-    }
-
-    fn to_json_props(&self) -> serde_json::Value {
-        serde_json::Value::Null
-    }
-
-    fn postings_codec(&self) -> &Self::PostingsCodec {
-        &StandardPostingsCodec
-    }
-}
--- a/src/codec/standard/postings/block.rs
+++ b/src/codec/standard/postings/block.rs
@@ -1,50 +0,0 @@
-use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
-use crate::DocId;
-
-pub struct Block {
-    doc_ids: [DocId; COMPRESSION_BLOCK_SIZE],
-    term_freqs: [u32; COMPRESSION_BLOCK_SIZE],
-    len: usize,
-}
-
-impl Block {
-    pub fn new() -> Self {
-        Block {
-            doc_ids: [0u32; COMPRESSION_BLOCK_SIZE],
-            term_freqs: [0u32; COMPRESSION_BLOCK_SIZE],
-            len: 0,
-        }
-    }
-
-    pub fn doc_ids(&self) -> &[DocId] {
-        &self.doc_ids[..self.len]
-    }
-
-    pub fn term_freqs(&self) -> &[u32] {
-        &self.term_freqs[..self.len]
-    }
-
-    pub fn clear(&mut self) {
-        self.len = 0;
-    }
-
-    pub fn append_doc(&mut self, doc: DocId, term_freq: u32) {
-        let len = self.len;
-        self.doc_ids[len] = doc;
-        self.term_freqs[len] = term_freq;
-        self.len = len + 1;
-    }
-
-    pub fn is_full(&self) -> bool {
-        self.len == COMPRESSION_BLOCK_SIZE
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-
-    pub fn last_doc(&self) -> DocId {
-        assert_eq!(self.len, COMPRESSION_BLOCK_SIZE);
-        self.doc_ids[COMPRESSION_BLOCK_SIZE - 1]
-    }
-}
--- a/src/codec/standard/postings/mod.rs
+++ b/src/codec/standard/postings/mod.rs
@@ -1,107 +0,0 @@
-use std::io;
-
-use crate::codec::postings::block_wand::{block_wand, block_wand_single_scorer};
-use crate::codec::postings::PostingsCodec;
-use crate::codec::standard::postings::block_segment_postings::BlockSegmentPostings;
-pub use crate::codec::standard::postings::segment_postings::SegmentPostings;
-use crate::fieldnorm::FieldNormReader;
-use crate::positions::PositionReader;
-use crate::query::term_query::TermScorer;
-use crate::query::{BufferedUnionScorer, Scorer, SumCombiner};
-use crate::schema::IndexRecordOption;
-use crate::{DocSet as _, Score, TERMINATED};
-
-mod block;
-mod block_segment_postings;
-mod segment_postings;
-mod skip;
-mod standard_postings_serializer;
-
-pub use segment_postings::SegmentPostings as StandardPostings;
-pub use standard_postings_serializer::StandardPostingsSerializer;
-
-/// The default postings codec for tantivy.
-pub struct StandardPostingsCodec;
-
-#[expect(clippy::enum_variant_names)]
-#[derive(Debug, PartialEq, Clone, Copy, Eq)]
-pub(crate) enum FreqReadingOption {
-    NoFreq,
-    SkipFreq,
-    ReadFreq,
-}
-
-impl PostingsCodec for StandardPostingsCodec {
-    type PostingsSerializer = StandardPostingsSerializer;
-    type Postings = SegmentPostings;
-
-    fn new_serializer(
-        &self,
-        avg_fieldnorm: Score,
-        mode: IndexRecordOption,
-        fieldnorm_reader: Option<FieldNormReader>,
-    ) -> Self::PostingsSerializer {
-        StandardPostingsSerializer::new(avg_fieldnorm, mode, fieldnorm_reader)
-    }
-
-    fn load_postings(
-        &self,
-        doc_freq: u32,
-        postings_data: common::OwnedBytes,
-        record_option: IndexRecordOption,
-        requested_option: IndexRecordOption,
-        positions_data_opt: Option<common::OwnedBytes>,
-    ) -> io::Result<Self::Postings> {
-        // Rationalize record_option/requested_option.
-        let requested_option = requested_option.downgrade(record_option);
-        let block_segment_postings =
-            BlockSegmentPostings::open(doc_freq, postings_data, record_option, requested_option)?;
-        let position_reader = positions_data_opt.map(PositionReader::open).transpose()?;
-        Ok(SegmentPostings::from_block_postings(
-            block_segment_postings,
-            position_reader,
-        ))
-    }
-
-    fn try_accelerated_for_each_pruning(
-        mut threshold: Score,
-        mut scorer: Box<dyn Scorer>,
-        callback: &mut dyn FnMut(crate::DocId, Score) -> Score,
-    ) -> Result<(), Box<dyn Scorer>> {
-        scorer = match scorer.downcast::<TermScorer<Self::Postings>>() {
-            Ok(term_scorer) => {
-                block_wand_single_scorer(*term_scorer, threshold, callback);
-                return Ok(());
-            }
-            Err(scorer) => scorer,
-        };
-        let mut union_scorer =
-            scorer.downcast::<BufferedUnionScorer<Box<dyn Scorer>, SumCombiner>>()?;
-        if !union_scorer
-            .scorers()
-            .iter()
-            .all(|scorer| scorer.is::<TermScorer<Self::Postings>>())
-        {
-            return Err(union_scorer);
-        }
-        let doc = union_scorer.doc();
-        if doc == TERMINATED {
-            return Ok(());
-        }
-        let score = union_scorer.score();
-        if score > threshold {
-            threshold = callback(doc, score);
-        }
-        let boxed_scorers: Vec<Box<dyn Scorer>> = union_scorer.into_scorers();
-        let scorers: Vec<TermScorer<Self::Postings>> = boxed_scorers
-            .into_iter()
-            .map(|scorer| {
-                *scorer.downcast::<TermScorer<Self::Postings>>().ok().expect(
-                    "Downcast failed despite the fact we already checked the type was correct",
-                )
-            })
-            .collect();
-        block_wand(scorers, threshold, callback);
-        Ok(())
-    }
-}
--- a/src/codec/standard/postings/standard_postings_serializer.rs
+++ b/src/codec/standard/postings/standard_postings_serializer.rs
@@ -1,183 +0,0 @@
-use std::cmp::Ordering;
-use std::io::{self, Write as _};
-
-use common::{BinarySerializable as _, VInt};
-
-use crate::codec::postings::PostingsSerializer;
-use crate::codec::standard::postings::block::Block;
-use crate::codec::standard::postings::skip::SkipSerializer;
-use crate::fieldnorm::FieldNormReader;
-use crate::postings::compression::{BlockEncoder, VIntEncoder as _, COMPRESSION_BLOCK_SIZE};
-use crate::query::Bm25Weight;
-use crate::schema::IndexRecordOption;
-use crate::{DocId, Score};
-
-pub struct StandardPostingsSerializer {
-    last_doc_id_encoded: u32,
-
-    block_encoder: BlockEncoder,
-    block: Box<Block>,
-
-    postings_write: Vec<u8>,
-    skip_write: SkipSerializer,
-
-    mode: IndexRecordOption,
-    fieldnorm_reader: Option<FieldNormReader>,
-
-    bm25_weight: Option<Bm25Weight>,
-    avg_fieldnorm: Score, /* Average number of term in the field for that segment.
-                           * this value is used to compute the block wand information. */
-    term_has_freq: bool,
-}
-
-impl StandardPostingsSerializer {
-    pub fn new(
-        avg_fieldnorm: Score,
-        mode: IndexRecordOption,
-        fieldnorm_reader: Option<FieldNormReader>,
-    ) -> StandardPostingsSerializer {
-        Self {
-            last_doc_id_encoded: 0,
-            block_encoder: BlockEncoder::new(),
-            block: Box::new(Block::new()),
-            postings_write: Vec::new(),
-            skip_write: SkipSerializer::new(),
-            mode,
-            fieldnorm_reader,
-            bm25_weight: None,
-            avg_fieldnorm,
-            term_has_freq: false,
-        }
-    }
-}
-
-impl PostingsSerializer for StandardPostingsSerializer {
-    fn new_term(&mut self, term_doc_freq: u32, record_term_freq: bool) {
-        self.clear();
-
-        self.term_has_freq = self.mode.has_freq() && record_term_freq;
-        if !self.term_has_freq {
-            return;
-        }
-
-        let num_docs_in_segment: u64 =
-            if let Some(fieldnorm_reader) = self.fieldnorm_reader.as_ref() {
-                fieldnorm_reader.num_docs() as u64
-            } else {
-                return;
-            };
-
-        if num_docs_in_segment == 0 {
-            return;
-        }
-
-        self.bm25_weight = Some(Bm25Weight::for_one_term_without_explain(
-            term_doc_freq as u64,
-            num_docs_in_segment,
-            self.avg_fieldnorm,
-        ));
-    }
-
-    fn write_doc(&mut self, doc_id: DocId, term_freq: u32) {
-        self.block.append_doc(doc_id, term_freq);
-        if self.block.is_full() {
-            self.write_block();
-        }
-    }
-
-    fn close_term(&mut self, doc_freq: u32, output_write: &mut impl io::Write) -> io::Result<()> {
-        if !self.block.is_empty() {
-            // we have doc ids waiting to be written
-            // this happens when the number of doc ids is
-            // not a perfect multiple of our block size.
-            //
-            // In that case, the remaining part is encoded
-            // using variable int encoding.
-            {
-                let block_encoded = self
-                    .block_encoder
-                    .compress_vint_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
-                self.postings_write.write_all(block_encoded)?;
-            }
-            // ... Idem for term frequencies
-            if self.term_has_freq {
-                let block_encoded = self
-                    .block_encoder
-                    .compress_vint_unsorted(self.block.term_freqs());
-                self.postings_write.write_all(block_encoded)?;
-            }
-            self.block.clear();
-        }
-        if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
-            let skip_data = self.skip_write.data();
-            VInt(skip_data.len() as u64).serialize(output_write)?;
-            output_write.write_all(skip_data)?;
-        }
-        output_write.write_all(&self.postings_write[..])?;
-        self.skip_write.clear();
-        self.postings_write.clear();
-        self.bm25_weight = None;
-        Ok(())
-    }
-}
-
-impl StandardPostingsSerializer {
-    fn clear(&mut self) {
-        self.bm25_weight = None;
-        self.block.clear();
-        self.last_doc_id_encoded = 0;
-    }
-
-    fn write_block(&mut self) {
-        {
-            // encode the doc ids
-            let (num_bits, block_encoded): (u8, &[u8]) = self
-                .block_encoder
-                .compress_block_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
-            self.last_doc_id_encoded = self.block.last_doc();
-            self.skip_write
-                .write_doc(self.last_doc_id_encoded, num_bits);
-            // last el block 0, offset block 1,
-            self.postings_write.extend(block_encoded);
-        }
-        if self.term_has_freq {
-            let (num_bits, block_encoded): (u8, &[u8]) = self
-                .block_encoder
-                .compress_block_unsorted(self.block.term_freqs(), true);
-            self.postings_write.extend(block_encoded);
-            self.skip_write.write_term_freq(num_bits);
-            if self.mode.has_positions() {
-                // We serialize the sum of term freqs within the skip information
-                // in order to navigate through positions.
-                let sum_freq = self.block.term_freqs().iter().cloned().sum();
-                self.skip_write.write_total_term_freq(sum_freq);
-            }
-            let mut blockwand_params = (0u8, 0u32);
-            if let Some(bm25_weight) = self.bm25_weight.as_ref() {
-                if let Some(fieldnorm_reader) = self.fieldnorm_reader.as_ref() {
-                    let docs = self.block.doc_ids().iter().cloned();
-                    let term_freqs = self.block.term_freqs().iter().cloned();
-                    let fieldnorms = docs.map(|doc| fieldnorm_reader.fieldnorm_id(doc));
-                    blockwand_params = fieldnorms
-                        .zip(term_freqs)
-                        .max_by(
-                            |(left_fieldnorm_id, left_term_freq),
-                             (right_fieldnorm_id, right_term_freq)| {
-                                let left_score =
-                                    bm25_weight.tf_factor(*left_fieldnorm_id, *left_term_freq);
-                                let right_score =
-                                    bm25_weight.tf_factor(*right_fieldnorm_id, *right_term_freq);
-                                left_score
-                                    .partial_cmp(&right_score)
-                                    .unwrap_or(Ordering::Equal)
-                            },
-                        )
-                        .unwrap();
-                }
-            }
-            let (fieldnorm_id, term_freq) = blockwand_params;
-            self.skip_write.write_blockwand_max(fieldnorm_id, term_freq);
-        }
-        self.block.clear();
-    }
-}
--- a/src/collector/sort_key/mod.rs
+++ b/src/collector/sort_key/mod.rs
@@ -1,12 +1,10 @@
 mod order;
-mod sort_by_erased_type;
 mod sort_by_score;
 mod sort_by_static_fast_value;
 mod sort_by_string;
 mod sort_key_computer;

 pub use order::*;
-pub use sort_by_erased_type::SortByErasedType;
 pub use sort_by_score::SortBySimilarityScore;
 pub use sort_by_static_fast_value::SortByStaticFastValue;
 pub use sort_by_string::SortByString;
@@ -36,13 +34,11 @@ pub(crate) mod tests {
    use std::collections::HashMap;
    use std::ops::Range;

-    use crate::collector::sort_key::{
-        SortByErasedType, SortBySimilarityScore, SortByStaticFastValue, SortByString,
-    };
+    use crate::collector::sort_key::{SortBySimilarityScore, SortByStaticFastValue, SortByString};
    use crate::collector::{ComparableDoc, DocSetCollector, TopDocs};
    use crate::indexer::NoMergePolicy;
    use crate::query::{AllQuery, QueryParser};
-    use crate::schema::{OwnedValue, Schema, FAST, TEXT};
+    use crate::schema::{Schema, FAST, TEXT};
    use crate::{DocAddress, Document, Index, Order, Score, Searcher};

    fn make_index() -> crate::Result<Index> {
@@ -317,9 +313,11 @@ pub(crate) mod tests {
                (SortBySimilarityScore, score_order),
                (SortByString::for_field("city"), city_order),
            ));
-            let results: Vec<((Score, Option<String>), DocAddress)> =
-                searcher.search(&AllQuery, &top_collector)?;
-            Ok(results.into_iter().map(|(f, doc)| (f, ids[&doc])).collect())
+            Ok(searcher
+                .search(&AllQuery, &top_collector)?
+                .into_iter()
+                .map(|(f, doc)| (f, ids[&doc]))
+                .collect())
        }

        assert_eq!(
@@ -344,51 +342,6 @@ pub(crate) mod tests {
        Ok(())
    }

-    #[test]
-    fn test_order_by_score_then_owned_value() -> crate::Result<()> {
-        let index = make_index()?;
-
-        type SortKey = (Score, OwnedValue);
-
-        fn query(
-            index: &Index,
-            score_order: Order,
-            city_order: Order,
-        ) -> crate::Result<Vec<(SortKey, u64)>> {
-            let searcher = index.reader()?.searcher();
-            let ids = id_mapping(&searcher);
-
-            let top_collector = TopDocs::with_limit(4).order_by::<(Score, OwnedValue)>((
-                (SortBySimilarityScore, score_order),
-                (SortByErasedType::for_field("city"), city_order),
-            ));
-            let results: Vec<((Score, OwnedValue), DocAddress)> =
-                searcher.search(&AllQuery, &top_collector)?;
-            Ok(results.into_iter().map(|(f, doc)| (f, ids[&doc])).collect())
-        }
-
-        assert_eq!(
-            &query(&index, Order::Asc, Order::Asc)?,
-            &[
-                ((1.0, OwnedValue::Str("austin".to_owned())), 0),
-                ((1.0, OwnedValue::Str("greenville".to_owned())), 1),
-                ((1.0, OwnedValue::Str("tokyo".to_owned())), 2),
-                ((1.0, OwnedValue::Null), 3),
-            ]
-        );
-
-        assert_eq!(
-            &query(&index, Order::Asc, Order::Desc)?,
-            &[
-                ((1.0, OwnedValue::Str("tokyo".to_owned())), 2),
-                ((1.0, OwnedValue::Str("greenville".to_owned())), 1),
-                ((1.0, OwnedValue::Str("austin".to_owned())), 0),
-                ((1.0, OwnedValue::Null), 3),
-            ]
-        );
-        Ok(())
-    }
-
    use proptest::prelude::*;

    proptest! {
--- a/src/collector/sort_key/order.rs
+++ b/src/collector/sort_key/order.rs
@@ -1,70 +1,11 @@
 use std::cmp::Ordering;

-use columnar::MonotonicallyMappableToU64;
 use serde::{Deserialize, Serialize};

 use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
-use crate::schema::{OwnedValue, Schema};
+use crate::schema::Schema;
 use crate::{DocId, Order, Score};

-fn compare_owned_value<const NULLS_FIRST: bool>(lhs: &OwnedValue, rhs: &OwnedValue) -> Ordering {
-    match (lhs, rhs) {
-        (OwnedValue::Null, OwnedValue::Null) => Ordering::Equal,
-        (OwnedValue::Null, _) => {
-            if NULLS_FIRST {
-                Ordering::Less
-            } else {
-                Ordering::Greater
-            }
-        }
-        (_, OwnedValue::Null) => {
-            if NULLS_FIRST {
-                Ordering::Greater
-            } else {
-                Ordering::Less
-            }
-        }
-        (OwnedValue::Str(a), OwnedValue::Str(b)) => a.cmp(b),
-        (OwnedValue::PreTokStr(a), OwnedValue::PreTokStr(b)) => a.cmp(b),
-        (OwnedValue::U64(a), OwnedValue::U64(b)) => a.cmp(b),
-        (OwnedValue::I64(a), OwnedValue::I64(b)) => a.cmp(b),
-        (OwnedValue::F64(a), OwnedValue::F64(b)) => a.to_u64().cmp(&b.to_u64()),
-        (OwnedValue::Bool(a), OwnedValue::Bool(b)) => a.cmp(b),
-        (OwnedValue::Date(a), OwnedValue::Date(b)) => a.cmp(b),
-        (OwnedValue::Facet(a), OwnedValue::Facet(b)) => a.cmp(b),
-        (OwnedValue::Bytes(a), OwnedValue::Bytes(b)) => a.cmp(b),
-        (OwnedValue::IpAddr(a), OwnedValue::IpAddr(b)) => a.cmp(b),
-        (OwnedValue::U64(a), OwnedValue::I64(b)) => {
-            if *b < 0 {
-                Ordering::Greater
-            } else {
-                a.cmp(&(*b as u64))
-            }
-        }
-        (OwnedValue::I64(a), OwnedValue::U64(b)) => {
-            if *a < 0 {
-                Ordering::Less
-            } else {
-                (*a as u64).cmp(b)
-            }
-        }
-        (OwnedValue::U64(a), OwnedValue::F64(b)) => (*a as f64).to_u64().cmp(&b.to_u64()),
-        (OwnedValue::F64(a), OwnedValue::U64(b)) => a.to_u64().cmp(&(*b as f64).to_u64()),
-        (OwnedValue::I64(a), OwnedValue::F64(b)) => (*a as f64).to_u64().cmp(&b.to_u64()),
-        (OwnedValue::F64(a), OwnedValue::I64(b)) => a.to_u64().cmp(&(*b as f64).to_u64()),
-        (a, b) => {
-            let ord = a.discriminant_value().cmp(&b.discriminant_value());
-            // If the discriminant is equal, it's because a new type was added, but hasn't been
-            // included in this `match` statement.
-            assert!(
-                ord != Ordering::Equal,
-                "Unimplemented comparison for type of {a:?}, {b:?}"
-            );
-            ord
-        }
-    }
-}
-
 /// Comparator trait defining the order in which documents should be ordered.
 pub trait Comparator<T>: Send + Sync + std::fmt::Debug + Default {
    /// Return the order between two values.
@@ -84,18 +25,7 @@ pub struct NaturalComparator;
 impl<T: PartialOrd> Comparator<T> for NaturalComparator {
    #[inline(always)]
    fn compare(&self, lhs: &T, rhs: &T) -> Ordering {
-        lhs.partial_cmp(rhs).unwrap_or(Ordering::Equal)
-    }
-}
-
-/// A (partial) implementation of comparison for OwnedValue.
-///
-/// Intended for use within columns of homogenous types, and so will panic for OwnedValues with
-/// mismatched types. The one exception is Null, for which we do define all comparisons.
-impl Comparator<OwnedValue> for NaturalComparator {
-    #[inline(always)]
-    fn compare(&self, lhs: &OwnedValue, rhs: &OwnedValue) -> Ordering {
-        compare_owned_value::</* NULLS_FIRST= */ true>(lhs, rhs)
+        lhs.partial_cmp(rhs).unwrap()
    }
 }

@@ -191,13 +121,6 @@ impl Comparator<String> for ReverseNoneIsLowerComparator {
    }
 }

-impl Comparator<OwnedValue> for ReverseNoneIsLowerComparator {
-    #[inline(always)]
-    fn compare(&self, lhs: &OwnedValue, rhs: &OwnedValue) -> Ordering {
-        compare_owned_value::</* NULLS_FIRST= */ false>(rhs, lhs)
-    }
-}
-
 /// Compare values naturally, but treating `None` as higher than `Some`.
 ///
 /// When used with `TopDocs`, which reverses the order, this results in a
@@ -262,13 +185,6 @@ impl Comparator<String> for NaturalNoneIsHigherComparator {
    }
 }

-impl Comparator<OwnedValue> for NaturalNoneIsHigherComparator {
-    #[inline(always)]
-    fn compare(&self, lhs: &OwnedValue, rhs: &OwnedValue) -> Ordering {
-        compare_owned_value::</* NULLS_FIRST= */ false>(lhs, rhs)
-    }
-}
-
 /// An enum representing the different sort orders.
 #[derive(Debug, Clone, Copy, Eq, PartialEq, Default)]
 pub enum ComparatorEnum {
@@ -488,12 +404,11 @@ impl<TSegmentSortKeyComputer, TSegmentSortKey, TComparator> SegmentSortKeyComput
    for SegmentSortKeyComputerWithComparator<TSegmentSortKeyComputer, TComparator>
 where
    TSegmentSortKeyComputer: SegmentSortKeyComputer<SegmentSortKey = TSegmentSortKey>,
-    TSegmentSortKey: Clone + 'static + Sync + Send,
+    TSegmentSortKey: PartialOrd + Clone + 'static + Sync + Send,
    TComparator: Comparator<TSegmentSortKey> + 'static + Sync + Send,
 {
    type SortKey = TSegmentSortKeyComputer::SortKey;
    type SegmentSortKey = TSegmentSortKey;
-    type SegmentComparator = TComparator;

    fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Self::SegmentSortKey {
        self.segment_sort_key_computer.segment_sort_key(doc, score)
@@ -517,7 +432,6 @@ where
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::schema::OwnedValue;

    #[test]
    fn test_natural_none_is_higher() {
@@ -541,27 +455,4 @@ mod tests {
        // compare(None, None) should be Equal.
        assert_eq!(comp.compare(&null, &null), Ordering::Equal);
    }
-
-    #[test]
-    fn test_mixed_ownedvalue_compare() {
-        let u = OwnedValue::U64(10);
-        let i = OwnedValue::I64(10);
-        let f = OwnedValue::F64(10.0);
-
-        let nc = NaturalComparator;
-        assert_eq!(nc.compare(&u, &i), Ordering::Equal);
-        assert_eq!(nc.compare(&u, &f), Ordering::Equal);
-        assert_eq!(nc.compare(&i, &f), Ordering::Equal);
-
-        let u2 = OwnedValue::U64(11);
-        assert_eq!(nc.compare(&u2, &f), Ordering::Greater);
-
-        let s = OwnedValue::Str("a".to_string());
-        // Str < U64
-        assert_eq!(nc.compare(&s, &u), Ordering::Less);
-        // Str < I64
-        assert_eq!(nc.compare(&s, &i), Ordering::Less);
-        // Str < F64
-        assert_eq!(nc.compare(&s, &f), Ordering::Less);
-    }
 }
--- a/src/collector/sort_key/sort_by_erased_type.rs
+++ b/src/collector/sort_key/sort_by_erased_type.rs
@@ -1,361 +0,0 @@
-use columnar::{ColumnType, MonotonicallyMappableToU64};
-
-use crate::collector::sort_key::{
-    NaturalComparator, SortBySimilarityScore, SortByStaticFastValue, SortByString,
-};
-use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
-use crate::fastfield::FastFieldNotAvailableError;
-use crate::schema::OwnedValue;
-use crate::{DateTime, DocId, Score};
-
-/// Sort by the boxed / OwnedValue representation of either a fast field, or of the score.
-///
-/// Using the OwnedValue representation allows for type erasure, and can be useful when sort orders
-/// are not known until runtime. But it comes with a performance cost: wherever possible, prefer to
-/// use a SortKeyComputer implementation with a known-type at compile time.
-#[derive(Debug, Clone)]
-pub enum SortByErasedType {
-    /// Sort by a fast field
-    Field(String),
-    /// Sort by score
-    Score,
-}
-
-impl SortByErasedType {
-    /// Creates a new sort key computer which will sort by the given fast field column, with type
-    /// erasure.
-    pub fn for_field(column_name: impl ToString) -> Self {
-        Self::Field(column_name.to_string())
-    }
-
-    /// Creates a new sort key computer which will sort by score, with type erasure.
-    pub fn for_score() -> Self {
-        Self::Score
-    }
-}
-
-trait ErasedSegmentSortKeyComputer: Send + Sync {
-    fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Option<u64>;
-    fn convert_segment_sort_key(&self, sort_key: Option<u64>) -> OwnedValue;
-}
-
-struct ErasedSegmentSortKeyComputerWrapper<C, F> {
-    inner: C,
-    converter: F,
-}
-
-impl<C, F> ErasedSegmentSortKeyComputer for ErasedSegmentSortKeyComputerWrapper<C, F>
-where
-    C: SegmentSortKeyComputer<SegmentSortKey = Option<u64>> + Send + Sync,
-    F: Fn(C::SortKey) -> OwnedValue + Send + Sync + 'static,
-{
-    fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Option<u64> {
-        self.inner.segment_sort_key(doc, score)
-    }
-
-    fn convert_segment_sort_key(&self, sort_key: Option<u64>) -> OwnedValue {
-        let val = self.inner.convert_segment_sort_key(sort_key);
-        (self.converter)(val)
-    }
-}
-
-struct ScoreSegmentSortKeyComputer {
-    segment_computer: SortBySimilarityScore,
-}
-
-impl ErasedSegmentSortKeyComputer for ScoreSegmentSortKeyComputer {
-    fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Option<u64> {
-        let score_value: f64 = self.segment_computer.segment_sort_key(doc, score).into();
-        Some(score_value.to_u64())
-    }
-
-    fn convert_segment_sort_key(&self, sort_key: Option<u64>) -> OwnedValue {
-        let score_value: u64 = sort_key.expect("This implementation always produces a score.");
-        OwnedValue::F64(f64::from_u64(score_value))
-    }
-}
-
-impl SortKeyComputer for SortByErasedType {
-    type SortKey = OwnedValue;
-    type Child = ErasedColumnSegmentSortKeyComputer;
-    type Comparator = NaturalComparator;
-
-    fn requires_scoring(&self) -> bool {
-        matches!(self, Self::Score)
-    }
-
-    fn segment_sort_key_computer(
-        &self,
-        segment_reader: &crate::SegmentReader,
-    ) -> crate::Result<Self::Child> {
-        let inner: Box<dyn ErasedSegmentSortKeyComputer> = match self {
-            Self::Field(column_name) => {
-                let fast_fields = segment_reader.fast_fields();
-                // TODO: We currently double-open the column to avoid relying on the implementation
-                // details of `SortByString` or `SortByStaticFastValue`. Once
-                // https://github.com/quickwit-oss/tantivy/issues/2776 is resolved, we should
-                // consider directly constructing the appropriate `SegmentSortKeyComputer` type for
-                // the column that we open here.
-                let (_column, column_type) =
-                    fast_fields.u64_lenient(column_name)?.ok_or_else(|| {
-                        FastFieldNotAvailableError {
-                            field_name: column_name.to_owned(),
-                        }
-                    })?;
-
-                match column_type {
-                    ColumnType::Str => {
-                        let computer = SortByString::for_field(column_name);
-                        let inner = computer.segment_sort_key_computer(segment_reader)?;
-                        Box::new(ErasedSegmentSortKeyComputerWrapper {
-                            inner,
-                            converter: |val: Option<String>| {
-                                val.map(OwnedValue::Str).unwrap_or(OwnedValue::Null)
-                            },
-                        })
-                    }
-                    ColumnType::U64 => {
-                        let computer = SortByStaticFastValue::<u64>::for_field(column_name);
-                        let inner = computer.segment_sort_key_computer(segment_reader)?;
-                        Box::new(ErasedSegmentSortKeyComputerWrapper {
-                            inner,
-                            converter: |val: Option<u64>| {
-                                val.map(OwnedValue::U64).unwrap_or(OwnedValue::Null)
-                            },
-                        })
-                    }
-                    ColumnType::I64 => {
-                        let computer = SortByStaticFastValue::<i64>::for_field(column_name);
-                        let inner = computer.segment_sort_key_computer(segment_reader)?;
-                        Box::new(ErasedSegmentSortKeyComputerWrapper {
-                            inner,
-                            converter: |val: Option<i64>| {
-                                val.map(OwnedValue::I64).unwrap_or(OwnedValue::Null)
-                            },
-                        })
-                    }
-                    ColumnType::F64 => {
-                        let computer = SortByStaticFastValue::<f64>::for_field(column_name);
-                        let inner = computer.segment_sort_key_computer(segment_reader)?;
-                        Box::new(ErasedSegmentSortKeyComputerWrapper {
-                            inner,
-                            converter: |val: Option<f64>| {
-                                val.map(OwnedValue::F64).unwrap_or(OwnedValue::Null)
-                            },
-                        })
-                    }
-                    ColumnType::Bool => {
-                        let computer = SortByStaticFastValue::<bool>::for_field(column_name);
-                        let inner = computer.segment_sort_key_computer(segment_reader)?;
-                        Box::new(ErasedSegmentSortKeyComputerWrapper {
-                            inner,
-                            converter: |val: Option<bool>| {
-                                val.map(OwnedValue::Bool).unwrap_or(OwnedValue::Null)
-                            },
-                        })
-                    }
-                    ColumnType::DateTime => {
-                        let computer = SortByStaticFastValue::<DateTime>::for_field(column_name);
-                        let inner = computer.segment_sort_key_computer(segment_reader)?;
-                        Box::new(ErasedSegmentSortKeyComputerWrapper {
-                            inner,
-                            converter: |val: Option<DateTime>| {
-                                val.map(OwnedValue::Date).unwrap_or(OwnedValue::Null)
-                            },
-                        })
-                    }
-                    column_type => {
-                        return Err(crate::TantivyError::SchemaError(format!(
-                            "Field `{}` is of type {column_type:?}, which is not supported for \
-                             sorting by owned value yet.",
-                            column_name
-                        )))
-                    }
-                }
-            }
-            Self::Score => Box::new(ScoreSegmentSortKeyComputer {
-                segment_computer: SortBySimilarityScore,
-            }),
-        };
-        Ok(ErasedColumnSegmentSortKeyComputer { inner })
-    }
-}
-
-pub struct ErasedColumnSegmentSortKeyComputer {
-    inner: Box<dyn ErasedSegmentSortKeyComputer>,
-}
-
-impl SegmentSortKeyComputer for ErasedColumnSegmentSortKeyComputer {
-    type SortKey = OwnedValue;
-    type SegmentSortKey = Option<u64>;
-    type SegmentComparator = NaturalComparator;
-
-    #[inline(always)]
-    fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Option<u64> {
-        self.inner.segment_sort_key(doc, score)
-    }
-
-    fn convert_segment_sort_key(&self, segment_sort_key: Self::SegmentSortKey) -> OwnedValue {
-        self.inner.convert_segment_sort_key(segment_sort_key)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::collector::sort_key::{ComparatorEnum, SortByErasedType};
-    use crate::collector::TopDocs;
-    use crate::query::AllQuery;
-    use crate::schema::{OwnedValue, Schema, FAST, TEXT};
-    use crate::Index;
-
-    #[test]
-    fn test_sort_by_owned_u64() {
-        let mut schema_builder = Schema::builder();
-        let id_field = schema_builder.add_u64_field("id", FAST);
-        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema);
-        let mut writer = index.writer_for_tests().unwrap();
-        writer.add_document(doc!(id_field => 10u64)).unwrap();
-        writer.add_document(doc!(id_field => 2u64)).unwrap();
-        writer.add_document(doc!()).unwrap();
-        writer.commit().unwrap();
-
-        let reader = index.reader().unwrap();
-        let searcher = reader.searcher();
-
-        let collector = TopDocs::with_limit(10)
-            .order_by((SortByErasedType::for_field("id"), ComparatorEnum::Natural));
-        let top_docs = searcher.search(&AllQuery, &collector).unwrap();
-
-        let values: Vec<OwnedValue> = top_docs.into_iter().map(|(key, _)| key).collect();
-
-        assert_eq!(
-            values,
-            vec![OwnedValue::U64(10), OwnedValue::U64(2), OwnedValue::Null]
-        );
-
-        let collector = TopDocs::with_limit(10).order_by((
-            SortByErasedType::for_field("id"),
-            ComparatorEnum::ReverseNoneLower,
-        ));
-        let top_docs = searcher.search(&AllQuery, &collector).unwrap();
-
-        let values: Vec<OwnedValue> = top_docs.into_iter().map(|(key, _)| key).collect();
-
-        assert_eq!(
-            values,
-            vec![OwnedValue::U64(2), OwnedValue::U64(10), OwnedValue::Null]
-        );
-    }
-
-    #[test]
-    fn test_sort_by_owned_string() {
-        let mut schema_builder = Schema::builder();
-        let city_field = schema_builder.add_text_field("city", FAST | TEXT);
-        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema);
-        let mut writer = index.writer_for_tests().unwrap();
-        writer.add_document(doc!(city_field => "tokyo")).unwrap();
-        writer.add_document(doc!(city_field => "austin")).unwrap();
-        writer.add_document(doc!()).unwrap();
-        writer.commit().unwrap();
-
-        let reader = index.reader().unwrap();
-        let searcher = reader.searcher();
-
-        let collector = TopDocs::with_limit(10).order_by((
-            SortByErasedType::for_field("city"),
-            ComparatorEnum::ReverseNoneLower,
-        ));
-        let top_docs = searcher.search(&AllQuery, &collector).unwrap();
-
-        let values: Vec<OwnedValue> = top_docs.into_iter().map(|(key, _)| key).collect();
-
-        assert_eq!(
-            values,
-            vec![
-                OwnedValue::Str("austin".to_string()),
-                OwnedValue::Str("tokyo".to_string()),
-                OwnedValue::Null
-            ]
-        );
-    }
-
-    #[test]
-    fn test_sort_by_owned_reverse() {
-        let mut schema_builder = Schema::builder();
-        let id_field = schema_builder.add_u64_field("id", FAST);
-        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema);
-        let mut writer = index.writer_for_tests().unwrap();
-        writer.add_document(doc!(id_field => 10u64)).unwrap();
-        writer.add_document(doc!(id_field => 2u64)).unwrap();
-        writer.add_document(doc!()).unwrap();
-        writer.commit().unwrap();
-
-        let reader = index.reader().unwrap();
-        let searcher = reader.searcher();
-
-        let collector = TopDocs::with_limit(10)
-            .order_by((SortByErasedType::for_field("id"), ComparatorEnum::Reverse));
-        let top_docs = searcher.search(&AllQuery, &collector).unwrap();
-
-        let values: Vec<OwnedValue> = top_docs.into_iter().map(|(key, _)| key).collect();
-
-        assert_eq!(
-            values,
-            vec![OwnedValue::Null, OwnedValue::U64(2), OwnedValue::U64(10)]
-        );
-    }
-
-    #[test]
-    fn test_sort_by_owned_score() {
-        let mut schema_builder = Schema::builder();
-        let body_field = schema_builder.add_text_field("body", TEXT);
-        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema);
-        let mut writer = index.writer_for_tests().unwrap();
-        writer.add_document(doc!(body_field => "a a")).unwrap();
-        writer.add_document(doc!(body_field => "a")).unwrap();
-        writer.commit().unwrap();
-
-        let reader = index.reader().unwrap();
-        let searcher = reader.searcher();
-        let query_parser = crate::query::QueryParser::for_index(&index, vec![body_field]);
-        let query = query_parser.parse_query("a").unwrap();
-
-        // Sort by score descending (Natural)
-        let collector = TopDocs::with_limit(10)
-            .order_by((SortByErasedType::for_score(), ComparatorEnum::Natural));
-        let top_docs = searcher.search(&query, &collector).unwrap();
-
-        let values: Vec<f64> = top_docs
-            .into_iter()
-            .map(|(key, _)| match key {
-                OwnedValue::F64(val) => val,
-                _ => panic!("Wrong type {key:?}"),
-            })
-            .collect();
-
-        assert_eq!(values.len(), 2);
-        assert!(values[0] > values[1]);
-
-        // Sort by score ascending (ReverseNoneLower)
-        let collector = TopDocs::with_limit(10).order_by((
-            SortByErasedType::for_score(),
-            ComparatorEnum::ReverseNoneLower,
-        ));
-        let top_docs = searcher.search(&query, &collector).unwrap();
-
-        let values: Vec<f64> = top_docs
-            .into_iter()
-            .map(|(key, _)| match key {
-                OwnedValue::F64(val) => val,
-                _ => panic!("Wrong type {key:?}"),
-            })
-            .collect();
-
-        assert_eq!(values.len(), 2);
-        assert!(values[0] < values[1]);
-    }
-}
--- a/src/collector/sort_key/sort_by_score.rs
+++ b/src/collector/sort_key/sort_by_score.rs
@@ -63,8 +63,8 @@ impl SortKeyComputer for SortBySimilarityScore {

 impl SegmentSortKeyComputer for SortBySimilarityScore {
    type SortKey = Score;
+
    type SegmentSortKey = Score;
-    type SegmentComparator = NaturalComparator;

    #[inline(always)]
    fn segment_sort_key(&mut self, _doc: DocId, score: Score) -> Score {
--- a/src/collector/sort_key/sort_by_static_fast_value.rs
+++ b/src/collector/sort_key/sort_by_static_fast_value.rs
@@ -34,7 +34,9 @@ impl<T: FastValue> SortByStaticFastValue<T> {

 impl<T: FastValue> SortKeyComputer for SortByStaticFastValue<T> {
    type Child = SortByFastValueSegmentSortKeyComputer<T>;
+
    type SortKey = Option<T>;
+
    type Comparator = NaturalComparator;

    fn check_schema(&self, schema: &crate::schema::Schema) -> crate::Result<()> {
@@ -82,8 +84,8 @@ pub struct SortByFastValueSegmentSortKeyComputer<T> {

 impl<T: FastValue> SegmentSortKeyComputer for SortByFastValueSegmentSortKeyComputer<T> {
    type SortKey = Option<T>;
+
    type SegmentSortKey = Option<u64>;
-    type SegmentComparator = NaturalComparator;

    #[inline(always)]
    fn segment_sort_key(&mut self, doc: DocId, _score: Score) -> Self::SegmentSortKey {
--- a/src/collector/sort_key/sort_by_string.rs
+++ b/src/collector/sort_key/sort_by_string.rs
@@ -30,7 +30,9 @@ impl SortByString {

 impl SortKeyComputer for SortByString {
    type SortKey = Option<String>;
+
    type Child = ByStringColumnSegmentSortKeyComputer;
+
    type Comparator = NaturalComparator;

    fn segment_sort_key_computer(
@@ -48,8 +50,8 @@ pub struct ByStringColumnSegmentSortKeyComputer {

 impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer {
    type SortKey = Option<String>;
+
    type SegmentSortKey = Option<TermOrdinal>;
-    type SegmentComparator = NaturalComparator;

    #[inline(always)]
    fn segment_sort_key(&mut self, doc: DocId, _score: Score) -> Option<TermOrdinal> {
@@ -58,8 +60,6 @@ impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer {
    }

    fn convert_segment_sort_key(&self, term_ord_opt: Option<TermOrdinal>) -> Option<String> {
-        // TODO: Individual lookups to the dictionary like this are very likely to repeatedly
-        // decompress the same blocks. See https://github.com/quickwit-oss/tantivy/issues/2776
        let term_ord = term_ord_opt?;
        let str_column = self.str_column_opt.as_ref()?;
        let mut bytes = Vec::new();
--- a/src/collector/sort_key/sort_key_computer.rs
+++ b/src/collector/sort_key/sort_key_computer.rs
@@ -12,21 +12,13 @@ use crate::{DocAddress, DocId, Result, Score, SegmentReader};
 /// It is the segment local version of the [`SortKeyComputer`].
 pub trait SegmentSortKeyComputer: 'static {
    /// The final score being emitted.
-    type SortKey: 'static + Send + Sync + Clone;
+    type SortKey: 'static + PartialOrd + Send + Sync + Clone;

    /// Sort key used by at the segment level by the `SegmentSortKeyComputer`.
    ///
    /// It is typically small like a `u64`, and is meant to be converted
    /// to the final score at the end of the collection of the segment.
-    type SegmentSortKey: 'static + Clone + Send + Sync + Clone;
-
-    /// Comparator type.
-    type SegmentComparator: Comparator<Self::SegmentSortKey> + 'static;
-
-    /// Returns the segment sort key comparator.
-    fn segment_comparator(&self) -> Self::SegmentComparator {
-        Self::SegmentComparator::default()
-    }
+    type SegmentSortKey: 'static + PartialOrd + Clone + Send + Sync + Clone;

    /// Computes the sort key for the given document and score.
    fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Self::SegmentSortKey;
@@ -55,7 +47,7 @@ pub trait SegmentSortKeyComputer: 'static {
        left: &Self::SegmentSortKey,
        right: &Self::SegmentSortKey,
    ) -> Ordering {
-        self.segment_comparator().compare(left, right)
+        NaturalComparator.compare(left, right)
    }

    /// Implementing this method makes it possible to avoid computing
@@ -89,7 +81,7 @@ pub trait SegmentSortKeyComputer: 'static {
 /// the sort key at a segment scale.
 pub trait SortKeyComputer: Sync {
    /// The sort key type.
-    type SortKey: 'static + Send + Sync + Clone + std::fmt::Debug;
+    type SortKey: 'static + Send + Sync + PartialOrd + Clone + std::fmt::Debug;
    /// Type of the associated [`SegmentSortKeyComputer`].
    type Child: SegmentSortKeyComputer<SortKey = Self::SortKey>;
    /// Comparator type.
@@ -144,7 +136,10 @@ where
    HeadSortKeyComputer: SortKeyComputer,
    TailSortKeyComputer: SortKeyComputer,
 {
-    type SortKey = (HeadSortKeyComputer::SortKey, TailSortKeyComputer::SortKey);
+    type SortKey = (
+        <HeadSortKeyComputer::Child as SegmentSortKeyComputer>::SortKey,
+        <TailSortKeyComputer::Child as SegmentSortKeyComputer>::SortKey,
+    );
    type Child = (HeadSortKeyComputer::Child, TailSortKeyComputer::Child);

    type Comparator = (
@@ -193,11 +188,6 @@ where
        TailSegmentSortKeyComputer::SegmentSortKey,
    );

-    type SegmentComparator = (
-        HeadSegmentSortKeyComputer::SegmentComparator,
-        TailSegmentSortKeyComputer::SegmentComparator,
-    );
-
    /// A SegmentSortKeyComputer maps to a SegmentSortKey, but it can also decide on
    /// its ordering.
    ///
@@ -279,12 +269,11 @@ impl<T, PreviousScore, NewScore> SegmentSortKeyComputer
    for MappedSegmentSortKeyComputer<T, PreviousScore, NewScore>
 where
    T: SegmentSortKeyComputer<SortKey = PreviousScore>,
-    PreviousScore: 'static + Clone + Send + Sync,
-    NewScore: 'static + Clone + Send + Sync,
+    PreviousScore: 'static + Clone + Send + Sync + PartialOrd,
+    NewScore: 'static + Clone + Send + Sync + PartialOrd,
 {
    type SortKey = NewScore;
    type SegmentSortKey = T::SegmentSortKey;
-    type SegmentComparator = T::SegmentComparator;

    fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Self::SegmentSortKey {
        self.sort_key_computer.segment_sort_key(doc, score)
@@ -474,7 +463,6 @@ where
 {
    type SortKey = TSortKey;
    type SegmentSortKey = TSortKey;
-    type SegmentComparator = NaturalComparator;

    fn segment_sort_key(&mut self, doc: DocId, _score: Score) -> TSortKey {
        (self)(doc)
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -324,7 +324,7 @@ impl TopDocs {
        sort_key_computer: impl SortKeyComputer<SortKey = TSortKey> + Send + 'static,
    ) -> impl Collector<Fruit = Vec<(TSortKey, DocAddress)>>
    where
-        TSortKey: 'static + Clone + Send + Sync + std::fmt::Debug,
+        TSortKey: 'static + Clone + Send + Sync + PartialOrd + std::fmt::Debug,
    {
        TopBySortKeyCollector::new(sort_key_computer, self.doc_range())
    }
@@ -445,7 +445,7 @@ where
    F: 'static + Send + Sync + Fn(&SegmentReader) -> TTweakScoreSortKeyFn,
    TTweakScoreSortKeyFn: 'static + Fn(DocId, Score) -> TSortKey,
    TweakScoreSegmentSortKeyComputer<TTweakScoreSortKeyFn>:
-        SegmentSortKeyComputer<SortKey = TSortKey, SegmentSortKey = TSortKey>,
+        SegmentSortKeyComputer<SortKey = TSortKey>,
    TSortKey: 'static + PartialOrd + Clone + Send + Sync + std::fmt::Debug,
 {
    type SortKey = TSortKey;
@@ -480,7 +480,6 @@ where
 {
    type SortKey = TSortKey;
    type SegmentSortKey = TSortKey;
-    type SegmentComparator = NaturalComparator;

    fn segment_sort_key(&mut self, doc: DocId, score: Score) -> TSortKey {
        (self.sort_key_fn)(doc, score)
--- a/src/core/executor.rs
+++ b/src/core/executor.rs
@@ -48,15 +48,7 @@ impl Executor {
        F: Sized + Sync + Fn(A) -> crate::Result<R>,
    {
        match self {
-            Executor::SingleThread => {
-                // Avoid `collect`, since the stacktrace is blown up by it, which makes profiling
-                // harder.
-                let mut result = Vec::with_capacity(args.size_hint().0);
-                for arg in args {
-                    result.push(f(arg)?);
-                }
-                Ok(result)
-            }
+            Executor::SingleThread => args.map(f).collect::<crate::Result<_>>(),
            Executor::ThreadPool(pool) => {
                let args: Vec<A> = args.collect();
                let num_fruits = args.len();
--- a/src/core/json_utils.rs
+++ b/src/core/json_utils.rs
@@ -4,7 +4,7 @@ use common::{replace_in_place, JsonPathWriter};
 use rustc_hash::FxHashMap;

 use crate::indexer::indexing_term::IndexingTerm;
-use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter as _, PostingsWriterEnum};
+use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
 use crate::schema::document::{ReferenceValue, ReferenceValueLeaf, Value};
 use crate::schema::{Type, DATE_TIME_PRECISION_INDEXED};
 use crate::time::format_description::well_known::Rfc3339;
@@ -80,7 +80,7 @@ fn index_json_object<'a, V: Value<'a>>(
    text_analyzer: &mut TextAnalyzer,
    term_buffer: &mut IndexingTerm,
    json_path_writer: &mut JsonPathWriter,
-    postings_writer: &mut PostingsWriterEnum,
+    postings_writer: &mut dyn PostingsWriter,
    ctx: &mut IndexingContext,
    positions_per_path: &mut IndexingPositionsPerPath,
 ) {
@@ -110,7 +110,7 @@ pub(crate) fn index_json_value<'a, V: Value<'a>>(
    text_analyzer: &mut TextAnalyzer,
    term_buffer: &mut IndexingTerm,
    json_path_writer: &mut JsonPathWriter,
-    postings_writer: &mut PostingsWriterEnum,
+    postings_writer: &mut dyn PostingsWriter,
    ctx: &mut IndexingContext,
    positions_per_path: &mut IndexingPositionsPerPath,
 ) {
--- a/src/directory/mmap_directory/file_watcher.rs
+++ b/src/directory/mmap_directory/file_watcher.rs
--- a/src/directory/mmap_directory/mod.rs
+++ b/src/directory/mmap_directory/mod.rs
@@ -1,5 +1,3 @@
-mod file_watcher;
-
 use std::collections::HashMap;
 use std::fmt;
 use std::fs::{self, File, OpenOptions};
@@ -9,7 +7,6 @@ use std::path::{Path, PathBuf};
 use std::sync::{Arc, RwLock, Weak};

 use common::StableDeref;
-use file_watcher::FileWatcher;
 use fs4::fs_std::FileExt;
 #[cfg(all(feature = "mmap", unix))]
 pub use memmap2::Advice;
@@ -21,6 +18,7 @@ use crate::core::META_FILEPATH;
 use crate::directory::error::{
    DeleteError, LockError, OpenDirectoryError, OpenReadError, OpenWriteError,
 };
+use crate::directory::file_watcher::FileWatcher;
 use crate::directory::{
    AntiCallToken, Directory, DirectoryLock, FileHandle, Lock, OwnedBytes, TerminatingWrite,
    WatchCallback, WatchHandle, WritePtr,
--- a/src/directory/mod.rs
+++ b/src/directory/mod.rs
@@ -5,6 +5,7 @@ mod mmap_directory;

 mod directory;
 mod directory_lock;
+mod file_watcher;
 pub mod footer;
 mod managed_directory;
 mod ram_directory;
--- a/src/docset.rs
+++ b/src/docset.rs
@@ -1,7 +1,5 @@
 use std::borrow::{Borrow, BorrowMut};

-use common::BitSet;
-
 use crate::fastfield::AliveBitSet;
 use crate::DocId;

@@ -44,6 +42,7 @@ pub trait DocSet: Send {
    /// Calling `seek(TERMINATED)` is also legal and is the normal way to consume a `DocSet`.
    ///
    /// `target` has to be larger or equal to `.doc()` when calling `seek`.
+    /// If `target` is equal to `.doc()` then the DocSet should not advance.
    fn seek(&mut self, target: DocId) -> DocId {
        let mut doc = self.doc();
        debug_assert!(doc <= target);
@@ -108,15 +107,6 @@ pub trait DocSet: Send {
        buffer.len()
    }

-    /// TODO comment on the size of the bitset
-    fn fill_bitset(&mut self, bitset: &mut BitSet) {
-        let mut doc = self.doc();
-        while doc != TERMINATED {
-            bitset.insert(doc);
-            doc = self.advance();
-        }
-    }
-
    /// Returns the current document
    /// Right after creating a new `DocSet`, the docset points to the first document.
    ///
@@ -177,6 +167,19 @@ pub trait DocSet: Send {
    }
 }

+/// Consumes the `DocSet` and returns a Vec with all of the docs in the DocSet
+/// including the current doc.
+#[cfg(test)]
+pub fn docset_to_doc_vec(mut doc_set: Box<dyn DocSet>) -> Vec<DocId> {
+    let mut output = Vec::new();
+    let mut doc = doc_set.doc();
+    while doc != TERMINATED {
+        output.push(doc);
+        doc = doc_set.advance();
+    }
+    output
+}
+
 impl DocSet for &mut dyn DocSet {
    fn advance(&mut self) -> u32 {
        (**self).advance()
--- a/src/fieldnorm/mod.rs
+++ b/src/fieldnorm/mod.rs
@@ -113,7 +113,7 @@ mod tests {
            IndexRecordOption::WithFreqs,
        );
        let weight = query.weight(EnableScoring::enabled_from_searcher(&searcher))?;
-        let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32)?;
+        let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32, 0)?;
        assert_eq!(scorer.doc(), 0);
        assert!((scorer.score() - 0.22920431).abs() < 0.001f32);
        assert_eq!(scorer.advance(), 1);
@@ -142,7 +142,7 @@ mod tests {
            IndexRecordOption::WithFreqs,
        );
        let weight = query.weight(EnableScoring::enabled_from_searcher(&searcher))?;
-        let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32)?;
+        let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32, 0)?;
        assert_eq!(scorer.doc(), 0);
        assert!((scorer.score() - 0.22920431).abs() < 0.001f32);
        assert_eq!(scorer.advance(), 1);
--- a/src/index/codec_configuration.rs
+++ b/src/index/codec_configuration.rs
@@ -1,38 +0,0 @@
-use std::borrow::Cow;
-
-use serde::{Deserialize, Serialize};
-
-use crate::codec::{Codec, StandardCodec};
-
-#[derive(Serialize, Deserialize, Clone, Debug)]
-pub struct CodecConfiguration {
-    name: Cow<'static, str>,
-    #[serde(default, skip_serializing_if = "serde_json::Value::is_null")]
-    props: serde_json::Value,
-}
-
-impl CodecConfiguration {
-    pub fn from_codec<C: Codec>(codec: &C) -> Self {
-        CodecConfiguration {
-            name: Cow::Borrowed(C::NAME),
-            props: codec.to_json_props(),
-        }
-    }
-
-    pub fn to_codec<C: Codec>(&self) -> crate::Result<C> {
-        if self.name != C::NAME {
-            return Err(crate::TantivyError::InvalidArgument(format!(
-                "Codec name mismatch: expected {}, got {}",
-                C::NAME,
-                self.name
-            )));
-        }
-        C::from_json_props(&self.props)
-    }
-}
-
-impl Default for CodecConfiguration {
-    fn default() -> Self {
-        CodecConfiguration::from_codec(&StandardCodec)
-    }
-}
--- a/src/index/index.rs
+++ b/src/index/index.rs
@@ -8,14 +8,12 @@ use std::thread::available_parallelism;
 use super::segment::Segment;
 use super::segment_reader::merge_field_meta_data;
 use super::{FieldMetadata, IndexSettings};
-use crate::codec::StandardCodec;
 use crate::core::{Executor, META_FILEPATH};
 use crate::directory::error::OpenReadError;
 #[cfg(feature = "mmap")]
 use crate::directory::MmapDirectory;
 use crate::directory::{Directory, ManagedDirectory, RamDirectory, INDEX_WRITER_LOCK};
 use crate::error::{DataCorruption, TantivyError};
-use crate::index::codec_configuration::CodecConfiguration;
 use crate::index::{IndexMeta, SegmentId, SegmentMeta, SegmentMetaInventory};
 use crate::indexer::index_writer::{
    IndexWriterOptions, MAX_NUM_THREAD, MEMORY_BUDGET_NUM_BYTES_MIN,
@@ -61,7 +59,6 @@ fn save_new_metas(
    schema: Schema,
    index_settings: IndexSettings,
    directory: &dyn Directory,
-    codec: CodecConfiguration,
 ) -> crate::Result<()> {
    save_metas(
        &IndexMeta {
@@ -70,7 +67,6 @@ fn save_new_metas(
            schema,
            opstamp: 0u64,
            payload: None,
-            codec,
        },
        directory,
    )?;
@@ -105,21 +101,18 @@ fn save_new_metas(
 /// };
 /// let index = Index::builder().schema(schema).settings(settings).create_in_ram();
 /// ```
-pub struct IndexBuilder<Codec: crate::codec::Codec = StandardCodec> {
+pub struct IndexBuilder {
    schema: Option<Schema>,
    index_settings: IndexSettings,
    tokenizer_manager: TokenizerManager,
    fast_field_tokenizer_manager: TokenizerManager,
-    codec: Codec,
 }
-
-impl Default for IndexBuilder<StandardCodec> {
+impl Default for IndexBuilder {
    fn default() -> Self {
        IndexBuilder::new()
    }
 }
-
-impl IndexBuilder<StandardCodec> {
+impl IndexBuilder {
    /// Creates a new `IndexBuilder`
    pub fn new() -> Self {
        Self {
@@ -127,21 +120,6 @@ impl IndexBuilder<StandardCodec> {
            index_settings: IndexSettings::default(),
            tokenizer_manager: TokenizerManager::default(),
            fast_field_tokenizer_manager: TokenizerManager::default(),
-            codec: StandardCodec,
-        }
-    }
-}
-
-impl<Codec: crate::codec::Codec> IndexBuilder<Codec> {
-    /// Set the codec
-    #[must_use]
-    pub fn codec<NewCodec: crate::codec::Codec>(self, codec: NewCodec) -> IndexBuilder<NewCodec> {
-        IndexBuilder {
-            schema: self.schema,
-            index_settings: self.index_settings,
-            tokenizer_manager: self.tokenizer_manager,
-            fast_field_tokenizer_manager: self.fast_field_tokenizer_manager,
-            codec,
        }
    }

@@ -176,7 +154,7 @@ impl<Codec: crate::codec::Codec> IndexBuilder<Codec> {
    /// The index will be allocated in anonymous memory.
    /// This is useful for indexing small set of documents
    /// for instances like unit test or temporary in memory index.
-    pub fn create_in_ram(self) -> Result<Index<Codec>, TantivyError> {
+    pub fn create_in_ram(self) -> Result<Index, TantivyError> {
        let ram_directory = RamDirectory::create();
        self.create(ram_directory)
    }
@@ -187,7 +165,7 @@ impl<Codec: crate::codec::Codec> IndexBuilder<Codec> {
    /// If a previous index was in this directory, it returns an
    /// [`TantivyError::IndexAlreadyExists`] error.
    #[cfg(feature = "mmap")]
-    pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index<Codec>> {
+    pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index> {
        let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::open(directory_path)?);
        if Index::exists(&*mmap_directory)? {
            return Err(TantivyError::IndexAlreadyExists);
@@ -208,7 +186,7 @@ impl<Codec: crate::codec::Codec> IndexBuilder<Codec> {
        self,
        dir: impl Into<Box<dyn Directory>>,
        mem_budget: usize,
-    ) -> crate::Result<SingleSegmentIndexWriter<Codec, D>> {
+    ) -> crate::Result<SingleSegmentIndexWriter<D>> {
        let index = self.create(dir)?;
        let index_simple_writer = SingleSegmentIndexWriter::new(index, mem_budget)?;
        Ok(index_simple_writer)
@@ -224,7 +202,7 @@ impl<Codec: crate::codec::Codec> IndexBuilder<Codec> {
    /// For other unit tests, prefer the [`RamDirectory`], see:
    /// [`IndexBuilder::create_in_ram()`].
    #[cfg(feature = "mmap")]
-    pub fn create_from_tempdir(self) -> crate::Result<Index<Codec>> {
+    pub fn create_from_tempdir(self) -> crate::Result<Index> {
        let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::create_from_tempdir()?);
        self.create(mmap_directory)
    }
@@ -237,15 +215,12 @@ impl<Codec: crate::codec::Codec> IndexBuilder<Codec> {
    }

    /// Opens or creates a new index in the provided directory
-    pub fn open_or_create<T: Into<Box<dyn Directory>>>(
-        self,
-        dir: T,
-    ) -> crate::Result<Index<Codec>> {
+    pub fn open_or_create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index> {
        let dir: Box<dyn Directory> = dir.into();
        if !Index::exists(&*dir)? {
            return self.create(dir);
        }
-        let mut index: Index<Codec> = Index::<Codec>::open_with_codec(dir)?;
+        let mut index = Index::open(dir)?;
        index.set_tokenizers(self.tokenizer_manager.clone());
        if index.schema() == self.get_expect_schema()? {
            Ok(index)
@@ -269,25 +244,18 @@ impl<Codec: crate::codec::Codec> IndexBuilder<Codec> {
    /// Creates a new index given an implementation of the trait `Directory`.
    ///
    /// If a directory previously existed, it will be erased.
-    pub fn create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index<Codec>> {
-        self.create_avoid_monomorphization(dir.into())
-    }
-
-    fn create_avoid_monomorphization(self, dir: Box<dyn Directory>) -> crate::Result<Index<Codec>> {
+    fn create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index> {
        self.validate()?;
+        let dir = dir.into();
        let directory = ManagedDirectory::wrap(dir)?;
-        let codec: CodecConfiguration = CodecConfiguration::from_codec(&self.codec);
        save_new_metas(
            self.get_expect_schema()?,
            self.index_settings.clone(),
            &directory,
-            codec,
        )?;
-        let schema = self.get_expect_schema()?;
-        let mut metas = IndexMeta::with_schema_and_codec(schema, &self.codec);
+        let mut metas = IndexMeta::with_schema(self.get_expect_schema()?);
        metas.index_settings = self.index_settings;
-        let mut index: Index<Codec> =
-            Index::<Codec>::open_from_metas(directory, &metas, SegmentMetaInventory::default())?;
+        let mut index = Index::open_from_metas(directory, &metas, SegmentMetaInventory::default());
        index.set_tokenizers(self.tokenizer_manager);
        index.set_fast_field_tokenizers(self.fast_field_tokenizer_manager);
        Ok(index)
@@ -296,7 +264,7 @@ impl<Codec: crate::codec::Codec> IndexBuilder<Codec> {

 /// Search Index
 #[derive(Clone)]
-pub struct Index<Codec: crate::codec::Codec = crate::codec::StandardCodec> {
+pub struct Index {
    directory: ManagedDirectory,
    schema: Schema,
    settings: IndexSettings,
@@ -304,7 +272,6 @@ pub struct Index<Codec: crate::codec::Codec = crate::codec::StandardCodec> {
    tokenizers: TokenizerManager,
    fast_field_tokenizers: TokenizerManager,
    inventory: SegmentMetaInventory,
-    codec: Codec,
 }

 impl Index {
@@ -312,6 +279,41 @@ impl Index {
    pub fn builder() -> IndexBuilder {
        IndexBuilder::new()
    }
+    /// Examines the directory to see if it contains an index.
+    ///
+    /// Effectively, it only checks for the presence of the `meta.json` file.
+    pub fn exists(dir: &dyn Directory) -> Result<bool, OpenReadError> {
+        dir.exists(&META_FILEPATH)
+    }
+
+    /// Accessor to the search executor.
+    ///
+    /// This pool is used by default when calling `searcher.search(...)`
+    /// to perform search on the individual segments.
+    ///
+    /// By default the executor is single-threaded, and simply runs in the calling thread.
+    pub fn search_executor(&self) -> &Executor {
+        &self.executor
+    }
+
+    /// Replace the default single thread search executor pool
+    /// by a thread pool with a given number of threads.
+    pub fn set_multithread_executor(&mut self, num_threads: usize) -> crate::Result<()> {
+        self.executor = Executor::multi_thread(num_threads, "tantivy-search-")?;
+        Ok(())
+    }
+
+    /// Replaces the search executor with the one supplied by the caller.
+    pub fn set_executor(&mut self, executor: Executor) {
+        self.executor = executor;
+    }
+
+    /// Replace the default single thread search executor pool
+    /// by a thread pool with as many threads as there are CPUs on the system.
+    pub fn set_default_multithread_executor(&mut self) -> crate::Result<()> {
+        let default_num_threads = available_parallelism()?.get();
+        self.set_multithread_executor(default_num_threads)
+    }

    /// Creates a new index using the [`RamDirectory`].
    ///
@@ -322,13 +324,6 @@ impl Index {
        IndexBuilder::new().schema(schema).create_in_ram().unwrap()
    }

-    /// Examines the directory to see if it contains an index.
-    ///
-    /// Effectively, it only checks for the presence of the `meta.json` file.
-    pub fn exists(directory: &dyn Directory) -> Result<bool, OpenReadError> {
-        directory.exists(&META_FILEPATH)
-    }
-
    /// Creates a new index in a given filepath.
    /// The index will use the [`MmapDirectory`].
    ///
@@ -375,107 +370,20 @@ impl Index {
        schema: Schema,
        settings: IndexSettings,
    ) -> crate::Result<Index> {
-        Self::create_to_avoid_monomorphization(dir.into(), schema, settings)
-    }
-
-    fn create_to_avoid_monomorphization(
-        dir: Box<dyn Directory>,
-        schema: Schema,
-        settings: IndexSettings,
-    ) -> crate::Result<Index> {
+        let dir: Box<dyn Directory> = dir.into();
        let mut builder = IndexBuilder::new().schema(schema);
        builder = builder.settings(settings);
        builder.create(dir)
    }

-    /// Opens a new directory from an index path.
-    #[cfg(feature = "mmap")]
-    pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> crate::Result<Index> {
-        Self::open_in_dir_to_avoid_monomorphization(directory_path.as_ref())
-    }
-
-    #[inline(never)]
-    fn open_in_dir_to_avoid_monomorphization(directory_path: &Path) -> crate::Result<Index> {
-        let mmap_directory = MmapDirectory::open(directory_path)?;
-        Index::open(mmap_directory)
-    }
-
-    /// Open the index using the provided directory
-    pub fn open<T: Into<Box<dyn Directory>>>(directory: T) -> crate::Result<Index> {
-        Index::<StandardCodec>::open_with_codec(directory.into())
-    }
-}
-
-impl<Codec: crate::codec::Codec> Index<Codec> {
-    /// Returns a version of this index with the standard codec.
-    /// This is useful when you need to pass the index to APIs that
-    /// don't care about the codec (e.g., for reading).
-    pub(crate) fn with_standard_codec(&self) -> Index<StandardCodec> {
-        Index {
-            directory: self.directory.clone(),
-            schema: self.schema.clone(),
-            settings: self.settings.clone(),
-            executor: self.executor.clone(),
-            tokenizers: self.tokenizers.clone(),
-            fast_field_tokenizers: self.fast_field_tokenizers.clone(),
-            inventory: self.inventory.clone(),
-            codec: StandardCodec,
-        }
-    }
-
-    /// Open the index using the provided directory
-    #[inline(never)]
-    pub fn open_with_codec(directory: Box<dyn Directory>) -> crate::Result<Index<Codec>> {
-        let directory = ManagedDirectory::wrap(directory)?;
-        let inventory = SegmentMetaInventory::default();
-        let metas = load_metas(&directory, &inventory)?;
-        let index: Index<Codec> = Index::<Codec>::open_from_metas(directory, &metas, inventory)?;
-        Ok(index)
-    }
-
-    /// Accessor to the codec.
-    pub fn codec(&self) -> &Codec {
-        &self.codec
-    }
-
-    /// Accessor to the search executor.
-    ///
-    /// This pool is used by default when calling `searcher.search(...)`
-    /// to perform search on the individual segments.
-    ///
-    /// By default the executor is single thread, and simply runs in the calling thread.
-    pub fn search_executor(&self) -> &Executor {
-        &self.executor
-    }
-
-    /// Replace the default single thread search executor pool
-    /// by a thread pool with a given number of threads.
-    pub fn set_multithread_executor(&mut self, num_threads: usize) -> crate::Result<()> {
-        self.executor = Executor::multi_thread(num_threads, "tantivy-search-")?;
-        Ok(())
-    }
-
-    /// Custom thread pool by a outer thread pool.
-    pub fn set_executor(&mut self, executor: Executor) {
-        self.executor = executor;
-    }
-
-    /// Replace the default single thread search executor pool
-    /// by a thread pool with as many threads as there are CPUs on the system.
-    pub fn set_default_multithread_executor(&mut self) -> crate::Result<()> {
-        let default_num_threads = available_parallelism()?.get();
-        self.set_multithread_executor(default_num_threads)
-    }
-
    /// Creates a new index given a directory and an [`IndexMeta`].
-    fn open_from_metas<C: crate::codec::Codec>(
+    fn open_from_metas(
        directory: ManagedDirectory,
        metas: &IndexMeta,
        inventory: SegmentMetaInventory,
-    ) -> crate::Result<Index<C>> {
+    ) -> Index {
        let schema = metas.schema.clone();
-        let codec = metas.codec.to_codec::<C>()?;
-        Ok(Index {
+        Index {
            settings: metas.index_settings.clone(),
            directory,
            schema,
@@ -483,8 +391,7 @@ impl<Codec: crate::codec::Codec> Index<Codec> {
            fast_field_tokenizers: TokenizerManager::default(),
            executor: Executor::single_thread(),
            inventory,
-            codec,
-        })
+        }
    }

    /// Setter for the tokenizer manager.
@@ -540,7 +447,7 @@ impl<Codec: crate::codec::Codec> Index<Codec> {
    /// Create a default [`IndexReader`] for the given index.
    ///
    /// See [`Index.reader_builder()`].
-    pub fn reader(&self) -> crate::Result<IndexReader<Codec>> {
+    pub fn reader(&self) -> crate::Result<IndexReader> {
        self.reader_builder().try_into()
    }

@@ -548,10 +455,17 @@ impl<Codec: crate::codec::Codec> Index<Codec> {
    ///
    /// Most project should create at most one reader for a given index.
    /// This method is typically called only once per `Index` instance.
-    pub fn reader_builder(&self) -> IndexReaderBuilder<Codec> {
+    pub fn reader_builder(&self) -> IndexReaderBuilder {
        IndexReaderBuilder::new(self.clone())
    }

+    /// Opens the index stored at the given directory path, using an [`MmapDirectory`].
+    #[cfg(feature = "mmap")]
+    pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> crate::Result<Index> {
+        let mmap_directory = MmapDirectory::open(directory_path)?;
+        Index::open(mmap_directory)
+    }
+
    /// Returns the list of the segment metas tracked by the index.
    ///
    /// Such segments can of course be part of the index,
@@ -592,6 +506,16 @@ impl<Codec: crate::codec::Codec> Index<Codec> {
        self.inventory.new_segment_meta(segment_id, max_doc)
    }

+    /// Opens the index stored in the provided directory.
+    pub fn open<T: Into<Box<dyn Directory>>>(directory: T) -> crate::Result<Index> {
+        let directory = directory.into();
+        let directory = ManagedDirectory::wrap(directory)?;
+        let inventory = SegmentMetaInventory::default();
+        let metas = load_metas(&directory, &inventory)?;
+        let index = Index::open_from_metas(directory, &metas, inventory);
+        Ok(index)
+    }
+
    /// Reads the index meta file from the directory.
    pub fn load_metas(&self) -> crate::Result<IndexMeta> {
        load_metas(self.directory(), &self.inventory)
@@ -615,7 +539,7 @@ impl<Codec: crate::codec::Codec> Index<Codec> {
    pub fn writer_with_options<D: Document>(
        &self,
        options: IndexWriterOptions,
-    ) -> crate::Result<IndexWriter<Codec, D>> {
+    ) -> crate::Result<IndexWriter<D>> {
        let directory_lock = self
            .directory
            .acquire_lock(&INDEX_WRITER_LOCK)
@@ -657,7 +581,7 @@ impl<Codec: crate::codec::Codec> Index<Codec> {
        &self,
        num_threads: usize,
        overall_memory_budget_in_bytes: usize,
-    ) -> crate::Result<IndexWriter<Codec, D>> {
+    ) -> crate::Result<IndexWriter<D>> {
        let memory_arena_in_bytes_per_thread = overall_memory_budget_in_bytes / num_threads;
        let options = IndexWriterOptions::builder()
            .num_worker_threads(num_threads)
@@ -671,7 +595,7 @@ impl<Codec: crate::codec::Codec> Index<Codec> {
    /// That index writer only simply has a single thread and a memory budget of 15 MB.
    /// Using a single thread gives us a deterministic allocation of DocId.
    #[cfg(test)]
-    pub fn writer_for_tests<D: Document>(&self) -> crate::Result<IndexWriter<Codec, D>> {
+    pub fn writer_for_tests<D: Document>(&self) -> crate::Result<IndexWriter<D>> {
        self.writer_with_num_threads(1, MEMORY_BUDGET_NUM_BYTES_MIN)
    }

@@ -689,7 +613,7 @@ impl<Codec: crate::codec::Codec> Index<Codec> {
    pub fn writer<D: Document>(
        &self,
        memory_budget_in_bytes: usize,
-    ) -> crate::Result<IndexWriter<Codec, D>> {
+    ) -> crate::Result<IndexWriter<D>> {
        let mut num_threads = std::cmp::min(available_parallelism()?.get(), MAX_NUM_THREAD);
        let memory_budget_num_bytes_per_thread = memory_budget_in_bytes / num_threads;
        if memory_budget_num_bytes_per_thread < MEMORY_BUDGET_NUM_BYTES_MIN {
@@ -716,7 +640,7 @@ impl<Codec: crate::codec::Codec> Index<Codec> {
    }

    /// Returns the list of segments that are searchable
-    pub fn searchable_segments(&self) -> crate::Result<Vec<Segment<Codec>>> {
+    pub fn searchable_segments(&self) -> crate::Result<Vec<Segment>> {
        Ok(self
            .searchable_segment_metas()?
            .into_iter()
@@ -725,12 +649,12 @@ impl<Codec: crate::codec::Codec> Index<Codec> {
    }

    #[doc(hidden)]
-    pub fn segment(&self, segment_meta: SegmentMeta) -> Segment<Codec> {
+    pub fn segment(&self, segment_meta: SegmentMeta) -> Segment {
        Segment::for_index(self.clone(), segment_meta)
    }

    /// Creates a new segment.
-    pub fn new_segment(&self) -> Segment<Codec> {
+    pub fn new_segment(&self) -> Segment {
        let segment_meta = self
            .inventory
            .new_segment_meta(SegmentId::generate_random(), 0);
--- a/src/index/index_meta.rs
+++ b/src/index/index_meta.rs
@@ -7,8 +7,7 @@ use std::sync::Arc;
 use serde::{Deserialize, Serialize};

 use super::SegmentComponent;
-use crate::codec::Codec;
-use crate::index::{CodecConfiguration, SegmentId};
+use crate::index::SegmentId;
 use crate::schema::Schema;
 use crate::store::Compressor;
 use crate::{Inventory, Opstamp, TrackedObject};
@@ -321,8 +320,6 @@ pub struct IndexMeta {
    /// This payload is entirely unused by tantivy.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub payload: Option<String>,
-    /// Codec configuration for the index.
-    pub codec: CodecConfiguration,
 }

 #[derive(Deserialize, Debug)]
@@ -334,8 +331,6 @@ struct UntrackedIndexMeta {
    pub opstamp: Opstamp,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub payload: Option<String>,
-    #[serde(default)]
-    pub codec: CodecConfiguration,
 }

 impl UntrackedIndexMeta {
@@ -350,7 +345,6 @@ impl UntrackedIndexMeta {
            schema: self.schema,
            opstamp: self.opstamp,
            payload: self.payload,
-            codec: self.codec,
        }
    }
 }
@@ -361,14 +355,13 @@ impl IndexMeta {
    ///
    /// This new index does not contains any segments.
    /// Opstamp will the value `0u64`.
-    pub fn with_schema_and_codec<C: Codec>(schema: Schema, codec: &C) -> IndexMeta {
+    pub fn with_schema(schema: Schema) -> IndexMeta {
        IndexMeta {
            index_settings: IndexSettings::default(),
            segments: vec![],
            schema,
            opstamp: 0u64,
            payload: None,
-            codec: CodecConfiguration::from_codec(codec),
        }
    }

@@ -411,20 +404,16 @@ mod tests {
            schema_builder.build()
        };
        let index_metas = IndexMeta {
-            index_settings: IndexSettings {
-                docstore_compression: Compressor::None,
-                ..Default::default()
-            },
+            index_settings: IndexSettings::default(),
            segments: Vec::new(),
            schema,
            opstamp: 0u64,
            payload: None,
-            codec: Default::default(),
        };
        let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
        assert_eq!(
            json,
-            r#"{"index_settings":{"docstore_compression":"none","docstore_blocksize":16384},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0,"codec":{"name":"standard"}}"#
+            r#"{"index_settings":{"docstore_compression":"lz4","docstore_blocksize":16384},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
        );

        let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap();
@@ -505,8 +494,6 @@ mod tests {
    #[test]
    #[cfg(feature = "lz4-compression")]
    fn test_index_settings_default() {
-        use crate::store::Compressor;
-
        let mut index_settings = IndexSettings::default();
        assert_eq!(
            index_settings,
--- a/src/index/inverted_index_reader.rs
+++ b/src/index/inverted_index_reader.rs
@@ -1,8 +1,7 @@
 use std::io;
-use std::sync::Arc;

 use common::json_path_writer::JSON_END_OF_PATH;
-use common::{BinarySerializable, ByteCount, OwnedBytes};
+use common::{BinarySerializable, ByteCount};
 #[cfg(feature = "quickwit")]
 use futures_util::{FutureExt, StreamExt, TryStreamExt};
 #[cfg(feature = "quickwit")]
@@ -10,15 +9,12 @@ use itertools::Itertools;
 #[cfg(feature = "quickwit")]
 use tantivy_fst::automaton::{AlwaysMatch, Automaton};

-use crate::codec::postings::PostingsCodec;
-use crate::codec::{Codec, ObjectSafeCodec, StandardCodec};
 use crate::directory::FileSlice;
-use crate::fieldnorm::FieldNormReader;
-use crate::postings::{Postings, TermInfo};
-use crate::query::term_query::TermScorer;
-use crate::query::{Bm25Weight, PhraseScorer, Scorer};
+use crate::positions::PositionReader;
+use crate::postings::{BlockSegmentPostings, SegmentPostings, TermInfo};
 use crate::schema::{IndexRecordOption, Term, Type};
 use crate::termdict::TermDictionary;
+use crate::DocId;

 /// The inverted index reader is in charge of accessing
 /// the inverted index associated with a specific field.
@@ -38,7 +34,6 @@ pub struct InvertedIndexReader {
    positions_file_slice: FileSlice,
    record_option: IndexRecordOption,
    total_num_tokens: u64,
-    codec: Arc<dyn ObjectSafeCodec>,
 }

 /// Object that records the amount of space used by a field in an inverted index.
@@ -74,7 +69,6 @@ impl InvertedIndexReader {
        postings_file_slice: FileSlice,
        positions_file_slice: FileSlice,
        record_option: IndexRecordOption,
-        codec: Arc<dyn ObjectSafeCodec>,
    ) -> io::Result<InvertedIndexReader> {
        let (total_num_tokens_slice, postings_body) = postings_file_slice.split(8);
        let total_num_tokens = u64::deserialize(&mut total_num_tokens_slice.read_bytes()?)?;
@@ -84,7 +78,6 @@ impl InvertedIndexReader {
            positions_file_slice,
            record_option,
            total_num_tokens,
-            codec,
        })
    }

@@ -97,7 +90,6 @@ impl InvertedIndexReader {
            positions_file_slice: FileSlice::empty(),
            record_option,
            total_num_tokens: 0u64,
-            codec: Arc::new(StandardCodec),
        }
    }

@@ -169,98 +161,80 @@ impl InvertedIndexReader {
        Ok(fields)
    }

-    pub(crate) fn new_term_scorer_specialized<C: Codec>(
-        &self,
-        term_info: &TermInfo,
-        option: IndexRecordOption,
-        fieldnorm_reader: FieldNormReader,
-        similarity_weight: Bm25Weight,
-        codec: &C,
-    ) -> io::Result<TermScorer<<<C as Codec>::PostingsCodec as PostingsCodec>::Postings>> {
-        let postings = self.read_postings_from_terminfo_specialized(term_info, option, codec)?;
-        let term_scorer = TermScorer::new(postings, fieldnorm_reader, similarity_weight);
-        Ok(term_scorer)
-    }
-
-    pub(crate) fn new_phrase_scorer_type_specialized<C: Codec>(
-        &self,
-        term_infos: &[(usize, TermInfo)],
-        similarity_weight_opt: Option<Bm25Weight>,
-        fieldnorm_reader: FieldNormReader,
-        slop: u32,
-        codec: &C,
-    ) -> io::Result<PhraseScorer<<<C as Codec>::PostingsCodec as PostingsCodec>::Postings>> {
-        let mut offset_and_term_postings: Vec<(
-            usize,
-            <<C as Codec>::PostingsCodec as PostingsCodec>::Postings,
-        )> = Vec::with_capacity(term_infos.len());
-        for (offset, term_info) in term_infos {
-            let postings = self.read_postings_from_terminfo_specialized(
-                term_info,
-                IndexRecordOption::WithFreqsAndPositions,
-                codec,
-            )?;
-            offset_and_term_postings.push((*offset, postings));
-        }
-        let phrase_scorer = PhraseScorer::new(
-            offset_and_term_postings,
-            similarity_weight_opt,
-            fieldnorm_reader,
-            slop,
-        );
-        Ok(phrase_scorer)
-    }
-
-    /// Build a new term scorer.
-    pub fn new_term_scorer(
-        &self,
-        term_info: &TermInfo,
-        option: IndexRecordOption,
-        fieldnorm_reader: FieldNormReader,
-        similarity_weight: Bm25Weight,
-    ) -> io::Result<Box<dyn Scorer>> {
-        let term_scorer = self.codec.load_term_scorer_type_erased(
-            term_info,
-            option,
-            self,
-            fieldnorm_reader,
-            similarity_weight,
-        )?;
-        Ok(term_scorer)
-    }
-
-    /// Returns a postings object specific with a concrete type.
+    /// Resets the block segment to another position of the postings
+    /// file.
    ///
-    /// This requires you to provied the actual codec.
-    pub fn read_postings_from_terminfo_specialized<C: Codec>(
+    /// This is useful for enumerating through a list of terms,
+    /// and consuming the associated posting lists while avoiding
+    /// reallocating a [`BlockSegmentPostings`].
+    ///
+    /// # Warning
+    ///
+    /// This does not reset the positions list.
+    pub fn reset_block_postings_from_terminfo(
        &self,
        term_info: &TermInfo,
+        block_postings: &mut BlockSegmentPostings,
+    ) -> io::Result<()> {
+        let postings_slice = self
+            .postings_file_slice
+            .slice(term_info.postings_range.clone());
+        let postings_bytes = postings_slice.read_bytes()?;
+        block_postings.reset(term_info.doc_freq, postings_bytes)?;
+        Ok(())
+    }
+
+    /// Returns a block postings given a `Term`.
+    /// This method is for advanced usage only.
+    ///
+    /// Most users should prefer using [`Self::read_postings()`] instead.
+    pub fn read_block_postings(
+        &self,
+        term: &Term,
        option: IndexRecordOption,
-        codec: &C,
-    ) -> io::Result<<<C as Codec>::PostingsCodec as PostingsCodec>::Postings> {
-        let option = option.downgrade(self.record_option);
+    ) -> io::Result<Option<BlockSegmentPostings>> {
+        let Some(term_info) = self.get_term_info(term)? else {
+            return Ok(None);
+        };
+        let block_postings_not_loaded =
+            self.read_block_postings_from_terminfo(&term_info, option)?;
+        Ok(Some(block_postings_not_loaded))
+    }
+
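+    /// Returns a block postings given a `term_info`, passing `seek_doc` on to
+    /// `BlockSegmentPostings::open`, together with the `usize` that function returns
+    /// (callers in this file treat it as the position within the block).
+    /// Crate-internal: external users should prefer [`Self::read_postings()`] instead.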
+    pub(crate) fn read_block_postings_from_terminfo_with_seek(
+        &self,
+        term_info: &TermInfo,
+        requested_option: IndexRecordOption,
+        seek_doc: DocId,
+    ) -> io::Result<(BlockSegmentPostings, usize)> {
        let postings_data = self
            .postings_file_slice
-            .slice(term_info.postings_range.clone())
-            .read_bytes()?;
-        let positions_data: Option<OwnedBytes> = if option.has_positions() {
-            let positions_data = self
-                .positions_file_slice
-                .slice(term_info.positions_range.clone())
-                .read_bytes()?;
-            Some(positions_data)
-        } else {
-            None
-        };
-        let postings: <<C as Codec>::PostingsCodec as PostingsCodec>::Postings =
-            codec.postings_codec().load_postings(
-                term_info.doc_freq,
-                postings_data,
-                self.record_option,
-                option,
-                positions_data,
-            )?;
-        Ok(postings)
+            .slice(term_info.postings_range.clone());
+        BlockSegmentPostings::open(
+            term_info.doc_freq,
+            postings_data,
+            self.record_option,
+            requested_option,
+            seek_doc,
+        )
+    }
+
+    /// Returns a block postings given a `term_info`.
+    /// This method is for advanced usage only.
+    ///
+    /// Most users should prefer using [`Self::read_postings()`] instead.
+    pub fn read_block_postings_from_terminfo(
+        &self,
+        term_info: &TermInfo,
+        requested_option: IndexRecordOption,
+    ) -> io::Result<BlockSegmentPostings> {
+        let (block_segment_postings, _) =
+            self.read_block_postings_from_terminfo_with_seek(term_info, requested_option, 0)?;
+        Ok(block_segment_postings)
    }

    /// Returns a posting object given a `term_info`.
@@ -270,10 +244,27 @@ impl InvertedIndexReader {
    pub fn read_postings_from_terminfo(
        &self,
        term_info: &TermInfo,
-        option: IndexRecordOption,
-    ) -> io::Result<Box<dyn Postings>> {
-        self.codec
-            .load_postings_type_erased(term_info, option, self)
+        record_option: IndexRecordOption,
+        seek_doc: DocId,
+    ) -> io::Result<SegmentPostings> {
+        let (block_segment_postings, position_within_block) =
+            self.read_block_postings_from_terminfo_with_seek(term_info, record_option, seek_doc)?;
+        let position_reader = {
+            if record_option.downgrade(self.record_option).has_positions() {
+                let positions_data = self
+                    .positions_file_slice
+                    .read_bytes_slice(term_info.positions_range.clone())?;
+                let position_reader = PositionReader::open(positions_data)?;
+                Some(position_reader)
+            } else {
+                None
+            }
+        };
+        Ok(SegmentPostings::from_block_postings(
+            block_segment_postings,
+            position_reader,
+            position_within_block,
+        ))
    }

    /// Returns the total number of tokens recorded for all documents
@@ -296,9 +287,9 @@ impl InvertedIndexReader {
        &self,
        term: &Term,
        option: IndexRecordOption,
-    ) -> io::Result<Option<Box<dyn Postings>>> {
+    ) -> io::Result<Option<SegmentPostings>> {
        self.get_term_info(term)?
-            .map(move |term_info| self.read_postings_from_terminfo(&term_info, option))
+            .map(move |term_info| self.read_postings_from_terminfo(&term_info, option, 0u32))
            .transpose()
    }

--- a/src/index/mod.rs
+++ b/src/index/mod.rs
@@ -2,7 +2,6 @@
 //!
 //! It contains `Index` and `Segment`, where a `Index` consists of one or more `Segment`s.

-mod codec_configuration;
 mod index;
 mod index_meta;
 mod inverted_index_reader;
@@ -11,7 +10,6 @@ mod segment_component;
 mod segment_id;
 mod segment_reader;

-pub use self::codec_configuration::CodecConfiguration;
 pub use self::index::{Index, IndexBuilder};
 pub(crate) use self::index_meta::SegmentMetaInventory;
 pub use self::index_meta::{IndexMeta, IndexSettings, Order, SegmentMeta};
--- a/src/index/segment.rs
+++ b/src/index/segment.rs
@@ -2,7 +2,6 @@ use std::fmt;
 use std::path::PathBuf;

 use super::SegmentComponent;
-use crate::codec::StandardCodec;
 use crate::directory::error::{OpenReadError, OpenWriteError};
 use crate::directory::{Directory, FileSlice, WritePtr};
 use crate::index::{Index, SegmentId, SegmentMeta};
@@ -11,25 +10,25 @@ use crate::Opstamp;

 /// A segment is a piece of the index.
 #[derive(Clone)]
-pub struct Segment<C: crate::codec::Codec = StandardCodec> {
-    index: Index<C>,
+pub struct Segment {
+    index: Index,
    meta: SegmentMeta,
 }

-impl<C: crate::codec::Codec> fmt::Debug for Segment<C> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+impl fmt::Debug for Segment {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Segment({:?})", self.id().uuid_string())
    }
 }

-impl<C: crate::codec::Codec> Segment<C> {
+impl Segment {
    /// Creates a new segment given an `Index` and a `SegmentId`
-    pub(crate) fn for_index(index: Index<C>, meta: SegmentMeta) -> Segment<C> {
+    pub(crate) fn for_index(index: Index, meta: SegmentMeta) -> Segment {
        Segment { index, meta }
    }

    /// Returns the index the segment belongs to.
-    pub fn index(&self) -> &Index<C> {
+    pub fn index(&self) -> &Index {
        &self.index
    }

@@ -47,7 +46,7 @@ impl<C: crate::codec::Codec> Segment<C> {
    ///
    /// This method is only used when updating `max_doc` from 0
    /// as we finalize a fresh new segment.
-    pub fn with_max_doc(self, max_doc: u32) -> Segment<C> {
+    pub fn with_max_doc(self, max_doc: u32) -> Segment {
        Segment {
            index: self.index,
            meta: self.meta.with_max_doc(max_doc),
@@ -56,7 +55,7 @@ impl<C: crate::codec::Codec> Segment<C> {

    #[doc(hidden)]
    #[must_use]
-    pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: Opstamp) -> Segment<C> {
+    pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: Opstamp) -> Segment {
        Segment {
            index: self.index,
            meta: self.meta.with_delete_meta(num_deleted_docs, opstamp),
--- a/src/index/segment_reader.rs
+++ b/src/index/segment_reader.rs
@@ -6,7 +6,6 @@ use common::{ByteCount, HasLen};
 use fnv::FnvHashMap;
 use itertools::Itertools;

-use crate::codec::ObjectSafeCodec;
 use crate::directory::{CompositeFile, FileSlice};
 use crate::error::DataCorruption;
 use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders};
@@ -48,8 +47,6 @@ pub struct SegmentReader {
    store_file: FileSlice,
    alive_bitset_opt: Option<AliveBitSet>,
    schema: Schema,
-
-    pub(crate) codec: Arc<dyn ObjectSafeCodec>,
 }

 impl SegmentReader {
@@ -143,16 +140,15 @@ impl SegmentReader {
    }

    /// Open a new segment for reading.
-    pub fn open<C: crate::codec::Codec>(segment: &Segment<C>) -> crate::Result<SegmentReader> {
+    pub fn open(segment: &Segment) -> crate::Result<SegmentReader> {
        Self::open_with_custom_alive_set(segment, None)
    }

    /// Open a new segment for reading.
-    pub fn open_with_custom_alive_set<C: crate::codec::Codec>(
-        segment: &Segment<C>,
+    pub fn open_with_custom_alive_set(
+        segment: &Segment,
        custom_bitset: Option<AliveBitSet>,
    ) -> crate::Result<SegmentReader> {
-        let codec: Arc<dyn ObjectSafeCodec> = Arc::new(segment.index().codec().clone());
        let termdict_file = segment.open_read(SegmentComponent::Terms)?;
        let termdict_composite = CompositeFile::open(&termdict_file)?;

@@ -208,7 +204,6 @@ impl SegmentReader {
            alive_bitset_opt,
            positions_composite,
            schema,
-            codec,
        })
    }

@@ -278,7 +273,6 @@ impl SegmentReader {
            postings_file,
            positions_file,
            record_option,
-            self.codec.clone(),
        )?);

        // by releasing the lock in between, we may end up opening the inverting index
--- a/src/indexer/delete_queue.rs
+++ b/src/indexer/delete_queue.rs
@@ -4,20 +4,19 @@ use std::sync::{Arc, RwLock, Weak};
 use super::operation::DeleteOperation;
 use crate::Opstamp;

-/// The DeleteQueue is similar in conceptually to a multiple
-/// consumer single producer broadcast channel.
-///
-/// All consumer will receive all messages.
-///
-/// Consumer of the delete queue are holding a `DeleteCursor`,
-/// which points to a specific place of the `DeleteQueue`.
-///
-/// New consumer can be created in two ways
-/// - calling `delete_queue.cursor()` returns a cursor, that will include all future delete
-///   operation (and some or none of the past operations... The client is in charge of checking the
-///   opstamps.).
-/// - cloning an existing cursor returns a new cursor, that is at the exact same position, and can
-///   now advance independently from the original cursor.
+// The DeleteQueue is conceptually similar to a multiple
+// consumer single producer broadcast channel.
+//
+// All consumers will receive all messages.
+//
+// Consumers of the delete queue hold a `DeleteCursor`,
+// which points to a specific place in the `DeleteQueue`.
+//
+// New consumers can be created in two ways:
+// - calling `delete_queue.cursor()` returns a cursor that will include all future delete operations
+//   (and some or none of the past operations... The client is in charge of checking the opstamps.).
+// - cloning an existing cursor returns a new cursor that is at the exact same position and can
+//   now advance independently from the original cursor.
 #[derive(Default)]
 struct InnerDeleteQueue {
    writer: Vec<DeleteOperation>,
@@ -250,7 +249,12 @@ mod tests {

    struct DummyWeight;
    impl Weight for DummyWeight {
-        fn scorer(&self, _reader: &SegmentReader, _boost: Score) -> crate::Result<Box<dyn Scorer>> {
+        fn scorer(
+            &self,
+            _reader: &SegmentReader,
+            _boost: Score,
+            _seek_doc: DocId,
+        ) -> crate::Result<Box<dyn Scorer>> {
            Err(crate::TantivyError::InternalError("dummy impl".to_owned()))
        }

--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -9,7 +9,6 @@ use smallvec::smallvec;
 use super::operation::{AddOperation, UserOperation};
 use super::segment_updater::SegmentUpdater;
 use super::{AddBatch, AddBatchReceiver, AddBatchSender, PreparedCommit};
-use crate::codec::{Codec, StandardCodec};
 use crate::directory::{DirectoryLock, GarbageCollectionResult, TerminatingWrite};
 use crate::error::TantivyError;
 use crate::fastfield::write_alive_bitset;
@@ -69,12 +68,12 @@ pub struct IndexWriterOptions {
 /// indexing queue.
 /// Each indexing thread builds its own independent [`Segment`], via
 /// a `SegmentWriter` object.
-pub struct IndexWriter<C: Codec = StandardCodec, D: Document = TantivyDocument> {
+pub struct IndexWriter<D: Document = TantivyDocument> {
    // the lock is just used to bind the
    // lifetime of the lock with that of the IndexWriter.
    _directory_lock: Option<DirectoryLock>,

-    index: Index<C>,
+    index: Index,

    options: IndexWriterOptions,

@@ -83,7 +82,7 @@ pub struct IndexWriter<C: Codec = StandardCodec, D: Document = TantivyDocument>
    index_writer_status: IndexWriterStatus<D>,
    operation_sender: AddBatchSender<D>,

-    segment_updater: SegmentUpdater<C>,
+    segment_updater: SegmentUpdater,

    worker_id: usize,

@@ -129,8 +128,8 @@ fn compute_deleted_bitset(
 /// is `==` target_opstamp.
 /// For instance, there was no delete operation between the state of the `segment_entry` and
 /// the `target_opstamp`, `segment_entry` is not updated.
-pub fn advance_deletes<C: Codec>(
-    mut segment: Segment<C>,
+pub fn advance_deletes(
+    mut segment: Segment,
    segment_entry: &mut SegmentEntry,
    target_opstamp: Opstamp,
 ) -> crate::Result<()> {
@@ -180,11 +179,11 @@ pub fn advance_deletes<C: Codec>(
    Ok(())
 }

-fn index_documents<C: crate::codec::Codec, D: Document>(
+fn index_documents<D: Document>(
    memory_budget: usize,
-    segment: Segment<C>,
+    segment: Segment,
    grouped_document_iterator: &mut dyn Iterator<Item = AddBatch<D>>,
-    segment_updater: &SegmentUpdater<C>,
+    segment_updater: &SegmentUpdater,
    mut delete_cursor: DeleteCursor,
 ) -> crate::Result<()> {
    let mut segment_writer = SegmentWriter::for_segment(memory_budget, segment.clone())?;
@@ -227,8 +226,8 @@ fn index_documents<C: crate::codec::Codec, D: Document>(
 }

 /// `doc_opstamps` is required to be non-empty.
-fn apply_deletes<C: crate::codec::Codec>(
-    segment: &Segment<C>,
+fn apply_deletes(
+    segment: &Segment,
    delete_cursor: &mut DeleteCursor,
    doc_opstamps: &[Opstamp],
 ) -> crate::Result<Option<BitSet>> {
@@ -263,7 +262,7 @@ fn apply_deletes<C: crate::codec::Codec>(
    })
 }

-impl<C: Codec, D: Document> IndexWriter<C, D> {
+impl<D: Document> IndexWriter<D> {
    /// Create a new index writer. Attempts to acquire a lockfile.
    ///
    /// The lockfile should be deleted on drop, but it is possible
@@ -279,7 +278,7 @@ impl<C: Codec, D: Document> IndexWriter<C, D> {
    /// If the memory arena per thread is too small or too big, returns
    /// `TantivyError::InvalidArgument`
    pub(crate) fn new(
-        index: &Index<C>,
+        index: &Index,
        options: IndexWriterOptions,
        directory_lock: DirectoryLock,
    ) -> crate::Result<Self> {
@@ -346,7 +345,7 @@ impl<C: Codec, D: Document> IndexWriter<C, D> {
    }

    /// Accessor to the index.
-    pub fn index(&self) -> &Index<C> {
+    pub fn index(&self) -> &Index {
        &self.index
    }

@@ -394,7 +393,7 @@ impl<C: Codec, D: Document> IndexWriter<C, D> {
    /// It is safe to start writing file associated with the new `Segment`.
    /// These will not be garbage collected as long as an instance object of
    /// `SegmentMeta` object associated with the new `Segment` is "alive".
-    pub fn new_segment(&self) -> Segment<C> {
+    pub fn new_segment(&self) -> Segment {
        self.index.new_segment()
    }

@@ -616,7 +615,7 @@ impl<C: Codec, D: Document> IndexWriter<C, D> {
    /// It is also possible to add a payload to the `commit`
    /// using this API.
    /// See [`PreparedCommit::set_payload()`].
-    pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit<'_, C, D>> {
+    pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit<'_, D>> {
        // Here, because we join all of the worker threads,
        // all of the segment update for this commit have been
        // sent.
@@ -666,7 +665,7 @@ impl<C: Codec, D: Document> IndexWriter<C, D> {
        self.prepare_commit()?.commit()
    }

-    pub(crate) fn segment_updater(&self) -> &SegmentUpdater<C> {
+    pub(crate) fn segment_updater(&self) -> &SegmentUpdater {
        &self.segment_updater
    }

@@ -805,7 +804,7 @@ impl<C: Codec, D: Document> IndexWriter<C, D> {
    }
 }

-impl<C: Codec, D: Document> Drop for IndexWriter<C, D> {
+impl<D: Document> Drop for IndexWriter<D> {
    fn drop(&mut self) {
        self.segment_updater.kill();
        self.drop_sender();
--- a/src/indexer/merge_index_test.rs
+++ b/src/indexer/merge_index_test.rs
@@ -1,10 +1,9 @@
 #[cfg(test)]
 mod tests {
-    use crate::codec::StandardCodec;
    use crate::collector::TopDocs;
    use crate::fastfield::AliveBitSet;
    use crate::index::Index;
-    use crate::postings::{DocFreq, Postings};
+    use crate::postings::Postings;
    use crate::query::QueryParser;
    use crate::schema::{
        self, BytesOptions, Facet, FacetOptions, IndexRecordOption, NumericOptions,
@@ -122,26 +121,21 @@ mod tests {
            let my_text_field = index.schema().get_field("text_field").unwrap();
            let term_a = Term::from_field_text(my_text_field, "text");
            let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
-            let term_info = inverted_index.get_term_info(&term_a).unwrap().unwrap();
            let mut postings = inverted_index
-                .read_postings_from_terminfo_specialized(
-                    &term_info,
-                    IndexRecordOption::WithFreqsAndPositions,
-                    &StandardCodec,
-                )
+                .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
+                .unwrap()
                .unwrap();
-            assert_eq!(postings.doc_freq(), DocFreq::Exact(2));
+            assert_eq!(postings.doc_freq(), 2);
            let fallback_bitset = AliveBitSet::for_test_from_deleted_docs(&[0], 100);
            assert_eq!(
-                crate::indexer::merger::doc_freq_given_deletes(
-                    &postings,
+                postings.doc_freq_given_deletes(
                    segment_reader.alive_bitset().unwrap_or(&fallback_bitset)
                ),
                2
            );

            assert_eq!(postings.term_freq(), 1);
-            let mut output = Vec::new();
+            let mut output = vec![];
            postings.positions(&mut output);
            assert_eq!(output, vec![1]);
            postings.advance();
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -7,8 +7,6 @@ use common::ReadOnlyBitSet;
 use itertools::Itertools;
 use measure_time::debug_time;

-use crate::codec::postings::PostingsCodec;
-use crate::codec::{Codec, StandardCodec};
 use crate::directory::WritePtr;
 use crate::docset::{DocSet, TERMINATED};
 use crate::error::DataCorruption;
@@ -17,7 +15,7 @@ use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer,
 use crate::index::{Segment, SegmentComponent, SegmentReader};
 use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping};
 use crate::indexer::SegmentSerializer;
-use crate::postings::{InvertedIndexSerializer, Postings};
+use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
 use crate::schema::{value_type_to_column_type, Field, FieldType, Schema};
 use crate::store::StoreWriter;
 use crate::termdict::{TermMerger, TermOrdinal};
@@ -78,11 +76,10 @@ fn estimate_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::
    Ok(total_num_tokens)
 }

-pub struct IndexMerger<C: Codec = StandardCodec> {
+pub struct IndexMerger {
    schema: Schema,
    pub(crate) readers: Vec<SegmentReader>,
    max_doc: u32,
-    codec: C,
 }

 struct DeltaComputer {
@@ -147,8 +144,8 @@ fn extract_fast_field_required_columns(schema: &Schema) -> Vec<(String, ColumnTy
        .collect()
 }

-impl<C: Codec> IndexMerger<C> {
-    pub fn open(schema: Schema, segments: &[Segment<C>]) -> crate::Result<IndexMerger<C>> {
+impl IndexMerger {
+    pub fn open(schema: Schema, segments: &[Segment]) -> crate::Result<IndexMerger> {
        let alive_bitset = segments.iter().map(|_| None).collect_vec();
        Self::open_with_custom_alive_set(schema, segments, alive_bitset)
    }
@@ -165,15 +162,11 @@ impl<C: Codec> IndexMerger<C> {
    // This can be used to merge but also apply an additional filter.
    // One use case is demux, which is basically taking a list of
    // segments and partitions them e.g. by a value in a field.
-    //
-    // # Panics if segments is empty.
    pub fn open_with_custom_alive_set(
        schema: Schema,
-        segments: &[Segment<C>],
+        segments: &[Segment],
        alive_bitset_opt: Vec<Option<AliveBitSet>>,
-    ) -> crate::Result<IndexMerger<C>> {
-        assert!(!segments.is_empty());
-        let codec = segments[0].index().codec().clone();
+    ) -> crate::Result<IndexMerger> {
        let mut readers = vec![];
        for (segment, new_alive_bitset_opt) in segments.iter().zip(alive_bitset_opt) {
            if segment.meta().num_docs() > 0 {
@@ -196,7 +189,6 @@ impl<C: Codec> IndexMerger<C> {
            schema,
            readers,
            max_doc,
-            codec,
        })
    }

@@ -295,7 +287,7 @@ impl<C: Codec> IndexMerger<C> {
        &self,
        indexed_field: Field,
        _field_type: &FieldType,
-        serializer: &mut InvertedIndexSerializer<C>,
+        serializer: &mut InvertedIndexSerializer,
        fieldnorm_reader: Option<FieldNormReader>,
        doc_id_mapping: &SegmentDocIdMapping,
    ) -> crate::Result<()> {
@@ -363,10 +355,7 @@ impl<C: Codec> IndexMerger<C> {
                         indexed. Have you modified the schema?",
        );

-        let mut segment_postings_containing_the_term: Vec<(
-            usize,
-            <C::PostingsCodec as PostingsCodec>::Postings,
-        )> = Vec::with_capacity(self.readers.len());
+        let mut segment_postings_containing_the_term: Vec<(usize, SegmentPostings)> = vec![];

        while merged_terms.advance() {
            segment_postings_containing_the_term.clear();
@@ -378,24 +367,20 @@ impl<C: Codec> IndexMerger<C> {
            for (segment_ord, term_info) in merged_terms.current_segment_ords_and_term_infos() {
                let segment_reader = &self.readers[segment_ord];
                let inverted_index: &InvertedIndexReader = &field_readers[segment_ord];
-                let postings = inverted_index.read_postings_from_terminfo_specialized(
+                let segment_postings = inverted_index.read_postings_from_terminfo(
                    &term_info,
                    segment_postings_option,
-                    &self.codec,
+                    0u32,
                )?;
                let alive_bitset_opt = segment_reader.alive_bitset();
                let doc_freq = if let Some(alive_bitset) = alive_bitset_opt {
-                    doc_freq_given_deletes(&postings, alive_bitset)
+                    segment_postings.doc_freq_given_deletes(alive_bitset)
                } else {
-                    // We do not an exact document frequency here.
-                    match postings.doc_freq() {
-                        crate::postings::DocFreq::Approximate(_) => exact_doc_freq(&postings),
-                        crate::postings::DocFreq::Exact(doc_freq) => doc_freq,
-                    }
+                    segment_postings.doc_freq()
                };
                if doc_freq > 0u32 {
                    total_doc_freq += doc_freq;
-                    segment_postings_containing_the_term.push((segment_ord, postings));
+                    segment_postings_containing_the_term.push((segment_ord, segment_postings));
                }
            }

@@ -413,7 +398,11 @@ impl<C: Codec> IndexMerger<C> {
            assert!(!segment_postings_containing_the_term.is_empty());

            let has_term_freq = {
-                let has_term_freq = segment_postings_containing_the_term[0].1.has_freq();
+                let has_term_freq = !segment_postings_containing_the_term[0]
+                    .1
+                    .block_cursor
+                    .freqs()
+                    .is_empty();
                for (_, postings) in &segment_postings_containing_the_term[1..] {
                    // This may look at a strange way to test whether we have term freq or not.
                    // With JSON object, the schema is not sufficient to know whether a term
@@ -429,7 +418,7 @@ impl<C: Codec> IndexMerger<C> {
                    //
                    // Overall the reliable way to know if we have actual frequencies loaded or not
                    // is to check whether the actual decoded array is empty or not.
-                    if postings.has_freq() != has_term_freq {
+                    if has_term_freq == postings.block_cursor.freqs().is_empty() {
                        return Err(DataCorruption::comment_only(
                            "Term freqs are inconsistent across segments",
                        )
@@ -481,7 +470,7 @@ impl<C: Codec> IndexMerger<C> {

    fn write_postings(
        &self,
-        serializer: &mut InvertedIndexSerializer<C>,
+        serializer: &mut InvertedIndexSerializer,
        fieldnorm_readers: FieldNormReaders,
        doc_id_mapping: &SegmentDocIdMapping,
    ) -> crate::Result<()> {
@@ -539,7 +528,7 @@ impl<C: Codec> IndexMerger<C> {
    ///
    /// # Returns
    /// The number of documents in the resulting segment.
-    pub fn write(&self, mut serializer: SegmentSerializer<C>) -> crate::Result<u32> {
+    pub fn write(&self, mut serializer: SegmentSerializer) -> crate::Result<u32> {
        let doc_id_mapping = self.get_doc_id_from_concatenated_data()?;
        debug!("write-fieldnorms");
        if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
@@ -567,43 +556,6 @@ impl<C: Codec> IndexMerger<C> {
    }
 }

-/// Compute the number of non-deleted documents.
-///
-/// This method will clone and scan through the posting lists.
-/// (this is a rather expensive operation).
-pub(crate) fn doc_freq_given_deletes<P: Postings + Clone>(
-    postings: &P,
-    alive_bitset: &AliveBitSet,
-) -> u32 {
-    let mut docset = postings.clone();
-    let mut doc_freq = 0;
-    loop {
-        let doc = docset.doc();
-        if doc == TERMINATED {
-            return doc_freq;
-        }
-        if alive_bitset.is_alive(doc) {
-            doc_freq += 1u32;
-        }
-        docset.advance();
-    }
-}
-
-/// If the postings is not able to inform us of the document frequency,
-/// we just scan through it.
-pub(crate) fn exact_doc_freq<P: Postings + Clone>(postings: &P) -> u32 {
-    let mut docset = postings.clone();
-    let mut doc_freq = 0;
-    loop {
-        let doc = docset.doc();
-        if doc == TERMINATED {
-            return doc_freq;
-        }
-        doc_freq += 1u32;
-        docset.advance();
-    }
-}
-
 #[cfg(test)]
 mod tests {

@@ -612,16 +564,12 @@ mod tests {
    use proptest::strategy::Strategy;
    use schema::FAST;

-    use crate::codec::postings::PostingsCodec;
-    use crate::codec::standard::postings::StandardPostingsCodec;
    use crate::collector::tests::{
        BytesFastFieldTestCollector, FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE,
    };
    use crate::collector::{Count, FacetCollector};
-    use crate::fastfield::AliveBitSet;
    use crate::index::{Index, SegmentId};
    use crate::indexer::NoMergePolicy;
-    use crate::postings::{DocFreq, Postings as _};
    use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
    use crate::schema::{
        Facet, FacetOptions, IndexRecordOption, NumericOptions, TantivyDocument, Term,
@@ -1573,10 +1521,10 @@ mod tests {
        let searcher = reader.searcher();
        let mut term_scorer = term_query
            .specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
-            .term_scorer_for_test(searcher.segment_reader(0u32), 1.0)
+            .term_scorer_for_test(searcher.segment_reader(0u32), 1.0)?
            .unwrap();
        assert_eq!(term_scorer.doc(), 0);
-        assert_nearly_equals!(term_scorer.seek_block_max(0), 0.0079681855);
+        assert_nearly_equals!(term_scorer.block_max_score(), 0.0079681855);
        assert_nearly_equals!(term_scorer.score(), 0.0079681855);
        for _ in 0..81 {
            writer.add_document(doc!(text=>"hello happy tax payer"))?;
@@ -1589,13 +1537,13 @@ mod tests {
        for segment_reader in searcher.segment_readers() {
            let mut term_scorer = term_query
                .specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
-                .term_scorer_for_test(segment_reader, 1.0)
+                .term_scorer_for_test(segment_reader, 1.0)?
                .unwrap();
            // the difference compared to before is intrinsic to the bm25 formula. no worries
            // there.
            for doc in segment_reader.doc_ids_alive() {
                assert_eq!(term_scorer.doc(), doc);
-                assert_nearly_equals!(term_scorer.seek_block_max(doc), 0.003478312);
+                assert_nearly_equals!(term_scorer.block_max_score(), 0.003478312);
                assert_nearly_equals!(term_scorer.score(), 0.003478312);
                term_scorer.advance();
            }
@@ -1615,12 +1563,12 @@ mod tests {
        let segment_reader = searcher.segment_reader(0u32);
        let mut term_scorer = term_query
            .specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
-            .term_scorer_for_test(segment_reader, 1.0)
+            .term_scorer_for_test(segment_reader, 1.0)?
            .unwrap();
        // the difference compared to before is intrinsic to the bm25 formula. no worries there.
        for doc in segment_reader.doc_ids_alive() {
            assert_eq!(term_scorer.doc(), doc);
-            assert_nearly_equals!(term_scorer.seek_block_max(doc), 0.003478312);
+            assert_nearly_equals!(term_scorer.block_max_score(), 0.003478312);
            assert_nearly_equals!(term_scorer.score(), 0.003478312);
            term_scorer.advance();
        }
@@ -1634,16 +1582,4 @@ mod tests {
        assert!(((super::MAX_DOC_LIMIT - 1) as i32) >= 0);
        assert!((super::MAX_DOC_LIMIT as i32) < 0);
    }
-
-    #[test]
-    fn test_doc_freq_given_delete() {
-        let docs =
-            <StandardPostingsCodec as PostingsCodec>::Postings::create_from_docs(&[0, 2, 10]);
-        assert_eq!(docs.doc_freq(), DocFreq::Exact(3));
-        let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[2], 12);
-        assert_eq!(super::doc_freq_given_deletes(&docs, &alive_bitset), 2);
-        let all_deleted =
-            AliveBitSet::for_test_from_deleted_docs(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12);
-        assert_eq!(super::doc_freq_given_deletes(&docs, &all_deleted), 0);
-    }
 }
--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -4,7 +4,6 @@
 //! `IndexWriter` is the main entry point for that, which created from
 //! [`Index::writer`](crate::Index::writer).

-/// Delete queue implementation for broadcasting delete operations to consumers.
 pub(crate) mod delete_queue;
 pub(crate) mod path_to_unordered_id;

--- a/src/indexer/prepared_commit.rs
+++ b/src/indexer/prepared_commit.rs
@@ -1,17 +1,16 @@
 use super::IndexWriter;
-use crate::codec::Codec;
 use crate::schema::document::Document;
 use crate::{FutureResult, Opstamp, TantivyDocument};

 /// A prepared commit
-pub struct PreparedCommit<'a, C: Codec, D: Document = TantivyDocument> {
-    index_writer: &'a mut IndexWriter<C, D>,
+pub struct PreparedCommit<'a, D: Document = TantivyDocument> {
+    index_writer: &'a mut IndexWriter<D>,
    payload: Option<String>,
    opstamp: Opstamp,
 }

-impl<'a, C: Codec, D: Document> PreparedCommit<'a, C, D> {
-    pub(crate) fn new(index_writer: &'a mut IndexWriter<C, D>, opstamp: Opstamp) -> Self {
+impl<'a, D: Document> PreparedCommit<'a, D> {
+    pub(crate) fn new(index_writer: &'a mut IndexWriter<D>, opstamp: Opstamp) -> Self {
        Self {
            index_writer,
            payload: None,
--- a/src/indexer/segment_serializer.rs
+++ b/src/indexer/segment_serializer.rs
@@ -8,17 +8,17 @@ use crate::store::StoreWriter;

 /// Segment serializer is in charge of laying out on disk
 /// the data accumulated and sorted by the `SegmentWriter`.
-pub struct SegmentSerializer<C: crate::codec::Codec> {
-    segment: Segment<C>,
+pub struct SegmentSerializer {
+    segment: Segment,
    pub(crate) store_writer: StoreWriter,
    fast_field_write: WritePtr,
    fieldnorms_serializer: Option<FieldNormsSerializer>,
-    postings_serializer: InvertedIndexSerializer<C>,
+    postings_serializer: InvertedIndexSerializer,
 }

-impl<C: crate::codec::Codec> SegmentSerializer<C> {
+impl SegmentSerializer {
    /// Creates a new `SegmentSerializer`.
-    pub fn for_segment(mut segment: Segment<C>) -> crate::Result<SegmentSerializer<C>> {
+    pub fn for_segment(mut segment: Segment) -> crate::Result<SegmentSerializer> {
        let settings = segment.index().settings().clone();
        let store_writer = {
            let store_write = segment.open_write(SegmentComponent::Store)?;
@@ -50,12 +50,12 @@ impl<C: crate::codec::Codec> SegmentSerializer<C> {
        self.store_writer.mem_usage()
    }

-    pub fn segment(&self) -> &Segment<C> {
+    pub fn segment(&self) -> &Segment {
        &self.segment
    }

    /// Accessor to the `PostingsSerializer`.
-    pub fn get_postings_serializer(&mut self) -> &mut InvertedIndexSerializer<C> {
+    pub fn get_postings_serializer(&mut self) -> &mut InvertedIndexSerializer {
        &mut self.postings_serializer
    }

--- a/src/indexer/segment_updater.rs
+++ b/src/indexer/segment_updater.rs
@@ -10,13 +10,10 @@ use std::sync::{Arc, RwLock};
 use rayon::{ThreadPool, ThreadPoolBuilder};

 use super::segment_manager::SegmentManager;
-use crate::codec::Codec;
 use crate::core::META_FILEPATH;
 use crate::directory::{Directory, DirectoryClone, GarbageCollectionResult};
 use crate::fastfield::AliveBitSet;
-use crate::index::{
-    CodecConfiguration, Index, IndexMeta, IndexSettings, Segment, SegmentId, SegmentMeta,
-};
+use crate::index::{Index, IndexMeta, IndexSettings, Segment, SegmentId, SegmentMeta};
 use crate::indexer::delete_queue::DeleteCursor;
 use crate::indexer::index_writer::advance_deletes;
 use crate::indexer::merge_operation::MergeOperationInventory;
@@ -64,10 +61,10 @@ pub(crate) fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate:
 // We voluntarily pass a merge_operation ref to guarantee that
 // the merge_operation is alive during the process
 #[derive(Clone)]
-pub(crate) struct SegmentUpdater<C: Codec>(Arc<InnerSegmentUpdater<C>>);
+pub(crate) struct SegmentUpdater(Arc<InnerSegmentUpdater>);

-impl<C: Codec> Deref for SegmentUpdater<C> {
-    type Target = InnerSegmentUpdater<C>;
+impl Deref for SegmentUpdater {
+    type Target = InnerSegmentUpdater;

    #[inline]
    fn deref(&self) -> &Self::Target {
@@ -75,8 +72,8 @@ impl<C: Codec> Deref for SegmentUpdater<C> {
    }
 }

-fn garbage_collect_files<C: Codec>(
-    segment_updater: SegmentUpdater<C>,
+fn garbage_collect_files(
+    segment_updater: SegmentUpdater,
 ) -> crate::Result<GarbageCollectionResult> {
    info!("Running garbage collection");
    let mut index = segment_updater.index.clone();
@@ -87,8 +84,8 @@ fn garbage_collect_files<C: Codec>(

 /// Merges a list of segments the list of segment givens in the `segment_entries`.
 /// This function happens in the calling thread and is computationally expensive.
-fn merge<Codec: crate::codec::Codec>(
-    index: &Index<Codec>,
+fn merge(
+    index: &Index,
    mut segment_entries: Vec<SegmentEntry>,
    target_opstamp: Opstamp,
 ) -> crate::Result<Option<SegmentEntry>> {
@@ -111,13 +108,13 @@ fn merge<Codec: crate::codec::Codec>(

    let delete_cursor = segment_entries[0].delete_cursor().clone();

-    let segments: Vec<Segment<Codec>> = segment_entries
+    let segments: Vec<Segment> = segment_entries
        .iter()
        .map(|segment_entry| index.segment(segment_entry.meta().clone()))
        .collect();

    // An IndexMerger is like a "view" of our merged segments.
-    let merger: IndexMerger<Codec> = IndexMerger::open(index.schema(), &segments[..])?;
+    let merger: IndexMerger = IndexMerger::open(index.schema(), &segments[..])?;

    // ... we just serialize this index merger in our new segment to merge the segments.
    let segment_serializer = SegmentSerializer::for_segment(merged_segment.clone())?;
@@ -142,10 +139,10 @@ fn merge<Codec: crate::codec::Codec>(
 /// meant to work if you have an `IndexWriter` running for the origin indices, or
 /// the destination `Index`.
 #[doc(hidden)]
-pub fn merge_indices<Codec: crate::codec::Codec>(
-    indices: &[Index<Codec>],
-    output_directory: Box<dyn Directory>,
-) -> crate::Result<Index<Codec>> {
+pub fn merge_indices<T: Into<Box<dyn Directory>>>(
+    indices: &[Index],
+    output_directory: T,
+) -> crate::Result<Index> {
    if indices.is_empty() {
        // If there are no indices to merge, there is no need to do anything.
        return Err(crate::TantivyError::InvalidArgument(
@@ -166,7 +163,7 @@ pub fn merge_indices<Codec: crate::codec::Codec>(
        ));
    }

-    let mut segments: Vec<Segment<Codec>> = Vec::new();
+    let mut segments: Vec<Segment> = Vec::new();
    for index in indices {
        segments.extend(index.searchable_segments()?);
    }
@@ -188,12 +185,12 @@ pub fn merge_indices<Codec: crate::codec::Codec>(
 /// meant to work if you have an `IndexWriter` running for the origin indices, or
 /// the destination `Index`.
 #[doc(hidden)]
-pub fn merge_filtered_segments<C: crate::codec::Codec, T: Into<Box<dyn Directory>>>(
-    segments: &[Segment<C>],
+pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
+    segments: &[Segment],
    target_settings: IndexSettings,
    filter_doc_ids: Vec<Option<AliveBitSet>>,
    output_directory: T,
-) -> crate::Result<Index<C>> {
+) -> crate::Result<Index> {
    if segments.is_empty() {
        // If there are no indices to merge, there is no need to do anything.
        return Err(crate::TantivyError::InvalidArgument(
@@ -214,15 +211,14 @@ pub fn merge_filtered_segments<C: crate::codec::Codec, T: Into<Box<dyn Directory
        ));
    }

-    let mut merged_index: Index<C> = Index::builder()
-        .schema(target_schema.clone())
-        .codec(segments[0].index().codec().clone())
-        .settings(target_settings.clone())
-        .create(output_directory.into())?;
-
+    let mut merged_index = Index::create(
+        output_directory,
+        target_schema.clone(),
+        target_settings.clone(),
+    )?;
    let merged_segment = merged_index.new_segment();
    let merged_segment_id = merged_segment.id();
-    let merger: IndexMerger<C> =
+    let merger: IndexMerger =
        IndexMerger::open_with_custom_alive_set(merged_index.schema(), segments, filter_doc_ids)?;
    let segment_serializer = SegmentSerializer::for_segment(merged_segment)?;
    let num_docs = merger.write(segment_serializer)?;
@@ -239,7 +235,6 @@ pub fn merge_filtered_segments<C: crate::codec::Codec, T: Into<Box<dyn Directory
            ))
            .trim_end()
    );
-    let codec_configuration = CodecConfiguration::from_codec(segments[0].index().codec());

    let index_meta = IndexMeta {
        index_settings: target_settings, // index_settings of all segments should be the same
@@ -247,7 +242,6 @@ pub fn merge_filtered_segments<C: crate::codec::Codec, T: Into<Box<dyn Directory
        schema: target_schema,
        opstamp: 0u64,
        payload: Some(stats),
-        codec: codec_configuration,
    };

    // save the meta.json
@@ -256,7 +250,7 @@ pub fn merge_filtered_segments<C: crate::codec::Codec, T: Into<Box<dyn Directory
    Ok(merged_index)
 }

-pub(crate) struct InnerSegmentUpdater<C: Codec> {
+pub(crate) struct InnerSegmentUpdater {
    // we keep a copy of the current active IndexMeta to
    // avoid loading the file every time we need it in the
    // `SegmentUpdater`.
@@ -267,7 +261,7 @@ pub(crate) struct InnerSegmentUpdater<C: Codec> {
    pool: ThreadPool,
    merge_thread_pool: ThreadPool,

-    index: Index<C>,
+    index: Index,
    segment_manager: SegmentManager,
    merge_policy: RwLock<Arc<dyn MergePolicy>>,
    killed: AtomicBool,
@@ -275,13 +269,13 @@ pub(crate) struct InnerSegmentUpdater<C: Codec> {
    merge_operations: MergeOperationInventory,
 }

-impl<Codec: crate::codec::Codec> SegmentUpdater<Codec> {
+impl SegmentUpdater {
    pub fn create(
-        index: Index<Codec>,
+        index: Index,
        stamper: Stamper,
        delete_cursor: &DeleteCursor,
        num_merge_threads: usize,
-    ) -> crate::Result<Self> {
+    ) -> crate::Result<SegmentUpdater> {
        let segments = index.searchable_segment_metas()?;
        let segment_manager = SegmentManager::from_segments(segments, delete_cursor);
        let pool = ThreadPoolBuilder::new()
@@ -410,14 +404,12 @@ impl<Codec: crate::codec::Codec> SegmentUpdater<Codec> {
            //
            // Segment 1 from disk 1, Segment 1 from disk 2, etc.
            committed_segment_metas.sort_by_key(|segment_meta| -(segment_meta.max_doc() as i32));
-            let codec = CodecConfiguration::from_codec(index.codec());
            let index_meta = IndexMeta {
                index_settings: index.settings().clone(),
                segments: committed_segment_metas,
                schema: index.schema(),
                opstamp,
                payload: commit_message,
-                codec,
            };
            // TODO add context to the error.
            save_metas(&index_meta, directory.box_clone().borrow_mut())?;
@@ -451,7 +443,7 @@ impl<Codec: crate::codec::Codec> SegmentUpdater<Codec> {
        opstamp: Opstamp,
        payload: Option<String>,
    ) -> FutureResult<Opstamp> {
-        let segment_updater: SegmentUpdater<Codec> = self.clone();
+        let segment_updater: SegmentUpdater = self.clone();
        self.schedule_task(move || {
            let segment_entries = segment_updater.purge_deletes(opstamp)?;
            segment_updater.segment_manager.commit(segment_entries);
@@ -710,7 +702,6 @@ impl<Codec: crate::codec::Codec> SegmentUpdater<Codec> {
 #[cfg(test)]
 mod tests {
    use super::merge_indices;
-    use crate::codec::StandardCodec;
    use crate::collector::TopDocs;
    use crate::directory::RamDirectory;
    use crate::fastfield::AliveBitSet;
@@ -924,7 +915,7 @@ mod tests {

    #[test]
    fn test_merge_empty_indices_array() {
-        let merge_result = merge_indices::<StandardCodec>(&[], Box::new(RamDirectory::default()));
+        let merge_result = merge_indices(&[], RamDirectory::default());
        assert!(merge_result.is_err());
    }

@@ -951,10 +942,7 @@ mod tests {
        };

        // mismatched schema index list
-        let result = merge_indices(
-            &[first_index, second_index],
-            Box::new(RamDirectory::default()),
-        );
+        let result = merge_indices(&[first_index, second_index], RamDirectory::default());
        assert!(result.is_err());

        Ok(())
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -4,7 +4,6 @@ use itertools::Itertools;
 use tokenizer_api::BoxTokenStream;

 use super::operation::AddOperation;
-use crate::codec::Codec;
 use crate::fastfield::FastFieldsWriter;
 use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
 use crate::index::{Segment, SegmentComponent};
@@ -13,7 +12,7 @@ use crate::indexer::segment_serializer::SegmentSerializer;
 use crate::json_utils::{index_json_value, IndexingPositionsPerPath};
 use crate::postings::{
    compute_table_memory_size, serialize_postings, IndexingContext, IndexingPosition,
-    PerFieldPostingsWriter, PostingsWriter, PostingsWriterEnum,
+    PerFieldPostingsWriter, PostingsWriter,
 };
 use crate::schema::document::{Document, Value};
 use crate::schema::{FieldEntry, FieldType, Schema, DATE_TIME_PRECISION_INDEXED};
@@ -46,11 +45,11 @@ fn compute_initial_table_size(per_thread_memory_budget: usize) -> crate::Result<
 ///
 /// They creates the postings list in anonymous memory.
 /// The segment is laid on disk when the segment gets `finalized`.
-pub struct SegmentWriter<Codec: crate::codec::Codec> {
+pub struct SegmentWriter {
    pub(crate) max_doc: DocId,
    pub(crate) ctx: IndexingContext,
    pub(crate) per_field_postings_writers: PerFieldPostingsWriter,
-    pub(crate) segment_serializer: SegmentSerializer<Codec>,
+    pub(crate) segment_serializer: SegmentSerializer,
    pub(crate) fast_field_writers: FastFieldsWriter,
    pub(crate) fieldnorms_writer: FieldNormsWriter,
    pub(crate) json_path_writer: JsonPathWriter,
@@ -61,7 +60,7 @@ pub struct SegmentWriter<Codec: crate::codec::Codec> {
    schema: Schema,
 }

-impl<Codec: crate::codec::Codec> SegmentWriter<Codec> {
+impl SegmentWriter {
    /// Creates a new `SegmentWriter`
    ///
    /// The arguments are defined as follows
@@ -71,10 +70,7 @@ impl<Codec: crate::codec::Codec> SegmentWriter<Codec> {
    ///   behavior as a memory limit.
    /// - segment: The segment being written
    /// - schema
-    pub fn for_segment(
-        memory_budget_in_bytes: usize,
-        segment: Segment<Codec>,
-    ) -> crate::Result<Self> {
+    pub fn for_segment(memory_budget_in_bytes: usize, segment: Segment) -> crate::Result<Self> {
        let schema = segment.schema();
        let tokenizer_manager = segment.index().tokenizers().clone();
        let tokenizer_manager_fast_field = segment.index().fast_field_tokenizer().clone();
@@ -173,7 +169,7 @@ impl<Codec: crate::codec::Codec> SegmentWriter<Codec> {
            }

            let (term_buffer, ctx) = (&mut self.term_buffer, &mut self.ctx);
-            let postings_writer: &mut PostingsWriterEnum =
+            let postings_writer: &mut dyn PostingsWriter =
                self.per_field_postings_writers.get_for_field_mut(field);
            term_buffer.clear_with_field(field);

@@ -390,13 +386,13 @@ impl<Codec: crate::codec::Codec> SegmentWriter<Codec> {
 /// to the `SegmentSerializer`.
 ///
 /// `doc_id_map` is used to map to the new doc_id order.
-fn remap_and_write<C: Codec>(
+fn remap_and_write(
    schema: Schema,
    per_field_postings_writers: &PerFieldPostingsWriter,
    ctx: IndexingContext,
    fast_field_writers: FastFieldsWriter,
    fieldnorms_writer: &FieldNormsWriter,
-    mut serializer: SegmentSerializer<C>,
+    mut serializer: SegmentSerializer,
 ) -> crate::Result<()> {
    debug!("remap-and-write");
    if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
@@ -425,9 +421,10 @@ fn remap_and_write<C: Codec>(
 #[cfg(test)]
 mod tests {
    use std::collections::BTreeMap;
-    use std::path::Path;
+    use std::path::{Path, PathBuf};

    use columnar::ColumnType;
+    use tempfile::TempDir;

    use crate::collector::{Count, TopDocs};
    use crate::directory::RamDirectory;
@@ -1070,7 +1067,10 @@ mod tests {
        let mut schema_builder = Schema::builder();
        schema_builder.add_text_field("title", text_options);
        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema);
+        let tempdir = TempDir::new().unwrap();
+        let tempdir_path = PathBuf::from(tempdir.path());
+        Index::create_in_dir(&tempdir_path, schema).unwrap();
+        let index = Index::open_in_dir(tempdir_path).unwrap();
        let schema = index.schema();
        let mut index_writer = index.writer(50_000_000).unwrap();
        let title = schema.get_field("title").unwrap();
--- a/src/indexer/single_segment_index_writer.rs
+++ b/src/indexer/single_segment_index_writer.rs
@@ -1,7 +1,5 @@
 use std::marker::PhantomData;

-use crate::codec::StandardCodec;
-use crate::index::CodecConfiguration;
 use crate::indexer::operation::AddOperation;
 use crate::indexer::segment_updater::save_metas;
 use crate::indexer::SegmentWriter;
@@ -9,25 +7,22 @@ use crate::schema::document::Document;
 use crate::{Directory, Index, IndexMeta, Opstamp, Segment, TantivyDocument};

 #[doc(hidden)]
-pub struct SingleSegmentIndexWriter<
-    Codec: crate::codec::Codec = StandardCodec,
-    D: Document = TantivyDocument,
-> {
-    segment_writer: SegmentWriter<Codec>,
-    segment: Segment<Codec>,
+pub struct SingleSegmentIndexWriter<D: Document = TantivyDocument> {
+    segment_writer: SegmentWriter,
+    segment: Segment,
    opstamp: Opstamp,
-    _doc: PhantomData<D>,
+    _phantom: PhantomData<D>,
 }

-impl<Codec: crate::codec::Codec, D: Document> SingleSegmentIndexWriter<Codec, D> {
-    pub fn new(index: Index<Codec>, mem_budget: usize) -> crate::Result<Self> {
+impl<D: Document> SingleSegmentIndexWriter<D> {
+    pub fn new(index: Index, mem_budget: usize) -> crate::Result<Self> {
        let segment = index.new_segment();
        let segment_writer = SegmentWriter::for_segment(mem_budget, segment.clone())?;
        Ok(Self {
            segment_writer,
            segment,
            opstamp: 0,
-            _doc: PhantomData,
+            _phantom: PhantomData,
        })
    }

@@ -42,10 +37,10 @@ impl<Codec: crate::codec::Codec, D: Document> SingleSegmentIndexWriter<Codec, D>
            .add_document(AddOperation { opstamp, document })
    }

-    pub fn finalize(self) -> crate::Result<Index<Codec>> {
+    pub fn finalize(self) -> crate::Result<Index> {
        let max_doc = self.segment_writer.max_doc();
        self.segment_writer.finalize()?;
-        let segment: Segment<Codec> = self.segment.with_max_doc(max_doc);
+        let segment: Segment = self.segment.with_max_doc(max_doc);
        let index = segment.index();
        let index_meta = IndexMeta {
            index_settings: index.settings().clone(),
@@ -53,7 +48,6 @@ impl<Codec: crate::codec::Codec, D: Document> SingleSegmentIndexWriter<Codec, D>
            schema: index.schema(),
            opstamp: 0,
            payload: None,
-            codec: CodecConfiguration::from_codec(index.codec()),
        };
        save_metas(&index_meta, index.directory())?;
        index.directory().sync_directory()?;
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,7 +17,6 @@
 //!
 //! ```rust
 //! # use std::path::Path;
-//! # use std::fs;
 //! # use tempfile::TempDir;
 //! # use tantivy::collector::TopDocs;
 //! # use tantivy::query::QueryParser;
@@ -28,11 +27,8 @@
 //! #     // Let's create a temporary directory for the
 //! #     // sake of this example
 //! #     if let Ok(dir) = TempDir::new() {
-//! #         let index_path = dir.path().join("index");
-//! #         // In case the directory already exists, we remove it
-//! #         let _ = fs::remove_dir_all(&index_path);
-//! #         fs::create_dir_all(&index_path).unwrap();
-//! #         run_example(&index_path).unwrap();
+//! #         run_example(dir.path()).unwrap();
+//! #         dir.close().unwrap();
 //! #     }
 //! # }
 //! #
@@ -166,9 +162,6 @@ mod functional_test;

 #[macro_use]
 mod macros;
-
-/// Tantivy codecs describes how data is layed out on disk.
-pub mod codec;
 mod future_result;

 // Re-exports
@@ -210,7 +203,6 @@ mod docset;
 mod reader;

 #[cfg(test)]
-#[cfg(feature = "mmap")]
 mod compat_tests;

 pub use self::reader::{IndexReader, IndexReaderBuilder, ReloadPolicy, Warmer};
@@ -1178,11 +1170,12 @@ pub mod tests {

    #[test]
    fn test_validate_checksum() -> crate::Result<()> {
+        let index_path = tempfile::tempdir().expect("dir");
        let mut builder = Schema::builder();
        let body = builder.add_text_field("body", TEXT | STORED);
        let schema = builder.build();
-        let index = Index::create_in_ram(schema);
-        let mut writer: IndexWriter = index.writer_for_tests()?;
+        let index = Index::create_in_dir(&index_path, schema)?;
+        let mut writer: IndexWriter = index.writer(50_000_000)?;
        writer.set_merge_policy(Box::new(NoMergePolicy));
        for _ in 0..5000 {
            writer.add_document(doc!(body => "foo"))?;
--- a/src/codec/standard/postings/block_segment_postings.rs
+++ b/src/codec/standard/postings/block_segment_postings.rs
@@ -1,18 +1,28 @@
 use std::io;

-use common::{OwnedBytes, VInt};
+use common::VInt;

-use crate::codec::standard::postings::skip::{BlockInfo, SkipReader};
-use crate::codec::standard::postings::FreqReadingOption;
-use crate::postings::compression::{BlockDecoder, VIntDecoder as _, COMPRESSION_BLOCK_SIZE};
+use crate::directory::{FileSlice, OwnedBytes};
+use crate::fieldnorm::FieldNormReader;
+use crate::postings::compression::{BlockDecoder, VIntDecoder, COMPRESSION_BLOCK_SIZE};
+use crate::postings::{BlockInfo, FreqReadingOption, SkipReader};
 use crate::query::Bm25Weight;
 use crate::schema::IndexRecordOption;
 use crate::{DocId, Score, TERMINATED};

+fn max_score<I: Iterator<Item = Score>>(mut it: I) -> Option<Score> {
+    it.next().map(|first| it.fold(first, Score::max))
+}
+
 /// `BlockSegmentPostings` is a cursor iterating over blocks
 /// of documents.
+///
+/// # Warning
+///
+/// While it is useful for some very specific high-performance
+/// use cases, you should prefer using `SegmentPostings` for most usage.
 #[derive(Clone)]
-pub(crate) struct BlockSegmentPostings {
+pub struct BlockSegmentPostings {
    pub(crate) doc_decoder: BlockDecoder,
    block_loaded: bool,
    freq_decoder: BlockDecoder,
@@ -78,7 +88,7 @@ fn split_into_skips_and_postings(
 }

 impl BlockSegmentPostings {
-    /// Opens a `StandardPostingsReader`.
+    /// Opens a `BlockSegmentPostings`.
    /// `doc_freq` is the number of documents in the posting list.
    /// `record_option` represents the amount of data available according to the schema.
    /// `requested_option` is the amount of data requested by the user.
@@ -86,10 +96,12 @@ impl BlockSegmentPostings {
    /// term frequency blocks.
    pub(crate) fn open(
        doc_freq: u32,
-        bytes: OwnedBytes,
+        data: FileSlice,
        mut record_option: IndexRecordOption,
        requested_option: IndexRecordOption,
-    ) -> io::Result<BlockSegmentPostings> {
+        seek_doc: DocId,
+    ) -> io::Result<(BlockSegmentPostings, usize)> {
+        let bytes = data.read_bytes()?;
        let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, bytes)?;
        let skip_reader = match skip_data_opt {
            Some(skip_data) => {
@@ -114,7 +126,7 @@ impl BlockSegmentPostings {
            (_, _) => FreqReadingOption::ReadFreq,
        };

-        let mut block_segment_postings = BlockSegmentPostings {
+        let mut block_segment_postings: BlockSegmentPostings = BlockSegmentPostings {
            doc_decoder: BlockDecoder::with_val(TERMINATED),
            block_loaded: false,
            freq_decoder: BlockDecoder::with_val(1),
@@ -124,12 +136,84 @@ impl BlockSegmentPostings {
            data: postings_data,
            skip_reader,
        };
-        block_segment_postings.load_block();
-        Ok(block_segment_postings)
+        let inner_pos = if seek_doc == 0 {
+            block_segment_postings.load_block();
+            0
+        } else {
+            block_segment_postings.seek(seek_doc)
+        };
+        Ok((block_segment_postings, inner_pos))
+    }
+
+    /// Returns the block_max_score for the current block.
+    /// It does not require the block to be loaded. For instance, it is ok to call this method
+    /// after having called `.shallow_advance(..)`.
+    ///
+    /// See `TermScorer::block_max_score(..)` for more information.
+    pub fn block_max_score(
+        &mut self,
+        fieldnorm_reader: &FieldNormReader,
+        bm25_weight: &Bm25Weight,
+    ) -> Score {
+        if let Some(score) = self.block_max_score_cache {
+            return score;
+        }
+        if let Some(skip_reader_max_score) = self.skip_reader.block_max_score(bm25_weight) {
+            // if we are on a full block, the skip reader should have the block max information
+            // for us
+            self.block_max_score_cache = Some(skip_reader_max_score);
+            return skip_reader_max_score;
+        }
+        // this is the last block of the segment posting list.
+        // If it is actually loaded, we can compute block max manually.
+        if self.block_is_loaded() {
+            let docs = self.doc_decoder.output_array().iter().cloned();
+            let freqs = self.freq_decoder.output_array().iter().cloned();
+            let bm25_scores = docs.zip(freqs).map(|(doc, term_freq)| {
+                let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc);
+                bm25_weight.score(fieldnorm_id, term_freq)
+            });
+            let block_max_score = max_score(bm25_scores).unwrap_or(0.0);
+            self.block_max_score_cache = Some(block_max_score);
+            return block_max_score;
+        }
+        // We do not have access to any good block max value. We return bm25_weight.max_score()
+        // as it is a valid upperbound.
+        //
+        // We do not cache it however, so that it gets computed once the block is loaded.
+        bm25_weight.max_score()
+    }
+
+    pub(crate) fn freq_reading_option(&self) -> FreqReadingOption {
+        self.freq_reading_option
+    }
+
+    // Resets the block segment postings on another position
+    // in the postings file.
+    //
+    // This is useful for enumerating through a list of terms,
+    // and consuming the associated posting lists while avoiding
+    // reallocating a `BlockSegmentPostings`.
+    //
+    // # Warning
+    //
+    // This does not reset the positions list.
+    pub(crate) fn reset(&mut self, doc_freq: u32, postings_data: OwnedBytes) -> io::Result<()> {
+        let (skip_data_opt, postings_data) =
+            split_into_skips_and_postings(doc_freq, postings_data)?;
+        self.data = postings_data;
+        self.block_max_score_cache = None;
+        self.block_loaded = false;
+        if let Some(skip_data) = skip_data_opt {
+            self.skip_reader.reset(skip_data, doc_freq);
+        } else {
+            self.skip_reader.reset(OwnedBytes::empty(), doc_freq);
+        }
+        self.doc_freq = doc_freq;
+        self.load_block();
+        Ok(())
    }
-}

-impl BlockSegmentPostings {
    /// Returns the overall number of documents in the block postings.
    /// It does not take in account whether documents are deleted or not.
    ///
@@ -145,7 +229,7 @@ impl BlockSegmentPostings {
    /// returned by `.docs()` is empty.
    #[inline]
    pub fn docs(&self) -> &[DocId] {
-        debug_assert!(self.block_loaded);
+        debug_assert!(self.block_is_loaded());
        self.doc_decoder.output_array()
    }

@@ -158,24 +242,37 @@ impl BlockSegmentPostings {
    /// Return the array of `term freq` in the block.
    #[inline]
    pub fn freqs(&self) -> &[u32] {
-        debug_assert!(self.block_loaded);
+        debug_assert!(self.block_is_loaded());
        self.freq_decoder.output_array()
    }

    /// Return the frequency at index `idx` of the block.
    #[inline]
    pub fn freq(&self, idx: usize) -> u32 {
-        debug_assert!(self.block_loaded);
+        debug_assert!(self.block_is_loaded());
        self.freq_decoder.output(idx)
    }

-    /// Position on a block that may contains `target_doc`.
+    /// Returns the length of the current block.
+    ///
+    /// All blocks have a length of `NUM_DOCS_PER_BLOCK`,
+    /// except the last block that may have a length
+    /// of any number between 1 and `NUM_DOCS_PER_BLOCK - 1`.
+    #[inline]
+    pub fn block_len(&self) -> usize {
+        debug_assert!(self.block_is_loaded());
+        self.doc_decoder.output_len
+    }
+
+    /// Positions the cursor on a block that may contain `target_doc`, and returns the
+    /// position of the first document greater than or equal to `target_doc`
+    /// within that block.
    ///
    /// If all docs are smaller than target, the block loaded may be empty,
    /// or be the last an incomplete VInt block.
    pub fn seek(&mut self, target_doc: DocId) -> usize {
        // Move to the block that might contain our document.
-        self.seek_block_without_loading(target_doc);
+        self.seek_block(target_doc);
        self.load_block();

        // At this point we are on the block that might contain our document.
@@ -192,79 +289,32 @@ impl BlockSegmentPostings {
        doc
    }

-    pub fn position_offset(&self) -> u64 {
+    pub(crate) fn position_offset(&self) -> u64 {
        self.skip_reader.position_offset()
    }

-    /// Advance to the next block.
-    pub fn advance(&mut self) {
-        self.skip_reader.advance();
-        self.block_loaded = false;
-        self.block_max_score_cache = None;
-        self.load_block();
-    }
-
-    /// Returns the block_max_score for the current block.
-    /// It does not require the block to be loaded. For instance, it is ok to call this method
-    /// after having called `.shallow_advance(..)`.
-    ///
-    /// See `TermScorer::block_max_score(..)` for more information.
-    pub fn block_max_score(&mut self, bm25_weight: &Bm25Weight) -> Score {
-        if let Some(score) = self.block_max_score_cache {
-            return score;
-        }
-        if let Some(skip_reader_max_score) = self.skip_reader.block_max_score(bm25_weight) {
-            // if we are on a full block, the skip reader should have the block max information
-            // for us
-            self.block_max_score_cache = Some(skip_reader_max_score);
-            return skip_reader_max_score;
-        }
-        // We do not have access to any good block max value.
-        // It happens if this is the last block.
-        // We return bm25_weight.max_score() as it is a valid upperbound.
-        //
-        // We do not cache it however, so that it gets computed when once block is loaded.
-        bm25_weight.max_score()
-    }
-}
-
-impl BlockSegmentPostings {
-    /// Returns an empty segment postings object
-    pub fn empty() -> BlockSegmentPostings {
-        BlockSegmentPostings {
-            doc_decoder: BlockDecoder::with_val(TERMINATED),
-            block_loaded: true,
-            freq_decoder: BlockDecoder::with_val(1),
-            freq_reading_option: FreqReadingOption::NoFreq,
-            block_max_score_cache: None,
-            doc_freq: 0,
-            data: OwnedBytes::empty(),
-            skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
-        }
-    }
-
-    pub(crate) fn skip_reader(&self) -> &SkipReader {
-        &self.skip_reader
-    }
-
    /// Dangerous API! This calls seeks the next block on the skip list,
    /// but does not `.load_block()` afterwards.
    ///
    /// `.load_block()` needs to be called manually afterwards.
    /// If all docs are smaller than target, the block loaded may be empty,
    /// or be the last an incomplete VInt block.
-    pub(crate) fn seek_block_without_loading(&mut self, target_doc: DocId) {
+    pub(crate) fn seek_block(&mut self, target_doc: DocId) {
        if self.skip_reader.seek(target_doc) {
            self.block_max_score_cache = None;
            self.block_loaded = false;
        }
    }

+    pub(crate) fn block_is_loaded(&self) -> bool {
+        self.block_loaded
+    }
+
    pub(crate) fn load_block(&mut self) {
-        if self.block_loaded {
+        let offset = self.skip_reader.byte_offset();
+        if self.block_is_loaded() {
            return;
        }
-        let offset = self.skip_reader.byte_offset();
        match self.skip_reader.block_info() {
            BlockInfo::BitPacked {
                doc_num_bits,
@@ -309,40 +359,68 @@ impl BlockSegmentPostings {
        }
        self.block_loaded = true;
    }
+
+    /// Advance to the next block.
+    pub fn advance(&mut self) {
+        self.skip_reader.advance();
+        self.block_loaded = false;
+        self.block_max_score_cache = None;
+        self.load_block();
+    }
+
+    /// Returns an empty segment postings object
+    pub fn empty() -> BlockSegmentPostings {
+        BlockSegmentPostings {
+            doc_decoder: BlockDecoder::with_val(TERMINATED),
+            block_loaded: true,
+            freq_decoder: BlockDecoder::with_val(1),
+            freq_reading_option: FreqReadingOption::NoFreq,
+            block_max_score_cache: None,
+            doc_freq: 0,
+            data: OwnedBytes::empty(),
+            skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
+        }
+    }
+
+    pub(crate) fn skip_reader(&self) -> &SkipReader {
+        &self.skip_reader
+    }
 }

 #[cfg(test)]
 mod tests {
-    use common::OwnedBytes;
+    use common::HasLen;

    use super::BlockSegmentPostings;
-    use crate::codec::postings::PostingsSerializer;
-    use crate::codec::standard::postings::segment_postings::SegmentPostings;
-    use crate::codec::standard::postings::StandardPostingsSerializer;
    use crate::docset::{DocSet, TERMINATED};
+    use crate::index::Index;
    use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
-    use crate::schema::IndexRecordOption;
+    use crate::postings::postings::Postings;
+    use crate::postings::SegmentPostings;
+    use crate::schema::{IndexRecordOption, Schema, Term, INDEXED};
+    use crate::DocId;

-    #[cfg(test)]
-    fn build_block_postings(docs: &[u32]) -> BlockSegmentPostings {
-        let doc_freq = docs.len() as u32;
-        let mut postings_serializer =
-            StandardPostingsSerializer::new(1.0f32, IndexRecordOption::Basic, None);
-        postings_serializer.new_term(docs.len() as u32, false);
-        for doc in docs {
-            postings_serializer.write_doc(*doc, 1u32);
-        }
-        let mut buffer: Vec<u8> = Vec::new();
-        postings_serializer
-            .close_term(doc_freq, &mut buffer)
-            .unwrap();
-        BlockSegmentPostings::open(
-            doc_freq,
-            OwnedBytes::new(buffer),
-            IndexRecordOption::Basic,
-            IndexRecordOption::Basic,
-        )
-        .unwrap()
+    #[test]
+    fn test_empty_segment_postings() {
+        let mut postings = SegmentPostings::empty();
+        assert_eq!(postings.doc(), TERMINATED);
+        assert_eq!(postings.advance(), TERMINATED);
+        assert_eq!(postings.advance(), TERMINATED);
+        assert_eq!(postings.doc_freq(), 0);
+        assert_eq!(postings.len(), 0);
+    }
+
+    #[test]
+    fn test_empty_postings_doc_returns_terminated() {
+        let mut postings = SegmentPostings::empty();
+        assert_eq!(postings.doc(), TERMINATED);
+        assert_eq!(postings.advance(), TERMINATED);
+    }
+
+    #[test]
+    fn test_empty_postings_doc_term_freq_returns_0() {
+        let postings = SegmentPostings::empty();
+        assert_eq!(postings.term_freq(), 1);
    }

    #[test]
@@ -357,7 +435,7 @@ mod tests {

    #[test]
    fn test_block_segment_postings() -> crate::Result<()> {
-        let mut block_segments = build_block_postings(&(0..100_000).collect::<Vec<u32>>());
+        let mut block_segments = build_block_postings(&(0..100_000).collect::<Vec<u32>>())?;
        let mut offset: u32 = 0u32;
        // checking that the `doc_freq` is correct
        assert_eq!(block_segments.doc_freq(), 100_000);
@@ -382,8 +460,8 @@ mod tests {
        doc_ids.push(129);
        doc_ids.push(130);
        {
-            let block_segments = build_block_postings(&doc_ids);
-            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
+            let block_segments = build_block_postings(&doc_ids)?;
+            let mut docset = SegmentPostings::from_block_postings(block_segments, None, 0);
            assert_eq!(docset.seek(128), 129);
            assert_eq!(docset.doc(), 129);
            assert_eq!(docset.advance(), 130);
@@ -391,8 +469,8 @@ mod tests {
            assert_eq!(docset.advance(), TERMINATED);
        }
        {
-            let block_segments = build_block_postings(&doc_ids);
-            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
+            let block_segments = build_block_postings(&doc_ids)?;
+            let mut docset = SegmentPostings::from_block_postings(block_segments, None, 0);
            assert_eq!(docset.seek(129), 129);
            assert_eq!(docset.doc(), 129);
            assert_eq!(docset.advance(), 130);
@@ -400,8 +478,8 @@ mod tests {
            assert_eq!(docset.advance(), TERMINATED);
        }
        {
-            let block_segments = build_block_postings(&doc_ids);
-            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
+            let block_segments = build_block_postings(&doc_ids)?;
+            let mut docset = SegmentPostings::from_block_postings(block_segments, None, 0);
            assert_eq!(docset.doc(), 0);
            assert_eq!(docset.seek(131), TERMINATED);
            assert_eq!(docset.doc(), TERMINATED);
@@ -409,13 +487,38 @@ mod tests {
        Ok(())
    }

+    fn build_block_postings(docs: &[DocId]) -> crate::Result<BlockSegmentPostings> {
+        let mut schema_builder = Schema::builder();
+        let int_field = schema_builder.add_u64_field("id", INDEXED);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer = index.writer_for_tests()?;
+        let mut last_doc = 0u32;
+        for &doc in docs {
+            for _ in last_doc..doc {
+                index_writer.add_document(doc!(int_field=>1u64))?;
+            }
+            index_writer.add_document(doc!(int_field=>0u64))?;
+            last_doc = doc + 1;
+        }
+        index_writer.commit()?;
+        let searcher = index.reader()?.searcher();
+        let segment_reader = searcher.segment_reader(0);
+        let inverted_index = segment_reader.inverted_index(int_field).unwrap();
+        let term = Term::from_field_u64(int_field, 0u64);
+        let term_info = inverted_index.get_term_info(&term)?.unwrap();
+        let block_postings = inverted_index
+            .read_block_postings_from_terminfo(&term_info, IndexRecordOption::Basic)?;
+        Ok(block_postings)
+    }
+
    #[test]
    fn test_block_segment_postings_seek() -> crate::Result<()> {
-        let mut docs = Vec::new();
+        let mut docs = vec![0];
        for i in 0..1300 {
            docs.push((i * i / 100) + i);
        }
-        let mut block_postings = build_block_postings(&docs[..]);
+        let mut block_postings = build_block_postings(&docs[..])?;
        for i in &[0, 424, 10000] {
            block_postings.seek(*i);
            let docs = block_postings.docs();
@@ -426,4 +529,40 @@ mod tests {
        assert_eq!(block_postings.doc(COMPRESSION_BLOCK_SIZE - 1), TERMINATED);
        Ok(())
    }
+
+    #[test]
+    fn test_reset_block_segment_postings() -> crate::Result<()> {
+        let mut schema_builder = Schema::builder();
+        let int_field = schema_builder.add_u64_field("id", INDEXED);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer = index.writer_for_tests()?;
+        // create two postings lists, one containing even numbers,
+        // the other containing odd numbers.
+        for i in 0..6 {
+            let doc = doc!(int_field=> (i % 2) as u64);
+            index_writer.add_document(doc)?;
+        }
+        index_writer.commit()?;
+        let searcher = index.reader()?.searcher();
+        let segment_reader = searcher.segment_reader(0);
+
+        let mut block_segments;
+        {
+            let term = Term::from_field_u64(int_field, 0u64);
+            let inverted_index = segment_reader.inverted_index(int_field)?;
+            let term_info = inverted_index.get_term_info(&term)?.unwrap();
+            block_segments = inverted_index
+                .read_block_postings_from_terminfo(&term_info, IndexRecordOption::Basic)?;
+        }
+        assert_eq!(block_segments.docs(), &[0, 2, 4]);
+        {
+            let term = Term::from_field_u64(int_field, 1u64);
+            let inverted_index = segment_reader.inverted_index(int_field)?;
+            let term_info = inverted_index.get_term_info(&term)?.unwrap();
+            inverted_index.reset_block_postings_from_terminfo(&term_info, &mut block_segments)?;
+        }
+        assert_eq!(block_segments.docs(), &[1, 3, 5]);
+        Ok(())
+    }
 }
--- a/src/postings/json_postings_writer.rs
+++ b/src/postings/json_postings_writer.rs
@@ -3,7 +3,6 @@ use std::io;
 use common::json_path_writer::JSON_END_OF_PATH;
 use stacker::Addr;

-use crate::codec::Codec;
 use crate::indexer::indexing_term::IndexingTerm;
 use crate::indexer::path_to_unordered_id::OrderedPathId;
 use crate::postings::postings_writer::SpecializedPostingsWriter;
@@ -23,6 +22,12 @@ pub(crate) struct JsonPostingsWriter<Rec: Recorder> {
    non_str_posting_writer: SpecializedPostingsWriter<DocIdRecorder>,
 }

+impl<Rec: Recorder> From<JsonPostingsWriter<Rec>> for Box<dyn PostingsWriter> {
+    fn from(json_postings_writer: JsonPostingsWriter<Rec>) -> Box<dyn PostingsWriter> {
+        Box::new(json_postings_writer)
+    }
+}
+
 impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
    #[inline]
    fn subscribe(
@@ -53,12 +58,12 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
    }

    /// The actual serialization format is handled by the `PostingsSerializer`.
-    fn serialize<C: Codec>(
+    fn serialize(
        &self,
        ordered_term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
        ordered_id_to_path: &[&str],
        ctx: &IndexingContext,
-        serializer: &mut FieldSerializer<C>,
+        serializer: &mut FieldSerializer,
    ) -> io::Result<()> {
        let mut term_buffer = JsonTermSerializer(Vec::with_capacity(48));
        let mut buffer_lender = BufferLender::default();
--- a/src/postings/loaded_postings.rs
+++ b/src/postings/loaded_postings.rs
@@ -1,5 +1,5 @@
 use crate::docset::{DocSet, TERMINATED};
-use crate::postings::{DocFreq, Postings};
+use crate::postings::{Postings, SegmentPostings};
 use crate::DocId;

 /// `LoadedPostings` is a `DocSet` and `Postings` implementation.
@@ -25,16 +25,16 @@ impl LoadedPostings {
    /// Creates a new `LoadedPostings` from a `SegmentPostings`.
    ///
    /// It will also preload positions, if positions are available in the SegmentPostings.
-    pub fn load(postings: &mut Box<dyn Postings>) -> LoadedPostings {
-        let num_docs: usize = u32::from(postings.doc_freq()) as usize;
+    pub fn load(segment_postings: &mut SegmentPostings) -> LoadedPostings {
+        let num_docs = segment_postings.doc_freq() as usize;
        let mut doc_ids = Vec::with_capacity(num_docs);
        let mut positions = Vec::with_capacity(num_docs);
        let mut position_offsets = Vec::with_capacity(num_docs);
-        while postings.doc() != TERMINATED {
+        while segment_postings.doc() != TERMINATED {
            position_offsets.push(positions.len() as u32);
-            doc_ids.push(postings.doc());
-            postings.append_positions_with_offset(0, &mut positions);
-            postings.advance();
+            doc_ids.push(segment_postings.doc());
+            segment_postings.append_positions_with_offset(0, &mut positions);
+            segment_postings.advance();
        }
        position_offsets.push(positions.len() as u32);
        LoadedPostings {
@@ -101,14 +101,6 @@ impl Postings for LoadedPostings {
            output.push(*pos + offset);
        }
    }
-
-    fn has_freq(&self) -> bool {
-        true
-    }
-
-    fn doc_freq(&self) -> DocFreq {
-        DocFreq::Exact(self.doc_ids.len() as u32)
-    }
 }

 #[cfg(test)]
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -4,6 +4,7 @@ mod block_search;

 pub(crate) use self::block_search::branchless_binary_search;

+mod block_segment_postings;
 pub(crate) mod compression;
 mod indexing_context;
 mod json_postings_writer;
@@ -12,22 +13,32 @@ mod per_field_postings_writer;
 mod postings;
 mod postings_writer;
 mod recorder;
+mod segment_postings;
 mod serializer;
+mod skip;
 mod term_info;

 pub(crate) use loaded_postings::LoadedPostings;
-pub use postings::DocFreq;
 pub(crate) use stacker::compute_table_memory_size;

+pub use self::block_segment_postings::BlockSegmentPostings;
 pub(crate) use self::indexing_context::IndexingContext;
 pub(crate) use self::per_field_postings_writer::PerFieldPostingsWriter;
 pub use self::postings::Postings;
-pub(crate) use self::postings_writer::{
-    serialize_postings, IndexingPosition, PostingsWriter, PostingsWriterEnum,
-};
+pub(crate) use self::postings_writer::{serialize_postings, IndexingPosition, PostingsWriter};
+pub use self::segment_postings::SegmentPostings;
 pub use self::serializer::{FieldSerializer, InvertedIndexSerializer};
+pub(crate) use self::skip::{BlockInfo, SkipReader};
 pub use self::term_info::TermInfo;

+#[expect(clippy::enum_variant_names)]
+#[derive(Debug, PartialEq, Clone, Copy, Eq)]
+pub(crate) enum FreqReadingOption {
+    NoFreq,
+    SkipFreq,
+    ReadFreq,
+}
+
 #[cfg(test)]
 pub(crate) mod tests {
    use std::mem;
@@ -38,7 +49,6 @@ pub(crate) mod tests {
    use crate::index::{Index, SegmentComponent, SegmentReader};
    use crate::indexer::operation::AddOperation;
    use crate::indexer::SegmentWriter;
-    use crate::postings::DocFreq;
    use crate::query::Scorer;
    use crate::schema::{
        Field, IndexRecordOption, Schema, Term, TextFieldIndexing, TextOptions, INDEXED, TEXT,
@@ -269,11 +279,11 @@ pub(crate) mod tests {
            }
            {
                let term_a = Term::from_field_text(text_field, "a");
-                let mut postings_a: Box<dyn Postings> = segment_reader
+                let mut postings_a = segment_reader
                    .inverted_index(term_a.field())?
                    .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)?
                    .unwrap();
-                assert_eq!(postings_a.doc_freq(), DocFreq::Exact(1000));
+                assert_eq!(postings_a.len(), 1000);
                assert_eq!(postings_a.doc(), 0);
                assert_eq!(postings_a.term_freq(), 6);
                postings_a.positions(&mut positions);
@@ -296,7 +306,7 @@ pub(crate) mod tests {
                    .inverted_index(term_e.field())?
                    .read_postings(&term_e, IndexRecordOption::WithFreqsAndPositions)?
                    .unwrap();
-                assert_eq!(postings_e.doc_freq(), DocFreq::Exact(1000 - 2));
+                assert_eq!(postings_e.len(), 1000 - 2);
                for i in 2u32..1000u32 {
                    assert_eq!(postings_e.term_freq(), i);
                    postings_e.positions(&mut positions);
@@ -517,7 +527,6 @@ pub(crate) mod tests {
    }

    impl<TScorer: Scorer> Scorer for UnoptimizedDocSet<TScorer> {
-        #[inline]
        fn score(&mut self) -> Score {
            self.0.score()
        }
--- a/src/postings/per_field_postings_writer.rs
+++ b/src/postings/per_field_postings_writer.rs
@@ -1,15 +1,16 @@
 use crate::postings::json_postings_writer::JsonPostingsWriter;
-use crate::postings::postings_writer::{PostingsWriterEnum, SpecializedPostingsWriter};
+use crate::postings::postings_writer::SpecializedPostingsWriter;
 use crate::postings::recorder::{DocIdRecorder, TermFrequencyRecorder, TfAndPositionRecorder};
+use crate::postings::PostingsWriter;
 use crate::schema::{Field, FieldEntry, FieldType, IndexRecordOption, Schema};

 pub(crate) struct PerFieldPostingsWriter {
-    per_field_postings_writers: Vec<PostingsWriterEnum>,
+    per_field_postings_writers: Vec<Box<dyn PostingsWriter>>,
 }

 impl PerFieldPostingsWriter {
    pub fn for_schema(schema: &Schema) -> Self {
-        let per_field_postings_writers: Vec<PostingsWriterEnum> = schema
+        let per_field_postings_writers = schema
            .fields()
            .map(|(_, field_entry)| posting_writer_from_field_entry(field_entry))
            .collect();
@@ -18,16 +19,16 @@ impl PerFieldPostingsWriter {
        }
    }

-    pub(crate) fn get_for_field(&self, field: Field) -> &PostingsWriterEnum {
-        &self.per_field_postings_writers[field.field_id() as usize]
+    pub(crate) fn get_for_field(&self, field: Field) -> &dyn PostingsWriter {
+        self.per_field_postings_writers[field.field_id() as usize].as_ref()
    }

-    pub(crate) fn get_for_field_mut(&mut self, field: Field) -> &mut PostingsWriterEnum {
-        &mut self.per_field_postings_writers[field.field_id() as usize]
+    pub(crate) fn get_for_field_mut(&mut self, field: Field) -> &mut dyn PostingsWriter {
+        self.per_field_postings_writers[field.field_id() as usize].as_mut()
    }
 }

-fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> PostingsWriterEnum {
+fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> Box<dyn PostingsWriter> {
    match *field_entry.field_type() {
        FieldType::Str(ref text_options) => text_options
            .get_indexing_options()
@@ -50,7 +51,7 @@ fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> PostingsWriterEn
        | FieldType::Date(_)
        | FieldType::Bytes(_)
        | FieldType::IpAddr(_)
-        | FieldType::Facet(_) => <SpecializedPostingsWriter<DocIdRecorder>>::default().into(),
+        | FieldType::Facet(_) => Box::<SpecializedPostingsWriter<DocIdRecorder>>::default(),
        FieldType::JsonObject(ref json_object_options) => {
            if let Some(text_indexing_option) = json_object_options.get_text_indexing_options() {
                match text_indexing_option.index_option() {
--- a/src/postings/postings.rs
+++ b/src/postings/postings.rs
@@ -1,25 +1,5 @@
 use crate::docset::DocSet;

-/// Result of the doc_freq method.
-///
-/// Postings can inform us that the document frequency is approximate.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum DocFreq {
-    /// The document frequency is approximate.
-    Approximate(u32),
-    /// The document frequency is exact.
-    Exact(u32),
-}
-
-impl From<DocFreq> for u32 {
-    fn from(doc_freq: DocFreq) -> Self {
-        match doc_freq {
-            DocFreq::Approximate(approximate_doc_freq) => approximate_doc_freq,
-            DocFreq::Exact(doc_freq) => doc_freq,
-        }
-    }
-}
-
 /// Postings (also called inverted list)
 ///
 /// For a given term, it is the list of doc ids of the doc
@@ -34,9 +14,6 @@ pub trait Postings: DocSet + 'static {
    /// The number of times the term appears in the document.
    fn term_freq(&self) -> u32;

-    /// Returns the number of documents containing the term in the segment.
-    fn doc_freq(&self) -> DocFreq;
-
    /// Returns the positions offsetted with a given value.
    /// It is not necessary to clear the `output` before calling this method.
    /// The output vector will be resized to the `term_freq`.
@@ -54,16 +31,6 @@ pub trait Postings: DocSet + 'static {
    fn positions(&mut self, output: &mut Vec<u32>) {
        self.positions_with_offset(0u32, output);
    }
-
-    /// Returns true if the term_frequency is available.
-    ///
-    /// This is a tricky question, because on JSON fields, it is possible
-    /// for a text term to have term freq, whereas a number term in the field has none.
-    ///
-    /// This function returns whether the actual term has term frequencies or not.
-    /// In this above JSON field example, `has_freq` should return true for the
-    /// earlier and false for the latter.
-    fn has_freq(&self) -> bool;
 }

 impl Postings for Box<dyn Postings> {
@@ -74,12 +41,4 @@ impl Postings for Box<dyn Postings> {
    fn append_positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) {
        (**self).append_positions_with_offset(offset, output);
    }
-
-    fn has_freq(&self) -> bool {
-        (**self).has_freq()
-    }
-
-    fn doc_freq(&self) -> DocFreq {
-        (**self).doc_freq()
-    }
 }
--- a/src/postings/postings_writer.rs
+++ b/src/postings/postings_writer.rs
@@ -4,14 +4,10 @@ use std::ops::Range;

 use stacker::Addr;

-use crate::codec::Codec;
 use crate::fieldnorm::FieldNormReaders;
 use crate::indexer::indexing_term::IndexingTerm;
 use crate::indexer::path_to_unordered_id::OrderedPathId;
-use crate::postings::json_postings_writer::JsonPostingsWriter;
-use crate::postings::recorder::{
-    BufferLender, DocIdRecorder, Recorder, TermFrequencyRecorder, TfAndPositionRecorder,
-};
+use crate::postings::recorder::{BufferLender, Recorder};
 use crate::postings::{
    FieldSerializer, IndexingContext, InvertedIndexSerializer, PerFieldPostingsWriter,
 };
@@ -49,12 +45,12 @@ fn make_field_partition(
 /// Serialize the inverted index.
 /// It pushes all term, one field at a time, towards the
 /// postings serializer.
-pub(crate) fn serialize_postings<C: Codec>(
+pub(crate) fn serialize_postings(
    ctx: IndexingContext,
    schema: Schema,
    per_field_postings_writers: &PerFieldPostingsWriter,
    fieldnorm_readers: FieldNormReaders,
-    serializer: &mut InvertedIndexSerializer<C>,
+    serializer: &mut InvertedIndexSerializer,
 ) -> crate::Result<()> {
    // Replace unordered ids by ordered ids to be able to sort
    let unordered_id_to_ordered_id: Vec<OrderedPathId> =
@@ -104,141 +100,6 @@ pub(crate) struct IndexingPosition {
    pub end_position: u32,
 }

-pub enum PostingsWriterEnum {
-    DocId(SpecializedPostingsWriter<DocIdRecorder>),
-    DocIdTf(SpecializedPostingsWriter<TermFrequencyRecorder>),
-    DocTfAndPosition(SpecializedPostingsWriter<TfAndPositionRecorder>),
-    JsonDocId(JsonPostingsWriter<DocIdRecorder>),
-    JsonDocIdTf(JsonPostingsWriter<TermFrequencyRecorder>),
-    JsonDocTfAndPosition(JsonPostingsWriter<TfAndPositionRecorder>),
-}
-
-impl From<SpecializedPostingsWriter<DocIdRecorder>> for PostingsWriterEnum {
-    fn from(doc_id_recorder_writer: SpecializedPostingsWriter<DocIdRecorder>) -> Self {
-        PostingsWriterEnum::DocId(doc_id_recorder_writer)
-    }
-}
-
-impl From<SpecializedPostingsWriter<TermFrequencyRecorder>> for PostingsWriterEnum {
-    fn from(doc_id_tf_recorder_writer: SpecializedPostingsWriter<TermFrequencyRecorder>) -> Self {
-        PostingsWriterEnum::DocIdTf(doc_id_tf_recorder_writer)
-    }
-}
-
-impl From<SpecializedPostingsWriter<TfAndPositionRecorder>> for PostingsWriterEnum {
-    fn from(
-        doc_id_tf_and_positions_recorder_writer: SpecializedPostingsWriter<TfAndPositionRecorder>,
-    ) -> Self {
-        PostingsWriterEnum::DocTfAndPosition(doc_id_tf_and_positions_recorder_writer)
-    }
-}
-
-impl From<JsonPostingsWriter<DocIdRecorder>> for PostingsWriterEnum {
-    fn from(doc_id_recorder_writer: JsonPostingsWriter<DocIdRecorder>) -> Self {
-        PostingsWriterEnum::JsonDocId(doc_id_recorder_writer)
-    }
-}
-
-impl From<JsonPostingsWriter<TermFrequencyRecorder>> for PostingsWriterEnum {
-    fn from(doc_id_tf_recorder_writer: JsonPostingsWriter<TermFrequencyRecorder>) -> Self {
-        PostingsWriterEnum::JsonDocIdTf(doc_id_tf_recorder_writer)
-    }
-}
-
-impl From<JsonPostingsWriter<TfAndPositionRecorder>> for PostingsWriterEnum {
-    fn from(
-        doc_id_tf_and_positions_recorder_writer: JsonPostingsWriter<TfAndPositionRecorder>,
-    ) -> Self {
-        PostingsWriterEnum::JsonDocTfAndPosition(doc_id_tf_and_positions_recorder_writer)
-    }
-}
-
-impl PostingsWriter for PostingsWriterEnum {
-    fn subscribe(&mut self, doc: DocId, pos: u32, term: &IndexingTerm, ctx: &mut IndexingContext) {
-        match self {
-            PostingsWriterEnum::DocId(writer) => writer.subscribe(doc, pos, term, ctx),
-            PostingsWriterEnum::DocIdTf(writer) => writer.subscribe(doc, pos, term, ctx),
-            PostingsWriterEnum::DocTfAndPosition(writer) => writer.subscribe(doc, pos, term, ctx),
-            PostingsWriterEnum::JsonDocId(writer) => writer.subscribe(doc, pos, term, ctx),
-            PostingsWriterEnum::JsonDocIdTf(writer) => writer.subscribe(doc, pos, term, ctx),
-            PostingsWriterEnum::JsonDocTfAndPosition(writer) => {
-                writer.subscribe(doc, pos, term, ctx)
-            }
-        }
-    }
-
-    fn serialize<C: Codec>(
-        &self,
-        term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
-        ordered_id_to_path: &[&str],
-        ctx: &IndexingContext,
-        serializer: &mut FieldSerializer<C>,
-    ) -> io::Result<()> {
-        match self {
-            PostingsWriterEnum::DocId(writer) => {
-                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
-            }
-            PostingsWriterEnum::DocIdTf(writer) => {
-                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
-            }
-            PostingsWriterEnum::DocTfAndPosition(writer) => {
-                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
-            }
-            PostingsWriterEnum::JsonDocId(writer) => {
-                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
-            }
-            PostingsWriterEnum::JsonDocIdTf(writer) => {
-                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
-            }
-            PostingsWriterEnum::JsonDocTfAndPosition(writer) => {
-                writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
-            }
-        }
-    }
-
-    /// Tokenize a text and subscribe all of its token.
-    fn index_text(
-        &mut self,
-        doc_id: DocId,
-        token_stream: &mut dyn TokenStream,
-        term_buffer: &mut IndexingTerm,
-        ctx: &mut IndexingContext,
-        indexing_position: &mut IndexingPosition,
-    ) {
-        match self {
-            PostingsWriterEnum::DocId(writer) => {
-                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
-            }
-            PostingsWriterEnum::DocIdTf(writer) => {
-                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
-            }
-            PostingsWriterEnum::DocTfAndPosition(writer) => {
-                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
-            }
-            PostingsWriterEnum::JsonDocId(writer) => {
-                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
-            }
-            PostingsWriterEnum::JsonDocIdTf(writer) => {
-                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
-            }
-            PostingsWriterEnum::JsonDocTfAndPosition(writer) => {
-                writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
-            }
-        }
-    }
-
-    fn total_num_tokens(&self) -> u64 {
-        match self {
-            PostingsWriterEnum::DocId(writer) => writer.total_num_tokens(),
-            PostingsWriterEnum::DocIdTf(writer) => writer.total_num_tokens(),
-            PostingsWriterEnum::DocTfAndPosition(writer) => writer.total_num_tokens(),
-            PostingsWriterEnum::JsonDocId(writer) => writer.total_num_tokens(),
-            PostingsWriterEnum::JsonDocIdTf(writer) => writer.total_num_tokens(),
-            PostingsWriterEnum::JsonDocTfAndPosition(writer) => writer.total_num_tokens(),
-        }
-    }
-}
-
 /// The `PostingsWriter` is in charge of receiving documenting
 /// and building a `Segment` in anonymous memory.
 ///
@@ -255,12 +116,12 @@ pub(crate) trait PostingsWriter: Send + Sync {

    /// Serializes the postings on disk.
    /// The actual serialization format is handled by the `PostingsSerializer`.
-    fn serialize<C: Codec>(
+    fn serialize(
        &self,
        term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
        ordered_id_to_path: &[&str],
        ctx: &IndexingContext,
-        serializer: &mut FieldSerializer<C>,
+        serializer: &mut FieldSerializer,
    ) -> io::Result<()>;

    /// Tokenize a text and subscribe all of its token.
@@ -310,14 +171,22 @@ pub(crate) struct SpecializedPostingsWriter<Rec: Recorder> {
    _recorder_type: PhantomData<Rec>,
 }

+impl<Rec: Recorder> From<SpecializedPostingsWriter<Rec>> for Box<dyn PostingsWriter> {
+    fn from(
+        specialized_postings_writer: SpecializedPostingsWriter<Rec>,
+    ) -> Box<dyn PostingsWriter> {
+        Box::new(specialized_postings_writer)
+    }
+}
+
 impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {
    #[inline]
-    pub(crate) fn serialize_one_term<C: Codec>(
+    pub(crate) fn serialize_one_term(
        term: &[u8],
        addr: Addr,
        buffer_lender: &mut BufferLender,
        ctx: &IndexingContext,
-        serializer: &mut FieldSerializer<C>,
+        serializer: &mut FieldSerializer,
    ) -> io::Result<()> {
        let recorder: Rec = ctx.term_index.read(addr);
        let term_doc_freq = recorder.term_doc_freq().unwrap_or(0u32);
@@ -358,12 +227,12 @@ impl<Rec: Recorder> PostingsWriter for SpecializedPostingsWriter<Rec> {
        });
    }

-    fn serialize<C: Codec>(
+    fn serialize(
        &self,
        term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
        _ordered_id_to_path: &[&str],
        ctx: &IndexingContext,
-        serializer: &mut FieldSerializer<C>,
+        serializer: &mut FieldSerializer,
    ) -> io::Result<()> {
        let mut buffer_lender = BufferLender::default();
        for (_field, _path_id, term, addr) in term_addrs {
--- a/src/postings/recorder.rs
+++ b/src/postings/recorder.rs
@@ -1,7 +1,6 @@
 use common::read_u32_vint;
 use stacker::{ExpUnrolledLinkedList, MemoryArena};

-use crate::codec::Codec;
 use crate::postings::FieldSerializer;
 use crate::DocId;

@@ -68,10 +67,10 @@ pub(crate) trait Recorder: Copy + Default + Send + Sync + 'static {
    /// Close the document. It will help record the term frequency.
    fn close_doc(&mut self, arena: &mut MemoryArena);
    /// Pushes the postings information to the serializer.
-    fn serialize<C: Codec>(
+    fn serialize(
        &self,
        arena: &MemoryArena,
-        serializer: &mut FieldSerializer<C>,
+        serializer: &mut FieldSerializer<'_>,
        buffer_lender: &mut BufferLender,
    );
    /// Returns the number of document containing this term.
@@ -111,10 +110,10 @@ impl Recorder for DocIdRecorder {
    #[inline]
    fn close_doc(&mut self, _arena: &mut MemoryArena) {}

-    fn serialize<C: Codec>(
+    fn serialize(
        &self,
        arena: &MemoryArena,
-        serializer: &mut FieldSerializer<C>,
+        serializer: &mut FieldSerializer<'_>,
        buffer_lender: &mut BufferLender,
    ) {
        let buffer = buffer_lender.lend_u8();
@@ -179,10 +178,10 @@ impl Recorder for TermFrequencyRecorder {
        self.current_tf = 0;
    }

-    fn serialize<C: Codec>(
+    fn serialize(
        &self,
        arena: &MemoryArena,
-        serializer: &mut FieldSerializer<C>,
+        serializer: &mut FieldSerializer<'_>,
        buffer_lender: &mut BufferLender,
    ) {
        let buffer = buffer_lender.lend_u8();
@@ -236,10 +235,10 @@ impl Recorder for TfAndPositionRecorder {
        self.stack.writer(arena).write_u32_vint(POSITION_END);
    }

-    fn serialize<C: Codec>(
+    fn serialize(
        &self,
        arena: &MemoryArena,
-        serializer: &mut FieldSerializer<C>,
+        serializer: &mut FieldSerializer<'_>,
        buffer_lender: &mut BufferLender,
    ) {
        let (buffer_u8, buffer_positions) = buffer_lender.lend_all();
--- a/src/codec/standard/postings/segment_postings.rs
+++ b/src/codec/standard/postings/segment_postings.rs
@@ -1,13 +1,11 @@
-use common::{BitSet, HasLen};
+use common::HasLen;

-use super::BlockSegmentPostings;
-use crate::codec::postings::PostingsWithBlockMax;
 use crate::docset::DocSet;
+use crate::fastfield::AliveBitSet;
 use crate::positions::PositionReader;
 use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
-use crate::postings::{DocFreq, Postings};
-use crate::query::Bm25Weight;
-use crate::{DocId, Score};
+use crate::postings::{BlockSegmentPostings, Postings};
+use crate::{DocId, TERMINATED};

 /// `SegmentPostings` represents the inverted list or postings associated with
 /// a term in a `Segment`.
@@ -31,6 +29,31 @@ impl SegmentPostings {
        }
    }

+    /// Compute the number of non-deleted documents.
+    ///
+    /// This method will clone and scan through the posting lists.
+    /// (this is a rather expensive operation).
+    pub fn doc_freq_given_deletes(&self, alive_bitset: &AliveBitSet) -> u32 {
+        let mut docset = self.clone();
+        let mut doc_freq = 0;
+        loop {
+            let doc = docset.doc();
+            if doc == TERMINATED {
+                return doc_freq;
+            }
+            if alive_bitset.is_alive(doc) {
+                doc_freq += 1u32;
+            }
+            docset.advance();
+        }
+    }
+
+    /// Returns the overall number of documents in the block postings.
+    /// It does not take into account whether documents are deleted or not.
+    pub fn doc_freq(&self) -> u32 {
+        self.block_cursor.doc_freq()
+    }
+
    /// Creates a segment postings object with the given documents
    /// and no frequency encoded.
    ///
@@ -41,35 +64,30 @@ impl SegmentPostings {
    /// buffer with the serialized data.
    #[cfg(test)]
    pub fn create_from_docs(docs: &[u32]) -> SegmentPostings {
-        use common::OwnedBytes;
-
+        use crate::directory::FileSlice;
+        use crate::postings::serializer::PostingsSerializer;
        use crate::schema::IndexRecordOption;
        let mut buffer = Vec::new();
        {
-            use crate::codec::postings::PostingsSerializer;
-
            let mut postings_serializer =
-                crate::codec::standard::postings::StandardPostingsSerializer::new(
-                    0.0,
-                    IndexRecordOption::Basic,
-                    None,
-                );
+                PostingsSerializer::new(&mut buffer, 0.0, IndexRecordOption::Basic, None);
            postings_serializer.new_term(docs.len() as u32, false);
            for &doc in docs {
                postings_serializer.write_doc(doc, 1u32);
            }
            postings_serializer
-                .close_term(docs.len() as u32, &mut buffer)
+                .close_term(docs.len() as u32)
                .expect("In memory Serialization should never fail.");
        }
-        let block_segment_postings = BlockSegmentPostings::open(
+        let (block_segment_postings, position_within_block) = BlockSegmentPostings::open(
            docs.len() as u32,
-            OwnedBytes::new(buffer),
+            FileSlice::from(buffer),
            IndexRecordOption::Basic,
            IndexRecordOption::Basic,
+            0u32,
        )
        .unwrap();
-        SegmentPostings::from_block_postings(block_segment_postings, None)
+        SegmentPostings::from_block_postings(block_segment_postings, None, position_within_block)
    }

    /// Helper functions to create `SegmentPostings` for tests.
@@ -78,11 +96,9 @@ impl SegmentPostings {
        doc_and_tfs: &[(u32, u32)],
        fieldnorms: Option<&[u32]>,
    ) -> SegmentPostings {
-        use common::OwnedBytes;
-
-        use crate::codec::postings::PostingsSerializer as _;
-        use crate::codec::standard::postings::StandardPostingsSerializer;
+        use crate::directory::FileSlice;
        use crate::fieldnorm::FieldNormReader;
+        use crate::postings::serializer::PostingsSerializer;
        use crate::schema::IndexRecordOption;
        use crate::Score;
        let mut buffer: Vec<u8> = Vec::new();
@@ -99,7 +115,8 @@ impl SegmentPostings {
                total_num_tokens as Score / fieldnorms.len() as Score
            })
            .unwrap_or(0.0);
-        let mut postings_serializer = StandardPostingsSerializer::new(
+        let mut postings_serializer = PostingsSerializer::new(
+            &mut buffer,
            average_field_norm,
            IndexRecordOption::WithFreqs,
            fieldnorm_reader,
@@ -109,30 +126,31 @@ impl SegmentPostings {
            postings_serializer.write_doc(doc, tf);
        }
        postings_serializer
-            .close_term(doc_and_tfs.len() as u32, &mut buffer)
+            .close_term(doc_and_tfs.len() as u32)
            .unwrap();
-        let block_segment_postings = BlockSegmentPostings::open(
+        let (block_segment_postings, position_within_block) = BlockSegmentPostings::open(
            doc_and_tfs.len() as u32,
-            OwnedBytes::new(buffer),
+            FileSlice::from(buffer),
            IndexRecordOption::WithFreqs,
            IndexRecordOption::WithFreqs,
+            0u32,
        )
        .unwrap();
-        SegmentPostings::from_block_postings(block_segment_postings, None)
+        SegmentPostings::from_block_postings(block_segment_postings, None, position_within_block)
    }

-    /// Reads a Segment postings from an &[u8]
-    ///
-    /// * `len` - number of document in the posting lists.
-    /// * `data` - data array. The complete data is not necessarily used.
-    /// * `freq_handler` - the freq handler is in charge of decoding frequencies and/or positions
+    /// Creates a `SegmentPostings` from:
+    /// - a `BlockSegmentPostings`,
+    /// - an optional position reader,
+    /// - the initial cursor position within the current block.
    pub(crate) fn from_block_postings(
        segment_block_postings: BlockSegmentPostings,
        position_reader: Option<PositionReader>,
+        position_within_block: usize,
    ) -> SegmentPostings {
        SegmentPostings {
            block_cursor: segment_block_postings,
-            cur: 0, // cursor within the block
+            cur: position_within_block,
            position_reader,
        }
    }
@@ -143,6 +161,7 @@ impl DocSet for SegmentPostings {
    // next needs to be called a first time to point to the correct element.
    #[inline]
    fn advance(&mut self) -> DocId {
+        debug_assert!(self.block_cursor.block_is_loaded());
        if self.cur == COMPRESSION_BLOCK_SIZE - 1 {
            self.cur = 0;
            self.block_cursor.advance();
@@ -175,19 +194,6 @@ impl DocSet for SegmentPostings {
    fn size_hint(&self) -> u32 {
        self.len() as u32
    }
-
-    fn fill_bitset(&mut self, bitset: &mut BitSet) {
-        loop {
-            let docs = self.block_cursor.docs();
-            if docs.is_empty() {
-                break;
-            }
-            for &doc in docs {
-                bitset.insert(doc);
-            }
-            self.block_cursor.advance();
-        }
-    }
 }

 impl HasLen for SegmentPostings {
@@ -203,7 +209,7 @@ impl Postings for SegmentPostings {
    ///
    /// # Panics
    ///
-    /// Will panics if called without having cagled advance before.
+    /// Will panic if called without having called advance before.
    fn term_freq(&self) -> u32 {
        debug_assert!(
            // Here we do not use the len of `freqs()`
@@ -218,13 +224,6 @@ impl Postings for SegmentPostings {
        self.block_cursor.freq(self.cur)
    }

-    /// Returns the overall number of documents in the block postings.
-    /// It does not take in account whether documents are deleted or not.
-    #[inline(always)]
-    fn doc_freq(&self) -> DocFreq {
-        DocFreq::Exact(self.block_cursor.doc_freq())
-    }
-
    fn append_positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) {
        let term_freq = self.term_freq();
        let prev_len = output.len();
@@ -248,25 +247,6 @@ impl Postings for SegmentPostings {
            }
        }
    }
-
-    fn has_freq(&self) -> bool {
-        !self.block_cursor.freqs().is_empty()
-    }
-}
-
-impl PostingsWithBlockMax for SegmentPostings {
-    fn seek_block_max(
-        &mut self,
-        target_doc: crate::DocId,
-        similarity_weight: &Bm25Weight,
-    ) -> Score {
-        self.block_cursor.seek_block_without_loading(target_doc);
-        self.block_cursor.block_max_score(similarity_weight)
-    }
-
-    fn last_doc_in_block(&self) -> crate::DocId {
-        self.block_cursor.skip_reader().last_doc_in_block()
-    }
 }

 #[cfg(test)]
@@ -276,15 +256,14 @@ mod tests {

    use super::SegmentPostings;
    use crate::docset::{DocSet, TERMINATED};
-    use crate::postings::Postings;
+    use crate::fastfield::AliveBitSet;
+    use crate::postings::postings::Postings;

    #[test]
    fn test_empty_segment_postings() {
        let mut postings = SegmentPostings::empty();
-        assert_eq!(postings.doc(), TERMINATED);
        assert_eq!(postings.advance(), TERMINATED);
        assert_eq!(postings.advance(), TERMINATED);
-        assert_eq!(postings.doc_freq(), crate::postings::DocFreq::Exact(0));
        assert_eq!(postings.len(), 0);
    }

@@ -300,4 +279,15 @@ mod tests {
        let postings = SegmentPostings::empty();
        assert_eq!(postings.term_freq(), 1);
    }
+
+    #[test]
+    fn test_doc_freq() {
+        let docs = SegmentPostings::create_from_docs(&[0, 2, 10]);
+        assert_eq!(docs.doc_freq(), 3);
+        let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[2], 12);
+        assert_eq!(docs.doc_freq_given_deletes(&alive_bitset), 2);
+        let all_deleted =
+            AliveBitSet::for_test_from_deleted_docs(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12);
+        assert_eq!(docs.doc_freq_given_deletes(&all_deleted), 0);
+    }
 }
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -1,14 +1,16 @@
+use std::cmp::Ordering;
 use std::io::{self, Write};

-use common::{BinarySerializable, CountingWriter};
+use common::{BinarySerializable, CountingWriter, VInt};

 use super::TermInfo;
-use crate::codec::postings::PostingsSerializer;
-use crate::codec::Codec;
 use crate::directory::{CompositeWrite, WritePtr};
 use crate::fieldnorm::FieldNormReader;
 use crate::index::Segment;
 use crate::positions::PositionSerializer;
+use crate::postings::compression::{BlockEncoder, VIntEncoder, COMPRESSION_BLOCK_SIZE};
+use crate::postings::skip::SkipSerializer;
+use crate::query::Bm25Weight;
 use crate::schema::{Field, FieldEntry, FieldType, IndexRecordOption, Schema};
 use crate::termdict::TermDictionaryBuilder;
 use crate::{DocId, Score};
@@ -44,27 +46,22 @@ use crate::{DocId, Score};
 ///
 /// A description of the serialization format is
 /// [available here](https://fulmicoton.gitbooks.io/tantivy-doc/content/inverted-index.html).
-pub struct InvertedIndexSerializer<C: Codec> {
+pub struct InvertedIndexSerializer {
    terms_write: CompositeWrite<WritePtr>,
    postings_write: CompositeWrite<WritePtr>,
    positions_write: CompositeWrite<WritePtr>,
    schema: Schema,
-    codec: C,
 }

-use crate::codec::postings::PostingsCodec;
-
-impl<C: Codec> InvertedIndexSerializer<C> {
+impl InvertedIndexSerializer {
    /// Open a new `InvertedIndexSerializer` for the given segment
-    pub fn open(segment: &mut Segment<C>) -> crate::Result<InvertedIndexSerializer<C>> {
+    pub fn open(segment: &mut Segment) -> crate::Result<InvertedIndexSerializer> {
        use crate::index::SegmentComponent::{Positions, Postings, Terms};
-        let codec = segment.index().codec().clone();
        let inv_index_serializer = InvertedIndexSerializer {
            terms_write: CompositeWrite::wrap(segment.open_write(Terms)?),
            postings_write: CompositeWrite::wrap(segment.open_write(Postings)?),
            positions_write: CompositeWrite::wrap(segment.open_write(Positions)?),
            schema: segment.schema(),
-            codec,
        };
        Ok(inv_index_serializer)
    }
@@ -78,7 +75,7 @@ impl<C: Codec> InvertedIndexSerializer<C> {
        field: Field,
        total_num_tokens: u64,
        fieldnorm_reader: Option<FieldNormReader>,
-    ) -> io::Result<FieldSerializer<'_, C>> {
+    ) -> io::Result<FieldSerializer<'_>> {
        let field_entry: &FieldEntry = self.schema.get_field_entry(field);
        let term_dictionary_write = self.terms_write.for_field(field);
        let postings_write = self.postings_write.for_field(field);
@@ -91,7 +88,6 @@ impl<C: Codec> InvertedIndexSerializer<C> {
            postings_write,
            positions_write,
            fieldnorm_reader,
-            &self.codec,
        )
    }

@@ -106,17 +102,15 @@ impl<C: Codec> InvertedIndexSerializer<C> {

 /// The field serializer is in charge of
 /// the serialization of a specific field.
-pub struct FieldSerializer<'a, C: Codec> {
+pub struct FieldSerializer<'a> {
    term_dictionary_builder: TermDictionaryBuilder<&'a mut CountingWriter<WritePtr>>,
-    postings_serializer: <C::PostingsCodec as PostingsCodec>::PostingsSerializer,
+    postings_serializer: PostingsSerializer<&'a mut CountingWriter<WritePtr>>,
    positions_serializer_opt: Option<PositionSerializer<&'a mut CountingWriter<WritePtr>>>,
    current_term_info: TermInfo,
    term_open: bool,
-    postings_write: &'a mut CountingWriter<WritePtr>,
-    postings_start_offset: u64,
 }

-impl<'a, C: Codec> FieldSerializer<'a, C> {
+impl<'a> FieldSerializer<'a> {
    fn create(
        field_type: &FieldType,
        total_num_tokens: u64,
@@ -124,8 +118,7 @@ impl<'a, C: Codec> FieldSerializer<'a, C> {
        postings_write: &'a mut CountingWriter<WritePtr>,
        positions_write: &'a mut CountingWriter<WritePtr>,
        fieldnorm_reader: Option<FieldNormReader>,
-        codec: &C,
-    ) -> io::Result<FieldSerializer<'a, C>> {
+    ) -> io::Result<FieldSerializer<'a>> {
        total_num_tokens.serialize(postings_write)?;
        let index_record_option = field_type
            .index_record_option()
@@ -135,7 +128,8 @@ impl<'a, C: Codec> FieldSerializer<'a, C> {
            .as_ref()
            .map(|ff_reader| total_num_tokens as Score / ff_reader.num_docs() as Score)
            .unwrap_or(0.0);
-        let postings_serializer = codec.postings_codec().new_serializer(
+        let postings_serializer = PostingsSerializer::new(
+            postings_write,
            average_fieldnorm,
            index_record_option,
            fieldnorm_reader,
@@ -146,22 +140,15 @@ impl<'a, C: Codec> FieldSerializer<'a, C> {
            None
        };

-        let postings_start_offset = postings_write.written_bytes();
        Ok(FieldSerializer {
            term_dictionary_builder,
            postings_serializer,
            positions_serializer_opt,
            current_term_info: TermInfo::default(),
            term_open: false,
-            postings_write,
-            postings_start_offset,
        })
    }

-    fn postings_offset(&self) -> usize {
-        (self.postings_write.written_bytes() - self.postings_start_offset) as usize
-    }
-
    fn current_term_info(&self) -> TermInfo {
        let positions_start =
            if let Some(positions_serializer) = self.positions_serializer_opt.as_ref() {
@@ -169,7 +156,7 @@ impl<'a, C: Codec> FieldSerializer<'a, C> {
            } else {
                0u64
            } as usize;
-        let addr = self.postings_offset();
+        let addr = self.postings_serializer.written_bytes() as usize;
        TermInfo {
            doc_freq: 0,
            postings_range: addr..addr,
@@ -192,6 +179,7 @@ impl<'a, C: Codec> FieldSerializer<'a, C> {
            "Called new_term, while the previous term was not closed."
        );
        self.term_open = true;
+        self.postings_serializer.clear();
        self.current_term_info = self.current_term_info();
        self.term_dictionary_builder.insert_key(term)?;
        self.postings_serializer
@@ -225,22 +213,21 @@ impl<'a, C: Codec> FieldSerializer<'a, C> {
        crate::fail_point!("FieldSerializer::close_term", |msg: Option<String>| {
            Err(io::Error::new(io::ErrorKind::Other, format!("{msg:?}")))
        });
+        if self.term_open {
+            self.postings_serializer
+                .close_term(self.current_term_info.doc_freq)?;
+            self.current_term_info.postings_range.end =
+                self.postings_serializer.written_bytes() as usize;

-        if !self.term_open {
-            return Ok(());
-        };
-
-        self.postings_serializer
-            .close_term(self.current_term_info.doc_freq, self.postings_write)?;
-        self.current_term_info.postings_range.end = self.postings_offset();
-        if let Some(positions_serializer) = self.positions_serializer_opt.as_mut() {
-            positions_serializer.close_term()?;
-            self.current_term_info.positions_range.end =
-                positions_serializer.written_bytes() as usize;
+            if let Some(positions_serializer) = self.positions_serializer_opt.as_mut() {
+                positions_serializer.close_term()?;
+                self.current_term_info.positions_range.end =
+                    positions_serializer.written_bytes() as usize;
+            }
+            self.term_dictionary_builder
+                .insert_value(&self.current_term_info)?;
+            self.term_open = false;
        }
-        self.term_dictionary_builder
-            .insert_value(&self.current_term_info)?;
-        self.term_open = false;
        Ok(())
    }

@@ -250,8 +237,242 @@ impl<'a, C: Codec> FieldSerializer<'a, C> {
        if let Some(positions_serializer) = self.positions_serializer_opt {
            positions_serializer.close()?;
        }
-        self.postings_write.flush()?;
+        self.postings_serializer.close()?;
        self.term_dictionary_builder.finish()?;
        Ok(())
    }
 }
+
+struct Block {
+    doc_ids: [DocId; COMPRESSION_BLOCK_SIZE],
+    term_freqs: [u32; COMPRESSION_BLOCK_SIZE],
+    len: usize,
+}
+
+impl Block {
+    fn new() -> Self {
+        Block {
+            doc_ids: [0u32; COMPRESSION_BLOCK_SIZE],
+            term_freqs: [0u32; COMPRESSION_BLOCK_SIZE],
+            len: 0,
+        }
+    }
+
+    fn doc_ids(&self) -> &[DocId] {
+        &self.doc_ids[..self.len]
+    }
+
+    fn term_freqs(&self) -> &[u32] {
+        &self.term_freqs[..self.len]
+    }
+
+    fn clear(&mut self) {
+        self.len = 0;
+    }
+
+    fn append_doc(&mut self, doc: DocId, term_freq: u32) {
+        let len = self.len;
+        self.doc_ids[len] = doc;
+        self.term_freqs[len] = term_freq;
+        self.len = len + 1;
+    }
+
+    fn is_full(&self) -> bool {
+        self.len == COMPRESSION_BLOCK_SIZE
+    }
+
+    fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
+    fn last_doc(&self) -> DocId {
+        assert_eq!(self.len, COMPRESSION_BLOCK_SIZE);
+        self.doc_ids[COMPRESSION_BLOCK_SIZE - 1]
+    }
+}
+
+pub struct PostingsSerializer<W: Write> {
+    output_write: CountingWriter<W>,
+    last_doc_id_encoded: u32,
+
+    block_encoder: BlockEncoder,
+    block: Box<Block>,
+
+    postings_write: Vec<u8>,
+    skip_write: SkipSerializer,
+
+    mode: IndexRecordOption,
+    fieldnorm_reader: Option<FieldNormReader>,
+
+    bm25_weight: Option<Bm25Weight>,
+    avg_fieldnorm: Score, /* Average number of terms in the field for that segment.
+                           * This value is used to compute the block wand information. */
+    term_has_freq: bool,
+}
+
+impl<W: Write> PostingsSerializer<W> {
+    pub fn new(
+        write: W,
+        avg_fieldnorm: Score,
+        mode: IndexRecordOption,
+        fieldnorm_reader: Option<FieldNormReader>,
+    ) -> PostingsSerializer<W> {
+        PostingsSerializer {
+            output_write: CountingWriter::wrap(write),
+
+            block_encoder: BlockEncoder::new(),
+            block: Box::new(Block::new()),
+
+            postings_write: Vec::new(),
+            skip_write: SkipSerializer::new(),
+
+            last_doc_id_encoded: 0u32,
+            mode,
+
+            fieldnorm_reader,
+            bm25_weight: None,
+            avg_fieldnorm,
+            term_has_freq: false,
+        }
+    }
+
+    pub fn new_term(&mut self, term_doc_freq: u32, record_term_freq: bool) {
+        self.bm25_weight = None;
+
+        self.term_has_freq = self.mode.has_freq() && record_term_freq;
+        if !self.term_has_freq {
+            return;
+        }
+
+        let num_docs_in_segment: u64 =
+            if let Some(fieldnorm_reader) = self.fieldnorm_reader.as_ref() {
+                fieldnorm_reader.num_docs() as u64
+            } else {
+                return;
+            };
+
+        if num_docs_in_segment == 0 {
+            return;
+        }
+
+        self.bm25_weight = Some(Bm25Weight::for_one_term_without_explain(
+            term_doc_freq as u64,
+            num_docs_in_segment,
+            self.avg_fieldnorm,
+        ));
+    }
+
+    fn write_block(&mut self) {
+        {
+            // encode the doc ids
+            let (num_bits, block_encoded): (u8, &[u8]) = self
+                .block_encoder
+                .compress_block_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
+            self.last_doc_id_encoded = self.block.last_doc();
+            self.skip_write
+                .write_doc(self.last_doc_id_encoded, num_bits);
+            // last el block 0, offset block 1,
+            self.postings_write.extend(block_encoded);
+        }
+        if self.term_has_freq {
+            let (num_bits, block_encoded): (u8, &[u8]) = self
+                .block_encoder
+                .compress_block_unsorted(self.block.term_freqs(), true);
+            self.postings_write.extend(block_encoded);
+            self.skip_write.write_term_freq(num_bits);
+            if self.mode.has_positions() {
+                // We serialize the sum of term freqs within the skip information
+                // in order to navigate through positions.
+                let sum_freq = self.block.term_freqs().iter().cloned().sum();
+                self.skip_write.write_total_term_freq(sum_freq);
+            }
+            let mut blockwand_params = (0u8, 0u32);
+            if let Some(bm25_weight) = self.bm25_weight.as_ref() {
+                if let Some(fieldnorm_reader) = self.fieldnorm_reader.as_ref() {
+                    let docs = self.block.doc_ids().iter().cloned();
+                    let term_freqs = self.block.term_freqs().iter().cloned();
+                    let fieldnorms = docs.map(|doc| fieldnorm_reader.fieldnorm_id(doc));
+                    blockwand_params = fieldnorms
+                        .zip(term_freqs)
+                        .max_by(
+                            |(left_fieldnorm_id, left_term_freq),
+                             (right_fieldnorm_id, right_term_freq)| {
+                                let left_score =
+                                    bm25_weight.tf_factor(*left_fieldnorm_id, *left_term_freq);
+                                let right_score =
+                                    bm25_weight.tf_factor(*right_fieldnorm_id, *right_term_freq);
+                                left_score
+                                    .partial_cmp(&right_score)
+                                    .unwrap_or(Ordering::Equal)
+                            },
+                        )
+                        .unwrap();
+                }
+            }
+            let (fieldnorm_id, term_freq) = blockwand_params;
+            self.skip_write.write_blockwand_max(fieldnorm_id, term_freq);
+        }
+        self.block.clear();
+    }
+
+    pub fn write_doc(&mut self, doc_id: DocId, term_freq: u32) {
+        self.block.append_doc(doc_id, term_freq);
+        if self.block.is_full() {
+            self.write_block();
+        }
+    }
+
+    fn close(mut self) -> io::Result<()> {
+        self.output_write.flush() // `postings_write` is a Vec buffer; flushing it is a no-op.
+    }
+
+    pub fn close_term(&mut self, doc_freq: u32) -> io::Result<()> {
+        if !self.block.is_empty() {
+            // we have doc ids waiting to be written
+            // this happens when the number of doc ids is
+            // not a perfect multiple of our block size.
+            //
+            // In that case, the remaining part is encoded
+            // using variable int encoding.
+            {
+                let block_encoded = self
+                    .block_encoder
+                    .compress_vint_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
+                self.postings_write.write_all(block_encoded)?;
+            }
+            // ... Idem for term frequencies
+            if self.term_has_freq {
+                let block_encoded = self
+                    .block_encoder
+                    .compress_vint_unsorted(self.block.term_freqs());
+                self.postings_write.write_all(block_encoded)?;
+            }
+            self.block.clear();
+        }
+        if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
+            let skip_data = self.skip_write.data();
+            VInt(skip_data.len() as u64).serialize(&mut self.output_write)?;
+            self.output_write.write_all(skip_data)?;
+        }
+        self.output_write.write_all(&self.postings_write[..])?;
+        self.skip_write.clear();
+        self.postings_write.clear();
+        self.bm25_weight = None;
+        Ok(())
+    }
+
+    /// Returns the number of bytes written in the postings write object
+    /// at this point.
+    /// When called before writing the postings of a term, this value is used as
+    /// start offset.
+    /// When called after writing the postings of a term, this value is used as an
+    /// end offset.
+    fn written_bytes(&self) -> u64 {
+        self.output_write.written_bytes()
+    }
+
+    fn clear(&mut self) {
+        self.block.clear();
+        self.last_doc_id_encoded = 0;
+    }
+}
--- a/src/codec/standard/postings/skip.rs
+++ b/src/codec/standard/postings/skip.rs
@@ -6,21 +6,17 @@ use crate::{DocId, Score, TERMINATED};

 // doc num bits uses the following encoding:
 // given 0b a b cdefgh
-//         |1|2|3|  4  |
+//         |1|2|   3  |
 // - 1: unused
 // - 2: is delta-1 encoded. 0 if not, 1, if yes
-// - 3: unused
-// - 4: a 5 bit number in 0..32, the actual bitwidth. Bitpacking could in theory say this is 32
-//   (requiring a 6th bit), but the biggest doc_id we can want to encode is TERMINATED-1, which can
-//   be represented on 31b without delta encoding.
+// - 3: a 6 bit number in 0..=32, the actual bitwidth
 fn encode_bitwidth(bitwidth: u8, delta_1: bool) -> u8 {
-    assert!(bitwidth < 32);
    bitwidth | ((delta_1 as u8) << 6)
 }

 fn decode_bitwidth(raw_bitwidth: u8) -> (u8, bool) {
    let delta_1 = ((raw_bitwidth >> 6) & 1) != 0;
-    let bitwidth = raw_bitwidth & 0x1f;
+    let bitwidth = raw_bitwidth & 0x3f;
    (bitwidth, delta_1)
 }

@@ -142,6 +138,23 @@ impl SkipReader {
        skip_reader
    }

+    pub fn reset(&mut self, data: OwnedBytes, doc_freq: u32) {
+        self.last_doc_in_block = if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
+            0
+        } else {
+            TERMINATED
+        };
+        self.last_doc_in_previous_block = 0u32;
+        self.owned_read = data;
+        self.block_info = BlockInfo::VInt { num_docs: doc_freq };
+        self.byte_offset = 0;
+        self.remaining_docs = doc_freq;
+        self.position_offset = 0u64;
+        if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
+            self.read_block_info();
+        }
+    }
+
    // Returns the block max score for this block if available.
    //
    // The block max score is available for all full bitpacked block,
@@ -417,7 +430,7 @@ mod tests {

    #[test]
    fn test_encode_decode_bitwidth() {
-        for bitwidth in 0..32 {
+        for bitwidth in 0..=32 {
            for delta_1 in [false, true] {
                assert_eq!(
                    (bitwidth, delta_1),
--- a/src/query/all_query.rs
+++ b/src/query/all_query.rs
@@ -2,7 +2,7 @@ use crate::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
 use crate::index::SegmentReader;
 use crate::query::boost_query::BoostScorer;
 use crate::query::explanation::does_not_match;
-use crate::query::{box_scorer, EnableScoring, Explanation, Query, Scorer, Weight};
+use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
 use crate::{DocId, Score};

 /// Query that matches all of the documents.
@@ -21,12 +21,17 @@ impl Query for AllQuery {
 pub struct AllWeight;

 impl Weight for AllWeight {
-    fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
+    fn scorer(
+        &self,
+        reader: &SegmentReader,
+        boost: Score,
+        _seek_doc: DocId,
+    ) -> crate::Result<Box<dyn Scorer>> {
        let all_scorer = AllScorer::new(reader.max_doc());
        if boost != 1.0 {
-            Ok(box_scorer(BoostScorer::new(all_scorer, boost)))
+            Ok(Box::new(BoostScorer::new(all_scorer, boost)))
        } else {
-            Ok(box_scorer(all_scorer))
+            Ok(Box::new(all_scorer))
        }
    }

@@ -105,7 +110,6 @@ impl DocSet for AllScorer {
 }

 impl Scorer for AllScorer {
-    #[inline]
    fn score(&mut self) -> Score {
        1.0
    }
@@ -141,7 +145,7 @@ mod tests {
        let weight = AllQuery.weight(EnableScoring::disabled_from_schema(&index.schema()))?;
        {
            let reader = searcher.segment_reader(0);
-            let mut scorer = weight.scorer(reader, 1.0)?;
+            let mut scorer = weight.scorer(reader, 1.0, 0)?;
            assert_eq!(scorer.doc(), 0u32);
            assert_eq!(scorer.advance(), 1u32);
            assert_eq!(scorer.doc(), 1u32);
@@ -149,7 +153,7 @@ mod tests {
        }
        {
            let reader = searcher.segment_reader(1);
-            let mut scorer = weight.scorer(reader, 1.0)?;
+            let mut scorer = weight.scorer(reader, 1.0, 0)?;
            assert_eq!(scorer.doc(), 0u32);
            assert_eq!(scorer.advance(), TERMINATED);
        }
@@ -164,12 +168,12 @@ mod tests {
        let weight = AllQuery.weight(EnableScoring::disabled_from_schema(searcher.schema()))?;
        let reader = searcher.segment_reader(0);
        {
-            let mut scorer = weight.scorer(reader, 2.0)?;
+            let mut scorer = weight.scorer(reader, 2.0, 0)?;
            assert_eq!(scorer.doc(), 0u32);
            assert_eq!(scorer.score(), 2.0);
        }
        {
-            let mut scorer = weight.scorer(reader, 1.5)?;
+            let mut scorer = weight.scorer(reader, 1.5, 0)?;
            assert_eq!(scorer.doc(), 0u32);
            assert_eq!(scorer.score(), 1.5);
        }
--- a/src/query/automaton_weight.rs
+++ b/src/query/automaton_weight.rs
@@ -10,7 +10,7 @@ use crate::postings::TermInfo;
 use crate::query::{BitSetDocSet, ConstScorer, Explanation, Scorer, Weight};
 use crate::schema::{Field, IndexRecordOption};
 use crate::termdict::{TermDictionary, TermStreamer};
-use crate::{DocId, DocSet, Score, TantivyError};
+use crate::{DocId, Score, TantivyError};

 /// A weight struct for Fuzzy Term and Regex Queries
 pub struct AutomatonWeight<A> {
@@ -84,7 +84,12 @@ where
    A: Automaton + Send + Sync + 'static,
    A::State: Clone,
 {
-    fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
+    fn scorer(
+        &self,
+        reader: &SegmentReader,
+        boost: Score,
+        seek_doc: DocId,
+    ) -> crate::Result<Box<dyn Scorer>> {
        let max_doc = reader.max_doc();
        let mut doc_bitset = BitSet::with_max_value(max_doc);
        let inverted_index = reader.inverted_index(self.field)?;
@@ -92,9 +97,22 @@ where
        let mut term_stream = self.automaton_stream(term_dict)?;
        while term_stream.advance() {
            let term_info = term_stream.value();
-            let mut block_segment_postings =
-                inverted_index.read_postings_from_terminfo(term_info, IndexRecordOption::Basic)?;
-            block_segment_postings.fill_bitset(&mut doc_bitset);
+            let (mut block_segment_postings, _) = inverted_index
+                .read_block_postings_from_terminfo_with_seek(
+                    term_info,
+                    IndexRecordOption::Basic,
+                    seek_doc,
+                )?;
+            loop {
+                let docs = block_segment_postings.docs();
+                if docs.is_empty() {
+                    break;
+                }
+                for &doc in docs {
+                    doc_bitset.insert(doc);
+                }
+                block_segment_postings.advance();
+            }
        }
        let doc_bitset = BitSetDocSet::from(doc_bitset);
        let const_scorer = ConstScorer::new(doc_bitset, boost);
@@ -102,7 +120,7 @@ where
    }

    fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
-        let mut scorer = self.scorer(reader, 1.0)?;
+        let mut scorer = self.scorer(reader, 1.0, 0)?;
        if scorer.seek(doc) == doc {
            Ok(Explanation::new("AutomatonScorer", 1.0))
        } else {
@@ -177,7 +195,7 @@ mod tests {
        let automaton_weight = AutomatonWeight::new(field, PrefixedByA);
        let reader = index.reader()?;
        let searcher = reader.searcher();
-        let mut scorer = automaton_weight.scorer(searcher.segment_reader(0u32), 1.0)?;
+        let mut scorer = automaton_weight.scorer(searcher.segment_reader(0u32), 1.0, 0)?;
        assert_eq!(scorer.doc(), 0u32);
        assert_eq!(scorer.score(), 1.0);
        assert_eq!(scorer.advance(), 2u32);
@@ -194,7 +212,7 @@ mod tests {
        let automaton_weight = AutomatonWeight::new(field, PrefixedByA);
        let reader = index.reader()?;
        let searcher = reader.searcher();
-        let mut scorer = automaton_weight.scorer(searcher.segment_reader(0u32), 1.32)?;
+        let mut scorer = automaton_weight.scorer(searcher.segment_reader(0u32), 1.32, 0)?;
        assert_eq!(scorer.doc(), 0u32);
        assert_eq!(scorer.score(), 1.32);
        Ok(())
--- a/src/query/bitset/mod.rs
+++ b/src/query/bitset/mod.rs
@@ -24,13 +24,6 @@ impl BitSetDocSet {
        self.cursor_bucket = bucket_addr;
        self.cursor_tinybitset = self.docs.tinyset(bucket_addr);
    }
-
-    /// Returns the number of documents in the bitset.
-    ///
-    /// This call is not free: it will bitcount the number of bits in the bitset.
-    pub fn doc_freq(&self) -> u32 {
-        self.docs.len() as u32
-    }
 }

 impl From<BitSet> for BitSetDocSet {
--- a/src/query/boolean_query/block_wand.rs
+++ b/src/query/boolean_query/block_wand.rs
@@ -1,6 +1,5 @@
 use std::ops::{Deref, DerefMut};

-use crate::codec::postings::PostingsWithBlockMax;
 use crate::query::term_query::TermScorer;
 use crate::query::Scorer;
 use crate::{DocId, DocSet, Score, TERMINATED};
@@ -14,8 +13,8 @@ use crate::{DocId, DocSet, Score, TERMINATED};
 /// We always have `before_pivot_len` < `pivot_len`.
 ///
 /// `None` is returned if we establish that no document can exceed the threshold.
-fn find_pivot_doc<TPostings: PostingsWithBlockMax>(
-    term_scorers: &[TermScorerWithMaxScore<TPostings>],
+fn find_pivot_doc(
+    term_scorers: &[TermScorerWithMaxScore],
    threshold: Score,
 ) -> Option<(usize, usize, DocId)> {
    let mut max_score = 0.0;
@@ -47,8 +46,8 @@ fn find_pivot_doc<TPostings: PostingsWithBlockMax>(
 /// the next doc candidate defined by the min of `last_doc_in_block + 1` for
 /// scorer in scorers[..pivot_len] and `scorer.doc()` for scorer in scorers[pivot_len..].
 /// Note: before and after calling this method, scorers need to be sorted by their `.doc()`.
-fn block_max_was_too_low_advance_one_scorer<TPostings: PostingsWithBlockMax>(
-    scorers: &mut [TermScorerWithMaxScore<TPostings>],
+fn block_max_was_too_low_advance_one_scorer(
+    scorers: &mut [TermScorerWithMaxScore],
    pivot_len: usize,
 ) {
    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
@@ -83,10 +82,7 @@ fn block_max_was_too_low_advance_one_scorer<TPostings: PostingsWithBlockMax>(
 // Given a list of term_scorers and a `ord` and assuming that `term_scorers[ord]` is sorted
 // except term_scorers[ord] that might be in advance compared to its ranks,
 // bubble up term_scorers[ord] in order to restore the ordering.
-fn restore_ordering<TPostings: PostingsWithBlockMax>(
-    term_scorers: &mut [TermScorerWithMaxScore<TPostings>],
-    ord: usize,
-) {
+fn restore_ordering(term_scorers: &mut [TermScorerWithMaxScore], ord: usize) {
    let doc = term_scorers[ord].doc();
    for i in ord + 1..term_scorers.len() {
        if term_scorers[i].doc() >= doc {
@@ -101,10 +97,9 @@ fn restore_ordering<TPostings: PostingsWithBlockMax>(
 // If this works, return true.
 // If this fails (ie: one of the term_scorer does not contain `pivot_doc` and seek goes past the
 // pivot), reorder the term_scorers to ensure the list is still sorted and returns `false`.
-// If a term_scorer reach TERMINATED in the process return false remove the term_scorer and
-// return.
-fn align_scorers<TPostings: PostingsWithBlockMax>(
-    term_scorers: &mut Vec<TermScorerWithMaxScore<TPostings>>,
+// If a term_scorer reaches TERMINATED in the process, remove that term_scorer and return false.
+fn align_scorers(
+    term_scorers: &mut Vec<TermScorerWithMaxScore>,
    pivot_doc: DocId,
    before_pivot_len: usize,
 ) -> bool {
@@ -131,10 +126,7 @@ fn align_scorers<TPostings: PostingsWithBlockMax>(
 // Assumes terms_scorers[..pivot_len] are positioned on the same doc (pivot_doc).
 // Advance term_scorers[..pivot_len] and out of these removes the terminated scores.
 // Restores the ordering of term_scorers.
-fn advance_all_scorers_on_pivot<TPostings: PostingsWithBlockMax>(
-    term_scorers: &mut Vec<TermScorerWithMaxScore<TPostings>>,
-    pivot_len: usize,
-) {
+fn advance_all_scorers_on_pivot(term_scorers: &mut Vec<TermScorerWithMaxScore>, pivot_len: usize) {
    for term_scorer in &mut term_scorers[..pivot_len] {
        term_scorer.advance();
    }
@@ -153,12 +145,12 @@ fn advance_all_scorers_on_pivot<TPostings: PostingsWithBlockMax>(
 /// Implements the WAND (Weak AND) algorithm for dynamic pruning
 /// described in the paper "Faster Top-k Document Retrieval Using Block-Max Indexes".
 /// Link: <http://engineering.nyu.edu/~suel/papers/bmw.pdf>
-pub fn block_wand<TPostings: PostingsWithBlockMax>(
-    mut scorers: Vec<TermScorer<TPostings>>,
+pub fn block_wand(
+    mut scorers: Vec<TermScorer>,
    mut threshold: Score,
    callback: &mut dyn FnMut(u32, Score) -> Score,
 ) {
-    let mut scorers: Vec<TermScorerWithMaxScore<TPostings>> = scorers
+    let mut scorers: Vec<TermScorerWithMaxScore> = scorers
        .iter_mut()
        .map(TermScorerWithMaxScore::from)
        .collect();
@@ -174,7 +166,10 @@ pub fn block_wand<TPostings: PostingsWithBlockMax>(

        let block_max_score_upperbound: Score = scorers[..pivot_len]
            .iter_mut()
-            .map(|scorer| scorer.seek_block_max(pivot_doc))
+            .map(|scorer| {
+                scorer.seek_block(pivot_doc);
+                scorer.block_max_score()
+            })
            .sum();

        // Beware after shallow advance, skip readers can be in advance compared to
@@ -225,22 +220,21 @@ pub fn block_wand<TPostings: PostingsWithBlockMax>(
 ///   - On a block, advance until the end and execute `callback` when the doc score is greater or
 ///     equal to the `threshold`.
 pub fn block_wand_single_scorer(
-    mut scorer: TermScorer<impl PostingsWithBlockMax>,
+    mut scorer: TermScorer,
    mut threshold: Score,
    callback: &mut dyn FnMut(u32, Score) -> Score,
 ) {
    let mut doc = scorer.doc();
-    let mut block_max_score = scorer.seek_block_max(doc);
    loop {
        // We position the scorer on a block that can reach
        // the threshold.
-        while block_max_score < threshold {
+        while scorer.block_max_score() < threshold {
            let last_doc_in_block = scorer.last_doc_in_block();
            if last_doc_in_block == TERMINATED {
                return;
            }
            doc = last_doc_in_block + 1;
-            block_max_score = scorer.seek_block_max(doc);
+            scorer.seek_block(doc);
        }
        // Seek will effectively load that block.
        doc = scorer.seek(doc);
@@ -262,33 +256,31 @@ pub fn block_wand_single_scorer(
            }
        }
        doc += 1;
-        block_max_score = scorer.seek_block_max(doc);
+        scorer.seek_block(doc);
    }
 }

-struct TermScorerWithMaxScore<'a, TPostings: PostingsWithBlockMax> {
-    scorer: &'a mut TermScorer<TPostings>,
+struct TermScorerWithMaxScore<'a> {
+    scorer: &'a mut TermScorer,
    max_score: Score,
 }

-impl<'a, TPostings: PostingsWithBlockMax> From<&'a mut TermScorer<TPostings>>
-    for TermScorerWithMaxScore<'a, TPostings>
-{
-    fn from(scorer: &'a mut TermScorer<TPostings>) -> Self {
+impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
+    fn from(scorer: &'a mut TermScorer) -> Self {
        let max_score = scorer.max_score();
        TermScorerWithMaxScore { scorer, max_score }
    }
 }

-impl<TPostings: PostingsWithBlockMax> Deref for TermScorerWithMaxScore<'_, TPostings> {
-    type Target = TermScorer<TPostings>;
+impl Deref for TermScorerWithMaxScore<'_> {
+    type Target = TermScorer;

    fn deref(&self) -> &Self::Target {
        self.scorer
    }
 }

-impl<TPostings: PostingsWithBlockMax> DerefMut for TermScorerWithMaxScore<'_, TPostings> {
+impl DerefMut for TermScorerWithMaxScore<'_> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.scorer
    }
--- a/src/query/boolean_query/boolean_weight.rs
+++ b/src/query/boolean_query/boolean_weight.rs
@@ -2,15 +2,22 @@ use std::collections::HashMap;

 use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
 use crate::index::SegmentReader;
+use crate::postings::FreqReadingOption;
 use crate::query::disjunction::Disjunction;
 use crate::query::explanation::does_not_match;
 use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner};
-use crate::query::weight::{for_each_docset_buffered, for_each_scorer};
+use crate::query::term_query::TermScorer;
+use crate::query::weight::{for_each_docset_buffered, for_each_pruning_scorer, for_each_scorer};
 use crate::query::{
-    box_scorer, intersect_scorers, AllScorer, BufferedUnionScorer, EmptyScorer, Exclude,
-    Explanation, Occur, RequiredOptionalScorer, Scorer, Weight,
+    intersect_scorers, AllScorer, BufferedUnionScorer, EmptyScorer, Exclude, Explanation, Occur,
+    RequiredOptionalScorer, Scorer, Weight,
 };
-use crate::{DocId, Score};
+use crate::{DocId, Score, TERMINATED};
+
+enum SpecializedScorer {
+    TermUnion(Vec<TermScorer>),
+    Other(Box<dyn Scorer>),
+}

 fn scorer_disjunction<TScoreCombiner>(
    scorers: Vec<Box<dyn Scorer>>,
@@ -25,7 +32,7 @@ where
    if scorers.len() == 1 {
        return scorers.into_iter().next().unwrap(); // Safe unwrap.
    }
-    box_scorer(Disjunction::new(
+    Box::new(Disjunction::new(
        scorers,
        score_combiner,
        minimum_match_required,
@@ -37,18 +44,56 @@ fn scorer_union<TScoreCombiner>(
    scorers: Vec<Box<dyn Scorer>>,
    score_combiner_fn: impl Fn() -> TScoreCombiner,
    num_docs: u32,
-) -> Box<dyn Scorer>
+) -> SpecializedScorer
 where
    TScoreCombiner: ScoreCombiner,
 {
-    match scorers.len() {
-        0 => box_scorer(EmptyScorer),
-        1 => scorers.into_iter().next().unwrap(),
-        _ => box_scorer(BufferedUnionScorer::build(
-            scorers,
-            score_combiner_fn,
-            num_docs,
-        )),
+    assert!(!scorers.is_empty());
+    if scorers.len() == 1 {
+        return SpecializedScorer::Other(scorers.into_iter().next().unwrap()); //< we checked the size beforehand
+    }
+
+    {
+        let is_all_term_queries = scorers.iter().all(|scorer| scorer.is::<TermScorer>());
+        if is_all_term_queries {
+            let scorers: Vec<TermScorer> = scorers
+                .into_iter()
+                .map(|scorer| *(scorer.downcast::<TermScorer>().map_err(|_| ()).unwrap()))
+                .collect();
+            if scorers
+                .iter()
+                .all(|scorer| scorer.freq_reading_option() == FreqReadingOption::ReadFreq)
+            {
+                // Block wand is only available if we read frequencies.
+                return SpecializedScorer::TermUnion(scorers);
+            } else {
+                return SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
+                    scorers,
+                    score_combiner_fn,
+                    num_docs,
+                )));
+            }
+        }
+    }
+    SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
+        scorers,
+        score_combiner_fn,
+        num_docs,
+    )))
+}
+
+fn into_box_scorer<TScoreCombiner: ScoreCombiner>(
+    scorer: SpecializedScorer,
+    score_combiner_fn: impl Fn() -> TScoreCombiner,
+    num_docs: u32,
+) -> Box<dyn Scorer> {
+    match scorer {
+        SpecializedScorer::TermUnion(term_scorers) => {
+            let union_scorer =
+                BufferedUnionScorer::build(term_scorers, score_combiner_fn, num_docs);
+            Box::new(union_scorer)
+        }
+        SpecializedScorer::Other(scorer) => scorer,
    }
 }

@@ -65,7 +110,7 @@ fn effective_must_scorer(
    if must_scorers.is_empty() {
        if removed_all_scorer_count > 0 {
            // Had AllScorer(s) only - all docs match
-            Some(box_scorer(AllScorer::new(max_doc)))
+            Some(Box::new(AllScorer::new(max_doc)))
        } else {
            // No MUST constraint at all
            None
@@ -83,39 +128,54 @@ fn effective_must_scorer(
 /// When `scoring_enabled` is false, we can just return AllScorer alone since
 /// we don't need score contributions from the should_scorer.
 fn effective_should_scorer_for_union<TScoreCombiner: ScoreCombiner>(
-    should_scorer: Box<dyn Scorer>,
+    should_scorer: SpecializedScorer,
    removed_all_scorer_count: usize,
    max_doc: DocId,
    num_docs: u32,
    score_combiner_fn: impl Fn() -> TScoreCombiner,
    scoring_enabled: bool,
-) -> Box<dyn Scorer> {
+) -> SpecializedScorer {
    if removed_all_scorer_count > 0 {
        if scoring_enabled {
            // Need to union to get score contributions from both
-            let all_scorers: Vec<Box<dyn Scorer>> =
-                vec![should_scorer, box_scorer(AllScorer::new(max_doc))];
-            box_scorer(BufferedUnionScorer::build(
+            let all_scorers: Vec<Box<dyn Scorer>> = vec![
+                into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
+                Box::new(AllScorer::new(max_doc)),
+            ];
+            SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
                all_scorers,
                score_combiner_fn,
                num_docs,
-            ))
+            )))
        } else {
            // Scoring disabled - AllScorer alone is sufficient
-            box_scorer(AllScorer::new(max_doc))
+            SpecializedScorer::Other(Box::new(AllScorer::new(max_doc)))
        }
    } else {
        should_scorer
    }
 }

+fn create_scorer(
+    weight: &dyn Weight,
+    reader: &SegmentReader,
+    boost: Score,
+    target_doc: DocId,
+) -> crate::Result<Box<dyn Scorer>> {
+    if target_doc >= reader.max_doc() {
+        Ok(Box::new(EmptyScorer))
+    } else {
+        weight.scorer(reader, boost, target_doc)
+    }
+}
+
 enum ShouldScorersCombinationMethod {
    // Should scorers are irrelevant.
    Ignored,
    // Only contributes to final score.
-    Optional(Box<dyn Scorer>),
+    Optional(SpecializedScorer),
    // Regardless of score, the should scorers may impact whether a document is matching or not.
-    Required(Box<dyn Scorer>),
+    Required(SpecializedScorer),
 }

 /// Weight associated to the `BoolQuery`.
@@ -160,10 +220,29 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
        &self,
        reader: &SegmentReader,
        boost: Score,
+        mut seek_first_doc: DocId,
    ) -> crate::Result<HashMap<Occur, Vec<Box<dyn Scorer>>>> {
        let mut per_occur_scorers: HashMap<Occur, Vec<Box<dyn Scorer>>> = HashMap::new();
-        for (occur, subweight) in &self.weights {
-            let sub_scorer: Box<dyn Scorer> = subweight.scorer(reader, boost)?;
+        let (mut must_weights, other_weights): (Vec<(Occur, _)>, Vec<(Occur, _)>) = self
+            .weights
+            .iter()
+            .map(|(occur, weight)| (*occur, weight))
+            .partition(|(occur, _weight)| *occur == Occur::Must);
+        // We start with the must weights in order to get the best "seek_first_doc", so that
+        // we can skip the first few documents of the other scorers.
+        must_weights.sort_by_key(|weight| weight.1.intersection_priority());
+        for (_, must_sub_weight) in must_weights {
+            let sub_scorer: Box<dyn Scorer> =
+                create_scorer(must_sub_weight.as_ref(), reader, boost, seek_first_doc)?;
+            seek_first_doc = seek_first_doc.max(sub_scorer.doc());
+            per_occur_scorers
+                .entry(Occur::Must)
+                .or_default()
+                .push(sub_scorer);
+        }
+        for (occur, sub_weight) in &other_weights {
+            let sub_scorer: Box<dyn Scorer> =
+                create_scorer(sub_weight.as_ref(), reader, boost, seek_first_doc)?;
            per_occur_scorers
                .entry(*occur)
                .or_default()
@@ -177,9 +256,10 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
        reader: &SegmentReader,
        boost: Score,
        score_combiner_fn: impl Fn() -> TComplexScoreCombiner,
-    ) -> crate::Result<Box<dyn Scorer>> {
+        seek_doc: u32,
+    ) -> crate::Result<SpecializedScorer> {
        let num_docs = reader.num_docs();
-        let mut per_occur_scorers = self.per_occur_scorers(reader, boost)?;
+        let mut per_occur_scorers = self.per_occur_scorers(reader, boost, seek_doc)?;

        // Indicate how should clauses are combined with must clauses.
        let mut must_scorers: Vec<Box<dyn Scorer>> =
@@ -187,7 +267,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
        let must_special_scorer_counts = remove_and_count_all_and_empty_scorers(&mut must_scorers);

        if must_special_scorer_counts.num_empty_scorers > 0 {
-            return Ok(box_scorer(EmptyScorer));
+            return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
        }

        let mut should_scorers = per_occur_scorers.remove(&Occur::Should).unwrap_or_default();
@@ -202,7 +282,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {

        if exclude_special_scorer_counts.num_all_scorers > 0 {
            // We exclude all documents at one point.
-            return Ok(box_scorer(EmptyScorer));
+            return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
        }

        let effective_minimum_number_should_match = self
@@ -214,7 +294,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
            if effective_minimum_number_should_match > num_of_should_scorers {
                // We don't have enough scorers to satisfy the minimum number of should matches.
                // The request will match no documents.
-                return Ok(box_scorer(EmptyScorer));
+                return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
            }
            match effective_minimum_number_should_match {
                0 if num_of_should_scorers == 0 => ShouldScorersCombinationMethod::Ignored,
@@ -234,10 +314,12 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
                    must_scorers.append(&mut should_scorers);
                    ShouldScorersCombinationMethod::Ignored
                }
-                _ => ShouldScorersCombinationMethod::Required(scorer_disjunction(
-                    should_scorers,
-                    score_combiner_fn(),
-                    effective_minimum_number_should_match,
+                _ => ShouldScorersCombinationMethod::Required(SpecializedScorer::Other(
+                    scorer_disjunction(
+                        should_scorers,
+                        score_combiner_fn(),
+                        effective_minimum_number_should_match,
+                    ),
                )),
            }
        };
@@ -245,9 +327,13 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
        let exclude_scorer_opt: Option<Box<dyn Scorer>> = if exclude_scorers.is_empty() {
            None
        } else {
-            let exclude_scorers_union: Box<dyn Scorer> =
+            let exclude_specialized_scorer: SpecializedScorer =
                scorer_union(exclude_scorers, DoNothingCombiner::default, num_docs);
-            Some(exclude_scorers_union)
+            Some(into_box_scorer(
+                exclude_specialized_scorer,
+                DoNothingCombiner::default,
+                num_docs,
+            ))
        };

        let include_scorer = match (should_scorers, must_scorers) {
@@ -262,8 +348,8 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
                    reader.max_doc(),
                    num_docs,
                )
-                .unwrap_or_else(|| box_scorer(EmptyScorer));
-                boxed_scorer
+                .unwrap_or_else(|| Box::new(EmptyScorer));
+                SpecializedScorer::Other(boxed_scorer)
            }
            (ShouldScorersCombinationMethod::Optional(should_scorer), must_scorers) => {
                // Optional SHOULD: contributes to scoring but not required for matching.
@@ -288,12 +374,16 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
                    Some(must_scorer) => {
                        // Has MUST constraint: SHOULD only affects scoring.
                        if self.scoring_enabled {
-                            box_scorer(RequiredOptionalScorer::<_, _, TScoreCombiner>::new(
+                            SpecializedScorer::Other(Box::new(RequiredOptionalScorer::<
+                                _,
+                                _,
+                                TScoreCombiner,
+                            >::new(
                                must_scorer,
-                                should_scorer,
-                            ))
+                                into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
+                            )))
                        } else {
-                            must_scorer
+                            SpecializedScorer::Other(must_scorer)
                        }
                    }
                }
@@ -313,13 +403,23 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
                    }
                    Some(must_scorer) => {
                        // Has MUST constraint: intersect MUST with SHOULD.
-                        intersect_scorers(vec![must_scorer, should_scorer], num_docs)
+                        let should_boxed =
+                            into_box_scorer(should_scorer, &score_combiner_fn, num_docs);
+                        SpecializedScorer::Other(intersect_scorers(
+                            vec![must_scorer, should_boxed],
+                            num_docs,
+                        ))
                    }
                }
            }
        };
        if let Some(exclude_scorer) = exclude_scorer_opt {
-            Ok(box_scorer(Exclude::new(include_scorer, exclude_scorer)))
+            let include_scorer_boxed =
+                into_box_scorer(include_scorer, &score_combiner_fn, num_docs);
+            Ok(SpecializedScorer::Other(Box::new(Exclude::new(
+                include_scorer_boxed,
+                exclude_scorer,
+            ))))
        } else {
            Ok(include_scorer)
        }
@@ -340,7 +440,7 @@ fn remove_and_count_all_and_empty_scorers(
        if scorer.is::<AllScorer>() {
            counts.num_all_scorers += 1;
            false
-        } else if scorer.is::<EmptyScorer>() {
+        } else if scorer.doc() == TERMINATED {
            counts.num_empty_scorers += 1;
            false
        } else {
@@ -351,7 +451,13 @@ fn remove_and_count_all_and_empty_scorers(
 }

 impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombiner> {
-    fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
+    fn scorer(
+        &self,
+        reader: &SegmentReader,
+        boost: Score,
+        seek_doc: DocId,
+    ) -> crate::Result<Box<dyn Scorer>> {
+        let num_docs = reader.num_docs();
        if self.weights.is_empty() {
            Ok(Box::new(EmptyScorer))
        } else if self.weights.len() == 1 {
@@ -359,17 +465,23 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
            if occur == Occur::MustNot {
                Ok(Box::new(EmptyScorer))
            } else {
-                weight.scorer(reader, boost)
+                weight.scorer(reader, boost, seek_doc)
            }
        } else if self.scoring_enabled {
-            self.complex_scorer(reader, boost, &self.score_combiner_fn)
+            self.complex_scorer(reader, boost, &self.score_combiner_fn, seek_doc)
+                .map(|specialized_scorer| {
+                    into_box_scorer(specialized_scorer, &self.score_combiner_fn, num_docs)
+                })
        } else {
-            self.complex_scorer(reader, boost, DoNothingCombiner::default)
+            self.complex_scorer(reader, boost, DoNothingCombiner::default, seek_doc)
+                .map(|specialized_scorer| {
+                    into_box_scorer(specialized_scorer, DoNothingCombiner::default, num_docs)
+                })
        }
    }

    fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
-        let mut scorer = self.scorer(reader, 1.0)?;
+        let mut scorer = self.scorer(reader, 1.0, 0)?;
        if scorer.seek(doc) != doc {
            return Err(does_not_match(doc));
        }
@@ -393,8 +505,20 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
        reader: &SegmentReader,
        callback: &mut dyn FnMut(DocId, Score),
    ) -> crate::Result<()> {
-        let mut scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
-        for_each_scorer(scorer.as_mut(), callback);
+        let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn, 0)?;
+        match scorer {
+            SpecializedScorer::TermUnion(term_scorers) => {
+                let mut union_scorer = BufferedUnionScorer::build(
+                    term_scorers,
+                    &self.score_combiner_fn,
+                    reader.num_docs(),
+                );
+                for_each_scorer(&mut union_scorer, callback);
+            }
+            SpecializedScorer::Other(mut scorer) => {
+                for_each_scorer(scorer.as_mut(), callback);
+            }
+        }
        Ok(())
    }

@@ -403,9 +527,22 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
        reader: &SegmentReader,
        callback: &mut dyn FnMut(&[DocId]),
    ) -> crate::Result<()> {
-        let mut scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?;
+        let scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner, 0u32)?;
        let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
-        for_each_docset_buffered(scorer.as_mut(), &mut buffer, callback);
+
+        match scorer {
+            SpecializedScorer::TermUnion(term_scorers) => {
+                let mut union_scorer = BufferedUnionScorer::build(
+                    term_scorers,
+                    &self.score_combiner_fn,
+                    reader.num_docs(),
+                );
+                for_each_docset_buffered(&mut union_scorer, &mut buffer, callback);
+            }
+            SpecializedScorer::Other(mut scorer) => {
+                for_each_docset_buffered(scorer.as_mut(), &mut buffer, callback);
+            }
+        }
        Ok(())
    }

@@ -425,8 +562,15 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
        reader: &SegmentReader,
        callback: &mut dyn FnMut(DocId, Score) -> Score,
    ) -> crate::Result<()> {
-        let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
-        reader.codec.for_each_pruning(threshold, scorer, callback);
+        let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn, 0u32)?;
+        match scorer {
+            SpecializedScorer::TermUnion(term_scorers) => {
+                super::block_wand(term_scorers, threshold, callback);
+            }
+            SpecializedScorer::Other(mut scorer) => {
+                for_each_pruning_scorer(scorer.as_mut(), threshold, callback);
+            }
+        }
        Ok(())
    }
 }
--- a/src/query/boolean_query/mod.rs
+++ b/src/query/boolean_query/mod.rs
@@ -1,6 +1,8 @@
+mod block_wand;
 mod boolean_query;
 mod boolean_weight;

+pub(crate) use self::block_wand::{block_wand, block_wand_single_scorer};
 pub use self::boolean_query::BooleanQuery;
 pub use self::boolean_weight::BooleanWeight;

@@ -55,7 +57,7 @@ mod tests {
        let query = query_parser.parse_query("+a")?;
        let searcher = index.reader()?.searcher();
        let weight = query.weight(EnableScoring::enabled_from_searcher(&searcher))?;
-        let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
+        let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0, 0)?;
        assert!(scorer.is::<TermScorer>());
        Ok(())
    }
@@ -68,13 +70,13 @@ mod tests {
        {
            let query = query_parser.parse_query("+a +b +c")?;
            let weight = query.weight(EnableScoring::enabled_from_searcher(&searcher))?;
-            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
+            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0, 0)?;
            assert!(scorer.is::<Intersection<TermScorer>>());
        }
        {
            let query = query_parser.parse_query("+a +(b c)")?;
            let weight = query.weight(EnableScoring::enabled_from_searcher(&searcher))?;
-            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
+            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0, 0)?;
            assert!(scorer.is::<Intersection<Box<dyn Scorer>>>());
        }
        Ok(())
@@ -88,14 +90,14 @@ mod tests {
        {
            let query = query_parser.parse_query("+a b")?;
            let weight = query.weight(EnableScoring::enabled_from_searcher(&searcher))?;
-            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
+            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0, 0)?;
            assert!(scorer
                .is::<RequiredOptionalScorer<Box<dyn Scorer>, Box<dyn Scorer>, SumCombiner>>());
        }
        {
            let query = query_parser.parse_query("+a b")?;
            let weight = query.weight(EnableScoring::disabled_from_schema(searcher.schema()))?;
-            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
+            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0, 0)?;
            assert!(scorer.is::<TermScorer>());
        }
        Ok(())
@@ -242,12 +244,14 @@ mod tests {
            .weight(EnableScoring::enabled_from_searcher(&searcher))
            .unwrap();
        {
-            let mut boolean_scorer = boolean_weight.scorer(searcher.segment_reader(0u32), 1.0)?;
+            let mut boolean_scorer =
+                boolean_weight.scorer(searcher.segment_reader(0u32), 1.0, 0)?;
            assert_eq!(boolean_scorer.doc(), 0u32);
            assert_nearly_equals!(boolean_scorer.score(), 0.84163445);
        }
        {
-            let mut boolean_scorer = boolean_weight.scorer(searcher.segment_reader(0u32), 2.0)?;
+            let mut boolean_scorer =
+                boolean_weight.scorer(searcher.segment_reader(0u32), 2.0, 0)?;
            assert_eq!(boolean_scorer.doc(), 0u32);
            assert_nearly_equals!(boolean_scorer.score(), 1.6832689);
        }
@@ -341,7 +345,7 @@ mod tests {
                (Occur::Must, term_match_some.box_clone()),
            ]);
            let weight = query.weight(EnableScoring::disabled_from_searcher(&searcher))?;
-            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0f32)?;
+            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0f32, 0)?;
            assert!(scorer.is::<TermScorer>());
        }
        {
@@ -351,7 +355,7 @@ mod tests {
                (Occur::Must, term_match_none.box_clone()),
            ]);
            let weight = query.weight(EnableScoring::disabled_from_searcher(&searcher))?;
-            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0f32)?;
+            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0f32, 0)?;
            assert!(scorer.is::<EmptyScorer>());
        }
        {
@@ -360,7 +364,7 @@ mod tests {
                (Occur::Should, term_match_none.box_clone()),
            ]);
            let weight = query.weight(EnableScoring::disabled_from_searcher(&searcher))?;
-            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0f32)?;
+            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0f32, 0)?;
            assert!(scorer.is::<AllScorer>());
        }
        {
@@ -369,7 +373,7 @@ mod tests {
                (Occur::Should, term_match_none.box_clone()),
            ]);
            let weight = query.weight(EnableScoring::disabled_from_searcher(&searcher))?;
-            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0f32)?;
+            let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0f32, 0)?;
            assert!(scorer.is::<TermScorer>());
        }
        Ok(())
@@ -609,6 +613,134 @@ mod tests {
        Ok(())
    }

+    /// Checks that `BooleanWeight::scorer` honours `seek_doc` when every clause is a
+    /// MUST clause.
+    ///
+    /// A scorer opened at `seek_doc` must yield exactly the matching documents whose id
+    /// is greater than or equal to `seek_doc`.
+    #[test]
+    pub fn test_boolean_weight_seek_doc() -> crate::Result<()> {
+        let mut schema_builder = Schema::builder();
+        let text_field = schema_builder.add_text_field("text", TEXT);
+        let value_field = schema_builder.add_u64_field("value", FAST);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer: IndexWriter = index.writer_for_tests()?;
+
+        // Eleven documents, all containing "hello":
+        //   docs 0..=9 -> value = 10 * doc id (0, 10, ..., 90)
+        //   doc 10     -> value = 50, a second match located after the 30..=70 run
+        let doc_values = (0..10u64)
+            .map(|doc_id| doc_id * 10)
+            .chain(std::iter::once(50u64));
+        for value in doc_values {
+            index_writer.add_document(doc!(
+                text_field => "hello",
+                value_field => value
+            ))?;
+        }
+        index_writer.commit()?;
+
+        let searcher = index.reader()?.searcher();
+        let segment_reader = searcher.segment_reader(0);
+
+        // MUST(text:"hello") AND MUST(value in 30..=70).
+        // Values 30, 40, 50, 60, 70 belong to docs 3..=7, and doc 10 also holds 50.
+        let hello_clause: Box<dyn Query> = Box::new(TermQuery::new(
+            Term::from_field_text(text_field, "hello"),
+            IndexRecordOption::Basic,
+        ));
+        let range_clause: Box<dyn Query> = Box::new(RangeQuery::new(
+            Bound::Included(Term::from_field_u64(value_field, 30)),
+            Bound::Included(Term::from_field_u64(value_field, 70)),
+        ));
+        let clauses = vec![(Occur::Must, hello_clause), (Occur::Must, range_clause)];
+        let boolean_query = BooleanQuery::new(clauses);
+
+        // Scoring is disabled: only the produced doc ids are under test.
+        let weight =
+            boolean_query.weight(EnableScoring::disabled_from_schema(searcher.schema()))?;
+
+        // Opens a fresh scorer at `seek_doc` and drains it into a vector of doc ids.
+        let docs_from = |seek_doc: DocId| {
+            let scorer = weight.scorer(segment_reader, 1.0f32, seek_doc).unwrap();
+            crate::docset::docset_to_doc_vec(scorer)
+        };
+
+        // Seeking at or before the first match (doc 3) keeps the whole result set.
+        assert_eq!(docs_from(0), vec![3, 4, 5, 6, 7, 10]);
+        assert_eq!(docs_from(1), vec![3, 4, 5, 6, 7, 10]);
+        assert_eq!(docs_from(3), vec![3, 4, 5, 6, 7, 10]);
+        // Seeking inside the contiguous run drops the earlier matches.
+        assert_eq!(docs_from(4), vec![4, 5, 6, 7, 10]);
+        assert_eq!(docs_from(7), vec![7, 10]);
+        // Seeking into the gap (docs 8 and 9) lands on the trailing match.
+        assert_eq!(docs_from(8), vec![10]);
+        assert_eq!(docs_from(10), vec![10]);
+        // Seeking past the last document yields nothing.
+        assert_eq!(docs_from(11), Vec::<DocId>::new());
+
+        Ok(())
+    }
+
+    /// Checks that `BooleanWeight::scorer` honours `seek_doc` for a query made only of
+    /// SHOULD clauses.
+    #[test]
+    pub fn test_boolean_weight_seek_doc_with_should() -> crate::Result<()> {
+        let mut schema_builder = Schema::builder();
+        let text_field = schema_builder.add_text_field("text", TEXT);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer: IndexWriter = index.writer_for_tests()?;
+
+        // One document per entry, added in order, so the array position is the doc id
+        // the assertions below rely on (doc 0 = "a b", ..., doc 4 = "a b c").
+        // Doc 3 ("c") is the only document containing neither "a" nor "b".
+        let doc_texts = ["a b", "a", "b", "c", "a b c"];
+        for doc_text in doc_texts {
+            index_writer.add_document(doc!(text_field => doc_text))?;
+        }
+        index_writer.commit()?;
+
+        let searcher = index.reader()?.searcher();
+        let segment_reader = searcher.segment_reader(0);
+
+        // SHOULD(text:"a") OR SHOULD(text:"b").
+        // Matches every document except doc 3, i.e. docs 0, 1, 2 and 4.
+        let should_a: Box<dyn Query> = Box::new(TermQuery::new(
+            Term::from_field_text(text_field, "a"),
+            IndexRecordOption::Basic,
+        ));
+        let should_b: Box<dyn Query> = Box::new(TermQuery::new(
+            Term::from_field_text(text_field, "b"),
+            IndexRecordOption::Basic,
+        ));
+        let clauses = vec![(Occur::Should, should_a), (Occur::Should, should_b)];
+        let boolean_query = BooleanQuery::new(clauses);
+
+        // Scoring is disabled: only the produced doc ids are under test.
+        let weight =
+            boolean_query.weight(EnableScoring::disabled_from_schema(searcher.schema()))?;
+
+        // Opens a fresh scorer at `seek_doc` and drains it into a vector of doc ids.
+        let docs_from = |seek_doc: DocId| {
+            let scorer = weight.scorer(segment_reader, 1.0f32, seek_doc).unwrap();
+            crate::docset::docset_to_doc_vec(scorer)
+        };
+
+        // Seeking onto a matching doc keeps that doc in the output.
+        assert_eq!(docs_from(0), vec![0, 1, 2, 4]);
+        assert_eq!(docs_from(1), vec![1, 2, 4]);
+        assert_eq!(docs_from(2), vec![2, 4]);
+        // Doc 3 does not match, so seeking to it lands on doc 4.
+        assert_eq!(docs_from(3), vec![4]);
+        assert_eq!(docs_from(4), vec![4]);
+        // Seeking past the last document yields nothing.
+        assert_eq!(docs_from(5), Vec::<DocId>::new());
+
+        Ok(())
+    }
+
    /// Test multiple AllScorer instances in different clause types.
    ///
    /// Verifies correct behavior when AllScorers appear in multiple positions.
--- a/src/query/boost_query.rs
+++ b/src/query/boost_query.rs
@@ -67,8 +67,13 @@ impl BoostWeight {
 }

 impl Weight for BoostWeight {
-    fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
-        self.weight.scorer(reader, boost * self.boost)
+    fn scorer(
+        &self,
+        reader: &SegmentReader,
+        boost: Score,
+        seek_doc: DocId,
+    ) -> crate::Result<Box<dyn Scorer>> {
+        self.weight.scorer(reader, boost * self.boost, seek_doc)
    }

    fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result<Explanation> {
@@ -83,6 +88,10 @@ impl Weight for BoostWeight {
    fn count(&self, reader: &SegmentReader) -> crate::Result<u32> {
        self.weight.count(reader)
    }
+
+    fn intersection_priority(&self) -> u32 {
+        self.weight.intersection_priority()
+    }
 }

 pub(crate) struct BoostScorer<S: Scorer> {
@@ -134,7 +143,6 @@ impl<S: Scorer> DocSet for BoostScorer<S> {
 }

 impl<S: Scorer> Scorer for BoostScorer<S> {
-    #[inline]
    fn score(&mut self) -> Score {
        self.underlying.score() * self.boost
    }
--- a/src/query/const_score_query.rs
+++ b/src/query/const_score_query.rs
@@ -1,7 +1,7 @@
 use std::fmt;

 use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
-use crate::query::{box_scorer, EnableScoring, Explanation, Query, Scorer, Weight};
+use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
 use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, Term};

 /// `ConstScoreQuery` is a wrapper over a query to provide a constant score.
@@ -63,16 +63,18 @@ impl ConstWeight {
 }

 impl Weight for ConstWeight {
-    fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
-        let inner_scorer = self.weight.scorer(reader, boost)?;
-        Ok(box_scorer(ConstScorer::new(
-            inner_scorer,
-            boost * self.score,
-        )))
+    fn scorer(
+        &self,
+        reader: &SegmentReader,
+        boost: Score,
+        seek_doc: DocId,
+    ) -> crate::Result<Box<dyn Scorer>> {
+        let inner_scorer = self.weight.scorer(reader, boost, seek_doc)?;
+        Ok(Box::new(ConstScorer::new(inner_scorer, boost * self.score)))
    }

    fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result<Explanation> {
-        let mut scorer = self.scorer(reader, 1.0)?;
+        let mut scorer = self.scorer(reader, 1.0, 0)?;
        if scorer.seek(doc) != doc {
            return Err(TantivyError::InvalidArgument(format!(
                "Document #({doc}) does not match"
@@ -87,6 +89,10 @@ impl Weight for ConstWeight {
    fn count(&self, reader: &SegmentReader) -> crate::Result<u32> {
        self.weight.count(reader)
    }
+
+    fn intersection_priority(&self) -> u32 {
+        self.weight.intersection_priority()
+    }
 }

 /// Wraps a `DocSet` and simply returns a constant `Scorer`.
@@ -140,7 +146,6 @@ impl<TDocSet: DocSet> DocSet for ConstScorer<TDocSet> {
 }

 impl<TDocSet: DocSet + 'static> Scorer for ConstScorer<TDocSet> {
-    #[inline]
    fn score(&mut self) -> Score {
        self.score
    }
--- a/src/query/disjunction.rs
+++ b/src/query/disjunction.rs
@@ -173,7 +173,6 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> DocSet
 impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> Scorer
    for Disjunction<TScorer, TScoreCombiner>
 {
-    #[inline]
    fn score(&mut self) -> Score {
        self.current_score
    }
@@ -308,7 +307,6 @@ mod tests {
    }

    impl Scorer for DummyScorer {
-        #[inline]
        fn score(&mut self) -> Score {
            self.foo.get(self.cursor).map(|x| x.1).unwrap_or(0.0)
        }
--- a/src/query/empty_query.rs
+++ b/src/query/empty_query.rs
@@ -2,7 +2,7 @@ use super::Scorer;
 use crate::docset::TERMINATED;
 use crate::index::SegmentReader;
 use crate::query::explanation::does_not_match;
-use crate::query::{box_scorer, EnableScoring, Explanation, Query, Weight};
+use crate::query::{EnableScoring, Explanation, Query, Weight};
 use crate::{DocId, DocSet, Score, Searcher};

 /// `EmptyQuery` is a dummy `Query` in which no document matches.
@@ -26,13 +26,24 @@ impl Query for EmptyQuery {
 /// It is useful for tests and handling edge cases.
 pub struct EmptyWeight;
 impl Weight for EmptyWeight {
-    fn scorer(&self, _reader: &SegmentReader, _boost: Score) -> crate::Result<Box<dyn Scorer>> {
-        Ok(box_scorer(EmptyScorer))
+    fn scorer(
+        &self,
+        _reader: &SegmentReader,
+        _boost: Score,
+        _seek_doc: DocId,
+    ) -> crate::Result<Box<dyn Scorer>> {
+        Ok(Box::new(EmptyScorer))
    }

    fn explain(&self, _reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
        Err(does_not_match(doc))
    }
+
+    /// Returns a priority number used to sort weights when running an
+    /// intersection.
+    fn intersection_priority(&self) -> u32 {
+        0u32
+    }
 }

 /// `EmptyScorer` is a dummy `Scorer` in which no document matches.
@@ -55,7 +66,6 @@ impl DocSet for EmptyScorer {
 }

 impl Scorer for EmptyScorer {
-    #[inline]
    fn score(&mut self) -> Score {
        0.0
    }
--- a/src/query/exclude.rs
+++ b/src/query/exclude.rs
@@ -84,7 +84,6 @@ where
    TScorer: Scorer,
    TDocSetExclude: DocSet + 'static,
 {
-    #[inline]
    fn score(&mut self) -> Score {
        self.underlying_docset.score()
    }
--- a/src/query/exist_query.rs
+++ b/src/query/exist_query.rs
@@ -3,7 +3,7 @@ use core::fmt::Debug;
 use columnar::{ColumnIndex, DynamicColumn};
 use common::BitSet;

-use super::{box_scorer, ConstScorer, EmptyScorer};
+use super::{ConstScorer, EmptyScorer};
 use crate::docset::{DocSet, TERMINATED};
 use crate::index::SegmentReader;
 use crate::query::all_query::AllScorer;
@@ -98,7 +98,12 @@ pub struct ExistsWeight {
 }

 impl Weight for ExistsWeight {
-    fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
+    fn scorer(
+        &self,
+        reader: &SegmentReader,
+        boost: Score,
+        _seek_doc: DocId,
+    ) -> crate::Result<Box<dyn Scorer>> {
        let fast_field_reader = reader.fast_fields();
        let mut column_handles = fast_field_reader.dynamic_column_handles(&self.field_name)?;
        if self.field_type == Type::Json && self.json_subpaths {
@@ -117,7 +122,7 @@ impl Weight for ExistsWeight {
            }
        }
        if non_empty_columns.is_empty() {
-            return Ok(box_scorer(EmptyScorer));
+            return Ok(Box::new(EmptyScorer));
        }

        // If any column is full, all docs match.
@@ -128,9 +133,9 @@ impl Weight for ExistsWeight {
        {
            let all_scorer = AllScorer::new(max_doc);
            if boost != 1.0f32 {
-                return Ok(box_scorer(BoostScorer::new(all_scorer, boost)));
+                return Ok(Box::new(BoostScorer::new(all_scorer, boost)));
            } else {
-                return Ok(box_scorer(all_scorer));
+                return Ok(Box::new(all_scorer));
            }
        }

@@ -138,7 +143,7 @@ impl Weight for ExistsWeight {
        // NOTE: A lower number may be better for very sparse columns
        if non_empty_columns.len() < 4 {
            let docset = ExistsDocSet::new(non_empty_columns, reader.max_doc());
-            return Ok(box_scorer(ConstScorer::new(docset, boost)));
+            return Ok(Box::new(ConstScorer::new(docset, boost)));
        }

        // If we have many dynamic columns, precompute a bitset of matching docs
@@ -162,11 +167,11 @@ impl Weight for ExistsWeight {
            }
        }
        let docset = BitSetDocSet::from(doc_bitset);
-        Ok(box_scorer(ConstScorer::new(docset, boost)))
+        Ok(Box::new(ConstScorer::new(docset, boost)))
    }

    fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
-        let mut scorer = self.scorer(reader, 1.0)?;
+        let mut scorer = self.scorer(reader, 1.0, 0)?;
        if scorer.seek(doc) != doc {
            return Err(does_not_match(doc));
        }
--- a/src/query/intersection.rs
+++ b/src/query/intersection.rs
@@ -1,7 +1,7 @@
 use super::size_hint::estimate_intersection;
 use crate::docset::{DocSet, TERMINATED};
 use crate::query::term_query::TermScorer;
-use crate::query::{box_scorer, EmptyScorer, Scorer};
+use crate::query::{EmptyScorer, Scorer};
 use crate::{DocId, Score};

 /// Returns the intersection scorer.
@@ -20,7 +20,7 @@ pub fn intersect_scorers(
    num_docs_segment: u32,
 ) -> Box<dyn Scorer> {
    if scorers.is_empty() {
-        return box_scorer(EmptyScorer);
+        return Box::new(EmptyScorer);
    }
    if scorers.len() == 1 {
        return scorers.pop().unwrap();
@@ -29,7 +29,7 @@ pub fn intersect_scorers(
    scorers.sort_by_key(|scorer| scorer.cost());
    let doc = go_to_first_doc(&mut scorers[..]);
    if doc == TERMINATED {
-        return box_scorer(EmptyScorer);
+        return Box::new(EmptyScorer);
    }
    // We know that we have at least 2 elements.
    let left = scorers.remove(0);
@@ -38,14 +38,14 @@ pub fn intersect_scorers(
        .iter()
        .all(|&scorer| scorer.is::<TermScorer>());
    if all_term_scorers {
-        return box_scorer(Intersection {
+        return Box::new(Intersection {
            left: *(left.downcast::<TermScorer>().map_err(|_| ()).unwrap()),
            right: *(right.downcast::<TermScorer>().map_err(|_| ()).unwrap()),
            others: scorers,
            num_docs: num_docs_segment,
        });
    }
-    box_scorer(Intersection {
+    Box::new(Intersection {
        left,
        right,
        others: scorers,
@@ -105,7 +105,6 @@ impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
 }

 impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOtherDocSet> {
-    #[inline]
    fn advance(&mut self) -> DocId {
        let (left, right) = (&mut self.left, &mut self.right);
        let mut candidate = left.advance();
@@ -175,7 +174,6 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
                .all(|docset| docset.seek_into_the_danger_zone(target))
    }

-    #[inline]
    fn doc(&self) -> DocId {
        self.left.doc()
    }
@@ -202,7 +200,6 @@ where
    TScorer: Scorer,
    TOtherScorer: Scorer,
 {
-    #[inline]
    fn score(&mut self) -> Score {
        self.left.score()
            + self.right.score()
--- a/src/query/mod.rs
+++ b/src/query/mod.rs
@@ -24,7 +24,7 @@ mod reqopt_scorer;
 mod scorer;
 mod set_query;
 mod size_hint;
-pub(crate) mod term_query;
+mod term_query;
 mod union;
 mod weight;

@@ -54,14 +54,13 @@ pub use self::more_like_this::{MoreLikeThisQuery, MoreLikeThisQueryBuilder};
 pub use self::phrase_prefix_query::PhrasePrefixQuery;
 pub use self::phrase_query::regex_phrase_query::{wildcard_query_to_regex_str, RegexPhraseQuery};
 pub use self::phrase_query::PhraseQuery;
-pub(crate) use self::phrase_query::PhraseScorer;
 pub use self::query::{EnableScoring, Query, QueryClone};
 pub use self::query_parser::{QueryParser, QueryParserError};
 pub use self::range_query::*;
 pub use self::regex_query::RegexQuery;
 pub use self::reqopt_scorer::RequiredOptionalScorer;
 pub use self::score_combiner::{DisjunctionMaxCombiner, ScoreCombiner, SumCombiner};
-pub use self::scorer::{box_scorer, Scorer};
+pub use self::scorer::Scorer;
 pub use self::set_query::TermSetQuery;
 pub use self::term_query::TermQuery;
 pub use self::union::BufferedUnionScorer;
--- a/src/query/phrase_prefix_query/phrase_prefix_scorer.rs
+++ b/src/query/phrase_prefix_query/phrase_prefix_scorer.rs
@@ -2,7 +2,7 @@ use crate::docset::{DocSet, TERMINATED};
 use crate::fieldnorm::FieldNormReader;
 use crate::postings::Postings;
 use crate::query::bm25::Bm25Weight;
-use crate::query::phrase_query::{intersection_exists, PhraseScorer};
+use crate::query::phrase_query::{intersection_count, PhraseScorer};
 use crate::query::Scorer;
 use crate::{DocId, Score};

@@ -81,7 +81,6 @@ impl<TPostings: Postings> DocSet for PhraseKind<TPostings> {
 }

 impl<TPostings: Postings> Scorer for PhraseKind<TPostings> {
-    #[inline]
    fn score(&mut self) -> Score {
        match self {
            PhraseKind::SinglePrefix { positions, .. } => {
@@ -100,6 +99,7 @@ pub struct PhrasePrefixScorer<TPostings: Postings> {
    phrase_scorer: PhraseKind<TPostings>,
    suffixes: Vec<TPostings>,
    suffix_offset: u32,
+    phrase_count: u32,
    suffix_position_buffer: Vec<u32>,
 }

@@ -143,6 +143,7 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
            phrase_scorer,
            suffixes,
            suffix_offset: (max_offset - suffix_pos) as u32,
+            phrase_count: 0,
            suffix_position_buffer: Vec::with_capacity(100),
        };
        if phrase_prefix_scorer.doc() != TERMINATED && !phrase_prefix_scorer.matches_prefix() {
@@ -151,7 +152,12 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
        phrase_prefix_scorer
    }

+    pub fn phrase_count(&self) -> u32 {
+        self.phrase_count
+    }
+
    fn matches_prefix(&mut self) -> bool {
+        let mut count = 0;
        let current_doc = self.doc();
        let pos_matching = self.phrase_scorer.get_intersection();
        for suffix in &mut self.suffixes {
@@ -161,12 +167,11 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
            let doc = suffix.seek(current_doc);
            if doc == current_doc {
                suffix.positions_with_offset(self.suffix_offset, &mut self.suffix_position_buffer);
-                if intersection_exists(pos_matching, &self.suffix_position_buffer) {
-                    return true;
-                }
+                count += intersection_count(pos_matching, &self.suffix_position_buffer);
            }
        }
-        false
+        self.phrase_count = count as u32;
+        count != 0
    }
 }

@@ -210,7 +215,6 @@ impl<TPostings: Postings> DocSet for PhrasePrefixScorer<TPostings> {
 }

 impl<TPostings: Postings> Scorer for PhrasePrefixScorer<TPostings> {
-    #[inline]
    fn score(&mut self) -> Score {
        // TODO modify score??
        self.phrase_scorer.score()
--- a/src/query/phrase_prefix_query/phrase_prefix_weight.rs
+++ b/src/query/phrase_prefix_query/phrase_prefix_weight.rs
@@ -1,11 +1,12 @@
 use super::{prefix_end, PhrasePrefixScorer};
 use crate::fieldnorm::FieldNormReader;
 use crate::index::SegmentReader;
-use crate::postings::Postings;
+use crate::postings::SegmentPostings;
 use crate::query::bm25::Bm25Weight;
-use crate::query::{box_scorer, EmptyScorer, Explanation, Scorer, Weight};
+use crate::query::explanation::does_not_match;
+use crate::query::{EmptyScorer, Explanation, Scorer, Weight};
 use crate::schema::{IndexRecordOption, Term};
-use crate::{DocId, Score};
+use crate::{DocId, DocSet, Score};

 pub struct PhrasePrefixWeight {
    phrase_terms: Vec<(usize, Term)>,
@@ -41,26 +42,29 @@ impl PhrasePrefixWeight {
        Ok(FieldNormReader::constant(reader.max_doc(), 1))
    }

-    pub(crate) fn phrase_scorer(
+    pub(crate) fn prefix_phrase_scorer(
        &self,
        reader: &SegmentReader,
        boost: Score,
-    ) -> crate::Result<Option<Box<dyn Scorer>>> {
+        seek_doc: DocId,
+    ) -> crate::Result<Option<PhrasePrefixScorer<SegmentPostings>>> {
        let similarity_weight_opt = self
            .similarity_weight_opt
            .as_ref()
            .map(|similarity_weight| similarity_weight.boost_by(boost));
        let fieldnorm_reader = self.fieldnorm_reader(reader)?;
-        let mut term_postings_list: Vec<(usize, Box<dyn Postings>)> = Vec::new();
+        let mut term_postings_list = Vec::new();
        for &(offset, ref term) in &self.phrase_terms {
-            if let Some(postings) = reader
-                .inverted_index(term.field())?
-                .read_postings(term, IndexRecordOption::WithFreqsAndPositions)?
-            {
-                term_postings_list.push((offset, postings));
-            } else {
+            let inverted_index = reader.inverted_index(term.field())?;
+            let Some(term_info) = inverted_index.get_term_info(term)? else {
                return Ok(None);
-            }
+            };
+            let postings = inverted_index.read_postings_from_terminfo(
+                &term_info,
+                IndexRecordOption::WithFreqsAndPositions,
+                seek_doc,
+            )?;
+            term_postings_list.push((offset, postings));
        }

        let inv_index = reader.inverted_index(self.prefix.1.field())?;
@@ -102,44 +106,51 @@ impl PhrasePrefixWeight {
            }
        }

-        // TODO make this specialized.
-        Ok(Some(box_scorer(PhrasePrefixScorer::new(
+        Ok(Some(PhrasePrefixScorer::new(
            term_postings_list,
            similarity_weight_opt,
            fieldnorm_reader,
            suffixes,
            self.prefix.0,
-        ))))
+        )))
    }
 }

 impl Weight for PhrasePrefixWeight {
-    fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
-        if let Some(scorer) = self.phrase_scorer(reader, boost)? {
-            Ok(scorer)
+    fn scorer(
+        &self,
+        reader: &SegmentReader,
+        boost: Score,
+        seek_doc: DocId,
+    ) -> crate::Result<Box<dyn Scorer>> {
+        if let Some(scorer) = self.prefix_phrase_scorer(reader, boost, seek_doc)? {
+            Ok(Box::new(scorer))
        } else {
-            Ok(box_scorer(EmptyScorer))
+            Ok(Box::new(EmptyScorer))
        }
    }

-    fn explain(&self, _reader: &SegmentReader, _doc: DocId) -> crate::Result<Explanation> {
-        todo!();
-        // let scorer_opt = self.phrase_scorer(reader, 1.0)?;
-        // if scorer_opt.is_none() {
-        //     return Err(does_not_match(doc));
-        // }
-        // let mut scorer = scorer_opt.unwrap();
-        // if scorer.seek(doc) != doc {
-        //     return Err(does_not_match(doc));
-        // }
-        // let fieldnorm_reader = self.fieldnorm_reader(reader)?;
-        // let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc);
-        // let phrase_count = scorer.phrase_count();
-        // let mut explanation = Explanation::new("Phrase Prefix Scorer", scorer.score());
-        // if let Some(similarity_weight) = self.similarity_weight_opt.as_ref() {
-        //     explanation.add_detail(similarity_weight.explain(fieldnorm_id, phrase_count));
-        // }
-        // Ok(explanation)
+    fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
+        // `None` means no scorer could be built for this segment, so `doc` cannot match.
+        let Some(mut scorer) = self.prefix_phrase_scorer(reader, 1.0, doc)? else {
+            return Err(does_not_match(doc));
+        };
+        // The scorer has to land exactly on `doc`; any other position is a non-match.
+        if scorer.seek(doc) != doc {
+            return Err(does_not_match(doc));
+        }
+        let fieldnorm_reader = self.fieldnorm_reader(reader)?;
+        let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc);
+        let phrase_count = scorer.phrase_count();
+        let mut explanation = Explanation::new("Phrase Prefix Scorer", scorer.score());
+        if let Some(similarity_weight) = self.similarity_weight_opt.as_ref() {
+            explanation.add_detail(similarity_weight.explain(fieldnorm_id, phrase_count));
+        }
+        Ok(explanation)
+    }
+
+    fn intersection_priority(&self) -> u32 {
+        50u32
    }
 }

@@ -147,8 +158,6 @@ impl Weight for PhrasePrefixWeight {
 mod tests {
    use crate::docset::TERMINATED;
    use crate::index::Index;
-    use crate::postings::Postings;
-    use crate::query::phrase_prefix_query::PhrasePrefixScorer;
    use crate::query::{EnableScoring, PhrasePrefixQuery, Query};
    use crate::schema::{Schema, TEXT};
    use crate::{DocSet, IndexWriter, Term};
@@ -189,14 +198,14 @@ mod tests {
            .phrase_prefix_query_weight(enable_scoring)
            .unwrap()
            .unwrap();
-        let mut phrase_scorer_boxed = phrase_weight
-            .phrase_scorer(searcher.segment_reader(0u32), 1.0)?
+        let mut phrase_scorer = phrase_weight
+            .prefix_phrase_scorer(searcher.segment_reader(0u32), 1.0, 0u32)?
            .unwrap();
-        let phrase_scorer: &mut PhrasePrefixScorer<Box<dyn Postings>> =
-            phrase_scorer_boxed.as_any_mut().downcast_mut().unwrap();
        assert_eq!(phrase_scorer.doc(), 1);
+        assert_eq!(phrase_scorer.phrase_count(), 2);
        assert_eq!(phrase_scorer.advance(), 2);
        assert_eq!(phrase_scorer.doc(), 2);
+        assert_eq!(phrase_scorer.phrase_count(), 1);
        assert_eq!(phrase_scorer.advance(), TERMINATED);
        Ok(())
    }
@@ -216,15 +225,14 @@ mod tests {
            .phrase_prefix_query_weight(enable_scoring)
            .unwrap()
            .unwrap();
-        let mut phrase_scorer_boxed = phrase_weight
-            .phrase_scorer(searcher.segment_reader(0u32), 1.0)?
-            .unwrap();
-        let phrase_scorer = phrase_scorer_boxed
-            .downcast_mut::<PhrasePrefixScorer<Box<dyn Postings>>>()
+        let mut phrase_scorer = phrase_weight
+            .prefix_phrase_scorer(searcher.segment_reader(0u32), 1.0, 0u32)?
            .unwrap();
        assert_eq!(phrase_scorer.doc(), 1);
+        assert_eq!(phrase_scorer.phrase_count(), 2);
        assert_eq!(phrase_scorer.advance(), 2);
        assert_eq!(phrase_scorer.doc(), 2);
+        assert_eq!(phrase_scorer.phrase_count(), 1);
        assert_eq!(phrase_scorer.advance(), TERMINATED);
        Ok(())
    }
@@ -242,7 +250,7 @@ mod tests {
            .unwrap()
            .is_none());
        let weight = phrase_query.weight(enable_scoring).unwrap();
-        let mut phrase_scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
+        let mut phrase_scorer = weight.scorer(searcher.segment_reader(0u32), 1.0, 0)?;
        assert_eq!(phrase_scorer.doc(), 1);
        assert_eq!(phrase_scorer.advance(), 2);
        assert_eq!(phrase_scorer.doc(), 2);
@@ -263,7 +271,7 @@ mod tests {
        ]);
        let enable_scoring = EnableScoring::enabled_from_searcher(&searcher);
        let weight = phrase_query.weight(enable_scoring).unwrap();
-        let mut phrase_scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
+        let mut phrase_scorer = weight.scorer(searcher.segment_reader(0u32), 1.0, 0)?;
        assert_eq!(phrase_scorer.advance(), TERMINATED);
        Ok(())
    }
--- a/src/query/phrase_query/mod.rs
+++ b/src/query/phrase_query/mod.rs
@@ -5,7 +5,7 @@ pub mod regex_phrase_query;
 mod regex_phrase_weight;

 pub use self::phrase_query::PhraseQuery;
-pub(crate) use self::phrase_scorer::intersection_exists;
+pub(crate) use self::phrase_scorer::intersection_count;
 pub use self::phrase_scorer::PhraseScorer;
 pub use self::phrase_weight::PhraseWeight;

@@ -84,7 +84,7 @@ pub(crate) mod tests {
        let phrase_query = PhraseQuery::new(terms);
        let phrase_weight =
            phrase_query.phrase_weight(EnableScoring::disabled_from_schema(searcher.schema()))?;
-        let mut phrase_scorer = phrase_weight.scorer(searcher.segment_reader(0), 1.0)?;
+        let mut phrase_scorer = phrase_weight.scorer(searcher.segment_reader(0), 1.0, 0)?;
        assert_eq!(phrase_scorer.doc(), 1);
        assert_eq!(phrase_scorer.advance(), TERMINATED);
        Ok(())
@@ -343,6 +343,43 @@ pub(crate) mod tests {
        Ok(())
    }

+    #[test]
+    pub fn test_phrase_weight_seek_doc() -> crate::Result<()> {
+        // The seek doc passed to `scorer` must act as an inclusive lower bound on returned docs.
+        // Index: 0: "c d", 1: "a b", 2: "e f", 3: "a b c", 4: "g h", 5: "a b", 6: "i j"
+        let index = create_index(&["c d", "a b", "e f", "a b c", "g h", "a b", "i j"])?;
+        let text_field = index.schema().get_field("text").unwrap();
+        let searcher = index.reader()?.searcher();
+        let segment_reader = searcher.segment_reader(0);
+
+        let phrase_query = PhraseQuery::new(vec![
+            Term::from_field_text(text_field, "a"),
+            Term::from_field_text(text_field, "b"),
+        ]);
+        let phrase_weight =
+            phrase_query.phrase_weight(EnableScoring::disabled_from_schema(searcher.schema()))?;
+
+        // Closure: builds a scorer with the given seek doc and collects every doc it yields.
+        let docs_when_seeking_from = |seek_from: DocId| {
+            let scorer = phrase_weight
+                .scorer(segment_reader, 1.0f32, seek_from)
+                .unwrap();
+            crate::docset::docset_to_doc_vec(scorer)
+        };
+
+        // "a b" matches docs 1, 3 and 5; seeks 6 and 7 (7 is past the last doc) yield nothing.
+        assert_eq!(docs_when_seeking_from(0), vec![1, 3, 5]);
+        assert_eq!(docs_when_seeking_from(1), vec![1, 3, 5]);
+        assert_eq!(docs_when_seeking_from(2), vec![3, 5]);
+        assert_eq!(docs_when_seeking_from(3), vec![3, 5]);
+        assert_eq!(docs_when_seeking_from(4), vec![5]);
+        assert_eq!(docs_when_seeking_from(5), vec![5]);
+        assert_eq!(docs_when_seeking_from(6), Vec::<DocId>::new());
+        assert_eq!(docs_when_seeking_from(7), Vec::<DocId>::new());
+
+        Ok(())
+    }
+
    #[test]
    pub fn test_phrase_query_on_json() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
@@ -373,7 +410,7 @@ pub(crate) mod tests {
                .weight(EnableScoring::disabled_from_schema(searcher.schema()))
                .unwrap();
            let mut phrase_scorer = phrase_weight
-                .scorer(searcher.segment_reader(0), 1.0f32)
+                .scorer(searcher.segment_reader(0), 1.0f32, 0)
                .unwrap();
            let mut docs = Vec::new();
            loop {
--- a/src/query/phrase_query/phrase_query.rs
+++ b/src/query/phrase_query/phrase_query.rs
@@ -126,7 +126,7 @@ impl PhraseQuery {
        };
        let mut weight = PhraseWeight::new(self.phrase_terms.clone(), bm25_weight_opt);
        if self.slop > 0 {
-            weight.set_slop(self.slop);
+            weight.slop(self.slop);
        }
        Ok(weight)
    }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Paul Masurel	a5dc888cb9	Removed BlockNotLoaded	2026-01-06 14:37:10 +01:00
Paul Masurel	f62a806f47	Explicit doc for the meaning of intersection_priority	2026-01-06 14:09:52 +01:00
Paul Masurel	601541e9ae	Added unit tests	2026-01-02 13:07:00 +01:00
Paul Masurel	98be1a5423	Added seek_doc to intersections. tantivy requires a Scorer to be positioned on a DocId at all times. This decision is not performance neutral: when we have an intersection of a heavy DocSet with a lighter one, forcing the positioning of the first doc is needlessly expensive. This PR fixes this by introducing a seek_doc parameter in the weight function. Weights may skip over documents when they create the Scorer.	2025-12-31 18:20:28 +01:00