mirror of https://github.com/quickwit-oss/tantivy.git (synced 2026-02-11 18:40:36 +00:00)

Compare commits: congxie/su... → postings-w... (12 commits)
| SHA1 |
|---|
| 6038455761 |
| 57fe659fff |
| 5562ce6037 |
| 09b6ececa7 |
| 8018016e46 |
| 6bf185dc3f |
| bb141abe22 |
| f1c29ba972 |
| ae0554a6a5 |
| 0d7abe5d23 |
| a55e4069e4 |
| 1fd30c62be |
@@ -45,7 +45,7 @@ fn build_shared_indices(num_docs: usize, distribution: &str) -> BenchIndex {
     match distribution {
         "dense_random" => {
             for _doc_id in 0..num_docs {
-                let suffix = rng.gen_range(0u64..1000u64);
+                let suffix = rng.random_range(0u64..1000u64);
                 let str_val = format!("str_{:03}", suffix);

                 writer
@@ -71,7 +71,7 @@ fn build_shared_indices(num_docs: usize, distribution: &str) -> BenchIndex {
         }
         "sparse_random" => {
             for _doc_id in 0..num_docs {
-                let suffix = rng.gen_range(0u64..1000000u64);
+                let suffix = rng.random_range(0u64..1000000u64);
                 let str_val = format!("str_{:07}", suffix);

                 writer
@@ -178,13 +178,11 @@ impl TinySet {
 #[derive(Clone)]
 pub struct BitSet {
     tinysets: Box<[TinySet]>,
-    len: u64,
     max_value: u32,
 }

 impl std::fmt::Debug for BitSet {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("BitSet")
-            .field("len", &self.len)
             .field("max_value", &self.max_value)
             .finish()
     }
@@ -212,7 +210,6 @@ impl BitSet {
         let tinybitsets = vec![TinySet::empty(); num_buckets as usize].into_boxed_slice();
         BitSet {
             tinysets: tinybitsets,
-            len: 0,
             max_value,
         }
     }
@@ -230,7 +227,6 @@ impl BitSet {
         }
         BitSet {
             tinysets: tinybitsets,
-            len: max_value as u64,
             max_value,
         }
     }
@@ -249,17 +245,19 @@ impl BitSet {

     /// Intersect with tinysets
     fn intersect_update_with_iter(&mut self, other: impl Iterator<Item = TinySet>) {
-        self.len = 0;
         for (left, right) in self.tinysets.iter_mut().zip(other) {
             *left = left.intersect(right);
-            self.len += left.len() as u64;
         }
     }

     /// Returns the number of elements in the `BitSet`.
     #[inline]
     pub fn len(&self) -> usize {
-        self.len as usize
+        self.tinysets
+            .iter()
+            .copied()
+            .map(|tinyset| tinyset.len())
+            .sum::<u32>() as usize
     }

     /// Inserts an element in the `BitSet`
@@ -268,7 +266,7 @@ impl BitSet {
         // we do not check saturated els.
         let higher = el / 64u32;
         let lower = el % 64u32;
-        self.len += u64::from(self.tinysets[higher as usize].insert_mut(lower));
+        self.tinysets[higher as usize].insert_mut(lower);
     }

     /// Inserts an element in the `BitSet`
@@ -277,7 +275,7 @@ impl BitSet {
         // we do not check saturated els.
         let higher = el / 64u32;
         let lower = el % 64u32;
-        self.len -= u64::from(self.tinysets[higher as usize].remove_mut(lower));
+        self.tinysets[higher as usize].remove_mut(lower);
     }

     /// Returns true iff the elements is in the `BitSet`.
@@ -299,6 +297,9 @@ impl BitSet {
             .map(|delta_bucket| bucket + delta_bucket as u32)
     }

+    /// Returns the maximum number of elements in the bitset.
+    ///
+    /// Warning: The largest element the bitset can contain is `max_value - 1`.
     #[inline]
     pub fn max_value(&self) -> u32 {
         self.max_value
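The net effect of this hunk is a representation trade-off: `BitSet` no longer maintains a `len` counter on every `insert`/`remove`, and `len()` instead recomputes the cardinality by summing per-`TinySet` popcounts. A minimal, self-contained sketch of the same trade-off (plain `u64` words, not tantivy's `TinySet`):

```rust
// Minimal sketch (not tantivy's actual TinySet): a bitset that, like the
// patch above, recomputes its cardinality on demand instead of maintaining
// a `len` counter on every insert/remove.
struct Words {
    words: Vec<u64>,
}

impl Words {
    fn with_capacity(max_value: u32) -> Self {
        Words { words: vec![0u64; (max_value as usize + 63) / 64] }
    }

    // Insert no longer needs to know whether the bit was already set.
    fn insert(&mut self, el: u32) {
        self.words[(el / 64) as usize] |= 1u64 << (el % 64);
    }

    // O(num_words) popcount instead of an O(1) counter read.
    fn len(&self) -> usize {
        self.words.iter().map(|w| w.count_ones() as usize).sum()
    }
}

fn main() {
    let mut set = Words::with_capacity(1000);
    set.insert(3);
    set.insert(3); // duplicate inserts are now branch-free
    set.insert(64);
    assert_eq!(set.len(), 2);
}
```

This makes the hot mutation path cheaper at the cost of making `len()` linear in the capacity, a reasonable choice when length is queried rarely relative to inserts.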
@@ -62,7 +62,9 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
 pub struct AntiCallToken(());

 /// Trait used to indicate when no more write need to be done on a writer
-pub trait TerminatingWrite: Write + Send + Sync {
+///
+/// Thread-safety is enforced at the call sites that require it.
+pub trait TerminatingWrite: Write {
     /// Indicate that the writer will no longer be used. Internally call terminate_ref.
     fn terminate(mut self) -> io::Result<()>
     where Self: Sized {
@@ -70,7 +70,7 @@ impl Collector for StatsCollector {
     fn for_segment(
         &self,
         _segment_local_id: u32,
-        segment_reader: &SegmentReader,
+        segment_reader: &dyn SegmentReader,
     ) -> tantivy::Result<StatsSegmentCollector> {
         let fast_field_reader = segment_reader.fast_fields().u64(&self.field)?;
         Ok(StatsSegmentCollector {
@@ -65,7 +65,7 @@ fn main() -> tantivy::Result<()> {
     );
     let top_docs_by_custom_score =
         // Call TopDocs with a custom tweak score
-        TopDocs::with_limit(2).tweak_score(move |segment_reader: &SegmentReader| {
+        TopDocs::with_limit(2).tweak_score(move |segment_reader: &dyn SegmentReader| {
             let ingredient_reader = segment_reader.facet_reader("ingredient").unwrap();
             let facet_dict = ingredient_reader.facet_dict();
@@ -91,46 +91,10 @@ fn main() -> tantivy::Result<()> {
         }
     }

-    // A `Term` is a text token associated with a field.
-    // Let's go through all docs containing the term `title:the` and access their position
-    let term_the = Term::from_field_text(title, "the");
-
-    // Some other powerful operations (especially `.skip_to`) may be useful to consume these
+    // Some other powerful operations (especially `.seek`) may be useful to consume these
     // posting lists rapidly.
     // You can check for them in the [`DocSet`](https://docs.rs/tantivy/~0/tantivy/trait.DocSet.html) trait
     // and the [`Postings`](https://docs.rs/tantivy/~0/tantivy/trait.Postings.html) trait
-
-    // Also, for some VERY specific high performance use case like an OLAP analysis of logs,
-    // you can get better performance by accessing directly the blocks of doc ids.
-    for segment_reader in searcher.segment_readers() {
-        // A segment contains different data structure.
-        // Inverted index stands for the combination of
-        // - the term dictionary
-        // - the inverted lists associated with each terms and their positions
-        let inverted_index = segment_reader.inverted_index(title)?;
-
-        // This segment posting object is like a cursor over the documents matching the term.
-        // The `IndexRecordOption` arguments tells tantivy we will be interested in both term
-        // frequencies and positions.
-        //
-        // If you don't need all this information, you may get better performance by decompressing
-        // less information.
-        if let Some(mut block_segment_postings) =
-            inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic)?
-        {
-            loop {
-                let docs = block_segment_postings.docs();
-                if docs.is_empty() {
-                    break;
-                }
-                // Once again these docs MAY contains deleted documents as well.
-                let docs = block_segment_postings.docs();
-                // Prints `Docs [0, 2].`
-                println!("Docs {docs:?}");
-                block_segment_postings.advance();
-            }
-        }
-    }

     Ok(())
 }
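The removed low-level example relied on `read_block_postings`, which this compare makes crate-private (see the `BlockSegmentPostings` hunks near the end). A hedged sketch of the per-document alternative that stays public, assuming the `read_postings` / `DocSet` API as of this branch; `title` and `searcher` are the variables from the example above:

```rust
// Hedged sketch: iterate a posting list one document at a time through the
// public API, instead of the removed block-level cursor.
use tantivy::schema::{Field, IndexRecordOption};
use tantivy::{DocSet, Searcher, Term, TERMINATED};

fn print_docs_for_the(searcher: &Searcher, title: Field) -> tantivy::Result<()> {
    let term_the = Term::from_field_text(title, "the");
    for segment_reader in searcher.segment_readers() {
        let inverted_index = segment_reader.inverted_index(title)?;
        if let Some(mut postings) =
            inverted_index.read_postings(&term_the, IndexRecordOption::Basic)?
        {
            // A freshly opened DocSet is positioned on its first document.
            let mut doc = postings.doc();
            while doc != TERMINATED {
                println!("Doc {doc}");
                doc = postings.advance();
            }
        }
    }
    Ok(())
}
```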
@@ -43,7 +43,7 @@ impl DynamicPriceColumn {
         }
     }

-    pub fn price_for_segment(&self, segment_reader: &SegmentReader) -> Option<Arc<Vec<Price>>> {
+    pub fn price_for_segment(&self, segment_reader: &dyn SegmentReader) -> Option<Arc<Vec<Price>>> {
        let segment_key = (segment_reader.segment_id(), segment_reader.delete_opstamp());
        self.price_cache.read().unwrap().get(&segment_key).cloned()
    }
@@ -157,7 +157,7 @@ fn main() -> tantivy::Result<()> {
     let query = query_parser.parse_query("cooking")?;

     let searcher = reader.searcher();
-    let score_by_price = move |segment_reader: &SegmentReader| {
+    let score_by_price = move |segment_reader: &dyn SegmentReader| {
         let price = price_dynamic_column
             .price_for_segment(segment_reader)
             .unwrap();
@@ -704,7 +704,11 @@ fn regex(inp: &str) -> IResult<&str, UserInputLeaf> {
             many1(alt((preceded(char('\\'), char('/')), none_of("/")))),
             char('/'),
         ),
-        peek(alt((multispace1, eof))),
+        peek(alt((
+            value((), multispace1),
+            value((), char(')')),
+            value((), eof),
+        ))),
     ),
     |elements| UserInputLeaf::Regex {
         field: None,
@@ -721,8 +725,12 @@ fn regex_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
             opt_i_err(char('/'), "missing delimiter /"),
         ),
         opt_i_err(
-            peek(alt((multispace1, eof))),
-            "expected whitespace or end of input",
+            peek(alt((
+                value((), multispace1),
+                value((), char(')')),
+                value((), eof),
+            ))),
+            "expected whitespace, closing parenthesis, or end of input",
         ),
     )(inp)
     {
@@ -1707,6 +1715,10 @@ mod test {
     test_parse_query_to_ast_helper("foo:(A OR B)", "(?\"foo\":A ?\"foo\":B)");
     test_parse_query_to_ast_helper("foo:(A* OR B*)", "(?\"foo\":A* ?\"foo\":B*)");
     test_parse_query_to_ast_helper("foo:(*A OR *B)", "(?\"foo\":*A ?\"foo\":*B)");
+
+    // Regexes between parentheses
+    test_parse_query_to_ast_helper("foo:(/A.*/)", "\"foo\":/A.*/");
+    test_parse_query_to_ast_helper("foo:(/A.*/ OR /B.*/)", "(?\"foo\":/A.*/ ?\"foo\":/B.*/)");
 }

 #[test]
@@ -66,6 +66,7 @@ impl UserInputLeaf {
         }
         UserInputLeaf::Range { field, .. } if field.is_none() => *field = Some(default_field),
         UserInputLeaf::Set { field, .. } if field.is_none() => *field = Some(default_field),
+        UserInputLeaf::Regex { field, .. } if field.is_none() => *field = Some(default_field),
         _ => (), // field was already set, do nothing
     }
 }
@@ -57,7 +57,7 @@ pub(crate) fn get_numeric_or_date_column_types() -> &'static [ColumnType] {

 /// Get fast field reader or empty as default.
 pub(crate) fn get_ff_reader(
-    reader: &SegmentReader,
+    reader: &dyn SegmentReader,
     field_name: &str,
     allowed_column_types: Option<&[ColumnType]>,
 ) -> crate::Result<(columnar::Column<u64>, ColumnType)> {
@@ -74,7 +74,7 @@ pub(crate) fn get_ff_reader(
 }

 pub(crate) fn get_dynamic_columns(
-    reader: &SegmentReader,
+    reader: &dyn SegmentReader,
     field_name: &str,
 ) -> crate::Result<Vec<columnar::DynamicColumn>> {
     let ff_fields = reader.fast_fields().dynamic_column_handles(field_name)?;
@@ -90,7 +90,7 @@ pub(crate) fn get_dynamic_columns(
 ///
 /// Is guaranteed to return at least one column.
 pub(crate) fn get_all_ff_reader_or_empty(
-    reader: &SegmentReader,
+    reader: &dyn SegmentReader,
     field_name: &str,
     allowed_column_types: Option<&[ColumnType]>,
     fallback_type: ColumnType,

@@ -469,7 +469,7 @@ impl AggKind {
 /// Build AggregationsData by walking the request tree.
 pub(crate) fn build_aggregations_data_from_req(
     aggs: &Aggregations,
-    reader: &SegmentReader,
+    reader: &dyn SegmentReader,
     segment_ordinal: SegmentOrdinal,
     context: AggContextParams,
 ) -> crate::Result<AggregationsSegmentCtx> {
@@ -489,7 +489,7 @@ pub(crate) fn build_aggregations_data_from_req(
 fn build_nodes(
     agg_name: &str,
     req: &Aggregation,
-    reader: &SegmentReader,
+    reader: &dyn SegmentReader,
     segment_ordinal: SegmentOrdinal,
     data: &mut AggregationsSegmentCtx,
     is_top_level: bool,
@@ -728,7 +728,7 @@ fn build_nodes(
     let idx_in_req_data = data.push_filter_req_data(FilterAggReqData {
         name: agg_name.to_string(),
         req: filter_req.clone(),
-        segment_reader: reader.clone(),
+        segment_reader: reader.clone_arc(),
         evaluator,
         matching_docs_buffer,
         is_top_level,
@@ -745,7 +745,7 @@ fn build_nodes(

 fn build_children(
     aggs: &Aggregations,
-    reader: &SegmentReader,
+    reader: &dyn SegmentReader,
     segment_ordinal: SegmentOrdinal,
     data: &mut AggregationsSegmentCtx,
 ) -> crate::Result<Vec<AggRefNode>> {
@@ -764,7 +764,7 @@ fn build_children(
 }

 fn get_term_agg_accessors(
-    reader: &SegmentReader,
+    reader: &dyn SegmentReader,
     field_name: &str,
     missing: &Option<Key>,
 ) -> crate::Result<Vec<(Column<u64>, ColumnType)>> {
@@ -817,7 +817,7 @@ fn build_terms_or_cardinality_nodes(
     agg_name: &str,
     field_name: &str,
     missing: &Option<Key>,
-    reader: &SegmentReader,
+    reader: &dyn SegmentReader,
     segment_ordinal: SegmentOrdinal,
     data: &mut AggregationsSegmentCtx,
     sub_aggs: &Aggregations,
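The recurring signature change in this file (and the collector files below) is the same motif: `SegmentReader` is now consumed as a trait object, `&dyn SegmentReader`, and owned handles are taken with `clone_arc()` rather than `Clone::clone()`. A self-contained sketch of what that means for downstream collector code; all types here are simplified stand-ins, not tantivy's:

```rust
// Sketch only: `SegmentReader` as a trait object, with `clone_arc()` to
// obtain an owned handle from a borrowed one.
use std::sync::Arc;

trait SegmentReader {
    fn max_doc(&self) -> u32;
    fn clone_arc(&self) -> Arc<dyn SegmentReader>;
}

struct InMemorySegment {
    max_doc: u32,
}

impl SegmentReader for InMemorySegment {
    fn max_doc(&self) -> u32 {
        self.max_doc
    }
    fn clone_arc(&self) -> Arc<dyn SegmentReader> {
        Arc::new(InMemorySegment { max_doc: self.max_doc })
    }
}

struct MySegmentCollector {
    // Owned handle, obtained with `clone_arc()` instead of `Clone::clone()`.
    reader: Arc<dyn SegmentReader>,
}

// `&SegmentReader` (a concrete struct) became `&dyn SegmentReader`.
fn for_segment(reader: &dyn SegmentReader) -> MySegmentCollector {
    MySegmentCollector { reader: reader.clone_arc() }
}

fn main() {
    let segment = InMemorySegment { max_doc: 42 };
    let collector = for_segment(&segment);
    assert_eq!(collector.reader.max_doc(), 42);
}
```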
@@ -10,8 +10,7 @@ use serde::{Deserialize, Serialize};

 use super::bucket::GetDocCount;
 use super::metric::{
-    AverageMetricResult, CardinalityMetricResult, ExtendedStats, PercentilesMetricResult,
-    SingleMetricResult, Stats, TopHitsMetricResult,
+    ExtendedStats, PercentilesMetricResult, SingleMetricResult, Stats, TopHitsMetricResult,
 };
 use super::{AggregationError, Key};
 use crate::TantivyError;
@@ -82,8 +81,8 @@ impl AggregationResult {
 #[serde(untagged)]
 /// MetricResult
 pub enum MetricResult {
-    /// Average metric result with sum and count for multi-step merging.
-    Average(AverageMetricResult),
+    /// Average metric result.
+    Average(SingleMetricResult),
     /// Count metric result.
     Count(SingleMetricResult),
     /// Max metric result.
@@ -100,8 +99,8 @@ pub enum MetricResult {
     Percentiles(PercentilesMetricResult),
     /// Top hits metric result
     TopHits(TopHitsMetricResult),
-    /// Cardinality metric result with HLL sketch for multi-step merging.
-    Cardinality(CardinalityMetricResult),
+    /// Cardinality metric result
+    Cardinality(SingleMetricResult),
 }

 impl MetricResult {
@@ -120,7 +119,7 @@ impl MetricResult {
             MetricResult::TopHits(_) => Err(TantivyError::AggregationError(
                 AggregationError::InvalidRequest("top_hits can't be used to order".to_string()),
             )),
-            MetricResult::Cardinality(card) => Ok(card.value), // CardinalityMetricResult.value
+            MetricResult::Cardinality(card) => Ok(card.value),
         }
     }
 }
@@ -1359,10 +1359,10 @@ fn test_aggregation_on_json_object_mixed_types() {
     &serde_json::json!({
         "rangeagg": {
             "buckets": [
-                { "average_in_range": { "value": -20.5, "sum": -20.5, "count": 1 }, "doc_count": 1, "key": "*-3", "to": 3.0 },
-                { "average_in_range": { "value": 10.0, "sum": 10.0, "count": 1 }, "doc_count": 1, "from": 3.0, "key": "3-19", "to": 19.0 },
-                { "average_in_range": { "value": null, "sum": 0.0, "count": 0 }, "doc_count": 0, "from": 19.0, "key": "19-20", "to": 20.0 },
-                { "average_in_range": { "value": null, "sum": 0.0, "count": 0 }, "doc_count": 0, "from": 20.0, "key": "20-*" }
+                { "average_in_range": { "value": -20.5 }, "doc_count": 1, "key": "*-3", "to": 3.0 },
+                { "average_in_range": { "value": 10.0 }, "doc_count": 1, "from": 3.0, "key": "3-19", "to": 19.0 },
+                { "average_in_range": { "value": null }, "doc_count": 0, "from": 19.0, "key": "19-20", "to": 20.0 },
+                { "average_in_range": { "value": null }, "doc_count": 0, "from": 20.0, "key": "20-*" }
             ]
         },
         "termagg": {
@@ -1,4 +1,5 @@
 use std::fmt::Debug;
+use std::sync::Arc;

 use common::BitSet;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
@@ -402,7 +403,7 @@ pub struct FilterAggReqData {
     /// The filter aggregation
     pub req: FilterAggregation,
     /// The segment reader
-    pub segment_reader: SegmentReader,
+    pub segment_reader: Arc<dyn SegmentReader>,
     /// Document evaluator for the filter query (precomputed BitSet)
     /// This is built once when the request data is created
     pub evaluator: DocumentQueryEvaluator,
@@ -416,7 +417,7 @@ impl FilterAggReqData {
     pub(crate) fn get_memory_consumption(&self) -> usize {
         // Estimate: name + segment reader reference + bitset + buffer capacity
         self.name.len()
-            + std::mem::size_of::<SegmentReader>()
+            + std::mem::size_of::<Arc<dyn SegmentReader>>()
             + self.evaluator.bitset.len() / 8 // BitSet memory (bits to bytes)
            + self.matching_docs_buffer.capacity() * std::mem::size_of::<DocId>()
            + std::mem::size_of::<bool>()
@@ -438,7 +439,7 @@ impl DocumentQueryEvaluator {
     pub(crate) fn new(
         query: Box<dyn Query>,
         schema: Schema,
-        segment_reader: &SegmentReader,
+        segment_reader: &dyn SegmentReader,
     ) -> crate::Result<Self> {
         let max_doc = segment_reader.max_doc();

@@ -838,7 +839,7 @@ mod tests {
     let expected = json!({
         "electronics": {
             "doc_count": 2,
-            "avg_price": { "value": 899.0, "sum": 1798.0, "count": 2 } // (999 + 799) / 2
+            "avg_price": { "value": 899.0 } // (999 + 799) / 2
         }
     });
@@ -868,7 +869,7 @@ mod tests {
     let expected = json!({
         "furniture": {
             "doc_count": 0,
-            "avg_price": { "value": null, "sum": 0.0, "count": 0 }
+            "avg_price": { "value": null }
         }
     });
@@ -904,7 +905,7 @@ mod tests {
     let expected = json!({
         "electronics": {
             "doc_count": 2,
-            "avg_price": { "value": 899.0, "sum": 1798.0, "count": 2 }
+            "avg_price": { "value": 899.0 }
         },
         "in_stock": {
             "doc_count": 3, // apple, samsung, penguin
@@ -1000,7 +1001,7 @@ mod tests {
     let expected = json!({
         "premium_electronics": {
             "doc_count": 1, // Only apple (999) is >= 800 in tantivy's range semantics
-            "avg_rating": { "value": 4.5, "sum": 4.5, "count": 1 }
+            "avg_rating": { "value": 4.5 }
         }
     });
@@ -1032,7 +1033,7 @@ mod tests {
     let expected = json!({
         "in_stock": {
             "doc_count": 3, // apple, samsung, penguin
-            "avg_price": { "value": 607.67, "sum": 1823.0, "count": 3 } // (999 + 799 + 25) / 3 ≈ 607.67
+            "avg_price": { "value": 607.67 } // (999 + 799 + 25) / 3 ≈ 607.67
         },
         "out_of_stock": {
             "doc_count": 1, // nike
@@ -1183,7 +1184,7 @@ mod tests {
         "doc_count": 4,
         "electronics_branch": {
             "doc_count": 2,
-            "avg_price": { "value": 899.0, "sum": 1798.0, "count": 2 }
+            "avg_price": { "value": 899.0 }
         },
         "in_stock_branch": {
             "doc_count": 3,
@@ -1259,7 +1260,7 @@ mod tests {
         "doc_count": 2, // apple (999), samsung (799)
         "electronics": {
             "doc_count": 2, // both are electronics
-            "avg_rating": { "value": 4.35, "sum": 8.7, "count": 2 } // (4.5 + 4.2) / 2
+            "avg_rating": { "value": 4.35 } // (4.5 + 4.2) / 2
         },
         "in_stock": {
             "doc_count": 2, // both are in stock
@@ -1321,12 +1322,12 @@ mod tests {
         {
             "key": "samsung",
             "doc_count": 1,
-            "avg_price": { "value": 799.0, "sum": 799.0, "count": 1 }
+            "avg_price": { "value": 799.0 }
         },
         {
             "key": "apple",
             "doc_count": 1,
-            "avg_price": { "value": 999.0, "sum": 999.0, "count": 1 }
+            "avg_price": { "value": 999.0 }
         }
         ],
         "sum_other_doc_count": 0,
@@ -1370,7 +1371,7 @@ mod tests {
             "sum": 1798.0,
             "avg": 899.0
         },
-        "rating_avg": { "value": 4.35, "sum": 8.7, "count": 2 },
+        "rating_avg": { "value": 4.35 },
         "count": { "value": 2.0 }
     }
 });
@@ -1411,7 +1412,7 @@ mod tests {
     let expected = json!({
         "electronics": {
             "doc_count": 0,
-            "avg_price": { "value": null, "sum": 0.0, "count": 0 }
+            "avg_price": { "value": null }
         }
     });
@@ -1698,15 +1699,13 @@ mod tests {
     let filter_expected = json!({
         "electronics": {
             "doc_count": 2,
-            "avg_price": { "value": 899.0, "sum": 1798.0, "count": 2 }
+            "avg_price": { "value": 899.0 }
         }
     });

     let separate_expected = json!({
         "result": {
-            "value": 899.0,
-            "sum": 1798.0,
-            "count": 2
+            "value": 899.0
         }
     });
@@ -1222,9 +1222,7 @@ mod tests {
     res["histogram"]["buckets"][0],
     json!({
         "avg": {
-            "value": Value::Null,
-            "sum": 0.0,
-            "count": 0
+            "value": Value::Null
         },
         "doc_count": 0,
         "key": 2.0,
@@ -66,7 +66,7 @@ impl Collector for DistributedAggregationCollector {
     fn for_segment(
         &self,
         segment_local_id: crate::SegmentOrdinal,
-        reader: &crate::SegmentReader,
+        reader: &dyn SegmentReader,
     ) -> crate::Result<Self::Child> {
         AggregationSegmentCollector::from_agg_req_and_reader(
             &self.agg,
@@ -96,7 +96,7 @@ impl Collector for AggregationCollector {
     fn for_segment(
         &self,
         segment_local_id: crate::SegmentOrdinal,
-        reader: &crate::SegmentReader,
+        reader: &dyn SegmentReader,
     ) -> crate::Result<Self::Child> {
         AggregationSegmentCollector::from_agg_req_and_reader(
             &self.agg,
@@ -145,7 +145,7 @@ impl AggregationSegmentCollector {
     /// reader. Also includes validation, e.g. checking field types and existence.
     pub fn from_agg_req_and_reader(
         agg: &Aggregations,
-        reader: &SegmentReader,
+        reader: &dyn SegmentReader,
         segment_ordinal: SegmentOrdinal,
         context: &AggContextParams,
     ) -> crate::Result<Self> {
@@ -19,9 +19,8 @@ use super::bucket::{
     GetDocCount, Order, OrderTarget, RangeAggregation, TermsAggregation,
 };
 use super::metric::{
-    AverageMetricResult, CardinalityMetricResult, IntermediateAverage, IntermediateCount,
-    IntermediateExtendedStats, IntermediateMax, IntermediateMin, IntermediateStats,
-    IntermediateSum, PercentilesCollector, TopHitsTopNComputer,
+    IntermediateAverage, IntermediateCount, IntermediateExtendedStats, IntermediateMax,
+    IntermediateMin, IntermediateStats, IntermediateSum, PercentilesCollector, TopHitsTopNComputer,
 };
 use super::segment_agg_result::AggregationLimitsGuard;
 use super::{format_date, AggregationError, Key, SerializedKey};
@@ -91,6 +90,19 @@ impl From<IntermediateKey> for Key {

 impl Eq for IntermediateKey {}

+impl std::fmt::Display for IntermediateKey {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            IntermediateKey::Str(val) => f.write_str(val),
+            IntermediateKey::F64(val) => f.write_str(&val.to_string()),
+            IntermediateKey::U64(val) => f.write_str(&val.to_string()),
+            IntermediateKey::I64(val) => f.write_str(&val.to_string()),
+            IntermediateKey::Bool(val) => f.write_str(&val.to_string()),
+            IntermediateKey::IpAddr(val) => f.write_str(&val.to_string()),
+        }
+    }
+}
+
 impl std::hash::Hash for IntermediateKey {
     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
         core::mem::discriminant(self).hash(state);
@@ -106,6 +118,21 @@ impl std::hash::Hash for IntermediateKey {
 }

 impl IntermediateAggregationResults {
+    /// Returns a reference to the intermediate aggregation result for the given key.
+    pub fn get(&self, key: &str) -> Option<&IntermediateAggregationResult> {
+        self.aggs_res.get(key)
+    }
+
+    /// Removes and returns the intermediate aggregation result for the given key.
+    pub fn remove(&mut self, key: &str) -> Option<IntermediateAggregationResult> {
+        self.aggs_res.remove(key)
+    }
+
+    /// Returns an iterator over the keys in the intermediate aggregation results.
+    pub fn keys(&self) -> impl Iterator<Item = &String> {
+        self.aggs_res.keys()
+    }
+
     /// Add a result
     pub fn push(&mut self, key: String, value: IntermediateAggregationResult) -> crate::Result<()> {
         let entry = self.aggs_res.entry(key);
@@ -326,11 +353,7 @@ impl IntermediateMetricResult {
     fn into_final_metric_result(self, req: &Aggregation) -> MetricResult {
         match self {
             IntermediateMetricResult::Average(intermediate_avg) => {
-                MetricResult::Average(AverageMetricResult {
-                    value: intermediate_avg.finalize(),
-                    sum: intermediate_avg.sum(),
-                    count: intermediate_avg.count(),
-                })
+                MetricResult::Average(intermediate_avg.finalize().into())
             }
             IntermediateMetricResult::Count(intermediate_count) => {
                 MetricResult::Count(intermediate_count.finalize().into())
@@ -358,11 +381,7 @@ impl IntermediateMetricResult {
                 MetricResult::TopHits(top_hits.into_final_result())
             }
             IntermediateMetricResult::Cardinality(cardinality) => {
-                let value = cardinality.finalize();
-                MetricResult::Cardinality(CardinalityMetricResult {
-                    value,
-                    sketch: Some(cardinality),
-                })
+                MetricResult::Cardinality(cardinality.finalize().into())
             }
         }
     }
@@ -648,6 +667,21 @@ pub struct IntermediateTermBucketResult {
 }

 impl IntermediateTermBucketResult {
+    /// Returns a reference to the map of bucket entries keyed by [`IntermediateKey`].
+    pub fn entries(&self) -> &FxHashMap<IntermediateKey, IntermediateTermBucketEntry> {
+        &self.entries
+    }
+
+    /// Returns the count of documents not included in the returned buckets.
+    pub fn sum_other_doc_count(&self) -> u64 {
+        self.sum_other_doc_count
+    }
+
+    /// Returns the upper bound of the error on document counts in the returned buckets.
+    pub fn doc_count_error_upper_bound(&self) -> u64 {
+        self.doc_count_error_upper_bound
+    }
+
     pub(crate) fn into_final_result(
         self,
         req: &TermsAggregation,
@@ -55,6 +55,12 @@ impl IntermediateAverage {
     pub(crate) fn from_stats(stats: IntermediateStats) -> Self {
         Self { stats }
     }
+
+    /// Returns a reference to the underlying [`IntermediateStats`].
+    pub fn stats(&self) -> &IntermediateStats {
+        &self.stats
+    }
+
     /// Merges the other intermediate result into self.
     pub fn merge_fruits(&mut self, other: IntermediateAverage) {
         self.stats.merge_fruits(other.stats);
@@ -63,16 +69,6 @@ impl IntermediateAverage {
     pub fn finalize(&self) -> Option<f64> {
         self.stats.finalize().avg
     }
-
-    /// Returns the sum of all collected values.
-    pub fn sum(&self) -> f64 {
-        self.stats.sum
-    }
-
-    /// Returns the count of all collected values.
-    pub fn count(&self) -> u64 {
-        self.stats.count
-    }
 }

 #[cfg(test)]
@@ -340,7 +340,7 @@ impl PartialEq for CardinalityCollector {

 impl CardinalityCollector {
     /// Compute the final cardinality estimate.
-    pub fn finalize(&self) -> Option<f64> {
+    pub fn finalize(self) -> Option<f64> {
         Some(self.sketch.clone().count().trunc())
     }
@@ -93,41 +93,6 @@ impl From<Option<f64>> for SingleMetricResult {
     }
 }

-/// Average metric result with intermediate data for merging.
-///
-/// Unlike [`SingleMetricResult`], this struct includes the raw `sum` and `count`
-/// values that can be used for multi-step query merging.
-#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
-pub struct AverageMetricResult {
-    /// The computed average value. None if no documents matched.
-    pub value: Option<f64>,
-    /// The sum of all values (for multi-step merging).
-    pub sum: f64,
-    /// The count of all values (for multi-step merging).
-    pub count: u64,
-}
-
-/// Cardinality metric result with computed value and raw HLL sketch for multi-step merging.
-///
-/// The `value` field contains the computed cardinality estimate.
-/// The `sketch` field contains the serialized HyperLogLog++ sketch that can be used
-/// for merging results across multiple query steps.
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct CardinalityMetricResult {
-    /// The computed cardinality estimate.
-    pub value: Option<f64>,
-    /// The serialized HyperLogLog++ sketch for multi-step merging.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub sketch: Option<CardinalityCollector>,
-}
-
-impl PartialEq for CardinalityMetricResult {
-    fn eq(&self, other: &Self) -> bool {
-        // Only compare values, not sketch (sketch comparison is complex)
-        self.value == other.value
-    }
-}
-
 /// This is the wrapper of percentile entries, which can be vector or hashmap
 /// depending on if it's keyed or not.
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
@@ -142,30 +107,20 @@ pub enum PercentileValues {
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 /// The entry when requesting percentiles with keyed: false
 pub struct PercentileValuesVecEntry {
-    key: f64,
-    value: f64,
+    /// Percentile
+    pub key: f64,
+
+    /// Value at the percentile
+    pub value: f64,
 }

-/// Percentiles metric result with computed values and raw sketch for multi-step merging.
-///
-/// The `values` field contains the computed percentile values.
-/// The `sketch` field contains the serialized DDSketch that can be used for merging
-/// results across multiple query steps.
-#[derive(Clone, Debug, Serialize, Deserialize)]
+/// Single-metric aggregations use this common result structure.
+///
+/// Main reason to wrap it in value is to match elasticsearch output structure.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct PercentilesMetricResult {
-    /// The computed percentile values.
+    /// The result of the percentile metric.
     pub values: PercentileValues,
-    /// The serialized DDSketch for multi-step merging.
-    /// This is the raw sketch data that can be deserialized and merged with other sketches.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub sketch: Option<PercentilesCollector>,
-}
-
-impl PartialEq for PercentilesMetricResult {
-    fn eq(&self, other: &Self) -> bool {
-        // Only compare values, not sketch (sketch comparison is complex)
-        self.values == other.values
-    }
 }

 /// The top_hits metric results entry
@@ -246,105 +201,4 @@ mod tests {
         assert_eq!(aggregations_res_json["price_min"]["value"], 0.0);
         assert_eq!(aggregations_res_json["price_sum"]["value"], 15.0);
     }
-
-    #[test]
-    fn test_average_returns_sum_and_count() {
-        let mut schema_builder = Schema::builder();
-        let field_options = NumericOptions::default().set_fast();
-        let field = schema_builder.add_f64_field("price", field_options);
-        let index = Index::create_in_ram(schema_builder.build());
-        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
-
-        // Add documents with values 0, 1, 2, 3, 4, 5
-        // sum = 15, count = 6, avg = 2.5
-        for i in 0..6 {
-            index_writer
-                .add_document(doc!(
-                    field => i as f64,
-                ))
-                .unwrap();
-        }
-        index_writer.commit().unwrap();
-
-        let aggregations_json = r#"{ "price_avg": { "avg": { "field": "price" } } }"#;
-        let aggregations: Aggregations = serde_json::from_str(aggregations_json).unwrap();
-        let collector = AggregationCollector::from_aggs(aggregations, Default::default());
-        let reader = index.reader().unwrap();
-        let searcher = reader.searcher();
-        let aggregations_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
-        let aggregations_res_json = serde_json::to_value(aggregations_res).unwrap();
-
-        // Verify all three fields are present and correct
-        assert_eq!(aggregations_res_json["price_avg"]["value"], 2.5);
-        assert_eq!(aggregations_res_json["price_avg"]["sum"], 15.0);
-        assert_eq!(aggregations_res_json["price_avg"]["count"], 6);
-    }
-
-    #[test]
-    fn test_percentiles_returns_sketch() {
-        let mut schema_builder = Schema::builder();
-        let field_options = NumericOptions::default().set_fast();
-        let field = schema_builder.add_f64_field("latency", field_options);
-        let index = Index::create_in_ram(schema_builder.build());
-        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
-
-        // Add documents with latency values
-        for i in 0..100 {
-            index_writer
-                .add_document(doc!(
-                    field => i as f64,
-                ))
-                .unwrap();
-        }
-        index_writer.commit().unwrap();
-
-        let aggregations_json =
-            r#"{ "latency_percentiles": { "percentiles": { "field": "latency" } } }"#;
-        let aggregations: Aggregations = serde_json::from_str(aggregations_json).unwrap();
-        let collector = AggregationCollector::from_aggs(aggregations, Default::default());
-        let reader = index.reader().unwrap();
-        let searcher = reader.searcher();
-        let aggregations_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
-        let aggregations_res_json = serde_json::to_value(aggregations_res).unwrap();
-
-        // Verify percentile values are present
-        assert!(aggregations_res_json["latency_percentiles"]["values"].is_object());
-        // Verify sketch is present (serialized DDSketch)
-        assert!(aggregations_res_json["latency_percentiles"]["sketch"].is_object());
-    }
-
-    #[test]
-    fn test_cardinality_returns_sketch() {
-        let mut schema_builder = Schema::builder();
-        let field_options = NumericOptions::default().set_fast();
-        let field = schema_builder.add_u64_field("user_id", field_options);
-        let index = Index::create_in_ram(schema_builder.build());
-        let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
-
-        // Add documents with some duplicate user_ids
-        for i in 0..50 {
-            index_writer
-                .add_document(doc!(
-                    field => (i % 10) as u64, // 10 unique values
-                ))
-                .unwrap();
-        }
-        index_writer.commit().unwrap();
-
-        let aggregations_json = r#"{ "unique_users": { "cardinality": { "field": "user_id" } } }"#;
-        let aggregations: Aggregations = serde_json::from_str(aggregations_json).unwrap();
-        let collector = AggregationCollector::from_aggs(aggregations, Default::default());
-        let reader = index.reader().unwrap();
-        let searcher = reader.searcher();
-        let aggregations_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
-        let aggregations_res_json = serde_json::to_value(aggregations_res).unwrap();
-
-        // Verify cardinality value is present and approximately correct
-        let cardinality = aggregations_res_json["unique_users"]["value"]
-            .as_f64()
-            .unwrap();
-        assert!(cardinality >= 9.0 && cardinality <= 11.0); // HLL is approximate
-        // Verify sketch is present (serialized HyperLogLog++)
-        assert!(aggregations_res_json["unique_users"]["sketch"].is_object());
-    }
 }
@@ -178,9 +178,6 @@ fn format_percentile(percentile: f64) -> String {
 impl PercentilesCollector {
     /// Convert result into final result. This will query the quantils from the underlying quantil
     /// collector.
-    ///
-    /// The result includes both the computed percentile values and the raw DDSketch
-    /// for multi-step query merging.
     pub fn into_final_result(self, req: &PercentilesAggregationReq) -> PercentilesMetricResult {
         let percentiles: &[f64] = req
             .percents
@@ -213,15 +210,7 @@ impl PercentilesCollector {
                 .collect(),
             )
         };
-        PercentilesMetricResult {
-            values,
-            sketch: Some(self),
-        }
-    }
-
-    /// Returns a reference to the underlying DDSketch.
-    pub fn sketch(&self) -> &sketches_ddsketch::DDSketch {
-        &self.sketch
+        PercentilesMetricResult { values }
     }

     fn new() -> Self {
@@ -110,6 +110,16 @@ impl Default for IntermediateStats {
 }

 impl IntermediateStats {
+    /// Returns the number of values collected.
+    pub fn count(&self) -> u64 {
+        self.count
+    }
+
+    /// Returns the sum of all values collected.
+    pub fn sum(&self) -> f64 {
+        self.sum
+    }
+
     /// Merges the other stats intermediate result into self.
     pub fn merge_fruits(&mut self, other: IntermediateStats) {
         self.count += other.count;
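These accessors expose the two fields that make per-segment averages mergeable. A self-contained sketch of the semantics, mirroring `merge_fruits` and the `"value": null` cases in the tests above (the struct is a stand-in, not tantivy's `IntermediateStats`):

```rust
// Sketch: per-segment partial stats carry only `count` and `sum`; the
// average is derived once at finalize time.
#[derive(Default)]
struct PartialStats {
    count: u64,
    sum: f64,
}

impl PartialStats {
    fn collect(&mut self, value: f64) {
        self.count += 1;
        self.sum += value;
    }

    // Merging partials from two segments is just component-wise addition.
    fn merge_fruits(&mut self, other: PartialStats) {
        self.count += other.count;
        self.sum += other.sum;
    }

    // `None` when no document matched, like the `"value": null` outputs.
    fn finalize_avg(&self) -> Option<f64> {
        (self.count > 0).then(|| self.sum / self.count as f64)
    }
}

fn main() {
    let mut seg1 = PartialStats::default();
    seg1.collect(999.0);
    let mut seg2 = PartialStats::default();
    seg2.collect(799.0);
    seg1.merge_fruits(seg2);
    assert_eq!(seg1.finalize_avg(), Some(899.0));
}
```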
src/codec/mod.rs (new file, 170 lines)
@@ -0,0 +1,170 @@
/// Codec specific to postings data.
pub mod postings;

/// Standard tantivy codec. This is the codec you use by default.
pub mod standard;

use std::sync::Arc;

pub use standard::StandardCodec;

use crate::codec::postings::PostingsCodec;
use crate::directory::Directory;
use crate::fastfield::AliveBitSet;
use crate::query::score_combiner::DoNothingCombiner;
use crate::query::term_query::TermScorer;
use crate::query::{box_scorer, BufferedUnionScorer, Scorer, SumCombiner};
use crate::schema::Schema;
use crate::{DocId, Score, SegmentMeta, SegmentReader, TantivySegmentReader};

/// Codecs describe how data is laid out on disk.
///
/// For the moment, only the postings codec can be custom.
pub trait Codec: Clone + std::fmt::Debug + Send + Sync + 'static {
    /// The specific postings type used by this codec.
    type PostingsCodec: PostingsCodec;

    /// ID of the codec. It should be unique to your codec.
    /// Make it human-readable, descriptive, short and unique.
    const ID: &'static str;

    /// Load codec based on the codec configuration.
    fn from_json_props(json_value: &serde_json::Value) -> crate::Result<Self>;

    /// Get codec configuration.
    fn to_json_props(&self) -> serde_json::Value;

    /// Returns the postings codec.
    fn postings_codec(&self) -> &Self::PostingsCodec;

    /// Loads postings using the codec's concrete postings type.
    fn load_postings_typed(
        &self,
        reader: &dyn crate::index::InvertedIndexReader,
        term_info: &crate::postings::TermInfo,
        option: crate::schema::IndexRecordOption,
    ) -> std::io::Result<<Self::PostingsCodec as crate::codec::postings::PostingsCodec>::Postings>
    {
        let postings_data = reader.read_raw_postings_data(term_info, option)?;
        self.postings_codec()
            .load_postings(term_info.doc_freq, postings_data)
    }

    /// Opens a segment reader using this codec.
    ///
    /// Override this if your codec uses a custom segment reader implementation.
    fn open_segment_reader(
        &self,
        directory: &dyn Directory,
        segment_meta: &SegmentMeta,
        schema: Schema,
        custom_bitset: Option<AliveBitSet>,
    ) -> crate::Result<Arc<dyn SegmentReader>> {
        let codec: Arc<dyn ObjectSafeCodec> = Arc::new(self.clone());
        let reader = TantivySegmentReader::open_with_custom_alive_set_from_directory(
            directory,
            segment_meta,
            schema,
            codec,
            custom_bitset,
        )?;
        Ok(Arc::new(reader))
    }
}

/// Object-safe codec is a Codec that can be used in a trait object.
///
/// The point of it is to offer a way to use a codec without a proliferation of generics.
pub trait ObjectSafeCodec: 'static + Send + Sync {
    /// Performs a for_each_pruning operation on the given scorer.
    ///
    /// The function will go through matching documents and call the callback
    /// function for all docs with a score exceeding the threshold.
    ///
    /// The callback returns a new, larger threshold value,
    /// meant to update the current threshold.
    ///
    /// If the codec and the scorer allow it, this function can rely on
    /// optimizations like block-max WAND.
    fn for_each_pruning(
        &self,
        threshold: Score,
        scorer: Box<dyn Scorer>,
        callback: &mut dyn FnMut(DocId, Score) -> Score,
    );

    /// Builds a union scorer, possibly specialized if
    /// all scorers are `TermScorer<Self::Postings>`.
    fn build_union_scorer_with_sum_combiner(
        &self,
        scorers: Vec<Box<dyn Scorer>>,
        num_docs: DocId,
        score_combiner_type: SumOrDoNothingCombiner,
    ) -> Box<dyn Scorer>;
}

impl<TCodec: Codec> ObjectSafeCodec for TCodec {
    fn build_union_scorer_with_sum_combiner(
        &self,
        scorers: Vec<Box<dyn Scorer>>,
        num_docs: DocId,
        sum_or_do_nothing_combiner: SumOrDoNothingCombiner,
    ) -> Box<dyn Scorer> {
        if !scorers.iter().all(|scorer| {
            scorer.is::<TermScorer<<<Self as Codec>::PostingsCodec as PostingsCodec>::Postings>>()
        }) {
            return box_scorer(BufferedUnionScorer::build(
                scorers,
                SumCombiner::default,
                num_docs,
            ));
        }
        let specialized_scorers: Vec<
            TermScorer<<<Self as Codec>::PostingsCodec as PostingsCodec>::Postings>,
        > = scorers
            .into_iter()
            .map(|scorer| {
                *scorer.downcast::<TermScorer<_>>().ok().expect(
                    "Downcast failed despite the fact we already checked the type was correct",
                )
            })
            .collect();
        match sum_or_do_nothing_combiner {
            SumOrDoNothingCombiner::Sum => box_scorer(BufferedUnionScorer::build(
                specialized_scorers,
                SumCombiner::default,
                num_docs,
            )),
            SumOrDoNothingCombiner::DoNothing => box_scorer(BufferedUnionScorer::build(
                specialized_scorers,
                DoNothingCombiner::default,
                num_docs,
            )),
        }
    }

    fn for_each_pruning(
        &self,
        threshold: Score,
        scorer: Box<dyn Scorer>,
        callback: &mut dyn FnMut(DocId, Score) -> Score,
    ) {
        let accerelerated_foreach_pruning_res =
            <TCodec as Codec>::PostingsCodec::try_accelerated_for_each_pruning(
                threshold, scorer, callback,
            );
        if let Err(mut scorer) = accerelerated_foreach_pruning_res {
            // No acceleration available. We need to do things manually.
            scorer.for_each_pruning(threshold, callback);
        }
    }
}

/// SumCombiner or DoNothingCombiner
#[derive(Copy, Clone)]
pub enum SumOrDoNothingCombiner {
    /// Sum scores together
    Sum,
    /// Do not track any score.
    DoNothing,
}
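To make the extension point concrete, here is a self-contained toy that mirrors the shape of the traits above: a codec selects its concrete postings type through an associated `PostingsCodec`, and generic callers go through that association. This mirrors the design only; it is not the tantivy code itself:

```rust
// Toy mirror of the Codec / PostingsCodec pattern above; all types are
// stand-ins defined here, not tantivy's.
trait PostingsCodec {
    type Postings;
    fn load_postings(&self, doc_freq: u32, bytes: Vec<u8>) -> Self::Postings;
}

trait Codec: Clone + 'static {
    type PostingsCodec: PostingsCodec;
    const ID: &'static str;
    fn postings_codec(&self) -> &Self::PostingsCodec;
}

// A toy codec whose "postings" are just the decoded doc ids.
#[derive(Clone)]
struct ToyCodec;
struct ToyPostingsCodec;

impl PostingsCodec for ToyPostingsCodec {
    type Postings = Vec<u32>;
    fn load_postings(&self, doc_freq: u32, _bytes: Vec<u8>) -> Vec<u32> {
        (0..doc_freq).collect() // real decoding would happen here
    }
}

impl Codec for ToyCodec {
    type PostingsCodec = ToyPostingsCodec;
    const ID: &'static str = "toy";
    fn postings_codec(&self) -> &ToyPostingsCodec {
        &ToyPostingsCodec
    }
}

fn main() {
    let codec = ToyCodec;
    let postings = codec.postings_codec().load_postings(3, vec![]);
    assert_eq!(postings, vec![0, 1, 2]);
    println!("codec {} loaded {} docs", ToyCodec::ID, postings.len());
}
```

The `ObjectSafeCodec` blanket impl above then erases these generics at the boundary where callers cannot name the concrete postings type.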
@@ -1,5 +1,6 @@
 use std::ops::{Deref, DerefMut};

+use crate::codec::postings::PostingsWithBlockMax;
 use crate::query::term_query::TermScorer;
 use crate::query::Scorer;
 use crate::{DocId, DocSet, Score, TERMINATED};
@@ -13,8 +14,8 @@ use crate::{DocId, DocSet, Score, TERMINATED};
 /// We always have `before_pivot_len` < `pivot_len`.
 ///
 /// `None` is returned if we establish that no document can exceed the threshold.
-fn find_pivot_doc(
-    term_scorers: &[TermScorerWithMaxScore],
+fn find_pivot_doc<TPostings: PostingsWithBlockMax>(
+    term_scorers: &[TermScorerWithMaxScore<TPostings>],
     threshold: Score,
 ) -> Option<(usize, usize, DocId)> {
     let mut max_score = 0.0;
@@ -46,8 +47,8 @@ fn find_pivot_doc(
 /// the next doc candidate defined by the min of `last_doc_in_block + 1` for
 /// scorer in scorers[..pivot_len] and `scorer.doc()` for scorer in scorers[pivot_len..].
 /// Note: before and after calling this method, scorers need to be sorted by their `.doc()`.
-fn block_max_was_too_low_advance_one_scorer(
-    scorers: &mut [TermScorerWithMaxScore],
+fn block_max_was_too_low_advance_one_scorer<TPostings: PostingsWithBlockMax>(
+    scorers: &mut [TermScorerWithMaxScore<TPostings>],
     pivot_len: usize,
 ) {
     debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
@@ -82,7 +83,10 @@ fn block_max_was_too_low_advance_one_scorer(
 // Given a list of term_scorers and a `ord` and assuming that `term_scorers[ord]` is sorted
 // except term_scorers[ord] that might be in advance compared to its ranks,
 // bubble up term_scorers[ord] in order to restore the ordering.
-fn restore_ordering(term_scorers: &mut [TermScorerWithMaxScore], ord: usize) {
+fn restore_ordering<TPostings: PostingsWithBlockMax>(
+    term_scorers: &mut [TermScorerWithMaxScore<TPostings>],
+    ord: usize,
+) {
     let doc = term_scorers[ord].doc();
     for i in ord + 1..term_scorers.len() {
         if term_scorers[i].doc() >= doc {
@@ -97,9 +101,10 @@ fn restore_ordering(term_scorers: &mut [TermScorerWithMaxScore], ord: usize) {
 // If this works, return true.
 // If this fails (ie: one of the term_scorer does not contain `pivot_doc` and seek goes past the
 // pivot), reorder the term_scorers to ensure the list is still sorted and returns `false`.
-// If a term_scorer reach TERMINATED in the process return false remove the term_scorer and return.
-fn align_scorers(
-    term_scorers: &mut Vec<TermScorerWithMaxScore>,
+// If a term_scorer reach TERMINATED in the process return false remove the term_scorer and
+// return.
+fn align_scorers<TPostings: PostingsWithBlockMax>(
+    term_scorers: &mut Vec<TermScorerWithMaxScore<TPostings>>,
     pivot_doc: DocId,
     before_pivot_len: usize,
 ) -> bool {
@@ -126,7 +131,10 @@ fn align_scorers(
 // Assumes terms_scorers[..pivot_len] are positioned on the same doc (pivot_doc).
 // Advance term_scorers[..pivot_len] and out of these removes the terminated scores.
 // Restores the ordering of term_scorers.
-fn advance_all_scorers_on_pivot(term_scorers: &mut Vec<TermScorerWithMaxScore>, pivot_len: usize) {
+fn advance_all_scorers_on_pivot<TPostings: PostingsWithBlockMax>(
+    term_scorers: &mut Vec<TermScorerWithMaxScore<TPostings>>,
+    pivot_len: usize,
+) {
     for term_scorer in &mut term_scorers[..pivot_len] {
         term_scorer.advance();
     }
@@ -145,12 +153,12 @@ fn advance_all_scorers_on_pivot(term_scorers: &mut Vec<TermScorerWithMaxScore>,
 /// Implements the WAND (Weak AND) algorithm for dynamic pruning
 /// described in the paper "Faster Top-k Document Retrieval Using Block-Max Indexes".
 /// Link: <http://engineering.nyu.edu/~suel/papers/bmw.pdf>
-pub fn block_wand(
-    mut scorers: Vec<TermScorer>,
+pub fn block_wand<TPostings: PostingsWithBlockMax>(
+    mut scorers: Vec<TermScorer<TPostings>>,
     mut threshold: Score,
     callback: &mut dyn FnMut(u32, Score) -> Score,
 ) {
-    let mut scorers: Vec<TermScorerWithMaxScore> = scorers
+    let mut scorers: Vec<TermScorerWithMaxScore<TPostings>> = scorers
         .iter_mut()
         .map(TermScorerWithMaxScore::from)
         .collect();
@@ -166,10 +174,7 @@ pub fn block_wand(

     let block_max_score_upperbound: Score = scorers[..pivot_len]
         .iter_mut()
-        .map(|scorer| {
-            scorer.seek_block(pivot_doc);
-            scorer.block_max_score()
-        })
+        .map(|scorer| scorer.seek_block_max(pivot_doc))
         .sum();

     // Beware after shallow advance, skip readers can be in advance compared to
@@ -220,21 +225,22 @@ pub fn block_wand(
 /// - On a block, advance until the end and execute `callback` when the doc score is greater or
 ///   equal to the `threshold`.
 pub fn block_wand_single_scorer(
-    mut scorer: TermScorer,
+    mut scorer: TermScorer<impl PostingsWithBlockMax>,
     mut threshold: Score,
     callback: &mut dyn FnMut(u32, Score) -> Score,
 ) {
     let mut doc = scorer.doc();
+    let mut block_max_score = scorer.seek_block_max(doc);
     loop {
         // We position the scorer on a block that can reach
         // the threshold.
-        while scorer.block_max_score() < threshold {
+        while block_max_score < threshold {
             let last_doc_in_block = scorer.last_doc_in_block();
             if last_doc_in_block == TERMINATED {
                 return;
             }
             doc = last_doc_in_block + 1;
-            scorer.seek_block(doc);
+            block_max_score = scorer.seek_block_max(doc);
         }
         // Seek will effectively load that block.
         doc = scorer.seek(doc);
@@ -256,31 +262,33 @@ pub fn block_wand_single_scorer(
             }
         }
         doc += 1;
-        scorer.seek_block(doc);
+        block_max_score = scorer.seek_block_max(doc);
     }
 }

-struct TermScorerWithMaxScore<'a> {
-    scorer: &'a mut TermScorer,
+struct TermScorerWithMaxScore<'a, TPostings: PostingsWithBlockMax> {
+    scorer: &'a mut TermScorer<TPostings>,
     max_score: Score,
 }

-impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
-    fn from(scorer: &'a mut TermScorer) -> Self {
+impl<'a, TPostings: PostingsWithBlockMax> From<&'a mut TermScorer<TPostings>>
+    for TermScorerWithMaxScore<'a, TPostings>
+{
+    fn from(scorer: &'a mut TermScorer<TPostings>) -> Self {
         let max_score = scorer.max_score();
         TermScorerWithMaxScore { scorer, max_score }
     }
 }

-impl Deref for TermScorerWithMaxScore<'_> {
-    type Target = TermScorer;
+impl<TPostings: PostingsWithBlockMax> Deref for TermScorerWithMaxScore<'_, TPostings> {
+    type Target = TermScorer<TPostings>;

     fn deref(&self) -> &Self::Target {
         self.scorer
     }
 }

-impl DerefMut for TermScorerWithMaxScore<'_> {
+impl<TPostings: PostingsWithBlockMax> DerefMut for TermScorerWithMaxScore<'_, TPostings> {
     fn deref_mut(&mut self) -> &mut Self::Target {
         self.scorer
     }
 }
src/codec/postings/mod.rs (new file, 75 lines)
@@ -0,0 +1,75 @@
/// Block-max WAND algorithm.
pub mod block_wand;

use std::io;

use common::OwnedBytes;

use crate::fieldnorm::FieldNormReader;
use crate::postings::Postings;
use crate::query::{Bm25Weight, Scorer};
use crate::schema::IndexRecordOption;
use crate::{DocId, Score};

/// Postings codec (read path).
pub trait PostingsCodec: Send + Sync + 'static {
    /// Postings type for the postings codec.
    type Postings: Postings + Clone;

    /// Load postings from raw bytes and metadata.
    fn load_postings(
        &self,
        doc_freq: u32,
        postings_data: RawPostingsData,
    ) -> io::Result<Self::Postings>;

    /// If your codec supports different ways to accelerate `for_each_pruning`, this is
    /// where you should implement it.
    ///
    /// Returning `Err(scorer)` without mutating the scorer nor calling the callback function
    /// is never "wrong". It just leaves the responsibility to the caller to call a fallback
    /// implementation on the scorer.
    ///
    /// If your codec supports block-max WAND, you just need to have your
    /// postings implement `PostingsWithBlockMax` and copy what is done in the StandardPostings
    /// codec to enable it.
    fn try_accelerated_for_each_pruning(
        _threshold: Score,
        scorer: Box<dyn Scorer>,
        _callback: &mut dyn FnMut(DocId, Score) -> Score,
    ) -> Result<(), Box<dyn Scorer>> {
        Err(scorer)
    }
}

/// Raw postings bytes and metadata read from storage.
#[derive(Debug, Clone)]
pub struct RawPostingsData {
    /// Raw postings bytes for the term.
    pub postings_data: OwnedBytes,
    /// Raw positions bytes for the term, if positions are available.
    pub positions_data: Option<OwnedBytes>,
    /// Record option of the indexed field.
    pub record_option: IndexRecordOption,
    /// Effective record option after downgrading to the indexed field capability.
    pub effective_option: IndexRecordOption,
}

/// A light complement interface to Postings to allow block-max WAND acceleration.
pub trait PostingsWithBlockMax: Postings {
    /// Moves the postings to the block containing `target_doc` and returns
    /// an upper bound of the score for documents in the block.
    ///
    /// `Warning`: Calling this method may leave the postings in an invalid state.
    /// Callers are required to call `seek` before calling any other of the
    /// `Postings` methods (like `doc` / `advance` etc.).
    fn seek_block_max(
        &mut self,
        target_doc: crate::DocId,
        fieldnorm_reader: &FieldNormReader,
        similarity_weight: &Bm25Weight,
    ) -> Score;

    /// Returns the last document in the current block (or TERMINATED if this
    /// is the last block).
    fn last_doc_in_block(&self) -> crate::DocId;
}
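A self-contained sketch of the `try_accelerated_for_each_pruning` contract described above: the codec either consumes the scorer on a fast path, or returns it untouched in `Err` so the caller can run the generic fallback. All types here are simplified stand-ins, not tantivy's:

```rust
// Toy types standing in for tantivy's DocId / Score / Scorer.
type DocId = u32;
type Score = f32;

struct Scorer {
    hits: Vec<(DocId, Score)>,
}

impl Scorer {
    // Generic fallback: visit every hit above the current threshold.
    fn for_each_pruning(
        &mut self,
        mut threshold: Score,
        callback: &mut dyn FnMut(DocId, Score) -> Score,
    ) {
        for &(doc, score) in &self.hits {
            if score > threshold {
                threshold = callback(doc, score); // callback returns the new threshold
            }
        }
    }
}

// A codec without an accelerated path hands the scorer back unchanged.
fn try_accelerated_for_each_pruning(
    _threshold: Score,
    scorer: Box<Scorer>,
    _callback: &mut dyn FnMut(DocId, Score) -> Score,
) -> Result<(), Box<Scorer>> {
    Err(scorer)
}

fn main() {
    let scorer = Box::new(Scorer { hits: vec![(1, 0.5), (2, 1.5), (3, 2.0)] });
    let mut callback = |doc: DocId, score: Score| {
        println!("doc {doc} scored {score}");
        score // raise the threshold to the best score seen so far
    };
    if let Err(mut scorer) = try_accelerated_for_each_pruning(1.0, scorer, &mut callback) {
        // No acceleration available: run the generic implementation.
        scorer.for_each_pruning(1.0, &mut callback);
    }
}
```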
src/codec/standard/mod.rs (new file, 35 lines)
@@ -0,0 +1,35 @@
use serde::{Deserialize, Serialize};

use crate::codec::standard::postings::StandardPostingsCodec;
use crate::codec::Codec;

/// Tantivy's default postings codec.
pub mod postings;

/// Tantivy's default codec.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct StandardCodec;

impl Codec for StandardCodec {
    type PostingsCodec = StandardPostingsCodec;

    const ID: &'static str = "tantivy-default";

    fn from_json_props(json_value: &serde_json::Value) -> crate::Result<Self> {
        if !json_value.is_null() {
            return Err(crate::TantivyError::InvalidArgument(format!(
                "Codec properties for the StandardCodec are unexpected. Expected null, got {}",
                json_value.as_str().unwrap_or("null")
            )));
        }
        Ok(StandardCodec)
    }

    fn to_json_props(&self) -> serde_json::Value {
        serde_json::Value::Null
    }

    fn postings_codec(&self) -> &Self::PostingsCodec {
        &StandardPostingsCodec
    }
}
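A hedged usage sketch of the configuration round-trip this implementation defines, assuming this branch exposes the new module as `tantivy::codec` (that path is an assumption): `StandardCodec` serializes to JSON `null`, and the codec id plus its props would be persisted with the index and fed back through `from_json_props` on reopen.

```rust
// Hedged sketch against this branch's assumed `tantivy::codec` API.
use tantivy::codec::{Codec, StandardCodec};

fn roundtrip() -> tantivy::Result<StandardCodec> {
    let codec = StandardCodec;
    // What would be written into the index metadata alongside the segment list.
    let (id, props) = (StandardCodec::ID, codec.to_json_props());
    assert_eq!(id, "tantivy-default");
    assert!(props.is_null()); // StandardCodec carries no configuration
    // What happens when the index is reopened: anything but null is rejected.
    StandardCodec::from_json_props(&props)
}
```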
@@ -1,28 +1,19 @@
use std::io;

use common::VInt;
use common::{OwnedBytes, VInt};

use crate::directory::{FileSlice, OwnedBytes};
use crate::codec::standard::postings::FreqReadingOption;
use crate::fieldnorm::FieldNormReader;
use crate::postings::compression::{BlockDecoder, VIntDecoder, COMPRESSION_BLOCK_SIZE};
use crate::postings::{BlockInfo, FreqReadingOption, SkipReader};
use crate::postings::compression::{BlockDecoder, VIntDecoder as _, COMPRESSION_BLOCK_SIZE};
use crate::postings::skip::{BlockInfo, SkipReader};
use crate::query::Bm25Weight;
use crate::schema::IndexRecordOption;
use crate::{DocId, Score, TERMINATED};

fn max_score<I: Iterator<Item = Score>>(mut it: I) -> Option<Score> {
    it.next().map(|first| it.fold(first, Score::max))
}

/// `BlockSegmentPostings` is a cursor iterating over blocks
/// of documents.
///
/// # Warning
///
/// While it is useful for some very specific high-performance
/// use cases, you should prefer using `SegmentPostings` for most usage.
#[derive(Clone)]
pub struct BlockSegmentPostings {
pub(crate) struct BlockSegmentPostings {
    pub(crate) doc_decoder: BlockDecoder,
    block_loaded: bool,
    freq_decoder: BlockDecoder,
@@ -88,7 +79,7 @@ fn split_into_skips_and_postings(
}

impl BlockSegmentPostings {
    /// Opens a `BlockSegmentPostings`.
    /// Opens a `StandardPostingsReader`.
    /// `doc_freq` is the number of documents in the posting list.
    /// `record_option` represents the amount of data available according to the schema.
    /// `requested_option` is the amount of data requested by the user.
@@ -96,11 +87,10 @@ impl BlockSegmentPostings {
    /// term frequency blocks.
    pub(crate) fn open(
        doc_freq: u32,
        data: FileSlice,
        bytes: OwnedBytes,
        mut record_option: IndexRecordOption,
        requested_option: IndexRecordOption,
    ) -> io::Result<BlockSegmentPostings> {
        let bytes = data.read_bytes()?;
        let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, bytes)?;
        let skip_reader = match skip_data_opt {
            Some(skip_data) => {
@@ -138,6 +128,86 @@ impl BlockSegmentPostings {
        block_segment_postings.load_block();
        Ok(block_segment_postings)
    }
}

fn max_score<I: Iterator<Item = Score>>(mut it: I) -> Option<Score> {
    it.next().map(|first| it.fold(first, Score::max))
}

impl BlockSegmentPostings {
    /// Returns the overall number of documents in the block postings.
    /// It does not take into account whether documents are deleted or not.
    ///
    /// This `doc_freq` is simply the sum of the lengths of all of the
    /// blocks, and it does not take into account deleted documents.
    pub fn doc_freq(&self) -> u32 {
        self.doc_freq
    }

    /// Returns the array of docs in the current block.
    ///
    /// Before the first call to `.advance()`, the block
    /// returned by `.docs()` is empty.
    #[inline]
    pub fn docs(&self) -> &[DocId] {
        debug_assert!(self.block_loaded);
        self.doc_decoder.output_array()
    }

    /// Return the document at index `idx` of the block.
    #[inline]
    pub fn doc(&self, idx: usize) -> u32 {
        self.doc_decoder.output(idx)
    }

    /// Return the array of `term freq` in the block.
    #[inline]
    pub fn freqs(&self) -> &[u32] {
        debug_assert!(self.block_loaded);
        self.freq_decoder.output_array()
    }

    /// Return the frequency at index `idx` of the block.
    #[inline]
    pub fn freq(&self, idx: usize) -> u32 {
        debug_assert!(self.block_loaded);
        self.freq_decoder.output(idx)
    }

    /// Positions on a block that may contain `target_doc`.
    ///
    /// If all docs are smaller than target, the block loaded may be empty,
    /// or be the last, incomplete VInt block.
    pub fn seek(&mut self, target_doc: DocId) -> usize {
        // Move to the block that might contain our document.
        self.seek_block_without_loading(target_doc);
        self.load_block();

        // At this point we are on the block that might contain our document.
        let doc = self.doc_decoder.seek_within_block(target_doc);

        // The last block is not full and padded with TERMINATED,
        // so we are guaranteed to have at least one value (real or padding)
        // that is >= target_doc.
        debug_assert!(doc < COMPRESSION_BLOCK_SIZE);

        // `doc` is now the first element >= `target_doc`.
        // If all docs are smaller than target, the current block is incomplete and padded
        // with TERMINATED. After the search, the cursor points to the first TERMINATED.
        doc
    }

    pub fn position_offset(&self) -> u64 {
        self.skip_reader.position_offset()
    }

    /// Advance to the next block.
    pub fn advance(&mut self) {
        self.skip_reader.advance();
        self.block_loaded = false;
        self.block_max_score_cache = None;
        self.load_block();
    }

    /// Returns the block_max_score for the current block.
    /// It does not require the block to be loaded. For instance, it is ok to call this method
@@ -160,7 +230,7 @@ impl BlockSegmentPostings {
        }
        // this is the last block of the segment posting list.
        // If it is actually loaded, we can compute block max manually.
        if self.block_is_loaded() {
        if self.block_loaded {
            let docs = self.doc_decoder.output_array().iter().cloned();
            let freqs = self.freq_decoder.output_array().iter().cloned();
            let bm25_scores = docs.zip(freqs).map(|(doc, term_freq)| {
@@ -177,112 +247,25 @@ impl BlockSegmentPostings {
        // We do not cache it however, so that it gets computed once the block is loaded.
        bm25_weight.max_score()
    }
}

    pub(crate) fn freq_reading_option(&self) -> FreqReadingOption {
        self.freq_reading_option
    }

    // Resets the block segment postings on another position
    // in the postings file.
    //
    // This is useful for enumerating through a list of terms,
    // and consuming the associated posting lists while avoiding
    // reallocating a `BlockSegmentPostings`.
    //
    // # Warning
    //
    // This does not reset the positions list.
    pub(crate) fn reset(&mut self, doc_freq: u32, postings_data: OwnedBytes) -> io::Result<()> {
        let (skip_data_opt, postings_data) =
            split_into_skips_and_postings(doc_freq, postings_data)?;
        self.data = postings_data;
        self.block_max_score_cache = None;
        self.block_loaded = false;
        if let Some(skip_data) = skip_data_opt {
            self.skip_reader.reset(skip_data, doc_freq);
        } else {
            self.skip_reader.reset(OwnedBytes::empty(), doc_freq);
impl BlockSegmentPostings {
    /// Returns an empty segment postings object
    pub fn empty() -> BlockSegmentPostings {
        BlockSegmentPostings {
            doc_decoder: BlockDecoder::with_val(TERMINATED),
            block_loaded: true,
            freq_decoder: BlockDecoder::with_val(1),
            freq_reading_option: FreqReadingOption::NoFreq,
            block_max_score_cache: None,
            doc_freq: 0,
            data: OwnedBytes::empty(),
            skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
        }
        self.doc_freq = doc_freq;
        self.load_block();
        Ok(())
    }

    /// Returns the overall number of documents in the block postings.
    /// It does not take into account whether documents are deleted or not.
    ///
    /// This `doc_freq` is simply the sum of the lengths of all of the
    /// blocks, and it does not take into account deleted documents.
    pub fn doc_freq(&self) -> u32 {
        self.doc_freq
    }

    /// Returns the array of docs in the current block.
    ///
    /// Before the first call to `.advance()`, the block
    /// returned by `.docs()` is empty.
    #[inline]
    pub fn docs(&self) -> &[DocId] {
        debug_assert!(self.block_is_loaded());
        self.doc_decoder.output_array()
    }

    /// Return the document at index `idx` of the block.
    #[inline]
    pub fn doc(&self, idx: usize) -> u32 {
        self.doc_decoder.output(idx)
    }

    /// Return the array of `term freq` in the block.
    #[inline]
    pub fn freqs(&self) -> &[u32] {
        debug_assert!(self.block_is_loaded());
        self.freq_decoder.output_array()
    }

    /// Return the frequency at index `idx` of the block.
    #[inline]
    pub fn freq(&self, idx: usize) -> u32 {
        debug_assert!(self.block_is_loaded());
        self.freq_decoder.output(idx)
    }

    /// Returns the length of the current block.
    ///
    /// All blocks have a length of `NUM_DOCS_PER_BLOCK`,
    /// except the last block that may have a length
    /// of any number between 1 and `NUM_DOCS_PER_BLOCK - 1`
    #[inline]
    pub fn block_len(&self) -> usize {
        debug_assert!(self.block_is_loaded());
        self.doc_decoder.output_len
    }

    /// Positions on a block that may contain `target_doc`.
    ///
    /// If all docs are smaller than target, the block loaded may be empty,
    /// or be the last, incomplete VInt block.
    pub fn seek(&mut self, target_doc: DocId) -> usize {
        // Move to the block that might contain our document.
        self.seek_block(target_doc);
        self.load_block();

        // At this point we are on the block that might contain our document.
        let doc = self.doc_decoder.seek_within_block(target_doc);

        // The last block is not full and padded with TERMINATED,
        // so we are guaranteed to have at least one value (real or padding)
        // that is >= target_doc.
        debug_assert!(doc < COMPRESSION_BLOCK_SIZE);

        // `doc` is now the first element >= `target_doc`.
        // If all docs are smaller than target, the current block is incomplete and padded
        // with TERMINATED. After the search, the cursor points to the first TERMINATED.
        doc
    }

    pub(crate) fn position_offset(&self) -> u64 {
        self.skip_reader.position_offset()
    pub(crate) fn skip_reader(&self) -> &SkipReader {
        &self.skip_reader
    }

    /// Dangerous API! This call seeks to the next block on the skip list,
@@ -291,19 +274,15 @@ impl BlockSegmentPostings {
    /// `.load_block()` needs to be called manually afterwards.
    /// If all docs are smaller than target, the block loaded may be empty,
    /// or be the last, incomplete VInt block.
    pub(crate) fn seek_block(&mut self, target_doc: DocId) {
    pub(crate) fn seek_block_without_loading(&mut self, target_doc: DocId) {
        if self.skip_reader.seek(target_doc) {
            self.block_max_score_cache = None;
            self.block_loaded = false;
        }
    }

    pub(crate) fn block_is_loaded(&self) -> bool {
        self.block_loaded
    }

    pub(crate) fn load_block(&mut self) {
        if self.block_is_loaded() {
        if self.block_loaded {
            return;
        }
        let offset = self.skip_reader.byte_offset();
@@ -351,68 +330,39 @@ impl BlockSegmentPostings {
        }
        self.block_loaded = true;
    }

    /// Advance to the next block.
    pub fn advance(&mut self) {
        self.skip_reader.advance();
        self.block_loaded = false;
        self.block_max_score_cache = None;
        self.load_block();
    }

    /// Returns an empty segment postings object
    pub fn empty() -> BlockSegmentPostings {
        BlockSegmentPostings {
            doc_decoder: BlockDecoder::with_val(TERMINATED),
            block_loaded: true,
            freq_decoder: BlockDecoder::with_val(1),
            freq_reading_option: FreqReadingOption::NoFreq,
            block_max_score_cache: None,
            doc_freq: 0,
            data: OwnedBytes::empty(),
            skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
        }
    }

    pub(crate) fn skip_reader(&self) -> &SkipReader {
        &self.skip_reader
    }
}
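To make the cursor contract concrete, a hedged sketch (not part of the diff) of the intended block-by-block iteration: an empty `docs()` slice signals that the cursor has moved past the last block.

// Sketch only: counts the documents of a posting list block by block.
fn count_docs(block_postings: &mut BlockSegmentPostings) -> u32 {
    let mut count = 0u32;
    loop {
        let docs = block_postings.docs();
        if docs.is_empty() {
            return count;
        }
        count += docs.len() as u32;
        block_postings.advance();
    }
}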

#[cfg(test)]
mod tests {
    use common::HasLen;
    use common::OwnedBytes;

    use super::BlockSegmentPostings;
    use crate::codec::standard::postings::segment_postings::SegmentPostings;
    use crate::docset::{DocSet, TERMINATED};
    use crate::index::Index;
    use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
    use crate::postings::postings::Postings;
    use crate::postings::SegmentPostings;
    use crate::schema::{IndexRecordOption, Schema, Term, INDEXED};
    use crate::DocId;
    use crate::postings::serializer::PostingsSerializer;
    use crate::schema::IndexRecordOption;

    #[test]
    fn test_empty_segment_postings() {
        let mut postings = SegmentPostings::empty();
        assert_eq!(postings.doc(), TERMINATED);
        assert_eq!(postings.advance(), TERMINATED);
        assert_eq!(postings.advance(), TERMINATED);
        assert_eq!(postings.doc_freq(), 0);
        assert_eq!(postings.len(), 0);
    }

    #[test]
    fn test_empty_postings_doc_returns_terminated() {
        let mut postings = SegmentPostings::empty();
        assert_eq!(postings.doc(), TERMINATED);
        assert_eq!(postings.advance(), TERMINATED);
    }

    #[test]
    fn test_empty_postings_doc_term_freq_returns_0() {
        let postings = SegmentPostings::empty();
        assert_eq!(postings.term_freq(), 1);
    #[cfg(test)]
    fn build_block_postings(docs: &[u32]) -> BlockSegmentPostings {
        let doc_freq = docs.len() as u32;
        let mut postings_serializer =
            PostingsSerializer::new(1.0f32, IndexRecordOption::Basic, None);
        postings_serializer.new_term(docs.len() as u32, false);
        for doc in docs {
            postings_serializer.write_doc(*doc, 1u32);
        }
        let mut buffer: Vec<u8> = Vec::new();
        postings_serializer
            .close_term(doc_freq, &mut buffer)
            .unwrap();
        BlockSegmentPostings::open(
            doc_freq,
            OwnedBytes::new(buffer),
            IndexRecordOption::Basic,
            IndexRecordOption::Basic,
        )
        .unwrap()
    }

    #[test]
@@ -427,7 +377,7 @@ mod tests {

    #[test]
    fn test_block_segment_postings() -> crate::Result<()> {
        let mut block_segments = build_block_postings(&(0..100_000).collect::<Vec<u32>>())?;
        let mut block_segments = build_block_postings(&(0..100_000).collect::<Vec<u32>>());
        let mut offset: u32 = 0u32;
        // checking that the `doc_freq` is correct
        assert_eq!(block_segments.doc_freq(), 100_000);
@@ -452,7 +402,7 @@ mod tests {
        doc_ids.push(129);
        doc_ids.push(130);
        {
            let block_segments = build_block_postings(&doc_ids)?;
            let block_segments = build_block_postings(&doc_ids);
            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
            assert_eq!(docset.seek(128), 129);
            assert_eq!(docset.doc(), 129);
@@ -461,7 +411,7 @@ mod tests {
            assert_eq!(docset.advance(), TERMINATED);
        }
        {
            let block_segments = build_block_postings(&doc_ids).unwrap();
            let block_segments = build_block_postings(&doc_ids);
            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
            assert_eq!(docset.seek(129), 129);
            assert_eq!(docset.doc(), 129);
@@ -470,7 +420,7 @@ mod tests {
            assert_eq!(docset.advance(), TERMINATED);
        }
        {
            let block_segments = build_block_postings(&doc_ids)?;
            let block_segments = build_block_postings(&doc_ids);
            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
            assert_eq!(docset.doc(), 0);
            assert_eq!(docset.seek(131), TERMINATED);
@@ -479,38 +429,13 @@ mod tests {
        Ok(())
    }

    fn build_block_postings(docs: &[DocId]) -> crate::Result<BlockSegmentPostings> {
        let mut schema_builder = Schema::builder();
        let int_field = schema_builder.add_u64_field("id", INDEXED);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut index_writer = index.writer_for_tests()?;
        let mut last_doc = 0u32;
        for &doc in docs {
            for _ in last_doc..doc {
                index_writer.add_document(doc!(int_field=>1u64))?;
            }
            index_writer.add_document(doc!(int_field=>0u64))?;
            last_doc = doc + 1;
        }
        index_writer.commit()?;
        let searcher = index.reader()?.searcher();
        let segment_reader = searcher.segment_reader(0);
        let inverted_index = segment_reader.inverted_index(int_field).unwrap();
        let term = Term::from_field_u64(int_field, 0u64);
        let term_info = inverted_index.get_term_info(&term)?.unwrap();
        let block_postings = inverted_index
            .read_block_postings_from_terminfo(&term_info, IndexRecordOption::Basic)?;
        Ok(block_postings)
    }

    #[test]
    fn test_block_segment_postings_seek() -> crate::Result<()> {
        let mut docs = vec![0];
        let mut docs = Vec::new();
        for i in 0..1300 {
            docs.push((i * i / 100) + i);
        }
        let mut block_postings = build_block_postings(&docs[..])?;
        let mut block_postings = build_block_postings(&docs[..]);
        for i in &[0, 424, 10000] {
            block_postings.seek(*i);
            let docs = block_postings.docs();
@@ -521,40 +446,4 @@ mod tests {
        assert_eq!(block_postings.doc(COMPRESSION_BLOCK_SIZE - 1), TERMINATED);
        Ok(())
    }

    #[test]
    fn test_reset_block_segment_postings() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let int_field = schema_builder.add_u64_field("id", INDEXED);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut index_writer = index.writer_for_tests()?;
        // create two postings lists, one containing even numbers,
        // the other containing odd numbers.
        for i in 0..6 {
            let doc = doc!(int_field=> (i % 2) as u64);
            index_writer.add_document(doc)?;
        }
        index_writer.commit()?;
        let searcher = index.reader()?.searcher();
        let segment_reader = searcher.segment_reader(0);

        let mut block_segments;
        {
            let term = Term::from_field_u64(int_field, 0u64);
            let inverted_index = segment_reader.inverted_index(int_field)?;
            let term_info = inverted_index.get_term_info(&term)?.unwrap();
            block_segments = inverted_index
                .read_block_postings_from_terminfo(&term_info, IndexRecordOption::Basic)?;
        }
        assert_eq!(block_segments.docs(), &[0, 2, 4]);
        {
            let term = Term::from_field_u64(int_field, 1u64);
            let inverted_index = segment_reader.inverted_index(int_field)?;
            let term_info = inverted_index.get_term_info(&term)?.unwrap();
            inverted_index.reset_block_postings_from_terminfo(&term_info, &mut block_segments)?;
        }
        assert_eq!(block_segments.docs(), &[1, 3, 5]);
        Ok(())
    }
}
171
src/codec/standard/postings/mod.rs
Normal file
@@ -0,0 +1,171 @@
use std::io;

use common::BitSet;

use crate::codec::postings::block_wand::{block_wand, block_wand_single_scorer};
use crate::codec::postings::{PostingsCodec, RawPostingsData};
use crate::codec::standard::postings::block_segment_postings::BlockSegmentPostings;
pub use crate::codec::standard::postings::segment_postings::SegmentPostings;
use crate::positions::PositionReader;
use crate::query::term_query::TermScorer;
use crate::query::{BufferedUnionScorer, Scorer, SumCombiner};
use crate::{DocSet as _, Score, TERMINATED};

mod block_segment_postings;
mod segment_postings;

pub use segment_postings::SegmentPostings as StandardPostings;

/// The default postings codec for tantivy.
pub struct StandardPostingsCodec;

#[expect(clippy::enum_variant_names)]
#[derive(Debug, PartialEq, Clone, Copy, Eq)]
pub(crate) enum FreqReadingOption {
    NoFreq,
    SkipFreq,
    ReadFreq,
}

impl PostingsCodec for StandardPostingsCodec {
    type Postings = SegmentPostings;

    fn load_postings(
        &self,
        doc_freq: u32,
        postings_data: RawPostingsData,
    ) -> io::Result<Self::Postings> {
        load_postings_from_raw_data(doc_freq, postings_data)
    }

    fn try_accelerated_for_each_pruning(
        mut threshold: Score,
        mut scorer: Box<dyn Scorer>,
        callback: &mut dyn FnMut(crate::DocId, Score) -> Score,
    ) -> Result<(), Box<dyn Scorer>> {
        scorer = match scorer.downcast::<TermScorer<Self::Postings>>() {
            Ok(term_scorer) => {
                block_wand_single_scorer(*term_scorer, threshold, callback);
                return Ok(());
            }
            Err(scorer) => scorer,
        };
        let mut union_scorer =
            scorer.downcast::<BufferedUnionScorer<TermScorer<Self::Postings>, SumCombiner>>()?;
        let doc = union_scorer.doc();
        if doc == TERMINATED {
            return Ok(());
        }
        let score = union_scorer.score();
        if score > threshold {
            threshold = callback(doc, score);
        }
        let scorers: Vec<TermScorer<Self::Postings>> = union_scorer.into_scorers();
        block_wand(scorers, threshold, callback);
        Ok(())
    }
}
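The dispatch above tries the two scorer shapes this codec knows how to accelerate (a single `TermScorer`, then a buffered union of them) and hands the boxed scorer back untouched on `Err`, so the caller can fall back to exhaustive scoring. A hedged sketch of a call site follows; `top_k` and `for_each_scorer` are hypothetical names, not from this diff.

// Sketch only.
let mut callback = |doc: crate::DocId, score: Score| -> Score {
    top_k.insert(doc, score); // hypothetical top-k accumulator
    top_k.threshold()         // returns the updated pruning threshold
};
if let Err(scorer) =
    StandardPostingsCodec::try_accelerated_for_each_pruning(threshold, scorer, &mut callback)
{
    // Neither downcast matched: score every document instead.
    for_each_scorer(scorer, &mut callback); // hypothetical fallback helper
}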
pub(crate) fn load_postings_from_raw_data(
    doc_freq: u32,
    postings_data: RawPostingsData,
) -> io::Result<SegmentPostings> {
    let RawPostingsData {
        postings_data,
        positions_data: positions_data_opt,
        record_option,
        effective_option,
    } = postings_data;
    let requested_option = effective_option;
    let block_segment_postings =
        BlockSegmentPostings::open(doc_freq, postings_data, record_option, requested_option)?;
    let position_reader = positions_data_opt.map(PositionReader::open).transpose()?;
    Ok(SegmentPostings::from_block_postings(
        block_segment_postings,
        position_reader,
    ))
}

pub(crate) fn fill_bitset_from_raw_data(
    doc_freq: u32,
    postings_data: RawPostingsData,
    doc_bitset: &mut BitSet,
) -> io::Result<()> {
    let RawPostingsData {
        postings_data,
        record_option,
        effective_option,
        ..
    } = postings_data;
    let mut block_postings =
        BlockSegmentPostings::open(doc_freq, postings_data, record_option, effective_option)?;
    loop {
        let docs = block_postings.docs();
        if docs.is_empty() {
            break;
        }
        for &doc in docs {
            doc_bitset.insert(doc);
        }
        block_postings.advance();
    }
    Ok(())
}
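A minimal sketch of a call site (assumptions: `raw` is the `RawPostingsData` of a term, and `max_doc` is the number of documents in the segment):

// Sketch only: materialize the matching documents as a BitSet,
// e.g. to back a filter that is probed many times.
let mut doc_bitset = BitSet::with_max_value(max_doc);
fill_bitset_from_raw_data(doc_freq, raw, &mut doc_bitset)?;
assert!(doc_bitset.len() as u32 <= doc_freq);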

#[cfg(test)]
mod tests {
    use common::OwnedBytes;

    use super::*;
    use crate::postings::serializer::PostingsSerializer;
    use crate::postings::Postings as _;
    use crate::schema::IndexRecordOption;

    fn test_segment_postings_tf_aux(num_docs: u32, include_term_freq: bool) -> SegmentPostings {
        let mut postings_serializer =
            PostingsSerializer::new(1.0f32, IndexRecordOption::WithFreqs, None);
        let mut buffer = Vec::new();
        postings_serializer.new_term(num_docs, include_term_freq);
        for i in 0..num_docs {
            postings_serializer.write_doc(i, 2);
        }
        postings_serializer
            .close_term(num_docs, &mut buffer)
            .unwrap();
        load_postings_from_raw_data(
            num_docs,
            RawPostingsData {
                postings_data: OwnedBytes::new(buffer),
                positions_data: None,
                record_option: IndexRecordOption::WithFreqs,
                effective_option: IndexRecordOption::WithFreqs,
            },
        )
        .unwrap()
    }

    #[test]
    fn test_segment_postings_small_block_with_and_without_freq() {
        let small_block_without_term_freq = test_segment_postings_tf_aux(1, false);
        assert!(!small_block_without_term_freq.has_freq());
        assert_eq!(small_block_without_term_freq.doc(), 0);
        assert_eq!(small_block_without_term_freq.term_freq(), 1);

        let small_block_with_term_freq = test_segment_postings_tf_aux(1, true);
        assert!(small_block_with_term_freq.has_freq());
        assert_eq!(small_block_with_term_freq.doc(), 0);
        assert_eq!(small_block_with_term_freq.term_freq(), 2);
    }

    #[test]
    fn test_segment_postings_large_block_with_and_without_freq() {
        let large_block_without_term_freq = test_segment_postings_tf_aux(128, false);
        assert!(!large_block_without_term_freq.has_freq());
        assert_eq!(large_block_without_term_freq.doc(), 0);
        assert_eq!(large_block_without_term_freq.term_freq(), 1);

        let large_block_with_term_freq = test_segment_postings_tf_aux(128, true);
        assert!(large_block_with_term_freq.has_freq());
        assert_eq!(large_block_with_term_freq.doc(), 0);
        assert_eq!(large_block_with_term_freq.term_freq(), 2);
    }
}
@@ -1,11 +1,14 @@
use common::HasLen;
use common::BitSet;

use super::BlockSegmentPostings;
use crate::codec::postings::PostingsWithBlockMax;
use crate::docset::DocSet;
use crate::fastfield::AliveBitSet;
use crate::fieldnorm::FieldNormReader;
use crate::positions::PositionReader;
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
use crate::postings::{BlockSegmentPostings, Postings};
use crate::{DocId, TERMINATED};
use crate::postings::{DocFreq, Postings};
use crate::query::Bm25Weight;
use crate::{DocId, Score};

/// `SegmentPostings` represents the inverted list or postings associated with
/// a term in a `Segment`.
@@ -29,31 +32,6 @@ impl SegmentPostings {
        }
    }

    /// Compute the number of non-deleted documents.
    ///
    /// This method will clone and scan through the posting lists.
    /// (this is a rather expensive operation).
    pub fn doc_freq_given_deletes(&self, alive_bitset: &AliveBitSet) -> u32 {
        let mut docset = self.clone();
        let mut doc_freq = 0;
        loop {
            let doc = docset.doc();
            if doc == TERMINATED {
                return doc_freq;
            }
            if alive_bitset.is_alive(doc) {
                doc_freq += 1u32;
            }
            docset.advance();
        }
    }

    /// Returns the overall number of documents in the block postings.
    /// It does not take into account whether documents are deleted or not.
    pub fn doc_freq(&self) -> u32 {
        self.block_cursor.doc_freq()
    }

    /// Creates a segment postings object with the given documents
    /// and no frequency encoded.
    ///
@@ -64,11 +42,13 @@ impl SegmentPostings {
    /// buffer with the serialized data.
    #[cfg(test)]
    pub fn create_from_docs(docs: &[u32]) -> SegmentPostings {
        use crate::directory::FileSlice;
        use crate::postings::serializer::PostingsSerializer;
        use common::OwnedBytes;

        use crate::schema::IndexRecordOption;
        let mut buffer = Vec::new();
        {
            use crate::postings::serializer::PostingsSerializer;

            let mut postings_serializer =
                PostingsSerializer::new(0.0, IndexRecordOption::Basic, None);
            postings_serializer.new_term(docs.len() as u32, false);
@@ -81,7 +61,7 @@ impl SegmentPostings {
        }
        let block_segment_postings = BlockSegmentPostings::open(
            docs.len() as u32,
            FileSlice::from(buffer),
            OwnedBytes::new(buffer),
            IndexRecordOption::Basic,
            IndexRecordOption::Basic,
        )
@@ -95,7 +75,8 @@ impl SegmentPostings {
        doc_and_tfs: &[(u32, u32)],
        fieldnorms: Option<&[u32]>,
    ) -> SegmentPostings {
        use crate::directory::FileSlice;
        use common::OwnedBytes;

        use crate::fieldnorm::FieldNormReader;
        use crate::postings::serializer::PostingsSerializer;
        use crate::schema::IndexRecordOption;
@@ -128,7 +109,7 @@ impl SegmentPostings {
            .unwrap();
        let block_segment_postings = BlockSegmentPostings::open(
            doc_and_tfs.len() as u32,
            FileSlice::from(buffer),
            OwnedBytes::new(buffer),
            IndexRecordOption::WithFreqs,
            IndexRecordOption::WithFreqs,
        )
@@ -158,7 +139,6 @@ impl DocSet for SegmentPostings {
    // next needs to be called a first time to point to the correct element.
    #[inline]
    fn advance(&mut self) -> DocId {
        debug_assert!(self.block_cursor.block_is_loaded());
        if self.cur == COMPRESSION_BLOCK_SIZE - 1 {
            self.cur = 0;
            self.block_cursor.advance();
@@ -197,13 +177,31 @@ impl DocSet for SegmentPostings {
    }

    fn size_hint(&self) -> u32 {
        self.len() as u32
        self.doc_freq().into()
    }
}
impl HasLen for SegmentPostings {
    fn len(&self) -> usize {
        self.block_cursor.doc_freq() as usize
    fn fill_bitset(&mut self, bitset: &mut BitSet) {
        let bitset_max_value: DocId = bitset.max_value();
        loop {
            let docs = self.block_cursor.docs();
            let Some(&last_doc) = docs.last() else {
                break;
            };
            if last_doc < bitset_max_value {
                // All docs are within the range of the bitset
                for &doc in docs {
                    bitset.insert(doc);
                }
            } else {
                for &doc in docs {
                    if doc < bitset_max_value {
                        bitset.insert(doc);
                    }
                }
                break;
            }
            self.block_cursor.advance();
        }
    }
}

@@ -229,6 +227,13 @@ impl Postings for SegmentPostings {
        self.block_cursor.freq(self.cur)
    }

    /// Returns the overall number of documents in the block postings.
    /// It does not take into account whether documents are deleted or not.
    #[inline(always)]
    fn doc_freq(&self) -> DocFreq {
        DocFreq::Exact(self.block_cursor.doc_freq())
    }

    fn append_positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) {
        let term_freq = self.term_freq();
        let prev_len = output.len();
@@ -252,24 +257,44 @@ impl Postings for SegmentPostings {
            }
        }
    }

    fn has_freq(&self) -> bool {
        !self.block_cursor.freqs().is_empty()
    }
}

impl PostingsWithBlockMax for SegmentPostings {
    #[inline]
    fn seek_block_max(
        &mut self,
        target_doc: crate::DocId,
        fieldnorm_reader: &FieldNormReader,
        similarity_weight: &Bm25Weight,
    ) -> Score {
        self.block_cursor.seek_block_without_loading(target_doc);
        self.block_cursor
            .block_max_score(fieldnorm_reader, similarity_weight)
    }

    #[inline]
    fn last_doc_in_block(&self) -> crate::DocId {
        self.block_cursor.skip_reader().last_doc_in_block()
    }
}

#[cfg(test)]
mod tests {

    use common::HasLen;

    use super::SegmentPostings;
    use crate::docset::{DocSet, TERMINATED};
    use crate::fastfield::AliveBitSet;
    use crate::postings::postings::Postings;
    use crate::postings::Postings;

    #[test]
    fn test_empty_segment_postings() {
        let mut postings = SegmentPostings::empty();
        assert_eq!(postings.doc(), TERMINATED);
        assert_eq!(postings.advance(), TERMINATED);
        assert_eq!(postings.advance(), TERMINATED);
        assert_eq!(postings.len(), 0);
        assert_eq!(postings.doc_freq(), crate::postings::DocFreq::Exact(0));
    }

    #[test]
@@ -284,15 +309,4 @@ mod tests {
        let postings = SegmentPostings::empty();
        assert_eq!(postings.term_freq(), 1);
    }

    #[test]
    fn test_doc_freq() {
        let docs = SegmentPostings::create_from_docs(&[0, 2, 10]);
        assert_eq!(docs.doc_freq(), 3);
        let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[2], 12);
        assert_eq!(docs.doc_freq_given_deletes(&alive_bitset), 2);
        let all_deleted =
            AliveBitSet::for_test_from_deleted_docs(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12);
        assert_eq!(docs.doc_freq_given_deletes(&all_deleted), 0);
    }
}
@@ -43,7 +43,7 @@ impl Collector for Count {
    fn for_segment(
        &self,
        _: SegmentOrdinal,
        _: &SegmentReader,
        _: &dyn SegmentReader,
    ) -> crate::Result<SegmentCountCollector> {
        Ok(SegmentCountCollector::default())
    }

@@ -1,7 +1,7 @@
use std::collections::HashSet;

use super::{Collector, SegmentCollector};
use crate::{DocAddress, DocId, Score};
use crate::{DocAddress, DocId, Score, SegmentReader};

/// Collector that returns the set of `DocAddress`es that match the query.
///
@@ -15,7 +15,7 @@ impl Collector for DocSetCollector {
    fn for_segment(
        &self,
        segment_local_id: crate::SegmentOrdinal,
        _segment: &crate::SegmentReader,
        _segment: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        Ok(DocSetChildCollector {
            segment_local_id,

@@ -265,7 +265,7 @@ impl Collector for FacetCollector {
    fn for_segment(
        &self,
        _: SegmentOrdinal,
        reader: &SegmentReader,
        reader: &dyn SegmentReader,
    ) -> crate::Result<FacetSegmentCollector> {
        let facet_reader = reader.facet_reader(&self.field_name)?;
        let facet_dict = facet_reader.facet_dict();

@@ -113,7 +113,7 @@ where
    fn for_segment(
        &self,
        segment_local_id: u32,
        segment_reader: &SegmentReader,
        segment_reader: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        let column_opt = segment_reader.fast_fields().column_opt(&self.field)?;

@@ -287,7 +287,7 @@ where
    fn for_segment(
        &self,
        segment_local_id: u32,
        segment_reader: &SegmentReader,
        segment_reader: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        let column_opt = segment_reader.fast_fields().bytes(&self.field)?;

@@ -6,7 +6,7 @@ use fastdivide::DividerU64;
use crate::collector::{Collector, SegmentCollector};
use crate::fastfield::{FastFieldNotAvailableError, FastValue};
use crate::schema::Type;
use crate::{DocId, Score};
use crate::{DocId, Score, SegmentReader};

/// Histogram builds a histogram of the values of a fastfield for the
/// collected DocSet.
@@ -110,7 +110,7 @@ impl Collector for HistogramCollector {
    fn for_segment(
        &self,
        _segment_local_id: crate::SegmentOrdinal,
        segment: &crate::SegmentReader,
        segment: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        let column_opt = segment.fast_fields().u64_lenient(&self.field)?;
        let (column, _column_type) = column_opt.ok_or_else(|| FastFieldNotAvailableError {

@@ -156,7 +156,7 @@ pub trait Collector: Sync + Send {
    fn for_segment(
        &self,
        segment_local_id: SegmentOrdinal,
        segment: &SegmentReader,
        segment: &dyn SegmentReader,
    ) -> crate::Result<Self::Child>;

    /// Returns true iff the collector requires to compute scores for documents.
@@ -174,7 +174,7 @@ pub trait Collector: Sync + Send {
        &self,
        weight: &dyn Weight,
        segment_ord: u32,
        reader: &SegmentReader,
        reader: &dyn SegmentReader,
    ) -> crate::Result<<Self::Child as SegmentCollector>::Fruit> {
        let with_scoring = self.requires_scoring();
        let mut segment_collector = self.for_segment(segment_ord, reader)?;
@@ -186,7 +186,7 @@ pub trait Collector: Sync + Send {
pub(crate) fn default_collect_segment_impl<TSegmentCollector: SegmentCollector>(
    segment_collector: &mut TSegmentCollector,
    weight: &dyn Weight,
    reader: &SegmentReader,
    reader: &dyn SegmentReader,
    with_scoring: bool,
) -> crate::Result<()> {
    match (reader.alive_bitset(), with_scoring) {
@@ -255,7 +255,7 @@ impl<TCollector: Collector> Collector for Option<TCollector> {
    fn for_segment(
        &self,
        segment_local_id: SegmentOrdinal,
        segment: &SegmentReader,
        segment: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        Ok(if let Some(inner) = self {
            let inner_segment_collector = inner.for_segment(segment_local_id, segment)?;
@@ -336,7 +336,7 @@ where
    fn for_segment(
        &self,
        segment_local_id: u32,
        segment: &SegmentReader,
        segment: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        let left = self.0.for_segment(segment_local_id, segment)?;
        let right = self.1.for_segment(segment_local_id, segment)?;
@@ -407,7 +407,7 @@ where
    fn for_segment(
        &self,
        segment_local_id: u32,
        segment: &SegmentReader,
        segment: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        let one = self.0.for_segment(segment_local_id, segment)?;
        let two = self.1.for_segment(segment_local_id, segment)?;
@@ -487,7 +487,7 @@ where
    fn for_segment(
        &self,
        segment_local_id: u32,
        segment: &SegmentReader,
        segment: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        let one = self.0.for_segment(segment_local_id, segment)?;
        let two = self.1.for_segment(segment_local_id, segment)?;

@@ -24,7 +24,7 @@ impl<TCollector: Collector> Collector for CollectorWrapper<TCollector> {
    fn for_segment(
        &self,
        segment_local_id: u32,
        reader: &SegmentReader,
        reader: &dyn SegmentReader,
    ) -> crate::Result<Box<dyn BoxableSegmentCollector>> {
        let child = self.0.for_segment(segment_local_id, reader)?;
        Ok(Box::new(SegmentCollectorWrapper(child)))
@@ -209,7 +209,7 @@ impl Collector for MultiCollector<'_> {
    fn for_segment(
        &self,
        segment_local_id: SegmentOrdinal,
        segment: &SegmentReader,
        segment: &dyn SegmentReader,
    ) -> crate::Result<MultiCollectorChild> {
        let children = self
            .collector_wrappers

@@ -1,4 +1,5 @@
mod order;
mod sort_by_bytes;
mod sort_by_erased_type;
mod sort_by_score;
mod sort_by_static_fast_value;
@@ -6,6 +7,7 @@ mod sort_by_string;
mod sort_key_computer;

pub use order::*;
pub use sort_by_bytes::SortByBytes;
pub use sort_by_erased_type::SortByErasedType;
pub use sort_by_score::SortBySimilarityScore;
pub use sort_by_static_fast_value::SortByStaticFastValue;

@@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};

use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
use crate::schema::{OwnedValue, Schema};
use crate::{DocId, Order, Score};
use crate::{DocId, Order, Score, SegmentReader};

fn compare_owned_value<const NULLS_FIRST: bool>(lhs: &OwnedValue, rhs: &OwnedValue) -> Ordering {
    match (lhs, rhs) {
@@ -430,7 +430,7 @@ where

    fn segment_sort_key_computer(
        &self,
        segment_reader: &crate::SegmentReader,
        segment_reader: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        let child = self.0.segment_sort_key_computer(segment_reader)?;
        Ok(SegmentSortKeyComputerWithComparator {
@@ -468,7 +468,7 @@ where

    fn segment_sort_key_computer(
        &self,
        segment_reader: &crate::SegmentReader,
        segment_reader: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        let child = self.0.segment_sort_key_computer(segment_reader)?;
        Ok(SegmentSortKeyComputerWithComparator {
168
src/collector/sort_key/sort_by_bytes.rs
Normal file
@@ -0,0 +1,168 @@
use columnar::BytesColumn;

use crate::collector::sort_key::NaturalComparator;
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
use crate::termdict::TermOrdinal;
use crate::{DocId, Score};

/// Sort by the first value of a bytes column.
///
/// If the field is multivalued, only the first value is considered.
///
/// Documents that do not have a value are still considered.
/// Their sort key will simply be `None`.
#[derive(Debug, Clone)]
pub struct SortByBytes {
    column_name: String,
}

impl SortByBytes {
    /// Creates a new sort by bytes sort key computer.
    pub fn for_field(column_name: impl ToString) -> Self {
        SortByBytes {
            column_name: column_name.to_string(),
        }
    }
}

impl SortKeyComputer for SortByBytes {
    type SortKey = Option<Vec<u8>>;
    type Child = ByBytesColumnSegmentSortKeyComputer;
    type Comparator = NaturalComparator;

    fn segment_sort_key_computer(
        &self,
        segment_reader: &dyn crate::SegmentReader,
    ) -> crate::Result<Self::Child> {
        let bytes_column_opt = segment_reader.fast_fields().bytes(&self.column_name)?;
        Ok(ByBytesColumnSegmentSortKeyComputer { bytes_column_opt })
    }
}

/// Segment-level sort key computer for bytes columns.
pub struct ByBytesColumnSegmentSortKeyComputer {
    bytes_column_opt: Option<BytesColumn>,
}

impl SegmentSortKeyComputer for ByBytesColumnSegmentSortKeyComputer {
    type SortKey = Option<Vec<u8>>;
    type SegmentSortKey = Option<TermOrdinal>;
    type SegmentComparator = NaturalComparator;

    #[inline(always)]
    fn segment_sort_key(&mut self, doc: DocId, _score: Score) -> Option<TermOrdinal> {
        let bytes_column = self.bytes_column_opt.as_ref()?;
        bytes_column.ords().first(doc)
    }

    fn convert_segment_sort_key(&self, term_ord_opt: Option<TermOrdinal>) -> Option<Vec<u8>> {
        // TODO: Individual lookups to the dictionary like this are very likely to repeatedly
        // decompress the same blocks. See https://github.com/quickwit-oss/tantivy/issues/2776
        let term_ord = term_ord_opt?;
        let bytes_column = self.bytes_column_opt.as_ref()?;
        let mut bytes = Vec::new();
        bytes_column
            .dictionary()
            .ord_to_term(term_ord, &mut bytes)
            .ok()?;
        Some(bytes)
    }
}
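Worth noting is the two-phase design above: within a segment, documents are compared by cheap `TermOrdinal`s without touching the dictionary, and only keys that survive the top-k cut get resolved to actual bytes. A hedged sketch of that flow (`computer`, `doc`, and `score` assumed in scope):

// Sketch only.
// Phase 1, per document: ordinal lookup, no dictionary decompression.
let seg_key: Option<TermOrdinal> = computer.segment_sort_key(doc, score);
// Phase 2, only for surviving top-k entries: resolve the ordinal to bytes.
let global_key: Option<Vec<u8>> = computer.convert_segment_sort_key(seg_key);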

#[cfg(test)]
mod tests {
    use super::SortByBytes;
    use crate::collector::TopDocs;
    use crate::query::AllQuery;
    use crate::schema::{BytesOptions, Schema, FAST, INDEXED};
    use crate::{Index, IndexWriter, Order, TantivyDocument};

    #[test]
    fn test_sort_by_bytes_asc() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let bytes_field = schema_builder
            .add_bytes_field("data", BytesOptions::default().set_fast().set_indexed());
        let id_field = schema_builder.add_u64_field("id", FAST | INDEXED);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut index_writer: IndexWriter = index.writer_for_tests()?;

        // Insert documents with byte values in non-sorted order
        let test_data: Vec<(u64, Vec<u8>)> = vec![
            (1, vec![0x02, 0x00]),
            (2, vec![0x00, 0x10]),
            (3, vec![0x01, 0x00]),
            (4, vec![0x00, 0x20]),
        ];

        for (id, bytes) in &test_data {
            let mut doc = TantivyDocument::new();
            doc.add_u64(id_field, *id);
            doc.add_bytes(bytes_field, bytes);
            index_writer.add_document(doc)?;
        }
        index_writer.commit()?;

        let reader = index.reader()?;
        let searcher = reader.searcher();

        // Sort ascending by bytes
        let top_docs =
            TopDocs::with_limit(10).order_by((SortByBytes::for_field("data"), Order::Asc));
        let results: Vec<(Option<Vec<u8>>, _)> = searcher.search(&AllQuery, &top_docs)?;

        // Expected order: [0x00,0x10], [0x00,0x20], [0x01,0x00], [0x02,0x00]
        let sorted_bytes: Vec<Option<Vec<u8>>> = results.into_iter().map(|(b, _)| b).collect();
        assert_eq!(
            sorted_bytes,
            vec![
                Some(vec![0x00, 0x10]),
                Some(vec![0x00, 0x20]),
                Some(vec![0x01, 0x00]),
                Some(vec![0x02, 0x00]),
            ]
        );

        Ok(())
    }

    #[test]
    fn test_sort_by_bytes_desc() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let bytes_field = schema_builder
            .add_bytes_field("data", BytesOptions::default().set_fast().set_indexed());
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut index_writer: IndexWriter = index.writer_for_tests()?;

        let test_data: Vec<Vec<u8>> = vec![vec![0x00, 0x10], vec![0x02, 0x00], vec![0x01, 0x00]];

        for bytes in &test_data {
            let mut doc = TantivyDocument::new();
            doc.add_bytes(bytes_field, bytes);
            index_writer.add_document(doc)?;
        }
        index_writer.commit()?;

        let reader = index.reader()?;
        let searcher = reader.searcher();

        // Sort descending by bytes
        let top_docs =
            TopDocs::with_limit(10).order_by((SortByBytes::for_field("data"), Order::Desc));
        let results: Vec<(Option<Vec<u8>>, _)> = searcher.search(&AllQuery, &top_docs)?;

        // Expected order (descending): [0x02,0x00], [0x01,0x00], [0x00,0x10]
        let sorted_bytes: Vec<Option<Vec<u8>>> = results.into_iter().map(|(b, _)| b).collect();
        assert_eq!(
            sorted_bytes,
            vec![
                Some(vec![0x02, 0x00]),
                Some(vec![0x01, 0x00]),
                Some(vec![0x00, 0x10]),
            ]
        );

        Ok(())
    }
}
@@ -1,12 +1,12 @@
use columnar::{ColumnType, MonotonicallyMappableToU64};

use crate::collector::sort_key::{
    NaturalComparator, SortBySimilarityScore, SortByStaticFastValue, SortByString,
    NaturalComparator, SortByBytes, SortBySimilarityScore, SortByStaticFastValue, SortByString,
};
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
use crate::fastfield::FastFieldNotAvailableError;
use crate::schema::OwnedValue;
use crate::{DateTime, DocId, Score};
use crate::{DateTime, DocId, Score, SegmentReader};

/// Sort by the boxed / OwnedValue representation of either a fast field, or of the score.
///
@@ -86,7 +86,7 @@ impl SortKeyComputer for SortByErasedType {

    fn segment_sort_key_computer(
        &self,
        segment_reader: &crate::SegmentReader,
        segment_reader: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        let inner: Box<dyn ErasedSegmentSortKeyComputer> = match self {
            Self::Field(column_name) => {
@@ -114,6 +114,16 @@ impl SortKeyComputer for SortByErasedType {
                    },
                })
            }
            ColumnType::Bytes => {
                let computer = SortByBytes::for_field(column_name);
                let inner = computer.segment_sort_key_computer(segment_reader)?;
                Box::new(ErasedSegmentSortKeyComputerWrapper {
                    inner,
                    converter: |val: Option<Vec<u8>>| {
                        val.map(OwnedValue::Bytes).unwrap_or(OwnedValue::Null)
                    },
                })
            }
            ColumnType::U64 => {
                let computer = SortByStaticFastValue::<u64>::for_field(column_name);
                let inner = computer.segment_sort_key_computer(segment_reader)?;
@@ -281,6 +291,65 @@ mod tests {
        );
    }

    #[test]
    fn test_sort_by_owned_bytes() {
        let mut schema_builder = Schema::builder();
        let data_field = schema_builder.add_bytes_field("data", FAST);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut writer = index.writer_for_tests().unwrap();
        writer
            .add_document(doc!(data_field => vec![0x03u8, 0x00]))
            .unwrap();
        writer
            .add_document(doc!(data_field => vec![0x01u8, 0x00]))
            .unwrap();
        writer
            .add_document(doc!(data_field => vec![0x02u8, 0x00]))
            .unwrap();
        writer.add_document(doc!()).unwrap();
        writer.commit().unwrap();

        let reader = index.reader().unwrap();
        let searcher = reader.searcher();

        // Sort descending (Natural - highest first)
        let collector = TopDocs::with_limit(10)
            .order_by((SortByErasedType::for_field("data"), ComparatorEnum::Natural));
        let top_docs = searcher.search(&AllQuery, &collector).unwrap();

        let values: Vec<OwnedValue> = top_docs.into_iter().map(|(key, _)| key).collect();

        assert_eq!(
            values,
            vec![
                OwnedValue::Bytes(vec![0x03, 0x00]),
                OwnedValue::Bytes(vec![0x02, 0x00]),
                OwnedValue::Bytes(vec![0x01, 0x00]),
                OwnedValue::Null
            ]
        );

        // Sort ascending (ReverseNoneLower - lowest first, nulls last)
        let collector = TopDocs::with_limit(10).order_by((
            SortByErasedType::for_field("data"),
            ComparatorEnum::ReverseNoneLower,
        ));
        let top_docs = searcher.search(&AllQuery, &collector).unwrap();

        let values: Vec<OwnedValue> = top_docs.into_iter().map(|(key, _)| key).collect();

        assert_eq!(
            values,
            vec![
                OwnedValue::Bytes(vec![0x01, 0x00]),
                OwnedValue::Bytes(vec![0x02, 0x00]),
                OwnedValue::Bytes(vec![0x03, 0x00]),
                OwnedValue::Null
            ]
        );
    }

    #[test]
    fn test_sort_by_owned_reverse() {
        let mut schema_builder = Schema::builder();

@@ -1,6 +1,6 @@
use crate::collector::sort_key::NaturalComparator;
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer, TopNComputer};
use crate::{DocAddress, DocId, Score};
use crate::{DocAddress, DocId, Score, SegmentReader};

/// Sort by similarity score.
#[derive(Clone, Debug, Copy)]
@@ -19,7 +19,7 @@ impl SortKeyComputer for SortBySimilarityScore {

    fn segment_sort_key_computer(
        &self,
        _segment_reader: &crate::SegmentReader,
        _segment_reader: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        Ok(SortBySimilarityScore)
    }
@@ -29,7 +29,7 @@ impl SortKeyComputer for SortBySimilarityScore {
        &self,
        k: usize,
        weight: &dyn crate::query::Weight,
        reader: &crate::SegmentReader,
        reader: &dyn SegmentReader,
        segment_ord: u32,
    ) -> crate::Result<Vec<(Self::SortKey, DocAddress)>> {
        let mut top_n: TopNComputer<Score, DocId, Self::Comparator> =

@@ -61,7 +61,7 @@ impl<T: FastValue> SortKeyComputer for SortByStaticFastValue<T> {

    fn segment_sort_key_computer(
        &self,
        segment_reader: &SegmentReader,
        segment_reader: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        let sort_column_opt = segment_reader.fast_fields().u64_lenient(&self.field)?;
        let (sort_column, _sort_column_type) =

@@ -3,7 +3,7 @@ use columnar::StrColumn;
use crate::collector::sort_key::NaturalComparator;
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
use crate::termdict::TermOrdinal;
use crate::{DocId, Score};
use crate::{DocId, Score, SegmentReader};

/// Sort by the first value of a string column.
///
@@ -35,7 +35,7 @@ impl SortKeyComputer for SortByString {

    fn segment_sort_key_computer(
        &self,
        segment_reader: &crate::SegmentReader,
        segment_reader: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        let str_column_opt = segment_reader.fast_fields().str(&self.column_name)?;
        Ok(ByStringColumnSegmentSortKeyComputer { str_column_opt })

@@ -119,7 +119,7 @@ pub trait SortKeyComputer: Sync {
        &self,
        k: usize,
        weight: &dyn crate::query::Weight,
        reader: &crate::SegmentReader,
        reader: &dyn SegmentReader,
        segment_ord: u32,
    ) -> crate::Result<Vec<(Self::SortKey, DocAddress)>> {
        let with_scoring = self.requires_scoring();
@@ -135,7 +135,7 @@ pub trait SortKeyComputer: Sync {
    }

    /// Builds a child sort key computer for a specific segment.
    fn segment_sort_key_computer(&self, segment_reader: &SegmentReader) -> Result<Self::Child>;
    fn segment_sort_key_computer(&self, segment_reader: &dyn SegmentReader) -> Result<Self::Child>;
}

impl<HeadSortKeyComputer, TailSortKeyComputer> SortKeyComputer
@@ -156,7 +156,7 @@ where
        (self.0.comparator(), self.1.comparator())
    }

    fn segment_sort_key_computer(&self, segment_reader: &SegmentReader) -> Result<Self::Child> {
    fn segment_sort_key_computer(&self, segment_reader: &dyn SegmentReader) -> Result<Self::Child> {
        Ok((
            self.0.segment_sort_key_computer(segment_reader)?,
            self.1.segment_sort_key_computer(segment_reader)?,
@@ -357,7 +357,7 @@ where
        )
    }

    fn segment_sort_key_computer(&self, segment_reader: &SegmentReader) -> Result<Self::Child> {
    fn segment_sort_key_computer(&self, segment_reader: &dyn SegmentReader) -> Result<Self::Child> {
        let sort_key_computer1 = self.0.segment_sort_key_computer(segment_reader)?;
        let sort_key_computer2 = self.1.segment_sort_key_computer(segment_reader)?;
        let sort_key_computer3 = self.2.segment_sort_key_computer(segment_reader)?;
@@ -420,7 +420,7 @@ where
        SortKeyComputer4::Comparator,
    );

    fn segment_sort_key_computer(&self, segment_reader: &SegmentReader) -> Result<Self::Child> {
    fn segment_sort_key_computer(&self, segment_reader: &dyn SegmentReader) -> Result<Self::Child> {
        let sort_key_computer1 = self.0.segment_sort_key_computer(segment_reader)?;
        let sort_key_computer2 = self.1.segment_sort_key_computer(segment_reader)?;
        let sort_key_computer3 = self.2.segment_sort_key_computer(segment_reader)?;
@@ -454,7 +454,7 @@ where

impl<F, SegmentF, TSortKey> SortKeyComputer for F
where
    F: 'static + Send + Sync + Fn(&SegmentReader) -> SegmentF,
    F: 'static + Send + Sync + Fn(&dyn SegmentReader) -> SegmentF,
    SegmentF: 'static + FnMut(DocId) -> TSortKey,
    TSortKey: 'static + PartialOrd + Clone + Send + Sync + std::fmt::Debug,
{
@@ -462,7 +462,7 @@ where
    type Child = SegmentF;
    type Comparator = NaturalComparator;

    fn segment_sort_key_computer(&self, segment_reader: &SegmentReader) -> Result<Self::Child> {
    fn segment_sort_key_computer(&self, segment_reader: &dyn SegmentReader) -> Result<Self::Child> {
        Ok((self)(segment_reader))
    }
}
@@ -509,10 +509,10 @@ mod tests {

    #[test]
    fn test_lazy_score_computer() {
        let score_computer_primary = |_segment_reader: &SegmentReader| |_doc: DocId| 200u32;
        let score_computer_primary = |_segment_reader: &dyn SegmentReader| |_doc: DocId| 200u32;
        let call_count = Arc::new(AtomicUsize::new(0));
        let call_count_clone = call_count.clone();
        let score_computer_secondary = move |_segment_reader: &SegmentReader| {
        let score_computer_secondary = move |_segment_reader: &dyn SegmentReader| {
            let call_count_new_clone = call_count_clone.clone();
            move |_doc: DocId| {
                call_count_new_clone.fetch_add(1, AtomicOrdering::SeqCst);
@@ -572,10 +572,10 @@ mod tests {

    #[test]
    fn test_lazy_score_computer_dynamic_ordering() {
        let score_computer_primary = |_segment_reader: &SegmentReader| |_doc: DocId| 200u32;
        let score_computer_primary = |_segment_reader: &dyn SegmentReader| |_doc: DocId| 200u32;
        let call_count = Arc::new(AtomicUsize::new(0));
        let call_count_clone = call_count.clone();
        let score_computer_secondary = move |_segment_reader: &SegmentReader| {
        let score_computer_secondary = move |_segment_reader: &dyn SegmentReader| {
            let call_count_new_clone = call_count_clone.clone();
            move |_doc: DocId| {
                call_count_new_clone.fetch_add(1, AtomicOrdering::SeqCst);

@@ -32,7 +32,11 @@ where TSortKeyComputer: SortKeyComputer + Send + Sync + 'static
        self.sort_key_computer.check_schema(schema)
    }

    fn for_segment(&self, segment_ord: u32, segment_reader: &SegmentReader) -> Result<Self::Child> {
    fn for_segment(
        &self,
        segment_ord: u32,
        segment_reader: &dyn SegmentReader,
    ) -> Result<Self::Child> {
        let segment_sort_key_computer = self
            .sort_key_computer
            .segment_sort_key_computer(segment_reader)?;
@@ -63,7 +67,7 @@ where TSortKeyComputer: SortKeyComputer + Send + Sync + 'static
        &self,
        weight: &dyn Weight,
        segment_ord: u32,
        reader: &SegmentReader,
        reader: &dyn SegmentReader,
    ) -> crate::Result<Vec<(TSortKeyComputer::SortKey, DocAddress)>> {
        let k = self.doc_range.end;
        let docs = self

@@ -5,7 +5,7 @@ use crate::query::{AllQuery, QueryParser};
use crate::schema::{Schema, FAST, TEXT};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::{DateTime, DocAddress, Index, Searcher, TantivyDocument};
use crate::{DateTime, DocAddress, Index, Searcher, SegmentReader, TantivyDocument};

pub const TEST_COLLECTOR_WITH_SCORE: TestCollector = TestCollector {
    compute_score: true,
@@ -109,7 +109,7 @@ impl Collector for TestCollector {
    fn for_segment(
        &self,
        segment_id: SegmentOrdinal,
        _reader: &SegmentReader,
        _reader: &dyn SegmentReader,
    ) -> crate::Result<TestSegmentCollector> {
        Ok(TestSegmentCollector {
            segment_id,
@@ -180,7 +180,7 @@ impl Collector for FastFieldTestCollector {
    fn for_segment(
        &self,
        _: SegmentOrdinal,
        segment_reader: &SegmentReader,
        segment_reader: &dyn SegmentReader,
    ) -> crate::Result<FastFieldSegmentCollector> {
        let reader = segment_reader
            .fast_fields()
@@ -243,7 +243,7 @@ impl Collector for BytesFastFieldTestCollector {
    fn for_segment(
        &self,
        _segment_local_id: u32,
        segment_reader: &SegmentReader,
        segment_reader: &dyn SegmentReader,
    ) -> crate::Result<BytesFastFieldSegmentCollector> {
        let column_opt = segment_reader.fast_fields().bytes(&self.field)?;
        Ok(BytesFastFieldSegmentCollector {

@@ -393,7 +393,7 @@ impl TopDocs {
    /// // This is where we build our collector with our custom score.
    /// let top_docs_by_custom_score = TopDocs
    /// ::with_limit(10)
    /// .tweak_score(move |segment_reader: &SegmentReader| {
    /// .tweak_score(move |segment_reader: &dyn SegmentReader| {
    /// // The argument is a function that returns our scoring
    /// // function.
    /// //
@@ -442,7 +442,7 @@ pub struct TweakScoreFn<F>(F);

impl<F, TTweakScoreSortKeyFn, TSortKey> SortKeyComputer for TweakScoreFn<F>
where
    F: 'static + Send + Sync + Fn(&SegmentReader) -> TTweakScoreSortKeyFn,
    F: 'static + Send + Sync + Fn(&dyn SegmentReader) -> TTweakScoreSortKeyFn,
    TTweakScoreSortKeyFn: 'static + Fn(DocId, Score) -> TSortKey,
    TweakScoreSegmentSortKeyComputer<TTweakScoreSortKeyFn>:
        SegmentSortKeyComputer<SortKey = TSortKey, SegmentSortKey = TSortKey>,
@@ -458,7 +458,7 @@ where

    fn segment_sort_key_computer(
        &self,
        segment_reader: &SegmentReader,
        segment_reader: &dyn SegmentReader,
    ) -> crate::Result<Self::Child> {
        Ok({
            TweakScoreSegmentSortKeyComputer {
@@ -1525,7 +1525,7 @@ mod tests {
        let text_query = query_parser.parse_query("droopy tax")?;
        let collector = TopDocs::with_limit(2)
            .and_offset(1)
            .order_by(move |_segment_reader: &SegmentReader| move |doc: DocId| doc);
            .order_by(move |_segment_reader: &dyn SegmentReader| move |doc: DocId| doc);
        let score_docs: Vec<(u32, DocAddress)> =
|
||||
index.reader()?.searcher().search(&text_query, &collector)?;
|
||||
assert_eq!(
|
||||
@@ -1543,7 +1543,7 @@ mod tests {
|
||||
let text_query = query_parser.parse_query("droopy tax").unwrap();
|
||||
let collector = TopDocs::with_limit(2)
|
||||
.and_offset(1)
|
||||
.order_by(move |_segment_reader: &SegmentReader| move |doc: DocId| doc);
|
||||
.order_by(move |_segment_reader: &dyn SegmentReader| move |doc: DocId| doc);
|
||||
let score_docs: Vec<(u32, DocAddress)> = index
|
||||
.reader()
|
||||
.unwrap()
|
||||
|
||||
@@ -4,7 +4,7 @@ use common::{replace_in_place, JsonPathWriter};
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
use crate::indexer::indexing_term::IndexingTerm;
|
||||
use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
|
||||
use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter as _, PostingsWriterEnum};
|
||||
use crate::schema::document::{ReferenceValue, ReferenceValueLeaf, Value};
|
||||
use crate::schema::{Type, DATE_TIME_PRECISION_INDEXED};
|
||||
use crate::time::format_description::well_known::Rfc3339;
|
||||
@@ -80,7 +80,7 @@ fn index_json_object<'a, V: Value<'a>>(
|
||||
text_analyzer: &mut TextAnalyzer,
|
||||
term_buffer: &mut IndexingTerm,
|
||||
json_path_writer: &mut JsonPathWriter,
|
||||
postings_writer: &mut dyn PostingsWriter,
|
||||
postings_writer: &mut PostingsWriterEnum,
|
||||
ctx: &mut IndexingContext,
|
||||
positions_per_path: &mut IndexingPositionsPerPath,
|
||||
) {
|
||||
@@ -110,7 +110,7 @@ pub(crate) fn index_json_value<'a, V: Value<'a>>(
|
||||
text_analyzer: &mut TextAnalyzer,
|
||||
term_buffer: &mut IndexingTerm,
|
||||
json_path_writer: &mut JsonPathWriter,
|
||||
postings_writer: &mut dyn PostingsWriter,
|
||||
postings_writer: &mut PostingsWriterEnum,
|
||||
ctx: &mut IndexingContext,
|
||||
positions_per_path: &mut IndexingPositionsPerPath,
|
||||
) {
|
||||
|
||||
@@ -36,7 +36,7 @@ pub struct SearcherGeneration {
|
||||
|
||||
impl SearcherGeneration {
|
||||
pub(crate) fn from_segment_readers(
|
||||
segment_readers: &[SegmentReader],
|
||||
segment_readers: &[Arc<dyn SegmentReader>],
|
||||
generation_id: u64,
|
||||
) -> Self {
|
||||
let mut segment_id_to_del_opstamp = BTreeMap::new();
|
||||
@@ -154,13 +154,13 @@ impl Searcher {
|
||||
}
|
||||
|
||||
/// Return the list of segment readers
|
||||
pub fn segment_readers(&self) -> &[SegmentReader] {
|
||||
pub fn segment_readers(&self) -> &[Arc<dyn SegmentReader>] {
|
||||
&self.inner.segment_readers
|
||||
}
|
||||
|
||||
/// Returns the segment_reader associated with the given segment_ord
|
||||
pub fn segment_reader(&self, segment_ord: u32) -> &SegmentReader {
|
||||
&self.inner.segment_readers[segment_ord as usize]
|
||||
pub fn segment_reader(&self, segment_ord: u32) -> &dyn SegmentReader {
|
||||
self.inner.segment_readers[segment_ord as usize].as_ref()
|
||||
}
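Since `segment_readers()` now hands out `&[Arc<dyn SegmentReader>]`, callers go through the trait object; a small sketch (assuming a `searcher: Searcher` in scope):

// Sketch: iterating the trait-object segment readers.
// `segment_id()` is called as a method; the method-reference form
// `SegmentReader::segment_id` no longer applies (see the Debug impl change below).
for (segment_ord, segment_reader) in searcher.segment_readers().iter().enumerate() {
    println!("segment {segment_ord}: {:?}", segment_reader.segment_id());
}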

/// Runs a query on the segment readers wrapped by the searcher.
@@ -229,7 +229,11 @@ impl Searcher {
let segment_readers = self.segment_readers();
let fruits = executor.map(
|(segment_ord, segment_reader)| {
collector.collect_segment(weight.as_ref(), segment_ord as u32, segment_reader)
collector.collect_segment(
weight.as_ref(),
segment_ord as u32,
segment_reader.as_ref(),
)
},
segment_readers.iter().enumerate(),
)?;
@@ -259,7 +263,7 @@ impl From<Arc<SearcherInner>> for Searcher {
pub(crate) struct SearcherInner {
schema: Schema,
index: Index,
segment_readers: Vec<SegmentReader>,
segment_readers: Vec<Arc<dyn SegmentReader>>,
store_readers: Vec<StoreReader>,
generation: TrackedObject<SearcherGeneration>,
}
@@ -269,7 +273,7 @@ impl SearcherInner {
pub(crate) fn new(
schema: Schema,
index: Index,
segment_readers: Vec<SegmentReader>,
segment_readers: Vec<Arc<dyn SegmentReader>>,
generation: TrackedObject<SearcherGeneration>,
doc_store_cache_num_blocks: usize,
) -> io::Result<SearcherInner> {
@@ -301,7 +305,7 @@ impl fmt::Debug for Searcher {
let segment_ids = self
.segment_readers()
.iter()
.map(SegmentReader::segment_id)
.map(|segment_reader| segment_reader.segment_id())
.collect::<Vec<_>>();
write!(f, "Searcher({segment_ids:?})")
}

@@ -21,7 +21,7 @@ use std::path::PathBuf;
pub use common::file_slice::{FileHandle, FileSlice};
pub use common::{AntiCallToken, OwnedBytes, TerminatingWrite};

pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::directory::{Directory, DirectoryClone, DirectoryLock};
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
pub use self::ram_directory::RamDirectory;
@@ -52,7 +52,7 @@ pub use self::mmap_directory::MmapDirectory;
///
/// `WritePtr` is required to implement both Write
/// and Seek.
pub type WritePtr = BufWriter<Box<dyn TerminatingWrite>>;
pub type WritePtr = BufWriter<Box<dyn TerminatingWrite + Send + Sync>>;

#[cfg(test)]
mod tests;

@@ -1,4 +1,7 @@
use std::borrow::{Borrow, BorrowMut};
use std::borrow::BorrowMut;
use std::ops::{Deref as _, DerefMut as _};

use common::BitSet;

use crate::fastfield::AliveBitSet;
use crate::DocId;
@@ -130,6 +133,19 @@ pub trait DocSet: Send {
buffer.len()
}

/// Fills the given bitset with the documents in the docset.
///
/// If the docset max_doc is smaller than the largest doc, this function might not consume the
/// docset entirely.
fn fill_bitset(&mut self, bitset: &mut BitSet) {
let bitset_max_value: u32 = bitset.max_value();
let mut doc = self.doc();
while doc < bitset_max_value {
bitset.insert(doc);
doc = self.advance();
}
}
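A toy illustration of the new default method, under the assumption of some `docset: &mut dyn DocSet` positioned on its first document and the `common::BitSet` constructor `with_max_value`:

// Sketch: fill a bitset sized for max_doc = 64 from any DocSet.
// Documents >= 64 are not inserted, and the docset may be left unconsumed.
let mut bitset = BitSet::with_max_value(64);
docset.fill_bitset(&mut bitset);
assert!(bitset.len() <= 64);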

/// Returns the current document
/// Right after creating a new `DocSet`, the docset points to the first document.
///
@@ -233,51 +249,59 @@ impl DocSet for &mut dyn DocSet {
fn count_including_deleted(&mut self) -> u32 {
(**self).count_including_deleted()
}

fn fill_bitset(&mut self, bitset: &mut BitSet) {
(**self).fill_bitset(bitset);
}
}

impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
#[inline]
fn advance(&mut self) -> DocId {
let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.advance()
self.deref_mut().advance()
}

#[inline]
fn seek(&mut self, target: DocId) -> DocId {
let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.seek(target)
self.deref_mut().seek(target)
}

#[inline]
fn seek_danger(&mut self, target: DocId) -> SeekDangerResult {
let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.seek_danger(target)
}

#[inline]
fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.fill_buffer(buffer)
self.deref_mut().fill_buffer(buffer)
}

#[inline]
fn doc(&self) -> DocId {
let unboxed: &TDocSet = self.borrow();
unboxed.doc()
self.deref().doc()
}

#[inline]
fn size_hint(&self) -> u32 {
let unboxed: &TDocSet = self.borrow();
unboxed.size_hint()
self.deref().size_hint()
}

#[inline]
fn cost(&self) -> u64 {
let unboxed: &TDocSet = self.borrow();
unboxed.cost()
self.deref().cost()
}

#[inline]
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.count(alive_bitset)
self.deref_mut().count(alive_bitset)
}

fn count_including_deleted(&mut self) -> u32 {
let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.count_including_deleted()
self.deref_mut().count_including_deleted()
}

fn fill_bitset(&mut self, bitset: &mut BitSet) {
self.deref_mut().fill_bitset(bitset);
}
}

@@ -96,7 +96,7 @@ mod tests {
};
use crate::time::OffsetDateTime;
use crate::tokenizer::{LowerCaser, RawTokenizer, TextAnalyzer, TokenizerManager};
use crate::{Index, IndexWriter, SegmentReader};
use crate::{Index, IndexWriter};

pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
let mut schema_builder = Schema::builder();
@@ -430,7 +430,7 @@ mod tests {
.searcher()
.segment_readers()
.iter()
.map(SegmentReader::segment_id)
.map(|segment_reader| segment_reader.segment_id())
.collect();
assert_eq!(segment_ids.len(), 2);
index_writer.merge(&segment_ids[..]).wait().unwrap();

src/index/codec_configuration.rs (new file)
@@ -0,0 +1,49 @@
use std::borrow::Cow;

use serde::{Deserialize, Serialize};

use crate::codec::{Codec, StandardCodec};

/// A Codec configuration is just a serializable object.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct CodecConfiguration {
codec_id: Cow<'static, str>,
#[serde(default, skip_serializing_if = "serde_json::Value::is_null")]
props: serde_json::Value,
}

impl CodecConfiguration {
/// Returns true if the codec is the standard codec.
pub fn is_standard(&self) -> bool {
self.codec_id == StandardCodec::ID && self.props.is_null()
}

/// Creates a codec instance from the configuration.
///
/// If the codec id does not match the codec's name, an error is returned.
pub fn to_codec<C: Codec>(&self) -> crate::Result<C> {
if self.codec_id != C::ID {
return Err(crate::TantivyError::InvalidArgument(format!(
"Codec id mismatch: expected {}, got {}",
C::ID,
self.codec_id
)));
}
C::from_json_props(&self.props)
}
}

impl<'a, C: Codec> From<&'a C> for CodecConfiguration {
fn from(codec: &'a C) -> Self {
CodecConfiguration {
codec_id: Cow::Borrowed(C::ID),
props: codec.to_json_props(),
}
}
}

impl Default for CodecConfiguration {
fn default() -> Self {
CodecConfiguration::from(&StandardCodec)
}
}
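A short round-trip sketch for the new configuration type; it assumes the `Codec` trait items referenced above (`ID`, `to_json_props`, `from_json_props`):

// Sketch: a codec serializes into a CodecConfiguration and back.
let conf = CodecConfiguration::from(&StandardCodec);
assert!(conf.is_standard());
let _codec: StandardCodec = conf.to_codec()?;
// Requesting a different codec type would yield
// TantivyError::InvalidArgument("Codec id mismatch: ...").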
@@ -8,12 +8,14 @@ use std::thread::available_parallelism;
use super::segment::Segment;
use super::segment_reader::merge_field_meta_data;
use super::{FieldMetadata, IndexSettings};
use crate::codec::StandardCodec;
use crate::core::{Executor, META_FILEPATH};
use crate::directory::error::OpenReadError;
#[cfg(feature = "mmap")]
use crate::directory::MmapDirectory;
use crate::directory::{Directory, ManagedDirectory, RamDirectory, INDEX_WRITER_LOCK};
use crate::error::{DataCorruption, TantivyError};
use crate::index::codec_configuration::CodecConfiguration;
use crate::index::{IndexMeta, SegmentId, SegmentMeta, SegmentMetaInventory};
use crate::indexer::index_writer::{
IndexWriterOptions, MAX_NUM_THREAD, MEMORY_BUDGET_NUM_BYTES_MIN,
@@ -24,7 +26,6 @@ use crate::reader::{IndexReader, IndexReaderBuilder};
use crate::schema::document::Document;
use crate::schema::{Field, FieldType, Schema};
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
use crate::SegmentReader;

fn load_metas(
directory: &dyn Directory,
@@ -59,6 +60,7 @@ fn save_new_metas(
schema: Schema,
index_settings: IndexSettings,
directory: &dyn Directory,
codec: CodecConfiguration,
) -> crate::Result<()> {
save_metas(
&IndexMeta {
@@ -67,6 +69,7 @@ fn save_new_metas(
schema,
opstamp: 0u64,
payload: None,
codec,
},
directory,
)?;
@@ -101,18 +104,21 @@ fn save_new_metas(
/// };
/// let index = Index::builder().schema(schema).settings(settings).create_in_ram();
/// ```
pub struct IndexBuilder {
pub struct IndexBuilder<Codec: crate::codec::Codec = StandardCodec> {
schema: Option<Schema>,
index_settings: IndexSettings,
tokenizer_manager: TokenizerManager,
fast_field_tokenizer_manager: TokenizerManager,
codec: Codec,
}
impl Default for IndexBuilder {

impl Default for IndexBuilder<StandardCodec> {
fn default() -> Self {
IndexBuilder::new()
}
}
impl IndexBuilder {

impl IndexBuilder<StandardCodec> {
/// Creates a new `IndexBuilder`
pub fn new() -> Self {
Self {
@@ -120,6 +126,21 @@ impl IndexBuilder {
index_settings: IndexSettings::default(),
tokenizer_manager: TokenizerManager::default(),
fast_field_tokenizer_manager: TokenizerManager::default(),
codec: StandardCodec,
}
}
}

impl<Codec: crate::codec::Codec> IndexBuilder<Codec> {
/// Set the codec
#[must_use]
pub fn codec<NewCodec: crate::codec::Codec>(self, codec: NewCodec) -> IndexBuilder<NewCodec> {
IndexBuilder {
schema: self.schema,
index_settings: self.index_settings,
tokenizer_manager: self.tokenizer_manager,
fast_field_tokenizer_manager: self.fast_field_tokenizer_manager,
codec,
}
}
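Usage sketch for the builder method above; selecting a codec rewrites the builder's type parameter, so the created index is typed `Index<NewCodec>` (here the standard codec, which is also the default):

// Sketch: explicitly selecting a codec at index creation time.
let index: Index<StandardCodec> = Index::builder()
    .schema(schema)
    .codec(StandardCodec)
    .create_in_ram()?;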

@@ -154,7 +175,7 @@ impl IndexBuilder {
/// The index will be allocated in anonymous memory.
/// This is useful for indexing a small set of documents,
/// for instance in unit tests or for a temporary in-memory index.
pub fn create_in_ram(self) -> Result<Index, TantivyError> {
pub fn create_in_ram(self) -> Result<Index<Codec>, TantivyError> {
let ram_directory = RamDirectory::create();
self.create(ram_directory)
}
@@ -165,7 +186,7 @@ impl IndexBuilder {
/// If a previous index was in this directory, it returns an
/// [`TantivyError::IndexAlreadyExists`] error.
#[cfg(feature = "mmap")]
pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index> {
pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index<Codec>> {
let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::open(directory_path)?);
if Index::exists(&*mmap_directory)? {
return Err(TantivyError::IndexAlreadyExists);
@@ -186,7 +207,7 @@ impl IndexBuilder {
self,
dir: impl Into<Box<dyn Directory>>,
mem_budget: usize,
) -> crate::Result<SingleSegmentIndexWriter<D>> {
) -> crate::Result<SingleSegmentIndexWriter<Codec, D>> {
let index = self.create(dir)?;
let index_simple_writer = SingleSegmentIndexWriter::new(index, mem_budget)?;
Ok(index_simple_writer)
@@ -202,7 +223,7 @@ impl IndexBuilder {
/// For other unit tests, prefer the [`RamDirectory`], see:
/// [`IndexBuilder::create_in_ram()`].
#[cfg(feature = "mmap")]
pub fn create_from_tempdir(self) -> crate::Result<Index> {
pub fn create_from_tempdir(self) -> crate::Result<Index<Codec>> {
let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::create_from_tempdir()?);
self.create(mmap_directory)
}
@@ -215,12 +236,15 @@ impl IndexBuilder {
}

/// Opens or creates a new index in the provided directory
pub fn open_or_create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index> {
pub fn open_or_create<T: Into<Box<dyn Directory>>>(
self,
dir: T,
) -> crate::Result<Index<Codec>> {
let dir: Box<dyn Directory> = dir.into();
if !Index::exists(&*dir)? {
return self.create(dir);
}
let mut index = Index::open(dir)?;
let mut index: Index<Codec> = Index::<Codec>::open_with_codec(dir)?;
index.set_tokenizers(self.tokenizer_manager.clone());
if index.schema() == self.get_expect_schema()? {
Ok(index)
@@ -244,18 +268,25 @@ impl IndexBuilder {
/// Creates a new index given an implementation of the trait `Directory`.
///
/// If a directory previously existed, it will be erased.
fn create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index> {
pub fn create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index<Codec>> {
self.create_avoid_monomorphization(dir.into())
}

fn create_avoid_monomorphization(self, dir: Box<dyn Directory>) -> crate::Result<Index<Codec>> {
self.validate()?;
let dir = dir.into();
let directory = ManagedDirectory::wrap(dir)?;
let codec: CodecConfiguration = CodecConfiguration::from(&self.codec);
save_new_metas(
self.get_expect_schema()?,
self.index_settings.clone(),
&directory,
codec,
)?;
let mut metas = IndexMeta::with_schema(self.get_expect_schema()?);
let schema = self.get_expect_schema()?;
let mut metas = IndexMeta::with_schema_and_codec(schema, &self.codec);
metas.index_settings = self.index_settings;
let mut index = Index::open_from_metas(directory, &metas, SegmentMetaInventory::default());
let mut index: Index<Codec> =
Index::<Codec>::open_from_metas(directory, &metas, SegmentMetaInventory::default())?;
index.set_tokenizers(self.tokenizer_manager);
index.set_fast_field_tokenizers(self.fast_field_tokenizer_manager);
Ok(index)
@@ -264,7 +295,7 @@ impl IndexBuilder {

/// Search Index
#[derive(Clone)]
pub struct Index {
pub struct Index<Codec: crate::codec::Codec = crate::codec::StandardCodec> {
directory: ManagedDirectory,
schema: Schema,
settings: IndexSettings,
@@ -272,6 +303,7 @@ pub struct Index {
tokenizers: TokenizerManager,
fast_field_tokenizers: TokenizerManager,
inventory: SegmentMetaInventory,
codec: Codec,
}

impl Index {
@@ -279,41 +311,6 @@ impl Index {
pub fn builder() -> IndexBuilder {
IndexBuilder::new()
}
/// Examines the directory to see if it contains an index.
///
/// Effectively, it only checks for the presence of the `meta.json` file.
pub fn exists(dir: &dyn Directory) -> Result<bool, OpenReadError> {
dir.exists(&META_FILEPATH)
}

/// Accessor to the search executor.
///
/// This pool is used by default when calling `searcher.search(...)`
/// to perform search on the individual segments.
///
/// By default the executor is single thread, and simply runs in the calling thread.
pub fn search_executor(&self) -> &Executor {
&self.executor
}

/// Replace the default single thread search executor pool
/// by a thread pool with a given number of threads.
pub fn set_multithread_executor(&mut self, num_threads: usize) -> crate::Result<()> {
self.executor = Executor::multi_thread(num_threads, "tantivy-search-")?;
Ok(())
}

/// Replace the executor by a custom outer thread pool.
pub fn set_executor(&mut self, executor: Executor) {
self.executor = executor;
}

/// Replace the default single thread search executor pool
/// by a thread pool with as many threads as there are CPUs on the system.
pub fn set_default_multithread_executor(&mut self) -> crate::Result<()> {
let default_num_threads = available_parallelism()?.get();
self.set_multithread_executor(default_num_threads)
}

/// Creates a new index using the [`RamDirectory`].
///
@@ -324,6 +321,13 @@ impl Index {
IndexBuilder::new().schema(schema).create_in_ram().unwrap()
}

/// Examines the directory to see if it contains an index.
///
/// Effectively, it only checks for the presence of the `meta.json` file.
pub fn exists(directory: &dyn Directory) -> Result<bool, OpenReadError> {
directory.exists(&META_FILEPATH)
}

/// Creates a new index in a given filepath.
/// The index will use the [`MmapDirectory`].
///
@@ -370,20 +374,108 @@ impl Index {
schema: Schema,
settings: IndexSettings,
) -> crate::Result<Index> {
let dir: Box<dyn Directory> = dir.into();
Self::create_to_avoid_monomorphization(dir.into(), schema, settings)
}

fn create_to_avoid_monomorphization(
dir: Box<dyn Directory>,
schema: Schema,
settings: IndexSettings,
) -> crate::Result<Index> {
let mut builder = IndexBuilder::new().schema(schema);
builder = builder.settings(settings);
builder.create(dir)
}

/// Opens a new directory from an index path.
#[cfg(feature = "mmap")]
pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> crate::Result<Index> {
Self::open_in_dir_to_avoid_monomorphization(directory_path.as_ref())
}

#[cfg(feature = "mmap")]
#[inline(never)]
fn open_in_dir_to_avoid_monomorphization(directory_path: &Path) -> crate::Result<Index> {
let mmap_directory = MmapDirectory::open(directory_path)?;
Index::open(mmap_directory)
}

/// Open the index using the provided directory
pub fn open<T: Into<Box<dyn Directory>>>(directory: T) -> crate::Result<Index> {
Index::<StandardCodec>::open_with_codec(directory.into())
}
}

impl<Codec: crate::codec::Codec> Index<Codec> {
/// Returns a version of this index with the standard codec.
/// This is useful when you need to pass the index to APIs that
/// don't care about the codec (e.g., for reading).
pub(crate) fn with_standard_codec(&self) -> Index<StandardCodec> {
Index {
directory: self.directory.clone(),
schema: self.schema.clone(),
settings: self.settings.clone(),
executor: self.executor.clone(),
tokenizers: self.tokenizers.clone(),
fast_field_tokenizers: self.fast_field_tokenizers.clone(),
inventory: self.inventory.clone(),
codec: StandardCodec,
}
}

/// Open the index using the provided directory
#[inline(never)]
pub fn open_with_codec(directory: Box<dyn Directory>) -> crate::Result<Index<Codec>> {
let directory = ManagedDirectory::wrap(directory)?;
let inventory = SegmentMetaInventory::default();
let metas = load_metas(&directory, &inventory)?;
let index: Index<Codec> = Index::<Codec>::open_from_metas(directory, &metas, inventory)?;
Ok(index)
}

/// Accessor to the codec.
pub fn codec(&self) -> &Codec {
&self.codec
}

/// Accessor to the search executor.
///
/// This pool is used by default when calling `searcher.search(...)`
/// to perform search on the individual segments.
///
/// By default the executor is single thread, and simply runs in the calling thread.
pub fn search_executor(&self) -> &Executor {
&self.executor
}

/// Replace the default single thread search executor pool
/// by a thread pool with a given number of threads.
pub fn set_multithread_executor(&mut self, num_threads: usize) -> crate::Result<()> {
self.executor = Executor::multi_thread(num_threads, "tantivy-search-")?;
Ok(())
}

/// Replace the executor by a custom outer thread pool.
pub fn set_executor(&mut self, executor: Executor) {
self.executor = executor;
}

/// Replace the default single thread search executor pool
/// by a thread pool with as many threads as there are CPUs on the system.
pub fn set_default_multithread_executor(&mut self) -> crate::Result<()> {
let default_num_threads = available_parallelism()?.get();
self.set_multithread_executor(default_num_threads)
}

/// Creates a new index given a directory and an [`IndexMeta`].
fn open_from_metas(
fn open_from_metas<C: crate::codec::Codec>(
directory: ManagedDirectory,
metas: &IndexMeta,
inventory: SegmentMetaInventory,
) -> Index {
) -> crate::Result<Index<C>> {
let schema = metas.schema.clone();
Index {
let codec = metas.codec.to_codec::<C>()?;
Ok(Index {
settings: metas.index_settings.clone(),
directory,
schema,
@@ -391,7 +483,8 @@ impl Index {
fast_field_tokenizers: TokenizerManager::default(),
executor: Executor::single_thread(),
inventory,
}
codec,
})
}

/// Setter for the tokenizer manager.
@@ -447,7 +540,7 @@ impl Index {
/// Create a default [`IndexReader`] for the given index.
///
/// See [`Index.reader_builder()`].
pub fn reader(&self) -> crate::Result<IndexReader> {
pub fn reader(&self) -> crate::Result<IndexReader<Codec>> {
self.reader_builder().try_into()
}

@@ -455,17 +548,10 @@ impl Index {
///
/// Most projects should create at most one reader for a given index.
/// This method is typically called only once per `Index` instance.
pub fn reader_builder(&self) -> IndexReaderBuilder {
pub fn reader_builder(&self) -> IndexReaderBuilder<Codec> {
IndexReaderBuilder::new(self.clone())
}

/// Opens a new directory from an index path.
#[cfg(feature = "mmap")]
pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> crate::Result<Index> {
let mmap_directory = MmapDirectory::open(directory_path)?;
Index::open(mmap_directory)
}

/// Returns the list of the segment metas tracked by the index.
///
/// Such segments can of course be part of the index,
@@ -492,7 +578,15 @@ impl Index {
let segments = self.searchable_segments()?;
let fields_metadata: Vec<Vec<FieldMetadata>> = segments
.into_iter()
.map(|segment| SegmentReader::open(&segment)?.fields_metadata())
.map(|segment| {
let segment_reader = segment.index().codec().open_segment_reader(
segment.index().directory(),
segment.meta(),
segment.schema(),
None,
)?;
segment_reader.fields_metadata()
})
.collect::<Result<_, _>>()?;
Ok(merge_field_meta_data(fields_metadata))
}
@@ -506,16 +600,6 @@ impl Index {
self.inventory.new_segment_meta(segment_id, max_doc)
}

/// Open the index using the provided directory
pub fn open<T: Into<Box<dyn Directory>>>(directory: T) -> crate::Result<Index> {
let directory = directory.into();
let directory = ManagedDirectory::wrap(directory)?;
let inventory = SegmentMetaInventory::default();
let metas = load_metas(&directory, &inventory)?;
let index = Index::open_from_metas(directory, &metas, inventory);
Ok(index)
}

/// Reads the index meta file from the directory.
pub fn load_metas(&self) -> crate::Result<IndexMeta> {
load_metas(self.directory(), &self.inventory)
@@ -539,7 +623,7 @@ impl Index {
pub fn writer_with_options<D: Document>(
&self,
options: IndexWriterOptions,
) -> crate::Result<IndexWriter<D>> {
) -> crate::Result<IndexWriter<Codec, D>> {
let directory_lock = self
.directory
.acquire_lock(&INDEX_WRITER_LOCK)
@@ -581,7 +665,7 @@ impl Index {
&self,
num_threads: usize,
overall_memory_budget_in_bytes: usize,
) -> crate::Result<IndexWriter<D>> {
) -> crate::Result<IndexWriter<Codec, D>> {
let memory_arena_in_bytes_per_thread = overall_memory_budget_in_bytes / num_threads;
let options = IndexWriterOptions::builder()
.num_worker_threads(num_threads)
@@ -595,7 +679,7 @@ impl Index {
/// That index writer simply has a single thread and a memory budget of 15 MB.
/// Using a single thread gives us a deterministic allocation of DocId.
#[cfg(test)]
pub fn writer_for_tests<D: Document>(&self) -> crate::Result<IndexWriter<D>> {
pub fn writer_for_tests<D: Document>(&self) -> crate::Result<IndexWriter<Codec, D>> {
self.writer_with_num_threads(1, MEMORY_BUDGET_NUM_BYTES_MIN)
}
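Because the `Codec` parameter on `Index` defaults to `StandardCodec`, existing writer call sites keep compiling unchanged; a sketch:

// Sketch: `Index` here is `Index<StandardCodec>` via the default parameter,
// so this writer is an `IndexWriter<StandardCodec, TantivyDocument>`.
let index = Index::create_in_ram(schema);
let mut writer = index.writer::<TantivyDocument>(50_000_000)?;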

@@ -613,7 +697,7 @@ impl Index {
pub fn writer<D: Document>(
&self,
memory_budget_in_bytes: usize,
) -> crate::Result<IndexWriter<D>> {
) -> crate::Result<IndexWriter<Codec, D>> {
let mut num_threads = std::cmp::min(available_parallelism()?.get(), MAX_NUM_THREAD);
let memory_budget_num_bytes_per_thread = memory_budget_in_bytes / num_threads;
if memory_budget_num_bytes_per_thread < MEMORY_BUDGET_NUM_BYTES_MIN {
@@ -640,7 +724,7 @@ impl Index {
}

/// Returns the list of segments that are searchable
pub fn searchable_segments(&self) -> crate::Result<Vec<Segment>> {
pub fn searchable_segments(&self) -> crate::Result<Vec<Segment<Codec>>> {
Ok(self
.searchable_segment_metas()?
.into_iter()
@@ -649,12 +733,12 @@ impl Index {
}

#[doc(hidden)]
pub fn segment(&self, segment_meta: SegmentMeta) -> Segment {
pub fn segment(&self, segment_meta: SegmentMeta) -> Segment<Codec> {
Segment::for_index(self.clone(), segment_meta)
}

/// Creates a new segment.
pub fn new_segment(&self) -> Segment {
pub fn new_segment(&self) -> Segment<Codec> {
let segment_meta = self
.inventory
.new_segment_meta(SegmentId::generate_random(), 0);
@@ -708,7 +792,7 @@ impl Index {
}

impl fmt::Debug for Index {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Index({:?})", self.directory)
}
}

@@ -5,7 +5,8 @@ use std::path::PathBuf;
use serde::{Deserialize, Serialize};

use super::SegmentComponent;
use crate::index::SegmentId;
use crate::codec::Codec;
use crate::index::{CodecConfiguration, SegmentId};
use crate::schema::Schema;
use crate::store::Compressor;
use crate::{Inventory, Opstamp, TrackedObject};
@@ -286,8 +287,10 @@ pub struct IndexMeta {
/// This payload is entirely unused by tantivy.
#[serde(skip_serializing_if = "Option::is_none")]
pub payload: Option<String>,
/// Codec configuration for the index.
#[serde(skip_serializing_if = "CodecConfiguration::is_standard")]
pub codec: CodecConfiguration,
}

#[derive(Deserialize, Debug)]
struct UntrackedIndexMeta {
pub segments: Vec<InnerSegmentMeta>,
@@ -297,6 +300,8 @@ struct UntrackedIndexMeta {
pub opstamp: Opstamp,
#[serde(skip_serializing_if = "Option::is_none")]
pub payload: Option<String>,
#[serde(default)]
pub codec: CodecConfiguration,
}

impl UntrackedIndexMeta {
@@ -311,6 +316,7 @@ impl UntrackedIndexMeta {
schema: self.schema,
opstamp: self.opstamp,
payload: self.payload,
codec: self.codec,
}
}
}
@@ -321,13 +327,14 @@ impl IndexMeta {
///
/// This new index does not contain any segments.
/// Opstamp will be the value `0u64`.
pub fn with_schema(schema: Schema) -> IndexMeta {
pub fn with_schema_and_codec<C: Codec>(schema: Schema, codec: &C) -> IndexMeta {
IndexMeta {
index_settings: IndexSettings::default(),
segments: vec![],
schema,
opstamp: 0u64,
payload: None,
codec: CodecConfiguration::from(codec),
}
}
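Sketch of the constructor migration implied by the hunk above; the old `IndexMeta::with_schema(schema)` call becomes:

// Sketch: IndexMeta now records which codec wrote the index.
let metas = IndexMeta::with_schema_and_codec(schema, &StandardCodec);
assert!(metas.codec.is_standard());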

@@ -378,14 +385,38 @@ mod tests {
schema,
opstamp: 0u64,
payload: None,
codec: Default::default(),
};
let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
let json_value: serde_json::Value =
serde_json::to_value(&index_metas).expect("serialization failed");
assert_eq!(
json,
r#"{"index_settings":{"docstore_compression":"none","docstore_blocksize":16384},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
&json_value,
&serde_json::json!(
{
"index_settings": {
"docstore_compression": "none",
"docstore_blocksize": 16384
},
"segments": [],
"schema": [
{
"name": "text",
"type": "text",
"options": {
"indexing": {
"record": "position",
"fieldnorms": true,
"tokenizer": "default"
},
"stored": false,
"fast": false
}
}
],
"opstamp": 0
})
);

let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap();
let deser_meta: UntrackedIndexMeta = serde_json::from_value(json_value).unwrap();
assert_eq!(index_metas.index_settings, deser_meta.index_settings);
assert_eq!(index_metas.schema, deser_meta.schema);
assert_eq!(index_metas.opstamp, deser_meta.opstamp);
@@ -411,14 +442,39 @@ mod tests {
schema,
opstamp: 0u64,
payload: None,
codec: Default::default(),
};
let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
let json_value = serde_json::to_value(&index_metas).expect("serialization failed");
assert_eq!(
json,
r#"{"index_settings":{"docstore_compression":"zstd(compression_level=4)","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
&json_value,
&serde_json::json!(
{
"index_settings": {
"docstore_compression": "zstd(compression_level=4)",
"docstore_blocksize": 1000000
},
"segments": [],
"schema": [
{
"name": "text",
"type": "text",
"options": {
"indexing": {
"record": "position",
"fieldnorms": true,
"tokenizer": "default"
},
"stored": false,
"fast": false
}
}
],
"opstamp": 0
}
)
);

let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap();
let deser_meta: UntrackedIndexMeta = serde_json::from_value(json_value).unwrap();
assert_eq!(index_metas.index_settings, deser_meta.index_settings);
assert_eq!(index_metas.schema, deser_meta.schema);
assert_eq!(index_metas.opstamp, deser_meta.opstamp);

@@ -1,7 +1,11 @@
#[cfg(feature = "quickwit")]
use std::future::Future;
use std::io;
#[cfg(feature = "quickwit")]
use std::pin::Pin;

use common::json_path_writer::JSON_END_OF_PATH;
use common::{BinarySerializable, ByteCount};
use common::{BinarySerializable, BitSet, ByteCount, OwnedBytes};
#[cfg(feature = "quickwit")]
use futures_util::{FutureExt, StreamExt, TryStreamExt};
#[cfg(feature = "quickwit")]
@@ -9,25 +13,126 @@ use itertools::Itertools;
#[cfg(feature = "quickwit")]
use tantivy_fst::automaton::{AlwaysMatch, Automaton};

use crate::codec::postings::RawPostingsData;
use crate::codec::standard::postings::{
fill_bitset_from_raw_data, load_postings_from_raw_data, SegmentPostings,
};
use crate::directory::FileSlice;
use crate::positions::PositionReader;
use crate::postings::{BlockSegmentPostings, SegmentPostings, TermInfo};
use crate::docset::DocSet;
use crate::fieldnorm::FieldNormReader;
use crate::postings::{Postings, TermInfo};
use crate::query::term_query::TermScorer;
use crate::query::{box_scorer, Bm25Weight, PhraseScorer, Scorer};
use crate::schema::{IndexRecordOption, Term, Type};
use crate::termdict::TermDictionary;

/// Trait defining the contract for inverted index readers.
pub trait InvertedIndexReader: Send + Sync {
/// Returns the term info associated with the term.
fn get_term_info(&self, term: &Term) -> io::Result<Option<TermInfo>> {
self.terms().get(term.serialized_value_bytes())
}

/// Return the term dictionary datastructure.
fn terms(&self) -> &TermDictionary;

/// Return the fields and types encoded in the dictionary in lexicographic order.
/// Only valid on JSON fields.
///
/// Notice: This requires a full scan and is therefore **very expensive**.
fn list_encoded_json_fields(&self) -> io::Result<Vec<InvertedIndexFieldSpace>>;

/// Build a new term scorer.
fn new_term_scorer(
&self,
term_info: &TermInfo,
option: IndexRecordOption,
fieldnorm_reader: FieldNormReader,
similarity_weight: Bm25Weight,
) -> io::Result<Box<dyn Scorer>>;

/// Returns a posting object given a `term_info`.
/// This method is for an advanced usage only.
///
/// Most users should prefer using [`Self::read_postings()`] instead.
fn read_postings_from_terminfo(
&self,
term_info: &TermInfo,
option: IndexRecordOption,
) -> io::Result<Box<dyn Postings>>;

/// Returns the raw postings bytes and metadata for a term.
fn read_raw_postings_data(
&self,
term_info: &TermInfo,
option: IndexRecordOption,
) -> io::Result<RawPostingsData>;

/// Fills a bitset with documents containing the term.
///
/// Implementers can override this to avoid boxing postings.
fn fill_bitset_for_term(
&self,
term_info: &TermInfo,
option: IndexRecordOption,
doc_bitset: &mut BitSet,
) -> io::Result<()> {
let mut postings = self.read_postings_from_terminfo(term_info, option)?;
postings.fill_bitset(doc_bitset);
Ok(())
}
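Usage sketch for the default `fill_bitset_for_term` above, collecting every document containing a term into a bitset (assuming `max_doc` is known for sizing and the `common::BitSet` constructor `with_max_value`):

// Sketch: resolve the term, then let the reader fill the bitset.
let mut doc_bitset = BitSet::with_max_value(max_doc);
if let Some(term_info) = inverted_index_reader.get_term_info(&term)? {
    inverted_index_reader.fill_bitset_for_term(
        &term_info,
        IndexRecordOption::Basic,
        &mut doc_bitset,
    )?;
}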

/// Builds a phrase scorer for the given term infos.
fn new_phrase_scorer(
&self,
term_infos: &[(usize, TermInfo)],
similarity_weight: Option<Bm25Weight>,
fieldnorm_reader: FieldNormReader,
slop: u32,
) -> io::Result<Box<dyn Scorer>>;

/// Returns the total number of tokens recorded for all documents
/// (including deleted documents).
fn total_num_tokens(&self) -> u64;

/// Returns the segment postings associated with the term, and with the given option,
/// or `None` if the term has never been encountered and indexed.
fn read_postings(
&self,
term: &Term,
option: IndexRecordOption,
) -> io::Result<Option<Box<dyn Postings>>> {
self.get_term_info(term)?
.map(move |term_info| self.read_postings_from_terminfo(&term_info, option))
.transpose()
}

/// Returns the number of documents containing the term.
fn doc_freq(&self, term: &Term) -> io::Result<u32>;

/// Returns the number of documents containing the term asynchronously.
#[cfg(feature = "quickwit")]
fn doc_freq_async<'a>(
&'a self,
term: &'a Term,
) -> Pin<Box<dyn Future<Output = io::Result<u32>> + Send + 'a>>;
}
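With the trait in place, callers can stay codec-agnostic by programming against the trait object; a sketch of a doc-frequency helper:

// Sketch: works for any codec's inverted index reader.
fn term_doc_freq(reader: &dyn InvertedIndexReader, term: &Term) -> std::io::Result<u32> {
    reader.doc_freq(term)
}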
|
||||
|
||||
/// Tantivy's default inverted index reader implementation.
|
||||
///
|
||||
/// The inverted index reader is in charge of accessing
|
||||
/// the inverted index associated with a specific field.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// It is safe to delete the segment associated with
|
||||
/// an `InvertedIndexReader`. As long as it is open,
|
||||
/// an `InvertedIndexReader` implementation. As long as it is open,
|
||||
/// the [`FileSlice`] it is relying on should
|
||||
/// stay available.
|
||||
///
|
||||
/// `InvertedIndexReader` are created by calling
|
||||
/// `TantivyInvertedIndexReader` instances are created by calling
|
||||
/// [`SegmentReader::inverted_index()`](crate::SegmentReader::inverted_index).
|
||||
pub struct InvertedIndexReader {
|
||||
pub struct TantivyInvertedIndexReader {
|
||||
termdict: TermDictionary,
|
||||
postings_file_slice: FileSlice,
|
||||
positions_file_slice: FileSlice,
|
||||
@@ -36,11 +141,16 @@ pub struct InvertedIndexReader {
|
||||
}
|
||||
|
||||
/// Object that records the amount of space used by a field in an inverted index.
|
||||
pub(crate) struct InvertedIndexFieldSpace {
|
||||
pub struct InvertedIndexFieldSpace {
|
||||
/// Field name as encoded in the term dictionary.
|
||||
pub field_name: String,
|
||||
/// Value type for the encoded field.
|
||||
pub field_type: Type,
|
||||
/// Total bytes used by postings for this field.
|
||||
pub postings_size: ByteCount,
|
||||
/// Total bytes used by positions for this field.
|
||||
pub positions_size: ByteCount,
|
||||
/// Number of terms in the field.
|
||||
pub num_terms: u64,
|
||||
}
|
||||
|
||||
@@ -62,16 +172,43 @@ impl InvertedIndexFieldSpace {
|
||||
}
|
||||
}
|
||||
|
||||
impl InvertedIndexReader {
|
||||
impl TantivyInvertedIndexReader {
|
||||
pub(crate) fn read_raw_postings_data_inner(
|
||||
&self,
|
||||
term_info: &TermInfo,
|
||||
option: IndexRecordOption,
|
||||
) -> io::Result<RawPostingsData> {
|
||||
let effective_option = option.downgrade(self.record_option);
|
||||
let postings_data = self
|
||||
.postings_file_slice
|
||||
.slice(term_info.postings_range.clone())
|
||||
.read_bytes()?;
|
||||
let positions_data: Option<OwnedBytes> = if effective_option.has_positions() {
|
||||
let positions_data = self
|
||||
.positions_file_slice
|
||||
.slice(term_info.positions_range.clone())
|
||||
.read_bytes()?;
|
||||
Some(positions_data)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
Ok(RawPostingsData {
|
||||
postings_data,
|
||||
positions_data,
|
||||
record_option: self.record_option,
|
||||
effective_option,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn new(
|
||||
termdict: TermDictionary,
|
||||
postings_file_slice: FileSlice,
|
||||
positions_file_slice: FileSlice,
|
||||
record_option: IndexRecordOption,
|
||||
) -> io::Result<InvertedIndexReader> {
|
||||
) -> io::Result<TantivyInvertedIndexReader> {
|
||||
let (total_num_tokens_slice, postings_body) = postings_file_slice.split(8);
|
||||
let total_num_tokens = u64::deserialize(&mut total_num_tokens_slice.read_bytes()?)?;
|
||||
Ok(InvertedIndexReader {
|
||||
Ok(TantivyInvertedIndexReader {
|
||||
termdict,
|
||||
postings_file_slice: postings_body,
|
||||
positions_file_slice,
|
||||
@@ -80,10 +217,10 @@ impl InvertedIndexReader {
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates an empty `InvertedIndexReader` object, which
|
||||
/// Creates an empty `TantivyInvertedIndexReader` object, which
|
||||
/// contains no terms at all.
|
||||
pub fn empty(record_option: IndexRecordOption) -> InvertedIndexReader {
|
||||
InvertedIndexReader {
|
||||
pub fn empty(record_option: IndexRecordOption) -> TantivyInvertedIndexReader {
|
||||
TantivyInvertedIndexReader {
|
||||
termdict: TermDictionary::empty(),
|
||||
postings_file_slice: FileSlice::empty(),
|
||||
positions_file_slice: FileSlice::empty(),
|
||||
@@ -92,22 +229,22 @@ impl InvertedIndexReader {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the term info associated with the term.
|
||||
pub fn get_term_info(&self, term: &Term) -> io::Result<Option<TermInfo>> {
|
||||
self.termdict.get(term.serialized_value_bytes())
|
||||
fn load_segment_postings(
|
||||
&self,
|
||||
term_info: &TermInfo,
|
||||
option: IndexRecordOption,
|
||||
) -> io::Result<SegmentPostings> {
|
||||
let postings_data = self.read_raw_postings_data_inner(term_info, option)?;
|
||||
load_postings_from_raw_data(term_info.doc_freq, postings_data)
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the term dictionary datastructure.
|
||||
pub fn terms(&self) -> &TermDictionary {
|
||||
impl InvertedIndexReader for TantivyInvertedIndexReader {
|
||||
fn terms(&self) -> &TermDictionary {
|
||||
&self.termdict
|
||||
}
|
||||
|
||||
/// Return the fields and types encoded in the dictionary in lexicographic order.
|
||||
/// Only valid on JSON fields.
|
||||
///
|
||||
/// Notice: This requires a full scan and therefore **very expensive**.
|
||||
/// TODO: Move to sstable to use the index.
|
||||
pub(crate) fn list_encoded_json_fields(&self) -> io::Result<Vec<InvertedIndexFieldSpace>> {
|
||||
fn list_encoded_json_fields(&self) -> io::Result<Vec<InvertedIndexFieldSpace>> {
|
||||
let mut stream = self.termdict.stream()?;
|
||||
let mut fields: Vec<InvertedIndexFieldSpace> = Vec::new();
|
||||
|
||||
@@ -160,129 +297,106 @@ impl InvertedIndexReader {
|
||||
Ok(fields)
|
||||
}
|
||||
|
||||
/// Resets the block segment to another position of the postings
|
||||
/// file.
|
||||
///
|
||||
/// This is useful for enumerating through a list of terms,
|
||||
/// and consuming the associated posting lists while avoiding
|
||||
/// reallocating a [`BlockSegmentPostings`].
|
||||
///
|
||||
/// # Warning
|
||||
///
|
||||
/// This does not reset the positions list.
|
||||
pub fn reset_block_postings_from_terminfo(
|
||||
fn new_term_scorer(
|
||||
&self,
|
||||
term_info: &TermInfo,
|
||||
block_postings: &mut BlockSegmentPostings,
|
||||
option: IndexRecordOption,
|
||||
fieldnorm_reader: FieldNormReader,
|
||||
similarity_weight: Bm25Weight,
|
||||
) -> io::Result<Box<dyn Scorer>> {
|
||||
let postings = self.load_segment_postings(term_info, option)?;
|
||||
let term_scorer = TermScorer::new(postings, fieldnorm_reader, similarity_weight);
|
||||
Ok(box_scorer(term_scorer))
|
||||
}
|
||||
|
||||
fn read_postings_from_terminfo(
|
||||
&self,
|
||||
term_info: &TermInfo,
|
||||
option: IndexRecordOption,
|
||||
) -> io::Result<Box<dyn Postings>> {
|
||||
let postings = self.load_segment_postings(term_info, option)?;
|
||||
Ok(Box::new(postings))
|
||||
}
|
||||
|
||||
fn read_raw_postings_data(
|
||||
&self,
|
||||
term_info: &TermInfo,
|
||||
option: IndexRecordOption,
|
||||
) -> io::Result<RawPostingsData> {
|
||||
self.read_raw_postings_data_inner(term_info, option)
|
||||
}
|
||||
|
||||
fn fill_bitset_for_term(
|
||||
&self,
|
||||
term_info: &TermInfo,
|
||||
option: IndexRecordOption,
|
||||
doc_bitset: &mut BitSet,
|
||||
) -> io::Result<()> {
|
||||
let postings_slice = self
|
||||
.postings_file_slice
|
||||
.slice(term_info.postings_range.clone());
|
||||
let postings_bytes = postings_slice.read_bytes()?;
|
||||
block_postings.reset(term_info.doc_freq, postings_bytes)?;
|
||||
Ok(())
|
||||
let postings_data = self.read_raw_postings_data_inner(term_info, option)?;
|
||||
fill_bitset_from_raw_data(term_info.doc_freq, postings_data, doc_bitset)
|
||||
}
|
||||
|
||||
/// Returns a block postings given a `Term`.
|
||||
/// This method is for an advanced usage only.
|
||||
///
|
||||
/// Most users should prefer using [`Self::read_postings()`] instead.
|
||||
pub fn read_block_postings(
|
||||
fn new_phrase_scorer(
|
||||
&self,
|
||||
term: &Term,
|
||||
option: IndexRecordOption,
|
||||
) -> io::Result<Option<BlockSegmentPostings>> {
|
||||
self.get_term_info(term)?
|
||||
.map(move |term_info| self.read_block_postings_from_terminfo(&term_info, option))
|
||||
.transpose()
|
||||
term_infos: &[(usize, TermInfo)],
|
||||
similarity_weight: Option<Bm25Weight>,
|
||||
fieldnorm_reader: FieldNormReader,
|
||||
slop: u32,
|
||||
) -> io::Result<Box<dyn Scorer>> {
|
||||
let mut offset_and_term_postings: Vec<(usize, SegmentPostings)> =
|
||||
Vec::with_capacity(term_infos.len());
|
||||
for (offset, term_info) in term_infos {
|
||||
let postings =
|
||||
self.load_segment_postings(term_info, IndexRecordOption::WithFreqsAndPositions)?;
|
||||
offset_and_term_postings.push((*offset, postings));
|
||||
}
|
||||
let scorer = PhraseScorer::new(
|
||||
offset_and_term_postings,
|
||||
similarity_weight,
|
||||
fieldnorm_reader,
|
||||
slop,
|
||||
);
|
||||
Ok(box_scorer(scorer))
|
||||
}
|
||||
|
||||
/// Returns a block postings given a `term_info`.
|
||||
/// This method is for an advanced usage only.
|
||||
///
|
||||
/// Most users should prefer using [`Self::read_postings()`] instead.
|
||||
pub fn read_block_postings_from_terminfo(
|
||||
&self,
|
||||
term_info: &TermInfo,
|
||||
requested_option: IndexRecordOption,
|
||||
) -> io::Result<BlockSegmentPostings> {
|
||||
let postings_data = self
|
||||
.postings_file_slice
|
||||
.slice(term_info.postings_range.clone());
|
||||
BlockSegmentPostings::open(
|
||||
term_info.doc_freq,
|
||||
postings_data,
|
||||
self.record_option,
|
||||
requested_option,
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns a posting object given a `term_info`.
|
||||
/// This method is for an advanced usage only.
|
||||
///
|
||||
/// Most users should prefer using [`Self::read_postings()`] instead.
|
||||
pub fn read_postings_from_terminfo(
|
||||
&self,
|
||||
term_info: &TermInfo,
|
||||
option: IndexRecordOption,
|
||||
) -> io::Result<SegmentPostings> {
|
||||
let option = option.downgrade(self.record_option);
|
||||
|
||||
let block_postings = self.read_block_postings_from_terminfo(term_info, option)?;
|
||||
let position_reader = {
|
||||
if option.has_positions() {
|
||||
let positions_data = self
|
||||
.positions_file_slice
|
||||
.read_bytes_slice(term_info.positions_range.clone())?;
|
||||
let position_reader = PositionReader::open(positions_data)?;
|
||||
Some(position_reader)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
Ok(SegmentPostings::from_block_postings(
|
||||
block_postings,
|
||||
position_reader,
|
||||
))
|
||||
}
|
||||
|
||||
/// Returns the total number of tokens recorded for all documents
|
||||
/// (including deleted documents).
|
||||
pub fn total_num_tokens(&self) -> u64 {
|
||||
fn total_num_tokens(&self) -> u64 {
|
||||
self.total_num_tokens
|
||||
}
|
||||
|
||||
/// Returns the segment postings associated with the term, and with the given option,
|
||||
/// or `None` if the term has never been encountered and indexed.
|
||||
///
|
||||
/// If the field was not indexed with the indexing options that cover
|
||||
/// the requested options, the returned [`SegmentPostings`] the method does not fail
|
||||
/// and returns a `SegmentPostings` with as much information as possible.
|
||||
///
|
||||
/// For instance, requesting [`IndexRecordOption::WithFreqs`] for a
|
||||
/// [`TextOptions`](crate::schema::TextOptions) that does not index position
|
||||
/// will return a [`SegmentPostings`] with `DocId`s and frequencies.
|
||||
pub fn read_postings(
|
||||
fn read_postings(
|
||||
&self,
|
||||
term: &Term,
|
||||
option: IndexRecordOption,
|
||||
) -> io::Result<Option<SegmentPostings>> {
|
||||
) -> io::Result<Option<Box<dyn Postings>>> {
|
||||
self.get_term_info(term)?
|
||||
.map(move |term_info| self.read_postings_from_terminfo(&term_info, option))
|
||||
.transpose()
|
||||
}
|
||||
|
||||
/// Returns the number of documents containing the term.
|
||||
pub fn doc_freq(&self, term: &Term) -> io::Result<u32> {
|
||||
fn doc_freq(&self, term: &Term) -> io::Result<u32> {
|
||||
Ok(self
|
||||
.get_term_info(term)?
|
||||
.map(|term_info| term_info.doc_freq)
|
||||
.unwrap_or(0u32))
|
||||
}

    #[cfg(feature = "quickwit")]
    fn doc_freq_async<'a>(
        &'a self,
        term: &'a Term,
    ) -> Pin<Box<dyn Future<Output = io::Result<u32>> + Send + 'a>> {
        Box::pin(async move {
            Ok(self
                .get_term_info_async(term)
                .await?
                .map(|term_info| term_info.doc_freq)
                .unwrap_or(0u32))
        })
    }
}

#[cfg(feature = "quickwit")]
impl InvertedIndexReader {
impl TantivyInvertedIndexReader {
    pub(crate) async fn get_term_info_async(&self, term: &Term) -> io::Result<Option<TermInfo>> {
        self.termdict.get_async(term.serialized_value_bytes()).await
    }
@@ -482,13 +596,4 @@ impl InvertedIndexReader {
        }
        Ok(())
    }

    /// Returns the number of documents containing the term asynchronously.
    pub async fn doc_freq_async(&self, term: &Term) -> io::Result<u32> {
        Ok(self
            .get_term_info_async(term)
            .await?
            .map(|term_info| term_info.doc_freq)
            .unwrap_or(0u32))
    }
}
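
A hedged sketch of the async path (requires the `quickwit` feature and an async context; `inv_index` and `term` as in the previous sketch):

// Sketch: the async variant mirrors the synchronous doc_freq above.
let doc_freq: u32 = inv_index.doc_freq_async(&term).await?;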

@@ -2,6 +2,7 @@
//!
//! It contains `Index` and `Segment`, where an `Index` consists of one or more `Segment`s.

mod codec_configuration;
mod index;
mod index_meta;
mod inverted_index_reader;
@@ -10,11 +11,14 @@ mod segment_component;
mod segment_id;
mod segment_reader;

pub use self::codec_configuration::CodecConfiguration;
pub use self::index::{Index, IndexBuilder};
pub(crate) use self::index_meta::SegmentMetaInventory;
pub use self::index_meta::{IndexMeta, IndexSettings, Order, SegmentMeta};
pub use self::inverted_index_reader::InvertedIndexReader;
pub use self::inverted_index_reader::{
    InvertedIndexFieldSpace, InvertedIndexReader, TantivyInvertedIndexReader,
};
pub use self::segment::Segment;
pub use self::segment_component::SegmentComponent;
pub use self::segment_id::SegmentId;
pub use self::segment_reader::{FieldMetadata, SegmentReader};
pub use self::segment_reader::{FieldMetadata, SegmentReader, TantivySegmentReader};

@@ -2,6 +2,7 @@ use std::fmt;
use std::path::PathBuf;

use super::SegmentComponent;
use crate::codec::StandardCodec;
use crate::directory::error::{OpenReadError, OpenWriteError};
use crate::directory::{Directory, FileSlice, WritePtr};
use crate::index::{Index, SegmentId, SegmentMeta};
@@ -10,25 +11,25 @@ use crate::Opstamp;

/// A segment is a piece of the index.
#[derive(Clone)]
pub struct Segment {
    index: Index,
pub struct Segment<C: crate::codec::Codec = StandardCodec> {
    index: Index<C>,
    meta: SegmentMeta,
}

impl fmt::Debug for Segment {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl<C: crate::codec::Codec> fmt::Debug for Segment<C> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Segment({:?})", self.id().uuid_string())
    }
}

impl Segment {
impl<C: crate::codec::Codec> Segment<C> {
    /// Creates a new segment given an `Index` and a `SegmentId`
    pub(crate) fn for_index(index: Index, meta: SegmentMeta) -> Segment {
    pub(crate) fn for_index(index: Index<C>, meta: SegmentMeta) -> Segment<C> {
        Segment { index, meta }
    }

    /// Returns the index the segment belongs to.
    pub fn index(&self) -> &Index {
    pub fn index(&self) -> &Index<C> {
        &self.index
    }
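
An illustrative sketch, not from the patch: because `C` defaults to `StandardCodec`, existing call sites that name `Segment` without a type parameter keep compiling.

// Sketch: `Segment` here still means `Segment<StandardCodec>`.
fn segment_label(segment: &Segment) -> String {
    segment.id().uuid_string()
}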

@@ -46,7 +47,7 @@ impl Segment {
    ///
    /// This method is only used when updating `max_doc` from 0
    /// as we finalize a fresh new segment.
    pub fn with_max_doc(self, max_doc: u32) -> Segment {
    pub fn with_max_doc(self, max_doc: u32) -> Segment<C> {
        Segment {
            index: self.index,
            meta: self.meta.with_max_doc(max_doc),
@@ -55,7 +56,7 @@ impl Segment {

    #[doc(hidden)]
    #[must_use]
    pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: Opstamp) -> Segment {
    pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: Opstamp) -> Segment<C> {
        Segment {
            index: self.index,
            meta: self.meta.with_delete_meta(num_deleted_docs, opstamp),

@@ -44,7 +44,7 @@ fn create_uuid() -> Uuid {
}

impl SegmentId {
    #[doc(hidden)]
    /// Generates a new random `SegmentId`.
    pub fn generate_random() -> SegmentId {
        SegmentId(create_uuid())
    }

@@ -6,17 +6,99 @@ use common::{ByteCount, HasLen};
use fnv::FnvHashMap;
use itertools::Itertools;

use crate::directory::{CompositeFile, FileSlice};
use crate::codec::{ObjectSafeCodec, SumOrDoNothingCombiner};
use crate::directory::{CompositeFile, Directory, FileSlice};
use crate::error::DataCorruption;
use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders};
use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
use crate::index::{InvertedIndexReader, Segment, SegmentComponent, SegmentId};
use crate::index::{
    InvertedIndexReader, Segment, SegmentComponent, SegmentId, SegmentMeta,
    TantivyInvertedIndexReader,
};
use crate::json_utils::json_path_sep_to_dot;
use crate::query::Scorer;
use crate::schema::{Field, IndexRecordOption, Schema, Type};
use crate::space_usage::SegmentSpaceUsage;
use crate::store::StoreReader;
use crate::termdict::TermDictionary;
use crate::{DocId, Opstamp};
use crate::{DocId, Opstamp, Score};

/// Trait defining the contract for a segment reader.
pub trait SegmentReader: Send + Sync {
    /// Returns the highest document id ever attributed in this segment + 1.
    fn max_doc(&self) -> DocId;

    /// Returns the number of alive documents. Deleted documents are not counted.
    fn num_docs(&self) -> DocId;

    /// Returns the schema of the index this segment belongs to.
    fn schema(&self) -> &Schema;

    /// Performs a for_each_pruning operation on the given scorer.
    fn for_each_pruning(
        &self,
        threshold: Score,
        scorer: Box<dyn Scorer>,
        callback: &mut dyn FnMut(DocId, Score) -> Score,
    );

    /// Builds a union scorer possibly specialized if all scorers are term scorers.
    fn build_union_scorer_with_sum_combiner(
        &self,
        scorers: Vec<Box<dyn Scorer>>,
        num_docs: DocId,
        score_combiner_type: SumOrDoNothingCombiner,
    ) -> Box<dyn Scorer>;

    /// Return the number of documents that have been deleted in the segment.
    fn num_deleted_docs(&self) -> DocId;

    /// Returns true if some of the documents of the segment have been deleted.
    fn has_deletes(&self) -> bool;

    /// Accessor to a segment's fast field reader given a field.
    fn fast_fields(&self) -> &FastFieldReaders;

    /// Accessor to the `FacetReader` associated with a given `Field`.
    fn facet_reader(&self, field_name: &str) -> crate::Result<FacetReader>;

    /// Accessor to the segment's `Field norms`'s reader.
    fn get_fieldnorms_reader(&self, field: Field) -> crate::Result<FieldNormReader>;

    /// Accessor to the segment's field norms readers.
    #[doc(hidden)]
    fn fieldnorms_readers(&self) -> &FieldNormReaders;

    /// Accessor to the segment's [`StoreReader`](crate::store::StoreReader).
    fn get_store_reader(&self, cache_num_blocks: usize) -> io::Result<StoreReader>;

    /// Returns a field reader associated with the field given in argument.
    fn inverted_index(&self, field: Field) -> crate::Result<Arc<dyn InvertedIndexReader>>;

    /// Returns the list of fields that have been indexed in the segment.
    fn fields_metadata(&self) -> crate::Result<Vec<FieldMetadata>>;

    /// Returns the segment id.
    fn segment_id(&self) -> SegmentId;

    /// Returns the delete opstamp.
    fn delete_opstamp(&self) -> Option<Opstamp>;

    /// Returns the bitset representing the alive `DocId`s.
    fn alive_bitset(&self) -> Option<&AliveBitSet>;

    /// Returns true if the `doc` is marked as deleted.
    fn is_deleted(&self, doc: DocId) -> bool;

    /// Returns an iterator that will iterate over the alive document ids.
    fn doc_ids_alive(&self) -> Box<dyn Iterator<Item = DocId> + Send + '_>;

    /// Summarize total space usage of this segment.
    fn space_usage(&self) -> io::Result<SegmentSpaceUsage>;

    /// Clones this reader into a shared trait object.
    fn clone_arc(&self) -> Arc<dyn SegmentReader>;
}
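
A small sketch, not part of the patch, showing how downstream code can stay generic over any implementation by programming against the trait object:

// Sketch: works for any SegmentReader implementation, not just the default one.
fn alive_ratio(reader: &dyn SegmentReader) -> f32 {
    if reader.max_doc() == 0 {
        return 1.0;
    }
    reader.num_docs() as f32 / reader.max_doc() as f32
}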

/// Entry point to access all of the datastructures of the `Segment`
///
@@ -29,8 +111,8 @@ use crate::{DocId, Opstamp};
/// The segment reader has a very low memory footprint,
/// as close to all of the memory data is mmapped.
#[derive(Clone)]
pub struct SegmentReader {
    inv_idx_reader_cache: Arc<RwLock<HashMap<Field, Arc<InvertedIndexReader>>>>,
pub struct TantivySegmentReader {
    inv_idx_reader_cache: Arc<RwLock<HashMap<Field, Arc<dyn InvertedIndexReader>>>>,

    segment_id: SegmentId,
    delete_opstamp: Option<Opstamp>,
@@ -47,75 +129,162 @@ pub struct SegmentReader {
    store_file: FileSlice,
    alive_bitset_opt: Option<AliveBitSet>,
    schema: Schema,
    codec: Arc<dyn ObjectSafeCodec>,
}

impl SegmentReader {
    /// Returns the highest document id ever attributed in
    /// this segment + 1.
    pub fn max_doc(&self) -> DocId {
impl TantivySegmentReader {
    /// Open a new segment for reading.
    pub fn open<C: crate::codec::Codec>(
        segment: &Segment<C>,
    ) -> crate::Result<Arc<dyn SegmentReader>> {
        Self::open_with_custom_alive_set(segment, None)
    }

    /// Open a new segment for reading.
    pub fn open_with_custom_alive_set<C: crate::codec::Codec>(
        segment: &Segment<C>,
        custom_bitset: Option<AliveBitSet>,
    ) -> crate::Result<Arc<dyn SegmentReader>> {
        segment.index().codec().open_segment_reader(
            segment.index().directory(),
            segment.meta(),
            segment.schema(),
            custom_bitset,
        )
    }

    pub(crate) fn open_with_custom_alive_set_from_directory(
        directory: &dyn Directory,
        segment_meta: &SegmentMeta,
        schema: Schema,
        codec: Arc<dyn ObjectSafeCodec>,
        custom_bitset: Option<AliveBitSet>,
    ) -> crate::Result<TantivySegmentReader> {
        let termdict_file =
            directory.open_read(&segment_meta.relative_path(SegmentComponent::Terms))?;
        let termdict_composite = CompositeFile::open(&termdict_file)?;

        let store_file =
            directory.open_read(&segment_meta.relative_path(SegmentComponent::Store))?;

        crate::fail_point!("SegmentReader::open#middle");

        let postings_file =
            directory.open_read(&segment_meta.relative_path(SegmentComponent::Postings))?;
        let postings_composite = CompositeFile::open(&postings_file)?;

        let positions_composite = {
            if let Ok(positions_file) =
                directory.open_read(&segment_meta.relative_path(SegmentComponent::Positions))
            {
                CompositeFile::open(&positions_file)?
            } else {
                CompositeFile::empty()
            }
        };

        let fast_fields_data =
            directory.open_read(&segment_meta.relative_path(SegmentComponent::FastFields))?;
        let fast_fields_readers = FastFieldReaders::open(fast_fields_data, schema.clone())?;
        let fieldnorm_data =
            directory.open_read(&segment_meta.relative_path(SegmentComponent::FieldNorms))?;
        let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;

        let original_bitset = if segment_meta.has_deletes() {
            let alive_doc_file_slice =
                directory.open_read(&segment_meta.relative_path(SegmentComponent::Delete))?;
            let alive_doc_data = alive_doc_file_slice.read_bytes()?;
            Some(AliveBitSet::open(alive_doc_data))
        } else {
            None
        };

        let alive_bitset_opt = intersect_alive_bitset(original_bitset, custom_bitset);

        let max_doc = segment_meta.max_doc();
        let num_docs = alive_bitset_opt
            .as_ref()
            .map(|alive_bitset| alive_bitset.num_alive_docs() as u32)
            .unwrap_or(max_doc);

        Ok(TantivySegmentReader {
            inv_idx_reader_cache: Default::default(),
            num_docs,
            max_doc,
            termdict_composite,
            postings_composite,
            fast_fields_readers,
            fieldnorm_readers,
            segment_id: segment_meta.id(),
            delete_opstamp: segment_meta.delete_opstamp(),
            store_file,
            alive_bitset_opt,
            positions_composite,
            schema,
            codec,
        })
    }
}
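
A usage sketch, not from the patch (it assumes `segment` is a `Segment<C>` for some codec): the constructors above now hand back a trait object.

// Sketch: open a reader and check the deleted-doc bookkeeping invariant.
let reader: Arc<dyn SegmentReader> = TantivySegmentReader::open(&segment)?;
assert_eq!(reader.num_deleted_docs(), reader.max_doc() - reader.num_docs());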

impl SegmentReader for TantivySegmentReader {
    fn max_doc(&self) -> DocId {
        self.max_doc
    }

    /// Returns the number of alive documents.
    /// Deleted documents are not counted.
    pub fn num_docs(&self) -> DocId {
    fn num_docs(&self) -> DocId {
        self.num_docs
    }

    /// Returns the schema of the index this segment belongs to.
    pub fn schema(&self) -> &Schema {
    fn schema(&self) -> &Schema {
        &self.schema
    }

    /// Return the number of documents that have been
    /// deleted in the segment.
    pub fn num_deleted_docs(&self) -> DocId {
    fn for_each_pruning(
        &self,
        threshold: Score,
        scorer: Box<dyn Scorer>,
        callback: &mut dyn FnMut(DocId, Score) -> Score,
    ) {
        self.codec.for_each_pruning(threshold, scorer, callback);
    }

    fn build_union_scorer_with_sum_combiner(
        &self,
        scorers: Vec<Box<dyn Scorer>>,
        num_docs: DocId,
        score_combiner_type: SumOrDoNothingCombiner,
    ) -> Box<dyn Scorer> {
        self.codec
            .build_union_scorer_with_sum_combiner(scorers, num_docs, score_combiner_type)
    }

    fn num_deleted_docs(&self) -> DocId {
        self.max_doc - self.num_docs
    }

    /// Returns true if some of the documents of the segment have been deleted.
    pub fn has_deletes(&self) -> bool {
        self.num_deleted_docs() > 0
    fn has_deletes(&self) -> bool {
        self.num_docs != self.max_doc
    }

    /// Accessor to a segment's fast field reader given a field.
    ///
    /// Returns the u64 fast value reader if the field
    /// is a u64 field indexed as "fast".
    ///
    /// Return a FastFieldNotAvailableError if the field is not
    /// declared as a fast field in the schema.
    ///
    /// # Panics
    /// May panic if the index is corrupted.
    pub fn fast_fields(&self) -> &FastFieldReaders {
    fn fast_fields(&self) -> &FastFieldReaders {
        &self.fast_fields_readers
    }

    /// Accessor to the `FacetReader` associated with a given `Field`.
    pub fn facet_reader(&self, field_name: &str) -> crate::Result<FacetReader> {
        let schema = self.schema();
        let field = schema.get_field(field_name)?;
        let field_entry = schema.get_field_entry(field);
    fn facet_reader(&self, field_name: &str) -> crate::Result<FacetReader> {
        let field = self.schema.get_field(field_name)?;
        let field_entry = self.schema.get_field_entry(field);
        if field_entry.field_type().value_type() != Type::Facet {
            return Err(crate::TantivyError::SchemaError(format!(
                "`{field_name}` is not a facet field.`"
            )));
        }
        let Some(facet_column) = self.fast_fields().str(field_name)? else {
        let Some(facet_column) = self.fast_fields_readers.str(field_name)? else {
            panic!("Facet Field `{field_name}` is missing. This should not happen");
        };
        Ok(FacetReader::new(facet_column))
    }

    /// Accessor to the segment's `Field norms`'s reader.
    ///
    /// Field norms are the length (in tokens) of the fields.
    /// It is used in the computation of the [TfIdf](https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html).
    ///
    /// They are simply stored as a fast field, serialized in
    /// the `.fieldnorm` file of the segment.
    pub fn get_fieldnorms_reader(&self, field: Field) -> crate::Result<FieldNormReader> {
    fn get_fieldnorms_reader(&self, field: Field) -> crate::Result<FieldNormReader> {
        self.fieldnorm_readers.get_field(field)?.ok_or_else(|| {
            let field_name = self.schema.get_field_name(field);
            let err_msg = format!(
@@ -126,100 +295,15 @@ impl SegmentReader {
        })
    }

    #[doc(hidden)]
    pub fn fieldnorms_readers(&self) -> &FieldNormReaders {
    fn fieldnorms_readers(&self) -> &FieldNormReaders {
        &self.fieldnorm_readers
    }

    /// Accessor to the segment's [`StoreReader`](crate::store::StoreReader).
    ///
    /// `cache_num_blocks` sets the number of decompressed blocks to be cached in an LRU.
    /// The size of blocks is configurable; this should be reflected in the amount of memory used.
    pub fn get_store_reader(&self, cache_num_blocks: usize) -> io::Result<StoreReader> {
    fn get_store_reader(&self, cache_num_blocks: usize) -> io::Result<StoreReader> {
        StoreReader::open(self.store_file.clone(), cache_num_blocks)
    }

    /// Open a new segment for reading.
    pub fn open(segment: &Segment) -> crate::Result<SegmentReader> {
        Self::open_with_custom_alive_set(segment, None)
    }

    /// Open a new segment for reading.
    pub fn open_with_custom_alive_set(
        segment: &Segment,
        custom_bitset: Option<AliveBitSet>,
    ) -> crate::Result<SegmentReader> {
        let termdict_file = segment.open_read(SegmentComponent::Terms)?;
        let termdict_composite = CompositeFile::open(&termdict_file)?;

        let store_file = segment.open_read(SegmentComponent::Store)?;

        crate::fail_point!("SegmentReader::open#middle");

        let postings_file = segment.open_read(SegmentComponent::Postings)?;
        let postings_composite = CompositeFile::open(&postings_file)?;

        let positions_composite = {
            if let Ok(positions_file) = segment.open_read(SegmentComponent::Positions) {
                CompositeFile::open(&positions_file)?
            } else {
                CompositeFile::empty()
            }
        };

        let schema = segment.schema();

        let fast_fields_data = segment.open_read(SegmentComponent::FastFields)?;
        let fast_fields_readers = FastFieldReaders::open(fast_fields_data, schema.clone())?;
        let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
        let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;

        let original_bitset = if segment.meta().has_deletes() {
            let alive_doc_file_slice = segment.open_read(SegmentComponent::Delete)?;
            let alive_doc_data = alive_doc_file_slice.read_bytes()?;
            Some(AliveBitSet::open(alive_doc_data))
        } else {
            None
        };

        let alive_bitset_opt = intersect_alive_bitset(original_bitset, custom_bitset);

        let max_doc = segment.meta().max_doc();
        let num_docs = alive_bitset_opt
            .as_ref()
            .map(|alive_bitset| alive_bitset.num_alive_docs() as u32)
            .unwrap_or(max_doc);

        Ok(SegmentReader {
            inv_idx_reader_cache: Default::default(),
            num_docs,
            max_doc,
            termdict_composite,
            postings_composite,
            fast_fields_readers,
            fieldnorm_readers,
            segment_id: segment.id(),
            delete_opstamp: segment.meta().delete_opstamp(),
            store_file,
            alive_bitset_opt,
            positions_composite,
            schema,
        })
    }

    /// Returns a field reader associated with the field given in argument.
    /// If the field was not present in the index during indexing time,
    /// the InvertedIndexReader is empty.
    ///
    /// The field reader is in charge of iterating through the
    /// term dictionary associated with a specific field,
    /// and opening the posting list associated with any term.
    ///
    /// If the field is not marked as indexed, a warning is logged and an empty `InvertedIndexReader`
    /// is returned.
    /// Similarly, if the field is marked as indexed but no term has been indexed for the given
    /// index, an empty `InvertedIndexReader` is returned (but no warning is logged).
    pub fn inverted_index(&self, field: Field) -> crate::Result<Arc<InvertedIndexReader>> {
    fn inverted_index(&self, field: Field) -> crate::Result<Arc<dyn InvertedIndexReader>> {
        if let Some(inv_idx_reader) = self
            .inv_idx_reader_cache
            .read()
@@ -244,7 +328,9 @@ impl SegmentReader {
            //
            // Returns an empty inverted index.
            let record_option = record_option_opt.unwrap_or(IndexRecordOption::Basic);
            return Ok(Arc::new(InvertedIndexReader::empty(record_option)));
            let inv_idx_reader: Arc<dyn InvertedIndexReader> =
                Arc::new(TantivyInvertedIndexReader::empty(record_option));
            return Ok(inv_idx_reader);
        }

        let record_option = record_option_opt.unwrap();
@@ -268,12 +354,13 @@ impl SegmentReader {
            DataCorruption::comment_only(error_msg)
        })?;

        let inv_idx_reader = Arc::new(InvertedIndexReader::new(
            TermDictionary::open(termdict_file)?,
            postings_file,
            positions_file,
            record_option,
        )?);
        let inv_idx_reader: Arc<dyn InvertedIndexReader> =
            Arc::new(TantivyInvertedIndexReader::new(
                TermDictionary::open(termdict_file)?,
                postings_file,
                positions_file,
                record_option,
            )?);

        // by releasing the lock in between, we may end up opening the inverting index
        // twice, but this is fine.
@@ -285,23 +372,10 @@ impl SegmentReader {
        Ok(inv_idx_reader)
    }

    /// Returns the list of fields that have been indexed in the segment.
    /// The field list includes the field defined in the schema as well as the fields
    /// that have been indexed as a part of a JSON field.
    /// The returned field name is the full field name, including the name of the JSON field.
    ///
    /// The returned field names can be used in queries.
    ///
    /// Notice: If your data contains JSON fields this is **very expensive**, as it requires
    /// browsing through the inverted index term dictionary and the columnar field dictionary.
    ///
    /// Disclaimer: Some fields may not be listed here. For instance, if the schema contains a json
    /// field that is not indexed nor a fast field but is stored, it is possible for the field
    /// to not be listed.
    pub fn fields_metadata(&self) -> crate::Result<Vec<FieldMetadata>> {
    fn fields_metadata(&self) -> crate::Result<Vec<FieldMetadata>> {
        let mut indexed_fields: Vec<FieldMetadata> = Vec::new();
        let mut map_to_canonical = FnvHashMap::default();
        for (field, field_entry) in self.schema().fields() {
        for (field, field_entry) in self.schema.fields() {
            let field_name = field_entry.name().to_string();
            let is_indexed = field_entry.is_indexed();
            if is_indexed {
@@ -391,7 +465,7 @@ impl SegmentReader {
            }
        }
        let fast_fields: Vec<FieldMetadata> = self
            .fast_fields()
            .fast_fields_readers
            .columnar()
            .iter_columns()?
            .map(|(mut field_name, handle)| {
@@ -419,31 +493,26 @@ impl SegmentReader {
        Ok(merged_field_metadatas)
    }

    /// Returns the segment id
    pub fn segment_id(&self) -> SegmentId {
    fn segment_id(&self) -> SegmentId {
        self.segment_id
    }

    /// Returns the delete opstamp
    pub fn delete_opstamp(&self) -> Option<Opstamp> {
    fn delete_opstamp(&self) -> Option<Opstamp> {
        self.delete_opstamp
    }

    /// Returns the bitset representing the alive `DocId`s.
    pub fn alive_bitset(&self) -> Option<&AliveBitSet> {
    fn alive_bitset(&self) -> Option<&AliveBitSet> {
        self.alive_bitset_opt.as_ref()
    }

    /// Returns true if the `doc` is marked
    /// as deleted.
    pub fn is_deleted(&self, doc: DocId) -> bool {
        self.alive_bitset()
    fn is_deleted(&self, doc: DocId) -> bool {
        self.alive_bitset_opt
            .as_ref()
            .map(|alive_bitset| alive_bitset.is_deleted(doc))
            .unwrap_or(false)
    }
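
A small invariant sketch (not from the patch) tying the alive-document iterator to the deletion check:

// Sketch: every id yielded by doc_ids_alive() must not be marked deleted.
for doc in reader.doc_ids_alive() {
    debug_assert!(!reader.is_deleted(doc));
}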

    /// Returns an iterator that will iterate over the alive document ids
    pub fn doc_ids_alive(&self) -> Box<dyn Iterator<Item = DocId> + Send + '_> {
    fn doc_ids_alive(&self) -> Box<dyn Iterator<Item = DocId> + Send + '_> {
        if let Some(alive_bitset) = &self.alive_bitset_opt {
            Box::new(alive_bitset.iter_alive())
        } else {
@@ -451,22 +520,25 @@ impl SegmentReader {
        }
    }

    /// Summarize total space usage of this segment.
    pub fn space_usage(&self) -> io::Result<SegmentSpaceUsage> {
    fn space_usage(&self) -> io::Result<SegmentSpaceUsage> {
        Ok(SegmentSpaceUsage::new(
            self.num_docs(),
            self.termdict_composite.space_usage(self.schema()),
            self.postings_composite.space_usage(self.schema()),
            self.positions_composite.space_usage(self.schema()),
            self.num_docs,
            self.termdict_composite.space_usage(&self.schema),
            self.postings_composite.space_usage(&self.schema),
            self.positions_composite.space_usage(&self.schema),
            self.fast_fields_readers.space_usage()?,
            self.fieldnorm_readers.space_usage(self.schema()),
            self.get_store_reader(0)?.space_usage(),
            self.fieldnorm_readers.space_usage(&self.schema),
            StoreReader::open(self.store_file.clone(), 0)?.space_usage(),
            self.alive_bitset_opt
                .as_ref()
                .map(AliveBitSet::space_usage)
                .unwrap_or_default(),
        ))
    }

    fn clone_arc(&self) -> Arc<dyn SegmentReader> {
        Arc::new(self.clone())
    }
}

#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -576,7 +648,7 @@ fn intersect_alive_bitset(
    }
}

impl fmt::Debug for SegmentReader {
impl fmt::Debug for TantivySegmentReader {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "SegmentReader({:?})", self.segment_id)
    }

@@ -250,11 +250,15 @@ mod tests {

    struct DummyWeight;
    impl Weight for DummyWeight {
        fn scorer(&self, _reader: &SegmentReader, _boost: Score) -> crate::Result<Box<dyn Scorer>> {
        fn scorer(
            &self,
            _reader: &dyn SegmentReader,
            _boost: Score,
        ) -> crate::Result<Box<dyn Scorer>> {
            Err(crate::TantivyError::InternalError("dummy impl".to_owned()))
        }

        fn explain(&self, _reader: &SegmentReader, _doc: DocId) -> crate::Result<Explanation> {
        fn explain(&self, _reader: &dyn SegmentReader, _doc: DocId) -> crate::Result<Explanation> {
            Err(crate::TantivyError::InternalError("dummy impl".to_owned()))
        }
    }

@@ -9,6 +9,7 @@ use smallvec::smallvec;
use super::operation::{AddOperation, UserOperation};
use super::segment_updater::SegmentUpdater;
use super::{AddBatch, AddBatchReceiver, AddBatchSender, PreparedCommit};
use crate::codec::{Codec, StandardCodec};
use crate::directory::{DirectoryLock, GarbageCollectionResult, TerminatingWrite};
use crate::error::TantivyError;
use crate::fastfield::write_alive_bitset;
@@ -68,12 +69,12 @@ pub struct IndexWriterOptions {
/// indexing queue.
/// Each indexing thread builds its own independent [`Segment`], via
/// a `SegmentWriter` object.
pub struct IndexWriter<D: Document = TantivyDocument> {
pub struct IndexWriter<C: Codec = StandardCodec, D: Document = TantivyDocument> {
    // the lock is just used to bind the
    // lifetime of the lock with that of the IndexWriter.
    _directory_lock: Option<DirectoryLock>,

    index: Index,
    index: Index<C>,

    options: IndexWriterOptions,

@@ -82,7 +83,7 @@ pub struct IndexWriter<D: Document = TantivyDocument> {
    index_writer_status: IndexWriterStatus<D>,
    operation_sender: AddBatchSender<D>,

    segment_updater: SegmentUpdater,
    segment_updater: SegmentUpdater<C>,

    worker_id: usize,

@@ -94,7 +95,7 @@ pub struct IndexWriter<D: Document = TantivyDocument> {

fn compute_deleted_bitset(
    alive_bitset: &mut BitSet,
    segment_reader: &SegmentReader,
    segment_reader: &dyn SegmentReader,
    delete_cursor: &mut DeleteCursor,
    doc_opstamps: &DocToOpstampMapping,
    target_opstamp: Opstamp,
@@ -128,8 +129,8 @@ fn compute_deleted_bitset(
/// is `==` target_opstamp.
/// For instance, if there was no delete operation between the state of the `segment_entry` and
/// the `target_opstamp`, the `segment_entry` is not updated.
pub fn advance_deletes(
    mut segment: Segment,
pub fn advance_deletes<C: Codec>(
    mut segment: Segment<C>,
    segment_entry: &mut SegmentEntry,
    target_opstamp: Opstamp,
) -> crate::Result<()> {
@@ -143,7 +144,12 @@ pub fn advance_deletes(
        return Ok(());
    }

    let segment_reader = SegmentReader::open(&segment)?;
    let segment_reader = segment.index().codec().open_segment_reader(
        segment.index().directory(),
        segment.meta(),
        segment.schema(),
        None,
    )?;

    let max_doc = segment_reader.max_doc();
    let mut alive_bitset: BitSet = match segment_entry.alive_bitset() {
@@ -155,7 +161,7 @@ pub fn advance_deletes(

    compute_deleted_bitset(
        &mut alive_bitset,
        &segment_reader,
        segment_reader.as_ref(),
        segment_entry.delete_cursor(),
        &DocToOpstampMapping::None,
        target_opstamp,
@@ -179,11 +185,11 @@ pub fn advance_deletes(
    Ok(())
}

fn index_documents<D: Document>(
fn index_documents<C: crate::codec::Codec, D: Document>(
    memory_budget: usize,
    segment: Segment,
    segment: Segment<C>,
    grouped_document_iterator: &mut dyn Iterator<Item = AddBatch<D>>,
    segment_updater: &SegmentUpdater,
    segment_updater: &SegmentUpdater<C>,
    mut delete_cursor: DeleteCursor,
) -> crate::Result<()> {
    let mut segment_writer = SegmentWriter::for_segment(memory_budget, segment.clone())?;
@@ -226,8 +232,8 @@ fn index_documents<D: Document>(
}

/// `doc_opstamps` is required to be non-empty.
fn apply_deletes(
    segment: &Segment,
fn apply_deletes<C: crate::codec::Codec>(
    segment: &Segment<C>,
    delete_cursor: &mut DeleteCursor,
    doc_opstamps: &[Opstamp],
) -> crate::Result<Option<BitSet>> {
@@ -243,14 +249,19 @@ fn apply_deletes(
        .max()
        .expect("Empty DocOpstamp is forbidden");

    let segment_reader = SegmentReader::open(segment)?;
    let segment_reader = segment.index().codec().open_segment_reader(
        segment.index().directory(),
        segment.meta(),
        segment.schema(),
        None,
    )?;
    let doc_to_opstamps = DocToOpstampMapping::WithMap(doc_opstamps);

    let max_doc = segment.meta().max_doc();
    let mut deleted_bitset = BitSet::with_max_value_and_full(max_doc);
    let may_have_deletes = compute_deleted_bitset(
        &mut deleted_bitset,
        &segment_reader,
        segment_reader.as_ref(),
        delete_cursor,
        &doc_to_opstamps,
        max_doc_opstamp,
@@ -262,7 +273,7 @@ fn apply_deletes(
    })
}

impl<D: Document> IndexWriter<D> {
impl<C: Codec, D: Document> IndexWriter<C, D> {
    /// Create a new index writer. Attempts to acquire a lockfile.
    ///
    /// The lockfile should be deleted on drop, but it is possible
@@ -278,7 +289,7 @@ impl<D: Document> IndexWriter<D> {
    /// If the memory arena per thread is too small or too big, returns
    /// `TantivyError::InvalidArgument`
    pub(crate) fn new(
        index: &Index,
        index: &Index<C>,
        options: IndexWriterOptions,
        directory_lock: DirectoryLock,
    ) -> crate::Result<Self> {
@@ -345,7 +356,7 @@ impl<D: Document> IndexWriter<D> {
    }

    /// Accessor to the index.
    pub fn index(&self) -> &Index {
    pub fn index(&self) -> &Index<C> {
        &self.index
    }

@@ -393,7 +404,7 @@ impl<D: Document> IndexWriter<D> {
    /// It is safe to start writing file associated with the new `Segment`.
    /// These will not be garbage collected as long as an instance object of
    /// `SegmentMeta` object associated with the new `Segment` is "alive".
    pub fn new_segment(&self) -> Segment {
    pub fn new_segment(&self) -> Segment<C> {
        self.index.new_segment()
    }
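
A hedged sketch of an unchanged call site, not from the patch: thanks to the `StandardCodec` default parameter, the usual writer workflow compiles as before (the field name and memory budget are placeholders).

// Sketch: IndexWriter<StandardCodec, TantivyDocument> under the defaults.
let mut writer: IndexWriter = index.writer(50_000_000)?;
writer.add_document(doc!(text_field => "hello"))?;
writer.commit()?;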

@@ -615,7 +626,7 @@ impl<D: Document> IndexWriter<D> {
    /// It is also possible to add a payload to the `commit`
    /// using this API.
    /// See [`PreparedCommit::set_payload()`].
    pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit<'_, D>> {
    pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit<'_, C, D>> {
        // Here, because we join all of the worker threads,
        // all of the segment update for this commit have been
        // sent.
@@ -665,7 +676,7 @@ impl<D: Document> IndexWriter<D> {
        self.prepare_commit()?.commit()
    }

    pub(crate) fn segment_updater(&self) -> &SegmentUpdater {
    pub(crate) fn segment_updater(&self) -> &SegmentUpdater<C> {
        &self.segment_updater
    }

@@ -804,7 +815,7 @@ impl<D: Document> IndexWriter<D> {
    }
}

impl<D: Document> Drop for IndexWriter<D> {
impl<C: Codec, D: Document> Drop for IndexWriter<C, D> {
    fn drop(&mut self) {
        self.segment_updater.kill();
        self.drop_sender();

@@ -3,7 +3,7 @@ mod tests {
    use crate::collector::TopDocs;
    use crate::fastfield::AliveBitSet;
    use crate::index::Index;
    use crate::postings::Postings;
    use crate::postings::{DocFreq, Postings};
    use crate::query::QueryParser;
    use crate::schema::{
        self, BytesOptions, Facet, FacetOptions, IndexRecordOption, NumericOptions,
@@ -121,21 +121,32 @@ mod tests {
        let my_text_field = index.schema().get_field("text_field").unwrap();
        let term_a = Term::from_field_text(my_text_field, "text");
        let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
        let mut postings = inverted_index
            .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
            .unwrap()
            .unwrap();
        assert_eq!(postings.doc_freq(), 2);
        let term_info = inverted_index.get_term_info(&term_a).unwrap().unwrap();
        let typed_postings = crate::codec::Codec::load_postings_typed(
            index.codec(),
            inverted_index.as_ref(),
            &term_info,
            IndexRecordOption::WithFreqsAndPositions,
        )
        .unwrap();
        let fallback_bitset = AliveBitSet::for_test_from_deleted_docs(&[0], 100);
        assert_eq!(
            postings.doc_freq_given_deletes(
            crate::indexer::merger::doc_freq_given_deletes(
                &typed_postings,
                segment_reader.alive_bitset().unwrap_or(&fallback_bitset)
            ),
            2
        );
        let mut postings = inverted_index
            .read_postings_from_terminfo(&term_info, IndexRecordOption::WithFreqsAndPositions)
            .unwrap();
        assert_eq!(postings.doc_freq(), DocFreq::Exact(2));
        let mut postings = inverted_index
            .read_postings_from_terminfo(&term_info, IndexRecordOption::WithFreqsAndPositions)
            .unwrap();

        assert_eq!(postings.term_freq(), 1);
        let mut output = vec![];
        let mut output = Vec::new();
        postings.positions(&mut output);
        assert_eq!(output, vec![1]);
        postings.advance();

@@ -1,3 +1,5 @@
use std::io;
use std::marker::PhantomData;
use std::sync::Arc;

use columnar::{
@@ -7,6 +9,8 @@ use common::ReadOnlyBitSet;
use itertools::Itertools;
use measure_time::debug_time;

use crate::codec::postings::PostingsCodec;
use crate::codec::{Codec, StandardCodec};
use crate::directory::WritePtr;
use crate::docset::{DocSet, TERMINATED};
use crate::error::DataCorruption;
@@ -15,8 +19,8 @@ use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer,
use crate::index::{Segment, SegmentComponent, SegmentReader};
use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping};
use crate::indexer::SegmentSerializer;
use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
use crate::schema::{value_type_to_column_type, Field, FieldType, Schema};
use crate::postings::{InvertedIndexSerializer, Postings, TermInfo};
use crate::schema::{value_type_to_column_type, Field, FieldType, IndexRecordOption, Schema};
use crate::store::StoreWriter;
use crate::termdict::{TermMerger, TermOrdinal};
use crate::{DocAddress, DocId, InvertedIndexReader};
@@ -27,7 +31,7 @@ use crate::{DocAddress, DocId, InvertedIndexReader};
pub const MAX_DOC_LIMIT: u32 = 1 << 31;

fn estimate_total_num_tokens_in_single_segment(
    reader: &SegmentReader,
    reader: &dyn SegmentReader,
    field: Field,
) -> crate::Result<u64> {
    // There are no deletes. We can simply use the exact value saved into the posting list.
@@ -68,18 +72,23 @@ fn estimate_total_num_tokens_in_single_segment(
    Ok((segment_num_tokens as f64 * ratio) as u64)
}

fn estimate_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::Result<u64> {
fn estimate_total_num_tokens(
    readers: &[Arc<dyn SegmentReader>],
    field: Field,
) -> crate::Result<u64> {
    let mut total_num_tokens: u64 = 0;
    for reader in readers {
        total_num_tokens += estimate_total_num_tokens_in_single_segment(reader, field)?;
        total_num_tokens += estimate_total_num_tokens_in_single_segment(reader.as_ref(), field)?;
    }
    Ok(total_num_tokens)
}

pub struct IndexMerger {
pub struct IndexMerger<C: Codec = StandardCodec> {
    schema: Schema,
    pub(crate) readers: Vec<SegmentReader>,
    pub(crate) readers: Vec<Arc<dyn SegmentReader>>,
    max_doc: u32,
    codec: C,
    phantom: PhantomData<C>,
}

struct DeltaComputer {
@@ -144,8 +153,8 @@ fn extract_fast_field_required_columns(schema: &Schema) -> Vec<(String, ColumnTy
        .collect()
}

impl IndexMerger {
    pub fn open(schema: Schema, segments: &[Segment]) -> crate::Result<IndexMerger> {
impl<C: Codec> IndexMerger<C> {
    pub fn open(schema: Schema, segments: &[Segment<C>]) -> crate::Result<IndexMerger<C>> {
        let alive_bitset = segments.iter().map(|_| None).collect_vec();
        Self::open_with_custom_alive_set(schema, segments, alive_bitset)
    }
@@ -162,16 +171,24 @@ impl IndexMerger {
    // This can be used to merge but also apply an additional filter.
    // One use case is demux, which is basically taking a list of
    // segments and partitions them e.g. by a value in a field.
    //
    // # Panics if segments is empty.
    pub fn open_with_custom_alive_set(
        schema: Schema,
        segments: &[Segment],
        segments: &[Segment<C>],
        alive_bitset_opt: Vec<Option<AliveBitSet>>,
    ) -> crate::Result<IndexMerger> {
    ) -> crate::Result<IndexMerger<C>> {
        assert!(!segments.is_empty());
        let codec = segments[0].index().codec().clone();
        let mut readers = vec![];
        for (segment, new_alive_bitset_opt) in segments.iter().zip(alive_bitset_opt) {
            if segment.meta().num_docs() > 0 {
                let reader =
                    SegmentReader::open_with_custom_alive_set(segment, new_alive_bitset_opt)?;
                let reader = segment.index().codec().open_segment_reader(
                    segment.index().directory(),
                    segment.meta(),
                    segment.schema(),
                    new_alive_bitset_opt,
                )?;
                readers.push(reader);
            }
        }
@@ -189,6 +206,8 @@ impl IndexMerger {
            schema,
            readers,
            max_doc,
            codec,
            phantom: PhantomData,
        })
    }

@@ -262,7 +281,7 @@ impl IndexMerger {
            }),
        );

        let has_deletes: bool = self.readers.iter().any(SegmentReader::has_deletes);
        let has_deletes: bool = self.readers.iter().any(|reader| reader.has_deletes());
        let mapping_type = if has_deletes {
            MappingType::StackedWithDeletes
        } else {
@@ -297,7 +316,7 @@ impl IndexMerger {

        let mut max_term_ords: Vec<TermOrdinal> = Vec::new();

        let field_readers: Vec<Arc<InvertedIndexReader>> = self
        let field_readers: Vec<Arc<dyn InvertedIndexReader>> = self
            .readers
            .iter()
            .map(|reader| reader.inverted_index(indexed_field))
@@ -355,7 +374,10 @@ impl IndexMerger {
            indexed. Have you modified the schema?",
        );

        let mut segment_postings_containing_the_term: Vec<(usize, SegmentPostings)> = vec![];
        let mut segment_postings_containing_the_term: Vec<(
            usize,
            <C::PostingsCodec as PostingsCodec>::Postings,
        )> = Vec::with_capacity(self.readers.len());

        while merged_terms.advance() {
            segment_postings_containing_the_term.clear();
@@ -366,18 +388,16 @@ impl IndexMerger {
            // Let's compute the list of non-empty posting lists
            for (segment_ord, term_info) in merged_terms.current_segment_ords_and_term_infos() {
                let segment_reader = &self.readers[segment_ord];
                let inverted_index: &InvertedIndexReader = &field_readers[segment_ord];
                let segment_postings = inverted_index
                    .read_postings_from_terminfo(&term_info, segment_postings_option)?;
                let alive_bitset_opt = segment_reader.alive_bitset();
                let doc_freq = if let Some(alive_bitset) = alive_bitset_opt {
                    segment_postings.doc_freq_given_deletes(alive_bitset)
                } else {
                    segment_postings.doc_freq()
                };
                if doc_freq > 0u32 {
                let inverted_index = &field_readers[segment_ord];
                if let Some((doc_freq, postings)) = postings_for_merge::<C>(
                    inverted_index.as_ref(),
                    &self.codec,
                    &term_info,
                    segment_postings_option,
                    segment_reader.alive_bitset(),
                )? {
                    total_doc_freq += doc_freq;
                    segment_postings_containing_the_term.push((segment_ord, segment_postings));
                    segment_postings_containing_the_term.push((segment_ord, postings));
                }
            }

@@ -395,11 +415,7 @@ impl IndexMerger {
            assert!(!segment_postings_containing_the_term.is_empty());

            let has_term_freq = {
            let has_term_freq = !segment_postings_containing_the_term[0]
                .1
                .block_cursor
                .freqs()
                .is_empty();
            let has_term_freq = segment_postings_containing_the_term[0].1.has_freq();
            for (_, postings) in &segment_postings_containing_the_term[1..] {
                // This may look like a strange way to test whether we have term freq or not.
                // With JSON object, the schema is not sufficient to know whether a term
@@ -415,7 +431,7 @@ impl IndexMerger {
                //
                // Overall the reliable way to know if we have actual frequencies loaded or not
                // is to check whether the actual decoded array is empty or not.
                if has_term_freq == postings.block_cursor.freqs().is_empty() {
                if postings.has_freq() != has_term_freq {
                    return Err(DataCorruption::comment_only(
                        "Term freqs are inconsistent across segments",
                    )
@@ -525,7 +541,7 @@ impl IndexMerger {
    ///
    /// # Returns
    /// The number of documents in the resulting segment.
    pub fn write(&self, mut serializer: SegmentSerializer) -> crate::Result<u32> {
    pub fn write(&self, mut serializer: SegmentSerializer<C>) -> crate::Result<u32> {
        let doc_id_mapping = self.get_doc_id_from_concatenated_data()?;
        debug!("write-fieldnorms");
        if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
@@ -553,6 +569,77 @@ impl IndexMerger {
    }
}

/// Compute the number of non-deleted documents.
///
/// This method will clone and scan through the posting lists.
/// (this is a rather expensive operation).
pub(crate) fn doc_freq_given_deletes<P: Postings + Clone>(
    postings: &P,
    alive_bitset: &AliveBitSet,
) -> u32 {
    let mut postings = postings.clone();
    let mut doc_freq = 0;
    loop {
        let doc = postings.doc();
        if doc == TERMINATED {
            return doc_freq;
        }
        if alive_bitset.is_alive(doc) {
            doc_freq += 1u32;
        }
        postings.advance();
    }
}

fn read_postings_for_merge<C: Codec>(
    inverted_index: &dyn InvertedIndexReader,
    codec: &C,
    term_info: &TermInfo,
    option: IndexRecordOption,
) -> io::Result<<C::PostingsCodec as PostingsCodec>::Postings> {
    codec.load_postings_typed(inverted_index, term_info, option)
}

fn postings_for_merge<C: Codec>(
    inverted_index: &dyn InvertedIndexReader,
    codec: &C,
    term_info: &TermInfo,
    option: IndexRecordOption,
    alive_bitset_opt: Option<&AliveBitSet>,
) -> io::Result<Option<(u32, <C::PostingsCodec as PostingsCodec>::Postings)>> {
    let postings = read_postings_for_merge(inverted_index, codec, term_info, option)?;
    let doc_freq = if let Some(alive_bitset) = alive_bitset_opt {
        doc_freq_given_deletes(&postings, alive_bitset)
    } else {
        // We do not need an exact document frequency here.
        match postings.doc_freq() {
            crate::postings::DocFreq::Exact(doc_freq) => doc_freq,
            crate::postings::DocFreq::Approximate(_) => exact_doc_freq(&postings),
        }
    };

    if doc_freq == 0u32 {
        return Ok(None);
    }

    Ok(Some((doc_freq, postings)))
}

/// If the postings is not able to inform us of the document frequency,
/// we just scan through it.
pub(crate) fn exact_doc_freq<P: Postings + Clone>(postings: &P) -> u32 {
    let mut postings = postings.clone();
    let mut doc_freq = 0;
    loop {
        let doc = postings.doc();
        if doc == TERMINATED {
            return doc_freq;
        }
        doc_freq += 1u32;
        postings.advance();
    }
}
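
A sketch combining the two helpers above, not part of the patch: resolve a `DocFreq` to an exact count, scanning only when the codec reports an approximation.

// Sketch: fall back to a full scan only for approximate doc frequencies.
fn resolve_exact_doc_freq<P: Postings + Clone>(postings: &P) -> u32 {
    match postings.doc_freq() {
        crate::postings::DocFreq::Exact(n) => n,
        crate::postings::DocFreq::Approximate(_) => exact_doc_freq(postings),
    }
}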
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -561,12 +648,16 @@ mod tests {
|
||||
use proptest::strategy::Strategy;
|
||||
use schema::FAST;
|
||||
|
||||
use crate::codec::postings::PostingsCodec;
|
||||
use crate::codec::standard::postings::StandardPostingsCodec;
|
||||
use crate::collector::tests::{
|
||||
BytesFastFieldTestCollector, FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE,
|
||||
};
|
||||
use crate::collector::{Count, FacetCollector};
|
||||
use crate::fastfield::AliveBitSet;
|
||||
use crate::index::{Index, SegmentId};
|
||||
use crate::indexer::NoMergePolicy;
|
||||
use crate::postings::{DocFreq, Postings as _};
|
||||
use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
|
||||
use crate::schema::{
|
||||
Facet, FacetOptions, IndexRecordOption, NumericOptions, TantivyDocument, Term,
|
||||
@@ -1518,10 +1609,10 @@ mod tests {
|
||||
let searcher = reader.searcher();
|
||||
let mut term_scorer = term_query
|
||||
.specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
|
||||
.term_scorer_for_test(searcher.segment_reader(0u32), 1.0)?
|
||||
.term_scorer_for_test(searcher.segment_reader(0u32), 1.0)
|
||||
.unwrap();
|
||||
assert_eq!(term_scorer.doc(), 0);
|
||||
assert_nearly_equals!(term_scorer.block_max_score(), 0.0079681855);
|
||||
assert_nearly_equals!(term_scorer.seek_block_max(0), 0.0079681855);
|
||||
assert_nearly_equals!(term_scorer.score(), 0.0079681855);
|
||||
for _ in 0..81 {
|
||||
writer.add_document(doc!(text=>"hello happy tax payer"))?;
|
||||
@@ -1534,13 +1625,13 @@ mod tests {
|
||||
for segment_reader in searcher.segment_readers() {
|
||||
let mut term_scorer = term_query
|
||||
.specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
|
||||
.term_scorer_for_test(segment_reader, 1.0)?
|
||||
.term_scorer_for_test(segment_reader.as_ref(), 1.0)
|
||||
.unwrap();
|
||||
// the difference compared to before is intrinsic to the bm25 formula. no worries
|
||||
// there.
|
||||
for doc in segment_reader.doc_ids_alive() {
|
||||
assert_eq!(term_scorer.doc(), doc);
|
||||
assert_nearly_equals!(term_scorer.block_max_score(), 0.003478312);
|
||||
assert_nearly_equals!(term_scorer.seek_block_max(doc), 0.003478312);
|
||||
assert_nearly_equals!(term_scorer.score(), 0.003478312);
|
||||
term_scorer.advance();
|
||||
}
|
||||
@@ -1560,12 +1651,12 @@ mod tests {
|
||||
let segment_reader = searcher.segment_reader(0u32);
|
||||
let mut term_scorer = term_query
|
||||
.specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
|
||||
.term_scorer_for_test(segment_reader, 1.0)?
|
||||
.term_scorer_for_test(segment_reader, 1.0)
|
||||
.unwrap();
|
||||
// the difference compared to before is intrinsic to the bm25 formula. no worries there.
|
||||
for doc in segment_reader.doc_ids_alive() {
|
||||
assert_eq!(term_scorer.doc(), doc);
|
||||
assert_nearly_equals!(term_scorer.block_max_score(), 0.003478312);
|
||||
assert_nearly_equals!(term_scorer.seek_block_max(doc), 0.003478312);
|
||||
assert_nearly_equals!(term_scorer.score(), 0.003478312);
|
||||
term_scorer.advance();
|
||||
}
|
||||
@@ -1579,4 +1670,18 @@ mod tests {
|
||||
assert!(((super::MAX_DOC_LIMIT - 1) as i32) >= 0);
|
||||
assert!((super::MAX_DOC_LIMIT as i32) < 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_doc_freq_given_delete() {
|
||||
let docs =
|
||||
<StandardPostingsCodec as PostingsCodec>::Postings::create_from_docs(&[0, 2, 10]);
|
||||
assert_eq!(docs.doc_freq(), DocFreq::Exact(3));
|
||||
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[2], 12);
|
||||
assert_eq!(super::doc_freq_given_deletes(&docs, &alive_bitset), 2);
|
||||
let all_deleted =
|
||||
AliveBitSet::for_test_from_deleted_docs(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12);
|
||||
let docs =
|
||||
<StandardPostingsCodec as PostingsCodec>::Postings::create_from_docs(&[0, 2, 10]);
|
||||
assert_eq!(super::doc_freq_given_deletes(&docs, &all_deleted), 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,16 +1,17 @@
|
||||
use super::IndexWriter;
|
||||
use crate::codec::Codec;
|
||||
use crate::schema::document::Document;
|
||||
use crate::{FutureResult, Opstamp, TantivyDocument};
|
||||
|
||||
/// A prepared commit
|
||||
pub struct PreparedCommit<'a, D: Document = TantivyDocument> {
|
||||
index_writer: &'a mut IndexWriter<D>,
|
||||
pub struct PreparedCommit<'a, C: Codec, D: Document = TantivyDocument> {
|
||||
index_writer: &'a mut IndexWriter<C, D>,
|
||||
payload: Option<String>,
|
||||
opstamp: Opstamp,
|
||||
}
|
||||
|
||||
impl<'a, D: Document> PreparedCommit<'a, D> {
|
||||
pub(crate) fn new(index_writer: &'a mut IndexWriter<D>, opstamp: Opstamp) -> Self {
|
||||
impl<'a, C: Codec, D: Document> PreparedCommit<'a, C, D> {
|
||||
pub(crate) fn new(index_writer: &'a mut IndexWriter<C, D>, opstamp: Opstamp) -> Self {
|
||||
Self {
|
||||
index_writer,
|
||||
payload: None,
|
||||
|
||||
@@ -8,17 +8,17 @@ use crate::store::StoreWriter;
|
||||
|
||||
/// Segment serializer is in charge of laying out on disk
|
||||
/// the data accumulated and sorted by the `SegmentWriter`.
|
||||
pub struct SegmentSerializer {
|
||||
segment: Segment,
|
||||
pub struct SegmentSerializer<C: crate::codec::Codec> {
|
||||
segment: Segment<C>,
|
||||
pub(crate) store_writer: StoreWriter,
|
||||
fast_field_write: WritePtr,
|
||||
fieldnorms_serializer: Option<FieldNormsSerializer>,
|
||||
postings_serializer: InvertedIndexSerializer,
|
||||
}
|
||||
|
||||
impl SegmentSerializer {
|
||||
impl<C: crate::codec::Codec> SegmentSerializer<C> {
|
||||
/// Creates a new `SegmentSerializer`.
|
||||
pub fn for_segment(mut segment: Segment) -> crate::Result<SegmentSerializer> {
|
||||
pub fn for_segment(mut segment: Segment<C>) -> crate::Result<SegmentSerializer<C>> {
|
||||
let settings = segment.index().settings().clone();
|
||||
let store_writer = {
|
||||
let store_write = segment.open_write(SegmentComponent::Store)?;
|
||||
@@ -50,7 +50,7 @@ impl SegmentSerializer {
|
||||
self.store_writer.mem_usage()
|
||||
}
|
||||
|
||||
pub fn segment(&self) -> &Segment {
|
||||
pub fn segment(&self) -> &Segment<C> {
|
||||
&self.segment
|
||||
}
|
||||
|
||||
|
||||
@@ -10,10 +10,13 @@ use std::sync::{Arc, RwLock};
|
||||
use rayon::{ThreadPool, ThreadPoolBuilder};
|
||||
|
||||
use super::segment_manager::SegmentManager;
|
||||
use crate::codec::Codec;
|
||||
use crate::core::META_FILEPATH;
|
||||
use crate::directory::{Directory, DirectoryClone, GarbageCollectionResult};
|
||||
use crate::fastfield::AliveBitSet;
|
||||
use crate::index::{Index, IndexMeta, IndexSettings, Segment, SegmentId, SegmentMeta};
|
||||
use crate::index::{
|
||||
CodecConfiguration, Index, IndexMeta, IndexSettings, Segment, SegmentId, SegmentMeta,
|
||||
};
|
||||
use crate::indexer::delete_queue::DeleteCursor;
|
||||
use crate::indexer::index_writer::advance_deletes;
|
||||
use crate::indexer::merge_operation::MergeOperationInventory;
|
||||
@@ -61,10 +64,10 @@ pub(crate) fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate:
|
||||
// We voluntarily pass a merge_operation ref to guarantee that
|
||||
// the merge_operation is alive during the process
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct SegmentUpdater(Arc<InnerSegmentUpdater>);
|
||||
pub(crate) struct SegmentUpdater<C: Codec>(Arc<InnerSegmentUpdater<C>>);
|
||||
|
||||
impl Deref for SegmentUpdater {
|
||||
type Target = InnerSegmentUpdater;
|
||||
impl<C: Codec> Deref for SegmentUpdater<C> {
|
||||
type Target = InnerSegmentUpdater<C>;
|
||||
|
||||
#[inline]
|
||||
fn deref(&self) -> &Self::Target {
|
||||
@@ -72,8 +75,8 @@ impl Deref for SegmentUpdater {
|
||||
}
|
||||
}
|
||||
|
||||
fn garbage_collect_files(
|
||||
segment_updater: SegmentUpdater,
|
||||
fn garbage_collect_files<C: Codec>(
|
||||
segment_updater: SegmentUpdater<C>,
|
||||
) -> crate::Result<GarbageCollectionResult> {
|
||||
info!("Running garbage collection");
|
||||
let mut index = segment_updater.index.clone();
|
||||
@@ -84,8 +87,8 @@ fn garbage_collect_files(
|
||||
|
||||
/// Merges a list of segments the list of segment givens in the `segment_entries`.
|
||||
/// This function happens in the calling thread and is computationally expensive.
|
||||
fn merge(
index: &Index,
fn merge<Codec: crate::codec::Codec>(
index: &Index<Codec>,
mut segment_entries: Vec<SegmentEntry>,
target_opstamp: Opstamp,
) -> crate::Result<Option<SegmentEntry>> {
@@ -108,13 +111,13 @@ fn merge(

let delete_cursor = segment_entries[0].delete_cursor().clone();

let segments: Vec<Segment> = segment_entries
let segments: Vec<Segment<Codec>> = segment_entries
.iter()
.map(|segment_entry| index.segment(segment_entry.meta().clone()))
.collect();

// An IndexMerger is like a "view" of our merged segments.
let merger: IndexMerger = IndexMerger::open(index.schema(), &segments[..])?;
let merger: IndexMerger<Codec> = IndexMerger::open(index.schema(), &segments[..])?;

// ... we just serialize this index merger in our new segment to merge the segments.
let segment_serializer = SegmentSerializer::for_segment(merged_segment.clone())?;
@@ -139,10 +142,10 @@ fn merge(
/// meant to work if you have an `IndexWriter` running for the origin indices, or
/// the destination `Index`.
#[doc(hidden)]
pub fn merge_indices<T: Into<Box<dyn Directory>>>(
indices: &[Index],
output_directory: T,
) -> crate::Result<Index> {
pub fn merge_indices<Codec: crate::codec::Codec>(
indices: &[Index<Codec>],
output_directory: Box<dyn Directory>,
) -> crate::Result<Index<Codec>> {
if indices.is_empty() {
// If there are no indices to merge, there is no need to do anything.
return Err(crate::TantivyError::InvalidArgument(
@@ -163,7 +166,7 @@ pub fn merge_indices<T: Into<Box<dyn Directory>>>(
));
}

let mut segments: Vec<Segment> = Vec::new();
let mut segments: Vec<Segment<Codec>> = Vec::new();
for index in indices {
segments.extend(index.searchable_segments()?);
}
@@ -185,12 +188,12 @@ pub fn merge_indices<T: Into<Box<dyn Directory>>>(
/// meant to work if you have an `IndexWriter` running for the origin indices, or
/// the destination `Index`.
#[doc(hidden)]
pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
segments: &[Segment],
pub fn merge_filtered_segments<C: crate::codec::Codec, T: Into<Box<dyn Directory>>>(
segments: &[Segment<C>],
target_settings: IndexSettings,
filter_doc_ids: Vec<Option<AliveBitSet>>,
output_directory: T,
) -> crate::Result<Index> {
) -> crate::Result<Index<C>> {
if segments.is_empty() {
// If there are no indices to merge, there is no need to do anything.
return Err(crate::TantivyError::InvalidArgument(
@@ -211,14 +214,15 @@ pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
));
}

let mut merged_index = Index::create(
output_directory,
target_schema.clone(),
target_settings.clone(),
)?;
let mut merged_index: Index<C> = Index::builder()
.schema(target_schema.clone())
.codec(segments[0].index().codec().clone())
.settings(target_settings.clone())
.create(output_directory.into())?;

let merged_segment = merged_index.new_segment();
let merged_segment_id = merged_segment.id();
let merger: IndexMerger =
let merger: IndexMerger<C> =
IndexMerger::open_with_custom_alive_set(merged_index.schema(), segments, filter_doc_ids)?;
let segment_serializer = SegmentSerializer::for_segment(merged_segment)?;
let num_docs = merger.write(segment_serializer)?;
@@ -235,6 +239,7 @@ pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
))
.trim_end()
);
let codec_configuration = CodecConfiguration::from(segments[0].index().codec());

let index_meta = IndexMeta {
index_settings: target_settings, // index_settings of all segments should be the same
@@ -242,6 +247,7 @@ pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
schema: target_schema,
opstamp: 0u64,
payload: Some(stats),
codec: codec_configuration,
};

// save the meta.json
@@ -250,7 +256,7 @@ pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
Ok(merged_index)
}

pub(crate) struct InnerSegmentUpdater {
pub(crate) struct InnerSegmentUpdater<C: Codec> {
// we keep a copy of the current active IndexMeta to
// avoid loading the file every time we need it in the
// `SegmentUpdater`.
@@ -261,7 +267,7 @@ pub(crate) struct InnerSegmentUpdater {
pool: ThreadPool,
merge_thread_pool: ThreadPool,

index: Index,
index: Index<C>,
segment_manager: SegmentManager,
merge_policy: RwLock<Arc<dyn MergePolicy>>,
killed: AtomicBool,
@@ -269,13 +275,13 @@ pub(crate) struct InnerSegmentUpdater {
merge_operations: MergeOperationInventory,
}

impl SegmentUpdater {
impl<Codec: crate::codec::Codec> SegmentUpdater<Codec> {
pub fn create(
index: Index,
index: Index<Codec>,
stamper: Stamper,
delete_cursor: &DeleteCursor,
num_merge_threads: usize,
) -> crate::Result<SegmentUpdater> {
) -> crate::Result<Self> {
let segments = index.searchable_segment_metas()?;
let segment_manager = SegmentManager::from_segments(segments, delete_cursor);
let pool = ThreadPoolBuilder::new()
@@ -404,12 +410,14 @@ impl SegmentUpdater {
//
// Segment 1 from disk 1, Segment 1 from disk 2, etc.
committed_segment_metas.sort_by_key(|segment_meta| -(segment_meta.max_doc() as i32));
let codec = CodecConfiguration::from(index.codec());
let index_meta = IndexMeta {
index_settings: index.settings().clone(),
segments: committed_segment_metas,
schema: index.schema(),
opstamp,
payload: commit_message,
codec,
};
// TODO add context to the error.
save_metas(&index_meta, directory.box_clone().borrow_mut())?;
@@ -443,7 +451,7 @@ impl SegmentUpdater {
opstamp: Opstamp,
payload: Option<String>,
) -> FutureResult<Opstamp> {
let segment_updater: SegmentUpdater = self.clone();
let segment_updater: SegmentUpdater<Codec> = self.clone();
self.schedule_task(move || {
let segment_entries = segment_updater.purge_deletes(opstamp)?;
segment_updater.segment_manager.commit(segment_entries);
@@ -702,6 +710,7 @@ impl SegmentUpdater {
#[cfg(test)]
mod tests {
use super::merge_indices;
use crate::codec::StandardCodec;
use crate::collector::TopDocs;
use crate::directory::RamDirectory;
use crate::fastfield::AliveBitSet;
@@ -915,7 +924,7 @@ mod tests {

#[test]
fn test_merge_empty_indices_array() {
let merge_result = merge_indices(&[], RamDirectory::default());
let merge_result = merge_indices::<StandardCodec>(&[], Box::new(RamDirectory::default()));
assert!(merge_result.is_err());
}

@@ -942,7 +951,10 @@ mod tests {
};

// mismatched schema index list
let result = merge_indices(&[first_index, second_index], RamDirectory::default());
let result = merge_indices(
&[first_index, second_index],
Box::new(RamDirectory::default()),
);
assert!(result.is_err());

Ok(())

@@ -4,6 +4,7 @@ use itertools::Itertools;
use tokenizer_api::BoxTokenStream;

use super::operation::AddOperation;
use crate::codec::Codec;
use crate::fastfield::FastFieldsWriter;
use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
use crate::index::{Segment, SegmentComponent};
@@ -12,7 +13,7 @@ use crate::indexer::segment_serializer::SegmentSerializer;
use crate::json_utils::{index_json_value, IndexingPositionsPerPath};
use crate::postings::{
compute_table_memory_size, serialize_postings, IndexingContext, IndexingPosition,
PerFieldPostingsWriter, PostingsWriter,
PerFieldPostingsWriter, PostingsWriter, PostingsWriterEnum,
};
use crate::schema::document::{Document, Value};
use crate::schema::{FieldEntry, FieldType, Schema, DATE_TIME_PRECISION_INDEXED};
@@ -45,11 +46,11 @@ fn compute_initial_table_size(per_thread_memory_budget: usize) -> crate::Result<
///
/// It creates the postings lists in anonymous memory.
/// The segment is written to disk when the segment gets `finalized`.
pub struct SegmentWriter {
pub struct SegmentWriter<Codec: crate::codec::Codec> {
pub(crate) max_doc: DocId,
pub(crate) ctx: IndexingContext,
pub(crate) per_field_postings_writers: PerFieldPostingsWriter,
pub(crate) segment_serializer: SegmentSerializer,
pub(crate) segment_serializer: SegmentSerializer<Codec>,
pub(crate) fast_field_writers: FastFieldsWriter,
pub(crate) fieldnorms_writer: FieldNormsWriter,
pub(crate) json_path_writer: JsonPathWriter,
@@ -60,7 +61,7 @@ pub struct SegmentWriter {
schema: Schema,
}

impl SegmentWriter {
impl<Codec: crate::codec::Codec> SegmentWriter<Codec> {
/// Creates a new `SegmentWriter`
///
/// The arguments are defined as follows
@@ -70,7 +71,10 @@ impl SegmentWriter {
/// behavior as a memory limit.
/// - segment: The segment being written
/// - schema
pub fn for_segment(memory_budget_in_bytes: usize, segment: Segment) -> crate::Result<Self> {
pub fn for_segment(
memory_budget_in_bytes: usize,
segment: Segment<Codec>,
) -> crate::Result<Self> {
let schema = segment.schema();
let tokenizer_manager = segment.index().tokenizers().clone();
let tokenizer_manager_fast_field = segment.index().fast_field_tokenizer().clone();
@@ -169,7 +173,7 @@ impl SegmentWriter {
}

let (term_buffer, ctx) = (&mut self.term_buffer, &mut self.ctx);
let postings_writer: &mut dyn PostingsWriter =
let postings_writer: &mut PostingsWriterEnum =
self.per_field_postings_writers.get_for_field_mut(field);
term_buffer.clear_with_field(field);

@@ -386,13 +390,13 @@ impl SegmentWriter {
/// to the `SegmentSerializer`.
///
/// `doc_id_map` is used to map to the new doc_id order.
fn remap_and_write(
fn remap_and_write<C: Codec>(
schema: Schema,
per_field_postings_writers: &PerFieldPostingsWriter,
ctx: IndexingContext,
fast_field_writers: FastFieldsWriter,
fieldnorms_writer: &FieldNormsWriter,
mut serializer: SegmentSerializer,
mut serializer: SegmentSerializer<C>,
) -> crate::Result<()> {
debug!("remap-and-write");
if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
@@ -871,7 +875,7 @@ mod tests {
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(0u32);

fn assert_type(reader: &SegmentReader, field: &str, typ: ColumnType) {
fn assert_type(reader: &dyn SegmentReader, field: &str, typ: ColumnType) {
let cols = reader.fast_fields().dynamic_column_handles(field).unwrap();
assert_eq!(cols.len(), 1, "{field}");
assert_eq!(cols[0].column_type(), typ, "{field}");
@@ -890,7 +894,7 @@ mod tests {
assert_type(segment_reader, "json.my_arr", ColumnType::I64);
assert_type(segment_reader, "json.my_arr.my_key", ColumnType::Str);

fn assert_empty(reader: &SegmentReader, field: &str) {
fn assert_empty(reader: &dyn SegmentReader, field: &str) {
let cols = reader.fast_fields().dynamic_column_handles(field).unwrap();
assert_eq!(cols.len(), 0);
}

@@ -1,5 +1,7 @@
use std::marker::PhantomData;

use crate::codec::StandardCodec;
use crate::index::CodecConfiguration;
use crate::indexer::operation::AddOperation;
use crate::indexer::segment_updater::save_metas;
use crate::indexer::SegmentWriter;
@@ -7,22 +9,25 @@ use crate::schema::document::Document;
use crate::{Directory, Index, IndexMeta, Opstamp, Segment, TantivyDocument};

#[doc(hidden)]
pub struct SingleSegmentIndexWriter<D: Document = TantivyDocument> {
segment_writer: SegmentWriter,
segment: Segment,
pub struct SingleSegmentIndexWriter<
Codec: crate::codec::Codec = StandardCodec,
D: Document = TantivyDocument,
> {
segment_writer: SegmentWriter<Codec>,
segment: Segment<Codec>,
opstamp: Opstamp,
_phantom: PhantomData<D>,
_doc: PhantomData<D>,
}

impl<D: Document> SingleSegmentIndexWriter<D> {
pub fn new(index: Index, mem_budget: usize) -> crate::Result<Self> {
impl<Codec: crate::codec::Codec, D: Document> SingleSegmentIndexWriter<Codec, D> {
pub fn new(index: Index<Codec>, mem_budget: usize) -> crate::Result<Self> {
let segment = index.new_segment();
let segment_writer = SegmentWriter::for_segment(mem_budget, segment.clone())?;
Ok(Self {
segment_writer,
segment,
opstamp: 0,
_phantom: PhantomData,
_doc: PhantomData,
})
}

@@ -37,10 +42,10 @@ impl<D: Document> SingleSegmentIndexWriter<D> {
.add_document(AddOperation { opstamp, document })
}

pub fn finalize(self) -> crate::Result<Index> {
pub fn finalize(self) -> crate::Result<Index<Codec>> {
let max_doc = self.segment_writer.max_doc();
self.segment_writer.finalize()?;
let segment: Segment = self.segment.with_max_doc(max_doc);
let segment: Segment<Codec> = self.segment.with_max_doc(max_doc);
let index = segment.index();
let index_meta = IndexMeta {
index_settings: index.settings().clone(),
@@ -48,6 +53,7 @@ impl<D: Document> SingleSegmentIndexWriter<D> {
schema: index.schema(),
opstamp: 0,
payload: None,
codec: CodecConfiguration::from(index.codec()),
};
save_metas(&index_meta, index.directory())?;
index.directory().sync_directory()?;
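For context, a hedged sketch of how this single-segment writer is typically driven end to end on this branch (with the new `Codec` parameter left at its `StandardCodec` default; the schema and document contents are illustrative, and the exact `add_document` signature is assumed from current tantivy):

    use tantivy::schema::{Schema, TEXT};
    use tantivy::{doc, Index, SingleSegmentIndexWriter};

    fn build_single_segment_index() -> tantivy::Result<Index> {
        let mut schema_builder = Schema::builder();
        let title = schema_builder.add_text_field("title", TEXT);
        let index = Index::create_in_ram(schema_builder.build());
        // 15 MB memory budget for the in-memory postings.
        let mut writer = SingleSegmentIndexWriter::new(index, 15_000_000)?;
        writer.add_document(doc!(title => "of mice and men"))?;
        // `finalize` writes the single segment and saves meta.json
        // (now including the codec configuration, per the hunk above).
        writer.finalize()
    }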

src/lib.rs
@@ -166,6 +166,9 @@ mod functional_test;

#[macro_use]
mod macros;

/// Tantivy codecs describe how data is laid out on disk.
pub mod codec;
mod future_result;

// Re-exports
@@ -225,7 +228,7 @@ pub use crate::core::{json_utils, Executor, Searcher, SearcherGeneration};
pub use crate::directory::Directory;
pub use crate::index::{
Index, IndexBuilder, IndexMeta, IndexSettings, InvertedIndexReader, Order, Segment,
SegmentMeta, SegmentReader,
SegmentMeta, SegmentReader, TantivyInvertedIndexReader, TantivySegmentReader,
};
pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
pub use crate::schema::{Document, TantivyDocument, Term};
@@ -545,7 +548,7 @@ pub mod tests {
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let segment_reader: &SegmentReader = searcher.segment_reader(0);
let segment_reader: &dyn SegmentReader = searcher.segment_reader(0);
let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field)?;
assert_eq!(fieldnorms_reader.fieldnorm(0), 3);
assert_eq!(fieldnorms_reader.fieldnorm(1), 0);
@@ -553,7 +556,7 @@ pub mod tests {
Ok(())
}

fn advance_undeleted(docset: &mut dyn DocSet, reader: &SegmentReader) -> bool {
fn advance_undeleted(docset: &mut dyn DocSet, reader: &dyn SegmentReader) -> bool {
let mut doc = docset.advance();
while doc != TERMINATED {
if !reader.is_deleted(doc) {
@@ -1070,7 +1073,7 @@ pub mod tests {
}
let reader = index.reader()?;
let searcher = reader.searcher();
let segment_reader: &SegmentReader = searcher.segment_reader(0);
let segment_reader: &dyn SegmentReader = searcher.segment_reader(0);
{
let fast_field_reader_res = segment_reader.fast_fields().u64("text");
assert!(fast_field_reader_res.is_err());

@@ -22,12 +22,6 @@ pub(crate) struct JsonPostingsWriter<Rec: Recorder> {
non_str_posting_writer: SpecializedPostingsWriter<DocIdRecorder>,
}

impl<Rec: Recorder> From<JsonPostingsWriter<Rec>> for Box<dyn PostingsWriter> {
fn from(json_postings_writer: JsonPostingsWriter<Rec>) -> Box<dyn PostingsWriter> {
Box::new(json_postings_writer)
}
}

impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
#[inline]
fn subscribe(

@@ -1,5 +1,5 @@
use crate::docset::{DocSet, TERMINATED};
use crate::postings::{Postings, SegmentPostings};
use crate::postings::{DocFreq, Postings};
use crate::DocId;

/// `LoadedPostings` is a `DocSet` and `Postings` implementation.
@@ -25,16 +25,16 @@ impl LoadedPostings {
/// Creates a new `LoadedPostings` from any `Postings` implementation.
///
/// It will also preload positions, if positions are available.
pub fn load(segment_postings: &mut SegmentPostings) -> LoadedPostings {
let num_docs = segment_postings.doc_freq() as usize;
pub fn load(postings: &mut Box<dyn Postings>) -> LoadedPostings {
let num_docs: usize = u32::from(postings.doc_freq()) as usize;
let mut doc_ids = Vec::with_capacity(num_docs);
let mut positions = Vec::with_capacity(num_docs);
let mut position_offsets = Vec::with_capacity(num_docs);
while segment_postings.doc() != TERMINATED {
while postings.doc() != TERMINATED {
position_offsets.push(positions.len() as u32);
doc_ids.push(segment_postings.doc());
segment_postings.append_positions_with_offset(0, &mut positions);
segment_postings.advance();
doc_ids.push(postings.doc());
postings.append_positions_with_offset(0, &mut positions);
postings.advance();
}
position_offsets.push(positions.len() as u32);
LoadedPostings {
@@ -101,6 +101,14 @@ impl Postings for LoadedPostings {
output.push(*pos + offset);
}
}

fn has_freq(&self) -> bool {
true
}

fn doc_freq(&self) -> DocFreq {
DocFreq::Exact(self.doc_ids.len() as u32)
}
}

#[cfg(test)]

@@ -4,7 +4,6 @@ mod block_search;

pub(crate) use self::block_search::branchless_binary_search;

mod block_segment_postings;
pub(crate) mod compression;
mod indexing_context;
mod json_postings_writer;
@@ -13,32 +12,24 @@ mod per_field_postings_writer;
mod postings;
mod postings_writer;
mod recorder;
mod segment_postings;
mod serializer;
mod skip;
/// Serializer module for the inverted index
pub mod serializer;
pub(crate) mod skip;
mod term_info;

pub(crate) use loaded_postings::LoadedPostings;
pub use postings::DocFreq;
pub(crate) use stacker::compute_table_memory_size;

pub use self::block_segment_postings::BlockSegmentPostings;
pub(crate) use self::indexing_context::IndexingContext;
pub(crate) use self::per_field_postings_writer::PerFieldPostingsWriter;
pub use self::postings::Postings;
pub(crate) use self::postings_writer::{serialize_postings, IndexingPosition, PostingsWriter};
pub use self::segment_postings::SegmentPostings;
pub(crate) use self::postings_writer::{
serialize_postings, IndexingPosition, PostingsWriter, PostingsWriterEnum,
};
pub use self::serializer::{FieldSerializer, InvertedIndexSerializer};
pub(crate) use self::skip::{BlockInfo, SkipReader};
pub use self::term_info::TermInfo;

#[expect(clippy::enum_variant_names)]
#[derive(Debug, PartialEq, Clone, Copy, Eq)]
pub(crate) enum FreqReadingOption {
NoFreq,
SkipFreq,
ReadFreq,
}

#[cfg(test)]
pub(crate) mod tests {
use std::mem;
@@ -46,9 +37,10 @@ pub(crate) mod tests {
use super::{InvertedIndexSerializer, Postings};
use crate::docset::{DocSet, TERMINATED};
use crate::fieldnorm::FieldNormReader;
use crate::index::{Index, SegmentComponent, SegmentReader};
use crate::index::{Index, SegmentComponent};
use crate::indexer::operation::AddOperation;
use crate::indexer::SegmentWriter;
use crate::postings::DocFreq;
use crate::query::Scorer;
use crate::schema::{
Field, IndexRecordOption, Schema, Term, TextFieldIndexing, TextOptions, INDEXED, TEXT,
@@ -258,7 +250,13 @@ pub(crate) mod tests {
segment_writer.finalize()?;
}
{
let segment_reader = SegmentReader::open(&segment)?;
let segment_reader = crate::codec::Codec::open_segment_reader(
segment.index().codec(),
segment.index().directory(),
segment.meta(),
segment.schema(),
None,
)?;
{
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field)?;
assert_eq!(fieldnorm_reader.fieldnorm(0), 8 + 5);
@@ -279,11 +277,11 @@ pub(crate) mod tests {
}
{
let term_a = Term::from_field_text(text_field, "a");
let mut postings_a = segment_reader
let mut postings_a: Box<dyn Postings> = segment_reader
.inverted_index(term_a.field())?
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)?
.unwrap();
assert_eq!(postings_a.len(), 1000);
assert_eq!(postings_a.doc_freq(), DocFreq::Exact(1000));
assert_eq!(postings_a.doc(), 0);
assert_eq!(postings_a.term_freq(), 6);
postings_a.positions(&mut positions);
@@ -306,7 +304,7 @@ pub(crate) mod tests {
.inverted_index(term_e.field())?
.read_postings(&term_e, IndexRecordOption::WithFreqsAndPositions)?
.unwrap();
assert_eq!(postings_e.len(), 1000 - 2);
assert_eq!(postings_e.doc_freq(), DocFreq::Exact(1000 - 2));
for i in 2u32..1000u32 {
assert_eq!(postings_e.term_freq(), i);
postings_e.positions(&mut positions);

@@ -1,16 +1,15 @@
use crate::postings::json_postings_writer::JsonPostingsWriter;
use crate::postings::postings_writer::SpecializedPostingsWriter;
use crate::postings::postings_writer::{PostingsWriterEnum, SpecializedPostingsWriter};
use crate::postings::recorder::{DocIdRecorder, TermFrequencyRecorder, TfAndPositionRecorder};
use crate::postings::PostingsWriter;
use crate::schema::{Field, FieldEntry, FieldType, IndexRecordOption, Schema};

pub(crate) struct PerFieldPostingsWriter {
per_field_postings_writers: Vec<Box<dyn PostingsWriter>>,
per_field_postings_writers: Vec<PostingsWriterEnum>,
}

impl PerFieldPostingsWriter {
pub fn for_schema(schema: &Schema) -> Self {
let per_field_postings_writers = schema
let per_field_postings_writers: Vec<PostingsWriterEnum> = schema
.fields()
.map(|(_, field_entry)| posting_writer_from_field_entry(field_entry))
.collect();
@@ -19,16 +18,16 @@ impl PerFieldPostingsWriter {
}
}

pub(crate) fn get_for_field(&self, field: Field) -> &dyn PostingsWriter {
self.per_field_postings_writers[field.field_id() as usize].as_ref()
pub(crate) fn get_for_field(&self, field: Field) -> &PostingsWriterEnum {
&self.per_field_postings_writers[field.field_id() as usize]
}

pub(crate) fn get_for_field_mut(&mut self, field: Field) -> &mut dyn PostingsWriter {
self.per_field_postings_writers[field.field_id() as usize].as_mut()
pub(crate) fn get_for_field_mut(&mut self, field: Field) -> &mut PostingsWriterEnum {
&mut self.per_field_postings_writers[field.field_id() as usize]
}
}

fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> Box<dyn PostingsWriter> {
fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> PostingsWriterEnum {
match *field_entry.field_type() {
FieldType::Str(ref text_options) => text_options
.get_indexing_options()
@@ -51,7 +50,7 @@ fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> Box<dyn Postings
| FieldType::Date(_)
| FieldType::Bytes(_)
| FieldType::IpAddr(_)
| FieldType::Facet(_) => Box::<SpecializedPostingsWriter<DocIdRecorder>>::default(),
| FieldType::Facet(_) => <SpecializedPostingsWriter<DocIdRecorder>>::default().into(),
FieldType::JsonObject(ref json_object_options) => {
if let Some(text_indexing_option) = json_object_options.get_text_indexing_options() {
match text_indexing_option.index_option() {

@@ -1,5 +1,25 @@
use crate::docset::DocSet;

/// Result of the doc_freq method.
///
/// Postings can inform us that the document frequency is approximate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocFreq {
/// The document frequency is approximate.
Approximate(u32),
/// The document frequency is exact.
Exact(u32),
}

impl From<DocFreq> for u32 {
fn from(doc_freq: DocFreq) -> Self {
match doc_freq {
DocFreq::Approximate(approximate_doc_freq) => approximate_doc_freq,
DocFreq::Exact(doc_freq) => doc_freq,
}
}
}
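A minimal sketch of how downstream code might consume the new `DocFreq` (the enum is copied from the hunk above so the snippet stands alone; the `estimate` helper is hypothetical, not part of tantivy):

    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum DocFreq {
        Approximate(u32),
        Exact(u32),
    }

    // Hypothetical helper: treat an approximate count as an estimate only,
    // e.g. to size a collector, while trusting exact counts for assertions.
    fn estimate(doc_freq: DocFreq) -> (u32, bool) {
        match doc_freq {
            DocFreq::Approximate(n) => (n, false),
            DocFreq::Exact(n) => (n, true),
        }
    }

    fn main() {
        let (n, exact) = estimate(DocFreq::Approximate(1_024));
        assert!(!exact);
        // `From<DocFreq> for u32` (also in the hunk) yields the raw count either way.
        assert_eq!(n, 1_024);
    }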

/// Postings (also called inverted list)
///
/// For a given term, it is the list of doc ids of the doc
@@ -14,6 +34,9 @@ pub trait Postings: DocSet + 'static {
/// The number of times the term appears in the document.
fn term_freq(&self) -> u32;

/// Returns the number of documents containing the term in the segment.
fn doc_freq(&self) -> DocFreq;

/// Returns the positions offset by a given value.
/// It is not necessary to clear the `output` before calling this method.
/// The output vector will be resized to the `term_freq`.
@@ -31,6 +54,16 @@ pub trait Postings: DocSet + 'static {
fn positions(&mut self, output: &mut Vec<u32>) {
self.positions_with_offset(0u32, output);
}

/// Returns true if the term frequency is available.
///
/// This is a tricky question, because on JSON fields, it is possible
/// for a text term to have term frequencies, whereas a number term in
/// the same field has none.
///
/// This function returns whether the actual term has term frequencies or not.
/// In the JSON field example above, `has_freq` should return true for the
/// former and false for the latter.
fn has_freq(&self) -> bool;
}
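To make that JSON-field subtlety concrete, a hedged sketch of a caller that guards `term_freq` behind `has_freq`; the `Postings`, `DocSet`, and `TERMINATED` items are tantivy's (with `has_freq` being the method added in this branch), while `dump_tf` is a hypothetical helper:

    use tantivy::postings::Postings;
    use tantivy::{DocSet, TERMINATED};

    // Hypothetical helper: for terms without frequencies (e.g. a numeric term
    // inside a JSON field), fall back to a term frequency of 1 instead of
    // reading a frequency that was never recorded.
    fn dump_tf(postings: &mut dyn Postings) -> Vec<(u32, u32)> {
        let has_freq = postings.has_freq();
        let mut out = Vec::new();
        while postings.doc() != TERMINATED {
            let tf = if has_freq { postings.term_freq() } else { 1 };
            out.push((postings.doc(), tf));
            postings.advance();
        }
        out
    }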

impl Postings for Box<dyn Postings> {
@@ -41,4 +74,12 @@ impl Postings for Box<dyn Postings> {
fn append_positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) {
(**self).append_positions_with_offset(offset, output);
}

fn has_freq(&self) -> bool {
(**self).has_freq()
}

fn doc_freq(&self) -> DocFreq {
(**self).doc_freq()
}
}

@@ -7,7 +7,10 @@ use stacker::Addr;
use crate::fieldnorm::FieldNormReaders;
use crate::indexer::indexing_term::IndexingTerm;
use crate::indexer::path_to_unordered_id::OrderedPathId;
use crate::postings::recorder::{BufferLender, Recorder};
use crate::postings::json_postings_writer::JsonPostingsWriter;
use crate::postings::recorder::{
BufferLender, DocIdRecorder, Recorder, TermFrequencyRecorder, TfAndPositionRecorder,
};
use crate::postings::{
FieldSerializer, IndexingContext, InvertedIndexSerializer, PerFieldPostingsWriter,
};
@@ -100,6 +103,141 @@ pub(crate) struct IndexingPosition {
pub end_position: u32,
}

pub enum PostingsWriterEnum {
DocId(SpecializedPostingsWriter<DocIdRecorder>),
DocIdTf(SpecializedPostingsWriter<TermFrequencyRecorder>),
DocTfAndPosition(SpecializedPostingsWriter<TfAndPositionRecorder>),
JsonDocId(JsonPostingsWriter<DocIdRecorder>),
JsonDocIdTf(JsonPostingsWriter<TermFrequencyRecorder>),
JsonDocTfAndPosition(JsonPostingsWriter<TfAndPositionRecorder>),
}

impl From<SpecializedPostingsWriter<DocIdRecorder>> for PostingsWriterEnum {
fn from(doc_id_recorder_writer: SpecializedPostingsWriter<DocIdRecorder>) -> Self {
PostingsWriterEnum::DocId(doc_id_recorder_writer)
}
}

impl From<SpecializedPostingsWriter<TermFrequencyRecorder>> for PostingsWriterEnum {
fn from(doc_id_tf_recorder_writer: SpecializedPostingsWriter<TermFrequencyRecorder>) -> Self {
PostingsWriterEnum::DocIdTf(doc_id_tf_recorder_writer)
}
}

impl From<SpecializedPostingsWriter<TfAndPositionRecorder>> for PostingsWriterEnum {
fn from(
doc_id_tf_and_positions_recorder_writer: SpecializedPostingsWriter<TfAndPositionRecorder>,
) -> Self {
PostingsWriterEnum::DocTfAndPosition(doc_id_tf_and_positions_recorder_writer)
}
}

impl From<JsonPostingsWriter<DocIdRecorder>> for PostingsWriterEnum {
fn from(doc_id_recorder_writer: JsonPostingsWriter<DocIdRecorder>) -> Self {
PostingsWriterEnum::JsonDocId(doc_id_recorder_writer)
}
}

impl From<JsonPostingsWriter<TermFrequencyRecorder>> for PostingsWriterEnum {
fn from(doc_id_tf_recorder_writer: JsonPostingsWriter<TermFrequencyRecorder>) -> Self {
PostingsWriterEnum::JsonDocIdTf(doc_id_tf_recorder_writer)
}
}

impl From<JsonPostingsWriter<TfAndPositionRecorder>> for PostingsWriterEnum {
fn from(
doc_id_tf_and_positions_recorder_writer: JsonPostingsWriter<TfAndPositionRecorder>,
) -> Self {
PostingsWriterEnum::JsonDocTfAndPosition(doc_id_tf_and_positions_recorder_writer)
}
}

impl PostingsWriter for PostingsWriterEnum {
fn subscribe(&mut self, doc: DocId, pos: u32, term: &IndexingTerm, ctx: &mut IndexingContext) {
match self {
PostingsWriterEnum::DocId(writer) => writer.subscribe(doc, pos, term, ctx),
PostingsWriterEnum::DocIdTf(writer) => writer.subscribe(doc, pos, term, ctx),
PostingsWriterEnum::DocTfAndPosition(writer) => writer.subscribe(doc, pos, term, ctx),
PostingsWriterEnum::JsonDocId(writer) => writer.subscribe(doc, pos, term, ctx),
PostingsWriterEnum::JsonDocIdTf(writer) => writer.subscribe(doc, pos, term, ctx),
PostingsWriterEnum::JsonDocTfAndPosition(writer) => {
writer.subscribe(doc, pos, term, ctx)
}
}
}

fn serialize(
&self,
term_addrs: &[(Field, OrderedPathId, &[u8], Addr)],
ordered_id_to_path: &[&str],
ctx: &IndexingContext,
serializer: &mut FieldSerializer,
) -> io::Result<()> {
match self {
PostingsWriterEnum::DocId(writer) => {
writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
}
PostingsWriterEnum::DocIdTf(writer) => {
writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
}
PostingsWriterEnum::DocTfAndPosition(writer) => {
writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
}
PostingsWriterEnum::JsonDocId(writer) => {
writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
}
PostingsWriterEnum::JsonDocIdTf(writer) => {
writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
}
PostingsWriterEnum::JsonDocTfAndPosition(writer) => {
writer.serialize(term_addrs, ordered_id_to_path, ctx, serializer)
}
}
}

|
||||
fn index_text(
|
||||
&mut self,
|
||||
doc_id: DocId,
|
||||
token_stream: &mut dyn TokenStream,
|
||||
term_buffer: &mut IndexingTerm,
|
||||
ctx: &mut IndexingContext,
|
||||
indexing_position: &mut IndexingPosition,
|
||||
) {
|
||||
match self {
|
||||
PostingsWriterEnum::DocId(writer) => {
|
||||
writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
|
||||
}
|
||||
PostingsWriterEnum::DocIdTf(writer) => {
|
||||
writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
|
||||
}
|
||||
PostingsWriterEnum::DocTfAndPosition(writer) => {
|
||||
writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
|
||||
}
|
||||
PostingsWriterEnum::JsonDocId(writer) => {
|
||||
writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
|
||||
}
|
||||
PostingsWriterEnum::JsonDocIdTf(writer) => {
|
||||
writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
|
||||
}
|
||||
PostingsWriterEnum::JsonDocTfAndPosition(writer) => {
|
||||
writer.index_text(doc_id, token_stream, term_buffer, ctx, indexing_position)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn total_num_tokens(&self) -> u64 {
|
||||
match self {
|
||||
PostingsWriterEnum::DocId(writer) => writer.total_num_tokens(),
|
||||
PostingsWriterEnum::DocIdTf(writer) => writer.total_num_tokens(),
|
||||
PostingsWriterEnum::DocTfAndPosition(writer) => writer.total_num_tokens(),
|
||||
PostingsWriterEnum::JsonDocId(writer) => writer.total_num_tokens(),
|
||||
PostingsWriterEnum::JsonDocIdTf(writer) => writer.total_num_tokens(),
|
||||
PostingsWriterEnum::JsonDocTfAndPosition(writer) => writer.total_num_tokens(),
|
||||
}
|
||||
}
|
||||
}
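A note on the design: replacing `Box<dyn PostingsWriter>` with this closed enum trades a vtable call for a `match` the compiler can inline per variant, at the cost of writing out each arm. A detached sketch of the same enum-dispatch pattern, with illustrative names rather than tantivy's types:

    trait Counter {
        fn count(&self) -> u64;
    }

    struct Small(u64);
    struct Large(u64);

    impl Counter for Small {
        fn count(&self) -> u64 { self.0 }
    }
    impl Counter for Large {
        fn count(&self) -> u64 { self.0 * 1_000 }
    }

    // Closed set of implementations: dispatch by match instead of vtable.
    enum CounterEnum {
        Small(Small),
        Large(Large),
    }

    impl Counter for CounterEnum {
        fn count(&self) -> u64 {
            match self {
                CounterEnum::Small(c) => c.count(),
                CounterEnum::Large(c) => c.count(),
            }
        }
    }

    fn main() {
        let c: CounterEnum = CounterEnum::Large(Large(3));
        assert_eq!(c.count(), 3_000);
    }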
/// The `PostingsWriter` is in charge of receiving documents
/// and building a `Segment` in anonymous memory.
///
@@ -171,14 +309,6 @@ pub(crate) struct SpecializedPostingsWriter<Rec: Recorder> {
_recorder_type: PhantomData<Rec>,
}

impl<Rec: Recorder> From<SpecializedPostingsWriter<Rec>> for Box<dyn PostingsWriter> {
fn from(
specialized_postings_writer: SpecializedPostingsWriter<Rec>,
) -> Box<dyn PostingsWriter> {
Box::new(specialized_postings_writer)
}
}

impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {
#[inline]
pub(crate) fn serialize_one_term(

@@ -70,7 +70,7 @@ pub(crate) trait Recorder: Copy + Default + Send + Sync + 'static {
fn serialize(
&self,
arena: &MemoryArena,
serializer: &mut FieldSerializer<'_>,
serializer: &mut FieldSerializer,
buffer_lender: &mut BufferLender,
);
/// Returns the number of documents containing this term.
@@ -113,7 +113,7 @@ impl Recorder for DocIdRecorder {
fn serialize(
&self,
arena: &MemoryArena,
serializer: &mut FieldSerializer<'_>,
serializer: &mut FieldSerializer,
buffer_lender: &mut BufferLender,
) {
let buffer = buffer_lender.lend_u8();
@@ -181,7 +181,7 @@ impl Recorder for TermFrequencyRecorder {
fn serialize(
&self,
arena: &MemoryArena,
serializer: &mut FieldSerializer<'_>,
serializer: &mut FieldSerializer,
buffer_lender: &mut BufferLender,
) {
let buffer = buffer_lender.lend_u8();
@@ -238,7 +238,7 @@ impl Recorder for TfAndPositionRecorder {
fn serialize(
&self,
arena: &MemoryArena,
serializer: &mut FieldSerializer<'_>,
serializer: &mut FieldSerializer,
buffer_lender: &mut BufferLender,
) {
let (buffer_u8, buffer_positions) = buffer_lender.lend_all();

@@ -8,10 +8,10 @@ use crate::directory::{CompositeWrite, WritePtr};
use crate::fieldnorm::FieldNormReader;
use crate::index::Segment;
use crate::positions::PositionSerializer;
use crate::postings::compression::{BlockEncoder, VIntEncoder, COMPRESSION_BLOCK_SIZE};
use crate::postings::compression::{BlockEncoder, VIntEncoder as _, COMPRESSION_BLOCK_SIZE};
use crate::postings::skip::SkipSerializer;
use crate::query::Bm25Weight;
use crate::schema::{Field, FieldEntry, FieldType, IndexRecordOption, Schema};
use crate::schema::{Field, FieldEntry, IndexRecordOption, Schema};
use crate::termdict::TermDictionaryBuilder;
use crate::{DocId, Score};

@@ -55,7 +55,9 @@ pub struct InvertedIndexSerializer {

impl InvertedIndexSerializer {
/// Open a new `InvertedIndexSerializer` for the given segment
pub fn open(segment: &mut Segment) -> crate::Result<InvertedIndexSerializer> {
pub fn open<C: crate::codec::Codec>(
segment: &mut Segment<C>,
) -> crate::Result<InvertedIndexSerializer> {
use crate::index::SegmentComponent::{Positions, Postings, Terms};
let inv_index_serializer = InvertedIndexSerializer {
terms_write: CompositeWrite::wrap(segment.open_write(Terms)?),
@@ -80,9 +82,12 @@ impl InvertedIndexSerializer {
let term_dictionary_write = self.terms_write.for_field(field);
let postings_write = self.postings_write.for_field(field);
let positions_write = self.positions_write.for_field(field);
let field_type: FieldType = (*field_entry.field_type()).clone();
let index_record_option = field_entry
.field_type()
.index_record_option()
.unwrap_or(IndexRecordOption::Basic);
FieldSerializer::create(
&field_type,
index_record_option,
total_num_tokens,
term_dictionary_write,
postings_write,
@@ -102,29 +107,27 @@ impl InvertedIndexSerializer {

/// The field serializer is in charge of
/// the serialization of a specific field.
pub struct FieldSerializer<'a> {
term_dictionary_builder: TermDictionaryBuilder<&'a mut CountingWriter<WritePtr>>,
pub struct FieldSerializer<'a, W: Write = WritePtr> {
term_dictionary_builder: TermDictionaryBuilder<&'a mut CountingWriter<W>>,
postings_serializer: PostingsSerializer,
positions_serializer_opt: Option<PositionSerializer<&'a mut CountingWriter<WritePtr>>>,
positions_serializer_opt: Option<PositionSerializer<&'a mut CountingWriter<W>>>,
current_term_info: TermInfo,
term_open: bool,
postings_write: &'a mut CountingWriter<WritePtr>,
postings_write: &'a mut CountingWriter<W>,
postings_start_offset: u64,
}

impl<'a> FieldSerializer<'a> {
fn create(
field_type: &FieldType,
impl<'a, W: Write> FieldSerializer<'a, W> {
/// Creates a new `FieldSerializer` for the given field type.
pub fn create(
index_record_option: IndexRecordOption,
total_num_tokens: u64,
term_dictionary_write: &'a mut CountingWriter<WritePtr>,
postings_write: &'a mut CountingWriter<WritePtr>,
positions_write: &'a mut CountingWriter<WritePtr>,
term_dictionary_write: &'a mut CountingWriter<W>,
postings_write: &'a mut CountingWriter<W>,
positions_write: &'a mut CountingWriter<W>,
fieldnorm_reader: Option<FieldNormReader>,
) -> io::Result<FieldSerializer<'a>> {
) -> io::Result<FieldSerializer<'a, W>> {
total_num_tokens.serialize(postings_write)?;
let index_record_option = field_type
.index_record_option()
.unwrap_or(IndexRecordOption::Basic);
let term_dictionary_builder = TermDictionaryBuilder::create(term_dictionary_write)?;
let average_fieldnorm = fieldnorm_reader
.as_ref()
@@ -192,6 +195,11 @@ impl<'a> FieldSerializer<'a> {
Ok(())
}

/// Starts the postings for a new term without recording term frequencies.
pub fn new_term_without_freq(&mut self, term: &[u8]) -> io::Result<()> {
self.new_term(term, 0, false)
}

/// Serialize the information that a document contains for the current term:
/// its term frequency, and the position deltas.
///
@@ -297,6 +305,7 @@ impl Block {
}
}

/// Serializer for postings lists.
pub struct PostingsSerializer {
last_doc_id_encoded: u32,

@@ -316,6 +325,9 @@ pub struct PostingsSerializer {
}

impl PostingsSerializer {
/// Creates a new `PostingsSerializer`.
/// * avg_fieldnorm - average field norm for the field being serialized.
/// * mode - indexing options for the field being serialized.
pub fn new(
avg_fieldnorm: Score,
mode: IndexRecordOption,
@@ -338,6 +350,8 @@ impl PostingsSerializer {
}
}

/// Starts the serialization for a new term.
/// * term_doc_freq - the number of documents containing the term.
pub fn new_term(&mut self, term_doc_freq: u32, record_term_freq: bool) {
self.bm25_weight = None;

@@ -377,6 +391,7 @@ impl PostingsSerializer {
self.postings_write.extend(block_encoded);
}
if self.term_has_freq {
// encode the term frequencies
let (num_bits, block_encoded): (u8, &[u8]) = self
.block_encoder
.compress_block_unsorted(self.block.term_freqs(), true);
@@ -417,6 +432,9 @@ impl PostingsSerializer {
self.block.clear();
}

/// Register that the given document contains the current term.
/// * doc_id - the document id.
/// * term_freq - the term frequency within the document.
pub fn write_doc(&mut self, doc_id: DocId, term_freq: u32) {
self.block.append_doc(doc_id, term_freq);
if self.block.is_full() {
@@ -424,6 +442,7 @@ impl PostingsSerializer {
}
}

/// Finish the serialization for this term.
pub fn close_term(
&mut self,
doc_freq: u32,
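The hunk above cuts off mid-signature, but the docs it adds describe a fixed call protocol: `new_term`, then `write_doc` per posting, then `close_term`. Since the full `close_term` signature is not visible here, the following is a self-contained toy that only mirrors that call sequence; it is not tantivy's implementation:

    /// Toy stand-in mirroring the protocol: new_term -> write_doc* -> close_term.
    struct ToyPostingsSerializer {
        docs: Vec<(u32, u32)>,
    }

    impl ToyPostingsSerializer {
        fn new() -> Self {
            ToyPostingsSerializer { docs: Vec::new() }
        }
        fn new_term(&mut self, term_doc_freq: u32, record_term_freq: bool) {
            // The real serializer resets BM25 state and block buffers here.
            let _ = (term_doc_freq, record_term_freq);
            self.docs.clear();
        }
        fn write_doc(&mut self, doc_id: u32, term_freq: u32) {
            // The real serializer appends to a fixed-size block and flushes
            // full blocks bitpacked as it goes.
            self.docs.push((doc_id, term_freq));
        }
        fn close_term(&mut self, doc_freq: u32) {
            // The real serializer encodes the trailing partial block and
            // writes the skip information here.
            assert_eq!(self.docs.len() as u32, doc_freq);
        }
    }

    fn main() {
        let mut serializer = ToyPostingsSerializer::new();
        serializer.new_term(3, true);
        for (doc, tf) in [(2u32, 1u32), (5, 4), (9, 2)] {
            serializer.write_doc(doc, tf);
        }
        serializer.close_term(3);
    }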
@@ -14,7 +14,11 @@ use crate::{DocId, Score, TERMINATED};
// (requiring a 6th bit), but the biggest doc_id we can want to encode is TERMINATED-1, which can
// be represented on 31b without delta encoding.
fn encode_bitwidth(bitwidth: u8, delta_1: bool) -> u8 {
assert!(bitwidth < 32);
assert!(
    bitwidth < 32,
    "bitwidth needs to be less than 32, but got {}",
    bitwidth
);
bitwidth | ((delta_1 as u8) << 6)
}
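Since the hunk only shows the encoder, here is a sketch of the inverse for orientation; the `decode_bitwidth` name is hypothetical, while the bit layout (bitwidth in the low bits, the delta-1 flag on bit 6) is read directly off the function above:

    // Hypothetical inverse of encode_bitwidth.
    fn decode_bitwidth(encoded: u8) -> (u8, bool) {
        let delta_1 = (encoded >> 6) & 1 == 1;
        let bitwidth = encoded & 0b11_1111;
        (bitwidth, delta_1)
    }

    fn encode_bitwidth(bitwidth: u8, delta_1: bool) -> u8 {
        assert!(bitwidth < 32);
        bitwidth | ((delta_1 as u8) << 6)
    }

    fn main() {
        // Round-trip over the whole valid domain.
        for bitwidth in 0..32u8 {
            for delta_1 in [false, true] {
                let encoded = encode_bitwidth(bitwidth, delta_1);
                assert_eq!(decode_bitwidth(encoded), (bitwidth, delta_1));
            }
        }
    }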

@@ -142,23 +146,6 @@ impl SkipReader {
skip_reader
}

pub fn reset(&mut self, data: OwnedBytes, doc_freq: u32) {
self.last_doc_in_block = if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
0
} else {
TERMINATED
};
self.last_doc_in_previous_block = 0u32;
self.owned_read = data;
self.block_info = BlockInfo::VInt { num_docs: doc_freq };
self.byte_offset = 0;
self.remaining_docs = doc_freq;
self.position_offset = 0u64;
if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 {
self.read_block_info();
}
}

// Returns the block max score for this block if available.
//
// The block max score is available for all full bitpacked block,

@@ -2,7 +2,7 @@ use crate::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
use crate::index::SegmentReader;
use crate::query::boost_query::BoostScorer;
use crate::query::explanation::does_not_match;
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
use crate::query::{box_scorer, EnableScoring, Explanation, Query, Scorer, Weight};
use crate::{DocId, Score};

/// Query that matches all of the documents.
@@ -21,16 +21,16 @@ impl Query for AllQuery {
pub struct AllWeight;

impl Weight for AllWeight {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
let all_scorer = AllScorer::new(reader.max_doc());
if boost != 1.0 {
Ok(Box::new(BoostScorer::new(all_scorer, boost)))
Ok(box_scorer(BoostScorer::new(all_scorer, boost)))
} else {
Ok(Box::new(all_scorer))
Ok(box_scorer(all_scorer))
}
}

fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result<Explanation> {
if doc >= reader.max_doc() {
return Err(does_not_match(doc));
}
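For orientation, `AllQuery` is part of tantivy's public API; a hedged usage sketch (the schema and the `Count` collector are illustrative choices, not part of this diff):

    use tantivy::collector::Count;
    use tantivy::query::AllQuery;
    use tantivy::schema::{Schema, TEXT};
    use tantivy::{doc, Index};

    fn count_all_docs() -> tantivy::Result<usize> {
        let mut schema_builder = Schema::builder();
        let title = schema_builder.add_text_field("title", TEXT);
        let index = Index::create_in_ram(schema_builder.build());
        let mut writer = index.writer(15_000_000)?;
        writer.add_document(doc!(title => "a"))?;
        writer.add_document(doc!(title => "b"))?;
        writer.commit()?;
        // AllQuery matches every non-deleted document with a constant score.
        let count = index.reader()?.searcher().search(&AllQuery, &Count)?;
        Ok(count)
    }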

@@ -10,7 +10,7 @@ use crate::postings::TermInfo;
use crate::query::{BitSetDocSet, ConstScorer, Explanation, Scorer, Weight};
use crate::schema::{Field, IndexRecordOption};
use crate::termdict::{TermDictionary, TermStreamer};
use crate::{DocId, Score, TantivyError};
use crate::{DocId, DocSet, Score, TantivyError};

/// A weight struct for Fuzzy Term and Regex Queries
pub struct AutomatonWeight<A> {
@@ -67,7 +67,7 @@ where
}

/// Returns the term infos that match the automaton
pub fn get_match_term_infos(&self, reader: &SegmentReader) -> crate::Result<Vec<TermInfo>> {
pub fn get_match_term_infos(&self, reader: &dyn SegmentReader) -> crate::Result<Vec<TermInfo>> {
let inverted_index = reader.inverted_index(self.field)?;
let term_dict = inverted_index.terms();
let mut term_stream = self.automaton_stream(term_dict)?;
@@ -84,7 +84,7 @@ where
A: Automaton + Send + Sync + 'static,
A::State: Clone,
{
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
let max_doc = reader.max_doc();
let mut doc_bitset = BitSet::with_max_value(max_doc);
let inverted_index = reader.inverted_index(self.field)?;
@@ -92,25 +92,18 @@ where
let mut term_stream = self.automaton_stream(term_dict)?;
while term_stream.advance() {
let term_info = term_stream.value();
let mut block_segment_postings = inverted_index
.read_block_postings_from_terminfo(term_info, IndexRecordOption::Basic)?;
loop {
let docs = block_segment_postings.docs();
if docs.is_empty() {
break;
}
for &doc in docs {
doc_bitset.insert(doc);
}
block_segment_postings.advance();
}
inverted_index.fill_bitset_for_term(
term_info,
IndexRecordOption::Basic,
&mut doc_bitset,
)?;
}
let doc_bitset = BitSetDocSet::from(doc_bitset);
let const_scorer = ConstScorer::new(doc_bitset, boost);
Ok(Box::new(const_scorer))
}

fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result<Explanation> {
let mut scorer = self.scorer(reader, 1.0)?;
if scorer.seek(doc) == doc {
Ok(Explanation::new("AutomatonScorer", 1.0))

@@ -24,6 +24,13 @@ impl BitSetDocSet {
self.cursor_bucket = bucket_addr;
self.cursor_tinybitset = self.docs.tinyset(bucket_addr);
}

/// Returns the number of documents in the bitset.
///
/// This call is not free: it counts the set bits in the bitset.
pub fn doc_freq(&self) -> u32 {
self.docs.len() as u32
}
}
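Because the count is recomputed on each call, callers that need it repeatedly should cache it. A tiny stand-in sketch of popcount-based counting (`ToyBitSet` is illustrative, not tantivy's `BitSet`):

    // Stand-in bitset: one u64 word per 64 documents.
    struct ToyBitSet {
        words: Vec<u64>,
    }

    impl ToyBitSet {
        fn with_max_value(max_value: u32) -> Self {
            ToyBitSet { words: vec![0; (max_value as usize + 63) / 64] }
        }
        fn insert(&mut self, el: u32) {
            self.words[(el / 64) as usize] |= 1u64 << (el % 64);
        }
        // Not free: popcounts every word on each call.
        fn doc_freq(&self) -> u32 {
            self.words.iter().map(|w| w.count_ones()).sum()
        }
    }

    fn main() {
        let mut bitset = ToyBitSet::with_max_value(200);
        for doc in [3u32, 64, 65, 199] {
            bitset.insert(doc);
        }
        // Cache the count instead of calling doc_freq in a hot loop.
        let doc_freq = bitset.doc_freq();
        assert_eq!(doc_freq, 4);
    }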

impl From<BitSet> for BitSetDocSet {

@@ -1,24 +1,19 @@
use std::collections::HashMap;

use crate::codec::SumOrDoNothingCombiner;
use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
use crate::index::SegmentReader;
use crate::postings::FreqReadingOption;
use crate::query::disjunction::Disjunction;
use crate::query::explanation::does_not_match;
use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner};
use crate::query::term_query::TermScorer;
use crate::query::weight::{for_each_docset_buffered, for_each_pruning_scorer, for_each_scorer};
use crate::query::weight::for_each_docset_buffered;
use crate::query::{
intersect_scorers, AllScorer, BufferedUnionScorer, EmptyScorer, Exclude, Explanation, Occur,
RequiredOptionalScorer, Scorer, Weight,
box_scorer, intersect_scorers, AllScorer, BufferedUnionScorer, EmptyScorer, Exclude,
Explanation, Occur, RequiredOptionalScorer, Scorer, SumCombiner, Weight,
};
use crate::{DocId, Score};

enum SpecializedScorer {
TermUnion(Vec<TermScorer>),
Other(Box<dyn Scorer>),
}

fn scorer_disjunction<TScoreCombiner>(
scorers: Vec<Box<dyn Scorer>>,
score_combiner: TScoreCombiner,
@@ -32,7 +27,7 @@ where
if scorers.len() == 1 {
return scorers.into_iter().next().unwrap(); // Safe unwrap.
}
Box::new(Disjunction::new(
box_scorer(Disjunction::new(
scorers,
score_combiner,
minimum_match_required,
@@ -44,57 +39,39 @@ fn scorer_union<TScoreCombiner>(
scorers: Vec<Box<dyn Scorer>>,
score_combiner_fn: impl Fn() -> TScoreCombiner,
num_docs: u32,
) -> SpecializedScorer
reader: &dyn SegmentReader,
) -> Box<dyn Scorer>
where
TScoreCombiner: ScoreCombiner,
{
assert!(!scorers.is_empty());
if scorers.len() == 1 {
return SpecializedScorer::Other(scorers.into_iter().next().unwrap()); //< we checked the size beforehand
}

{
let is_all_term_queries = scorers.iter().all(|scorer| scorer.is::<TermScorer>());
if is_all_term_queries {
let scorers: Vec<TermScorer> = scorers
.into_iter()
.map(|scorer| *(scorer.downcast::<TermScorer>().map_err(|_| ()).unwrap()))
.collect();
if scorers
.iter()
.all(|scorer| scorer.freq_reading_option() == FreqReadingOption::ReadFreq)
match scorers.len() {
0 => box_scorer(EmptyScorer),
1 => scorers.into_iter().next().unwrap(),
_ => {
let combiner_opt: Option<SumOrDoNothingCombiner> = if std::any::TypeId::of::<
TScoreCombiner,
>() == std::any::TypeId::of::<
SumCombiner,
>() {
Some(SumOrDoNothingCombiner::Sum)
} else if std::any::TypeId::of::<TScoreCombiner>()
== std::any::TypeId::of::<DoNothingCombiner>()
{
// Block wand is only available if we read frequencies.
return SpecializedScorer::TermUnion(scorers);
Some(SumOrDoNothingCombiner::DoNothing)
} else {
return SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
None
};
if let Some(combiner) = combiner_opt {
reader.build_union_scorer_with_sum_combiner(scorers, num_docs, combiner)
} else {
box_scorer(BufferedUnionScorer::build(
scorers,
score_combiner_fn,
num_docs,
)));
))
}
}
}
SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
scorers,
score_combiner_fn,
num_docs,
)))
}

fn into_box_scorer<TScoreCombiner: ScoreCombiner>(
scorer: SpecializedScorer,
score_combiner_fn: impl Fn() -> TScoreCombiner,
num_docs: u32,
) -> Box<dyn Scorer> {
match scorer {
SpecializedScorer::TermUnion(term_scorers) => {
let union_scorer =
BufferedUnionScorer::build(term_scorers, score_combiner_fn, num_docs);
Box::new(union_scorer)
}
SpecializedScorer::Other(scorer) => scorer,
}
}
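The `TypeId` comparison above is a specialization trick: a generic function picks a concrete fast path when its type parameter happens to be one of a few known types. A detached sketch of the pattern, with illustrative names rather than tantivy's:

    use std::any::TypeId;

    #[derive(Debug, PartialEq)]
    enum CombinerKind {
        Sum,
        DoNothing,
        Generic,
    }

    struct SumCombiner;
    struct DoNothingCombiner;
    struct MaxCombiner;

    // Pick a concrete fast path when the generic parameter is a known type.
    fn combiner_kind<T: 'static>() -> CombinerKind {
        if TypeId::of::<T>() == TypeId::of::<SumCombiner>() {
            CombinerKind::Sum
        } else if TypeId::of::<T>() == TypeId::of::<DoNothingCombiner>() {
            CombinerKind::DoNothing
        } else {
            CombinerKind::Generic
        }
    }

    fn main() {
        assert_eq!(combiner_kind::<SumCombiner>(), CombinerKind::Sum);
        assert_eq!(combiner_kind::<DoNothingCombiner>(), CombinerKind::DoNothing);
        assert_eq!(combiner_kind::<MaxCombiner>(), CombinerKind::Generic);
    }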

/// Returns the effective MUST scorer, accounting for removed AllScorers.
@@ -110,7 +87,7 @@ fn effective_must_scorer(
if must_scorers.is_empty() {
if removed_all_scorer_count > 0 {
// Had AllScorer(s) only - all docs match
Some(Box::new(AllScorer::new(max_doc)))
Some(box_scorer(AllScorer::new(max_doc)))
} else {
// No MUST constraint at all
None
@@ -128,28 +105,26 @@ fn effective_must_scorer(
/// When `scoring_enabled` is false, we can just return AllScorer alone since
/// we don't need score contributions from the should_scorer.
fn effective_should_scorer_for_union<TScoreCombiner: ScoreCombiner>(
should_scorer: SpecializedScorer,
should_scorer: Box<dyn Scorer>,
removed_all_scorer_count: usize,
max_doc: DocId,
num_docs: u32,
score_combiner_fn: impl Fn() -> TScoreCombiner,
scoring_enabled: bool,
) -> SpecializedScorer {
) -> Box<dyn Scorer> {
if removed_all_scorer_count > 0 {
if scoring_enabled {
// Need to union to get score contributions from both
let all_scorers: Vec<Box<dyn Scorer>> = vec![
into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
Box::new(AllScorer::new(max_doc)),
];
SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
let all_scorers: Vec<Box<dyn Scorer>> =
vec![should_scorer, box_scorer(AllScorer::new(max_doc))];
box_scorer(BufferedUnionScorer::build(
all_scorers,
score_combiner_fn,
num_docs,
)))
))
} else {
// Scoring disabled - AllScorer alone is sufficient
SpecializedScorer::Other(Box::new(AllScorer::new(max_doc)))
box_scorer(AllScorer::new(max_doc))
}
} else {
should_scorer
@@ -160,9 +135,9 @@ enum ShouldScorersCombinationMethod {
// Should scorers are irrelevant.
Ignored,
// Only contributes to final score.
Optional(SpecializedScorer),
Optional(Box<dyn Scorer>),
// Regardless of score, the should scorers may impact whether a document is matching or not.
Required(SpecializedScorer),
Required(Box<dyn Scorer>),
}

/// Weight associated to the `BoolQuery`.
@@ -205,7 +180,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {

fn per_occur_scorers(
&self,
reader: &SegmentReader,
reader: &dyn SegmentReader,
boost: Score,
) -> crate::Result<HashMap<Occur, Vec<Box<dyn Scorer>>>> {
let mut per_occur_scorers: HashMap<Occur, Vec<Box<dyn Scorer>>> = HashMap::new();
@@ -221,10 +196,10 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {

fn complex_scorer<TComplexScoreCombiner: ScoreCombiner>(
&self,
reader: &SegmentReader,
reader: &dyn SegmentReader,
boost: Score,
score_combiner_fn: impl Fn() -> TComplexScoreCombiner,
) -> crate::Result<SpecializedScorer> {
) -> crate::Result<Box<dyn Scorer>> {
let num_docs = reader.num_docs();
let mut per_occur_scorers = self.per_occur_scorers(reader, boost)?;

@@ -234,7 +209,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
let must_special_scorer_counts = remove_and_count_all_and_empty_scorers(&mut must_scorers);

if must_special_scorer_counts.num_empty_scorers > 0 {
return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
return Ok(box_scorer(EmptyScorer));
}

let mut should_scorers = per_occur_scorers.remove(&Occur::Should).unwrap_or_default();
@@ -249,7 +224,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {

if exclude_special_scorer_counts.num_all_scorers > 0 {
// We exclude all documents at one point.
return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
return Ok(box_scorer(EmptyScorer));
}

let effective_minimum_number_should_match = self
@@ -261,7 +236,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
if effective_minimum_number_should_match > num_of_should_scorers {
// We don't have enough scorers to satisfy the minimum number of should matches.
// The request will match no documents.
return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
return Ok(box_scorer(EmptyScorer));
}
match effective_minimum_number_should_match {
0 if num_of_should_scorers == 0 => ShouldScorersCombinationMethod::Ignored,
@@ -269,11 +244,13 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
should_scorers,
&score_combiner_fn,
num_docs,
reader,
)),
1 => ShouldScorersCombinationMethod::Required(scorer_union(
should_scorers,
&score_combiner_fn,
num_docs,
reader,
)),
n if num_of_should_scorers == n => {
// When num_of_should_scorers equals the number of should clauses,
@@ -281,12 +258,10 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
must_scorers.append(&mut should_scorers);
ShouldScorersCombinationMethod::Ignored
}
_ => ShouldScorersCombinationMethod::Required(SpecializedScorer::Other(
scorer_disjunction(
should_scorers,
score_combiner_fn(),
effective_minimum_number_should_match,
),
_ => ShouldScorersCombinationMethod::Required(scorer_disjunction(
should_scorers,
score_combiner_fn(),
effective_minimum_number_should_match,
)),
}
};
@@ -303,8 +278,8 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
reader.max_doc(),
num_docs,
)
.unwrap_or_else(|| Box::new(EmptyScorer));
SpecializedScorer::Other(boxed_scorer)
.unwrap_or_else(|| box_scorer(EmptyScorer));
boxed_scorer
}
(ShouldScorersCombinationMethod::Optional(should_scorer), must_scorers) => {
// Optional SHOULD: contributes to scoring but not required for matching.
@@ -329,16 +304,12 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
Some(must_scorer) => {
// Has MUST constraint: SHOULD only affects scoring.
if self.scoring_enabled {
SpecializedScorer::Other(Box::new(RequiredOptionalScorer::<
_,
_,
TScoreCombiner,
>::new(
box_scorer(RequiredOptionalScorer::<_, _, TScoreCombiner>::new(
must_scorer,
into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
)))
should_scorer,
))
} else {
SpecializedScorer::Other(must_scorer)
must_scorer
}
}
}
@@ -358,12 +329,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
}
Some(must_scorer) => {
// Has MUST constraint: intersect MUST with SHOULD.
let should_boxed =
into_box_scorer(should_scorer, &score_combiner_fn, num_docs);
SpecializedScorer::Other(intersect_scorers(
vec![must_scorer, should_boxed],
num_docs,
))
intersect_scorers(vec![must_scorer, should_scorer], num_docs)
}
}
}
@@ -372,19 +338,18 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
return Ok(include_scorer);
}

let include_scorer_boxed = into_box_scorer(include_scorer, &score_combiner_fn, num_docs);
let scorer: Box<dyn Scorer> = if exclude_scorers.len() == 1 {
let exclude_scorer = exclude_scorers.pop().unwrap();
match exclude_scorer.downcast::<TermScorer>() {
// Cast to TermScorer succeeded
Ok(exclude_scorer) => Box::new(Exclude::new(include_scorer_boxed, *exclude_scorer)),
Ok(exclude_scorer) => Box::new(Exclude::new(include_scorer, *exclude_scorer)),
// We get back the original Box<dyn Scorer>
Err(exclude_scorer) => Box::new(Exclude::new(include_scorer_boxed, exclude_scorer)),
Err(exclude_scorer) => Box::new(Exclude::new(include_scorer, exclude_scorer)),
}
} else {
Box::new(Exclude::new(include_scorer_boxed, exclude_scorers))
Box::new(Exclude::new(include_scorer, exclude_scorers))
};
Ok(SpecializedScorer::Other(scorer))
Ok(scorer)
}
}

@@ -413,8 +378,7 @@ fn remove_and_count_all_and_empty_scorers(
}

impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombiner> {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
let num_docs = reader.num_docs();
|
||||
fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
|
||||
if self.weights.is_empty() {
|
||||
Ok(Box::new(EmptyScorer))
|
||||
} else if self.weights.len() == 1 {
|
||||
@@ -426,18 +390,12 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
|
||||
}
|
||||
} else if self.scoring_enabled {
|
||||
self.complex_scorer(reader, boost, &self.score_combiner_fn)
|
||||
.map(|specialized_scorer| {
|
||||
into_box_scorer(specialized_scorer, &self.score_combiner_fn, num_docs)
|
||||
})
|
||||
} else {
|
||||
self.complex_scorer(reader, boost, DoNothingCombiner::default)
|
||||
.map(|specialized_scorer| {
|
||||
into_box_scorer(specialized_scorer, DoNothingCombiner::default, num_docs)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
|
||||
fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result<Explanation> {
|
||||
let mut scorer = self.scorer(reader, 1.0)?;
|
||||
if scorer.seek(doc) != doc {
|
||||
return Err(does_not_match(doc));
|
||||
@@ -459,47 +417,22 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
|
||||
|
||||
fn for_each(
|
||||
&self,
|
||||
reader: &SegmentReader,
|
||||
reader: &dyn SegmentReader,
|
||||
callback: &mut dyn FnMut(DocId, Score),
|
||||
) -> crate::Result<()> {
|
||||
let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
|
||||
match scorer {
|
||||
SpecializedScorer::TermUnion(term_scorers) => {
|
||||
let mut union_scorer = BufferedUnionScorer::build(
|
||||
term_scorers,
|
||||
&self.score_combiner_fn,
|
||||
reader.num_docs(),
|
||||
);
|
||||
for_each_scorer(&mut union_scorer, callback);
|
||||
}
|
||||
SpecializedScorer::Other(mut scorer) => {
|
||||
for_each_scorer(scorer.as_mut(), callback);
|
||||
}
|
||||
}
|
||||
let mut scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
|
||||
scorer.for_each(callback);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn for_each_no_score(
|
||||
&self,
|
||||
reader: &SegmentReader,
|
||||
reader: &dyn SegmentReader,
|
||||
callback: &mut dyn FnMut(&[DocId]),
|
||||
) -> crate::Result<()> {
|
||||
let scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?;
|
||||
let mut scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?;
|
||||
let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
|
||||
|
||||
match scorer {
|
||||
SpecializedScorer::TermUnion(term_scorers) => {
|
||||
let mut union_scorer = BufferedUnionScorer::build(
|
||||
term_scorers,
|
||||
&self.score_combiner_fn,
|
||||
reader.num_docs(),
|
||||
);
|
||||
for_each_docset_buffered(&mut union_scorer, &mut buffer, callback);
|
||||
}
|
||||
SpecializedScorer::Other(mut scorer) => {
|
||||
for_each_docset_buffered(scorer.as_mut(), &mut buffer, callback);
|
||||
}
|
||||
}
|
||||
for_each_docset_buffered(scorer.as_mut(), &mut buffer, callback);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -516,18 +449,11 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
|
||||
fn for_each_pruning(
|
||||
&self,
|
||||
threshold: Score,
|
||||
reader: &SegmentReader,
|
||||
reader: &dyn SegmentReader,
|
||||
callback: &mut dyn FnMut(DocId, Score) -> Score,
|
||||
) -> crate::Result<()> {
|
||||
let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
|
||||
match scorer {
|
||||
SpecializedScorer::TermUnion(term_scorers) => {
|
||||
super::block_wand(term_scorers, threshold, callback);
|
||||
}
|
||||
SpecializedScorer::Other(mut scorer) => {
|
||||
for_each_pruning_scorer(scorer.as_mut(), threshold, callback);
|
||||
}
|
||||
}
|
||||
reader.for_each_pruning(threshold, scorer, callback);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
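Note on `box_scorer`: its definition is not included in this excerpt, but judging from the call sites above and the re-export `pub use self::scorer::{box_scorer, Scorer};` further down, it is presumably a small type-erasing helper along these lines (a sketch, not the actual definition):

    pub fn box_scorer<S: Scorer + 'static>(scorer: S) -> Box<dyn Scorer> {
        Box::new(scorer)
    }

It replaces the two-variant `SpecializedScorer` enum: call sites now return a uniform `Box<dyn Scorer>` instead of distinguishing the `TermUnion` fast path from the generic `Other` case, and the specialized handling (buffered unions, block-wand pruning) appears to move behind `Scorer` and `SegmentReader` methods such as `scorer.for_each(..)` and `reader.for_each_pruning(..)` above.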
@@ -1,8 +1,6 @@
mod block_wand;
mod boolean_query;
mod boolean_weight;

pub(crate) use self::block_wand::{block_wand, block_wand_single_scorer};
pub use self::boolean_query::BooleanQuery;
pub use self::boolean_weight::BooleanWeight;

@@ -67,11 +67,11 @@ impl BoostWeight {
}

impl Weight for BoostWeight {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
self.weight.scorer(reader, boost * self.boost)
}

fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result<Explanation> {
fn explain(&self, reader: &dyn SegmentReader, doc: u32) -> crate::Result<Explanation> {
let underlying_explanation = self.weight.explain(reader, doc)?;
let score = underlying_explanation.value() * self.boost;
let mut explanation =
@@ -80,7 +80,7 @@ impl Weight for BoostWeight {
Ok(explanation)
}

fn count(&self, reader: &SegmentReader) -> crate::Result<u32> {
fn count(&self, reader: &dyn SegmentReader) -> crate::Result<u32> {
self.weight.count(reader)
}
}

@@ -1,7 +1,7 @@
use std::fmt;

use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
use crate::query::{box_scorer, EnableScoring, Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, Term};

/// `ConstScoreQuery` is a wrapper over a query to provide a constant score.
@@ -63,12 +63,15 @@ impl ConstWeight {
}

impl Weight for ConstWeight {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
let inner_scorer = self.weight.scorer(reader, boost)?;
Ok(Box::new(ConstScorer::new(inner_scorer, boost * self.score)))
Ok(box_scorer(ConstScorer::new(
inner_scorer,
boost * self.score,
)))
}

fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result<Explanation> {
fn explain(&self, reader: &dyn SegmentReader, doc: u32) -> crate::Result<Explanation> {
let mut scorer = self.scorer(reader, 1.0)?;
if scorer.seek(doc) != doc {
return Err(TantivyError::InvalidArgument(format!(
@@ -81,7 +84,7 @@ impl Weight for ConstWeight {
Ok(explanation)
}

fn count(&self, reader: &SegmentReader) -> crate::Result<u32> {
fn count(&self, reader: &dyn SegmentReader) -> crate::Result<u32> {
self.weight.count(reader)
}
}

@@ -2,7 +2,7 @@ use super::Scorer;
use crate::docset::TERMINATED;
use crate::index::SegmentReader;
use crate::query::explanation::does_not_match;
use crate::query::{EnableScoring, Explanation, Query, Weight};
use crate::query::{box_scorer, EnableScoring, Explanation, Query, Weight};
use crate::{DocId, DocSet, Score, Searcher};

/// `EmptyQuery` is a dummy `Query` in which no document matches.
@@ -26,11 +26,11 @@ impl Query for EmptyQuery {
/// It is useful for tests and handling edge cases.
pub struct EmptyWeight;
impl Weight for EmptyWeight {
fn scorer(&self, _reader: &SegmentReader, _boost: Score) -> crate::Result<Box<dyn Scorer>> {
Ok(Box::new(EmptyScorer))
fn scorer(&self, _reader: &dyn SegmentReader, _boost: Score) -> crate::Result<Box<dyn Scorer>> {
Ok(box_scorer(EmptyScorer))
}

fn explain(&self, _reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
fn explain(&self, _reader: &dyn SegmentReader, doc: DocId) -> crate::Result<Explanation> {
Err(does_not_match(doc))
}
}

@@ -3,7 +3,7 @@ use core::fmt::Debug;
use columnar::{ColumnIndex, DynamicColumn};
use common::BitSet;

use super::{ConstScorer, EmptyScorer};
use super::{box_scorer, ConstScorer, EmptyScorer};
use crate::docset::{DocSet, TERMINATED};
use crate::index::SegmentReader;
use crate::query::all_query::AllScorer;
@@ -98,7 +98,7 @@ pub struct ExistsWeight {
}

impl Weight for ExistsWeight {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
let fast_field_reader = reader.fast_fields();
let mut column_handles = fast_field_reader.dynamic_column_handles(&self.field_name)?;
if self.field_type == Type::Json && self.json_subpaths {
@@ -117,7 +117,7 @@ impl Weight for ExistsWeight {
}
}
if non_empty_columns.is_empty() {
return Ok(Box::new(EmptyScorer));
return Ok(box_scorer(EmptyScorer));
}

// If any column is full, all docs match.
@@ -128,9 +128,9 @@ impl Weight for ExistsWeight {
{
let all_scorer = AllScorer::new(max_doc);
if boost != 1.0f32 {
return Ok(Box::new(BoostScorer::new(all_scorer, boost)));
return Ok(box_scorer(BoostScorer::new(all_scorer, boost)));
} else {
return Ok(Box::new(all_scorer));
return Ok(box_scorer(all_scorer));
}
}

@@ -138,7 +138,7 @@ impl Weight for ExistsWeight {
// NOTE: A lower number may be better for very sparse columns
if non_empty_columns.len() < 4 {
let docset = ExistsDocSet::new(non_empty_columns, reader.max_doc());
return Ok(Box::new(ConstScorer::new(docset, boost)));
return Ok(box_scorer(ConstScorer::new(docset, boost)));
}

// If we have many dynamic columns, precompute a bitset of matching docs
@@ -162,10 +162,10 @@ impl Weight for ExistsWeight {
}
}
let docset = BitSetDocSet::from(doc_bitset);
Ok(Box::new(ConstScorer::new(docset, boost)))
Ok(box_scorer(ConstScorer::new(docset, boost)))
}

fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result<Explanation> {
let mut scorer = self.scorer(reader, 1.0)?;
if scorer.seek(doc) != doc {
return Err(does_not_match(doc));

@@ -1,7 +1,7 @@
use super::size_hint::estimate_intersection;
use crate::docset::{DocSet, SeekDangerResult, TERMINATED};
use crate::query::term_query::TermScorer;
use crate::query::{EmptyScorer, Scorer};
use crate::query::{box_scorer, EmptyScorer, Scorer};
use crate::{DocId, Score};

/// Returns the intersection scorer.
@@ -20,7 +20,7 @@ pub fn intersect_scorers(
num_docs_segment: u32,
) -> Box<dyn Scorer> {
if scorers.is_empty() {
return Box::new(EmptyScorer);
return box_scorer(EmptyScorer);
}
if scorers.len() == 1 {
return scorers.pop().unwrap();
@@ -29,7 +29,7 @@ pub fn intersect_scorers(
scorers.sort_by_key(|scorer| scorer.cost());
let doc = go_to_first_doc(&mut scorers[..]);
if doc == TERMINATED {
return Box::new(EmptyScorer);
return box_scorer(EmptyScorer);
}
// We know that we have at least 2 elements.
let left = scorers.remove(0);
@@ -38,14 +38,14 @@ pub fn intersect_scorers(
.iter()
.all(|&scorer| scorer.is::<TermScorer>());
if all_term_scorers {
return Box::new(Intersection {
return box_scorer(Intersection {
left: *(left.downcast::<TermScorer>().map_err(|_| ()).unwrap()),
right: *(right.downcast::<TermScorer>().map_err(|_| ()).unwrap()),
others: scorers,
num_docs: num_docs_segment,
});
}
Box::new(Intersection {
box_scorer(Intersection {
left,
right,
others: scorers,
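`intersect_scorers` sorts by `cost()` so the sparsest scorer drives the intersection, then calls `go_to_first_doc` to align every docset on the first common document. The helper's body lies outside this excerpt; a minimal sketch of the leapfrog loop it presumably performs, assuming only tantivy's `DocSet::seek` contract (advance to the first doc >= target, or TERMINATED):

    use crate::docset::{DocSet, TERMINATED};
    use crate::DocId;

    /// Sketch: returns the smallest doc contained in every docset, or TERMINATED.
    fn go_to_first_doc<D: DocSet>(docsets: &mut [D]) -> DocId {
        let mut candidate = docsets.iter().map(|d| d.doc()).max().unwrap_or(TERMINATED);
        'outer: loop {
            for docset in docsets.iter_mut() {
                let doc = docset.seek(candidate);
                if doc > candidate {
                    // Overshot the candidate: restart the round with the new one.
                    candidate = doc;
                    continue 'outer;
                }
            }
            // Every docset now sits exactly on `candidate`.
            return candidate;
        }
    }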
@@ -24,7 +24,7 @@ mod reqopt_scorer;
mod scorer;
mod set_query;
mod size_hint;
mod term_query;
pub(crate) mod term_query;
mod union;
mod weight;

@@ -54,13 +54,14 @@ pub use self::more_like_this::{MoreLikeThisQuery, MoreLikeThisQueryBuilder};
pub use self::phrase_prefix_query::PhrasePrefixQuery;
pub use self::phrase_query::regex_phrase_query::{wildcard_query_to_regex_str, RegexPhraseQuery};
pub use self::phrase_query::PhraseQuery;
pub(crate) use self::phrase_query::PhraseScorer;
pub use self::query::{EnableScoring, Query, QueryClone};
pub use self::query_parser::{QueryParser, QueryParserError};
pub use self::range_query::*;
pub use self::regex_query::RegexQuery;
pub use self::reqopt_scorer::RequiredOptionalScorer;
pub use self::score_combiner::{DisjunctionMaxCombiner, ScoreCombiner, SumCombiner};
pub use self::scorer::Scorer;
pub use self::scorer::{box_scorer, Scorer};
pub use self::set_query::TermSetQuery;
pub use self::term_query::TermQuery;
pub use self::union::BufferedUnionScorer;

@@ -2,7 +2,7 @@ use crate::docset::{DocSet, SeekDangerResult, TERMINATED};
use crate::fieldnorm::FieldNormReader;
use crate::postings::Postings;
use crate::query::bm25::Bm25Weight;
use crate::query::phrase_query::{intersection_count, PhraseScorer};
use crate::query::phrase_query::{intersection_exists, PhraseScorer};
use crate::query::Scorer;
use crate::{DocId, Score};

@@ -100,7 +100,6 @@ pub struct PhrasePrefixScorer<TPostings: Postings> {
phrase_scorer: PhraseKind<TPostings>,
suffixes: Vec<TPostings>,
suffix_offset: u32,
phrase_count: u32,
suffix_position_buffer: Vec<u32>,
}

@@ -144,7 +143,6 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
phrase_scorer,
suffixes,
suffix_offset: (max_offset - suffix_pos) as u32,
phrase_count: 0,
suffix_position_buffer: Vec::with_capacity(100),
};
if phrase_prefix_scorer.doc() != TERMINATED && !phrase_prefix_scorer.matches_prefix() {
@@ -153,12 +151,7 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
phrase_prefix_scorer
}

pub fn phrase_count(&self) -> u32 {
self.phrase_count
}

fn matches_prefix(&mut self) -> bool {
let mut count = 0;
let current_doc = self.doc();
let pos_matching = self.phrase_scorer.get_intersection();
for suffix in &mut self.suffixes {
@@ -168,11 +161,12 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
let doc = suffix.seek(current_doc);
if doc == current_doc {
suffix.positions_with_offset(self.suffix_offset, &mut self.suffix_position_buffer);
count += intersection_count(pos_matching, &self.suffix_position_buffer);
if intersection_exists(pos_matching, &self.suffix_position_buffer) {
return true;
}
}
}
self.phrase_count = count as u32;
count != 0
false
}
}
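The switch from `intersection_count` to `intersection_exists` changes `matches_prefix` from counting every position at which the phrase intersection and a suffix posting agree to bailing out at the first agreement; the tally only existed to feed `phrase_count`, which `PhrasePrefixScorer` no longer exposes. Both helpers are two-pointer walks over sorted position lists. The `intersection_exists` body appears only partially in phrase_scorer.rs below, so here is a self-contained sketch of the presumable full walk (the elided middle compares the heads and advances the smaller side):

    use std::cmp::Ordering;

    /// Returns true iff two sorted u32 slices share at least one element.
    fn intersection_exists(left: &[u32], right: &[u32]) -> bool {
        let (mut i, mut j) = (0, 0);
        while i < left.len() && j < right.len() {
            match left[i].cmp(&right[j]) {
                Ordering::Less => i += 1,       // advance the smaller side
                Ordering::Greater => j += 1,
                Ordering::Equal => return true, // common position found
            }
        }
        false
    }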
@@ -1,12 +1,11 @@
use super::{prefix_end, PhrasePrefixScorer};
use crate::fieldnorm::FieldNormReader;
use crate::index::SegmentReader;
use crate::postings::SegmentPostings;
use crate::postings::Postings;
use crate::query::bm25::Bm25Weight;
use crate::query::explanation::does_not_match;
use crate::query::{EmptyScorer, Explanation, Scorer, Weight};
use crate::query::{box_scorer, EmptyScorer, Scorer, Weight};
use crate::schema::{IndexRecordOption, Term};
use crate::{DocId, DocSet, Score};
use crate::Score;

pub struct PhrasePrefixWeight {
phrase_terms: Vec<(usize, Term)>,
@@ -32,7 +31,7 @@ impl PhrasePrefixWeight {
}
}

fn fieldnorm_reader(&self, reader: &SegmentReader) -> crate::Result<FieldNormReader> {
fn fieldnorm_reader(&self, reader: &dyn SegmentReader) -> crate::Result<FieldNormReader> {
let field = self.phrase_terms[0].1.field();
if self.similarity_weight_opt.is_some() {
if let Some(fieldnorm_reader) = reader.fieldnorms_readers().get_field(field)? {
@@ -44,15 +43,15 @@ impl PhrasePrefixWeight {

pub(crate) fn phrase_scorer(
&self,
reader: &SegmentReader,
reader: &dyn SegmentReader,
boost: Score,
) -> crate::Result<Option<PhrasePrefixScorer<SegmentPostings>>> {
) -> crate::Result<Option<Box<dyn Scorer>>> {
let similarity_weight_opt = self
.similarity_weight_opt
.as_ref()
.map(|similarity_weight| similarity_weight.boost_by(boost));
let fieldnorm_reader = self.fieldnorm_reader(reader)?;
let mut term_postings_list = Vec::new();
let mut term_postings_list: Vec<(usize, Box<dyn Postings>)> = Vec::new();
for &(offset, ref term) in &self.phrase_terms {
if let Some(postings) = reader
.inverted_index(term.field())?
@@ -103,49 +102,32 @@ impl PhrasePrefixWeight {
}
}

Ok(Some(PhrasePrefixScorer::new(
Ok(Some(box_scorer(PhrasePrefixScorer::new(
term_postings_list,
similarity_weight_opt,
fieldnorm_reader,
suffixes,
self.prefix.0,
)))
))))
}
}

impl Weight for PhrasePrefixWeight {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
if let Some(scorer) = self.phrase_scorer(reader, boost)? {
Ok(Box::new(scorer))
Ok(scorer)
} else {
Ok(Box::new(EmptyScorer))
Ok(box_scorer(EmptyScorer))
}
}

fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
let scorer_opt = self.phrase_scorer(reader, 1.0)?;
if scorer_opt.is_none() {
return Err(does_not_match(doc));
}
let mut scorer = scorer_opt.unwrap();
if scorer.seek(doc) != doc {
return Err(does_not_match(doc));
}
let fieldnorm_reader = self.fieldnorm_reader(reader)?;
let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc);
let phrase_count = scorer.phrase_count();
let mut explanation = Explanation::new("Phrase Prefix Scorer", scorer.score());
if let Some(similarity_weight) = self.similarity_weight_opt.as_ref() {
explanation.add_detail(similarity_weight.explain(fieldnorm_id, phrase_count));
}
Ok(explanation)
}
}

#[cfg(test)]
mod tests {
use crate::docset::TERMINATED;
use crate::index::Index;
use crate::postings::Postings;
use crate::query::phrase_prefix_query::PhrasePrefixScorer;
use crate::query::{EnableScoring, PhrasePrefixQuery, Query};
use crate::schema::{Schema, TEXT};
use crate::{DocSet, IndexWriter, Term};
@@ -186,14 +168,14 @@ mod tests {
.phrase_prefix_query_weight(enable_scoring)
.unwrap()
.unwrap();
let mut phrase_scorer = phrase_weight
let mut phrase_scorer_boxed = phrase_weight
.phrase_scorer(searcher.segment_reader(0u32), 1.0)?
.unwrap();
let phrase_scorer: &mut PhrasePrefixScorer<Box<dyn Postings>> =
phrase_scorer_boxed.as_any_mut().downcast_mut().unwrap();
assert_eq!(phrase_scorer.doc(), 1);
assert_eq!(phrase_scorer.phrase_count(), 2);
assert_eq!(phrase_scorer.advance(), 2);
assert_eq!(phrase_scorer.doc(), 2);
assert_eq!(phrase_scorer.phrase_count(), 1);
assert_eq!(phrase_scorer.advance(), TERMINATED);
Ok(())
}
@@ -213,14 +195,15 @@ mod tests {
.phrase_prefix_query_weight(enable_scoring)
.unwrap()
.unwrap();
let mut phrase_scorer = phrase_weight
let mut phrase_scorer_boxed = phrase_weight
.phrase_scorer(searcher.segment_reader(0u32), 1.0)?
.unwrap();
let phrase_scorer = phrase_scorer_boxed
.downcast_mut::<PhrasePrefixScorer<Box<dyn Postings>>>()
.unwrap();
assert_eq!(phrase_scorer.doc(), 1);
assert_eq!(phrase_scorer.phrase_count(), 2);
assert_eq!(phrase_scorer.advance(), 2);
assert_eq!(phrase_scorer.doc(), 2);
assert_eq!(phrase_scorer.phrase_count(), 1);
assert_eq!(phrase_scorer.advance(), TERMINATED);
Ok(())
}

@@ -5,7 +5,7 @@ pub mod regex_phrase_query;
mod regex_phrase_weight;

pub use self::phrase_query::PhraseQuery;
pub(crate) use self::phrase_scorer::intersection_count;
pub(crate) use self::phrase_scorer::intersection_exists;
pub use self::phrase_scorer::PhraseScorer;
pub use self::phrase_weight::PhraseWeight;

@@ -126,7 +126,7 @@ impl PhraseQuery {
};
let mut weight = PhraseWeight::new(self.phrase_terms.clone(), bm25_weight_opt);
if self.slop > 0 {
weight.slop(self.slop);
weight.set_slop(self.slop);
}
Ok(weight)
}

@@ -1,10 +1,11 @@
use std::cmp::Ordering;

use crate::codec::standard::postings::StandardPostings;
use crate::docset::{DocSet, SeekDangerResult, TERMINATED};
use crate::fieldnorm::FieldNormReader;
use crate::postings::Postings;
use crate::query::bm25::Bm25Weight;
use crate::query::{Intersection, Scorer};
use crate::query::{Explanation, Intersection, Scorer};
use crate::{DocId, Score};

struct PostingsWithOffset<TPostings> {
@@ -43,7 +44,7 @@ impl<TPostings: Postings> DocSet for PostingsWithOffset<TPostings> {
}
}

pub struct PhraseScorer<TPostings: Postings> {
pub struct PhraseScorer<TPostings: Postings = StandardPostings> {
intersection_docset: Intersection<PostingsWithOffset<TPostings>, PostingsWithOffset<TPostings>>,
num_terms: usize,
left_positions: Vec<u32>,
@@ -58,7 +59,7 @@ pub struct PhraseScorer<TPostings: Postings> {
}

/// Returns true if and only if the two sorted arrays contain a common element
fn intersection_exists(left: &[u32], right: &[u32]) -> bool {
pub(crate) fn intersection_exists(left: &[u32], right: &[u32]) -> bool {
let mut left_index = 0;
let mut right_index = 0;
while left_index < left.len() && right_index < right.len() {
@@ -79,7 +80,7 @@ fn intersection_exists(left: &[u32], right: &[u32]) -> bool {
false
}

pub(crate) fn intersection_count(left: &[u32], right: &[u32]) -> usize {
fn intersection_count(left: &[u32], right: &[u32]) -> usize {
let mut left_index = 0;
let mut right_index = 0;
let mut count = 0;
@@ -402,6 +403,7 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
scorer
}

/// Returns the number of phrases identified in the current matching doc.
pub fn phrase_count(&self) -> u32 {
self.phrase_count
}
@@ -584,6 +586,17 @@ impl<TPostings: Postings> Scorer for PhraseScorer<TPostings> {
1.0f32
}
}

fn explain(&mut self) -> Explanation {
let doc = self.doc();
let phrase_count = self.phrase_count();
let fieldnorm_id = self.fieldnorm_reader.fieldnorm_id(doc);
let mut explanation = Explanation::new("Phrase Scorer", self.score());
if let Some(similarity_weight) = self.similarity_weight_opt.as_ref() {
explanation.add_detail(similarity_weight.explain(fieldnorm_id, phrase_count));
}
explanation
}
}

#[cfg(test)]
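A note on `PhraseScorer<TPostings: Postings = StandardPostings>`: the default type parameter is what lets callers keep writing the bare name `PhraseScorer` (as the tests below do with `downcast::<PhraseScorer>()`) while the postings type becomes pluggable per codec. The mechanism in isolation, with toy types unrelated to tantivy:

    struct Wrapper<T = u32> {
        value: T,
    }

    fn main() {
        // Bare `Wrapper` now means `Wrapper<u32>`...
        let default_width: Wrapper = Wrapper { value: 1u32 };
        // ...while other instantiations stay available.
        let wide: Wrapper<u64> = Wrapper { value: 1u64 };
        println!("{} {}", default_width.value, wide.value);
    }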
@@ -1,11 +1,10 @@
use super::PhraseScorer;
use crate::fieldnorm::FieldNormReader;
use crate::index::SegmentReader;
use crate::postings::SegmentPostings;
use crate::postings::TermInfo;
use crate::query::bm25::Bm25Weight;
use crate::query::explanation::does_not_match;
use crate::query::{EmptyScorer, Explanation, Scorer, Weight};
use crate::schema::{IndexRecordOption, Term};
use crate::query::{box_scorer, EmptyScorer, Explanation, Scorer, Weight};
use crate::schema::Term;
use crate::{DocId, DocSet, Score};

pub struct PhraseWeight {
@@ -21,15 +20,14 @@ impl PhraseWeight {
phrase_terms: Vec<(usize, Term)>,
similarity_weight_opt: Option<Bm25Weight>,
) -> PhraseWeight {
let slop = 0;
PhraseWeight {
phrase_terms,
similarity_weight_opt,
slop,
slop: 0,
}
}

fn fieldnorm_reader(&self, reader: &SegmentReader) -> crate::Result<FieldNormReader> {
fn fieldnorm_reader(&self, reader: &dyn SegmentReader) -> crate::Result<FieldNormReader> {
let field = self.phrase_terms[0].1.field();
if self.similarity_weight_opt.is_some() {
if let Some(fieldnorm_reader) = reader.fieldnorms_readers().get_field(field)? {
@@ -41,48 +39,67 @@ impl PhraseWeight {

pub(crate) fn phrase_scorer(
&self,
reader: &SegmentReader,
reader: &dyn SegmentReader,
boost: Score,
) -> crate::Result<Option<PhraseScorer<SegmentPostings>>> {
) -> crate::Result<Option<Box<dyn Scorer>>> {
let similarity_weight_opt = self
.similarity_weight_opt
.as_ref()
.map(|similarity_weight| similarity_weight.boost_by(boost));
let fieldnorm_reader = self.fieldnorm_reader(reader)?;
let mut term_postings_list = Vec::new();
for &(offset, ref term) in &self.phrase_terms {
if let Some(postings) = reader
.inverted_index(term.field())?
.read_postings(term, IndexRecordOption::WithFreqsAndPositions)?
{
term_postings_list.push((offset, postings));
} else {
return Ok(None);
}

if self.phrase_terms.is_empty() {
return Ok(None);
}
Ok(Some(PhraseScorer::new(
term_postings_list,
let field = self.phrase_terms[0].1.field();

if !self
.phrase_terms
.iter()
.all(|(_offset, term)| term.field() == field)
{
return Err(crate::TantivyError::InvalidArgument(
"All terms in a phrase query must belong to the same field".to_string(),
));
}

let inverted_index_reader = reader.inverted_index(field)?;

let mut term_infos: Vec<(usize, TermInfo)> = Vec::with_capacity(self.phrase_terms.len());

for &(offset, ref term) in &self.phrase_terms {
let Some(term_info) = inverted_index_reader.get_term_info(term)? else {
return Ok(None);
};
term_infos.push((offset, term_info));
}

let scorer = inverted_index_reader.new_phrase_scorer(
&term_infos[..],
similarity_weight_opt,
fieldnorm_reader,
self.slop,
)))
)?;

Ok(Some(scorer))
}

pub fn slop(&mut self, slop: u32) {
/// Sets the slop for the given PhraseWeight.
pub fn set_slop(&mut self, slop: u32) {
self.slop = slop;
}
}

impl Weight for PhraseWeight {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
if let Some(scorer) = self.phrase_scorer(reader, boost)? {
Ok(Box::new(scorer))
Ok(scorer)
} else {
Ok(Box::new(EmptyScorer))
Ok(box_scorer(EmptyScorer))
}
}

fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result<Explanation> {
let scorer_opt = self.phrase_scorer(reader, 1.0)?;
if scorer_opt.is_none() {
return Err(does_not_match(doc));
@@ -91,14 +108,7 @@ impl Weight for PhraseWeight {
if scorer.seek(doc) != doc {
return Err(does_not_match(doc));
}
let fieldnorm_reader = self.fieldnorm_reader(reader)?;
let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc);
let phrase_count = scorer.phrase_count();
let mut explanation = Explanation::new("Phrase Scorer", scorer.score());
if let Some(similarity_weight) = self.similarity_weight_opt.as_ref() {
explanation.add_detail(similarity_weight.explain(fieldnorm_id, phrase_count));
}
Ok(explanation)
Ok(scorer.explain())
}
}

@@ -106,7 +116,8 @@ impl Weight for PhraseWeight {
mod tests {
use super::super::tests::create_index;
use crate::docset::TERMINATED;
use crate::query::{EnableScoring, PhraseQuery};
use crate::query::phrase_query::PhraseScorer;
use crate::query::{EnableScoring, PhraseQuery, Scorer};
use crate::{DocSet, Term};

#[test]
@@ -121,9 +132,11 @@ mod tests {
]);
let enable_scoring = EnableScoring::enabled_from_searcher(&searcher);
let phrase_weight = phrase_query.phrase_weight(enable_scoring).unwrap();
let mut phrase_scorer = phrase_weight
let phrase_scorer_boxed: Box<dyn Scorer> = phrase_weight
.phrase_scorer(searcher.segment_reader(0u32), 1.0)?
.unwrap();
let mut phrase_scorer: Box<PhraseScorer> =
phrase_scorer_boxed.downcast::<PhraseScorer>().ok().unwrap();
assert_eq!(phrase_scorer.doc(), 1);
assert_eq!(phrase_scorer.phrase_count(), 2);
assert_eq!(phrase_scorer.advance(), 2);
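The rewritten `phrase_scorer` above resolves a `TermInfo` per term and delegates construction to the codec through `inverted_index_reader.new_phrase_scorer(..)`, so the concrete postings type no longer leaks into `PhraseWeight`'s signature. It also gains an explicit guard that every term targets the same field, since all `TermInfo` lookups go through a single field's inverted index. A sketch of a unit test exercising that guard (the bindings `title`, `body`, and `reader` are assumed to come from a two-field test schema and a built segment):

    let phrase_terms = vec![
        (0, Term::from_field_text(title, "hello")),
        (1, Term::from_field_text(body, "world")), // different field!
    ];
    let weight = PhraseWeight::new(phrase_terms, None);
    // The new guard turns a mixed-field phrase into an explicit error:
    assert!(matches!(
        weight.phrase_scorer(reader, 1.0),
        Err(crate::TantivyError::InvalidArgument(_))
    ));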
use super::PhraseScorer;
|
||||
use crate::fieldnorm::FieldNormReader;
|
||||
use crate::index::SegmentReader;
|
||||
use crate::postings::{LoadedPostings, Postings, SegmentPostings, TermInfo};
|
||||
use crate::postings::{LoadedPostings, Postings, TermInfo};
|
||||
use crate::query::bm25::Bm25Weight;
|
||||
use crate::query::explanation::does_not_match;
|
||||
use crate::query::union::{BitSetPostingUnion, SimpleUnion};
|
||||
use crate::query::{AutomatonWeight, BitSetDocSet, EmptyScorer, Explanation, Scorer, Weight};
|
||||
use crate::query::{
|
||||
box_scorer, AutomatonWeight, BitSetDocSet, EmptyScorer, Explanation, Scorer, Weight,
|
||||
};
|
||||
use crate::schema::{Field, IndexRecordOption};
|
||||
use crate::{DocId, DocSet, InvertedIndexReader, Score};
|
||||
|
||||
@@ -45,7 +47,7 @@ impl RegexPhraseWeight {
|
||||
}
|
||||
}
|
||||
|
||||
fn fieldnorm_reader(&self, reader: &SegmentReader) -> crate::Result<FieldNormReader> {
|
||||
fn fieldnorm_reader(&self, reader: &dyn SegmentReader) -> crate::Result<FieldNormReader> {
|
||||
if self.similarity_weight_opt.is_some() {
|
||||
if let Some(fieldnorm_reader) = reader.fieldnorms_readers().get_field(self.field)? {
|
||||
return Ok(fieldnorm_reader);
|
||||
@@ -56,7 +58,7 @@ impl RegexPhraseWeight {
|
||||
|
||||
pub(crate) fn phrase_scorer(
|
||||
&self,
|
||||
reader: &SegmentReader,
|
||||
reader: &dyn SegmentReader,
|
||||
boost: Score,
|
||||
) -> crate::Result<Option<PhraseScorer<UnionType>>> {
|
||||
let similarity_weight_opt = self
|
||||
@@ -84,7 +86,8 @@ impl RegexPhraseWeight {
|
||||
"Phrase query exceeded max expansions {num_terms}"
|
||||
)));
|
||||
}
|
||||
let union = Self::get_union_from_term_infos(&term_infos, reader, &inverted_index)?;
|
||||
let union =
|
||||
Self::get_union_from_term_infos(&term_infos, reader, inverted_index.as_ref())?;
|
||||
|
||||
posting_lists.push((offset, union));
|
||||
}
|
||||
@@ -99,22 +102,11 @@ impl RegexPhraseWeight {
|
||||
|
||||
/// Add all docs of the term to the docset
|
||||
fn add_to_bitset(
|
||||
inverted_index: &InvertedIndexReader,
|
||||
inverted_index: &dyn InvertedIndexReader,
|
||||
term_info: &TermInfo,
|
||||
doc_bitset: &mut BitSet,
|
||||
) -> crate::Result<()> {
|
||||
let mut block_segment_postings = inverted_index
|
||||
.read_block_postings_from_terminfo(term_info, IndexRecordOption::Basic)?;
|
||||
loop {
|
||||
let docs = block_segment_postings.docs();
|
||||
if docs.is_empty() {
|
||||
break;
|
||||
}
|
||||
for &doc in docs {
|
||||
doc_bitset.insert(doc);
|
||||
}
|
||||
block_segment_postings.advance();
|
||||
}
|
||||
inverted_index.fill_bitset_for_term(term_info, IndexRecordOption::Basic, doc_bitset)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
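`fill_bitset_for_term` absorbs the block-postings loop deleted above into the `InvertedIndexReader` trait, letting each codec fill the bitset from its own postings representation. A sketch of the presumably equivalent behavior behind the new entry point (the same loop, reshaped as a trait method):

    fn fill_bitset_for_term(
        &self,
        term_info: &TermInfo,
        option: IndexRecordOption,
        doc_bitset: &mut BitSet,
    ) -> crate::Result<()> {
        let mut block_postings = self.read_block_postings_from_terminfo(term_info, option)?;
        loop {
            let docs = block_postings.docs();
            if docs.is_empty() {
                return Ok(());
            }
            for &doc in docs {
                doc_bitset.insert(doc);
            }
            block_postings.advance();
        }
    }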
@@ -174,8 +166,8 @@ impl RegexPhraseWeight {
/// Use Roaring Bitmaps for sparse terms. The full bitvec is main memory consumer currently.
pub(crate) fn get_union_from_term_infos(
term_infos: &[TermInfo],
reader: &SegmentReader,
inverted_index: &InvertedIndexReader,
reader: &dyn SegmentReader,
inverted_index: &dyn InvertedIndexReader,
) -> crate::Result<UnionType> {
let max_doc = reader.max_doc();

@@ -188,7 +180,7 @@ impl RegexPhraseWeight {
// - Bucket 1: Terms appearing in 0.1% to 1% of documents
// - Bucket 2: Terms appearing in 1% to 10% of documents
// - Bucket 3: Terms appearing in more than 10% of documents
let mut buckets: Vec<(BitSet, Vec<SegmentPostings>)> = (0..4)
let mut buckets: Vec<(BitSet, Vec<Box<dyn Postings>>)> = (0..4)
.map(|_| (BitSet::with_max_value(max_doc), Vec::new()))
.collect();

@@ -197,7 +189,7 @@ impl RegexPhraseWeight {
for term_info in term_infos {
let mut term_posting = inverted_index
.read_postings_from_terminfo(term_info, IndexRecordOption::WithFreqsAndPositions)?;
let num_docs = term_posting.doc_freq();
let num_docs = u32::from(term_posting.doc_freq());

if num_docs < SPARSE_TERM_DOC_THRESHOLD {
let current_bucket = &mut sparse_buckets[0];
@@ -269,15 +261,15 @@ impl RegexPhraseWeight {
}

impl Weight for RegexPhraseWeight {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
if let Some(scorer) = self.phrase_scorer(reader, boost)? {
Ok(Box::new(scorer))
Ok(box_scorer(scorer))
} else {
Ok(Box::new(EmptyScorer))
Ok(box_scorer(EmptyScorer))
}
}

fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result<Explanation> {
let scorer_opt = self.phrase_scorer(reader, 1.0)?;
if scorer_opt.is_none() {
return Err(does_not_match(doc));

@@ -146,7 +146,7 @@ pub trait Query: QueryClone + Send + Sync + downcast_rs::Downcast + fmt::Debug {
let weight = self.weight(EnableScoring::disabled_from_searcher(searcher))?;
let mut result = 0;
for reader in searcher.segment_readers() {
result += weight.count(reader)? as usize;
result += weight.count(reader.as_ref())? as usize;
}
Ok(result)
}

@@ -2068,6 +2068,16 @@ mod test {
format!("Regex(Field(0), {:#?})", expected_regex).as_str(),
false,
);
let expected_regex2 = tantivy_fst::Regex::new(r".*a").unwrap();
test_parse_query_to_logical_ast_helper(
"title:(/.*b/ OR /.*a/)",
format!(
"(Regex(Field(0), {:#?}) Regex(Field(0), {:#?}))",
expected_regex, expected_regex2
)
.as_str(),
false,
);

// Invalid field
let err = parse_query_to_logical_ast("float:/.*b/", false).unwrap_err();

@@ -19,7 +19,8 @@ pub(crate) fn is_type_valid_for_fastfield_range_query(typ: Type) -> bool {
| Type::Bool
| Type::Date
| Type::Json
| Type::IpAddr => true,
Type::Facet | Type::Bytes => false,
| Type::IpAddr
| Type::Bytes => true,
Type::Facet => false,
}
}
Some files were not shown because too many files have changed in this diff